- // Copyright 2021 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- syntax = "proto3";
- package google.cloud.dataproc.v1;
- import "google/api/annotations.proto";
- import "google/api/client.proto";
- import "google/api/field_behavior.proto";
- import "google/longrunning/operations.proto";
- import "google/protobuf/empty.proto";
- import "google/protobuf/field_mask.proto";
- import "google/protobuf/timestamp.proto";
- option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
- option java_multiple_files = true;
- option java_outer_classname = "JobsProto";
- option java_package = "com.google.cloud.dataproc.v1";
- // The JobController provides methods to manage jobs.
- service JobController {
- option (google.api.default_host) = "dataproc.googleapis.com";
- option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
- // Submits a job to a cluster.
- rpc SubmitJob(SubmitJobRequest) returns (Job) {
- option (google.api.http) = {
- post: "/v1/projects/{project_id}/regions/{region}/jobs:submit"
- body: "*"
- };
- option (google.api.method_signature) = "project_id,region,job";
- }
- // Submits a job to a cluster as a long-running operation.
- rpc SubmitJobAsOperation(SubmitJobRequest) returns (google.longrunning.Operation) {
- option (google.api.http) = {
- post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation"
- body: "*"
- };
- option (google.api.method_signature) = "project_id, region, job";
- option (google.longrunning.operation_info) = {
- response_type: "Job"
- metadata_type: "JobMetadata"
- };
- }
- // Gets the resource representation for a job in a project.
- rpc GetJob(GetJobRequest) returns (Job) {
- option (google.api.http) = {
- get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
- };
- option (google.api.method_signature) = "project_id,region,job_id";
- }
- // Lists regions/{region}/jobs in a project.
- rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
- option (google.api.http) = {
- get: "/v1/projects/{project_id}/regions/{region}/jobs"
- };
- option (google.api.method_signature) = "project_id,region";
- option (google.api.method_signature) = "project_id,region,filter";
- }
- // Updates a job in a project.
- rpc UpdateJob(UpdateJobRequest) returns (Job) {
- option (google.api.http) = {
- patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
- body: "job"
- };
- }
- // Starts a job cancellation request. To access the job resource
- // after cancellation, call
- // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list)
- // or
- // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get).
- rpc CancelJob(CancelJobRequest) returns (Job) {
- option (google.api.http) = {
- post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel"
- body: "*"
- };
- option (google.api.method_signature) = "project_id,region,job_id";
- }
- // Deletes the job from the project. If the job is active, the delete fails,
- // and the response returns `FAILED_PRECONDITION`.
- rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
- option (google.api.http) = {
- delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
- };
- option (google.api.method_signature) = "project_id,region,job_id";
- }
- }
- // The runtime logging config of the job.
- message LoggingConfig {
- // The Log4j level for job execution. When running an
- // [Apache Hive](https://hive.apache.org/) job, Cloud
- // Dataproc configures the Hive client to an equivalent verbosity level.
- enum Level {
- // Level is unspecified. Use default level for log4j.
- LEVEL_UNSPECIFIED = 0;
- // Use ALL level for log4j.
- ALL = 1;
- // Use TRACE level for log4j.
- TRACE = 2;
- // Use DEBUG level for log4j.
- DEBUG = 3;
- // Use INFO level for log4j.
- INFO = 4;
- // Use WARN level for log4j.
- WARN = 5;
- // Use ERROR level for log4j.
- ERROR = 6;
- // Use FATAL level for log4j.
- FATAL = 7;
- // Turn off log4j.
- OFF = 8;
- }
- // The per-package log levels for the driver. This may include the
- // "root" package name to configure rootLogger.
- // Examples:
- // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
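- // In the JSON mapping these become map entries, for example (the package
- // names and levels here are illustrative):
- //
- //   "driverLogLevels": {
- //     "root": "INFO",
- //     "org.apache": "DEBUG"
- //   }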
- map<string, Level> driver_log_levels = 2;
- }
- // A Dataproc job for running
- // [Apache Hadoop
- // MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
- // jobs on [Apache Hadoop
- // YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
- message HadoopJob {
- // Required. Indicates the location of the driver's main class. Specify
- // either the jar file that contains the main class or the main class name.
- // To specify both, add the jar file to `jar_file_uris`, and then specify
- // the main class name in this property.
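- // For example, a minimal sketch that runs a main class packaged in a
- // dependency jar (the bucket, jar, and class names are placeholders):
- //
- //   "hadoopJob": {
- //     "mainClass": "com.example.WordCount",
- //     "jarFileUris": ["gs://my-bucket/wordcount.jar"]
- //   }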
- oneof driver {
- // The HCFS URI of the jar file containing the main class.
- // Examples:
- // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
- // 'hdfs:/tmp/test-samples/custom-wordcount.jar'
- // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- string main_jar_file_uri = 1;
- // The name of the driver's main class. The jar file containing the class
- // must be in the default CLASSPATH or specified in `jar_file_uris`.
- string main_class = 2;
- }
- // Optional. The arguments to pass to the driver. Do not
- // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
- // job properties, since a collision may occur that causes an incorrect job
- // submission.
- repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Jar file URIs to add to the CLASSPATHs of the
- // Hadoop driver and tasks.
- repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
- // to the working directory of Hadoop drivers and distributed tasks. Useful
- // for naively parallel tasks.
- repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of archives to be extracted in the working directory of
- // Hadoop drivers and tasks. Supported file types:
- // .jar, .tar, .tar.gz, .tgz, or .zip.
- repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure Hadoop.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
- // classes in user code.
- map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running [Apache Spark](http://spark.apache.org/)
- // applications on YARN.
- message SparkJob {
- // Required. The specification of the main method to call to drive the job.
- // Specify either the jar file that contains the main class or the main class
- // name. To pass both a main jar and a main class in that jar, add the jar to
- // `jar_file_uris`, and then specify the main class name in
- // `main_class`.
- oneof driver {
- // The HCFS URI of the jar file that contains the main class.
- string main_jar_file_uri = 1;
- // The name of the driver's main class. The jar file that contains the class
- // must be in the default CLASSPATH or specified in `jar_file_uris`.
- string main_class = 2;
- }
- // Optional. The arguments to pass to the driver. Do not include arguments,
- // such as `--conf`, that can be set as job properties, since a collision may
- // occur that causes an incorrect job submission.
- repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
- // Spark driver and tasks.
- repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of files to be placed in the working directory of
- // each executor. Useful for naively parallel tasks.
- repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of archives to be extracted into the working directory
- // of each executor. Supported file types:
- // .jar, .tar, .tar.gz, .tgz, and .zip.
- repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure Spark.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in
- // /etc/spark/conf/spark-defaults.conf and classes in user code.
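- // For example, a sketch with one illustrative entry (any valid Spark
- // property can appear here):
- //
- //   "properties": { "spark.executor.memory": "4g" }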
- map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running
- // [Apache
- // PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
- // applications on YARN.
- message PySparkJob {
- // Required. The HCFS URI of the main Python file to use as the driver. Must
- // be a .py file.
- string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];
- // Optional. The arguments to pass to the driver. Do not include arguments,
- // such as `--conf`, that can be set as job properties, since a collision may
- // occur that causes an incorrect job submission.
- repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS file URIs of Python files to pass to the PySpark
- // framework. Supported file types: .py, .egg, and .zip.
- repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
- // Python driver and tasks.
- repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of files to be placed in the working directory of
- // each executor. Useful for naively parallel tasks.
- repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of archives to be extracted into the working directory
- // of each executor. Supported file types:
- // .jar, .tar, .tar.gz, .tgz, and .zip.
- repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure PySpark.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in
- // /etc/spark/conf/spark-defaults.conf and classes in user code.
- map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
- }
- // A list of queries to run on a cluster.
- message QueryList {
- // Required. The queries to execute. You do not need to end a query expression
- // with a semicolon. Multiple queries can be specified in one
- // string by separating each with a semicolon. Here is an example of a
- // Dataproc API snippet that uses a QueryList to specify a HiveJob:
- //
- // "hiveJob": {
- // "queryList": {
- // "queries": [
- // "query1",
- // "query2",
- // "query3;query4",
- // ]
- // }
- // }
- repeated string queries = 1 [(google.api.field_behavior) = REQUIRED];
- }
- // A Dataproc job for running [Apache Hive](https://hive.apache.org/)
- // queries on YARN.
- message HiveJob {
- // Required. The sequence of Hive queries to execute, specified as either
- // an HCFS file URI or a list of queries.
- oneof queries {
- // The HCFS URI of the script that contains Hive queries.
- string query_file_uri = 1;
- // A list of queries.
- QueryList query_list = 2;
- }
- // Optional. Whether to continue executing queries if a query fails.
- // The default value is `false`. Setting to `true` can be useful when
- // executing independent parallel queries.
- bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Mapping of query variable names to values (equivalent to the
- // Hive command: `SET name="value";`).
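- // For example (the variable name is illustrative),
- // `"scriptVariables": { "mytable": "sales" }` is equivalent to
- // `SET mytable="sales";`.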
- map<string, string> script_variables = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names and values, used to configure Hive.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
- // /etc/hive/conf/hive-site.xml, and classes in user code.
- map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of jar files to add to the CLASSPATH of the
- // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
- // and UDFs.
- repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running [Apache Spark
- // SQL](http://spark.apache.org/sql/) queries.
- message SparkSqlJob {
- // Required. The sequence of Spark SQL queries to execute, specified as
- // either an HCFS file URI or as a list of queries.
- oneof queries {
- // The HCFS URI of the script that contains SQL queries.
- string query_file_uri = 1;
- // A list of queries.
- QueryList query_list = 2;
- }
- // Optional. Mapping of query variable names to values (equivalent to the
- // Spark SQL command: `SET name="value";`).
- map<string, string> script_variables = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure
- // Spark SQL's SparkConf. Properties that conflict with values set by the
- // Dataproc API may be overwritten.
- map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
- repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running [Apache Pig](https://pig.apache.org/)
- // queries on YARN.
- message PigJob {
- // Required. The sequence of Pig queries to execute, specified as an HCFS
- // file URI or a list of queries.
- oneof queries {
- // The HCFS URI of the script that contains the Pig queries.
- string query_file_uri = 1;
- // A list of queries.
- QueryList query_list = 2;
- }
- // Optional. Whether to continue executing queries if a query fails.
- // The default value is `false`. Setting to `true` can be useful when
- // executing independent parallel queries.
- bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Mapping of query variable names to values (equivalent to the Pig
- // command: `name=[value]`).
- map<string, string> script_variables = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure Pig.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
- // /etc/pig/conf/pig.properties, and classes in user code.
- map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of jar files to add to the CLASSPATH of
- // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
- repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running
- // [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
- // applications on YARN.
- message SparkRJob {
- // Required. The HCFS URI of the main R file to use as the driver.
- // Must be a .R file.
- string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];
- // Optional. The arguments to pass to the driver. Do not include arguments,
- // such as `--conf`, that can be set as job properties, since a collision may
- // occur that causes an incorrect job submission.
- repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of files to be placed in the working directory of
- // each executor. Useful for naively parallel tasks.
- repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. HCFS URIs of archives to be extracted into the working directory
- // of each executor. Supported file types:
- // .jar, .tar, .tar.gz, .tgz, and .zip.
- repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, used to configure SparkR.
- // Properties that conflict with values set by the Dataproc API may be
- // overwritten. Can include properties set in
- // /etc/spark/conf/spark-defaults.conf and classes in user code.
- map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job for running [Presto](https://prestosql.io/) queries.
- // **IMPORTANT**: The [Dataproc Presto Optional
- // Component](https://cloud.google.com/dataproc/docs/concepts/components/presto)
- // must be enabled when the cluster is created to submit a Presto job to the
- // cluster.
- message PrestoJob {
- // Required. The sequence of Presto queries to execute, specified as
- // either an HCFS file URI or as a list of queries.
- oneof queries {
- // The HCFS URI of the script that contains SQL queries.
- string query_file_uri = 1;
- // A list of queries.
- QueryList query_list = 2;
- }
- // Optional. Whether to continue executing queries if a query fails.
- // The default value is `false`. Setting to `true` can be useful when
- // executing independent parallel queries.
- bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The format in which query output will be displayed. See the
- // Presto documentation for supported output formats.
- string output_format = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Presto client tags to attach to this query.
- repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values. Used to set Presto
- // [session properties](https://prestodb.io/docs/current/sql/set-session.html).
- // Equivalent to using the `--session` flag in the Presto CLI.
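- // For example (the session property name is illustrative):
- //
- //   "properties": { "query_max_run_time": "30m" }
- //
- // which corresponds to `--session query_max_run_time=30m` on the CLI.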
- map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The runtime log config for job execution.
- LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
- }
- // Dataproc job config.
- message JobPlacement {
- // Required. The name of the cluster where the job will be submitted.
- string cluster_name = 1 [(google.api.field_behavior) = REQUIRED];
- // Output only. A cluster UUID generated by the Dataproc service when
- // the job is submitted.
- string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. Cluster labels to identify a cluster where the job will be submitted.
- map<string, string> cluster_labels = 3 [(google.api.field_behavior) = OPTIONAL];
- }
- // Dataproc job status.
- message JobStatus {
- // The job state.
- enum State {
- // The job state is unknown.
- STATE_UNSPECIFIED = 0;
- // The job is pending; it has been submitted, but is not yet running.
- PENDING = 1;
- // Job has been received by the service and completed initial setup;
- // it will soon be submitted to the cluster.
- SETUP_DONE = 8;
- // The job is running on the cluster.
- RUNNING = 2;
- // A CancelJob request has been received, but is pending.
- CANCEL_PENDING = 3;
- // Transient in-flight resources have been canceled, and the request to
- // cancel the running job has been issued to the cluster.
- CANCEL_STARTED = 7;
- // The job cancellation was successful.
- CANCELLED = 4;
- // The job has completed successfully.
- DONE = 5;
- // The job has completed, but encountered an error.
- ERROR = 6;
- // Job attempt has failed. The detail field contains failure details for
- // this attempt.
- //
- // Applies to restartable jobs only.
- ATTEMPT_FAILURE = 9;
- }
- // The job substate.
- enum Substate {
- // The job substate is unknown.
- UNSPECIFIED = 0;
- // The Job is submitted to the agent.
- //
- // Applies to RUNNING state.
- SUBMITTED = 1;
- // The Job has been received and is awaiting execution (it may be waiting
- // for a condition to be met). See the "details" field for the reason for
- // the delay.
- //
- // Applies to RUNNING state.
- QUEUED = 2;
- // The agent-reported status is out of date, which may be caused by a
- // loss of communication between the agent and Dataproc. If the
- // agent does not send a timely update, the job will fail.
- //
- // Applies to RUNNING state.
- STALE_STATUS = 3;
- }
- // Output only. A state message specifying the overall job state.
- State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. Output only. Job state details, such as an error
- // description if the state is <code>ERROR</code>.
- string details = 2 [
- (google.api.field_behavior) = OUTPUT_ONLY,
- (google.api.field_behavior) = OPTIONAL
- ];
- // Output only. The time when this state was entered.
- google.protobuf.Timestamp state_start_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Additional state information, which includes
- // status reported by the agent.
- Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
- }
- // Encapsulates the full scoping used to reference a job.
- message JobReference {
- // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
- // specified, must match the request project ID.
- string project_id = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The job ID, which must be unique within the project.
- //
- // The ID must contain only letters (a-z, A-Z), numbers (0-9),
- // underscores (_), or hyphens (-). The maximum length is 100 characters.
- //
- // If not specified by the caller, the job ID will be provided by the server.
- string job_id = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // A YARN application created by a job. Application information is a subset of
- // <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- //
- // **Beta Feature**: This report is available for testing purposes only. It may
- // be changed before final release.
- message YarnApplication {
- // The application state, corresponding to
- // <code>YarnProtos.YarnApplicationStateProto</code>.
- enum State {
- // Status is unspecified.
- STATE_UNSPECIFIED = 0;
- // Status is NEW.
- NEW = 1;
- // Status is NEW_SAVING.
- NEW_SAVING = 2;
- // Status is SUBMITTED.
- SUBMITTED = 3;
- // Status is ACCEPTED.
- ACCEPTED = 4;
- // Status is RUNNING.
- RUNNING = 5;
- // Status is FINISHED.
- FINISHED = 6;
- // Status is FAILED.
- FAILED = 7;
- // Status is KILLED.
- KILLED = 8;
- }
- // Required. The application name.
- string name = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The application state.
- State state = 2 [(google.api.field_behavior) = REQUIRED];
- // Required. The numerical progress of the application, from 1 to 100.
- float progress = 3 [(google.api.field_behavior) = REQUIRED];
- // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
- // TimelineServer that provides application-specific information. The URL uses
- // the internal hostname, and requires a proxy server for resolution and,
- // possibly, access.
- string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL];
- }
- // A Dataproc job resource.
- message Job {
- // Optional. The fully qualified reference to the job, which can be used to
- // obtain the equivalent REST path of the job resource. If this property
- // is not specified when a job is created, the server generates a
- // <code>job_id</code>.
- JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL];
- // Required. Job information, including how, when, and where to
- // run the job.
- JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED];
- // Required. The application/framework-specific portion of the job.
- oneof type_job {
- // Optional. Job is a Hadoop job.
- HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a Spark job.
- SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a PySpark job.
- PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a Hive job.
- HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a Pig job.
- PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a SparkR job.
- SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a SparkSql job.
- SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job is a Presto job.
- PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL];
- }
- // Output only. The job status. Additional application-specific
- // status information may be contained in the <code>type_job</code>
- // and <code>yarn_applications</code> fields.
- JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The previous job status.
- repeated JobStatus status_history = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The collection of YARN applications spun up by this job.
- //
- // **Beta** Feature: This report is available for testing purposes only. It
- // may be changed before final release.
- repeated YarnApplication yarn_applications = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. A URI pointing to the location of the stdout of the job's
- // driver program.
- string driver_output_resource_uri = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. If present, the location of miscellaneous control files
- // which may be used as part of job setup and handling. If not present,
- // control files may be placed in the same location as
- // `driver_output_resource_uri`.
- string driver_control_files_uri = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. The labels to associate with this job.
- // Label **keys** must contain 1 to 63 characters, and must conform to
- // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
- // Label **values** may be empty, but, if present, must contain 1 to 63
- // characters, and must conform to [RFC
- // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
- // associated with a job.
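- // For example (illustrative keys and values):
- //
- //   "labels": { "env": "staging", "team": "analytics" }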
- map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Job scheduling configuration.
- JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL];
- // Output only. A UUID that uniquely identifies a job within the project
- // over time. This is in contrast to a user-settable reference.job_id that
- // may be reused over time.
- string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Indicates whether the job is completed. If the value is
- // `false`, the job is still in progress. If `true`, the job is completed,
- // and the `status.state` field indicates whether it was successful, failed,
- // or cancelled.
- bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY];
- }
- // Job scheduling options.
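- // For example, an illustrative configuration that tolerates up to four
- // driver restarts per hour and twenty in total (both within the documented
- // maxima):
- //
- //   "scheduling": {
- //     "maxFailuresPerHour": 4,
- //     "maxFailuresTotal": 20
- //   }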
- message JobScheduling {
- // Optional. Maximum number of times per hour a driver may be restarted as
- // a result of the driver exiting with a non-zero code before the job is
- // reported failed.
- //
- // A job may be reported as thrashing if the driver exits with a non-zero
- // code four times within a 10-minute window.
- //
- // Maximum value is 10.
- //
- // **Note:** Currently, this restartable job option is
- // not supported in Dataproc
- // [workflow
- // template](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template)
- // jobs.
- int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Maximum number of times in total a driver may be restarted as
- // a result of the driver exiting with a non-zero code before the job is
- // reported failed.
- // Maximum value is 240.
- //
- // **Note:** Currently, this restartable job option is
- // not supported in Dataproc
- // [workflow
- // template](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template)
- // jobs.
- int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // A request to submit a job.
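- // An illustrative request body in the JSON mapping (`project_id` and
- // `region` travel in the URL path; all values below are placeholders):
- //
- //   {
- //     "job": {
- //       "placement": { "clusterName": "my-cluster" },
- //       "sparkJob": {
- //         "mainClass": "org.example.SparkApp",
- //         "jarFileUris": ["gs://my-bucket/spark-app.jar"]
- //       }
- //     },
- //     "requestId": "d2c70b9f-92f6-4f29-a2b0-9a62e82a4e1f"
- //   }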
- message SubmitJobRequest {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 3 [(google.api.field_behavior) = REQUIRED];
- // Required. The job resource.
- Job job = 2 [(google.api.field_behavior) = REQUIRED];
- // Optional. A unique id used to identify the request. If the server
- // receives two
- // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s
- // with the same id, then the second request will be ignored and the
- // first [Job][google.cloud.dataproc.v1.Job] created and stored in the backend
- // is returned.
- //
- // It is recommended to always set this value to a
- // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
- //
- // The id must contain only letters (a-z, A-Z), numbers (0-9),
- // underscores (_), and hyphens (-). The maximum length is 40 characters.
- string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
- }
- // Job Operation metadata.
- message JobMetadata {
- // Output only. The job id.
- string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Most recent job status.
- JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Operation type.
- string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Job submission time.
- google.protobuf.Timestamp start_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
- }
- // A request to get the resource representation for a job in a project.
- message GetJobRequest {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 3 [(google.api.field_behavior) = REQUIRED];
- // Required. The job ID.
- string job_id = 2 [(google.api.field_behavior) = REQUIRED];
- }
- // A request to list jobs in a project.
- message ListJobsRequest {
- // A matcher that specifies categories of job states.
- enum JobStateMatcher {
- // Match all jobs, regardless of state.
- ALL = 0;
- // Only match jobs in non-terminal states: PENDING, RUNNING, or
- // CANCEL_PENDING.
- ACTIVE = 1;
- // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
- NON_ACTIVE = 2;
- }
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 6 [(google.api.field_behavior) = REQUIRED];
- // Optional. The number of results to return in each response.
- int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The page token, returned by a previous call, to request the
- // next page of results.
- string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. If set, the returned jobs list includes only jobs that were
- // submitted to the named cluster.
- string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Specifies enumerated categories of jobs to list.
- // (default = match ALL jobs).
- //
- // If `filter` is provided, `jobStateMatcher` will be ignored.
- JobStateMatcher job_state_matcher = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A filter constraining the jobs to list. Filters are
- // case-sensitive and have the following syntax:
- //
- // [field = value] AND [field [= value]] ...
- //
- // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label
- // key. **value** can be `*` to match all values.
- // `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
- // Only the logical `AND` operator is supported; space-separated items are
- // treated as having an implicit `AND` operator.
- //
- // Example filter:
- //
- // status.state = ACTIVE AND labels.env = staging AND labels.starred = *
- string filter = 7 [(google.api.field_behavior) = OPTIONAL];
- }
- // A request to update a job.
- message UpdateJobRequest {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 2 [(google.api.field_behavior) = REQUIRED];
- // Required. The job ID.
- string job_id = 3 [(google.api.field_behavior) = REQUIRED];
- // Required. The changes to the job.
- Job job = 4 [(google.api.field_behavior) = REQUIRED];
- // Required. Specifies the path, relative to <code>Job</code>, of
- // the field to update. For example, to update the labels of a Job the
- // <code>update_mask</code> parameter would be specified as
- // <code>labels</code>, and the `PATCH` request body would specify the new
- // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
- // field that can be updated.
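- // For example, an illustrative call that updates only the labels (the
- // project, region, and job IDs are placeholders):
- //
- //   PATCH /v1/projects/my-project/regions/us-central1/jobs/my-job?updateMask=labels
- //
- //   { "labels": { "env": "staging" } }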
- google.protobuf.FieldMask update_mask = 5 [(google.api.field_behavior) = REQUIRED];
- }
- // A list of jobs in a project.
- message ListJobsResponse {
- // Output only. Jobs list.
- repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. This token is included in the response if there are more results
- // to fetch. To fetch additional results, provide this value as the
- // `page_token` in a subsequent <code>ListJobsRequest</code>.
- string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // A request to cancel a job.
- message CancelJobRequest {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 3 [(google.api.field_behavior) = REQUIRED];
- // Required. The job ID.
- string job_id = 2 [(google.api.field_behavior) = REQUIRED];
- }
- // A request to delete a job.
- message DeleteJobRequest {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
- // Required. The Dataproc region in which to handle the request.
- string region = 3 [(google.api.field_behavior) = REQUIRED];
- // Required. The job ID.
- string job_id = 2 [(google.api.field_behavior) = REQUIRED];
- }