// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

// Code-generation options for the Go and Java targets.
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in a terminal
  // state, the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}

// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The name of the batch to retrieve.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}

// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}

// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The name of the batch resource to delete.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch is created before running.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Universally Unique Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered its current state.
  google.protobuf.Timestamp state_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver.
  // Must be a `.py` file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running an [Apache Spark](http://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include
  // arguments that can be set as batch properties, such as `--conf`, since a
  // collision can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to
  // execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}