// batches.proto
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";
// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  // Returns a long-running Operation whose `response` is a `Batch` and whose
  // `metadata` is a `BatchOperationMetadata` (see operation_info below).
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in terminal state,
  // the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}
// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  // Format: "projects/{project}/locations/{location}" (the child_type
  // reference below resolves to the Batch resource pattern).
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The name of the batch to retrieve.
  // Format: "projects/{project}/locations/{location}/batches/{batch}".
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}
// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}
// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The name of the batch resource to delete.
  // Format: "projects/{project}/locations/{location}/batches/{batch}".
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataproc.googleapis.com/Batch"
    }
  ];
}
// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The batch state.
  // NOTE(review): values are not prefixed with the enum name (e.g.
  // `STATE_PENDING`) as current style guides recommend; renaming published
  // values would be source-breaking, so they are kept as-is.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch is created before running.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Unique Universal Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  // Exactly one of the following configs may be set.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered a current state.
  google.protobuf.Timestamp state_time = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
// A configuration for running an [Apache Spark](http://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains the class
    // must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}
// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}