// tasks.proto
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "TasksProto";
option java_package = "com.google.cloud.dataplex.v1";
  25. // A task represents a user-visible job.
  26. message Task {
  27. option (google.api.resource) = {
  28. type: "dataplex.googleapis.com/Task"
  29. pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}"
  30. };
  31. // Configuration for the underlying infrastructure used to run workloads.
  32. message InfrastructureSpec {
  33. // Batch compute resources associated with the task.
  34. message BatchComputeResources {
  35. // Optional. Total number of job executors.
  36. // Executor Count should be between 2 and 100. [Default=2]
  37. int32 executors_count = 1 [(google.api.field_behavior) = OPTIONAL];
  38. // Optional. Max configurable executors.
  39. // If max_executors_count > executors_count, then auto-scaling is enabled.
  40. // Max Executor Count should be between 2 and 1000. [Default=1000]
  41. int32 max_executors_count = 2 [(google.api.field_behavior) = OPTIONAL];
  42. }
  43. // Container Image Runtime Configuration used with Batch execution.
  44. message ContainerImageRuntime {
  45. // Optional. Container image to use.
  46. string image = 1 [(google.api.field_behavior) = OPTIONAL];
  47. // Optional. A list of Java JARS to add to the classpath.
  48. // Valid input includes Cloud Storage URIs to Jar binaries.
  49. // For example, gs://bucket-name/my/path/to/file.jar
  50. repeated string java_jars = 2 [(google.api.field_behavior) = OPTIONAL];
  51. // Optional. A list of python packages to be installed.
  52. // Valid formats include Cloud Storage URI to a PIP installable library.
  53. // For example, gs://bucket-name/my/path/to/lib.tar.gz
  54. repeated string python_packages = 3 [(google.api.field_behavior) = OPTIONAL];
  55. // Optional. Override to common configuration of open source components installed on
  56. // the Dataproc cluster.
  57. // The properties to set on daemon config files.
  58. // Property keys are specified in `prefix:property` format, for example
  59. // `core:hadoop.tmp.dir`.
  60. // For more information, see [Cluster
  61. // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  62. map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];
  63. }
  64. // Cloud VPC Network used to run the infrastructure.
  65. message VpcNetwork {
  66. // The Cloud VPC network identifier.
  67. oneof network_name {
  68. // Optional. The Cloud VPC network in which the job is run. By default, the Cloud
  69. // VPC network named Default within the project is used.
  70. string network = 1 [(google.api.field_behavior) = OPTIONAL];
  71. // Optional. The Cloud VPC sub-network in which the job is run.
  72. string sub_network = 2 [(google.api.field_behavior) = OPTIONAL];
  73. }
  74. // Optional. List of network tags to apply to the job.
  75. repeated string network_tags = 3 [(google.api.field_behavior) = OPTIONAL];
  76. }
  77. // Hardware config.
  78. oneof resources {
  79. // Compute resources needed for a Task when using Dataproc Serverless.
  80. BatchComputeResources batch = 52;
  81. }
  82. // Software config.
  83. oneof runtime {
  84. // Container Image Runtime Configuration.
  85. ContainerImageRuntime container_image = 101;
  86. }
  87. // Networking config.
  88. oneof network {
  89. // Vpc network.
  90. VpcNetwork vpc_network = 150;
  91. }
  92. }
  93. // Task scheduling and trigger settings.
  94. message TriggerSpec {
  95. // Determines how often and when the job will run.
  96. enum Type {
  97. // Unspecified trigger type.
  98. TYPE_UNSPECIFIED = 0;
  99. // The task runs one-time shortly after Task Creation.
  100. ON_DEMAND = 1;
  101. // The task is scheduled to run periodically.
  102. RECURRING = 2;
  103. }
  104. // Required. Immutable. Trigger type of the user-specified Task.
  105. Type type = 5 [
  106. (google.api.field_behavior) = REQUIRED,
  107. (google.api.field_behavior) = IMMUTABLE
  108. ];
  109. // Optional. The first run of the task will be after this time.
  110. // If not specified, the task will run shortly after being submitted if
  111. // ON_DEMAND and based on the schedule if RECURRING.
  112. google.protobuf.Timestamp start_time = 6 [(google.api.field_behavior) = OPTIONAL];
  113. // Optional. Prevent the task from executing.
  114. // This does not cancel already running tasks. It is intended to temporarily
  115. // disable RECURRING tasks.
  116. bool disabled = 4 [(google.api.field_behavior) = OPTIONAL];
  117. // Optional. Number of retry attempts before aborting.
  118. // Set to zero to never attempt to retry a failed task.
  119. int32 max_retries = 7 [(google.api.field_behavior) = OPTIONAL];
  120. // Trigger only applies for RECURRING tasks.
  121. oneof trigger {
  122. // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
  123. // tasks periodically.
  124. // To explicitly set a timezone to the cron tab, apply a prefix in the
  125. // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
  126. // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
  127. // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
  128. // "TZ=America/New_York 1 * * * *".
  129. // This field is required for RECURRING tasks.
  130. string schedule = 100 [(google.api.field_behavior) = OPTIONAL];
  131. }
  132. }
  133. // Execution related settings, like retry and service_account.
  134. message ExecutionSpec {
  135. // Optional. The arguments to pass to the task.
  136. // The args can use placeholders of the format ${placeholder} as
  137. // part of key/value string. These will be interpolated before passing the
  138. // args to the driver. Currently supported placeholders:
  139. // - ${task_id}
  140. // - ${job_time}
  141. // To pass positional args, set the key as TASK_ARGS. The value should be a
  142. // comma-separated string of all the positional arguments. To use a
  143. // delimiter other than comma, refer to
  144. // https://cloud.google.com/sdk/gcloud/reference/topic/escaping. In case of
  145. // other keys being present in the args, then TASK_ARGS will be passed as
  146. // the last argument.
  147. map<string, string> args = 4 [(google.api.field_behavior) = OPTIONAL];
  148. // Required. Service account to use to execute a task.
  149. // If not provided, the default Compute service account for the project is
  150. // used.
  151. string service_account = 5 [(google.api.field_behavior) = REQUIRED];
  152. // Optional. The project in which jobs are run. By default, the project containing the
  153. // Lake is used. If a project is provided, the
  154. // [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account] must belong to this project.
  155. string project = 7 [(google.api.field_behavior) = OPTIONAL];
  156. // Optional. The maximum duration after which the job execution is expired.
  157. google.protobuf.Duration max_job_execution_lifetime = 8 [(google.api.field_behavior) = OPTIONAL];
  158. // Optional. The Cloud KMS key to use for encryption, of the form:
  159. // `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.
  160. string kms_key = 9 [(google.api.field_behavior) = OPTIONAL];
  161. }
  162. // User-specified config for running a Spark task.
  163. message SparkTaskConfig {
  164. // Required. The specification of the main method to call to drive the
  165. // job. Specify either the jar file that contains the main class or the
  166. // main class name.
  167. oneof driver {
  168. // The Cloud Storage URI of the jar file that contains the main class.
  169. // The execution args are passed in as a sequence of named process
  170. // arguments (`--key=value`).
  171. string main_jar_file_uri = 100;
  172. // The name of the driver's main class. The jar file that contains the
  173. // class must be in the default CLASSPATH or specified in
  174. // `jar_file_uris`.
  175. // The execution args are passed in as a sequence of named process
  176. // arguments (`--key=value`).
  177. string main_class = 101;
  178. // The Gcloud Storage URI of the main Python file to use as the driver.
  179. // Must be a .py file. The execution args are passed in as a sequence of
  180. // named process arguments (`--key=value`).
  181. string python_script_file = 102;
  182. // A reference to a query file. This can be the Cloud Storage URI of the
  183. // query file or it can the path to a SqlScript Content. The execution
  184. // args are used to declare a set of script variables
  185. // (`set key="value";`).
  186. string sql_script_file = 104;
  187. // The query text.
  188. // The execution args are used to declare a set of script variables
  189. // (`set key="value";`).
  190. string sql_script = 105;
  191. }
  192. // Optional. Cloud Storage URIs of files to be placed in the working directory of each
  193. // executor.
  194. repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
  195. // Optional. Cloud Storage URIs of archives to be extracted into the working directory
  196. // of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
  197. // .zip.
  198. repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
  199. // Optional. Infrastructure specification for the execution.
  200. InfrastructureSpec infrastructure_spec = 6 [(google.api.field_behavior) = OPTIONAL];
  201. }
  202. // Config for running scheduled notebooks.
  203. message NotebookTaskConfig {
  204. // Required. Path to input notebook. This can be the Cloud Storage URI of the notebook
  205. // file or the path to a Notebook Content. The execution args are accessible
  206. // as environment variables
  207. // (`TASK_key=value`).
  208. string notebook = 4 [(google.api.field_behavior) = REQUIRED];
  209. // Optional. Infrastructure specification for the execution.
  210. InfrastructureSpec infrastructure_spec = 3 [(google.api.field_behavior) = OPTIONAL];
  211. // Optional. Cloud Storage URIs of files to be placed in the working directory of each
  212. // executor.
  213. repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
  214. // Optional. Cloud Storage URIs of archives to be extracted into the working directory
  215. // of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
  216. // .zip.
  217. repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
  218. }
  219. // Status of the task execution (e.g. Jobs).
  220. message ExecutionStatus {
  221. // Output only. Last update time of the status.
  222. google.protobuf.Timestamp update_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  223. // Output only. latest job execution
  224. Job latest_job = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
  225. }
  226. // Output only. The relative resource name of the task, of the form:
  227. // projects/{project_number}/locations/{location_id}/lakes/{lake_id}/
  228. // tasks/{task_id}.
  229. string name = 1 [
  230. (google.api.field_behavior) = OUTPUT_ONLY,
  231. (google.api.resource_reference) = {
  232. type: "dataplex.googleapis.com/Task"
  233. }
  234. ];
  235. // Output only. System generated globally unique ID for the task. This ID will be
  236. // different if the task is deleted and re-created with the same name.
  237. string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  238. // Output only. The time when the task was created.
  239. google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  240. // Output only. The time when the task was last updated.
  241. google.protobuf.Timestamp update_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
  242. // Optional. Description of the task.
  243. string description = 5 [(google.api.field_behavior) = OPTIONAL];
  244. // Optional. User friendly display name.
  245. string display_name = 6 [(google.api.field_behavior) = OPTIONAL];
  246. // Output only. Current state of the task.
  247. State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
  248. // Optional. User-defined labels for the task.
  249. map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
  250. // Required. Spec related to how often and when a task should be triggered.
  251. TriggerSpec trigger_spec = 100 [(google.api.field_behavior) = REQUIRED];
  252. // Required. Spec related to how a task is executed.
  253. ExecutionSpec execution_spec = 101 [(google.api.field_behavior) = REQUIRED];
  254. // Output only. Status of the latest task executions.
  255. ExecutionStatus execution_status = 201 [(google.api.field_behavior) = OUTPUT_ONLY];
  256. // Task template specific user-specified config.
  257. oneof config {
  258. // Config related to running custom Spark tasks.
  259. SparkTaskConfig spark = 300;
  260. // Config related to running scheduled Notebooks.
  261. NotebookTaskConfig notebook = 302;
  262. }
  263. }
  264. // A job represents an instance of a task.
  265. message Job {
  266. option (google.api.resource) = {
  267. type: "dataplex.googleapis.com/Job"
  268. pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}/jobs/{job}"
  269. };
  270. enum Service {
  271. // Service used to run the job is unspecified.
  272. SERVICE_UNSPECIFIED = 0;
  273. // Dataproc service is used to run this job.
  274. DATAPROC = 1;
  275. }
  276. enum State {
  277. // The job state is unknown.
  278. STATE_UNSPECIFIED = 0;
  279. // The job is running.
  280. RUNNING = 1;
  281. // The job is cancelling.
  282. CANCELLING = 2;
  283. // The job cancellation was successful.
  284. CANCELLED = 3;
  285. // The job completed successfully.
  286. SUCCEEDED = 4;
  287. // The job is no longer running due to an error.
  288. FAILED = 5;
  289. // The job was cancelled outside of Dataplex.
  290. ABORTED = 6;
  291. }
  292. // Output only. The relative resource name of the job, of the form:
  293. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.
  294. string name = 1 [
  295. (google.api.field_behavior) = OUTPUT_ONLY,
  296. (google.api.resource_reference) = {
  297. type: "dataplex.googleapis.com/Job"
  298. }
  299. ];
  300. // Output only. System generated globally unique ID for the job.
  301. string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  302. // Output only. The time when the job was started.
  303. google.protobuf.Timestamp start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  304. // Output only. The time when the job ended.
  305. google.protobuf.Timestamp end_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
  306. // Output only. Execution state for the job.
  307. State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
  308. // Output only. The number of times the job has been retried (excluding the
  309. // initial attempt).
  310. uint32 retry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
  311. // Output only. The underlying service running a job.
  312. Service service = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
  313. // Output only. The full resource name for the job run under a particular service.
  314. string service_job = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
  315. // Output only. Additional information about the current state.
  316. string message = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
  317. }