// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "TasksProto";
option java_package = "com.google.cloud.dataplex.v1";
- // A task represents a user-visible job.
- message Task {
- option (google.api.resource) = {
- type: "dataplex.googleapis.com/Task"
- pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}"
- };
- // Configuration for the underlying infrastructure used to run workloads.
- message InfrastructureSpec {
- // Batch compute resources associated with the task.
- message BatchComputeResources {
- // Optional. Total number of job executors.
- // Executor Count should be between 2 and 100. [Default=2]
- int32 executors_count = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Max configurable executors.
- // If max_executors_count > executors_count, then auto-scaling is enabled.
- // Max Executor Count should be between 2 and 1000. [Default=1000]
- int32 max_executors_count = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // Container Image Runtime Configuration used with Batch execution.
- message ContainerImageRuntime {
- // Optional. Container image to use.
- string image = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A list of Java JARS to add to the classpath.
- // Valid input includes Cloud Storage URIs to Jar binaries.
- // For example, gs://bucket-name/my/path/to/file.jar
- repeated string java_jars = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A list of python packages to be installed.
- // Valid formats include Cloud Storage URI to a PIP installable library.
- // For example, gs://bucket-name/my/path/to/lib.tar.gz
- repeated string python_packages = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Override to common configuration of open source components installed on
- // the Dataproc cluster.
- // The properties to set on daemon config files.
- // Property keys are specified in `prefix:property` format, for example
- // `core:hadoop.tmp.dir`.
- // For more information, see [Cluster
- // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
- map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];
- }
- // Cloud VPC Network used to run the infrastructure.
- message VpcNetwork {
- // The Cloud VPC network identifier.
- oneof network_name {
- // Optional. The Cloud VPC network in which the job is run. By default, the Cloud
- // VPC network named Default within the project is used.
- string network = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The Cloud VPC sub-network in which the job is run.
- string sub_network = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // Optional. List of network tags to apply to the job.
- repeated string network_tags = 3 [(google.api.field_behavior) = OPTIONAL];
- }
- // Hardware config.
- oneof resources {
- // Compute resources needed for a Task when using Dataproc Serverless.
- BatchComputeResources batch = 52;
- }
- // Software config.
- oneof runtime {
- // Container Image Runtime Configuration.
- ContainerImageRuntime container_image = 101;
- }
- // Networking config.
- oneof network {
- // Vpc network.
- VpcNetwork vpc_network = 150;
- }
- }
- // Task scheduling and trigger settings.
- message TriggerSpec {
- // Determines how often and when the job will run.
- enum Type {
- // Unspecified trigger type.
- TYPE_UNSPECIFIED = 0;
- // The task runs one-time shortly after Task Creation.
- ON_DEMAND = 1;
- // The task is scheduled to run periodically.
- RECURRING = 2;
- }
- // Required. Immutable. Trigger type of the user-specified Task.
- Type type = 5 [
- (google.api.field_behavior) = REQUIRED,
- (google.api.field_behavior) = IMMUTABLE
- ];
- // Optional. The first run of the task will be after this time.
- // If not specified, the task will run shortly after being submitted if
- // ON_DEMAND and based on the schedule if RECURRING.
- google.protobuf.Timestamp start_time = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Prevent the task from executing.
- // This does not cancel already running tasks. It is intended to temporarily
- // disable RECURRING tasks.
- bool disabled = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Number of retry attempts before aborting.
- // Set to zero to never attempt to retry a failed task.
- int32 max_retries = 7 [(google.api.field_behavior) = OPTIONAL];
- // Trigger only applies for RECURRING tasks.
- oneof trigger {
- // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
- // tasks periodically.
- // To explicitly set a timezone to the cron tab, apply a prefix in the
- // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
- // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
- // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
- // "TZ=America/New_York 1 * * * *".
- // This field is required for RECURRING tasks.
- string schedule = 100 [(google.api.field_behavior) = OPTIONAL];
- }
- }
- // Execution related settings, like retry and service_account.
- message ExecutionSpec {
- // Optional. The arguments to pass to the task.
- // The args can use placeholders of the format ${placeholder} as
- // part of key/value string. These will be interpolated before passing the
- // args to the driver. Currently supported placeholders:
- // - ${task_id}
- // - ${job_time}
- // To pass positional args, set the key as TASK_ARGS. The value should be a
- // comma-separated string of all the positional arguments. To use a
- // delimiter other than comma, refer to
- // https://cloud.google.com/sdk/gcloud/reference/topic/escaping. In case of
- // other keys being present in the args, then TASK_ARGS will be passed as
- // the last argument.
- map<string, string> args = 4 [(google.api.field_behavior) = OPTIONAL];
- // Required. Service account to use to execute a task.
- // If not provided, the default Compute service account for the project is
- // used.
- string service_account = 5 [(google.api.field_behavior) = REQUIRED];
- // Optional. The project in which jobs are run. By default, the project containing the
- // Lake is used. If a project is provided, the
- // [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account] must belong to this project.
- string project = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The maximum duration after which the job execution is expired.
- google.protobuf.Duration max_job_execution_lifetime = 8 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The Cloud KMS key to use for encryption, of the form:
- // `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.
- string kms_key = 9 [(google.api.field_behavior) = OPTIONAL];
- }
- // User-specified config for running a Spark task.
- message SparkTaskConfig {
- // Required. The specification of the main method to call to drive the
- // job. Specify either the jar file that contains the main class or the
- // main class name.
- oneof driver {
- // The Cloud Storage URI of the jar file that contains the main class.
- // The execution args are passed in as a sequence of named process
- // arguments (`--key=value`).
- string main_jar_file_uri = 100;
- // The name of the driver's main class. The jar file that contains the
- // class must be in the default CLASSPATH or specified in
- // `jar_file_uris`.
- // The execution args are passed in as a sequence of named process
- // arguments (`--key=value`).
- string main_class = 101;
- // The Gcloud Storage URI of the main Python file to use as the driver.
- // Must be a .py file. The execution args are passed in as a sequence of
- // named process arguments (`--key=value`).
- string python_script_file = 102;
- // A reference to a query file. This can be the Cloud Storage URI of the
- // query file or it can the path to a SqlScript Content. The execution
- // args are used to declare a set of script variables
- // (`set key="value";`).
- string sql_script_file = 104;
- // The query text.
- // The execution args are used to declare a set of script variables
- // (`set key="value";`).
- string sql_script = 105;
- }
- // Optional. Cloud Storage URIs of files to be placed in the working directory of each
- // executor.
- repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Cloud Storage URIs of archives to be extracted into the working directory
- // of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
- // .zip.
- repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Infrastructure specification for the execution.
- InfrastructureSpec infrastructure_spec = 6 [(google.api.field_behavior) = OPTIONAL];
- }
- // Config for running scheduled notebooks.
- message NotebookTaskConfig {
- // Required. Path to input notebook. This can be the Cloud Storage URI of the notebook
- // file or the path to a Notebook Content. The execution args are accessible
- // as environment variables
- // (`TASK_key=value`).
- string notebook = 4 [(google.api.field_behavior) = REQUIRED];
- // Optional. Infrastructure specification for the execution.
- InfrastructureSpec infrastructure_spec = 3 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Cloud Storage URIs of files to be placed in the working directory of each
- // executor.
- repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Cloud Storage URIs of archives to be extracted into the working directory
- // of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
- // .zip.
- repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
- }
- // Status of the task execution (e.g. Jobs).
- message ExecutionStatus {
- // Output only. Last update time of the status.
- google.protobuf.Timestamp update_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. latest job execution
- Job latest_job = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
- }
- // Output only. The relative resource name of the task, of the form:
- // projects/{project_number}/locations/{location_id}/lakes/{lake_id}/
- // tasks/{task_id}.
- string name = 1 [
- (google.api.field_behavior) = OUTPUT_ONLY,
- (google.api.resource_reference) = {
- type: "dataplex.googleapis.com/Task"
- }
- ];
- // Output only. System generated globally unique ID for the task. This ID will be
- // different if the task is deleted and re-created with the same name.
- string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The time when the task was created.
- google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The time when the task was last updated.
- google.protobuf.Timestamp update_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. Description of the task.
- string description = 5 [(google.api.field_behavior) = OPTIONAL];
- // Optional. User friendly display name.
- string display_name = 6 [(google.api.field_behavior) = OPTIONAL];
- // Output only. Current state of the task.
- State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Optional. User-defined labels for the task.
- map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
- // Required. Spec related to how often and when a task should be triggered.
- TriggerSpec trigger_spec = 100 [(google.api.field_behavior) = REQUIRED];
- // Required. Spec related to how a task is executed.
- ExecutionSpec execution_spec = 101 [(google.api.field_behavior) = REQUIRED];
- // Output only. Status of the latest task executions.
- ExecutionStatus execution_status = 201 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Task template specific user-specified config.
- oneof config {
- // Config related to running custom Spark tasks.
- SparkTaskConfig spark = 300;
- // Config related to running scheduled Notebooks.
- NotebookTaskConfig notebook = 302;
- }
- }
- // A job represents an instance of a task.
- message Job {
- option (google.api.resource) = {
- type: "dataplex.googleapis.com/Job"
- pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}/jobs/{job}"
- };
- enum Service {
- // Service used to run the job is unspecified.
- SERVICE_UNSPECIFIED = 0;
- // Dataproc service is used to run this job.
- DATAPROC = 1;
- }
- enum State {
- // The job state is unknown.
- STATE_UNSPECIFIED = 0;
- // The job is running.
- RUNNING = 1;
- // The job is cancelling.
- CANCELLING = 2;
- // The job cancellation was successful.
- CANCELLED = 3;
- // The job completed successfully.
- SUCCEEDED = 4;
- // The job is no longer running due to an error.
- FAILED = 5;
- // The job was cancelled outside of Dataplex.
- ABORTED = 6;
- }
- // Output only. The relative resource name of the job, of the form:
- // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.
- string name = 1 [
- (google.api.field_behavior) = OUTPUT_ONLY,
- (google.api.resource_reference) = {
- type: "dataplex.googleapis.com/Job"
- }
- ];
- // Output only. System generated globally unique ID for the job.
- string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The time when the job was started.
- google.protobuf.Timestamp start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The time when the job ended.
- google.protobuf.Timestamp end_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Execution state for the job.
- State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The number of times the job has been retried (excluding the
- // initial attempt).
- uint32 retry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The underlying service running a job.
- Service service = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. The full resource name for the job run under a particular service.
- string service_job = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. Additional information about the current state.
- string message = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
- }