// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/env_var.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "CustomJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Represents a job that runs custom workloads such as a Docker container or a
// Python package. A CustomJob can have multiple worker pools and each worker
// pool can have its own machine and input spec. A CustomJob will be cleaned up
// once the job enters terminal state (failed or succeeded).
message CustomJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/CustomJob"
    pattern: "projects/{project}/locations/{location}/customJobs/{custom_job}"
  };

  // Output only. Resource name of a CustomJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the CustomJob.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Job spec.
  CustomJobSpec job_spec = 4 [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the job.
  JobState state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob for the first time entered the
  // `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob entered any of the following states:
  // `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was most recently updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize CustomJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 11;

  // Customer-managed encryption key options for a CustomJob. If this is set,
  // then all resources created by the CustomJob will be encrypted with the
  // provided encryption key.
  EncryptionSpec encryption_spec = 12;

  // Output only. URIs for accessing [interactive
  // shells](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)
  // (one URI for each training node). Only available if
  // [job_spec.enable_web_access][google.cloud.aiplatform.v1.CustomJobSpec.enable_web_access]
  // is `true`.
  //
  // The keys are names of each node in the training job; for example,
  // `workerpool0-0` for the primary node, `workerpool1-0` for the first node in
  // the second worker pool, and `workerpool1-1` for the second node in the
  // second worker pool.
  //
  // The values are the URIs for each node's interactive shell.
  map<string, string> web_access_uris = 16
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of a CustomJob.
message CustomJobSpec {
  // Required. The spec of the worker pools including machine type and Docker
  // image. All worker pools except the first one are optional and can be
  // skipped by providing an empty value.
  repeated WorkerPoolSpec worker_pool_specs = 1
      [(google.api.field_behavior) = REQUIRED];

  // Scheduling options for a CustomJob.
  Scheduling scheduling = 3;

  // Specifies the service account for workload run-as account.
  // Users submitting jobs must have act-as permission on this run-as account.
  // If unspecified, the [Vertex AI Custom Code Service
  // Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
  // for the CustomJob's project is used.
  string service_account = 4;

  // Optional. The full name of the Compute Engine
  // [network](/compute/docs/networks-and-firewalls#networks) to which the Job
  // should be peered. For example, `projects/12345/global/networks/myVPC`.
  // [Format](/compute/docs/reference/rest/v1/networks/insert)
  // is of the form `projects/{project}/global/networks/{network}`.
  // Where {project} is a project number, as in `12345`, and {network} is a
  // network name.
  //
  // To specify this field, you must have already [configured VPC Network
  // Peering for Vertex
  // AI](https://cloud.google.com/vertex-ai/docs/general/vpc-peering).
  //
  // If this field is left unspecified, the job is not peered with any network.
  string network = 5 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "compute.googleapis.com/Network"
    }
  ];

  // Optional. A list of names for the reserved ip ranges under the VPC network
  // that can be used for this job.
  //
  // If set, we will deploy the job within the provided ip ranges. Otherwise,
  // the job will be deployed to any ip ranges under the provided VPC
  // network.
  //
  // Example: ['vertex-ai-ip-range'].
  repeated string reserved_ip_ranges = 13
      [(google.api.field_behavior) = OPTIONAL];

  // The Cloud Storage location to store the output of this CustomJob or
  // HyperparameterTuningJob. For HyperparameterTuningJob,
  // the baseOutputDirectory of
  // each child CustomJob backing a Trial is set to a subdirectory of name
  // [id][google.cloud.aiplatform.v1.Trial.id] under its parent
  // HyperparameterTuningJob's baseOutputDirectory.
  //
  // The following Vertex AI environment variables will be passed to
  // containers or python modules when this field is set:
  //
  //   For CustomJob:
  //
  //   * AIP_MODEL_DIR = `<base_output_directory>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/logs/`
  //
  //   For CustomJob backing a Trial of HyperparameterTuningJob:
  //
  //   * AIP_MODEL_DIR = `<base_output_directory>/<trial_id>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/<trial_id>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/<trial_id>/logs/`
  GcsDestination base_output_directory = 6;

  // Optional. The name of a Vertex AI
  // [Tensorboard][google.cloud.aiplatform.v1.Tensorboard] resource to which
  // this CustomJob will upload Tensorboard logs.
  // Format:
  // `projects/{project}/locations/{location}/tensorboards/{tensorboard}`
  string tensorboard = 7 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Tensorboard"
    }
  ];

  // Optional. Whether you want Vertex AI to enable [interactive shell
  // access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)
  // to training containers.
  //
  // If set to `true`, you can access interactive shells at the URIs given
  // by
  // [CustomJob.web_access_uris][google.cloud.aiplatform.v1.CustomJob.web_access_uris]
  // or
  // [Trial.web_access_uris][google.cloud.aiplatform.v1.Trial.web_access_uris]
  // (within
  // [HyperparameterTuningJob.trials][google.cloud.aiplatform.v1.HyperparameterTuningJob.trials]).
  bool enable_web_access = 10 [(google.api.field_behavior) = OPTIONAL];
}

// Represents the spec of a worker pool in a job.
message WorkerPoolSpec {
  // The custom task to be executed in this worker pool.
  oneof task {
    // The custom container task.
    ContainerSpec container_spec = 6;

    // The Python packaged task.
    PythonPackageSpec python_package_spec = 7;
  }

  // Optional. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The number of worker replicas to use for this worker pool.
  int64 replica_count = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. List of NFS mount spec.
  repeated NfsMount nfs_mounts = 4 [(google.api.field_behavior) = OPTIONAL];

  // Disk spec.
  DiskSpec disk_spec = 5;
}

// The spec of a Container.
message ContainerSpec {
  // Required. The URI of a container image in the Container Registry that is
  // to be run on each worker replica.
  string image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // The command to be invoked when the container is started.
  // It overrides the entrypoint instruction in Dockerfile when provided.
  repeated string command = 2;

  // The arguments to be passed when starting the container.
  repeated string args = 3;

  // Environment variables to be passed to the container.
  // Maximum limit is 100.
  repeated EnvVar env = 4;
}

// The spec of a Python packaged code.
message PythonPackageSpec {
  // Required. The URI of a container image in Artifact Registry that will run
  // the provided Python package. Vertex AI provides a wide range of executor
  // images with pre-installed packages to meet users' various use cases. See
  // the list of [pre-built containers for
  // training](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).
  // You must use an image from this list.
  string executor_image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Google Cloud Storage location of the Python package files
  // which are the training program and its dependent packages.
  // The maximum number of package URIs is 100.
  repeated string package_uris = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The Python module name to run after installing the packages.
  string python_module = 3 [(google.api.field_behavior) = REQUIRED];

  // Command line arguments to be passed to the Python task.
  repeated string args = 4;

  // Environment variables to be passed to the python module.
  // Maximum limit is 100.
  repeated EnvVar env = 5;
}

// All parameters related to queuing and scheduling of custom jobs.
message Scheduling {
  // The maximum job running time. The default is 7 days.
  google.protobuf.Duration timeout = 1;

  // Restarts the entire CustomJob if a worker gets restarted.
  // This feature can be used by distributed training jobs that are not
  // resilient to workers leaving and joining a job.
  bool restart_job_on_worker_restart = 3;
}