123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- // Copyright 2022 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Schema language version and the proto package every message below lives in.
- syntax = "proto3";
- package google.cloud.aiplatform.v1beta1;
- // field_behavior.proto supplies the (google.api.field_behavior) annotation
- // applied to fields below. accelerator_type.proto presumably declares
- // AcceleratorType, which MachineSpec references -- confirm against that file.
- import "google/api/field_behavior.proto";
- import "google/cloud/aiplatform/v1beta1/accelerator_type.proto";
- // Per-language namespace/package settings for generated code.
- option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
- option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
- option java_multiple_files = true;
- option java_outer_classname = "MachineResourcesProto";
- option java_package = "com.google.cloud.aiplatform.v1beta1";
- option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
- option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
- // Describes the configuration of one machine.
- message MachineSpec {
-   // Immutable. Machine type to use.
-   //
-   // See the [list of machine types supported for
-   // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
-   //
-   // See the [list of machine types supported for custom
-   // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
-   //
-   // Optional for
-   // [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel], where the
-   // default value is `n1-standard-2`. Required for
-   // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
-   // and when used as part of
-   // [WorkerPoolSpec][google.cloud.aiplatform.v1beta1.WorkerPoolSpec].
-   string machine_type = 1 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Kind of accelerator(s) that may be attached to the machine, as
-   // per
-   // [accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count].
-   AcceleratorType accelerator_type = 2 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // How many accelerators to attach to the machine.
-   int32 accelerator_count = 3;
- }
- // Resources dedicated to a DeployedModel that need a higher degree of manual
- // configuration.
- message DedicatedResources {
-   // Required. Immutable. Specification of a single machine used by the
-   // prediction.
-   MachineSpec machine_spec = 1 [
-     (google.api.field_behavior) = REQUIRED,
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Required. Immutable. Minimum number of machine replicas this
-   // DeployedModel will always be deployed on. Must be greater than or equal
-   // to 1.
-   //
-   // When traffic against the DeployedModel increases, it may dynamically be
-   // deployed onto more replicas; as traffic decreases, some of these extra
-   // replicas may be freed.
-   int32 min_replica_count = 2 [
-     (google.api.field_behavior) = REQUIRED,
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Maximum number of replicas this DeployedModel may be deployed
-   // on when the traffic against it increases. A requested value that is too
-   // large makes the deployment error; if deployment succeeds, the ability to
-   // scale the model to that many replicas is guaranteed (barring service
-   // outages). If traffic against the DeployedModel increases beyond what its
-   // replicas at maximum may handle, a portion of the traffic will be dropped.
-   // If this value is not provided,
-   // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
-   // is used as the default value.
-   //
-   // This field impacts the charge against Vertex CPU and GPU quotas.
-   // Specifically, you will be charged for (max_replica_count * number of
-   // cores in the selected machine type) and (max_replica_count * number of
-   // GPUs per replica in the selected machine type).
-   int32 max_replica_count = 3 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Metric specifications that override the target value of a
-   // resource utilization metric (CPU utilization, accelerator's duty cycle,
-   // and so on). The target value defaults to 60 if not set. At most one entry
-   // is allowed per metric.
-   //
-   // If
-   // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
-   // is above 0, autoscaling is based on both the CPU utilization and the
-   // accelerator's duty cycle metrics: it scales up when either metric exceeds
-   // its target value and scales down when both metrics are under their
-   // target values. The default target value is 60 for both metrics.
-   //
-   // If
-   // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
-   // is 0, autoscaling is based on the CPU utilization metric only, with a
-   // default target value of 60 if not explicitly set.
-   //
-   // For example, in the case of Online Prediction, to override the target
-   // CPU utilization to 80, set
-   // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.metric_name]
-   // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
-   // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.target]
-   // to `80`.
-   repeated AutoscalingMetricSpec autoscaling_metric_specs = 4 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
- }
- // Resources that are, to a large degree, decided by Vertex AI and require
- // only a modest additional configuration.
- // Each Model supporting these resources documents its specific guidelines.
- message AutomaticResources {
-   // Immutable. Minimum number of replicas this DeployedModel will always be
-   // deployed on. When traffic against it increases, it may dynamically be
-   // deployed onto more replicas, up to
-   // [max_replica_count][google.cloud.aiplatform.v1beta1.AutomaticResources.max_replica_count];
-   // as traffic decreases, some of these extra replicas may be freed.
-   // A requested value that is too large makes the deployment error.
-   int32 min_replica_count = 1 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Maximum number of replicas this DeployedModel may be deployed
-   // on when the traffic against it increases. A requested value that is too
-   // large makes the deployment error; if deployment succeeds, the ability to
-   // scale the model to that many replicas is guaranteed (barring service
-   // outages). If traffic against the DeployedModel increases beyond what its
-   // replicas at maximum may handle, a portion of the traffic will be dropped.
-   // If this value is not provided, no upper bound for scaling under heavy
-   // traffic is assumed, though Vertex AI may be unable to scale beyond a
-   // certain replica number.
-   int32 max_replica_count = 2 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
- }
- // Resources used for performing batch operations that are dedicated to a
- // Model and need manual configuration.
- message BatchDedicatedResources {
-   // Required. Immutable. Specification of a single machine.
-   MachineSpec machine_spec = 1 [
-     (google.api.field_behavior) = REQUIRED,
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Number of machine replicas used at the start of the batch
-   // operation. If not set, Vertex AI decides the starting number, which is
-   // not greater than
-   // [max_replica_count][google.cloud.aiplatform.v1beta1.BatchDedicatedResources.max_replica_count].
-   int32 starting_replica_count = 2 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
-   // Immutable. Maximum number of machine replicas the batch operation may be
-   // scaled to. The default value is 10.
-   int32 max_replica_count = 3 [
-     (google.api.field_behavior) = IMMUTABLE
-   ];
- }
- // Statistics about resource consumption.
- message ResourcesConsumed {
-   // Output only. Number of replica hours used. Because many replicas may run
-   // in parallel, and any given work may additionally be queued for some time,
-   // this value is not strictly related to wall time.
-   double replica_hours = 1 [
-     (google.api.field_behavior) = OUTPUT_ONLY
-   ];
- }
- // Specification of disk options.
- message DiskSpec {
-   // Boot disk type. Valid values are "pd-ssd" (Persistent Disk Solid State
-   // Drive) and "pd-standard" (Persistent Disk Hard Disk Drive); the default
-   // is "pd-ssd".
-   string boot_disk_type = 1;
-   // Boot disk size in GB. The default is 100GB.
-   int32 boot_disk_size_gb = 2;
- }
- // Configuration for mounting a Network File System (NFS).
- message NfsMount {
-   // Required. The NFS server's IP address.
-   string server = 1 [
-     (google.api.field_behavior) = REQUIRED
-   ];
-   // Required. Source path exported from the NFS server. It has to start with
-   // '/'; combined with the IP address, it indicates the source mount path in
-   // the form of `server:path`.
-   string path = 2 [
-     (google.api.field_behavior) = REQUIRED
-   ];
-   // Required. Destination mount path. The NFS will be mounted for the user
-   // under `/mnt/nfs/<mount_point>`.
-   string mount_point = 3 [
-     (google.api.field_behavior) = REQUIRED
-   ];
- }
- // Metric specification defining the target resource utilization (CPU
- // utilization, accelerator's duty cycle, and so on) used for calculating the
- // desired replica count.
- message AutoscalingMetricSpec {
-   // Required. Name of the resource metric.
-   // Supported metrics:
-   //
-   // * For Online Prediction:
-   //   * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
-   //   * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
-   string metric_name = 1 [
-     (google.api.field_behavior) = REQUIRED
-   ];
-   // Target resource utilization, as a percentage (1% - 100%), for the given
-   // metric. Once the real usage deviates from the target by a certain
-   // percentage, the machine replicas change. The default value is 60
-   // (representing 60%) if not provided.
-   int32 target = 2;
- }
|