machine_resources.proto 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  // Copyright 2022 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  // http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.aiplatform.v1beta1;

  import "google/api/field_behavior.proto";
  import "google/cloud/aiplatform/v1beta1/accelerator_type.proto";

  // Per-language settings for the code generated from this file.
  option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
  option java_multiple_files = true;
  option java_outer_classname = "MachineResourcesProto";
  option java_package = "com.google.cloud.aiplatform.v1beta1";
  option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
  option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
  // Specification of a single machine.
  message MachineSpec {
    // Immutable. The type of the machine.
    //
    // See the [list of machine types supported for
    // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
    //
    // See the [list of machine types supported for custom
    // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
    //
    // For [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] this
    // field is optional, and the default value is `n1-standard-2`. For
    // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
    // or as part of
    // [WorkerPoolSpec][google.cloud.aiplatform.v1beta1.WorkerPoolSpec] this
    // field is required.
    string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The type of accelerator(s) that may be attached to the
    // machine as per
    // [accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count].
    AcceleratorType accelerator_type = 2 [
      (google.api.field_behavior) = IMMUTABLE
    ];

    // The number of accelerators to attach to the machine.
    int32 accelerator_count = 3;
  }
  // A description of resources that are dedicated to a DeployedModel, and
  // that need a higher degree of manual configuration.
  message DedicatedResources {
    // Required. Immutable. The specification of a single machine used by the
    // prediction.
    MachineSpec machine_spec = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Required. Immutable. The minimum number of machine replicas this
    // DeployedModel will always be deployed on. This value must be greater
    // than or equal to 1.
    //
    // If traffic against the DeployedModel increases, it may dynamically be
    // deployed onto more replicas, and as traffic decreases, some of these
    // extra replicas may be freed.
    int32 min_replica_count = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Immutable. The maximum number of replicas this DeployedModel may be
    // deployed on when the traffic against it increases. If the requested
    // value is too large, the deployment will error, but if deployment
    // succeeds then the ability to scale the model to that many replicas is
    // guaranteed (barring service outages). If traffic against the
    // DeployedModel increases beyond what its replicas at maximum may handle,
    // a portion of the traffic will be dropped. If this value is not
    // provided,
    // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
    // is used as the default value.
    //
    // The value of this field impacts the charge against Vertex CPU and GPU
    // quotas. Specifically, you will be charged for (max_replica_count *
    // number of cores in the selected machine type) and (max_replica_count *
    // number of GPUs per replica in the selected machine type).
    int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The metric specifications that override a resource
    // utilization metric (CPU utilization, accelerator's duty cycle, and so
    // on) target value (default to 60 if not set). At most one entry is
    // allowed per metric.
    //
    // If
    // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
    // is above 0, the autoscaling will be based on both CPU utilization and
    // accelerator's duty cycle metrics, scaling up when either metric exceeds
    // its target value and scaling down if both metrics are under their
    // target values. The default target value is 60 for both metrics.
    //
    // If
    // [machine_spec.accelerator_count][google.cloud.aiplatform.v1beta1.MachineSpec.accelerator_count]
    // is 0, the autoscaling will be based on the CPU utilization metric only,
    // with default target value 60 if not explicitly set.
    //
    // For example, in the case of Online Prediction, if you want to override
    // target CPU utilization to 80, you should set
    // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.metric_name]
    // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
    // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1beta1.AutoscalingMetricSpec.target]
    // to `80`.
    repeated AutoscalingMetricSpec autoscaling_metric_specs = 4 [
      (google.api.field_behavior) = IMMUTABLE
    ];
  }
  // A description of resources that are to a large degree decided by Vertex
  // AI, and require only a modest additional configuration.
  // Each Model supporting these resources documents its specific guidelines.
  message AutomaticResources {
    // Immutable. The minimum number of replicas this DeployedModel will
    // always be deployed on. If traffic against it increases, it may
    // dynamically be deployed onto more replicas up to
    // [max_replica_count][google.cloud.aiplatform.v1beta1.AutomaticResources.max_replica_count],
    // and as traffic decreases, some of these extra replicas may be freed.
    // If the requested value is too large, the deployment will error.
    int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The maximum number of replicas this DeployedModel may be
    // deployed on when the traffic against it increases. If the requested
    // value is too large, the deployment will error, but if deployment
    // succeeds then the ability to scale the model to that many replicas is
    // guaranteed (barring service outages). If traffic against the
    // DeployedModel increases beyond what its replicas at maximum may handle,
    // a portion of the traffic will be dropped. If this value is not
    // provided, no upper bound for scaling under heavy traffic will be
    // assumed, though Vertex AI may be unable to scale beyond a certain
    // replica number.
    int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
  }
  // A description of resources that are used for performing batch
  // operations, are dedicated to a Model, and need manual configuration.
  message BatchDedicatedResources {
    // Required. Immutable. The specification of a single machine.
    MachineSpec machine_spec = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Immutable. The number of machine replicas used at the start of the
    // batch operation. If not set, Vertex AI decides the starting number,
    // not greater than
    // [max_replica_count][google.cloud.aiplatform.v1beta1.BatchDedicatedResources.max_replica_count].
    int32 starting_replica_count = 2 [
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Immutable. The maximum number of machine replicas the batch operation
    // may be scaled to. The default value is 10.
    int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
  }
  // Statistics information about resource consumption.
  message ResourcesConsumed {
    // Output only. The number of replica hours used. Note that many replicas
    // may run in parallel, and additionally any given work may be queued for
    // some time. Therefore this value is not strictly related to wall time.
    double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
  // Represents the spec of disk options.
  message DiskSpec {
    // Type of the boot disk (default is "pd-ssd").
    // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
    // "pd-standard" (Persistent Disk Hard Disk Drive).
    string boot_disk_type = 1;

    // Size in GB of the boot disk (default is 100GB).
    int32 boot_disk_size_gb = 2;
  }
  // Represents a mount configuration for Network File System (NFS) to mount.
  message NfsMount {
    // Required. IP address of the NFS server.
    string server = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Source path exported from NFS server.
    // Has to start with '/', and combined with the IP address, it indicates
    // the source mount path in the form of `server:path`.
    string path = 2 [(google.api.field_behavior) = REQUIRED];

    // Required. Destination mount path. The NFS will be mounted for the user
    // under /mnt/nfs/<mount_point>.
    string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
  }
  // The metric specification that defines the target resource utilization
  // (CPU utilization, accelerator's duty cycle, and so on) for calculating
  // the desired replica count.
  message AutoscalingMetricSpec {
    // Required. The resource metric name.
    // Supported metrics:
    //
    // * For Online Prediction:
    //   * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
    //   * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
    string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

    // The target resource utilization in percentage (1% - 100%) for the given
    // metric; once the real usage deviates from the target by a certain
    // percentage, the machine replicas change. The default value is 60
    // (representing 60%) if not provided.
    int32 target = 2;
  }