// File: endpoint.proto
  // Copyright 2022 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.aiplatform.v1beta1;

  // Annotations and message types referenced by the definitions in this file.
  import "google/api/field_behavior.proto";
  import "google/api/resource.proto";
  import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
  import "google/cloud/aiplatform/v1beta1/explanation.proto";
  import "google/cloud/aiplatform/v1beta1/io.proto";
  import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
  import "google/protobuf/timestamp.proto";

  // Per-language code generation options.
  option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
  option java_multiple_files = true;
  option java_outer_classname = "EndpointProto";
  option java_package = "com.google.cloud.aiplatform.v1beta1";
  option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
  option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

  // An Endpoint is the resource into which Models are deployed; once Models are
  // deployed, the Endpoint is called to obtain predictions and explanations.
  message Endpoint {
    option (google.api.resource) = {
      type: "aiplatform.googleapis.com/Endpoint"
      pattern: "projects/{project}/locations/{location}/endpoints/{endpoint}"
    };

    // Output only. The resource name of the Endpoint.
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Required. The display name of the Endpoint.
    // The name can be up to 128 characters long and can consist of any UTF-8
    // characters.
    string display_name = 2 [(google.api.field_behavior) = REQUIRED];

    // The description of the Endpoint.
    string description = 3;

    // Output only. The models deployed in this Endpoint.
    // To add or remove DeployedModels use
    // [EndpointService.DeployModel][google.cloud.aiplatform.v1beta1.EndpointService.DeployModel]
    // and
    // [EndpointService.UndeployModel][google.cloud.aiplatform.v1beta1.EndpointService.UndeployModel]
    // respectively.
    repeated DeployedModel deployed_models = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // A map from a DeployedModel's ID to the percentage of this Endpoint's
    // traffic that should be forwarded to that DeployedModel.
    //
    // If a DeployedModel's ID is not listed in this map, then it receives no
    // traffic.
    //
    // The traffic percentage values must add up to 100, or the map must be
    // empty if the Endpoint is not to accept any traffic at the moment.
    map<string, int32> traffic_split = 5;

    // Used to perform consistent read-modify-write updates. If not set, a blind
    // "overwrite" update happens.
    string etag = 6;

    // The labels with user-defined metadata to organize your Endpoints.
    //
    // Label keys and values can be no longer than 64 characters
    // (Unicode codepoints), can only contain lowercase letters, numeric
    // characters, underscores and dashes. International characters are allowed.
    //
    // See https://goo.gl/xmQnxf for more information and examples of labels.
    map<string, string> labels = 7;

    // Output only. Timestamp when this Endpoint was created.
    google.protobuf.Timestamp create_time = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Timestamp when this Endpoint was last updated.
    google.protobuf.Timestamp update_time = 9
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Customer-managed encryption key spec for an Endpoint. If set, this
    // Endpoint and all sub-resources of this Endpoint will be secured by
    // this key.
    EncryptionSpec encryption_spec = 10;

    // The full name of the Google Compute Engine
    // [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
    // to which the Endpoint should be peered.
    //
    // Private services access must already be configured for the network. If
    // left unspecified, the Endpoint is not peered with any network.
    //
    // Only one of the fields,
    // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] or
    // [enable_private_service_connect][google.cloud.aiplatform.v1beta1.Endpoint.enable_private_service_connect],
    // can be set.
    //
    // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
    // `projects/{project}/global/networks/{network}`.
    // Where `{project}` is a project number, as in `12345`, and `{network}` is
    // a network name.
    string network = 13 [(google.api.resource_reference) = {
      type: "compute.googleapis.com/Network"
    }];

    // Deprecated: If true, expose the Endpoint via private service connect.
    //
    // Only one of the fields,
    // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] or
    // [enable_private_service_connect][google.cloud.aiplatform.v1beta1.Endpoint.enable_private_service_connect],
    // can be set.
    bool enable_private_service_connect = 17 [deprecated = true];

    // Output only. Resource name of the Model Monitoring job associated with
    // this Endpoint if monitoring is enabled by
    // [JobService.CreateModelDeploymentMonitoringJob][google.cloud.aiplatform.v1beta1.JobService.CreateModelDeploymentMonitoringJob].
    // Format:
    // `projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}`
    string model_deployment_monitoring_job = 14 [
      (google.api.field_behavior) = OUTPUT_ONLY,
      (google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/ModelDeploymentMonitoringJob"
      }
    ];

    // Configures the request-response logging for online prediction.
    PredictRequestResponseLoggingConfig predict_request_response_logging_config =
        18;
  }

  // A deployment of a Model. Endpoints contain one or more DeployedModels.
  message DeployedModel {
    // The prediction (for example, the machine) resources that the
    // DeployedModel uses. The user is billed for the resources (at least their
    // minimal amount) even if the DeployedModel receives no traffic.
    // Not all Models support all resource types. See
    // [Model.supported_deployment_resources_types][google.cloud.aiplatform.v1beta1.Model.supported_deployment_resources_types].
    oneof prediction_resources {
      // A description of resources that are dedicated to the DeployedModel,
      // and that need a higher degree of manual configuration.
      DedicatedResources dedicated_resources = 7;

      // A description of resources that to a large degree are decided by
      // Vertex AI, and require only a modest additional configuration.
      AutomaticResources automatic_resources = 8;

      // The resource name of the shared DeploymentResourcePool to deploy on.
      // Format:
      // `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`
      string shared_resources = 17 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/DeploymentResourcePool"
      }];
    }

    // Immutable. The ID of the DeployedModel. If not provided upon deployment,
    // Vertex AI will generate a value for this ID.
    //
    // This value should be 1-10 characters, and valid characters are /[0-9]/.
    string id = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Required. The resource name of the Model that this is the deployment of.
    // Note that the Model may be in a different location than the
    // DeployedModel's Endpoint.
    //
    // The resource name may contain a version id or version alias to specify
    // the version; if no version is specified, the default version will be
    // deployed.
    string model = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Model"
      }
    ];

    // Output only. The version ID of the model that is deployed.
    string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];

    // The display name of the DeployedModel. If not provided upon creation,
    // the Model's display_name is used.
    string display_name = 3;

    // Output only. Timestamp when the DeployedModel was created.
    google.protobuf.Timestamp create_time = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Explanation configuration for this DeployedModel.
    //
    // When deploying a Model using
    // [EndpointService.DeployModel][google.cloud.aiplatform.v1beta1.EndpointService.DeployModel],
    // this value overrides the value of
    // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec].
    // All fields of
    // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
    // are optional in the request. If a field of
    // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
    // is not populated, the value of the same field of
    // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec]
    // is inherited. If the corresponding
    // [Model.explanation_spec][google.cloud.aiplatform.v1beta1.Model.explanation_spec]
    // is not populated, all fields of the
    // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
    // will be used for the explanation configuration.
    ExplanationSpec explanation_spec = 9;

    // The service account that the DeployedModel's container runs as. Specify
    // the email address of the service account. If this service account is not
    // specified, the container runs as a service account that doesn't have
    // access to the resource project.
    //
    // Users deploying the Model must have the `iam.serviceAccounts.actAs`
    // permission on this service account.
    string service_account = 11;

    // If true, the container of the DeployedModel instances will send `stderr`
    // and `stdout` streams to Stackdriver Logging.
    //
    // Only supported for custom-trained Models and AutoML Tabular Models.
    bool enable_container_logging = 12;

    // If true, online prediction access logs are sent to Stackdriver Logging.
    // These logs are like standard server access logs, containing
    // information like timestamp and latency for each prediction request.
    //
    // Note that Stackdriver logs may incur a cost, especially if your project
    // receives prediction requests at a high queries per second rate (QPS).
    // Estimate your costs before enabling this option.
    bool enable_access_logging = 13;

    // Output only. Provide paths for users to send predict/explain/health
    // requests directly to the deployed model services running on Cloud via
    // private services access. This field is populated if
    // [network][google.cloud.aiplatform.v1beta1.Endpoint.network] is configured.
    PrivateEndpoints private_endpoints = 14
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // PrivateEndpoints proto is used to provide paths for users to send
  // requests privately.
  // To send requests via private services access, use predict_http_uri,
  // explain_http_uri or health_http_uri. To send requests via private service
  // connect, use service_attachment.
  message PrivateEndpoints {
    // Output only. HTTP(S) path to send prediction requests.
    string predict_http_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. HTTP(S) path to send explain requests.
    string explain_http_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. HTTP(S) path to send health check requests.
    string health_http_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The name of the service attachment resource. Populated if
    // private service connect is enabled.
    string service_attachment = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Configuration for logging request-response to a BigQuery table.
  message PredictRequestResponseLoggingConfig {
    // If logging is enabled or not.
    bool enabled = 1;

    // Percentage of requests to be logged, expressed as a fraction in the
    // range (0, 1].
    double sampling_rate = 2;

    // BigQuery table for logging.
    // If only given a project, a new dataset will be created with name
    // `logging_<endpoint-display-name>_<endpoint-id>` where
    // `<endpoint-display-name>` will be made BigQuery-dataset-name compatible
    // (e.g. most special characters will become underscores). If no table name
    // is given, a new table will be created with name
    // `request_response_logging`.
    BigQueryDestination bigquery_destination = 3;
  }