model_deployment_monitoring_job.proto
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
import "google/cloud/aiplatform/v1beta1/feature_monitoring_stats.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";
import "google/cloud/aiplatform/v1beta1/job_state.proto";
import "google/cloud/aiplatform/v1beta1/model_monitoring.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "ModelDeploymentMonitoringJobProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// The Model Monitoring Objective types.
enum ModelDeploymentMonitoringObjectiveType {
  // Default value, should not be set.
  MODEL_DEPLOYMENT_MONITORING_OBJECTIVE_TYPE_UNSPECIFIED = 0;

  // Raw feature values' stats to detect skew between Training-Prediction
  // datasets.
  RAW_FEATURE_SKEW = 1;

  // Raw feature values' stats to detect drift between Serving-Prediction
  // datasets.
  RAW_FEATURE_DRIFT = 2;

  // Feature attribution scores to detect skew between Training-Prediction
  // datasets.
  FEATURE_ATTRIBUTION_SKEW = 3;

  // Feature attribution scores to detect drift between Prediction datasets
  // collected within different time windows.
  FEATURE_ATTRIBUTION_DRIFT = 4;
}

// Represents a job that runs periodically to monitor the deployed models in an
// endpoint. It will analyze the logged training & prediction data to detect any
// abnormal behaviors.
message ModelDeploymentMonitoringJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/ModelDeploymentMonitoringJob"
    pattern: "projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}"
  };

  // All metadata of the most recent monitoring pipelines.
  message LatestMonitoringPipelineMetadata {
    // The run time of the most recent monitoring pipeline.
    google.protobuf.Timestamp run_time = 1;

    // The status of the most recent monitoring pipeline.
    google.rpc.Status status = 2;
  }

  // The state of the monitoring pipeline schedule.
  enum MonitoringScheduleState {
    // Unspecified state.
    MONITORING_SCHEDULE_STATE_UNSPECIFIED = 0;

    // The pipeline has been picked up and is waiting to run.
    PENDING = 1;

    // The pipeline is offline and will be scheduled for its next run.
    OFFLINE = 2;

    // The pipeline is running.
    RUNNING = 3;
  }

  // Output only. Resource name of a ModelDeploymentMonitoringJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined display name of the ModelDeploymentMonitoringJob.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Endpoint resource name.
  // Format: `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Output only. The detailed state of the monitoring job.
  // While the job is still being created, the state will be 'PENDING'.
  // Once the job is successfully created, the state will be 'RUNNING'.
  // When the job is paused, the state will be 'PAUSED'.
  // When the job is resumed, the state will return to 'RUNNING'.
  JobState state = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Schedule state when the monitoring job is in Running state.
  MonitoringScheduleState schedule_state = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Latest triggered monitoring pipeline metadata.
  LatestMonitoringPipelineMetadata latest_monitoring_pipeline_metadata = 25
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The config for monitoring objectives. This is a per-DeployedModel
  // config; each DeployedModel needs to be configured separately.
  repeated ModelDeploymentMonitoringObjectiveConfig model_deployment_monitoring_objective_configs = 6
      [(google.api.field_behavior) = REQUIRED];

  // Required. Schedule config for running the monitoring job.
  ModelDeploymentMonitoringScheduleConfig model_deployment_monitoring_schedule_config = 7
      [(google.api.field_behavior) = REQUIRED];

  // Required. Sampling strategy for logging.
  SamplingStrategy logging_sampling_strategy = 8
      [(google.api.field_behavior) = REQUIRED];

  // Alert config for model monitoring.
  ModelMonitoringAlertConfig model_monitoring_alert_config = 15;

  // YAML schema file uri describing the format of a single instance,
  // which is given to format this Endpoint's prediction (and explanation).
  // If not set, we will generate the predict schema from collected predict
  // requests.
  string predict_instance_schema_uri = 9;

  // Sample Predict instance, same format as
  // [PredictRequest.instances][google.cloud.aiplatform.v1beta1.PredictRequest.instances];
  // this can be set as a replacement of
  // [ModelDeploymentMonitoringJob.predict_instance_schema_uri][google.cloud.aiplatform.v1beta1.ModelDeploymentMonitoringJob.predict_instance_schema_uri].
  // If not set, we will generate the predict schema from collected predict
  // requests.
  google.protobuf.Value sample_predict_instance = 19;

  // YAML schema file uri describing the format of a single instance that you
  // want Tensorflow Data Validation (TFDV) to analyze.
  //
  // If this field is empty, all the feature data types are inferred from
  // [predict_instance_schema_uri][google.cloud.aiplatform.v1beta1.ModelDeploymentMonitoringJob.predict_instance_schema_uri],
  // meaning that TFDV will use the data in the exact format (data type) as the
  // prediction request/response.
  // If there are any data type differences between the predict instance and the
  // TFDV instance, this field can be used to override the schema.
  // For models trained with Vertex AI, this field must be set, with all the
  // fields of the predict instance formatted as strings.
  string analysis_instance_schema_uri = 16;

  // Output only. The created BigQuery tables for the job under the customer's
  // project. Customers can run their own queries and analysis. There can be at
  // most four log tables (one per combination of log source and log type):
  // 1. Training data logging predict request/response
  // 2. Serving data logging predict request/response
  repeated ModelDeploymentMonitoringBigQueryTable bigquery_tables = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The TTL of the BigQuery tables in user projects which store logs.
  // A day is the basic unit of the TTL and we take the ceiling of TTL/86400 (a
  // day), e.g. { seconds: 3600 } indicates a TTL of 1 day.
  google.protobuf.Duration log_ttl = 17;
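
  // Illustrative note (not part of the API surface): because the TTL is taken
  // as ceil(TTL / 86400) days, a log_ttl of { seconds: 172800 } (exactly two
  // days) yields a 2-day TTL, while { seconds: 172801 } rounds up to a 3-day
  // TTL.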

  // The labels with user-defined metadata to organize your
  // ModelDeploymentMonitoringJob.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 11;

  // Output only. Timestamp when this ModelDeploymentMonitoringJob was created.
  google.protobuf.Timestamp create_time = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this ModelDeploymentMonitoringJob was most
  // recently updated.
  google.protobuf.Timestamp update_time = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this monitoring pipeline is next scheduled to
  // run.
  google.protobuf.Timestamp next_schedule_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Stats anomalies base folder path.
  GcsDestination stats_anomalies_base_directory = 20;

  // Customer-managed encryption key spec for a ModelDeploymentMonitoringJob. If
  // set, this ModelDeploymentMonitoringJob and all sub-resources of this
  // ModelDeploymentMonitoringJob will be secured by this key.
  EncryptionSpec encryption_spec = 21;

  // If true, the scheduled monitoring pipeline logs are sent to
  // Google Cloud Logging, including pipeline status and anomalies detected.
  // Please note that the logs incur cost, which is subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging#pricing).
  bool enable_monitoring_pipeline_logs = 22;

  // Output only. Only populated when the job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 23 [(google.api.field_behavior) = OUTPUT_ONLY];
}
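
// A minimal sketch of a ModelDeploymentMonitoringJob in textproto form, for
// illustration only; the project, location, endpoint and model IDs below are
// hypothetical, and the nested configs defined in the imported
// model_monitoring.proto are elided:
//
//   display_name: "churn-model-monitoring"
//   endpoint: "projects/example-project/locations/us-central1/endpoints/1234567890"
//   model_deployment_monitoring_objective_configs {
//     deployed_model_id: "9876543210"
//     objective_config { ... }
//   }
//   model_deployment_monitoring_schedule_config {
//     monitor_interval { seconds: 3600 }
//   }
//   logging_sampling_strategy { ... }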

// ModelDeploymentMonitoringBigQueryTable specifies the BigQuery table name
// as well as some information about the logs stored in this table.
message ModelDeploymentMonitoringBigQueryTable {
  // Indicates where the log comes from.
  enum LogSource {
    // Unspecified source.
    LOG_SOURCE_UNSPECIFIED = 0;

    // Logs coming from the Training dataset.
    TRAINING = 1;

    // Logs coming from Serving traffic.
    SERVING = 2;
  }

  // Indicates what type of traffic the log belongs to.
  enum LogType {
    // Unspecified type.
    LOG_TYPE_UNSPECIFIED = 0;

    // Predict logs.
    PREDICT = 1;

    // Explain logs.
    EXPLAIN = 2;
  }

  // The source of the log.
  LogSource log_source = 1;

  // The type of the log.
  LogType log_type = 2;

  // The created BigQuery table to store logs. Customers can run their own
  // queries and analysis. Format:
  // `bq://<project_id>.model_deployment_monitoring_<endpoint_id>.<tolower(log_source)>_<tolower(log_type)>`
  string bigquery_table_path = 3;
}
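
// For illustration only: with the format above, a table holding predict logs
// from serving traffic for a hypothetical project "example-project" and
// endpoint ID "1234567890" would be addressed as
// `bq://example-project.model_deployment_monitoring_1234567890.serving_predict`.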

// ModelDeploymentMonitoringObjectiveConfig contains the pair of
// deployed_model_id to ModelMonitoringObjectiveConfig.
message ModelDeploymentMonitoringObjectiveConfig {
  // The DeployedModel ID of the objective config.
  string deployed_model_id = 1;

  // The objective config for the model monitoring job of this deployed model.
  ModelMonitoringObjectiveConfig objective_config = 2;
}
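
// A minimal sketch, for illustration only, pairing a hypothetical DeployedModel
// ID with its monitoring objectives; the fields of ModelMonitoringObjectiveConfig
// are defined in model_monitoring.proto and are elided here:
//
//   deployed_model_id: "9876543210"
//   objective_config { ... }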

// The config for scheduling a monitoring job.
message ModelDeploymentMonitoringScheduleConfig {
  // Required. The model monitoring job scheduling interval. It will be rounded
  // up to the next full hour. This defines how often the monitoring jobs are
  // triggered.
  google.protobuf.Duration monitor_interval = 1
      [(google.api.field_behavior) = REQUIRED];

  // The time window of the prediction data being included in each prediction
  // dataset. This window specifies how long the data should be collected from
  // historical model results for each run. If not set,
  // [ModelDeploymentMonitoringScheduleConfig.monitor_interval][google.cloud.aiplatform.v1beta1.ModelDeploymentMonitoringScheduleConfig.monitor_interval] will be used.
  // For example, if the current cutoff time is 2022-01-08 14:30:00 and
  // monitor_window is set to 3600 seconds, then data from 2022-01-08 13:30:00
  // to 2022-01-08 14:30:00 will be retrieved and aggregated to calculate the
  // monitoring statistics.
  google.protobuf.Duration monitor_window = 2;
}
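
// A worked sketch of the scheduling semantics described above, using
// hypothetical values: a monitor_interval of one hour triggers the job hourly
// (shorter intervals would be rounded up to a full hour), and a two-hour
// monitor_window makes each run aggregate the prediction data logged during
// the two hours before its cutoff time.
//
//   monitor_interval { seconds: 3600 }
//   monitor_window { seconds: 7200 }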

// Statistics and anomalies generated by Model Monitoring.
message ModelMonitoringStatsAnomalies {
  // Historical Stats (and Anomalies) for a specific Feature.
  message FeatureHistoricStatsAnomalies {
    // Display Name of the Feature.
    string feature_display_name = 1;

    // Threshold for anomaly detection.
    ThresholdConfig threshold = 3;

    // Stats calculated for the Training Dataset.
    FeatureStatsAnomaly training_stats = 4;

    // A list of historical stats generated by different time windows'
    // Prediction Datasets.
    repeated FeatureStatsAnomaly prediction_stats = 5;
  }

  // The Model Monitoring Objective these stats and anomalies belong to.
  ModelDeploymentMonitoringObjectiveType objective = 1;

  // Deployed Model ID.
  string deployed_model_id = 2;

  // Number of anomalies within all stats.
  int32 anomaly_count = 3;

  // A list of historical Stats and Anomalies generated for all Features.
  repeated FeatureHistoricStatsAnomalies feature_stats = 4;
}
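
// For illustration only, a sketch of how the returned stats anomalies nest,
// using hypothetical values; ThresholdConfig and FeatureStatsAnomaly are
// defined in the imported protos and are elided here:
//
//   objective: RAW_FEATURE_SKEW
//   deployed_model_id: "9876543210"
//   anomaly_count: 2
//   feature_stats {
//     feature_display_name: "age"
//     threshold { ... }
//     training_stats { ... }
//     prediction_stats { ... }
//   }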