model_monitoring.proto

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    //
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }
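
  // Illustrative example only: a TrainingDataset in proto text format that
  // reads CSV training data from Cloud Storage and samples about half of it.
  // The bucket path and target field are hypothetical, and the example assumes
  // the GcsSource.uris field defined in google/cloud/aiplatform/v1/io.proto.
  //
  //   gcs_source { uris: "gs://my-bucket/training/data.csv" }
  //   data_format: "csv"
  //   target_field: "churn"
  //   logging_sampling_strategy {
  //     random_sample_config { sample_rate: 0.5 }
  //   }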

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between the training and prediction
    // feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }
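
  // Illustrative example only: per-feature skew thresholds in proto text
  // format; the feature names are hypothetical. Map entries use the text
  // format's key/value convention. PredictionDriftDetectionConfig below has
  // the same shape, with drift_thresholds in place of skew_thresholds.
  //
  //   skew_thresholds {
  //     key: "country"
  //     value { value: 0.3 }
  //   }
  //   skew_thresholds {
  //     key: "age"
  //     value { value: 0.2 }
  //   }
  //
  // When no per-feature thresholds are set, default_skew_threshold { value: 0.1 }
  // can be used instead to apply a single threshold to all features.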

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] for Model Monitoring baseline dataset,
    // which can be used to generate baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the BatchPrediction
      // job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the BatchPrediction
      // job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribution scores.
    // If set to true, Vertex AI will log the feature attributions from the
    // explain response and run skew/drift detection on them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using the baseline
    // dataset.
    ExplanationBaseline explanation_baseline = 2;
  }
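
  // Illustrative example only: an ExplanationConfig in proto text format that
  // enables attribution monitoring and points the baseline at BatchExplain
  // output in Cloud Storage. The bucket path is hypothetical, and the example
  // assumes the GcsDestination.output_uri_prefix field defined in
  // google/cloud/aiplatform/v1/io.proto.
  //
  //   enable_feature_attributes: true
  //   explanation_baseline {
  //     gcs { output_uri_prefix: "gs://my-bucket/explain-baseline/" }
  //     prediction_format: JSONL
  //   }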

  // Training dataset for models. This field is required only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alerts.
  message EmailAlertConfig {
    // The email addresses to send the alert to.
    repeated string user_emails = 1;
  }

  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be written as a
  // JSON payload encoded from the proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // The log entries can then be routed to Pub/Sub or any other service
  // supported by Cloud Logging.
  bool enable_logging = 2;
}
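
// Illustrative example only: a ModelMonitoringAlertConfig in proto text format
// that emails a (hypothetical) on-call address and also writes anomalies to
// Cloud Logging.
//
//   email_alert_config {
//     user_emails: "ml-oncall@example.com"
//   }
//   enable_logging: true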

// The config for feature monitoring threshold.
message ThresholdConfig {
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    // 1. For categorical features, the distribution distance is calculated by
    //    L-infinity norm.
    // 2. For numerical features, the distribution distance is calculated by
    //    Jensen-Shannon divergence.
    // Each feature must have a non-zero threshold if it needs to be monitored;
    // otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}
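
// Illustrative example only: a ThresholdConfig in proto text format, e.g. as
// the value of default_skew_threshold above. An alert can be triggered for a
// feature when its measured distance exceeds this value.
//
//   value: 0.3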

// Sampling strategy for logging. It can be used for both the training and
// prediction datasets.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate (0, 1].
    double sample_rate = 1;
  }

  // Random sample config. More sampling strategies will be supported in the
  // future.
  RandomSampleConfig random_sample_config = 1;
}
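
// Illustrative example only: a SamplingStrategy in proto text format that logs
// roughly 20% of requests.
//
//   random_sample_config { sample_rate: 0.2 }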