// model_monitoring.proto
  1. // Copyright 2022 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.aiplatform.v1beta1;
  16. import "google/api/resource.proto";
  17. import "google/cloud/aiplatform/v1beta1/io.proto";
  18. option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
  19. option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
  20. option java_multiple_files = true;
  21. option java_outer_classname = "ModelMonitoringProto";
  22. option java_package = "com.google.cloud.aiplatform.v1beta1";
  23. option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
  24. option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
  25. // The model monitoring configuration used for Batch Prediction Job.
  26. message ModelMonitoringConfig {
  27. // Model monitoring objective config.
  28. repeated ModelMonitoringObjectiveConfig objective_configs = 3;
  29. // Model monitoring alert config.
  30. ModelMonitoringAlertConfig alert_config = 2;
  31. // YAML schema file uri in Cloud Storage describing the format of a single
  32. // instance that you want Tensorflow Data Validation (TFDV) to analyze.
  33. //
  34. // If there are any data type differences between predict instance and TFDV
  35. // instance, this field can be used to override the schema.
  36. // For models trained with Vertex AI, this field must be set as all the
  37. // fields in predict instance formatted as string.
  38. string analysis_instance_schema_uri = 4;
  39. // A Google Cloud Storage location for batch prediction model monitoring to
  40. // dump statistics and anomalies.
  41. // If not provided, a folder will be created in customer project to hold
  42. // statistics and anomalies.
  43. GcsDestination stats_anomalies_base_directory = 5;
  44. }
  45. // The objective configuration for model monitoring, including the information
  46. // needed to detect anomalies for one particular model.
  47. message ModelMonitoringObjectiveConfig {
  48. // Training Dataset information.
  49. message TrainingDataset {
  50. oneof data_source {
  51. // The resource name of the Dataset used to train this Model.
  52. string dataset = 3 [(google.api.resource_reference) = {
  53. type: "aiplatform.googleapis.com/Dataset"
  54. }];
  55. // The Google Cloud Storage uri of the unmanaged Dataset used to train
  56. // this Model.
  57. GcsSource gcs_source = 4;
  58. // The BigQuery table of the unmanaged Dataset used to train this
  59. // Model.
  60. BigQuerySource bigquery_source = 5;
  61. }
  62. // Data format of the dataset, only applicable if the input is from
  63. // Google Cloud Storage.
  64. // The possible formats are:
  65. //
  66. // "tf-record"
  67. // The source file is a TFRecord file.
  68. //
  69. // "csv"
  70. // The source file is a CSV file.
  71. // "jsonl"
  72. // The source file is a JSONL file.
  73. string data_format = 2;
  74. // The target field name the model is to predict.
  75. // This field will be excluded when doing Predict and (or) Explain for the
  76. // training data.
  77. string target_field = 6;
  78. // Strategy to sample data from Training Dataset.
  79. // If not set, we process the whole dataset.
  80. SamplingStrategy logging_sampling_strategy = 7;
  81. }
  82. // The config for Training & Prediction data skew detection. It specifies the
  83. // training dataset sources and the skew detection parameters.
  84. message TrainingPredictionSkewDetectionConfig {
  85. // Key is the feature name and value is the threshold. If a feature needs to
  86. // be monitored for skew, a value threshold must be configured for that
  87. // feature. The threshold here is against feature distribution distance
  88. // between the training and prediction feature.
  89. map<string, ThresholdConfig> skew_thresholds = 1;
  90. // Key is the feature name and value is the threshold. The threshold here is
  91. // against attribution score distance between the training and prediction
  92. // feature.
  93. map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;
  94. // Skew anomaly detection threshold used by all features.
  95. // When the per-feature thresholds are not set, this field can be used to
  96. // specify a threshold for all features.
  97. ThresholdConfig default_skew_threshold = 6;
  98. }
  99. // The config for Prediction data drift detection.
  100. message PredictionDriftDetectionConfig {
  101. // Key is the feature name and value is the threshold. If a feature needs to
  102. // be monitored for drift, a value threshold must be configured for that
  103. // feature. The threshold here is against feature distribution distance
  104. // between different time windws.
  105. map<string, ThresholdConfig> drift_thresholds = 1;
  106. // Key is the feature name and value is the threshold. The threshold here is
  107. // against attribution score distance between different time windows.
  108. map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;
  109. // Drift anomaly detection threshold used by all features.
  110. // When the per-feature thresholds are not set, this field can be used to
  111. // specify a threshold for all features.
  112. ThresholdConfig default_drift_threshold = 5;
  113. }
  114. // The config for integrating with Vertex Explainable AI. Only applicable if
  115. // the Model has explanation_spec populated.
  116. message ExplanationConfig {
  117. // Output from [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob] for Model Monitoring baseline dataset,
  118. // which can be used to generate baseline attribution scores.
  119. message ExplanationBaseline {
  120. // The storage format of the predictions generated BatchPrediction job.
  121. enum PredictionFormat {
  122. // Should not be set.
  123. PREDICTION_FORMAT_UNSPECIFIED = 0;
  124. // Predictions are in JSONL files.
  125. JSONL = 2;
  126. // Predictions are in BigQuery.
  127. BIGQUERY = 3;
  128. }
  129. // The configuration specifying of BatchExplain job output. This can be
  130. // used to generate the baseline of feature attribution scores.
  131. oneof destination {
  132. // Cloud Storage location for BatchExplain output.
  133. GcsDestination gcs = 2;
  134. // BigQuery location for BatchExplain output.
  135. BigQueryDestination bigquery = 3;
  136. }
  137. // The storage format of the predictions generated BatchPrediction job.
  138. PredictionFormat prediction_format = 1;
  139. }
  140. // If want to analyze the Vertex Explainable AI feature attribute scores or
  141. // not. If set to true, Vertex AI will log the feature attributions from
  142. // explain response and do the skew/drift detection for them.
  143. bool enable_feature_attributes = 1;
  144. // Predictions generated by the BatchPredictionJob using baseline dataset.
  145. ExplanationBaseline explanation_baseline = 2;
  146. }
  147. // Training dataset for models. This field has to be set only if
  148. // TrainingPredictionSkewDetectionConfig is specified.
  149. TrainingDataset training_dataset = 1;
  150. // The config for skew between training data and prediction data.
  151. TrainingPredictionSkewDetectionConfig training_prediction_skew_detection_config = 2;
  152. // The config for drift of prediction data.
  153. PredictionDriftDetectionConfig prediction_drift_detection_config = 3;
  154. // The config for integrating with Vertex Explainable AI.
  155. ExplanationConfig explanation_config = 5;
  156. }
  157. message ModelMonitoringAlertConfig {
  158. // The config for email alert.
  159. message EmailAlertConfig {
  160. // The email addresses to send the alert.
  161. repeated string user_emails = 1;
  162. }
  163. oneof alert {
  164. // Email alert config.
  165. EmailAlertConfig email_alert_config = 1;
  166. }
  167. // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  168. // payload encoded from proto
  169. // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  170. // This can be further sinked to Pub/Sub or any other services supported
  171. // by Cloud Logging.
  172. bool enable_logging = 2;
  173. }
  174. // The config for feature monitoring threshold.
  175. message ThresholdConfig {
  176. oneof threshold {
  177. // Specify a threshold value that can trigger the alert.
  178. // If this threshold config is for feature distribution distance:
  179. // 1. For categorical feature, the distribution distance is calculated by
  180. // L-inifinity norm.
  181. // 2. For numerical feature, the distribution distance is calculated by
  182. // Jensen–Shannon divergence.
  183. // Each feature must have a non-zero threshold if they need to be monitored.
  184. // Otherwise no alert will be triggered for that feature.
  185. double value = 1;
  186. }
  187. }
  188. // Sampling Strategy for logging, can be for both training and prediction
  189. // dataset.
  190. message SamplingStrategy {
  191. // Requests are randomly selected.
  192. message RandomSampleConfig {
  193. // Sample rate (0, 1]
  194. double sample_rate = 1;
  195. }
  196. // Random sample config. Will support more sampling strategies later.
  197. RandomSampleConfig random_sample_config = 1;
  198. }