123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236 |
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

// Supplies the `google.api.resource_reference` field option used on
// `TrainingDataset.dataset`.
import "google/api/resource.proto";
// NOTE(review): presumably the source of GcsSource, GcsDestination,
// BigQuerySource and BigQueryDestination, none of which are declared in this
// file -- confirm against io.proto.
import "google/cloud/aiplatform/v1beta1/io.proto";

// Per-language code generation settings.
option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Configuration of model monitoring for a Batch Prediction Job.
message ModelMonitoringConfig {
  // Objective configs for model monitoring.
  repeated ModelMonitoringObjectiveConfig objective_configs = 3;

  // Alert config for model monitoring.
  ModelMonitoringAlertConfig alert_config = 2;

  // Cloud Storage URI of a YAML schema file that describes the format of a
  // single instance to be analyzed by Tensorflow Data Validation (TFDV).
  //
  // Use this field to override the schema when the data types of the predict
  // instance and the TFDV instance differ.
  // For models trained with Vertex AI, this field must be set with all the
  // fields of the predict instance formatted as string.
  string analysis_instance_schema_uri = 4;

  // Google Cloud Storage location into which batch prediction model
  // monitoring dumps statistics and anomalies.
  // When not provided, a folder is created in the customer project to hold
  // the statistics and anomalies.
  GcsDestination stats_anomalies_base_directory = 5;
}

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // Source of the training data. At most one of the members may be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    //
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between the training and prediction
    // feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
    // for Model Monitoring baseline dataset, which can be used to generate
    // baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the
      // BatchPrediction job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the
      // BatchPrediction job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribution
    // scores. If set to true, Vertex AI will log the feature attributions from
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring: how detected anomalies are
// surfaced (email alert and/or Cloud Logging).
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert.
    repeated string user_emails = 1;
  }

  // The alert channel. At most one of the members may be set.
  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // This can be further routed to Pub/Sub or any other services supported
  // by Cloud Logging.
  bool enable_logging = 2;
}

// The config for feature monitoring threshold.
message ThresholdConfig {
  // The threshold setting. At most one of the members may be set.
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    //
    // If this threshold config is for feature distribution distance:
    //   1. For categorical feature, the distribution distance is calculated by
    //      L-infinity norm.
    //   2. For numerical feature, the distribution distance is calculated by
    //      Jensen–Shannon divergence.
    //
    // Each feature must have a non-zero threshold if they need to be monitored.
    // Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling strategy for logging; applicable to both the training and the
// prediction dataset.
message SamplingStrategy {
  // Selects requests at random.
  message RandomSampleConfig {
    // Sample rate, in the range (0, 1].
    double sample_rate = 1;
  }

  // Random sample config. More sampling strategies will be supported later.
  RandomSampleConfig random_sample_config = 1;
}
|