123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
- // Copyright 2022 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- syntax = "proto3";
- package google.cloud.aiplatform.v1;
- import "google/api/field_behavior.proto";
- import "google/api/resource.proto";
- import "google/cloud/aiplatform/v1/encryption_spec.proto";
- import "google/cloud/aiplatform/v1/io.proto";
- import "google/protobuf/struct.proto";
- import "google/protobuf/timestamp.proto";
- option csharp_namespace = "Google.Cloud.AIPlatform.V1";
- option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
- option java_multiple_files = true;
- option java_outer_classname = "DatasetProto";
- option java_package = "com.google.cloud.aiplatform.v1";
- option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
- option ruby_package = "Google::Cloud::AIPlatform::V1";
// A Dataset groups DataItems together with the Annotations made on them.
message Dataset {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/datasets/{dataset}"
  };

  // Output only. Resource name of this Dataset.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. User-defined name of the Dataset.
  // The name may consist of any UTF-8 characters and can be up to 128
  // characters long.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Description of the Dataset.
  string description = 16 [(google.api.field_behavior) = OPTIONAL];

  // Required. Points to a YAML file stored on Google Cloud Storage that
  // describes additional information about the Dataset.
  // The schema is defined as an OpenAPI 3.0.2 Schema Object.
  // Schema files that can be used here are found in
  // gs://google-cloud-aiplatform/schema/dataset/metadata/.
  string metadata_schema_uri = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Additional information about the Dataset.
  google.protobuf.Value metadata = 8 [(google.api.field_behavior) = REQUIRED];

  // Output only. Time at which this Dataset was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which this Dataset was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Used to perform consistent read-modify-write updates. When it is not
  // set, a blind "overwrite" update happens.
  string etag = 6;

  // User-defined metadata labels used to organize your Datasets.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints) and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are
  // allowed. No more than 64 user labels can be associated with one Dataset
  // (system labels are excluded).
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  // System reserved label keys are prefixed with
  // "aiplatform.googleapis.com/" and are immutable. The following system
  // labels exist for each Dataset:
  //
  // * "aiplatform.googleapis.com/dataset_metadata_schema": output only, its
  //   value is the
  //   [metadata_schema's][google.cloud.aiplatform.v1.Dataset.metadata_schema_uri]
  //   title.
  map<string, string> labels = 7;

  // Customer-managed encryption key spec for a Dataset. If set, this
  // Dataset and all of its sub-resources will be secured by this key.
  EncryptionSpec encryption_spec = 11;
}
// Describes where data is imported into a Dataset from, along with the
// labels that will be applied to the imported DataItems and Annotations.
message ImportDataConfig {
  // The source of the input.
  oneof source {
    // Google Cloud Storage location of the input content.
    GcsSource gcs_source = 1;
  }

  // Labels that will be applied to newly imported DataItems.
  //
  // If a DataItem identical to one being imported already exists in the
  // Dataset, these labels are appended to those of the existing DataItem;
  // if a label with an identical key was imported before, its old value is
  // overwritten. If two DataItems within the same import data operation are
  // identical, their labels are combined, and if a key collision happens in
  // that case, one of the values is picked randomly. Two DataItems are
  // considered identical if their content bytes are identical (e.g. image
  // bytes or pdf bytes).
  //
  // These labels will be overridden by Annotation labels specified inside
  // the index file (e.g. a jsonl file) referenced by
  // [import_schema_uri][google.cloud.aiplatform.v1.ImportDataConfig.import_schema_uri].
  map<string, string> data_item_labels = 2;

  // Labels that will be applied to newly imported Annotations.
  //
  // If two Annotations are identical, one of them will be deduped. Two
  // Annotations are considered identical if their
  // [payload][google.cloud.aiplatform.v1.Annotation.payload],
  // [payload_schema_uri][google.cloud.aiplatform.v1.Annotation.payload_schema_uri]
  // and all of their [labels][google.cloud.aiplatform.v1.Annotation.labels]
  // are the same.
  //
  // These labels will be overridden by Annotation labels specified inside
  // the index file (e.g. a jsonl file) referenced by
  // [import_schema_uri][google.cloud.aiplatform.v1.ImportDataConfig.import_schema_uri].
  map<string, string> annotation_labels = 3;

  // Required. Points to a YAML file stored on Google Cloud Storage that
  // describes the import format. Validation will be done against the
  // schema. The schema is defined as an [OpenAPI 3.0.2 Schema
  // Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject).
  string import_schema_uri = 4 [(google.api.field_behavior) = REQUIRED];
}
// Describes which part of the Dataset is to be exported, where the export
// is written to, and how the export is performed.
message ExportDataConfig {
  // The destination of the output.
  oneof destination {
    // Google Cloud Storage location where the output is to be written to.
    //
    // Inside the given directory a new directory will be created with name:
    // `export-data-<dataset-display-name>-<timestamp-of-export-call>` where
    // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
    // output will be written into that directory. Within it, annotations
    // with the same schema are grouped into sub directories named after the
    // corresponding annotations' schema title. Inside these sub directories,
    // a schema.yaml will be created to describe the output format.
    GcsDestination gcs_destination = 1;
  }

  // A filter on Annotations of the Dataset. Only Annotations on
  // to-be-exported DataItems (specified by [data_items_filter][]) that match
  // this filter will be exported. The filter syntax is the same as in
  // [ListAnnotations][google.cloud.aiplatform.v1.DatasetService.ListAnnotations].
  string annotations_filter = 2;
}
|