// dataset.proto
  // Copyright 2022 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.aiplatform.v1beta1;

  // Imports are kept in alphabetical order.
  import "google/api/field_behavior.proto";
  import "google/api/resource.proto";
  import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
  import "google/cloud/aiplatform/v1beta1/io.proto";
  import "google/protobuf/struct.proto";
  import "google/protobuf/timestamp.proto";

  // Per-language code generation options.
  option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1;aiplatform";
  option java_multiple_files = true;
  option java_outer_classname = "DatasetProto";
  option java_package = "com.google.cloud.aiplatform.v1beta1";
  option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
  option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
  // A collection of DataItems and Annotations on them.
  message Dataset {
    option (google.api.resource) = {
      type: "aiplatform.googleapis.com/Dataset"
      pattern: "projects/{project}/locations/{location}/datasets/{dataset}"
    };

    // Output only. The resource name of the Dataset.
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Required. The user-defined name of the Dataset.
    // The name can be up to 128 characters long and can consist of any UTF-8
    // characters.
    string display_name = 2 [(google.api.field_behavior) = REQUIRED];

    // Optional. The description of the Dataset.
    string description = 16 [(google.api.field_behavior) = OPTIONAL];

    // Required. Points to a YAML file stored on Google Cloud Storage
    // describing additional information about the Dataset.
    // The schema is defined as an OpenAPI 3.0.2 Schema Object.
    // The schema files that can be used here are found in
    // gs://google-cloud-aiplatform/schema/dataset/metadata/.
    string metadata_schema_uri = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. Additional information about the Dataset.
    google.protobuf.Value metadata = 8
        [(google.api.field_behavior) = REQUIRED];

    // Output only. Timestamp when this Dataset was created.
    google.protobuf.Timestamp create_time = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Timestamp when this Dataset was last updated.
    google.protobuf.Timestamp update_time = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Used to perform consistent read-modify-write updates. If not set, a
    // blind "overwrite" update happens.
    string etag = 6;

    // The labels with user-defined metadata to organize your Datasets.
    //
    // Label keys and values can be no longer than 64 characters
    // (Unicode codepoints), can only contain lowercase letters, numeric
    // characters, underscores and dashes. International characters are
    // allowed. No more than 64 user labels can be associated with one Dataset
    // (System labels are excluded).
    //
    // See https://goo.gl/xmQnxf for more information and examples of labels.
    // System reserved label keys are prefixed with
    // "aiplatform.googleapis.com/" and are immutable. Following system labels
    // exist for each Dataset:
    //
    // * "aiplatform.googleapis.com/dataset_metadata_schema": output only, its
    //   value is the [metadata_schema's][google.cloud.aiplatform.v1beta1.Dataset.metadata_schema_uri] title.
    map<string, string> labels = 7;

    // Customer-managed encryption key spec for a Dataset. If set, this Dataset
    // and all sub-resources of this Dataset will be secured by this key.
    EncryptionSpec encryption_spec = 11;
  }
  // Describes the location from where we import data into a Dataset, together
  // with the labels that will be applied to the DataItems and the Annotations.
  message ImportDataConfig {
    // The source of the input.
    oneof source {
      // The Google Cloud Storage location for the input content.
      GcsSource gcs_source = 1;
    }

    // Labels that will be applied to newly imported DataItems. If an identical
    // DataItem as one being imported already exists in the Dataset, then these
    // labels will be appended to those of the already existing one, and if a
    // label with an identical key was imported before, the old label value
    // will be overwritten. If two DataItems are identical in the same import
    // data operation, the labels will be combined and if a key collision
    // happens in this case, one of the values will be picked randomly. Two
    // DataItems are considered identical if their content bytes are identical
    // (e.g. image bytes or pdf bytes).
    // These labels will be overridden by Annotation labels specified inside
    // the index file referenced by
    // [import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
    // e.g. a jsonl file.
    map<string, string> data_item_labels = 2;

    // Labels that will be applied to newly imported Annotations. If two
    // Annotations are identical, one of them will be deduped. Two Annotations
    // are considered identical if their
    // [payload][google.cloud.aiplatform.v1beta1.Annotation.payload],
    // [payload_schema_uri][google.cloud.aiplatform.v1beta1.Annotation.payload_schema_uri]
    // and all of their
    // [labels][google.cloud.aiplatform.v1beta1.Annotation.labels] are the same.
    // These labels will be overridden by Annotation labels specified inside
    // the index file referenced by
    // [import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
    // e.g. a jsonl file.
    map<string, string> annotation_labels = 3;

    // Required. Points to a YAML file stored on Google Cloud Storage
    // describing the import format. Validation will be done against the
    // schema. The schema is defined as an [OpenAPI 3.0.2 Schema
    // Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject).
    string import_schema_uri = 4 [(google.api.field_behavior) = REQUIRED];
  }
  // Describes what part of the Dataset is to be exported, the destination of
  // the export and how to export.
  message ExportDataConfig {
    // The destination of the output.
    oneof destination {
      // The Google Cloud Storage location where the output is to be written
      // to. In the given directory a new directory will be created with name:
      // `export-data-<dataset-display-name>-<timestamp-of-export-call>` where
      // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
      // output will be written into that directory. Inside that directory,
      // annotations with the same schema will be grouped into sub directories
      // which are named with the corresponding annotations' schema title.
      // Inside these sub directories, a schema.yaml will be created to
      // describe the output format.
      GcsDestination gcs_destination = 1;
    }

    // NOTE(review): the `[data_items_filter][]` link below has an empty
    // target, and no `data_items_filter` field is declared in this message as
    // written here — confirm which field or request it is meant to reference.

    // A filter on Annotations of the Dataset. Only Annotations on
    // to-be-exported DataItems (specified by [data_items_filter][]) that match
    // this filter will be exported. The filter syntax is the same as in
    // [ListAnnotations][google.cloud.aiplatform.v1beta1.DatasetService.ListAnnotations].
    string annotations_filter = 2;
  }