dataset.proto 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. // Copyright 2019 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. syntax = "proto3";
  16. package google.cloud.datalabeling.v1beta1;
  import "google/api/field_behavior.proto";
  import "google/api/resource.proto";
  import "google/cloud/datalabeling/v1beta1/annotation.proto";
  import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
  import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
  import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
  import "google/protobuf/timestamp.proto";
  23. option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
  24. option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
  25. option java_multiple_files = true;
  26. option java_package = "com.google.cloud.datalabeling.v1beta1";
  27. option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
  28. option ruby_package = "Google::Cloud::DataLabeling::V1beta1";
  // Enumerates the data types an `InputConfig` can declare through its
  // `data_type` field.
  //
  // Value numbers are part of the wire contract and are kept exactly as
  // published (note that 3 and 5 are not assigned here).
  enum DataType {
    // Default (zero) value: no data type was specified.
    DATA_TYPE_UNSPECIFIED = 0;

    // Image data. Allowed for continuous evaluation.
    IMAGE = 1;

    // Video data.
    VIDEO = 2;

    // Text data. Allowed for continuous evaluation.
    TEXT = 4;

    // General data. Allowed for continuous evaluation.
    GENERAL_DATA = 6;
  }
  // Dataset is the resource that holds your data. Multiple labeling tasks can be
  // requested for one dataset; each task generates an AnnotatedDataset.
  //
  // Each field's documented behavior ("Output only.", "Required.", "Optional.")
  // is also expressed as a machine-readable `google.api.field_behavior`
  // annotation so that tooling does not have to parse the comments.
  message Dataset {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/Dataset"
      pattern: "projects/{project}/datasets/{dataset}"
    };

    // Output only. Dataset resource name, format is:
    // projects/{project_id}/datasets/{dataset_id}
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Required. The display name of the dataset. Maximum of 64 characters.
    string display_name = 2 [(google.api.field_behavior) = REQUIRED];

    // Optional. User-provided description of the dataset.
    // The description can be up to 10000 characters long.
    string description = 3 [(google.api.field_behavior) = OPTIONAL];

    // Output only. Time the dataset was created.
    google.protobuf.Timestamp create_time = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. This is populated with the original input configs
    // where ImportData is called. It is available only after the clients
    // import data to this dataset.
    repeated InputConfig input_configs = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The names of any related resources that are blocking changes
    // to the dataset.
    repeated string blocking_resources = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The number of data items in the dataset.
    int64 data_item_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
  // The configuration of input data, including data type, location, etc.
  //
  // Fields outside the oneofs carry `google.api.field_behavior` annotations
  // matching their documented "Required."/"Optional." labels. The oneof groups
  // keep their behavior in comments only, because a oneof itself cannot carry a
  // field option.
  message InputConfig {
    // Optional. The metadata associated with each data type.
    oneof data_type_metadata {
      // Required for text import, as language code must be specified.
      TextMetadata text_metadata = 6;
    }

    // Required. Where the data is from.
    oneof source {
      // Source located in Cloud Storage.
      GcsSource gcs_source = 2;

      // Source located in BigQuery. You must specify this field if you are
      // using this InputConfig in an
      // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
      BigQuerySource bigquery_source = 5;
    }

    // Required. Data type must be specified when user tries to import data.
    DataType data_type = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. The type of annotation to be performed on this data. You must
    // specify this field if you are using this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
    AnnotationType annotation_type = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Metadata about annotations for the input. You must specify this
    // field if you are using this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] for a
    // model version that performs classification.
    ClassificationMetadata classification_metadata = 4
        [(google.api.field_behavior) = OPTIONAL];
  }
  // Metadata describing text data.
  message TextMetadata {
    // Language of the text, expressed as a
    // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
    // Defaults to en-US.
    string language_code = 1;
  }
  // Metadata describing classification annotations.
  message ClassificationMetadata {
    // True when the classification task is multi-label; false otherwise.
    bool is_multi_label = 1;
  }
  // Source of the Cloud Storage file to be imported.
  //
  // Both fields are documented as required, so they also carry the
  // machine-readable `google.api.field_behavior` REQUIRED annotation.
  message GcsSource {
    // Required. The input URI of source file. This must be a Cloud Storage path
    // (`gs://...`).
    string input_uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. The format of the source file. Only "text/csv" is supported.
    string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
  }
  // The BigQuery location for input data. If used in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob], this
  // is where the service saves the prediction input and output sampled from the
  // model version.
  message BigQuerySource {
    // Required. BigQuery URI to a table, up to 2,000 characters long. If you
    // specify the URI of a table that does not exist, Data Labeling Service
    // creates a table at the URI with the correct schema when you create your
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob]. If you
    // specify the URI of a table that already exists, it must have the
    // [correct
    // schema](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
    //
    // Provide the table URI in the following format:
    //
    // "bq://<var>{your_project_id}</var>/<var>{your_dataset_name}</var>/<var>{your_table_name}</var>"
    //
    // [Learn
    // more](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
    //
    // The "Required." label above is mirrored by the field_behavior annotation.
    string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
  }
  // Describes where output data is written.
  message OutputConfig {
    // Required. The location that receives the output data. Exactly one of the
    // members below is set.
    oneof destination {
      // Write to a file in Cloud Storage. Use this for labeling output other
      // than image segmentation.
      GcsDestination gcs_destination = 1;

      // Write to a folder in Cloud Storage. Use this for image segmentation
      // labeling output.
      GcsFolderDestination gcs_folder_destination = 2;
    }
  }
  // Export destination of the data. Only a Cloud Storage (gcs) path is allowed
  // in output_uri.
  //
  // Both fields are documented as required, so they also carry the
  // machine-readable `google.api.field_behavior` REQUIRED annotation.
  message GcsDestination {
    // Required. The output URI of the destination file.
    string output_uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. The format of the gcs destination. Only "text/csv" and
    // "application/json" are supported.
    string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
  }
  // Export folder destination of the data.
  message GcsFolderDestination {
    // Required. Cloud Storage directory to export data to. The requirement is
    // also declared through the field_behavior annotation.
    string output_folder_uri = 1 [(google.api.field_behavior) = REQUIRED];
  }
  // DataItem is a piece of data, without annotation. For example, an image.
  message DataItem {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/DataItem"
      pattern: "projects/{project}/datasets/{dataset}/dataItems/{data_item}"
    };

    // Output only. The data held by this item; exactly one member is set.
    // (A oneof cannot carry a field option, so the behavior is documented here
    // in the comment only.)
    oneof payload {
      // The image payload, a container of the image bytes/uri.
      ImagePayload image_payload = 2;

      // The text payload, a container of text content.
      TextPayload text_payload = 3;

      // The video payload, a container of the video uri.
      VideoPayload video_payload = 4;
    }

    // Output only. Name of the data item, in format of:
    // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
  // AnnotatedDataset is a set holding annotations for data in a Dataset. Each
  // labeling task will generate an AnnotatedDataset under the Dataset that the
  // task is requested for.
  //
  // Every field is documented as output only; each one therefore also carries
  // the machine-readable `google.api.field_behavior` OUTPUT_ONLY annotation.
  message AnnotatedDataset {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/AnnotatedDataset"
      pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}"
    };

    // Output only. AnnotatedDataset resource name in format of:
    // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
    // {annotated_dataset_id}
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The display name of the AnnotatedDataset. It is specified in
    // HumanAnnotationConfig when user starts a labeling task. Maximum of 64
    // characters.
    string display_name = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The description of the AnnotatedDataset. It is specified in
    // HumanAnnotationConfig when user starts a labeling task. Maximum of 10000
    // characters.
    string description = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Source of the annotation.
    AnnotationSource annotation_source = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Type of the annotation. It is specified when starting
    // labeling task.
    AnnotationType annotation_type = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of examples in the annotated dataset.
    int64 example_count = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of examples that have annotation in the annotated
    // dataset.
    int64 completed_example_count = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Per label statistics.
    LabelStats label_stats = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Time the AnnotatedDataset was created.
    google.protobuf.Timestamp create_time = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Additional information about AnnotatedDataset.
    AnnotatedDatasetMetadata metadata = 10
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The names of any related resources that are blocking changes
    // to the annotated dataset.
    repeated string blocking_resources = 11
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
  // Per-annotation-spec statistics.
  message LabelStats {
    // Example count for each annotation spec: the key is the annotation spec
    // name and the value is the number of examples for that spec.
    //
    // When the annotated dataset has no annotation spec, the map holds a single
    // pair whose key is the empty string and whose value is the total number of
    // annotations.
    map<string, int64> example_count = 1;
  }
  // Metadata attached to an AnnotatedDataset.
  message AnnotatedDatasetMetadata {
    // The task-specific request configuration that was used when the labeling
    // task was requested. Exactly one member is set.
    oneof annotation_request_config {
      // Image classification task configuration.
      ImageClassificationConfig image_classification_config = 2;

      // Image bounding box and bounding poly task configuration.
      BoundingPolyConfig bounding_poly_config = 3;

      // Image polyline task configuration.
      PolylineConfig polyline_config = 4;

      // Image segmentation task configuration.
      SegmentationConfig segmentation_config = 5;

      // Video classification task configuration.
      VideoClassificationConfig video_classification_config = 6;

      // Video object detection task configuration.
      ObjectDetectionConfig object_detection_config = 7;

      // Video object tracking task configuration.
      ObjectTrackingConfig object_tracking_config = 8;

      // Video event labeling task configuration.
      EventConfig event_config = 9;

      // Text classification task configuration.
      TextClassificationConfig text_classification_config = 10;

      // Text entity extraction task configuration.
      TextEntityExtractionConfig text_entity_extraction_config = 11;
    }

    // The HumanAnnotationConfig that was used when the human labeling task for
    // this AnnotatedDataset was requested.
    HumanAnnotationConfig human_annotation_config = 1;
  }
  // An Example is a piece of data and its annotation. For example, an image with
  // label "house".
  message Example {
    option (google.api.resource) = {
      type: "datalabeling.googleapis.com/Example"
      pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}/examples/{example}"
    };

    // Output only. The data part of Example; exactly one member is set.
    // (A oneof cannot carry a field option, so the behavior is documented here
    // in the comment only.)
    oneof payload {
      // The image payload, a container of the image bytes/uri.
      ImagePayload image_payload = 2;

      // The text payload, a container of the text content.
      TextPayload text_payload = 6;

      // The video payload, a container of the video uri.
      VideoPayload video_payload = 7;
    }

    // Output only. Name of the example, in format of:
    // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
    // {annotated_dataset_id}/examples/{example_id}
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Annotations for the piece of data in Example.
    // One piece of data can have multiple annotations.
    repeated Annotation annotations = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }