document.proto 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. // Copyright 2022 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  // Schema language version; must be the first non-comment statement.
  syntax = "proto3";

  package google.cloud.contentwarehouse.v1;

  import "google/api/field_behavior.proto";
  import "google/api/resource.proto";
  import "google/cloud/documentai/v1/document.proto";
  import "google/protobuf/timestamp.proto";
  import "google/type/datetime.proto";

  // Code-generation options for the Go and Java targets.
  option go_package = "google.golang.org/genproto/googleapis/cloud/contentwarehouse/v1;contentwarehouse";
  option java_multiple_files = true;
  option java_outer_classname = "DocumentProto";
  option java_package = "com.google.cloud.contentwarehouse.v1";

  // Defines the structure for content warehouse document proto.
  message Document {
    option (google.api.resource) = {
      type: "contentwarehouse.googleapis.com/Document"
      pattern: "projects/{project}/locations/{location}/documents/{document}"
      pattern: "projects/{project}/locations/{location}/documents/referenceId/{reference_id}"
    };

    // The resource name of the document.
    // Format:
    // projects/{project_number}/locations/{location}/documents/{document_id}.
    //
    // The name is ignored when creating a document.
    string name = 1;

    // The reference ID set by customers. Must be unique per project and
    // location.
    string reference_id = 11;

    // Required. Display name of the document given by the user. This name
    // will be displayed in the UI.
    // Customer can populate this field with the name of the document. This
    // differs from the 'title' field as 'title' is optional and stores the
    // top heading in the document.
    string display_name = 2 [(google.api.field_behavior) = REQUIRED];

    // Title that describes the document.
    // This is usually present in the top section of the document, and is a
    // mandatory field for the question-answering feature.
    string title = 18;

    // Uri to display the document, for example, in the UI.
    string display_uri = 17;

    // The Document schema name.
    // Format:
    // projects/{project_number}/locations/{location}/documentSchemas/{document_schema_id}.
    string document_schema_name = 3 [(google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/DocumentSchema"
    }];

    // Structured content of the document. At most one variant is set.
    oneof structured_content {
      // Other document format, such as PPTX, XLSX.
      string plain_text = 15;

      // Document AI format to save the structured content, including OCR.
      google.cloud.documentai.v1.Document cloud_ai_document = 4;
    }

    // A path linked to structured content file.
    string structured_content_uri = 16;

    // Raw document file. At most one variant is set.
    oneof raw_document {
      // Raw document file in Cloud Storage path.
      string raw_document_path = 5;

      // Raw document content.
      bytes inline_raw_document = 6;
    }

    // List of values that are user supplied metadata.
    repeated Property properties = 7;

    // Output only. The time when the document is last updated.
    google.protobuf.Timestamp update_time = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the document is created.
    google.protobuf.Timestamp create_time = 9
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // This is used when DocAI was not used to load the document and parsing/
    // extracting is needed for the inline_raw_document. For example, if
    // inline_raw_document is the byte representation of a PDF file, then
    // this should be set to: RAW_DOCUMENT_FILE_TYPE_PDF.
    RawDocumentFileType raw_document_file_type = 10;

    // If true, makes the document visible to asynchronous policies and rules.
    bool async_enabled = 12;

    // If true, text extraction will not be performed.
    bool text_extraction_disabled = 19;

    // The user who created the document.
    string creator = 13;

    // The user who last updated the document.
    string updater = 14;
  }

  // References to the documents.
  message DocumentReference {
    // Required. Name of the referenced document.
    string document_name = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.resource_reference) = {
        type: "contentwarehouse.googleapis.com/Document"
      }
    ];

    // display_name of the referenced document; this name does not need to be
    // consistent with the display_name in the Document proto, depending on the
    // ACL constraint.
    string display_name = 2;

    // Stores the subset of the referenced document's content.
    // This is useful to allow the user to peek at the information of the
    // referenced document.
    string snippet = 3;

    // The document type of the document being referenced: true if the
    // referenced document is a folder.
    bool document_is_folder = 4;

    // Output only. The time when the document is last updated.
    google.protobuf.Timestamp update_time = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the document is created.
    google.protobuf.Timestamp create_time = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the document is deleted.
    google.protobuf.Timestamp delete_time = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Property of a document.
  message Property {
    // Required. Must match the name of a PropertyDefinition in the
    // DocumentSchema.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Type of the property.
    // Must match the property_options type of the matching PropertyDefinition.
    // Value of the Property parsed into a specific data type.
    // Specific type value(s) obtained from Document AI's Property.mention_text
    // field.
    oneof values {
      // Integer property values.
      IntegerArray integer_values = 2;

      // Float property values.
      FloatArray float_values = 3;

      // String/text property values.
      TextArray text_values = 4;

      // Enum property values.
      EnumArray enum_values = 5;

      // Nested structured data property values.
      PropertyArray property_values = 6;

      // Date time property values.
      // It is not supported by CMEK compliant deployment.
      DateTimeArray date_time_values = 7;

      // Map property values.
      MapProperty map_property = 8;

      // Timestamp property values.
      // It is not supported by CMEK compliant deployment.
      TimestampArray timestamp_values = 9;
    }
  }

  // Integer values.
  message IntegerArray {
    // List of integer values.
    repeated int32 values = 1;
  }

  // Float values.
  message FloatArray {
    // List of float values.
    repeated float values = 1;
  }

  // String/text values.
  message TextArray {
    // List of text values.
    repeated string values = 1;
  }

  // Enum values.
  message EnumArray {
    // List of enum values, each carried as a string.
    repeated string values = 1;
  }

  // DateTime values.
  message DateTimeArray {
    // List of datetime values.
    // Both OffsetDateTime and ZonedDateTime are supported.
    repeated google.type.DateTime values = 1;
  }

  // Timestamp values.
  message TimestampArray {
    // List of timestamp values.
    repeated TimestampValue values = 1;
  }

  // Timestamp value type.
  message TimestampValue {
    // Representation of the timestamp. At most one variant is set.
    oneof value {
      // Timestamp value.
      google.protobuf.Timestamp timestamp_value = 1;

      // The string must represent a valid instant in UTC and is parsed using
      // java.time.format.DateTimeFormatter.ISO_INSTANT.
      // For example, "2013-09-29T18:46:19Z".
      string text_value = 2;
    }
  }

  // Property values.
  message PropertyArray {
    // List of property values.
    repeated Property properties = 1;
  }

  // Map property value.
  // Represents structured entries of key-value pairs, consisting of field
  // names which map to dynamically typed values.
  message MapProperty {
    // Unordered map of dynamically typed values.
    map<string, Value> fields = 1;
  }

  // `Value` represents a dynamically typed value which can be a float, an
  // integer, a string, an enum, a datetime, a timestamp, or a boolean value.
  // A producer of value is expected to set one of these variants. Absence of
  // any variant indicates an error.
  message Value {
    // The kind of value.
    oneof kind {
      // Represents a float value.
      float float_value = 1;

      // Represents an integer value.
      int32 int_value = 2;

      // Represents a string value.
      string string_value = 3;

      // Represents an enum value.
      EnumValue enum_value = 4;

      // Represents a datetime value.
      google.type.DateTime datetime_value = 5;

      // Represents a timestamp value.
      TimestampValue timestamp_value = 6;

      // Represents a boolean value.
      bool boolean_value = 7;
    }
  }

  // Represents the string value of the enum field.
  message EnumValue {
    // String value of the enum field. This must match the defined set of enums
    // in the document schema using EnumTypeOptions.
    string value = 1;
  }

  // When a raw document is supplied, this indicates the file format.
  enum RawDocumentFileType {
    // No raw document specified or it is non-parsable.
    RAW_DOCUMENT_FILE_TYPE_UNSPECIFIED = 0;

    // Adobe PDF format.
    RAW_DOCUMENT_FILE_TYPE_PDF = 1;

    // Microsoft Word format.
    RAW_DOCUMENT_FILE_TYPE_DOCX = 2;

    // Microsoft Excel format.
    RAW_DOCUMENT_FILE_TYPE_XLSX = 3;

    // Microsoft PowerPoint format.
    RAW_DOCUMENT_FILE_TYPE_PPTX = 4;

    // UTF-8 encoded text format.
    RAW_DOCUMENT_FILE_TYPE_TEXT = 5;
  }