document_understanding.proto 11 KB


  1. // Copyright 2019 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  // Schema dialect and the versioned API package for these definitions.
  syntax = "proto3";

  package google.cloud.documentai.v1beta1;

  import "google/api/annotations.proto";
  import "google/api/client.proto";
  import "google/api/field_behavior.proto";
  import "google/cloud/documentai/v1beta1/geometry.proto";
  import "google/longrunning/operations.proto";
  import "google/protobuf/timestamp.proto";

  // Per-language code generation settings.
  option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai";
  option java_multiple_files = true;
  option java_outer_classname = "DocumentAiProto";
  option java_package = "com.google.cloud.documentai.v1beta1";
  option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
  option ruby_package = "Google::Cloud::DocumentAI::V1beta1";

  // Parses structured information out of unstructured or semi-structured
  // documents, using state-of-the-art Google AI such as natural language,
  // computer vision, and translation.
  service DocumentUnderstandingService {
    option (google.api.default_host) = "documentai.googleapis.com";
    option (google.api.oauth_scopes) =
        "https://www.googleapis.com/auth/cloud-platform";

    // Long-running operation (LRO) endpoint that batch processes many
    // documents.
    rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
        returns (google.longrunning.Operation) {
      // The primary binding takes a project-and-location parent; the
      // additional binding accepts a project-only parent.
      option (google.api.http) = {
        post: "/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess"
        body: "*"
        additional_bindings {
          post: "/v1beta1/{parent=projects/*}/documents:batchProcess"
          body: "*"
        }
      };
      option (google.api.method_signature) = "requests";
      option (google.longrunning.operation_info) = {
        response_type: "BatchProcessDocumentsResponse"
        metadata_type: "OperationMetadata"
      };
    }
  }

  // Asks the service to batch process documents as a single asynchronous
  // operation.
  message BatchProcessDocumentsRequest {
    // Required. One individual request per document.
    repeated ProcessDocumentRequest requests = 1
        [(google.api.field_behavior) = REQUIRED];

    // Project and location that the call targets.
    //
    // Format: `projects/{project-id}/locations/{location-id}`.
    //
    // When no location is given, a region is chosen automatically.
    string parent = 2;
  }

  // Describes how a single document should be processed.
  message ProcessDocumentRequest {
    // Required. Describes the input file.
    InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Where the output should be written.
    OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

    // A known document type, enabling deeper structure detection. The values
    // currently accepted are "general" and "invoice". When omitted, "general"
    // is the default. Any other value causes the request to be rejected.
    string document_type = 3;

    // Table extraction settings. When unset, the system picks reasonable
    // defaults.
    TableExtractionParams table_extraction_params = 4;

    // Form extraction settings. When unset, the system picks reasonable
    // defaults.
    FormExtractionParams form_extraction_params = 5;

    // Entity extraction settings. When unset, the system picks reasonable
    // defaults.
    EntityExtractionParams entity_extraction_params = 6;

    // OCR settings. When unset, the system picks reasonable defaults.
    OcrParams ocr_params = 7;
  }

  // Result of a batch document processing request. It is returned in the LRO
  // Operation once the operation has completed.
  message BatchProcessDocumentsResponse {
    // One response per individual document.
    repeated ProcessDocumentResponse responses = 1;
  }

  // Result of processing a single document.
  message ProcessDocumentResponse {
    // Describes the input file; identical to the corresponding input config
    // in the request.
    InputConfig input_config = 1;

    // Location the parsed responses are written to, as JSON-serialized
    // `Document` objects.
    OutputConfig output_config = 2;
  }

  // Settings that control Optical Character Recognition (OCR) behavior.
  message OcrParams {
    // Languages to use for OCR. Leaving this empty usually gives the best
    // results because it turns on automatic language detection. Languages
    // based on the Latin alphabet do not need `language_hints`. In rare cases
    // where the language of the text in the image is known, a hint helps
    // produce better results (but a wrong hint is a significant hindrance).
    // Document processing returns an error if one or more of the specified
    // languages is not a supported language.
    repeated string language_hints = 1;
  }

  // Settings that control table extraction behavior.
  message TableExtractionParams {
    // Turns table extraction on or off.
    bool enabled = 1;

    // Optional. Bounding box hints for tables, for complex cases in which the
    // algorithm cannot locate the table(s) on its own.
    repeated TableBoundHint table_bound_hints = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Hints for table headers. Extraction is biased towards
    // producing these terms as table headers, which may improve accuracy.
    repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

    // Version of the table extraction model. Defaults to "builtin/stable";
    // use "builtin/latest" to get the latest model.
    string model_version = 4;
  }

  // Hints at where a table's bounding box lies on a page, for table parsing.
  message TableBoundHint {
    // Optional. The page, in a multi-paged input, that this hint applies to.
    // This value is 1-based. When not provided, the hint applies to all pages
    // by default.
    int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

    // Hinted bounding box of a table on this page. It must be an axis-aligned
    // rectangle whose coordinates are normalized to [0,1].
    BoundingPoly bounding_box = 2;
  }

  // Settings that control form extraction behavior.
  message FormExtractionParams {
    // Turns form extraction on or off.
    bool enabled = 1;

    // Pairs of (key text, value type) that the user can supply to improve the
    // parsing result.
    //
    // For example, suppose a document has a field called "Date" holding a
    // date value, and a field called "Amount" holding either a currency value
    // (e.g., "$500.00") or a simple number value (e.g., "20"). The following
    // hints could be used: [ {"key": "Date", "value_types": [ "DATE" ]},
    // {"key": "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
    //
    // To hint at keys whose value type is unknown, leave the value_types
    // field blank, e.g. {"key": "Date", "value_types": []}
    repeated KeyValuePairHint key_value_pair_hints = 2;

    // Version of the form extraction model. Defaults to "builtin/stable"; use
    // "builtin/latest" to get the latest model.
    string model_version = 3;
  }

  // A hint, supplied by the user, about one key value pair.
  message KeyValuePairHint {
    // Key text that the hint refers to.
    string key = 1;

    // Value type(s) for the key. Matching is case-insensitive. Recognized
    // types are: ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, ID,
    // NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Any type outside this list is
    // ignored.
    repeated string value_types = 2;
  }

  // Settings that control entity extraction behavior.
  message EntityExtractionParams {
    // Turns entity extraction on or off.
    bool enabled = 1;

    // Version of the entity extraction model. Defaults to "builtin/stable";
    // use "builtin/latest" to get the latest model.
    string model_version = 2;
  }

  // Describes where the input comes from, along with its metadata.
  message InputConfig {
    // Required.
    oneof source {
      // Google Cloud Storage location the input is read from. It must refer
      // to a single file.
      GcsSource gcs_source = 1;
    }

    // Required. Mimetype of the input. The mimetypes currently supported are
    // application/pdf, image/tiff, and image/gif.
    string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
  }

  // Describes where the output goes, along with its metadata.
  message OutputConfig {
    // Required.
    oneof destination {
      // Google Cloud Storage location the output is written to.
      GcsDestination gcs_destination = 1;
    }

    // Maximum number of pages placed in each output Document shard JSON on
    // Google Cloud Storage.
    //
    // Valid values lie in the range [1, 100]. The default, when unspecified,
    // is 20.
    //
    // For example, one pdf file with 100 pages produces 100 parsed pages.
    // With `pages_per_shard` = 20, 5 Document shard JSON files, each holding
    // 20 parsed pages, are written under the prefix
    // [OutputConfig.gcs_destination.uri][] with suffix pages-x-to-y.json,
    // where x and y are 1-indexed page numbers.
    //
    // Example GCS outputs with 157 pages and pages_per_shard = 50:
    //
    // <prefix>pages-001-to-050.json
    // <prefix>pages-051-to-100.json
    // <prefix>pages-101-to-150.json
    // <prefix>pages-151-to-157.json
    int32 pages_per_shard = 2;
  }

  // Google Cloud Storage location that the input file is read from.
  message GcsSource {
    // Required. URI of the input file's Cloud Storage location.
    string uri = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Google Cloud Storage location that the output file is written to.
  message GcsDestination {
    // Required. URI of the Cloud Storage location for the output.
    string uri = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Metadata attached to the BatchProcessDocuments operation.
  message OperationMetadata {
    // States that the batch processing operation can be in.
    enum State {
      // Default value, used when the state is omitted.
      STATE_UNSPECIFIED = 0;

      // The request has been received.
      ACCEPTED = 1;

      // The request operation is waiting to be scheduled.
      WAITING = 2;

      // The request is being processed.
      RUNNING = 3;

      // Batch processing completed successfully.
      SUCCEEDED = 4;

      // Batch processing was cancelled.
      CANCELLED = 5;

      // Batch processing has failed.
      FAILED = 6;
    }

    // Current state of the batch processing.
    State state = 1;

    // Message giving more detail about the current processing state.
    string state_message = 2;

    // When the operation was created.
    google.protobuf.Timestamp create_time = 3;

    // When the operation was last updated.
    google.protobuf.Timestamp update_time = 4;
  }