// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/document.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta2";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";

// Service that parses structured information out of unstructured or
// semi-structured documents using state-of-the-art Google AI such as natural
// language, computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Long-running operation (LRO) endpoint that batch processes many
  // documents. The output is written to Cloud Storage as JSON in the
  // [Document] format.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Processes a single document and returns the resulting Document.
  rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:process"
        body: "*"
      }
    };
  }
}

// Request message for batch processing documents as an asynchronous
// operation. The output is written to Cloud Storage as JSON in the [Document]
// format.
message BatchProcessDocumentsRequest {
  // Required. One individual request per document to process.
  repeated ProcessDocumentRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Project and location that the call targets.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // When no location is specified, a region is chosen automatically.
  string parent = 2;
}

// Request message for processing a single document.
message ProcessDocumentRequest {
  // Project and location that the call targets.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // When no location is specified, a region is chosen automatically.
  // Only populated when this message is used with the ProcessDocument method.
  string parent = 9;

  // Required. Describes the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Where the output should be written. Only needed when this
  // message is used inside a BatchProcessDocumentsRequest.
  OutputConfig output_config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Known document type to use for deeper structure detection. The currently
  // valid values are "general" and "invoice"; "general" is the default when
  // nothing is provided. A request carrying any other value is rejected.
  string document_type = 3;

  // Table extraction settings. When unset, the system decides on reasonable
  // defaults.
  TableExtractionParams table_extraction_params = 4;

  // Form extraction settings. When unset, the system decides on reasonable
  // defaults.
  FormExtractionParams form_extraction_params = 5;

  // Entity extraction settings. When unset, the system decides on reasonable
  // defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // OCR settings. When unset, the system decides on reasonable defaults.
  OcrParams ocr_params = 7;

  // AutoML model prediction settings. AutoMlParams cannot be combined with
  // any of the other Params fields.
  AutoMlParams automl_params = 8;
}

// Response to a batch document processing request. It is returned in the LRO
// Operation once the operation has completed.
message BatchProcessDocumentsResponse {
  // One response per individual document.
  repeated ProcessDocumentResponse responses = 1;
}

// Response to a request that processes a single document.
message ProcessDocumentResponse {
  // Describes the input file. Identical to the corresponding input config in
  // the request.
  InputConfig input_config = 1;

  // Location the parsed responses were written to. The responses are stored
  // there as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}

// Settings that control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // Languages to use for OCR. An empty value usually yields the best results
  // because it enables automatic language detection. For languages based on
  // the Latin alphabet, `language_hints` does not need to be set. In rare
  // cases where the language of the text in the image is known, a hint helps
  // produce better results (although a wrong hint is a significant
  // hindrance). Document processing returns an error if one or more of the
  // specified languages is not a supported language.
  repeated string language_hints = 1;
}

// Settings that control table extraction behavior.
message TableExtractionParams {
  // Turns table extraction on or off.
  bool enabled = 1;

  // Optional. Bounding box hints for tables, intended for complex cases in
  // which the algorithm cannot locate the table(s) by itself.
  repeated TableBoundHint table_bound_hints = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Hints for table headers. Extraction is biased towards producing
  // these terms as table headers, which may improve accuracy.
  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

  // Version of the table extraction model. Defaults to "builtin/stable"; use
  // "builtin/latest" to select the latest model.
  string model_version = 4;
}

// Hint describing the bounding box of a table on a page, used for table
// parsing.
message TableBoundHint {
  // Optional. For multi-page inputs, the page this hint applies to. When not
  // provided, the hint applies to all pages by default. The value is 1-based.
  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

  // Bounding box hint for a table on the page. Coordinates must be normalized
  // to [0,1], and the box must be an axis-aligned rectangle.
  BoundingPoly bounding_box = 2;
}

// Settings that control form extraction behavior.
message FormExtractionParams {
  // Turns form extraction on or off.
  bool enabled = 1;

  // Pairs of (key text, value type) that the user can supply to improve the
  // parsing result.
  //
  // For example, suppose a document has a field called "Date" that holds a
  // date value and a field called "Amount" that may hold either a currency
  // value (e.g., "$500.00") or a simple number value (e.g., "20"). The
  // following hints could then be used:
  // [ {"key": "Date", "value_types": [ "DATE" ]},
  //   {"key": "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
  //
  // To hint at a key whose value type is unknown, leave the value_types field
  // blank, e.g. {"key": "Date", "value_types": []}
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Version of the form extraction model. Defaults to "builtin/stable"; use
  // "builtin/latest" to select the latest model.
  // For custom form models, specify "custom/{model_name}". The model name
  // has the format "bucket_name/path/to/modeldir", corresponding to
  // "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
  string model_version = 3;
}

// Hint about a key-value pair, supplied by the user.
message KeyValuePairHint {
  // Key text the hint refers to.
  string key = 1;

  // Type(s) of the value. Matching is case-insensitive. Each entry could be
  // one of: ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, ID,
  // NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types that are not in this list
  // are ignored.
  repeated string value_types = 2;
}

// Settings that control entity extraction behavior.
message EntityExtractionParams {
  // Turns entity extraction on or off.
  bool enabled = 1;

  // Version of the entity extraction model. Defaults to "builtin/stable"; use
  // "builtin/latest" to select the latest model.
  string model_version = 2;
}

// Settings that control AutoML model prediction behavior.
message AutoMlParams {
  // Resource name of the AutoML model to use.
  //
  // Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
  string model = 1;
}

// Describes the desired input location and its metadata.
message InputConfig {
  // Required. The source of the input.
  oneof source {
    // Google Cloud Storage location to read the input from. It must be a
    // single file.
    GcsSource gcs_source = 1;

    // Content in bytes, represented as a stream of bytes.
    // Note: as with all `bytes` fields, protocol buffer messages use a pure
    // binary representation, whereas JSON representations use base64.
    //
    // Only works with the synchronous ProcessDocument method.
    bytes contents = 3;
  }

  // Required. Mimetype of the input. The currently supported mimetypes are
  // application/pdf, image/tiff, and image/gif.
  // Additionally, the application/json type is supported for requests that
  // set the
  // [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params]
  // field. The JSON file needs to be in
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}

// Describes the desired output location and its metadata.
message OutputConfig {
  // Required. The destination of the output.
  oneof destination {
    // Google Cloud Storage location to write the output to.
    GcsDestination gcs_destination = 1;
  }

  // Maximum number of pages to put into each output Document shard JSON on
  // Google Cloud Storage.
  //
  // Valid values are in the range [1, 100]. The default is 20 when the field
  // is not specified.
  //
  // For example, one pdf file with 100 pages produces 100 parsed pages. With
  // `pages_per_shard` = 20, 5 Document shard JSON files, each containing 20
  // parsed pages, are written under the prefix
  // [OutputConfig.gcs_destination.uri][] with the suffix pages-x-to-y.json,
  // where x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}

// Google Cloud Storage location from which the input file is read.
message GcsSource {
  // Required. URI of the input file in Google Cloud Storage.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Google Cloud Storage location to which the output file is written.
message GcsDestination {
  // Required. URI of the output location in Google Cloud Storage.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Metadata attached to the BatchProcessDocuments operation.
message OperationMetadata {
  // States that the batch processing can be in.
  enum State {
    // Default value, used when the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The request has been received.
    ACCEPTED = 1;

    // The request operation is waiting to be scheduled.
    WAITING = 2;

    // The request is being processed.
    RUNNING = 3;

    // The batch processing finished successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing failed.
    FAILED = 6;
  }

  // Current state of the batch processing.
  State state = 1;

  // Message with more details about the current state of processing.
  string state_message = 2;

  // Time at which the operation was created.
  google.protobuf.Timestamp create_time = 3;

  // Time at which the operation was last updated.
  google.protobuf.Timestamp update_time = 4;
}