// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta3/barcode.proto";
import "google/cloud/documentai/v1beta3/geometry.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/date.proto";
import "google/type/datetime.proto";
import "google/type/money.proto";
import "google/type/postal_address.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta3;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Document represents the canonical document resource in Document AI. It is an
// interchange format that provides insights into documents and allows for
// collaboration between users and Document AI to iterate and optimize for
// quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in
    // [Document.text][google.cloud.documentai.v1beta3.Document.text] in the
    // overall document global text.
    int64 text_offset = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration. Follows CSS standard.
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;

    // Font family such as `Arial`, `Times New Roman`.
    // https://www.w3schools.com/cssref/pr_font_font-family.asp
    string font_family = 8;
  }

  // A page in a [Document][google.cloud.documentai.v1beta3.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Rendered image contents for this page.
    message Image {
      // Raw byte content of the image.
      bytes content = 1;

      // Encoding mime type for the image.
      string mime_type = 2;

      // Width of the image in pixels.
      int32 width = 3;

      // Height of the image in pixels.
      int32 height = 4;
    }

    // Representation for transformation matrix, intended to be compatible and
    // used with OpenCV format for image manipulation.
    message Matrix {
      // Number of rows in the matrix.
      int32 rows = 1;

      // Number of columns in the matrix.
      int32 cols = 2;

      // This encodes information about what data type the matrix uses.
      // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
      // of OpenCV primitive data types, please refer to
      // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
      int32 type = 3;

      // The matrix data.
      bytes data = 4;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the
      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within
      // context of the object this layout is for. e.g. confidence can be for a
      // single token, a table, a visual element, etc. depending on context.
      // Range `[0, 1]`.
      float confidence = 2;

      // The bounding polygon for the
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
      Orientation orientation = 4;
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Block][google.cloud.documentai.v1beta3.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Line][google.cloud.documentai.v1beta3.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;

      // The history of this annotation.
      Provenance provenance = 3 [deprecated = true];
    }

    // A detected token.
    message Token {
      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a
      // [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;

      // The history of this annotation.
      Provenance provenance = 4 [deprecated = true];
    }

    // A detected symbol.
    message Symbol {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the
      // [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
        // [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [Table][google.cloud.documentai.v1beta3.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;

      // The history of this table.
      Provenance provenance = 5;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
      // [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField]
      // value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;

      // If the value is non-textual, this field represents the type. Current
      // valid values are:
      // - blank (this indicates the field_value is normal text)
      // - "unfilled_checkbox"
      // - "filled_checkbox"
      string value_type = 5;

      // Created for Labeling UI to export key text.
      // If corrections were made to the text identified by the
      // `field_name.text_anchor`, this field will contain the correction.
      string corrected_key_text = 6;

      // Created for Labeling UI to export value text.
      // If corrections were made to the text identified by the
      // `field_value.text_anchor`, this field will contain the correction.
      string corrected_value_text = 7;

      // The history of this annotation.
      Provenance provenance = 8;
    }

    // A detected barcode.
    message DetectedBarcode {
      // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
      Layout layout = 1;

      // Detailed barcode information of the
      // [DetectedBarcode][google.cloud.documentai.v1beta3.Document.Page.DetectedBarcode].
      Barcode barcode = 2;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
      // information, see
      // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range `[0, 1]`.
      float confidence = 2;
    }

    // Image quality scores for the page image.
    message ImageQualityScores {
      // Image quality defects.
      message DetectedDefect {
        // Name of the defect type. Supported values are:
        //
        // - `quality/defect_blurry`
        // - `quality/defect_noisy`
        // - `quality/defect_dark`
        // - `quality/defect_faint`
        // - `quality/defect_text_too_small`
        // - `quality/defect_document_cutoff`
        // - `quality/defect_text_cutoff`
        // - `quality/defect_glare`
        string type = 1;

        // Confidence of detected defect. Range `[0, 1]` where 1 indicates
        // strong confidence that the defect exists.
        float confidence = 2;
      }

      // The overall quality score. Range `[0, 1]` where 1 is perfect quality.
      float quality_score = 1;

      // A list of detected defects.
      repeated DetectedDefect detected_defects = 2;
    }

    // 1-based index for current
    // [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent
    // [Document][google.cloud.documentai.v1beta3.Document].
    // Useful when a page is taken out of a
    // [Document][google.cloud.documentai.v1beta3.Document] for individual
    // processing.
    int32 page_number = 1;

    // Rendered image for this page. This image is preprocessed to remove any
    // skew, rotation, and distortions such that the annotation bounding boxes
    // can be upright and axis-aligned.
    Image image = 13;

    // Transformation matrices that were applied to the original document image
    // to produce
    // [Page.image][google.cloud.documentai.v1beta3.Document.Page.image].
    repeated Matrix transforms = 14;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the
    // page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;

    // A list of visually detected symbols on the page.
    repeated Symbol symbols = 12;

    // A list of detected barcodes.
    repeated DetectedBarcode detected_barcodes = 15;

    // Image quality scores.
    ImageQualityScores image_quality_scores = 17;

    // The history of this page.
    Provenance provenance = 16 [deprecated = true];
  }

  // An entity that could be a phrase in the text or a property that belongs to
  // the document. It is a known entity type, such as a person, an organization,
  // or location.
  message Entity {
    // Parsed and normalized entity value.
    message NormalizedValue {
      // An optional structured entity value.
      // Must match entity type defined in schema if
      // known. If this field is present, the `text` field could also be
      // populated.
      oneof structured_value {
        // Money value. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
        google.type.Money money_value = 2;

        // Date value. Includes year, month, day. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
        google.type.Date date_value = 3;

        // DateTime value. Includes date, time, and timezone. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
        google.type.DateTime datetime_value = 4;

        // Postal address. See also:
        // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
        google.type.PostalAddress address_value = 5;

        // Boolean value. Can be used for entities with binary values, or for
        // checkboxes.
        bool boolean_value = 6;

        // Integer value.
        int32 integer_value = 7;

        // Float value.
        float float_value = 8;
      }

      // Optional. An optional field to store a normalized string.
      // For some entity types, one of respective `structured_value` fields may
      // also be populated. Also not all the types of `structured_value` will be
      // normalized. For example, some processors may not generate float
      // or int normalized text by default.
      //
      // Below are sample formats mapped to structured values.
      //
      // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
      // - Date type (`date_value`) is in the ISO 8601 text format.
      // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
      string text = 1 [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Provenance of the entity.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];

    // Required. Entity type from a schema e.g. `Address`.
    string type = 2 [(google.api.field_behavior) = REQUIRED];

    // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Deprecated. Use `id` field instead.
    string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Represents the provenance of this entity wrt. the location on
    // the page where it was found.
    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Canonical id. This will be a unique value in the entity list
    // for this document.
    string id = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Normalized entity value. Absent if the extracted value could
    // not be converted or the type (e.g. address) is not supported for certain
    // parsers. This field is also only populated for certain supported document
    // types.
    NormalizedValue normalized_value = 9 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Entities can be nested to form a hierarchical data structure
    // representing the content in the document.
    repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The history of this annotation.
    Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether the entity will be redacted for de-identification
    // purposes.
    bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
  }

  // Relationship between
  // [Entities][google.cloud.documentai.v1beta3.Document.Entity].
  message EntityRelation {
    // Subject entity id.
    string subject_id = 1;

    // Object entity id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  message TextAnchor {
    // A text segment in the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. The
    // indices may be out of bounds which indicate that the text extends into
    // another document shard for large sharded documents. See
    // [ShardInfo.text_offset][google.cloud.documentai.v1beta3.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
      // start UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta3.Document.TextAnchor.TextSegment]
      // half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta3.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text].
    repeated TextSegment text_segments = 1;

    // Contains the content of the text span so that users do
    // not have to look it up in the text_segments. It is always
    // populated for formFields.
    string content = 2;
  }

  // Referencing the visual context of the entity in the
  // [Document.pages][google.cloud.documentai.v1beta3.Document.pages].
  // Page anchors can be cross-page, consist of multiple bounding polygons and
  // optionally reference specific layout element types.
  message PageAnchor {
    // Represents a weak reference to a page element within a document.
    message PageRef {
      // The type of layout that is being referenced.
      enum LayoutType {
        // Layout Unspecified.
        LAYOUT_TYPE_UNSPECIFIED = 0;

        // References a
        // [Page.blocks][google.cloud.documentai.v1beta3.Document.Page.blocks]
        // element.
        BLOCK = 1;

        // References a
        // [Page.paragraphs][google.cloud.documentai.v1beta3.Document.Page.paragraphs]
        // element.
        PARAGRAPH = 2;

        // References a
        // [Page.lines][google.cloud.documentai.v1beta3.Document.Page.lines]
        // element.
        LINE = 3;

        // References a
        // [Page.tokens][google.cloud.documentai.v1beta3.Document.Page.tokens]
        // element.
        TOKEN = 4;

        // References a
        // [Page.visual_elements][google.cloud.documentai.v1beta3.Document.Page.visual_elements]
        // element.
        VISUAL_ELEMENT = 5;

        // References a
        // [Page.tables][google.cloud.documentai.v1beta3.Document.Page.tables]
        // element.
        TABLE = 6;

        // References a
        // [Page.form_fields][google.cloud.documentai.v1beta3.Document.Page.form_fields]
        // element.
        FORM_FIELD = 7;
      }

      // Required. Index into the
      // [Document.pages][google.cloud.documentai.v1beta3.Document.pages]
      // element, for example using `Document.pages[page_refs.page]` to locate
      // the related page element. This field is skipped when its value is the
      // default 0. See
      // https://developers.google.com/protocol-buffers/docs/proto3#json.
      int64 page = 1 [(google.api.field_behavior) = REQUIRED];

      // Optional. The type of the layout element that is being referenced if
      // any.
      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Deprecated. Use
      // [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
      // instead.
      string layout_id = 3 [
        deprecated = true,
        (google.api.field_behavior) = OPTIONAL
      ];

      // Optional. Identifies the bounding polygon of a layout element on the
      // page.
      BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Confidence of detected page element, if applicable. Range
      // `[0, 1]`.
      float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
    }

    // One or more references to visual page elements.
    repeated PageRef page_refs = 1;
  }

  // Structure to identify provenance relationships between annotations in
  // different revisions.
  message Provenance {
    // The parent element the current element is based on. Used for
    // referencing/aligning, removal and replacement operations.
    message Parent {
      // The index into the current revision's `parent_ids` list.
      int32 revision = 1;

      // The index of the parent item in the corresponding item list (eg. list
      // of entities, properties within entities, etc.) in the parent revision.
      int32 index = 3;

      // The id of the parent provenance.
      int32 id = 2 [deprecated = true];
    }

    // If a processor or agent does an explicit operation on existing elements.
    enum OperationType {
      // Operation type unspecified. If no operation is specified a provenance
      // entry is simply used to match against a `parent`.
      OPERATION_TYPE_UNSPECIFIED = 0;

      // Add an element.
      ADD = 1;

      // Remove an element identified by `parent`.
      REMOVE = 2;

      // Replace an element identified by `parent`.
      REPLACE = 3;

      // Request human review for the element identified by `parent`.
      EVAL_REQUESTED = 4;

      // Element is reviewed and approved at human review, confidence will be
      // set to 1.0.
      EVAL_APPROVED = 5;

      // Element is skipped in the validation process.
      EVAL_SKIPPED = 6;
    }

    // The index of the revision that produced this element.
    int32 revision = 1;

    // The Id of this operation. Needs to be unique within the scope of the
    // revision.
    int32 id = 2 [deprecated = true];

    // References to the original elements that are replaced.
    repeated Parent parents = 3;

    // The type of provenance operation.
    OperationType type = 4;
  }

  // Contains past or forward revisions of this document.
  message Revision {
    // Human Review information of the document.
    message HumanReview {
      // Human review state. e.g. `requested`, `succeeded`, `rejected`.
      string state = 1;

      // A message providing more details about the current state of processing.
      // For example, the rejection reason when the state is `rejected`.
      string state_message = 2;
    }

    // Who/what made the change.
    oneof source {
      // If the change was made by a person specify the name or id of that
      // person.
      string agent = 4;

      // If the annotation was made by processor identify the processor by its
      // resource name.
      string processor = 5;
    }

    // Id of the revision. Unique within the context of the document.
    string id = 1;

    // The revisions that this revision is based on. This can include one or
    // more parents (when documents are merged). This field represents the
    // index into the `revisions` field.
    repeated int32 parent = 2 [deprecated = true];

    // The revisions that this revision is based on. Must include all the ids
    // that have anything to do with this revision - eg. there are
    // `provenance.parent.revision` fields that index into this field.
    repeated string parent_ids = 7;

    // The time that the revision was created.
    google.protobuf.Timestamp create_time = 3;

    // Human Review information of this revision.
    HumanReview human_review = 6;
  }

  // This message is used for text changes aka. OCR corrections.
  message TextChange {
    // Provenance of the correction.
    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta3.Document.text]. There
    // can only be a single `TextAnchor.text_segments` element. If the start
    // and end index of the text segment are the same, the text change is
    // inserted before that index.
    TextAnchor text_anchor = 1;

    // The text that replaces the text identified in the `text_anchor`.
    string changed_text = 2;

    // The history of this annotation.
    repeated Provenance provenance = 3 [deprecated = true];
  }

  // Original source document from the user.
  oneof source {
    // Optional. Currently supports Google Cloud Storage URI of the form
    //    `gs://bucket_name/object_name`. Object versioning is not supported.
    //    See [Google Cloud Storage Request
    //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    //    info.
    string uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // Optional. UTF-8 encoded text in reading order from the document.
  string text = 4 [(google.api.field_behavior) = OPTIONAL];

  // Placeholder. Styles for the
  // [Document.text][google.cloud.documentai.v1beta3.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the
  // [Document][google.cloud.documentai.v1beta3.Document].
  repeated Page pages = 6;

  // A list of entities detected on
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. For
  // document shards, entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Placeholder. Relationship among
  // [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Placeholder. A list of text corrections made to
  // [Document.text][google.cloud.documentai.v1beta3.Document.text]. This
  // is usually used for annotating corrections to OCR mistakes. Text changes
  // for a given revision may not overlap with each other.
  repeated TextChange text_changes = 14;

  // Information about the sharding if this document is sharded part of a larger
  // document. If the document is not sharded, this message is not specified.
  ShardInfo shard_info = 9;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;

  // Placeholder. Revision history of this document.
  repeated Revision revisions = 13;
}