document.proto 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. // Copyright 2022 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.documentai.v1;
  16. import "google/api/field_behavior.proto";
  17. import "google/cloud/documentai/v1/barcode.proto";
  18. import "google/cloud/documentai/v1/geometry.proto";
  19. import "google/protobuf/timestamp.proto";
  20. import "google/rpc/status.proto";
  21. import "google/type/color.proto";
  22. import "google/type/date.proto";
  23. import "google/type/datetime.proto";
  24. import "google/type/money.proto";
  25. import "google/type/postal_address.proto";
  26. option csharp_namespace = "Google.Cloud.DocumentAI.V1";
  27. option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1;documentai";
  28. option java_multiple_files = true;
  29. option java_outer_classname = "DocumentProto";
  30. option java_package = "com.google.cloud.documentai.v1";
  31. option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
  32. option ruby_package = "Google::Cloud::DocumentAI::V1";
  33. // Document represents the canonical document resource in Document AI. It is an
  34. // interchange format that provides insights into documents and allows for
  35. // collaboration between users and Document AI to iterate and optimize for
  36. // quality.
  37. message Document {
  38. // For a large document, sharding may be performed to produce several
  39. // document shards. Each document shard contains this field to detail which
  40. // shard it is.
  41. message ShardInfo {
  42. // The 0-based index of this shard.
  43. int64 shard_index = 1;
  44. // Total number of shards the document was split into.
  45. int64 shard_count = 2;
  46. // The index of the first character of this shard's
  47. // [Document.text][google.cloud.documentai.v1.Document.text] within the overall document global text.
  48. int64 text_offset = 3;
  49. }
  50. // Annotation for common text style attributes. This adheres to CSS
  51. // conventions as much as possible.
  52. message Style {
  53. // Font size with unit.
  54. message FontSize {
  55. // Numeric font size for the text, measured in `unit`.
  56. float size = 1;
  57. // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
  58. string unit = 2;
  59. }
  60. // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
  61. TextAnchor text_anchor = 1;
  62. // Text color.
  63. google.type.Color color = 2;
  64. // Text background color.
  65. google.type.Color background_color = 3;
  66. // Font weight. Possible values are `normal`, `bold`, `bolder`, and `lighter`.
  67. // https://www.w3schools.com/cssref/pr_font_weight.asp
  68. string font_weight = 4;
  69. // Text style. Possible values are `normal`, `italic`, and `oblique`.
  70. // https://www.w3schools.com/cssref/pr_font_font-style.asp
  71. string text_style = 5;
  72. // Text decoration. Follows the CSS standard:
  73. // <text-decoration-line> <text-decoration-color> <text-decoration-style>
  74. // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
  75. string text_decoration = 6;
  76. // Font size, including its unit.
  77. FontSize font_size = 7;
  78. // Font family such as `Arial`, `Times New Roman`.
  79. // https://www.w3schools.com/cssref/pr_font_font-family.asp
  80. string font_family = 8;
  81. }
  82. // A page in a [Document][google.cloud.documentai.v1.Document].
  83. message Page {
  84. // Dimension for the page.
  85. message Dimension {
  86. // Page width.
  87. float width = 1;
  88. // Page height.
  89. float height = 2;
  90. // Dimension unit.
  91. string unit = 3;
  92. }
  93. // Rendered image contents for this page.
  94. message Image {
  95. // Raw byte content of the image.
  96. bytes content = 1;
  97. // Encoding mime type for the image.
  98. string mime_type = 2;
  99. // Width of the image in pixels.
  100. int32 width = 3;
  101. // Height of the image in pixels.
  102. int32 height = 4;
  103. }
  104. // Representation for transformation matrix, intended to be compatible and
  105. // used with OpenCV format for image manipulation.
  106. message Matrix {
  107. // Number of rows in the matrix.
  108. int32 rows = 1;
  109. // Number of columns in the matrix.
  110. int32 cols = 2;
  111. // This encodes information about what data type the matrix uses.
  112. // For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
  113. // of OpenCV primitive data types, please refer to
  114. // https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
  115. int32 type = 3;
  116. // The matrix data.
  117. bytes data = 4;
  118. }
  119. // Visual element describing a layout unit on a page.
  120. message Layout {
  121. // Detected human reading orientation.
  122. enum Orientation {
  123. // Unspecified orientation.
  124. ORIENTATION_UNSPECIFIED = 0;
  125. // Orientation is aligned with page up.
  126. PAGE_UP = 1;
  127. // Orientation is aligned with page right.
  128. // Turn the head 90 degrees clockwise from upright to read.
  129. PAGE_RIGHT = 2;
  130. // Orientation is aligned with page down.
  131. // Turn the head 180 degrees from upright to read.
  132. PAGE_DOWN = 3;
  133. // Orientation is aligned with page left.
  134. // Turn the head 90 degrees counterclockwise from upright to read.
  135. PAGE_LEFT = 4;
  136. }
  137. // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
  138. TextAnchor text_anchor = 1;
  139. // Confidence of the current [Layout][google.cloud.documentai.v1.Document.Page.Layout] within context of the object this
  140. // layout is for. e.g. confidence can be for a single token, a table,
  141. // a visual element, etc. depending on context. Range `[0, 1]`.
  142. float confidence = 2;
  143. // The bounding polygon for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
  144. BoundingPoly bounding_poly = 3;
  145. // Detected orientation for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
  146. Orientation orientation = 4;
  147. }
  148. // A block has a set of lines (collected into paragraphs) that have a
  149. // common line-spacing and orientation.
  150. message Block {
  151. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Block][google.cloud.documentai.v1.Document.Page.Block].
  152. Layout layout = 1;
  153. // A list of detected languages together with confidence.
  154. repeated DetectedLanguage detected_languages = 2;
  155. // Deprecated. The history of this annotation.
  156. Provenance provenance = 3 [deprecated = true];
  157. }
  158. // A collection of lines that a human would perceive as a paragraph.
  159. message Paragraph {
  160. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1.Document.Page.Paragraph].
  161. Layout layout = 1;
  162. // A list of detected languages together with confidence.
  163. repeated DetectedLanguage detected_languages = 2;
  164. // Deprecated. The history of this annotation.
  165. Provenance provenance = 3 [deprecated = true];
  166. }
  167. // A collection of tokens that a human would perceive as a line.
  168. // Does not cross column boundaries, can be horizontal, vertical, etc.
  169. message Line {
  170. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Line][google.cloud.documentai.v1.Document.Page.Line].
  171. Layout layout = 1;
  172. // A list of detected languages together with confidence.
  173. repeated DetectedLanguage detected_languages = 2;
  174. // Deprecated. The history of this annotation.
  175. Provenance provenance = 3 [deprecated = true];
  176. }
  177. // A detected token.
  178. message Token {
  179. // Detected break at the end of a [Token][google.cloud.documentai.v1.Document.Page.Token].
  180. message DetectedBreak {
  181. // Enum to denote the type of break found.
  182. enum Type {
  183. // Unspecified break type.
  184. TYPE_UNSPECIFIED = 0;
  185. // A single whitespace.
  186. SPACE = 1;
  187. // A wider whitespace.
  188. WIDE_SPACE = 2;
  189. // A hyphen that indicates that a token has been split across lines.
  190. HYPHEN = 3;
  191. }
  192. // Detected break type.
  193. Type type = 1;
  194. }
  195. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Token][google.cloud.documentai.v1.Document.Page.Token].
  196. Layout layout = 1;
  197. // Detected break at the end of a [Token][google.cloud.documentai.v1.Document.Page.Token].
  198. DetectedBreak detected_break = 2;
  199. // A list of detected languages together with confidence.
  200. repeated DetectedLanguage detected_languages = 3;
  201. // Deprecated. The history of this annotation.
  202. Provenance provenance = 4 [deprecated = true];
  203. }
  204. // A detected symbol.
  205. message Symbol {
  206. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Symbol][google.cloud.documentai.v1.Document.Page.Symbol].
  207. Layout layout = 1;
  208. // A list of detected languages together with confidence.
  209. repeated DetectedLanguage detected_languages = 2;
  210. }
  211. // Detected non-text visual elements (e.g. checkbox, signature) on the
  212. // page.
  213. message VisualElement {
  214. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
  215. Layout layout = 1;
  216. // Type of the [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
  217. string type = 2;
  218. // A list of detected languages together with confidence.
  219. repeated DetectedLanguage detected_languages = 3;
  220. }
  221. // A table representation similar to HTML table structure.
  222. message Table {
  223. // A row of table cells.
  224. message TableRow {
  225. // Cells that make up this row.
  226. repeated TableCell cells = 1;
  227. }
  228. // A cell representation inside the table.
  229. message TableCell {
  230. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1.Document.Page.Table.TableCell].
  231. Layout layout = 1;
  232. // How many rows this cell spans.
  233. int32 row_span = 2;
  234. // How many columns this cell spans.
  235. int32 col_span = 3;
  236. // A list of detected languages together with confidence.
  237. repeated DetectedLanguage detected_languages = 4;
  238. }
  239. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [Table][google.cloud.documentai.v1.Document.Page.Table].
  240. Layout layout = 1;
  241. // Header rows of the table.
  242. repeated TableRow header_rows = 2;
  243. // Body rows of the table.
  244. repeated TableRow body_rows = 3;
  245. // A list of detected languages together with confidence.
  246. repeated DetectedLanguage detected_languages = 4;
  247. // The history of this table.
  248. Provenance provenance = 5;
  249. }
  250. // A form field detected on the page.
  251. message FormField {
  252. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1.Document.Page.FormField] name. e.g. `Address`, `Email`,
  253. // `Grand total`, `Phone number`, etc.
  254. Layout field_name = 1;
  255. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1.Document.Page.FormField] value.
  256. Layout field_value = 2;
  257. // A list of detected languages for name together with confidence.
  258. repeated DetectedLanguage name_detected_languages = 3;
  259. // A list of detected languages for value together with confidence.
  260. repeated DetectedLanguage value_detected_languages = 4;
  261. // If the value is non-textual, this field represents the type. Current
  262. // valid values are:
  263. // - blank (this indicates the field_value is normal text)
  264. // - "unfilled_checkbox"
  265. // - "filled_checkbox"
  266. string value_type = 5;
  267. // Created for Labeling UI to export key text.
  268. // If corrections were made to the text identified by the
  269. // `field_name.text_anchor`, this field will contain the correction.
  270. string corrected_key_text = 6;
  271. // Created for Labeling UI to export value text.
  272. // If corrections were made to the text identified by the
  273. // `field_value.text_anchor`, this field will contain the correction.
  274. string corrected_value_text = 7;
  275. // The history of this annotation.
  276. Provenance provenance = 8;
  277. }
  278. // A detected barcode.
  279. message DetectedBarcode {
  280. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
  281. Layout layout = 1;
  282. // Detailed barcode information of the [DetectedBarcode][google.cloud.documentai.v1.Document.Page.DetectedBarcode].
  283. Barcode barcode = 2;
  284. }
  285. // Detected language for a structural component.
  286. message DetectedLanguage {
  287. // The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
  288. // information, see
  289. // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  290. string language_code = 1;
  291. // Confidence of detected language. Range `[0, 1]`.
  292. float confidence = 2;
  293. }
  294. // Image quality scores for the page image.
  295. message ImageQualityScores {
  296. // A quality defect detected in the page image, with its confidence.
  297. message DetectedDefect {
  298. // Name of the defect type. Supported values are:
  299. //
  300. // - `quality/defect_blurry`
  301. // - `quality/defect_noisy`
  302. // - `quality/defect_dark`
  303. // - `quality/defect_faint`
  304. // - `quality/defect_text_too_small`
  305. // - `quality/defect_document_cutoff`
  306. // - `quality/defect_text_cutoff`
  307. // - `quality/defect_glare`
  308. string type = 1;
  309. // Confidence of detected defect. Range `[0, 1]` where 1 indicates
  310. // strong confidence that the defect exists.
  311. float confidence = 2;
  312. }
  313. // The overall quality score. Range `[0, 1]` where 1 is perfect quality.
  314. float quality_score = 1;
  315. // A list of detected defects.
  316. repeated DetectedDefect detected_defects = 2;
  317. }
  318. // 1-based index for current [Page][google.cloud.documentai.v1.Document.Page] in a parent [Document][google.cloud.documentai.v1.Document].
  319. // Useful when a page is taken out of a [Document][google.cloud.documentai.v1.Document] for individual
  320. // processing.
  321. int32 page_number = 1;
  322. // Rendered image for this page. This image is preprocessed to remove any
  323. // skew, rotation, and distortions such that the annotation bounding boxes
  324. // can be upright and axis-aligned.
  325. Image image = 13;
  326. // Transformation matrices that were applied to the original document image
  327. // to produce [Page.image][google.cloud.documentai.v1.Document.Page.image].
  328. repeated Matrix transforms = 14;
  329. // Physical dimension of the page.
  330. Dimension dimension = 2;
  331. // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the page.
  332. Layout layout = 3;
  333. // A list of detected languages together with confidence.
  334. repeated DetectedLanguage detected_languages = 4;
  335. // A list of visually detected text blocks on the page.
  336. // A block has a set of lines (collected into paragraphs) that have a common
  337. // line-spacing and orientation.
  338. repeated Block blocks = 5;
  339. // A list of visually detected text paragraphs on the page.
  340. // A collection of lines that a human would perceive as a paragraph.
  341. repeated Paragraph paragraphs = 6;
  342. // A list of visually detected text lines on the page.
  343. // A collection of tokens that a human would perceive as a line.
  344. repeated Line lines = 7;
  345. // A list of visually detected tokens on the page.
  346. repeated Token tokens = 8;
  347. // A list of detected non-text visual elements e.g. checkbox,
  348. // signature etc. on the page.
  349. repeated VisualElement visual_elements = 9;
  350. // A list of visually detected tables on the page.
  351. repeated Table tables = 10;
  352. // A list of visually detected form fields on the page.
  353. repeated FormField form_fields = 11;
  354. // A list of visually detected symbols on the page.
  355. repeated Symbol symbols = 12;
  356. // A list of detected barcodes.
  357. repeated DetectedBarcode detected_barcodes = 15;
  358. // Image quality scores for the page image.
  359. ImageQualityScores image_quality_scores = 17;
  360. // Deprecated. The history of this page.
  361. Provenance provenance = 16 [deprecated = true];
  362. }
  363. // An entity that could be a phrase in the text or a property that belongs to
  364. // the document. It is a known entity type, such as a person, an organization,
  365. // or location.
  366. message Entity {
  367. // Parsed and normalized entity value.
  368. message NormalizedValue {
  369. // An optional structured entity value.
  370. // Must match entity type defined in schema if
  371. // known. If this field is present, the `text` field could also be
  372. // populated.
  373. oneof structured_value {
  374. // Money value. See also:
  375. // https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
  376. google.type.Money money_value = 2;
  377. // Date value. Includes year, month, day. See also:
  378. // https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
  379. google.type.Date date_value = 3;
  380. // DateTime value. Includes date, time, and timezone. See also:
  381. // https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
  382. google.type.DateTime datetime_value = 4;
  383. // Postal address. See also:
  384. // https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
  385. google.type.PostalAddress address_value = 5;
  386. // Boolean value. Can be used for entities with binary values, or for
  387. // checkboxes.
  388. bool boolean_value = 6;
  389. // Integer value.
  390. int32 integer_value = 7;
  391. // Float value.
  392. float float_value = 8;
  393. }
  394. // Optional. An optional field to store a normalized string.
  395. // For some entity types, one of respective `structured_value` fields may
  396. // also be populated. Also not all the types of `structured_value` will be
  397. // normalized. For example, some processors may not generate float
  398. // or int normalized text by default.
  399. //
  400. // Below are sample formats mapped to structured values.
  401. //
  402. // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
  403. // - Date type (`date_value`) is in the ISO 8601 text format.
  404. // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
  405. string text = 1 [(google.api.field_behavior) = OPTIONAL];
  406. }
  407. // Optional. Provenance of the entity.
  408. // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
  409. TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];
  410. // Required. Entity type from a schema e.g. `Address`.
  411. string type = 2 [(google.api.field_behavior) = REQUIRED];
  412. // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
  413. string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
  414. // Optional. Deprecated. Use `id` field instead.
  415. string mention_id = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
  416. // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
  417. float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
  418. // Optional. Represents the provenance of this entity with respect to the
  419. // location on the page where it was found.
  420. PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];
  421. // Optional. Canonical id. This will be a unique value in the entity list
  422. // for this document.
  423. string id = 7 [(google.api.field_behavior) = OPTIONAL];
  424. // Optional. Normalized entity value. Absent if the extracted value could not be
  425. // converted or the type (e.g. address) is not supported for certain
  426. // parsers. This field is also only populated for certain supported document
  427. // types.
  428. NormalizedValue normalized_value = 9 [(google.api.field_behavior) = OPTIONAL];
  429. // Optional. Entities can be nested to form a hierarchical data structure representing
  430. // the content in the document.
  431. repeated Entity properties = 10 [(google.api.field_behavior) = OPTIONAL];
  432. // Optional. The history of this annotation.
  433. Provenance provenance = 11 [(google.api.field_behavior) = OPTIONAL];
  434. // Optional. Whether the entity will be redacted for de-identification purposes.
  435. bool redacted = 12 [(google.api.field_behavior) = OPTIONAL];
  436. }
  437. // Relationship between [Entities][google.cloud.documentai.v1.Document.Entity].
  438. message EntityRelation {
  439. // Id of the entity that is the subject of the relationship.
  440. string subject_id = 1;
  441. // Id of the entity that is the object of the relationship.
  442. string object_id = 2;
  443. // Relationship description.
  444. string relation = 3;
  445. }
  446. // Text reference indexing into the [Document.text][google.cloud.documentai.v1.Document.text].
  447. message TextAnchor {
  448. // A text segment in the [Document.text][google.cloud.documentai.v1.Document.text]. The indices may be out of bounds,
  449. // which indicates that the text extends into another document shard for
  450. // large sharded documents. See [ShardInfo.text_offset][google.cloud.documentai.v1.Document.ShardInfo.text_offset].
  451. message TextSegment {
  452. // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] start UTF-8 char index in the [Document.text][google.cloud.documentai.v1.Document.text].
  453. int64 start_index = 1;
  454. // [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] half open end UTF-8 char index in the
  455. // [Document.text][google.cloud.documentai.v1.Document.text].
  456. int64 end_index = 2;
  457. }
  458. // The text segments from the [Document.text][google.cloud.documentai.v1.Document.text].
  459. repeated TextSegment text_segments = 1;
  460. // Contains the content of the text span so that users do
  461. // not have to look it up in the text_segments. It is always
  462. // populated for formFields.
  463. string content = 2;
  464. }
  465. // Referencing the visual context of the entity in the [Document.pages][google.cloud.documentai.v1.Document.pages].
  466. // Page anchors can be cross-page, consist of multiple bounding polygons and
  467. // optionally reference specific layout element types.
  468. message PageAnchor {
  469. // Represents a weak reference to a page element within a document.
  470. message PageRef {
  471. // The type of layout that is being referenced.
  472. enum LayoutType {
  473. // Layout Unspecified.
  474. LAYOUT_TYPE_UNSPECIFIED = 0;
  475. // References a [Page.blocks][google.cloud.documentai.v1.Document.Page.blocks] element.
  476. BLOCK = 1;
  477. // References a [Page.paragraphs][google.cloud.documentai.v1.Document.Page.paragraphs] element.
  478. PARAGRAPH = 2;
  479. // References a [Page.lines][google.cloud.documentai.v1.Document.Page.lines] element.
  480. LINE = 3;
  481. // References a [Page.tokens][google.cloud.documentai.v1.Document.Page.tokens] element.
  482. TOKEN = 4;
  483. // References a [Page.visual_elements][google.cloud.documentai.v1.Document.Page.visual_elements] element.
  484. VISUAL_ELEMENT = 5;
  485. // References a [Page.tables][google.cloud.documentai.v1.Document.Page.tables] element.
  486. TABLE = 6;
  487. // References a [Page.form_fields][google.cloud.documentai.v1.Document.Page.form_fields] element.
  488. FORM_FIELD = 7;
  489. }
  490. // Required. Index into the [Document.pages][google.cloud.documentai.v1.Document.pages] element, for example using
  491. // `Document.pages[page_refs.page]` to locate the related page element.
  492. // This field is skipped when its value is the default 0. See
  493. // https://developers.google.com/protocol-buffers/docs/proto3#json.
  494. int64 page = 1 [(google.api.field_behavior) = REQUIRED];
  495. // Optional. The type of the layout element that is being referenced if any.
  496. LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];
  497. // Optional. Deprecated. Use [PageRef.bounding_poly][google.cloud.documentai.v1.Document.PageAnchor.PageRef.bounding_poly] instead.
  498. string layout_id = 3 [
  499. deprecated = true,
  500. (google.api.field_behavior) = OPTIONAL
  501. ];
  502. // Optional. Identifies the bounding polygon of a layout element on the page.
  503. BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
  504. // Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
  505. float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
  506. }
  507. // One or more references to visual page elements.
  508. repeated PageRef page_refs = 1;
  509. }
  510. // Structure to identify provenance relationships between annotations in
  511. // different revisions.
  512. message Provenance {
  513. // The parent element the current element is based on. Used for
  514. // referencing/aligning, removal and replacement operations.
  515. message Parent {
  516. // The index into the current revision's parent_ids list.
  517. int32 revision = 1;
  518. // The index of the parent item in the corresponding item list (e.g. list
  519. // of entities, properties within entities, etc.) in the parent revision.
  520. int32 index = 3;
  521. // Deprecated. The id of the parent provenance.
  522. int32 id = 2 [deprecated = true];
  523. }
  524. // If a processor or agent does an explicit operation on existing elements.
  525. enum OperationType {
  526. // Operation type unspecified. If no operation is specified a provenance
  527. // entry is simply used to match against a `parent`.
  528. OPERATION_TYPE_UNSPECIFIED = 0;
  529. // Add an element.
  530. ADD = 1;
  531. // Remove an element identified by `parent`.
  532. REMOVE = 2;
  533. // Replace an element identified by `parent`.
  534. REPLACE = 3;
  535. // Request human review for the element identified by `parent`.
  536. EVAL_REQUESTED = 4;
  537. // Element is reviewed and approved at human review, confidence will be
  538. // set to 1.0.
  539. EVAL_APPROVED = 5;
  540. // Element is skipped in the validation process.
  541. EVAL_SKIPPED = 6;
  542. }
  543. // The index of the revision that produced this element.
  544. int32 revision = 1;
  545. // Deprecated. The id of this operation. Needs to be unique within the scope
  546. // of the revision.
  547. int32 id = 2 [deprecated = true];
  548. // References to the original elements that are replaced.
  549. repeated Parent parents = 3;
  550. // The type of provenance operation.
  551. OperationType type = 4;
  552. }
  553. // Contains past or forward revisions of this document.
  554. message Revision {
  555. // Human Review information of the document.
  556. message HumanReview {
  557. // Human review state. e.g. `requested`, `succeeded`, `rejected`.
  558. string state = 1;
  559. // A message providing more details about the current state of processing.
  560. // For example, the rejection reason when the state is `rejected`.
  561. string state_message = 2;
  562. }
  563. // Who or what made the change.
  564. oneof source {
  565. // If the change was made by a person specify the name or id of that
  566. // person.
  567. string agent = 4;
  568. // If the annotation was made by processor identify the processor by its
  569. // resource name.
  570. string processor = 5;
  571. }
  572. // Id of the revision. Unique within the context of the document.
  573. string id = 1;
  574. // Deprecated. The revisions that this revision is based on. This can include
  575. // one or more parents (when documents are merged). This field represents the
  576. // index into the `revisions` field.
  577. repeated int32 parent = 2 [deprecated = true];
  578. // The revisions that this revision is based on. Must include all the ids
  579. // that have anything to do with this revision - e.g. there are
  580. // `provenance.parent.revision` fields that index into this field.
  581. repeated string parent_ids = 7;
  582. // The time that the revision was created.
  583. google.protobuf.Timestamp create_time = 3;
  584. // Human Review information of this revision.
  585. HumanReview human_review = 6;
  586. }
  587. // This message is used for text changes, also known as OCR corrections.
  588. message TextChange {
  589. // Provenance of the correction.
  590. // Text anchor indexing into the [Document.text][google.cloud.documentai.v1.Document.text]. There can only be a
  591. // single `TextAnchor.text_segments` element. If the start and
  592. // end index of the text segment are the same, the text change is inserted
  593. // before that index.
  594. TextAnchor text_anchor = 1;
  595. // The text that replaces the text identified in the `text_anchor`.
  596. string changed_text = 2;
  597. // Deprecated. The history of this annotation.
  598. repeated Provenance provenance = 3 [deprecated = true];
  599. }
  600. // Original source document from the user.
  601. oneof source {
  602. // Optional. Currently supports Google Cloud Storage URI of the form
  603. // `gs://bucket_name/object_name`. Object versioning is not supported.
  604. // See [Google Cloud Storage Request
  605. // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  606. // info.
  607. string uri = 1 [(google.api.field_behavior) = OPTIONAL];
  608. // Optional. Inline document content, represented as a stream of bytes.
  609. // Note: As with all `bytes` fields, Protocol Buffers use a pure binary
  610. // representation, whereas JSON representations use base64.
  611. bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
  612. }
  613. // An IANA published MIME type (also referred to as media type). For more
  614. // information, see
  615. // https://www.iana.org/assignments/media-types/media-types.xhtml.
  616. string mime_type = 3;
  617. // Optional. UTF-8 encoded text in reading order from the document.
  618. string text = 4 [(google.api.field_behavior) = OPTIONAL];
  619. // Placeholder. Styles for the [Document.text][google.cloud.documentai.v1.Document.text].
  620. repeated Style text_styles = 5;
  621. // Visual page layout for the [Document][google.cloud.documentai.v1.Document].
  622. repeated Page pages = 6;
  623. // A list of entities detected on [Document.text][google.cloud.documentai.v1.Document.text]. For document shards,
  624. // entities in this list may cross shard boundaries.
  625. repeated Entity entities = 7;
  626. // Placeholder. Relationship among [Document.entities][google.cloud.documentai.v1.Document.entities].
  627. repeated EntityRelation entity_relations = 8;
  628. // Placeholder. A list of text corrections made to [Document.text][google.cloud.documentai.v1.Document.text]. This
  629. // is usually used for annotating corrections to OCR mistakes. Text changes
  630. // for a given revision may not overlap with each other.
  631. repeated TextChange text_changes = 14;
  632. // Information about the sharding if this document is a shard of a larger
  633. // document. If the document is not sharded, this message is not specified.
  634. ShardInfo shard_info = 9;
  635. // Any error that occurred while processing this document.
  636. google.rpc.Status error = 10;
  637. // Placeholder. Revision history of this document.
  638. repeated Revision revisions = 13;
  639. }