// document.proto
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
  15. syntax = "proto3";
  16. package google.cloud.documentai.v1beta1;
  17. import "google/api/field_behavior.proto";
  18. import "google/cloud/documentai/v1beta1/geometry.proto";
  19. import "google/rpc/status.proto";
  20. import "google/type/color.proto";
  21. option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta1";
  22. option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai";
  23. option java_multiple_files = true;
  24. option java_outer_classname = "DocumentProto";
  25. option java_package = "com.google.cloud.documentai.v1beta1";
  26. option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta1";
  27. option ruby_package = "Google::Cloud::DocumentAI::V1beta1";
  28. // Document represents the canonical document resource in Document Understanding
  29. // AI.
  30. // It is an interchange format that provides insights into documents and allows
  31. // for collaboration between users and Document Understanding AI to iterate and
  32. // optimize for quality.
  33. message Document {
  34. // For a large document, sharding may be performed to produce several
  35. // document shards. Each document shard contains this field to detail which
  36. // shard it is.
  37. message ShardInfo {
  38. // The 0-based index of this shard.
  39. int64 shard_index = 1;
  40. // Total number of shards.
  41. int64 shard_count = 2;
  42. // The index of the first character in
  43. // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
  44. // overall document global text.
  45. int64 text_offset = 3;
  46. }
  47. // Annotation for common text style attributes. This adheres to CSS
  48. // conventions as much as possible.
  49. message Style {
  50. // Font size with unit.
  51. message FontSize {
  52. // Font size for the text.
  53. float size = 1;
  54. // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
  55. string unit = 2;
  56. }
  57. // Text anchor indexing into the
  58. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  59. TextAnchor text_anchor = 1;
  60. // Text color.
  61. google.type.Color color = 2;
  62. // Text background color.
  63. google.type.Color background_color = 3;
  64. // Font weight. Possible values are normal, bold, bolder, and lighter.
  65. // https://www.w3schools.com/cssref/pr_font_weight.asp
  66. string font_weight = 4;
  67. // Text style. Possible values are normal, italic, and oblique.
  68. // https://www.w3schools.com/cssref/pr_font_font-style.asp
  69. string text_style = 5;
  70. // Text decoration. Follows CSS standard.
  71. // <text-decoration-line> <text-decoration-color> <text-decoration-style>
  72. // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
  73. string text_decoration = 6;
  74. // Font size.
  75. FontSize font_size = 7;
  76. }
  77. // A page in a [Document][google.cloud.documentai.v1beta1.Document].
  78. message Page {
  79. // Dimension for the page.
  80. message Dimension {
  81. // Page width.
  82. float width = 1;
  83. // Page height.
  84. float height = 2;
  85. // Dimension unit.
  86. string unit = 3;
  87. }
  88. // Visual element describing a layout unit on a page.
  89. message Layout {
  90. // Detected human reading orientation.
  91. enum Orientation {
  92. // Unspecified orientation.
  93. ORIENTATION_UNSPECIFIED = 0;
  94. // Orientation is aligned with page up.
  95. PAGE_UP = 1;
  96. // Orientation is aligned with page right.
  97. // Turn the head 90 degrees clockwise from upright to read.
  98. PAGE_RIGHT = 2;
  99. // Orientation is aligned with page down.
  100. // Turn the head 180 degrees from upright to read.
  101. PAGE_DOWN = 3;
  102. // Orientation is aligned with page left.
  103. // Turn the head 90 degrees counterclockwise from upright to read.
  104. PAGE_LEFT = 4;
  105. }
  106. // Text anchor indexing into the
  107. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  108. TextAnchor text_anchor = 1;
  109. // Confidence of the current
  110. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
  111. // context of the object this layout is for. e.g. confidence can be for a
  112. // single token, a table, a visual element, etc. depending on context.
  113. // Range [0, 1].
  114. float confidence = 2;
  115. // The bounding polygon for the
  116. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
  117. BoundingPoly bounding_poly = 3;
  118. // Detected orientation for the
  119. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
  120. Orientation orientation = 4;
  121. }
  122. // A block has a set of lines (collected into paragraphs) that have a
  123. // common line-spacing and orientation.
  124. message Block {
  125. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  126. // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
  127. Layout layout = 1;
  128. // A list of detected languages together with confidence.
  129. repeated DetectedLanguage detected_languages = 2;
  130. }
  131. // A collection of lines that a human would perceive as a paragraph.
  132. message Paragraph {
  133. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  134. // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
  135. Layout layout = 1;
  136. // A list of detected languages together with confidence.
  137. repeated DetectedLanguage detected_languages = 2;
  138. }
  139. // A collection of tokens that a human would perceive as a line.
  140. // Does not cross column boundaries, can be horizontal, vertical, etc.
  141. message Line {
  142. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  143. // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
  144. Layout layout = 1;
  145. // A list of detected languages together with confidence.
  146. repeated DetectedLanguage detected_languages = 2;
  147. }
  148. // A detected token.
  149. message Token {
  150. // Detected break at the end of a
  151. // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
  152. message DetectedBreak {
  153. // Enum to denote the type of break found.
  154. enum Type {
  155. // Unspecified break type.
  156. TYPE_UNSPECIFIED = 0;
  157. // A single whitespace.
  158. SPACE = 1;
  159. // A wider whitespace.
  160. WIDE_SPACE = 2;
  161. // A hyphen that indicates that a token has been split across lines.
  162. HYPHEN = 3;
  163. }
  164. // Detected break type.
  165. Type type = 1;
  166. }
  167. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  168. // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
  169. Layout layout = 1;
  170. // Detected break at the end of a
  171. // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
  172. DetectedBreak detected_break = 2;
  173. // A list of detected languages together with confidence.
  174. repeated DetectedLanguage detected_languages = 3;
  175. }
  176. // Detected non-text visual elements e.g. checkbox, signature etc. on the
  177. // page.
  178. message VisualElement {
  179. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  180. // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
  181. Layout layout = 1;
  182. // Type of the
  183. // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
  184. string type = 2;
  185. // A list of detected languages together with confidence.
  186. repeated DetectedLanguage detected_languages = 3;
  187. }
  188. // A table representation similar to HTML table structure.
  189. message Table {
  190. // A row of table cells.
  191. message TableRow {
  192. // Cells that make up this row.
  193. repeated TableCell cells = 1;
  194. }
  195. // A cell representation inside the table.
  196. message TableCell {
  197. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  198. // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
  199. Layout layout = 1;
  200. // How many rows this cell spans.
  201. int32 row_span = 2;
  202. // How many columns this cell spans.
  203. int32 col_span = 3;
  204. // A list of detected languages together with confidence.
  205. repeated DetectedLanguage detected_languages = 4;
  206. }
  207. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
  208. // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
  209. Layout layout = 1;
  210. // Header rows of the table.
  211. repeated TableRow header_rows = 2;
  212. // Body rows of the table.
  213. repeated TableRow body_rows = 3;
  214. // A list of detected languages together with confidence.
  215. repeated DetectedLanguage detected_languages = 4;
  216. }
  217. // A form field detected on the page.
  218. message FormField {
  219. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
  220. // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
  221. // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
  222. Layout field_name = 1;
  223. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
  224. // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
  225. // value.
  226. Layout field_value = 2;
  227. // A list of detected languages for name together with confidence.
  228. repeated DetectedLanguage name_detected_languages = 3;
  229. // A list of detected languages for value together with confidence.
  230. repeated DetectedLanguage value_detected_languages = 4;
  231. }
  232. // Detected language for a structural component.
  233. message DetectedLanguage {
  234. // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  235. // information, see
  236. // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  237. string language_code = 1;
  238. // Confidence of detected language. Range [0, 1].
  239. float confidence = 2;
  240. }
  241. // 1-based index for current
  242. // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
  243. // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
  244. // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
  245. // for individual processing.
  246. int32 page_number = 1;
  247. // Physical dimension of the page.
  248. Dimension dimension = 2;
  249. // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
  250. // page.
  251. Layout layout = 3;
  252. // A list of detected languages together with confidence.
  253. repeated DetectedLanguage detected_languages = 4;
  254. // A list of visually detected text blocks on the page.
  255. // A block has a set of lines (collected into paragraphs) that have a common
  256. // line-spacing and orientation.
  257. repeated Block blocks = 5;
  258. // A list of visually detected text paragraphs on the page.
  259. // A collection of lines that a human would perceive as a paragraph.
  260. repeated Paragraph paragraphs = 6;
  261. // A list of visually detected text lines on the page.
  262. // A collection of tokens that a human would perceive as a line.
  263. repeated Line lines = 7;
  264. // A list of visually detected tokens on the page.
  265. repeated Token tokens = 8;
  266. // A list of detected non-text visual elements e.g. checkbox,
  267. // signature etc. on the page.
  268. repeated VisualElement visual_elements = 9;
  269. // A list of visually detected tables on the page.
  270. repeated Table tables = 10;
  271. // A list of visually detected form fields on the page.
  272. repeated FormField form_fields = 11;
  273. }
  274. // A phrase in the text that is a known entity type, such as a person, an
  275. // organization, or location.
  276. message Entity {
  277. // Provenance of the entity.
  278. // Text anchor indexing into the
  279. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  280. TextAnchor text_anchor = 1;
  281. // Required. Entity type from a schema e.g. `Address`.
  282. string type = 2 [(google.api.field_behavior) = REQUIRED];
  283. // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
  284. string mention_text = 3;
  285. // Canonical mention name. This will be a unique value in the entity list
  286. // for this document.
  287. string mention_id = 4;
  288. }
  289. // Relationship between
  290. // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
  291. message EntityRelation {
  292. // Subject entity mention_id.
  293. string subject_id = 1;
  294. // Object entity mention_id.
  295. string object_id = 2;
  296. // Relationship description.
  297. string relation = 3;
  298. }
  299. // Text reference indexing into the
  300. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  301. message TextAnchor {
  302. // A text segment in the
  303. // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
  304. // indices may be out of bounds which indicate that the text extends into
  305. // another document shard for large sharded documents. See
  306. // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset]
  307. message TextSegment {
  308. // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
  309. // start UTF-8 char index in the
  310. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  311. int64 start_index = 1;
  312. // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
  313. // half open end UTF-8 char index in the
  314. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  315. int64 end_index = 2;
  316. }
  317. // The text segments from the
  318. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  319. repeated TextSegment text_segments = 1;
  320. }
  321. // Original source document from the user.
  322. oneof source {
  323. // Currently supports Google Cloud Storage URI of the form
  324. // `gs://bucket_name/object_name`. Object versioning is not supported.
  325. // See [Google Cloud Storage Request
  326. // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  327. // info.
  328. string uri = 1;
  329. // Inline document content, represented as a stream of bytes.
  330. // Note: As with all `bytes` fields, protobuffers use a pure binary
  331. // representation, whereas JSON representations use base64.
  332. bytes content = 2;
  333. }
  334. // An IANA published MIME type (also referred to as media type). For more
  335. // information, see
  336. // https://www.iana.org/assignments/media-types/media-types.xhtml.
  337. string mime_type = 3;
  338. // UTF-8 encoded text in reading order from the document.
  339. string text = 4;
  340. // Styles for the
  341. // [Document.text][google.cloud.documentai.v1beta1.Document.text].
  342. repeated Style text_styles = 5;
  343. // Visual page layout for the
  344. // [Document][google.cloud.documentai.v1beta1.Document].
  345. repeated Page pages = 6;
  346. // A list of entities detected on
  347. // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
  348. // document shards, entities in this list may cross shard boundaries.
  349. repeated Entity entities = 7;
  350. // Relationship among
  351. // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
  352. repeated EntityRelation entity_relations = 8;
  353. // Information about the sharding if this document is sharded part of a larger
  354. // document. If the document is not sharded, this message is not specified.
  355. ShardInfo shard_info = 9;
  356. // Any error that occurred while processing this document.
  357. google.rpc.Status error = 10;
  358. }