  // Copyright 2022 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  // This file uses proto3 syntax.
  syntax = "proto3";

  // Versioned package; all types below are scoped under it.
  package google.cloud.documentai.v1beta3;

  // Per-language code generation options. These values determine the
  // namespaces/packages of the generated code, so changing any of them is a
  // source-breaking change for existing users of the generated libraries.
  option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
  option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta3;documentai";
  option java_multiple_files = true;
  option java_outer_classname = "DocumentAiDocumentSchema";
  option java_package = "com.google.cloud.documentai.v1beta3";
  option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
  option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

  // The schema defines the output of the processed document by a processor.
  message DocumentSchema {
    // EntityType is the wrapper of a label of the corresponding model with
    // detailed attributes and limitations for entity-based processors.
    // Multiple types can also compose a dependency tree to represent nested
    // types.
    message EntityType {
      // Defines a list of enum values.
      message EnumValues {
        // The individual values that this enum values type can include.
        repeated string values = 1;
      }

      // Defines properties that can be part of the entity type.
      message Property {
        // Types of occurrences of the entity type in the document. Note: this
        // represents the number of instances of an entity type, not the
        // number of mentions of a given entity instance.
        enum OccurrenceType {
          // Unspecified occurrence type.
          OCCURRENCE_TYPE_UNSPECIFIED = 0;

          // There will be zero or one instance of this entity type.
          OPTIONAL_ONCE = 1;

          // The entity type will appear zero or multiple times.
          OPTIONAL_MULTIPLE = 2;

          // The entity type will only appear exactly once.
          REQUIRED_ONCE = 3;

          // The entity type will appear once or more times.
          REQUIRED_MULTIPLE = 4;
        }

        // The name of the property. Follows the same guidelines as the
        // EntityType name.
        string name = 1;

        // A reference to the value type of the property. This type is
        // subject to the same conventions as the `EntityType.base_types`
        // field.
        string value_type = 2;

        // Occurrence type limits the number of instances an entity type
        // appears in the document.
        OccurrenceType occurrence_type = 3;
      }

      // NOTE: the field numbers in this message are intentionally
      // non-contiguous (1, 2, 6, 13, 14). Field numbers identify fields on
      // the wire, so they must never be renumbered or reused.

      // Where the allowed values of this entity type come from. At most one
      // member of this oneof can be set at a time.
      oneof value_source {
        // If specified, lists all the possible values for this entity. This
        // should not be more than a handful of values. If the number of
        // values is >10 or could change frequently, use the
        // `EntityType.value_ontology` field and specify a list of all
        // possible values in a value ontology file.
        EnumValues enum_values = 14;
      }

      // User defined name for the type.
      string display_name = 13;

      // Name of the type. It must be unique within the schema file and
      // cannot be a 'Common Type'. Besides that we use the following naming
      // conventions:
      //
      // - *use `snake_casing`*
      // - name matching is case-insensitive
      // - Maximum 64 characters.
      // - Must start with a letter.
      // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
      //   compatibility internal infrastructure and tooling can handle any
      //   ascii character)
      // - The `/` is sometimes used to denote a property of a type. For
      //   example `line_item/amount`. This convention is deprecated, but
      //   will still be honored for backward compatibility.
      string name = 1;

      // The entity type that this type is derived from. For now, one and
      // only one should be set.
      repeated string base_types = 2;

      // Describing the nested structure, or composition of an entity.
      repeated Property properties = 6;
    }

    // Metadata for global schema behavior.
    message Metadata {
      // If true, a `document` entity type can be applied to a subdocument
      // (splitting). Otherwise, it can only be applied to the entire
      // document (classification).
      bool document_splitter = 1;

      // If true, on a given page, there can be multiple `document`
      // annotations covering it.
      bool document_allow_multiple_labels = 2;

      // If set, all the nested entities must be prefixed with the parents.
      bool prefixed_naming_on_properties = 6;

      // If set, we will skip the naming format validation in the schema. So
      // the string values in `DocumentSchema.EntityType.name` and
      // `DocumentSchema.EntityType.Property.name` will not be checked.
      bool skip_naming_validation = 7;
    }

    // Display name to show to users.
    string display_name = 1;

    // Description of the schema.
    string description = 2;

    // Entity types of the schema.
    repeated EntityType entity_types = 3;

    // Metadata of the schema.
    Metadata metadata = 4;
  }