123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.contentwarehouse.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document.proto";
import "google/protobuf/timestamp.proto";
import "google/type/datetime.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/contentwarehouse/v1;contentwarehouse";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.contentwarehouse.v1";
// Schema of a document held in the content warehouse.
message Document {
  option (google.api.resource) = {
    type: "contentwarehouse.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/documents/referenceId/{reference_id}"
  };

  // Resource name of the document, in the form
  // projects/{project_number}/locations/{location}/documents/{document_id}.
  //
  // This value is ignored when a document is created.
  string name = 1;

  // Customer-assigned reference ID. It must be unique per project and
  // location.
  string reference_id = 11;

  // Required. User-provided display name of the document; it is the name
  // shown in the UI.
  // Customers can fill it with the name of the document. It is distinct from
  // `title`, which is optional and holds the top heading of the document.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Descriptive title of the document.
  // It usually appears in the top section of the document, and the
  // question-answering feature requires it.
  string title = 18;

  // URI used to display the document, for example in the UI.
  string display_uri = 17;

  // Name of the document schema, in the form
  // projects/{project_number}/locations/{location}/documentSchemas/{document_schema_id}.
  string document_schema_name = 3 [(google.api.resource_reference) = {
    type: "contentwarehouse.googleapis.com/DocumentSchema"
  }];

  // Structured content of the document. As a oneof, at most one of the
  // members below is set.
  oneof structured_content {
    // Other document formats, such as PPTX or XLSX.
    string plain_text = 15;

    // Structured content, including OCR, saved in Document AI format.
    google.cloud.documentai.v1.Document cloud_ai_document = 4;
  }

  // Path linked to the structured content file.
  string structured_content_uri = 16;

  // The raw document file.
  oneof raw_document {
    // Cloud Storage path of the raw document file.
    string raw_document_path = 5;

    // Content of the raw document.
    bytes inline_raw_document = 6;
  }

  // User-supplied metadata, as a list of property values.
  repeated Property properties = 7;

  // Output only. Time of the most recent update to the document.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Applies when the document was not loaded through DocAI and
  // inline_raw_document still needs parsing/extraction. For instance, when
  // inline_raw_document holds the bytes of a PDF file, set this to
  // RAW_DOCUMENT_FILE_TYPE_PDF.
  RawDocumentFileType raw_document_file_type = 10;

  // When true, the document is visible to asynchronous policies and rules.
  bool async_enabled = 12;

  // When true, text extraction is not performed.
  bool text_extraction_disabled = 19;

  // User who created the document.
  string creator = 13;

  // User who last updated the document.
  string updater = 14;
}
// A reference to a document.
message DocumentReference {
  // Required. Resource name of the document being referenced.
  string document_name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/Document"
    }
  ];

  // Display name of the referenced document. Depending on the ACL constraint,
  // it does not have to agree with the display_name in the Document proto.
  string display_name = 2;

  // A subset of the referenced document's content, which lets users peek at
  // the information in that document.
  string snippet = 3;

  // Document type of the document being referenced.
  bool document_is_folder = 4;

  // Output only. Time of the most recent update to the document.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was deleted.
  google.protobuf.Timestamp delete_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// A single property of a document.
message Property {
  // Required. Has to match the name of a PropertyDefinition in the
  // DocumentSchema.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // The property's type, which has to match the property_options type of the
  // corresponding PropertyDefinition.
  // Holds the value of the Property parsed into a specific data type; the
  // typed value(s) are obtained from Document AI's Property.mention_text
  // field.
  oneof values {
    // Property values of integer type.
    IntegerArray integer_values = 2;

    // Property values of float type.
    FloatArray float_values = 3;

    // Property values of string/text type.
    TextArray text_values = 4;

    // Property values of enum type.
    EnumArray enum_values = 5;

    // Property values that are nested structured data.
    PropertyArray property_values = 6;

    // Property values of date time type.
    // CMEK compliant deployments do not support it.
    DateTimeArray date_time_values = 7;

    // Property values of map type.
    MapProperty map_property = 8;

    // Property values of timestamp type.
    // CMEK compliant deployments do not support it.
    TimestampArray timestamp_values = 9;
  }
}
// A list of integer values.
message IntegerArray {
  // The integer values.
  repeated int32 values = 1;
}
// A list of float values.
message FloatArray {
  // The float values.
  repeated float values = 1;
}
// A list of string/text values.
message TextArray {
  // The text values.
  repeated string values = 1;
}
// A list of enum values.
message EnumArray {
  // The enum values, carried as strings.
  repeated string values = 1;
}
// A list of DateTime values.
message DateTimeArray {
  // The datetime values.
  // OffsetDateTime and ZonedDateTime are both supported.
  repeated google.type.DateTime values = 1;
}
// A list of timestamp values.
message TimestampArray {
  // The timestamp values.
  repeated TimestampValue values = 1;
}
// The timestamp value type.
message TimestampValue {
  // The timestamp, supplied in one of the forms below.
  oneof value {
    // The timestamp as a Timestamp message.
    google.protobuf.Timestamp timestamp_value = 1;

    // The timestamp as text. The string has to represent a valid instant in
    // UTC; it is parsed with java.time.format.DateTimeFormatter.ISO_INSTANT.
    // Example: "2013-09-29T18:46:19Z"
    string text_value = 2;
  }
}
// A list of property values.
message PropertyArray {
  // The property values.
  repeated Property properties = 1;
}
// A map property value.
// It represents structured entries of key-value pairs: field names that map
// to dynamically typed values.
message MapProperty {
  // Unordered map from field name to a dynamically typed value.
  map<string, Value> fields = 1;
}
// `Value` is a dynamically typed value. It can be a float, an integer, a
// string, an enum, a datetime, a timestamp, or a boolean value. A producer of
// a value is expected to set one of these variants; the absence of any
// variant indicates an error.
message Value {
  // The kind of value.
  oneof kind {
    // A float value.
    float float_value = 1;

    // An integer value.
    int32 int_value = 2;

    // A string value.
    string string_value = 3;

    // An enum value.
    EnumValue enum_value = 4;

    // A datetime value.
    google.type.DateTime datetime_value = 5;

    // A timestamp value.
    TimestampValue timestamp_value = 6;

    // A boolean value.
    bool boolean_value = 7;
  }
}
// The string value of an enum field.
message EnumValue {
  // The enum field's value as a string. It has to match the set of enums
  // defined in the document schema using EnumTypeOptions.
  string value = 1;
}
// File format of the raw document, for use when a raw document is supplied.
enum RawDocumentFileType {
  // Either no raw document is specified or it is non-parsable.
  RAW_DOCUMENT_FILE_TYPE_UNSPECIFIED = 0;

  // Adobe PDF format.
  RAW_DOCUMENT_FILE_TYPE_PDF = 1;

  // Microsoft Word format.
  RAW_DOCUMENT_FILE_TYPE_DOCX = 2;

  // Microsoft Excel format.
  RAW_DOCUMENT_FILE_TYPE_XLSX = 3;

  // Microsoft PowerPoint format.
  RAW_DOCUMENT_FILE_TYPE_PPTX = 4;

  // UTF-8 encoded text format.
  RAW_DOCUMENT_FILE_TYPE_TEXT = 5;
}
|