// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

// Code-generation options for the Go and Java outputs.
option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1;language";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities],
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that performs, in one call, every analysis enabled
  // in
  // [AnnotateTextRequest.Features][google.cloud.language.v1.AnnotateTextRequest.Features]:
  // syntax, entities, document sentiment, entity sentiment, and text
  // classification.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}
// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  //
  // Annotated as REQUIRED so that the machine-readable field behavior agrees
  // with the "Required." wording, as on the request messages in this file.
  Type type = 1 [(google.api.field_behavior) = REQUIRED];

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}
// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v1.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}
// The text encoding the caller uses when processing the output.
//
// Supplying an `EncodingType` is recommended: the API reports beginning
// offsets for outputs such as tokens and mentions, and languages that natively
// use different text encodings may access those offsets differently.
enum EncodingType {
  // No encoding specified. Encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is computed from
  // the UTF-8 encoding of the input. C++ and Go are examples of languages that
  // use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is computed from
  // the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is computed from
  // the UTF-32 encoding of the input. Python is an example of a language that
  // uses this encoding natively.
  UTF32 = 3;
}
// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the documentation of
  // [Type][google.cloud.language.v1.Entity.Type].
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to
  // [AnalyzeEntitySentiment][google.cloud.language.v1.LanguageService.AnalyzeEntitySentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}
// The smallest syntactic building block of the text.
message Token {
  // Text of the token.
  TextSpan text = 1;

  // Part-of-speech tag assigned to this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // The token's
  // [lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29).
  string lemma = 4;
}
// The feeling associated with the entire text or with entities in the text.
message Sentiment {
  // Absolute magnitude of the sentiment, independent of whether the score is
  // positive or negative. Non-negative, in the [0, +inf) range.
  float magnitude = 2;

  // Sentiment score, ranging from -1.0 (negative sentiment) to 1.0
  // (positive sentiment).
  float score = 3;
}
// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;
    // Adjective
    ADJ = 1;
    // Adposition (preposition and postposition)
    ADP = 2;
    // Adverb
    ADV = 3;
    // Conjunction
    CONJ = 4;
    // Determiner
    DET = 5;
    // Noun (common and proper)
    NOUN = 6;
    // Cardinal number
    NUM = 7;
    // Pronoun
    PRON = 8;
    // Particle or other function word
    PRT = 9;
    // Punctuation
    PUNCT = 10;
    // Verb (all tenses and modes)
    VERB = 11;
    // Other: foreign words, typos, abbreviations
    X = 12;
    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;
    // Perfective
    PERFECTIVE = 1;
    // Imperfective
    IMPERFECTIVE = 2;
    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;
    // Accusative
    ACCUSATIVE = 1;
    // Adverbial
    ADVERBIAL = 2;
    // Complementive
    COMPLEMENTIVE = 3;
    // Dative
    DATIVE = 4;
    // Genitive
    GENITIVE = 5;
    // Instrumental
    INSTRUMENTAL = 6;
    // Locative
    LOCATIVE = 7;
    // Nominative
    NOMINATIVE = 8;
    // Oblique
    OBLIQUE = 9;
    // Partitive
    PARTITIVE = 10;
    // Prepositional
    PREPOSITIONAL = 11;
    // Reflexive
    REFLEXIVE_CASE = 12;
    // Relative
    RELATIVE_CASE = 13;
    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;
    // Adnominal. The value name keeps its existing spelling (`ADNOMIAL`)
    // because renaming a published enum value would break JSON/text-format
    // users and generated code.
    ADNOMIAL = 1;
    // Auxiliary
    AUXILIARY = 2;
    // Complementizer
    COMPLEMENTIZER = 3;
    // Final ending
    FINAL_ENDING = 4;
    // Gerund
    GERUND = 5;
    // Realis
    REALIS = 6;
    // Irrealis
    IRREALIS = 7;
    // Short form
    SHORT = 8;
    // Long form
    LONG = 9;
    // Order form
    ORDER = 10;
    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;
    // Feminine
    FEMININE = 1;
    // Masculine
    MASCULINE = 2;
    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;
    // Conditional
    CONDITIONAL_MOOD = 1;
    // Imperative
    IMPERATIVE = 2;
    // Indicative
    INDICATIVE = 3;
    // Interrogative
    INTERROGATIVE = 4;
    // Jussive
    JUSSIVE = 5;
    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;
    // Singular
    SINGULAR = 1;
    // Plural
    PLURAL = 2;
    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;
    // First
    FIRST = 1;
    // Second
    SECOND = 2;
    // Third
    THIRD = 3;
    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;
    // Proper
    PROPER = 1;
    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;
    // Reciprocal
    RECIPROCAL = 1;
    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;
    // Conditional
    CONDITIONAL_TENSE = 1;
    // Future
    FUTURE = 2;
    // Past
    PAST = 3;
    // Present
    PRESENT = 4;
    // Imperfect
    IMPERFECT = 5;
    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;
    // Active
    ACTIVE = 1;
    // Causative
    CAUSATIVE = 2;
    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}
// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017)
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;
    // Abbreviation modifier
    ABBREV = 1;
    // Adjectival complement
    ACOMP = 2;
    // Adverbial clause modifier
    ADVCL = 3;
    // Adverbial modifier
    ADVMOD = 4;
    // Adjectival modifier of an NP
    AMOD = 5;
    // Appositional modifier of an NP
    APPOS = 6;
    // Attribute dependent of a copular verb
    ATTR = 7;
    // Auxiliary (non-main) verb
    AUX = 8;
    // Passive auxiliary
    AUXPASS = 9;
    // Coordinating conjunction
    CC = 10;
    // Clausal complement of a verb or adjective
    CCOMP = 11;
    // Conjunct
    CONJ = 12;
    // Clausal subject
    CSUBJ = 13;
    // Clausal passive subject
    CSUBJPASS = 14;
    // Dependency (unable to determine)
    DEP = 15;
    // Determiner
    DET = 16;
    // Discourse
    DISCOURSE = 17;
    // Direct object
    DOBJ = 18;
    // Expletive
    EXPL = 19;
    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;
    // Indirect object
    IOBJ = 21;
    // Marker (word introducing a subordinate clause)
    MARK = 22;
    // Multi-word expression
    MWE = 23;
    // Multi-word verbal expression
    MWV = 24;
    // Negation modifier
    NEG = 25;
    // Noun compound modifier
    NN = 26;
    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;
    // Nominal subject
    NSUBJ = 28;
    // Passive nominal subject
    NSUBJPASS = 29;
    // Numeric modifier of a noun
    NUM = 30;
    // Element of compound number
    NUMBER = 31;
    // Punctuation mark
    P = 32;
    // Parataxis relation
    PARATAXIS = 33;
    // Participial modifier
    PARTMOD = 34;
    // The complement of a preposition is a clause
    PCOMP = 35;
    // Object of a preposition
    POBJ = 36;
    // Possession modifier
    POSS = 37;
    // Postverbal negative particle
    POSTNEG = 38;
    // Predicate complement
    PRECOMP = 39;
    // Preconjunct
    PRECONJ = 40;
    // Predeterminer
    PREDET = 41;
    // Prefix
    PREF = 42;
    // Prepositional modifier
    PREP = 43;
    // The relationship between a verb and verbal morpheme
    PRONL = 44;
    // Particle
    PRT = 45;
    // Associative or possessive marker
    PS = 46;
    // Quantifier phrase modifier
    QUANTMOD = 47;
    // Relative clause modifier
    RCMOD = 48;
    // Complementizer in relative clause
    RCMODREL = 49;
    // Ellipsis without a preceding predicate
    RDROP = 50;
    // Referent
    REF = 51;
    // Remnant
    REMNANT = 52;
    // Reparandum
    REPARANDUM = 53;
    // Root
    ROOT = 54;
    // Suffix specifying a unit of number
    SNUM = 55;
    // Suffix
    SUFF = 56;
    // Temporal modifier
    TMOD = 57;
    // Topic marker
    TOPIC = 58;
    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;
    // Vocative
    VOCATIVE = 60;
    // Open clausal complement
    XCOMP = 61;
    // Name suffix
    SUFFIX = 62;
    // Name title
    TITLE = 63;
    // Adverbial phrase modifier
    ADVPHMOD = 64;
    // Causative auxiliary
    AUXCAUS = 65;
    // Helper auxiliary
    AUXVV = 66;
    // Rentaishi (Prenominal modifier)
    DTMOD = 67;
    // Foreign words
    FOREIGN = 68;
    // Keyword
    KW = 69;
    // List for chains of comparable items
    LIST = 70;
    // Nominalized clause
    NOMC = 71;
    // Nominalized clausal subject
    NOMCSUBJ = 72;
    // Nominalized clausal passive
    NOMCSUBJPASS = 73;
    // Compound of numeric modifier
    NUMC = 74;
    // Copula
    COP = 75;
    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;
    // Aspect marker
    ASP = 77;
    // Genitive modifier
    GMOD = 78;
    // Genitive object
    GOBJ = 79;
    // Infinitival modifier
    INFMOD = 80;
    // Measure
    MES = 81;
    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to
  // [AnalyzeEntitySentiment][google.cloud.language.v1.LanguageService.AnalyzeEntitySentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}
// A piece of output text.
message TextSpan {
  // Content of the output text.
  string content = 1;

  // Beginning offset of the content in the original document, calculated by
  // the API according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}
// A category returned from the text classifier.
message ClassificationCategory {
  // Name of the category representing the document, taken from the
  // [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category: how certain the classifier
  // is that this category represents the given text.
  float confidence = 2;
}
// Model options that can be supplied with classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // When `ContentCategoriesVersion` is not specified, this option
      // defaults to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // When this oneof is left unset, the `v1_model` is used by default.
  oneof model_type {
    // Selects the V1 model together with the V1 content categories version.
    // The V1 model is a legacy model; support for this will be discontinued
    // in the future.
    V1Model v1_model = 1;

    // Selects the V2 model with the appropriate content categories version.
    // The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}
// Request message for sentiment analysis.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Encoding type the API uses when calculating sentence offsets.
  EncodingType encoding_type = 2;
}
// Response message for sentiment analysis.
message AnalyzeSentimentResponse {
  // Overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // Language of the text: the language specified in the request or, if none
  // was specified, the automatically-detected language. See the
  // [Document.language][google.cloud.language.v1.Document.language] field for
  // more details.
  string language = 2;

  // Sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}
// Request message for entity-level sentiment analysis.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Encoding type the API uses when calculating offsets.
  EncodingType encoding_type = 2;
}
// Response message for entity-level sentiment analysis.
message AnalyzeEntitySentimentResponse {
  // Entities recognized in the input document, with associated sentiments.
  repeated Entity entities = 1;

  // Language of the text: the language specified in the request or, if none
  // was specified, the automatically-detected language. See the
  // [Document.language][google.cloud.language.v1.Document.language] field for
  // more details.
  string language = 2;
}
// Request message for entity analysis.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Encoding type the API uses when calculating offsets.
  EncodingType encoding_type = 2;
}
// Response message for entity analysis.
message AnalyzeEntitiesResponse {
  // Entities recognized in the input document.
  repeated Entity entities = 1;

  // Language of the text: the language specified in the request or, if none
  // was specified, the automatically-detected language. See the
  // [Document.language][google.cloud.language.v1.Document.language] field for
  // more details.
  string language = 2;
}
// Request message for syntax analysis.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Encoding type the API uses when calculating offsets.
  EncodingType encoding_type = 2;
}
// Response message for syntax analysis.
message AnalyzeSyntaxResponse {
  // Sentences found in the input document.
  repeated Sentence sentences = 1;

  // Tokens in the input document, together with their syntactic information.
  repeated Token tokens = 2;

  // Language of the text: the language specified in the request or, if none
  // was specified, the automatically-detected language. See the
  // [Document.language][google.cloud.language.v1.Document.language] field for
  // more details.
  string language = 3;
}
// Request message for document classification.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. When not specified, v1 options
  // are used.
  ClassificationModelOptions classification_model_options = 3;
}
// Response message for document classification.
message ClassifyTextResponse {
  // Categories that represent the input document.
  repeated ClassificationCategory categories = 1;
}
// Request message for the text annotation API, which can perform several
// analysis types (sentiment, entities, and syntax) in a single call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Each flag set to true enables that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;

    // Model options to use for classification. When not specified, v1
    // options are used. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // Encoding type the API uses when calculating offsets.
  EncodingType encoding_type = 3;
}
// Response message for text annotation.
message AnnotateTextResponse {
  // Sentences in the input document. Populated when the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens in the input document, together with their syntactic information.
  // Populated when the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities in the input document, together with their semantic
  // information. Populated when the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // Overall sentiment for the document. Populated when the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // Language of the text: the language specified in the request or, if none
  // was specified, the automatically-detected language. See the
  // [Document.language][google.cloud.language.v1.Document.language] field for
  // more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}
|