// cloud_speech.proto
// (Extraction artifact removed: the original rendering's page/line-number
// residue carried no content and has been replaced by this note.)
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.speech.v2;
  16. import "google/api/annotations.proto";
  17. import "google/api/client.proto";
  18. import "google/api/field_behavior.proto";
  19. import "google/api/resource.proto";
  20. import "google/longrunning/operations.proto";
  21. import "google/protobuf/duration.proto";
  22. import "google/protobuf/field_mask.proto";
  23. import "google/protobuf/timestamp.proto";
  24. import "google/rpc/status.proto";
  25. option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v2;speech";
  26. option java_multiple_files = true;
  27. option java_outer_classname = "CloudSpeechProto";
  28. option java_package = "com.google.cloud.speech.v2";
  29. option (google.api.resource_definition) = {
  30. type: "cloudkms.googleapis.com/CryptoKey"
  31. pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
  32. };
  33. option (google.api.resource_definition) = {
  34. type: "cloudkms.googleapis.com/CryptoKeyVersion"
  35. pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}"
  36. };
  37. // Enables speech transcription and resource management.
  38. service Speech {
  39. option (google.api.default_host) = "speech.googleapis.com";
  40. option (google.api.oauth_scopes) =
  41. "https://www.googleapis.com/auth/cloud-platform";
  42. // Creates a [Recognizer][google.cloud.speech.v2.Recognizer].
  43. rpc CreateRecognizer(CreateRecognizerRequest)
  44. returns (google.longrunning.Operation) {
  45. option (google.api.http) = {
  46. post: "/v2/{parent=projects/*/locations/*}/recognizers"
  47. body: "recognizer"
  48. };
  49. option (google.api.method_signature) = "parent,recognizer,recognizer_id";
  50. option (google.longrunning.operation_info) = {
  51. response_type: "Recognizer"
  52. metadata_type: "OperationMetadata"
  53. };
  54. }
  55. // Lists Recognizers.
  56. rpc ListRecognizers(ListRecognizersRequest)
  57. returns (ListRecognizersResponse) {
  58. option (google.api.http) = {
  59. get: "/v2/{parent=projects/*/locations/*}/recognizers"
  60. };
  61. option (google.api.method_signature) = "parent";
  62. }
  63. // Returns the requested
  64. // [Recognizer][google.cloud.speech.v2.Recognizer]. Fails with
  65. // [NOT_FOUND][google.rpc.Code.NOT_FOUND] if the requested recognizer doesn't
  66. // exist.
  67. rpc GetRecognizer(GetRecognizerRequest) returns (Recognizer) {
  68. option (google.api.http) = {
  69. get: "/v2/{name=projects/*/locations/*/recognizers/*}"
  70. };
  71. option (google.api.method_signature) = "name";
  72. }
  73. // Updates the [Recognizer][google.cloud.speech.v2.Recognizer].
  74. rpc UpdateRecognizer(UpdateRecognizerRequest)
  75. returns (google.longrunning.Operation) {
  76. option (google.api.http) = {
  77. patch: "/v2/{recognizer.name=projects/*/locations/*/recognizers/*}"
  78. body: "recognizer"
  79. };
  80. option (google.api.method_signature) = "recognizer,update_mask";
  81. option (google.longrunning.operation_info) = {
  82. response_type: "Recognizer"
  83. metadata_type: "OperationMetadata"
  84. };
  85. }
  86. // Deletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  87. rpc DeleteRecognizer(DeleteRecognizerRequest)
  88. returns (google.longrunning.Operation) {
  89. option (google.api.http) = {
  90. delete: "/v2/{name=projects/*/locations/*/recognizers/*}"
  91. };
  92. option (google.api.method_signature) = "name";
  93. option (google.longrunning.operation_info) = {
  94. response_type: "Recognizer"
  95. metadata_type: "OperationMetadata"
  96. };
  97. }
  98. // Undeletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  99. rpc UndeleteRecognizer(UndeleteRecognizerRequest)
  100. returns (google.longrunning.Operation) {
  101. option (google.api.http) = {
  102. post: "/v2/{name=projects/*/locations/*/recognizers/*}:undelete"
  103. body: "*"
  104. };
  105. option (google.api.method_signature) = "name";
  106. option (google.longrunning.operation_info) = {
  107. response_type: "Recognizer"
  108. metadata_type: "OperationMetadata"
  109. };
  110. }
  111. // Performs synchronous Speech recognition: receive results after all audio
  112. // has been sent and processed.
  113. rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
  114. option (google.api.http) = {
  115. post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:recognize"
  116. body: "*"
  117. };
  118. option (google.api.method_signature) =
  119. "recognizer,config,config_mask,content";
  120. option (google.api.method_signature) = "recognizer,config,config_mask,uri";
  121. }
  122. // Performs bidirectional streaming speech recognition: receive results while
  123. // sending audio. This method is only available via the gRPC API (not REST).
  124. rpc StreamingRecognize(stream StreamingRecognizeRequest)
  125. returns (stream StreamingRecognizeResponse) {}
  126. // Performs batch asynchronous speech recognition: send a request with N
  127. // audio files and receive a long running operation that can be polled to see
  128. // when the transcriptions are finished.
  129. rpc BatchRecognize(BatchRecognizeRequest)
  130. returns (google.longrunning.Operation) {
  131. option (google.api.http) = {
  132. post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:batchRecognize"
  133. body: "*"
  134. };
  135. option (google.api.method_signature) =
  136. "recognizer,config,config_mask,files";
  137. option (google.longrunning.operation_info) = {
  138. response_type: "BatchRecognizeResponse"
  139. metadata_type: "OperationMetadata"
  140. };
  141. }
  142. // Returns the requested [Config][google.cloud.speech.v2.Config].
  143. rpc GetConfig(GetConfigRequest) returns (Config) {
  144. option (google.api.http) = {
  145. get: "/v2/{name=projects/*/locations/*/config}"
  146. };
  147. option (google.api.method_signature) = "name";
  148. }
  149. // Updates the [Config][google.cloud.speech.v2.Config].
  150. rpc UpdateConfig(UpdateConfigRequest) returns (Config) {
  151. option (google.api.http) = {
  152. patch: "/v2/{config.name=projects/*/locations/*/config}"
  153. body: "config"
  154. };
  155. option (google.api.method_signature) = "config,update_mask";
  156. }
  157. // Creates a [CustomClass][google.cloud.speech.v2.CustomClass].
  158. rpc CreateCustomClass(CreateCustomClassRequest)
  159. returns (google.longrunning.Operation) {
  160. option (google.api.http) = {
  161. post: "/v2/{parent=projects/*/locations/*}/customClasses"
  162. body: "custom_class"
  163. };
  164. option (google.api.method_signature) =
  165. "parent,custom_class,custom_class_id";
  166. option (google.longrunning.operation_info) = {
  167. response_type: "CustomClass"
  168. metadata_type: "OperationMetadata"
  169. };
  170. }
  171. // Lists CustomClasses.
  172. rpc ListCustomClasses(ListCustomClassesRequest)
  173. returns (ListCustomClassesResponse) {
  174. option (google.api.http) = {
  175. get: "/v2/{parent=projects/*/locations/*}/customClasses"
  176. };
  177. option (google.api.method_signature) = "parent";
  178. }
  179. // Returns the requested
  180. // [CustomClass][google.cloud.speech.v2.CustomClass].
  181. rpc GetCustomClass(GetCustomClassRequest) returns (CustomClass) {
  182. option (google.api.http) = {
  183. get: "/v2/{name=projects/*/locations/*/customClasses/*}"
  184. };
  185. option (google.api.method_signature) = "name";
  186. }
  187. // Updates the [CustomClass][google.cloud.speech.v2.CustomClass].
  188. rpc UpdateCustomClass(UpdateCustomClassRequest)
  189. returns (google.longrunning.Operation) {
  190. option (google.api.http) = {
  191. patch: "/v2/{custom_class.name=projects/*/locations/*/customClasses/*}"
  192. body: "custom_class"
  193. };
  194. option (google.api.method_signature) = "custom_class,update_mask";
  195. option (google.longrunning.operation_info) = {
  196. response_type: "CustomClass"
  197. metadata_type: "OperationMetadata"
  198. };
  199. }
  200. // Deletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  201. rpc DeleteCustomClass(DeleteCustomClassRequest)
  202. returns (google.longrunning.Operation) {
  203. option (google.api.http) = {
  204. delete: "/v2/{name=projects/*/locations/*/customClasses/*}"
  205. };
  206. option (google.api.method_signature) = "name";
  207. option (google.longrunning.operation_info) = {
  208. response_type: "CustomClass"
  209. metadata_type: "OperationMetadata"
  210. };
  211. }
  212. // Undeletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  213. rpc UndeleteCustomClass(UndeleteCustomClassRequest)
  214. returns (google.longrunning.Operation) {
  215. option (google.api.http) = {
  216. post: "/v2/{name=projects/*/locations/*/customClasses/*}:undelete"
  217. body: "*"
  218. };
  219. option (google.api.method_signature) = "name";
  220. option (google.longrunning.operation_info) = {
  221. response_type: "CustomClass"
  222. metadata_type: "OperationMetadata"
  223. };
  224. }
  225. // Creates a [PhraseSet][google.cloud.speech.v2.PhraseSet].
  226. rpc CreatePhraseSet(CreatePhraseSetRequest)
  227. returns (google.longrunning.Operation) {
  228. option (google.api.http) = {
  229. post: "/v2/{parent=projects/*/locations/*}/phraseSets"
  230. body: "phrase_set"
  231. };
  232. option (google.api.method_signature) = "parent,phrase_set,phrase_set_id";
  233. option (google.longrunning.operation_info) = {
  234. response_type: "PhraseSet"
  235. metadata_type: "OperationMetadata"
  236. };
  237. }
  238. // Lists PhraseSets.
  239. rpc ListPhraseSets(ListPhraseSetsRequest) returns (ListPhraseSetsResponse) {
  240. option (google.api.http) = {
  241. get: "/v2/{parent=projects/*/locations/*}/phraseSets"
  242. };
  243. option (google.api.method_signature) = "parent";
  244. }
  245. // Returns the requested
  246. // [PhraseSet][google.cloud.speech.v2.PhraseSet].
  247. rpc GetPhraseSet(GetPhraseSetRequest) returns (PhraseSet) {
  248. option (google.api.http) = {
  249. get: "/v2/{name=projects/*/locations/*/phraseSets/*}"
  250. };
  251. option (google.api.method_signature) = "name";
  252. }
  253. // Updates the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  254. rpc UpdatePhraseSet(UpdatePhraseSetRequest)
  255. returns (google.longrunning.Operation) {
  256. option (google.api.http) = {
  257. patch: "/v2/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
  258. body: "phrase_set"
  259. };
  260. option (google.api.method_signature) = "phrase_set,update_mask";
  261. option (google.longrunning.operation_info) = {
  262. response_type: "PhraseSet"
  263. metadata_type: "OperationMetadata"
  264. };
  265. }
  266. // Deletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  267. rpc DeletePhraseSet(DeletePhraseSetRequest)
  268. returns (google.longrunning.Operation) {
  269. option (google.api.http) = {
  270. delete: "/v2/{name=projects/*/locations/*/phraseSets/*}"
  271. };
  272. option (google.api.method_signature) = "name";
  273. option (google.longrunning.operation_info) = {
  274. response_type: "PhraseSet"
  275. metadata_type: "OperationMetadata"
  276. };
  277. }
  278. // Undeletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  279. rpc UndeletePhraseSet(UndeletePhraseSetRequest)
  280. returns (google.longrunning.Operation) {
  281. option (google.api.http) = {
  282. post: "/v2/{name=projects/*/locations/*/phraseSets/*}:undelete"
  283. body: "*"
  284. };
  285. option (google.api.method_signature) = "name";
  286. option (google.longrunning.operation_info) = {
  287. response_type: "PhraseSet"
  288. metadata_type: "OperationMetadata"
  289. };
  290. }
  291. }
  292. // Request message for the
  293. // [CreateRecognizer][google.cloud.speech.v2.Speech.CreateRecognizer] method.
  294. message CreateRecognizerRequest {
  295. // Required. The Recognizer to create.
  296. Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];
  297. // If set, validate the request and preview the Recognizer, but do not
  298. // actually create it.
  299. bool validate_only = 2;
  300. // The ID to use for the Recognizer, which will become the final component of
  301. // the Recognizer's resource name.
  302. //
  303. // This value should be 4-63 characters, and valid characters
  304. // are /[a-z][0-9]-/.
  305. string recognizer_id = 3;
  306. // Required. The project and location where this Recognizer will be created.
  307. // The expected format is `projects/{project}/locations/{location}`.
  308. string parent = 4 [
  309. (google.api.field_behavior) = REQUIRED,
  310. (google.api.resource_reference) = {
  311. child_type: "speech.googleapis.com/Recognizer"
  312. }
  313. ];
  314. }
  315. // Represents the metadata of a long-running operation.
  316. message OperationMetadata {
  317. // The time the operation was created.
  318. google.protobuf.Timestamp create_time = 1;
  319. // The time the operation was last updated.
  320. google.protobuf.Timestamp update_time = 2;
  321. // The resource path for the target of the operation.
  322. string resource = 3;
  323. // The method that triggered the operation.
  324. string method = 4;
  325. // The [KMS key
  326. // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  327. // the content of the Operation is encrypted. The expected format is
  328. // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  329. string kms_key_name = 6 [(google.api.resource_reference) = {
  330. type: "cloudkms.googleapis.com/CryptoKey"
  331. }];
  332. // The [KMS key version
  333. // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  334. // with which content of the Operation is encrypted. The expected format is
  335. // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  336. string kms_key_version_name = 7 [(google.api.resource_reference) = {
  337. type: "cloudkms.googleapis.com/CryptoKeyVersion"
  338. }];
  339. // The request that spawned the Operation.
  340. oneof request {
  341. // The BatchRecognizeRequest that spawned the Operation.
  342. BatchRecognizeRequest batch_recognize_request = 8;
  343. // The CreateRecognizerRequest that spawned the Operation.
  344. CreateRecognizerRequest create_recognizer_request = 9;
  345. // The UpdateRecognizerRequest that spawned the Operation.
  346. UpdateRecognizerRequest update_recognizer_request = 10;
  347. // The DeleteRecognizerRequest that spawned the Operation.
  348. DeleteRecognizerRequest delete_recognizer_request = 11;
  349. // The UndeleteRecognizerRequest that spawned the Operation.
  350. UndeleteRecognizerRequest undelete_recognizer_request = 12;
  351. // The CreateCustomClassRequest that spawned the Operation.
  352. CreateCustomClassRequest create_custom_class_request = 13;
  353. // The UpdateCustomClassRequest that spawned the Operation.
  354. UpdateCustomClassRequest update_custom_class_request = 14;
  355. // The DeleteCustomClassRequest that spawned the Operation.
  356. DeleteCustomClassRequest delete_custom_class_request = 15;
  357. // The UndeleteCustomClassRequest that spawned the Operation.
  358. UndeleteCustomClassRequest undelete_custom_class_request = 16;
  359. // The CreatePhraseSetRequest that spawned the Operation.
  360. CreatePhraseSetRequest create_phrase_set_request = 17;
  361. // The UpdatePhraseSetRequest that spawned the Operation.
  362. UpdatePhraseSetRequest update_phrase_set_request = 18;
  363. // The DeletePhraseSetRequest that spawned the Operation.
  364. DeletePhraseSetRequest delete_phrase_set_request = 19;
  365. // The UndeletePhraseSetRequest that spawned the Operation.
  366. UndeletePhraseSetRequest undelete_phrase_set_request = 20;
  367. // The UpdateConfigRequest that spawned the Operation.
  368. UpdateConfigRequest update_config_request = 21;
  369. }
  370. // The percent progress of the Operation. Values can range from 0-100. If the
  371. // value is 100, then the operation is finished.
  372. int32 progress_percent = 22;
  373. // Specific metadata per RPC
  374. oneof metadata {
  375. // Metadata specific to the BatchRecognize method.
  376. BatchRecognizeMetadata batch_recognize_metadata = 23;
  377. }
  378. }
  379. // Request message for the
  380. // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
  381. message ListRecognizersRequest {
  382. // Required. The project and location of Recognizers to list. The expected
  383. // format is `projects/{project}/locations/{location}`.
  384. string parent = 1 [
  385. (google.api.field_behavior) = REQUIRED,
  386. (google.api.resource_reference) = {
  387. type: "locations.googleapis.com/Location"
  388. }
  389. ];
  390. // The maximum number of Recognizers to return. The service may return fewer
  391. // than this value. If unspecified, at most 20 Recognizers will be returned.
  392. // The maximum value is 20; values above 20 will be coerced to 20.
  393. int32 page_size = 2;
  394. // A page token, received from a previous
  395. // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] call.
  396. // Provide this to retrieve the subsequent page.
  397. //
  398. // When paginating, all other parameters provided to
  399. // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] must match
  400. // the call that provided the page token.
  401. string page_token = 3;
  402. // Whether, or not, to show resources that have been deleted.
  403. bool show_deleted = 4;
  404. }
  405. // Response message for the
  406. // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
  407. message ListRecognizersResponse {
  408. // The list of requested Recognizers.
  409. repeated Recognizer recognizers = 1;
  410. // A token, which can be sent as
  411. // [page_token][google.cloud.speech.v2.ListRecognizersRequest.page_token] to
  412. // retrieve the next page. If this field is omitted, there are no subsequent
  413. // pages. This token expires after 72 hours.
  414. string next_page_token = 2;
  415. }
  416. // Request message for the
  417. // [GetRecognizer][google.cloud.speech.v2.Speech.GetRecognizer] method.
  418. message GetRecognizerRequest {
  419. // Required. The name of the Recognizer to retrieve. The expected format is
  420. // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  421. string name = 1 [
  422. (google.api.field_behavior) = REQUIRED,
  423. (google.api.resource_reference) = {
  424. type: "speech.googleapis.com/Recognizer"
  425. }
  426. ];
  427. }
  428. // Request message for the
  429. // [UpdateRecognizer][google.cloud.speech.v2.Speech.UpdateRecognizer] method.
  430. message UpdateRecognizerRequest {
  431. // Required. The Recognizer to update.
  432. //
  433. // The Recognizer's `name` field is used to identify the Recognizer to update.
  434. // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  435. Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];
  436. // The list of fields to update. If empty, all non-default valued fields are
  437. // considered for update. Use `*` to update the entire Recognizer resource.
  438. google.protobuf.FieldMask update_mask = 2;
  439. // If set, validate the request and preview the updated Recognizer, but do not
  440. // actually update it.
  441. bool validate_only = 4;
  442. }
  443. // Request message for the
  444. // [DeleteRecognizer][google.cloud.speech.v2.Speech.DeleteRecognizer] method.
  445. message DeleteRecognizerRequest {
  446. // Required. The name of the Recognizer to delete.
  447. // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  448. string name = 1 [
  449. (google.api.field_behavior) = REQUIRED,
  450. (google.api.resource_reference) = {
  451. type: "speech.googleapis.com/Recognizer"
  452. }
  453. ];
  454. // If set, validate the request and preview the deleted Recognizer, but do not
  455. // actually delete it.
  456. bool validate_only = 2;
  457. // If set to true, and the Recognizer is not found, the request will succeed
  458. // and be a no-op (no Operation is recorded in this case).
  459. bool allow_missing = 4;
  460. // This checksum is computed by the server based on the value of other
  461. // fields. This may be sent on update, undelete, and delete requests to ensure
  462. // the client has an up-to-date value before proceeding.
  463. string etag = 3;
  464. }
  465. // Request message for the
  466. // [UndeleteRecognizer][google.cloud.speech.v2.Speech.UndeleteRecognizer]
  467. // method.
  468. message UndeleteRecognizerRequest {
  469. // Required. The name of the Recognizer to undelete.
  470. // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  471. string name = 1 [
  472. (google.api.field_behavior) = REQUIRED,
  473. (google.api.resource_reference) = {
  474. type: "speech.googleapis.com/Recognizer"
  475. }
  476. ];
  477. // If set, validate the request and preview the undeleted Recognizer, but do
  478. // not actually undelete it.
  479. bool validate_only = 3;
  480. // This checksum is computed by the server based on the value of other
  481. // fields. This may be sent on update, undelete, and delete requests to ensure
  482. // the client has an up-to-date value before proceeding.
  483. string etag = 4;
  484. }
  485. // A Recognizer message. Stores recognition configuration and metadata.
  486. message Recognizer {
  487. option (google.api.resource) = {
  488. type: "speech.googleapis.com/Recognizer"
  489. pattern: "projects/{project}/locations/{location}/recognizers/{recognizer}"
  490. style: DECLARATIVE_FRIENDLY
  491. };
  492. // Set of states that define the lifecycle of a Recognizer.
  493. enum State {
  494. // The default value. This value is used if the state is omitted.
  495. STATE_UNSPECIFIED = 0;
  496. // The Recognizer is active and ready for use.
  497. ACTIVE = 2;
  498. // This Recognizer has been deleted.
  499. DELETED = 4;
  500. }
  501. // Output only. The resource name of the Recognizer.
  502. // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  503. string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  504. // Output only. System-assigned unique identifier for the Recognizer.
  505. string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  506. // User-settable, human-readable name for the Recognizer. Must be 63
  507. // characters or less.
  508. string display_name = 3;
  509. // Required. Which model to use for recognition requests. Select the model
  510. // best suited to your domain to get best results.
  511. //
  512. // Supported models:
  513. //
  514. // - `latest_long`
  515. //
  516. // Best for long form content like media or conversation.
  517. //
  518. // - `latest_short`
  519. //
  520. // Best for short form content like commands or single shot directed speech.
  521. // When using this model, the service will stop transcribing audio after the
  522. // first utterance is detected and completed.
  523. //
  524. // When using this model,
  525. // [SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
  526. // is not supported; multi-channel audio is accepted, but only the first
  527. // channel will be processed and transcribed.
  528. string model = 4 [(google.api.field_behavior) = REQUIRED];
  529. // Required. The language of the supplied audio as a
  530. // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  531. //
  532. // Supported languages:
  533. //
  534. // - `en-US`
  535. //
  536. // - `en-GB`
  537. //
  538. // - `fr-FR`
  539. //
  540. // If additional languages are provided, recognition result will contain
  541. // recognition in the most likely language detected. The recognition result
  542. // will include the language tag of the language detected in the audio.
  543. // When you create or update a Recognizer, these values are
  544. // stored in normalized BCP-47 form. For example, "en-us" is stored as
  545. // "en-US".
  546. repeated string language_codes = 17 [(google.api.field_behavior) = REQUIRED];
  547. // Default configuration to use for requests with this Recognizer.
  548. // This can be overwritten by inline configuration in the
  549. // [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
  550. // field.
  551. RecognitionConfig default_recognition_config = 6;
  552. // Allows users to store small amounts of arbitrary data.
  553. // Both the key and the value must be 63 characters or less each.
  554. // At most 100 annotations.
  555. map<string, string> annotations = 7;
  556. // Output only. The Recognizer lifecycle state.
  557. State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
  558. // Output only. Creation time.
  559. google.protobuf.Timestamp create_time = 9
  560. [(google.api.field_behavior) = OUTPUT_ONLY];
  561. // Output only. The most recent time this Recognizer was modified.
  562. google.protobuf.Timestamp update_time = 10
  563. [(google.api.field_behavior) = OUTPUT_ONLY];
  564. // Output only. The time at which this Recognizer was requested for deletion.
  565. google.protobuf.Timestamp delete_time = 11
  566. [(google.api.field_behavior) = OUTPUT_ONLY];
  567. // Output only. The time at which this Recognizer will be purged.
  568. google.protobuf.Timestamp expire_time = 14
  569. [(google.api.field_behavior) = OUTPUT_ONLY];
  570. // Output only. This checksum is computed by the server based on the value of
  571. // other fields. This may be sent on update, undelete, and delete requests to
  572. // ensure the client has an up-to-date value before proceeding.
  573. string etag = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
  574. // Output only. Whether or not this Recognizer is in the process of being
  575. // updated.
  576. bool reconciling = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
  577. // Output only. The [KMS key
  578. // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  579. // the Recognizer is encrypted. The expected format is
  580. // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  581. string kms_key_name = 15 [
  582. (google.api.field_behavior) = OUTPUT_ONLY,
  583. (google.api.resource_reference) = {
  584. type: "cloudkms.googleapis.com/CryptoKey"
  585. }
  586. ];
  587. // Output only. The [KMS key version
  588. // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  589. // with which the Recognizer is encrypted. The expected format is
  590. // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  591. string kms_key_version_name = 16 [
  592. (google.api.field_behavior) = OUTPUT_ONLY,
  593. (google.api.resource_reference) = {
  594. type: "cloudkms.googleapis.com/CryptoKeyVersion"
  595. }
  596. ];
  597. }
  598. // Automatically detected decoding parameters.
  599. // Supported for the following encodings:
  600. // * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
  601. // * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
  602. // * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
  603. // * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
  604. // * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
  605. // * FLAC: FLAC frames in the "native FLAC" container format.
  606. // * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
  607. // * OGG_OPUS: Opus audio frames in an Ogg container.
  608. // * WEBM_OPUS: Opus audio frames in a WebM container.
  609. message AutoDetectDecodingConfig {}
  610. // Explicitly specified decoding parameters.
  611. message ExplicitDecodingConfig {
  612. // Supported audio data encodings.
  613. enum AudioEncoding {
  614. // Default value. This value is unused.
  615. AUDIO_ENCODING_UNSPECIFIED = 0;
  616. // Headerless 16-bit signed little-endian PCM samples.
  617. LINEAR16 = 1;
  618. // Headerless 8-bit companded mulaw samples.
  619. MULAW = 2;
  620. // Headerless 8-bit companded alaw samples.
  621. ALAW = 3;
  622. }
  623. // Required. Encoding of the audio data sent for recognition.
  624. AudioEncoding encoding = 1 [(google.api.field_behavior) = REQUIRED];
  625. // Sample rate in Hertz of the audio data sent for recognition. Valid
  626. // values are: 8000-48000. 16000 is optimal. For best results, set the
  627. // sampling rate of the audio source to 16000 Hz. If that's not possible, use
  628. // the native sample rate of the audio source (instead of re-sampling).
  629. // Supported for the following encodings:
  630. // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  631. // * MULAW: Headerless 8-bit companded mulaw samples.
  632. // * ALAW: Headerless 8-bit companded alaw samples.
  633. int32 sample_rate_hertz = 2;
  634. // Number of channels present in the audio data sent for recognition.
  635. // Supported for the following encodings:
  636. // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  637. // * MULAW: Headerless 8-bit companded mulaw samples.
  638. // * ALAW: Headerless 8-bit companded alaw samples.
  639. int32 audio_channel_count = 3;
  640. }
  641. // Configuration to enable speaker diarization.
  642. message SpeakerDiarizationConfig {
  643. // Required. Minimum number of speakers in the conversation. This range gives
  644. // you more flexibility by allowing the system to automatically determine the
  645. // correct number of speakers. If not set, the default value is 2.
  646. //
  647. // To fix the number of speakers detected in the audio, set
  648. // `min_speaker_count` = `max_speaker_count`.
  649. int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];
  650. // Required. Maximum number of speakers in the conversation. Valid values are:
  651. // 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
  652. // by allowing the system to automatically determine the correct number of
  653. // speakers.
  654. int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
  655. }
  656. // Available recognition features.
  657. message RecognitionFeatures {
  658. // Options for how to recognize multi-channel audio.
  659. enum MultiChannelMode {
  660. // Default value for the multi-channel mode. If the audio contains
  661. // multiple channels, only the first channel will be transcribed; other
  662. // channels will be ignored.
  663. MULTI_CHANNEL_MODE_UNSPECIFIED = 0;
  664. // If selected, each channel in the provided audio is transcribed
  665. // independently. This cannot be selected if the selected
  666. // [model][google.cloud.speech.v2.Recognizer.model] is `latest_short`.
  667. SEPARATE_RECOGNITION_PER_CHANNEL = 1;
  668. }
  669. // If set to `true`, the server will attempt to filter out profanities,
  670. // replacing all but the initial character in each filtered word with
  671. // asterisks, for instance, "f***". If set to `false` or omitted, profanities
  672. // won't be filtered out.
  673. bool profanity_filter = 1;
  674. // If `true`, the top result includes a list of words and the start and end
  675. // time offsets (timestamps) for those words. If `false`, no word-level time
  676. // offset information is returned. The default is `false`.
  677. bool enable_word_time_offsets = 2;
  678. // If `true`, the top result includes a list of words and the confidence for
  679. // those words. If `false`, no word-level confidence information is returned.
  680. // The default is `false`.
  681. bool enable_word_confidence = 3;
  682. // If `true`, adds punctuation to recognition result hypotheses. This feature
  683. // is only available in select languages. The default `false` value does not
  684. // add punctuation to result hypotheses.
  685. bool enable_automatic_punctuation = 4;
  686. // The spoken punctuation behavior for the call. If `true`, replaces spoken
  687. // punctuation with the corresponding symbols in the request. For example,
  688. // "how are you question mark" becomes "how are you?". See
  689. // https://cloud.google.com/speech-to-text/docs/spoken-punctuation for
  690. // support. If `false`, spoken punctuation is not replaced.
  691. bool enable_spoken_punctuation = 14;
  692. // The spoken emoji behavior for the call. If `true`, adds spoken emoji
  693. // formatting for the request. This will replace spoken emojis with the
  694. // corresponding Unicode symbols in the final transcript. If `false`, spoken
  695. // emojis are not replaced.
  696. bool enable_spoken_emojis = 15;
  697. // Mode for recognizing multi-channel audio.
  698. MultiChannelMode multi_channel_mode = 17;
  699. // Configuration to enable speaker diarization and set additional
  700. // parameters to make diarization better suited for your application.
  701. // When this is enabled, we send all the words from the beginning of the
  702. // audio for the top alternative in every consecutive STREAMING responses.
  703. // This is done in order to improve our speaker tags as our models learn to
  704. // identify the speakers in the conversation over time.
  705. // For non-streaming requests, the diarization results will be provided only
  706. // in the top alternative of the FINAL SpeechRecognitionResult.
  707. SpeakerDiarizationConfig diarization_config = 9;
  708. // Maximum number of recognition hypotheses to be returned.
  709. // The server may return fewer than `max_alternatives`.
  710. // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
  711. // one. If omitted, will return a maximum of one.
  712. int32 max_alternatives = 16;
  713. }
  714. // Provides "hints" to the speech recognizer to favor specific words and phrases
  715. // in the results. Phrase sets can be specified as an inline resource, or a
  716. // reference to an existing phrase set resource.
  717. message SpeechAdaptation {
  718. // A biasing phrase set, which can be either a string referencing the name of
  719. // an existing phrase set resource, or an inline definition of a phrase set.
  720. message AdaptationPhraseSet {
  721. oneof value {
  722. // The name of an existing phrase set resource. The user must have read
  723. // access to the resource and it must not be deleted.
  724. string phrase_set = 1 [(google.api.resource_reference) = {
  725. type: "speech.googleapis.com/PhraseSet"
  726. }];
  727. // An inline defined phrase set.
  728. PhraseSet inline_phrase_set = 2;
  729. }
  730. }
  731. // A list of inline or referenced phrase sets.
  732. repeated AdaptationPhraseSet phrase_sets = 1;
  733. // A list of inline custom classes. Existing custom class resources can be
  734. // referenced directly in a phrase set.
  735. repeated CustomClass custom_classes = 2;
  736. }
  737. // Provides information to the Recognizer that specifies how to process the
  738. // recognition request.
  739. message RecognitionConfig {
  740. // Decoding parameters for audio being sent for recognition.
  741. oneof decoding_config {
  742. // Automatically detect decoding parameters.
  743. // Preferred for supported formats.
  744. AutoDetectDecodingConfig auto_decoding_config = 7;
  745. // Explicitly specified decoding parameters.
  746. // Required if using headerless PCM audio (linear16, mulaw, alaw).
  747. ExplicitDecodingConfig explicit_decoding_config = 8;
  748. }
  749. // Speech recognition features to enable.
  750. RecognitionFeatures features = 2;
  751. // Speech adaptation context that weights recognizer predictions for specific
  752. // words and phrases.
  753. SpeechAdaptation adaptation = 6;
  754. }
  755. // Request message for the
  756. // [Recognize][google.cloud.speech.v2.Speech.Recognize] method. Either
  757. // `content` or `uri` must be supplied. Supplying both or neither returns
  758. // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content
  759. // limits](https://cloud.google.com/speech-to-text/quotas#content).
  760. message RecognizeRequest {
  761. // Required. The name of the Recognizer to use during recognition. The
  762. // expected format is
  763. // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  764. string recognizer = 3 [
  765. (google.api.field_behavior) = REQUIRED,
  766. (google.api.resource_reference) = {
  767. type: "speech.googleapis.com/Recognizer"
  768. }
  769. ];
  770. // Features and audio metadata to use for the Automatic Speech Recognition.
  771. // This field in combination with the
  772. // [config_mask][google.cloud.speech.v2.RecognizeRequest.config_mask] field
  773. // can be used to override parts of the
  774. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  775. // of the Recognizer resource.
  776. RecognitionConfig config = 1;
  777. // The list of fields in
  778. // [config][google.cloud.speech.v2.RecognizeRequest.config] that override the
  779. // values in the
  780. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  781. // of the recognizer during this recognition request. If no mask is provided,
  782. // all non-default valued fields in
  783. // [config][google.cloud.speech.v2.RecognizeRequest.config] override the
  784. // values in the recognizer for this recognition request. If a mask is
  785. // provided, only the fields listed in the mask override the config in the
  786. // recognizer for this recognition request. If a wildcard (`*`) is provided,
  787. // [config][google.cloud.speech.v2.RecognizeRequest.config] completely
  788. // overrides and replaces the config in the recognizer for this recognition
  789. // request.
  790. google.protobuf.FieldMask config_mask = 8;
  791. // The audio source, which is either inline content or a Google Cloud
  792. // Storage URI.
  793. oneof audio_source {
  794. // The audio data bytes encoded as specified in
  795. // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. As
  796. // with all bytes fields, proto buffers use a pure binary representation,
  797. // whereas JSON representations use base64.
  798. bytes content = 5;
  799. // URI that points to a file that contains audio data bytes as specified in
  800. // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. The file
  801. // must not be compressed (for example, gzip). Currently, only Google Cloud
  802. // Storage URIs are supported, which must be specified in the following
  803. // format: `gs://bucket_name/object_name` (other URI formats return
  804. // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more
  805. // information, see [Request
  806. // URIs](https://cloud.google.com/storage/docs/reference-uris).
  807. string uri = 6;
  808. }
  809. }
  810. // Metadata about the recognition request and response.
  811. message RecognitionResponseMetadata {
  812. // When available, billed audio seconds for the corresponding request.
  813. google.protobuf.Duration total_billed_duration = 6;
  814. }
  815. // Alternative hypotheses (a.k.a. n-best list).
  816. message SpeechRecognitionAlternative {
  817. // Transcript text representing the words that the user spoke.
  818. string transcript = 1;
  819. // The confidence estimate between 0.0 and 1.0. A higher number
  820. // indicates an estimated greater likelihood that the recognized words are
  821. // correct. This field is set only for the top alternative of a non-streaming
  822. // result or, of a streaming result where
  823. // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  824. // set to `true`. This field is not guaranteed to be accurate and users should
  825. // not rely on it to be always provided. The default of 0.0 is a sentinel
  826. // value indicating `confidence` was not set.
  827. float confidence = 2;
  828. // A list of word-specific information for each recognized word.
  829. // When
  830. // [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
  831. // is true, you will see all the words from the beginning of the audio.
  832. repeated WordInfo words = 3;
  833. }
  834. // Word-specific information for recognized words.
  835. message WordInfo {
  836. // Time offset relative to the beginning of the audio,
  837. // and corresponding to the start of the spoken word.
  838. // This field is only set if
  839. // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  840. // is `true` and only in the top hypothesis. This is an experimental feature
  841. // and the accuracy of the time offset can vary.
  842. google.protobuf.Duration start_offset = 1;
  843. // Time offset relative to the beginning of the audio,
  844. // and corresponding to the end of the spoken word.
  845. // This field is only set if
  846. // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  847. // is `true` and only in the top hypothesis. This is an experimental feature
  848. // and the accuracy of the time offset can vary.
  849. google.protobuf.Duration end_offset = 2;
  850. // The word corresponding to this set of information.
  851. string word = 3;
  852. // The confidence estimate between 0.0 and 1.0. A higher number
  853. // indicates an estimated greater likelihood that the recognized words are
  854. // correct. This field is set only for the top alternative of a non-streaming
  855. // result or, of a streaming result where
  856. // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  857. // set to `true`. This field is not guaranteed to be accurate and users should
  858. // not rely on it to be always provided. The default of 0.0 is a sentinel
  859. // value indicating `confidence` was not set.
  860. float confidence = 4;
  861. // A distinct label is assigned for every speaker within the audio. This field
  862. // specifies which one of those speakers was detected to have spoken this
  863. // word. `speaker_label` is set if
  864. // [enable_speaker_diarization][google.cloud.speech.v2.SpeakerDiarizationConfig.enable_speaker_diarization]
  865. // is `true` and only in the top alternative.
  866. string speaker_label = 6;
  867. }
  868. // A speech recognition result corresponding to a portion of the audio.
  869. message SpeechRecognitionResult {
  870. // May contain one or more recognition hypotheses. These alternatives are
  871. // ordered in terms of accuracy, with the top (first) alternative being the
  872. // most probable, as ranked by the recognizer.
  873. repeated SpeechRecognitionAlternative alternatives = 1;
  874. // For multi-channel audio, this is the channel number corresponding to the
  875. // recognized result for the audio from that channel.
  876. // For `audio_channel_count` = `N`, its output values can range from `1` to
  877. // `N`.
  878. int32 channel_tag = 2;
  879. // Time offset of the end of this result relative to the beginning of the
  880. // audio.
  881. google.protobuf.Duration result_end_offset = 4;
  882. // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  883. // language tag of the language in this result. This language code was
  884. // detected to have the most likelihood of being spoken in the audio.
  885. string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
  886. }
  887. // Response message for the
  888. // [Recognize][google.cloud.speech.v2.Speech.Recognize] method.
  889. message RecognizeResponse {
  890. // Sequential list of transcription results corresponding to sequential
  891. // portions of audio.
  892. repeated SpeechRecognitionResult results = 3;
  893. // Metadata about the recognition.
  894. RecognitionResponseMetadata metadata = 2;
  895. }
  896. // Available recognition features specific to streaming recognition requests.
  897. message StreamingRecognitionFeatures {
  898. // Events that a timeout can be set on for voice activity.
  899. message VoiceActivityTimeout {
  900. // Duration to timeout the stream if no speech begins. If this is set and
  901. // no speech is detected in this duration at the start of the stream, the
  902. // server will close the stream.
  903. google.protobuf.Duration speech_start_timeout = 1;
  904. // Duration to timeout the stream after speech ends. If this is set and no
  905. // speech is detected in this duration after speech was detected, the server
  906. // will close the stream.
  907. google.protobuf.Duration speech_end_timeout = 2;
  908. }
  909. // If `true`, responses with voice activity speech events will be returned as
  910. // they are detected.
  911. bool enable_voice_activity_events = 1;
  912. // Whether or not to stream interim results to the client. If set to true,
  913. // interim results will be streamed to the client. Otherwise, only the final
  914. // response will be streamed back.
  915. bool interim_results = 2;
  916. // If set, the server will automatically close the stream after the specified
  917. // duration has elapsed after the last VOICE_ACTIVITY speech event has been
  918. // sent. The field `voice_activity_events` must also be set to true.
  919. VoiceActivityTimeout voice_activity_timeout = 3;
  920. }
  921. // Provides configuration information for the StreamingRecognize request.
  922. message StreamingRecognitionConfig {
  923. // Required. Features and audio metadata to use for the Automatic Speech
  924. // Recognition. This field in combination with the
  925. // [config_mask][google.cloud.speech.v2.StreamingRecognitionConfig.config_mask]
  926. // field can be used to override parts of the
  927. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  928. // of the Recognizer resource.
  929. RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];
  930. // The list of fields in
  931. // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] that
  932. // override the values in the
  933. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  934. // of the recognizer during this recognition request. If no mask is provided,
  935. // all non-default valued fields in
  936. // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] override
  937. // the values in the recognizer for this recognition request. If a mask is
  938. // provided, only the fields listed in the mask override the config in the
  939. // recognizer for this recognition request. If a wildcard (`*`) is provided,
  940. // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config]
  941. // completely overrides and replaces the config in the recognizer for this
  942. // recognition request.
  943. google.protobuf.FieldMask config_mask = 3;
  944. // Speech recognition features to enable specific to streaming audio
  945. // recognition requests.
  946. StreamingRecognitionFeatures streaming_features = 2;
  947. }
  948. // Request message for the
  949. // [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
  950. // method. Multiple
  951. // [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
  952. // messages are sent. The first message must contain a
  953. // [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
  954. // optionally a
  955. // [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
  956. // message and must not contain
  957. // [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
  958. // subsequent messages must contain
  959. // [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] and must not
  960. // contain a
  961. // [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
  962. // message.
  963. message StreamingRecognizeRequest {
  964. // Required. Streaming recognition should start with an initial request having
  965. // a `recognizer`. Subsequent requests carry the audio data to be recognized.
  966. //
  967. // The initial request with configuration can be omitted if the Recognizer
  968. // being used has a
  969. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config].
  970. string recognizer = 3 [
  971. (google.api.field_behavior) = REQUIRED,
  972. (google.api.resource_reference) = {
  973. type: "speech.googleapis.com/Recognizer"
  974. }
  975. ];
  976. oneof streaming_request {
  977. // StreamingRecognitionConfig to be used in this recognition attempt.
  978. // If provided, it will override the default RecognitionConfig stored in the
  979. // Recognizer.
  980. StreamingRecognitionConfig streaming_config = 6;
  981. // Inline audio bytes to be Recognized.
  982. bytes audio = 5;
  983. }
  984. }
  985. // Request message for the
  986. // [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
  987. // method.
  988. message BatchRecognizeRequest {
  989. // Required. Resource name of the recognizer to be used for ASR.
  990. string recognizer = 1 [
  991. (google.api.field_behavior) = REQUIRED,
  992. (google.api.resource_reference) = {
  993. type: "speech.googleapis.com/Recognizer"
  994. }
  995. ];
  996. // Features and audio metadata to use for the Automatic Speech Recognition.
  997. // This field in combination with the
  998. // [config_mask][google.cloud.speech.v2.BatchRecognizeRequest.config_mask]
  999. // field can be used to override parts of the
  1000. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  1001. // of the Recognizer resource.
  1002. RecognitionConfig config = 4;
  1003. // The list of fields in
  1004. // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] that override
  1005. // the values in the
  1006. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  1007. // of the recognizer during this recognition request. If no mask is provided,
  1008. // all given fields in
  1009. // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] override the
  1010. // values in the recognizer for this recognition request. If a mask is
  1011. // provided, only the fields listed in the mask override the config in the
  1012. // recognizer for this recognition request. If a wildcard (`*`) is provided,
  1013. // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] completely
  1014. // overrides and replaces the config in the recognizer for this recognition
  1015. // request.
  1016. google.protobuf.FieldMask config_mask = 5;
  1017. // Audio files with file metadata for ASR.
  1018. repeated BatchRecognizeFileMetadata files = 3;
  1019. }
  1020. // Response message for
  1021. // [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize] that is
  1022. // packaged into a longrunning [Operation][google.longrunning.Operation].
  1023. message BatchRecognizeResponse {
  1024. // Map from filename to the final result for that file.
  1025. map<string, BatchRecognizeFileResult> results = 1;
  1026. }
  1027. // Final results for a single file.
  1028. message BatchRecognizeFileResult {
  1029. // The GCS URI to which recognition results were written.
  1030. string uri = 1;
  1031. // Error if one was encountered.
  1032. google.rpc.Status error = 2;
  1033. }
  1034. // Metadata about transcription for a single file (for example, progress
  1035. // percent).
  1036. message BatchRecognizeTranscriptionMetadata {
  1037. // How much of the file has been transcribed so far.
  1038. int32 progress_percent = 1;
  1039. // Error if one was encountered.
  1040. google.rpc.Status error = 2;
  1041. // The GCS URI to which recognition results will be written.
  1042. string uri = 3;
  1043. }
  1044. // Operation metadata for
  1045. // [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize].
  1046. message BatchRecognizeMetadata {
  1047. // Map from provided filename to the transcription metadata for that file.
  1048. map<string, BatchRecognizeTranscriptionMetadata> transcription_metadata = 1;
  1049. }
  1050. // Metadata about a single file in a batch for BatchRecognize.
  1051. message BatchRecognizeFileMetadata {
  1052. // The audio source, which is a Google Cloud Storage URI.
  1053. oneof audio_source {
  1054. // Cloud Storage URI for the audio file.
  1055. string uri = 1;
  1056. }
  1057. // Features and audio metadata to use for the Automatic Speech Recognition.
  1058. // This field in combination with the
  1059. // [config_mask][google.cloud.speech.v2.BatchRecognizeFileMetadata.config_mask]
  1060. // field can be used to override parts of the
  1061. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  1062. // of the Recognizer resource as well as the
  1063. // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] at the
  1064. // request level.
  1065. RecognitionConfig config = 4;
  1066. // The list of fields in
  1067. // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] that
  1068. // override the values in the
  1069. // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  1070. // of the recognizer during this recognition request. If no mask is provided,
  1071. // all non-default valued fields in
  1072. // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] override
  1073. // the values in the recognizer for this recognition request. If a mask is
  1074. // provided, only the fields listed in the mask override the config in the
  1075. // recognizer for this recognition request. If a wildcard (`*`) is provided,
  1076. // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
  1077. // completely overrides and replaces the config in the recognizer for this
  1078. // recognition request.
  1079. google.protobuf.FieldMask config_mask = 5;
  1080. }
// A streaming speech recognition result corresponding to a portion of the audio
// that is currently being processed.
message StreamingRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // If `false`, this
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult]
  // represents an interim result that may change. If `true`, this is the final
  // time the speech service will return this particular
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult],
  // the recognizer will not return any further hypotheses for this portion of
  // the transcript and corresponding audio.
  bool is_final = 2;

  // An estimate of the likelihood that the recognizer will not change its guess
  // about this interim result. Values range from 0.0 (completely unstable)
  // to 1.0 (completely stable). This field is only provided for interim results
  // ([is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`).
  // The default of 0.0 is a sentinel value indicating `stability` was not set.
  float stability = 3;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`. Channels are 1-indexed; 0 (the proto default) means the field was not
  // set (e.g. single-channel audio).
  int32 channel_tag = 5;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected to have the most likelihood of being spoken in the audio.
  string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// `StreamingRecognizeResponse` is the only message returned to the client by
// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
// messages are streamed back to the client. If there is no recognizable
// audio then no messages are streamed back to the client.
//
// Here are some examples of `StreamingRecognizeResponse`s that might
// be returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
//
// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
//
// 4. results { alternatives { transcript: "to be or not to be"
//                             confidence: 0.92 }
//              alternatives { transcript: "to bee or not to bee" }
//    is_final: true }
//
// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
//
// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
//    results { alternatives { transcript: " the question" } stability: 0.01 }
//
// 7. results { alternatives { transcript: " that is the question"
//                             confidence: 0.98 }
//    alternatives { transcript: " that was the question" }
//    is_final: true }
//
// Notes:
//
// - Only two of the above responses #4 and #7 contain final results; they are
//   indicated by `is_final: true`. Concatenating these together generates the
//   full transcript: "to be or not to be that is the question".
//
// - The others contain interim `results`. #3 and #6 contain two interim
//   `results`: the first portion has a high stability and is less likely to
//   change; the second portion has a low stability and is very likely to
//   change. A UI designer might choose to show only high stability `results`.
//
// - The specific `stability` and `confidence` values shown above are only for
//   illustrative purposes. Actual values may vary.
//
// - In each response, only one of these fields will be set:
//     `error`,
//     `speech_event_type`, or
//     one or more (repeated) `results`.
//
// NOTE(review): no `error` field is declared in this message; the mention of
// `error` above appears to be a holdover from an earlier API version — confirm
// against the service documentation.
message StreamingRecognizeResponse {
  // Indicates the type of speech event.
  enum SpeechEventType {
    // No speech event specified.
    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;

    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio and will close the gRPC bidirectional
    // stream. This event is only sent if there was a force cutoff due to
    // silence being detected early. This event is only available through the
    // `latest_short` [model][google.cloud.speech.v2.Recognizer.model].
    END_OF_SINGLE_UTTERANCE = 1;

    // This event indicates that the server has detected the beginning of human
    // voice activity in the stream. This event can be returned multiple times
    // if speech starts and stops repeatedly throughout the stream. This event
    // is only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_BEGIN = 2;

    // This event indicates that the server has detected the end of human voice
    // activity in the stream. This event can be returned multiple times if
    // speech starts and stops repeatedly throughout the stream. This event is
    // only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_END = 3;
  }

  // This repeated list contains zero or more results that
  // correspond to consecutive portions of the audio currently being processed.
  // It contains zero or one
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`true`
  // result (the newly settled portion), followed by zero or more
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`
  // results (the interim results).
  repeated StreamingRecognitionResult results = 6;

  // Indicates the type of speech event.
  SpeechEventType speech_event_type = 3;

  // Time offset between the beginning of the audio and event emission.
  google.protobuf.Duration speech_event_offset = 7;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 5;
}
// Message representing the config for the Speech-to-Text API. This includes an
// optional [KMS key](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
// with which incoming data will be encrypted.
message Config {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Config"
    pattern: "projects/{project}/locations/{location}/config"
  };

  // Output only. The name of the config resource. There is exactly one config
  // resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. An optional [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that if
  // present, will be used to encrypt Speech-to-Text resources at-rest. Updating
  // this key will not encrypt existing resources using this key; only new
  // resources will be encrypted using this key. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Request message for the
// [GetConfig][google.cloud.speech.v2.Speech.GetConfig] method.
message GetConfigRequest {
  // Required. The name of the config to retrieve. There is exactly one config
  // resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "speech.googleapis.com/Config" }
  ];
}
// Request message for the
// [UpdateConfig][google.cloud.speech.v2.Speech.UpdateConfig] method.
message UpdateConfigRequest {
  // Required. The config to update.
  //
  // The config's `name` field is used to identify the config to be updated.
  // The expected format is `projects/{project}/locations/{location}/config`.
  Config config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated.
  google.protobuf.FieldMask update_mask = 2;
}
// CustomClass for biasing in speech recognition. Used to define a set of words
// or phrases that represents a common concept or theme likely to appear in your
// audio, for example a list of passenger ship names.
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
    style: DECLARATIVE_FRIENDLY
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // Set of states that define the lifecycle of a CustomClass.
  //
  // NOTE(review): values 1 and 3 are unused (no `reserved` statement visible in
  // this chunk); values here are intentionally non-contiguous.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing
    // unset values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This CustomClass has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the CustomClass.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the CustomClass.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the CustomClass. Must be 63
  // characters or less.
  string display_name = 4;

  // A collection of class items.
  repeated ClassItem items = 5;

  // Output only. The CustomClass lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this CustomClass is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
// PhraseSet for biasing in speech recognition. A PhraseSet is used to provide
// "hints" to the speech recognizer to favor specific words and phrases in the
// results.
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
    style: DECLARATIVE_FRIENDLY
  };

  // A Phrase contains words and phrase "hints" so that the speech recognition
  // is more likely to recognize them. This can be used to improve the accuracy
  // for specific words and phrases, for example, if specific commands are
  // typically spoken by the user. This can also be used to add additional words
  // to the vocabulary of the recognizer.
  //
  // List items can also include CustomClass references containing groups of
  // words that represent common concepts that occur in natural language.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level.
    // Positive value will increase the probability that a specific phrase will
    // be recognized over other similar sounding phrases. The higher the boost,
    // the higher the chance of false positive recognition as well. Negative
    // boost values would correspond to anti-biasing. Anti-biasing is not
    // enabled, so negative boost will simply be ignored. Though `boost` can
    // accept a wide range of positive values, most use cases are best served
    // with values between 0 and 20. We recommend using a binary search approach
    // to finding the optimal value for your use case. Speech recognition
    // will skip PhraseSets with a boost value of 0.
    float boost = 2;
  }

  // Set of states that define the lifecycle of a PhraseSet.
  //
  // NOTE(review): values 1 and 3 are unused (no `reserved` statement visible in
  // this chunk); values here are intentionally non-contiguous.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing
    // unset values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This PhraseSet has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the PhraseSet.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the PhraseSet.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A list of word and phrases.
  repeated Phrase phrases = 3;

  // Hint Boost. Positive value will increase the probability that a specific
  // phrase will be recognized over other similar sounding phrases. The higher
  // the boost, the higher the chance of false positive recognition as well.
  // Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
  // binary search approach to finding the optimal value for your use case.
  float boost = 4;

  // User-settable, human-readable name for the PhraseSet. Must be 63
  // characters or less.
  string display_name = 5;

  // Output only. The PhraseSet lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this PhraseSet is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
// Request message for the
// [CreateCustomClass][google.cloud.speech.v2.Speech.CreateCustomClass] method.
message CreateCustomClassRequest {
  // Required. The CustomClass to create.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the CustomClass, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the CustomClass, which will become the final component of
  // the CustomClass's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string custom_class_id = 3;

  // Required. The project and location where this CustomClass will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/CustomClass"
    }
  ];
}
// Request message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
message ListCustomClassesRequest {
  // Required. The project and location of CustomClass resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Number of results per requests. A valid page_size ranges from 0 to 20
  // inclusive. If the page_size is zero or unspecified, a page size of 5 will
  // be chosen. If the page size exceeds 20, it will be coerced down to 20. Note
  // that a call might return fewer results than the requested page size.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] must
  // match the call that provided the page token.
  string page_token = 3;

  // Whether, or not, to show resources that have been deleted.
  bool show_deleted = 4;
}
// Response message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
message ListCustomClassesResponse {
  // The list of requested CustomClasses.
  repeated CustomClass custom_classes = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListCustomClassesRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}
// Request message for the
// [GetCustomClass][google.cloud.speech.v2.Speech.GetCustomClass] method.
message GetCustomClassRequest {
  // Required. The name of the CustomClass to retrieve. The expected format is
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];
}
// Request message for the
// [UpdateCustomClass][google.cloud.speech.v2.Speech.UpdateCustomClass] method.
message UpdateCustomClassRequest {
  // Required. The CustomClass to update.
  //
  // The CustomClass's `name` field is used to identify the CustomClass to
  // update. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated. If empty, all fields are considered for
  // update.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated CustomClass, but do
  // not actually update it.
  bool validate_only = 4;
}
// Request message for the
// [DeleteCustomClass][google.cloud.speech.v2.Speech.DeleteCustomClass] method.
message DeleteCustomClassRequest {
  // Required. The name of the CustomClass to delete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the deleted CustomClass, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the CustomClass is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}
// Request message for the
// [UndeleteCustomClass][google.cloud.speech.v2.Speech.UndeleteCustomClass]
// method.
message UndeleteCustomClassRequest {
  // Required. The name of the CustomClass to undelete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the undeleted CustomClass, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}
// Request message for the
// [CreatePhraseSet][google.cloud.speech.v2.Speech.CreatePhraseSet] method.
message CreatePhraseSetRequest {
  // Required. The PhraseSet to create.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the PhraseSet, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the PhraseSet, which will become the final component of
  // the PhraseSet's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string phrase_set_id = 3;

  // Required. The project and location where this PhraseSet will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/PhraseSet"
    }
  ];
}
// Request message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsRequest {
  // Required. The project and location of PhraseSet resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of PhraseSets to return. The service may return fewer
  // than this value. If unspecified, at most 20 PhraseSets will be returned.
  // The maximum value is 20; values above 20 will be coerced to 20.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] must match
  // the call that provided the page token.
  string page_token = 3;

  // Whether, or not, to show resources that have been deleted.
  bool show_deleted = 4;
}
// Response message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsResponse {
  // The list of requested PhraseSets.
  repeated PhraseSet phrase_sets = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListPhraseSetsRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}
// Request message for the
// [GetPhraseSet][google.cloud.speech.v2.Speech.GetPhraseSet] method.
message GetPhraseSetRequest {
  // Required. The name of the PhraseSet to retrieve. The expected format is
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];
}
// Request message for the
// [UpdatePhraseSet][google.cloud.speech.v2.Speech.UpdatePhraseSet] method.
message UpdatePhraseSetRequest {
  // Required. The PhraseSet to update.
  //
  // The PhraseSet's `name` field is used to identify the PhraseSet to update.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire PhraseSet resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated PhraseSet, but do not
  // actually update it.
  bool validate_only = 4;
}
// Request message for the
// [DeletePhraseSet][google.cloud.speech.v2.Speech.DeletePhraseSet] method.
message DeletePhraseSetRequest {
  // Required. The name of the PhraseSet to delete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the deleted PhraseSet, but do not
  // actually delete it.
  bool validate_only = 2;

  // If set to true, and the PhraseSet is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}
// Request message for the
// [UndeletePhraseSet][google.cloud.speech.v2.Speech.UndeletePhraseSet]
// method.
message UndeletePhraseSetRequest {
  // Required. The name of the PhraseSet to undelete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the undeleted PhraseSet, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}