
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1;

import "google/api/field_behavior.proto";

option csharp_namespace = "Google.Cloud.AutoML.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1";
option ruby_package = "Google::Cloud::AutoML::V1";

// Input configuration for [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] action.
//
// The format of input depends on the dataset_metadata of the Dataset into
// which the import is happening. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise. Additionally any input .CSV file
// by itself must be 100MB or smaller, unless specified otherwise.
// If an "example" file (that is, image, video etc.) with identical content
// (even if it has a different `GCS_FILE_PATH`) is mentioned multiple times,
// then its label, bounding boxes etc. are appended. The same file should
// always be provided with the same `ML_USE` and `GCS_FILE_PATH`; if it is
// not, then these values are nondeterministically selected from the given
// ones.
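//
// For example, if `gs://folder/image1.jpg` appears in two `TRAIN` rows, once
// with label `daisy` and once with label `tulip`, the imported example
// carries both labels (an illustration of the appending rule above):
//
//     TRAIN,gs://folder/image1.jpg,daisy
//     TRAIN,gs://folder/image1.jpg,tulip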
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats are:
//
// <h4>AutoML Vision</h4>
//
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
//     to. This value can be one of the following:
//     *   `TRAIN` - Rows in this file are used to train the model.
//     *   `TEST` - Rows in this file are used to test the model during
//         training.
//     *   `UNASSIGNED` - Rows in this file are not categorized. They are
//         automatically divided into train and test data: 80% for training
//         and 20% for testing.
//
// *   `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up
//     to 30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP,
//     .TIFF, .ICO.
//
// *   `LABEL` - A label that identifies the object in the image.
//
// For the `MULTICLASS` classification type, at most one `LABEL` is allowed
// per image. If an image has not yet been labeled, then it should be
// mentioned just once with no `LABEL`.
//
// Some sample rows:
//
//     TRAIN,gs://folder/image1.jpg,daisy
//     TEST,gs://folder/image2.jpg,dandelion,tulip,rose
//     UNASSIGNED,gs://folder/image3.jpg,daisy
//     UNASSIGNED,gs://folder/image4.jpg
//
// </section><section><h5>Object Detection</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/object-detection/docs/prepare)
// for more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
//     to. This value can be one of the following:
//     *   `TRAIN` - Rows in this file are used to train the model.
//     *   `TEST` - Rows in this file are used to test the model during
//         training.
//     *   `UNASSIGNED` - Rows in this file are not categorized. They are
//         automatically divided into train and test data: 80% for training
//         and 20% for testing.
//
// *   `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up
//     to 30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image
//     is assumed to be exhaustively labeled.
//
// *   `LABEL` - A label that identifies the object in the image specified by
//     the `BOUNDING_BOX`.
//
// *   `BOUNDING_BOX` - The vertices of an object in the example image.
//     The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more
//     than 500 `BOUNDING_BOX` instances per image are allowed (one
//     `BOUNDING_BOX` per line). If an image has no looked-for objects then
//     it should be mentioned just once with no `LABEL` and ",,,,,,," in
//     place of the `BOUNDING_BOX`.
//
// Four sample rows:
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//     TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
//     UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
//     TEST,gs://folder/im3.png,,,,,,,,,
// </section>
// </div>
//
// <h4>AutoML Video Intelligence</h4>
//
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/video-intelligence/automl/docs/prepare) for
// more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH
//
// For `ML_USE`, do not use `VALIDATE`.
//
// `GCS_FILE_PATH` is the path to another .csv file that describes training
// examples for a given `ML_USE`, using the following row format:
//
//     GCS_FILE_PATH,(LABEL,TIME_SEGMENT_START,TIME_SEGMENT_END | ,,)
//
// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
// to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time. Any
// segment of a video which has one or more labels on it is considered a
// hard negative for all other labels. Any segment with no labels on
// it is considered to be unknown. If a whole video is unknown, then
// it should be mentioned just once with ",," in place of `LABEL,
// TIME_SEGMENT_START,TIME_SEGMENT_END`.
//
// Sample top level CSV file:
//
//     TRAIN,gs://folder/train_videos.csv
//     TEST,gs://folder/test_videos.csv
//     UNASSIGNED,gs://folder/other_videos.csv
//
// Sample rows of a CSV file for a particular `ML_USE`:
//
//     gs://folder/video1.avi,car,120,180.000021
//     gs://folder/video1.avi,bike,150,180.000021
//     gs://folder/vid2.avi,car,0,60.5
//     gs://folder/vid3.avi,,,
//
// </section><section><h5>Object Tracking</h5>
//
// See [Preparing your training
// data](/video-intelligence/automl/object-tracking/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH
//
// For `ML_USE`, do not use `VALIDATE`.
//
// `GCS_FILE_PATH` is the path to another .csv file that describes training
// examples for a given `ML_USE`, using the following row format:
//
//     GCS_FILE_PATH,LABEL,[INSTANCE_ID],TIMESTAMP,BOUNDING_BOX
//
// or
//
//     GCS_FILE_PATH,,,,,,,,,,
//
// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
// to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
// Providing `INSTANCE_ID`s can help to obtain a better model. When
// a specific labeled entity leaves the video frame and shows up
// afterwards, it is not required, albeit preferable, that the same
// `INSTANCE_ID` is given to it.
//
// `TIMESTAMP` must be within the length of the video; the
// `BOUNDING_BOX` is assumed to be drawn on the video frame closest
// to the `TIMESTAMP`. Any frame mentioned by a `TIMESTAMP` is expected
// to be exhaustively labeled, and no more than 500 `BOUNDING_BOX`-es per
// frame are allowed. If a whole video is unknown, then it should be
// mentioned just once with ",,,,,,,,,," in place of `LABEL,
// [INSTANCE_ID],TIMESTAMP,BOUNDING_BOX`.
//
// Sample top level CSV file:
//
//     TRAIN,gs://folder/train_videos.csv
//     TEST,gs://folder/test_videos.csv
//     UNASSIGNED,gs://folder/other_videos.csv
//
// Seven sample rows of a CSV file for a particular `ML_USE`:
//
//     gs://folder/video1.avi,car,1,12.10,0.8,0.8,0.9,0.8,0.9,0.9,0.8,0.9
//     gs://folder/video1.avi,car,1,12.90,0.4,0.8,0.5,0.8,0.5,0.9,0.4,0.9
//     gs://folder/video1.avi,car,2,12.10,.4,.2,.5,.2,.5,.3,.4,.3
//     gs://folder/video1.avi,car,2,12.90,.8,.2,,,.9,.3,,
//     gs://folder/video1.avi,bike,,12.50,.45,.45,,,.55,.55,,
//     gs://folder/video2.avi,car,1,0,.1,.9,,,.9,.1,,
//     gs://folder/video2.avi,,,,,,,,,,,
// </section>
// </div>
//
// <h4>AutoML Natural Language</h4>
//
// <div class="ds-selector-tabs"><section><h5>Entity Extraction</h5>
//
// See [Preparing your training
// data](/natural-language/automl/entity-analysis/docs/prepare) for more
// information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,GCS_FILE_PATH
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
//     to. This value can be one of the following:
//     *   `TRAIN` - Rows in this file are used to train the model.
//     *   `TEST` - Rows in this file are used to test the model during
//         training.
//     *   `UNASSIGNED` - Rows in this file are not categorized. They are
//         automatically divided into train and test data: 80% for training
//         and 20% for testing.
//
// *   `GCS_FILE_PATH` - Identifies a JSON Lines (.JSONL) file stored in
//     Google Cloud Storage that contains in-line text as documents for
//     model training.
//
// After the training data set has been determined from the `TRAIN` and
// `UNASSIGNED` CSV files, the training data is divided into train and
// validation data sets: 70% for training and 30% for validation.
//
// For example:
//
//     TRAIN,gs://folder/file1.jsonl
//     VALIDATE,gs://folder/file2.jsonl
//     TEST,gs://folder/file3.jsonl
//
// **In-line JSONL files**
//
// In-line .JSONL files contain, per line, a JSON document that wraps a
// [`text_snippet`][google.cloud.automl.v1.TextSnippet] field followed by
// one or more [`annotations`][google.cloud.automl.v1.AnnotationPayload]
// fields, which have `display_name` and `text_extraction` fields to describe
// the entity from the text snippet. Multiple JSON documents can be separated
// using line breaks (\n).
//
// The supplied text must be annotated exhaustively. For example, if you
// include the text "horse", but do not label it as "animal",
// then "horse" is assumed to not be an "animal".
//
// Any given text snippet content must have 30,000 characters or
// less, and also be UTF-8 NFC encoded. ASCII is accepted as it is
// UTF-8 NFC encoded.
//
// For example:
//
//     {
//       "text_snippet": {
//         "content": "dog car cat"
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 2}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 6}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 10}
//           }
//         }
//       ]
//     }\n
//     {
//       "text_snippet": {
//         "content": "This dog is good."
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 5, "end_offset": 7}
//           }
//         }
//       ]
//     }
//
// **JSONL files that reference documents**
//
// .JSONL files contain, per line, a JSON document that wraps an
// `input_config` that contains the path to a source document.
// Multiple JSON documents can be separated using line breaks (\n).
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// For example:
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ] }
//         }
//       }
//     }
//
// **In-line JSONL files with document layout information**
//
// **Note:** You can only annotate documents using the UI. The format described
// below applies to annotated documents exported using the UI or `exportData`.
//
// In-line .JSONL files for documents contain, per line, a JSON document
// that wraps a `document` field that provides the textual content of the
// document and the layout information.
//
// For example:
//
//     {
//       "document": {
//         "document_text": {
//           "content": "dog car cat"
//         },
//         "layout": [
//           {
//             "text_segment": {
//               "start_offset": 0,
//               "end_offset": 11
//             },
//             "page_number": 1,
//             "bounding_poly": {
//               "normalized_vertices": [
//                 {"x": 0.1, "y": 0.1},
//                 {"x": 0.1, "y": 0.3},
//                 {"x": 0.3, "y": 0.3},
//                 {"x": 0.3, "y": 0.1}
//               ]
//             },
//             "text_segment_type": TOKEN
//           }
//         ],
//         "document_dimensions": {
//           "width": 8.27,
//           "height": 11.69,
//           "unit": INCH
//         },
//         "page_count": 3
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 3}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 7}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 11}
//           }
//         }
//       ]
//     }
//
// </section><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for
// more information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,...
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
//     to. This value can be one of the following:
//     *   `TRAIN` - Rows in this file are used to train the model.
//     *   `TEST` - Rows in this file are used to test the model during
//         training.
//     *   `UNASSIGNED` - Rows in this file are not categorized. They are
//         automatically divided into train and test data: 80% for training
//         and 20% for testing.
//
// *   `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//     the column content is a valid Google Cloud Storage file path, that is,
//     prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//     the content is enclosed in double quotes (""), it is treated as a
//     `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//     file with a supported extension and UTF-8 encoding, for example,
//     "gs://folder/content.txt"; AutoML imports the file content
//     as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column
//     content excluding quotes. In both cases, the content must be 10MB or
//     less in size. For zip files, each file inside the zip must be 10MB or
//     less in size.
//
// For the `MULTICLASS` classification type, at most one `LABEL` is allowed.
//
// The `ML_USE` and `LABEL` columns are optional.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
//
// A maximum of 100 unique labels are allowed per CSV row.
//
// Sample rows:
//
//     TRAIN,"They have bad food and very rude",RudeService,BadFood
//     gs://folder/content.txt,SlowService
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,BadFood
//
// </section><section><h5>Sentiment Analysis</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for
// more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
//     to. This value can be one of the following:
//     *   `TRAIN` - Rows in this file are used to train the model.
//     *   `TEST` - Rows in this file are used to test the model during
//         training.
//     *   `UNASSIGNED` - Rows in this file are not categorized. They are
//         automatically divided into train and test data: 80% for training
//         and 20% for testing.
//
// *   `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//     the column content is a valid Google Cloud Storage file path, that is,
//     prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//     the content is enclosed in double quotes (""), it is treated as a
//     `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//     file with a supported extension and UTF-8 encoding, for example,
//     "gs://folder/content.txt"; AutoML imports the file content
//     as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column
//     content excluding quotes. In both cases, the content must be 128kB or
//     less in size. For zip files, each file inside the zip must be 128kB or
//     less in size.
//
// The `ML_USE` and `SENTIMENT` columns are optional.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
//
// *   `SENTIMENT` - An integer between 0 and
//     Dataset.text_sentiment_dataset_metadata.sentiment_max
//     (inclusive). Describes the ordinal of the sentiment - a higher
//     value means a more positive sentiment. All the values are
//     completely relative, i.e. neither 0 needs to mean a negative or
//     neutral sentiment nor sentiment_max needs to mean a positive one -
//     it is just required that 0 is the least positive sentiment
//     in the data, and sentiment_max is the most positive one.
//     The SENTIMENT shouldn't be confused with "score" or "magnitude"
//     from the previous Natural Language Sentiment Analysis API.
//     All SENTIMENT values between 0 and sentiment_max must be
//     represented in the imported data. On prediction the same 0 to
//     sentiment_max range will be used. The difference between
//     neighboring sentiment values need not be uniform, e.g. 1 and
//     2 may be similar whereas the difference between 2 and 3 may be
//     large.
//
// Sample rows:
//
//     TRAIN,"@freewrytin this is way too good for your product",2
//     gs://folder/content.txt,3
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,2
// </section>
// </div>
//
//
// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
// class="selected">
//
// See [Preparing your training
// data](https://cloud.google.com/automl-tables/docs/prepare) for more
// information.
//
// You can use either
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] or
// [bigquery_source][google.cloud.automl.v1.InputConfig.bigquery_source].
// All input is concatenated into a single
// [primary_table_spec_id][google.cloud.automl.v1.TablesDatasetMetadata.primary_table_spec_id].
//
// **For gcs_source:**
//
// CSV file(s), where the first row of the first file is the header,
// containing unique column names. If the first row of a subsequent
// file is the same as the header, then it is also treated as a
// header. All other rows contain values for the corresponding
// columns.
//
// Each .CSV file by itself must be 10GB or smaller, and their total
// size must be 100GB or smaller.
//
// First three sample rows of a CSV file:
// <pre>
// "Id","First Name","Last Name","Dob","Addresses"
// "1","John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
// "2","Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
// </pre>
//
// **For bigquery_source:**
//
// A URI of a BigQuery table. The user data size of the BigQuery
// table must be 100GB or smaller.
//
// An imported table must have between 2 and 1,000 columns, inclusive,
// and between 1,000 and 100,000,000 rows, inclusive. At most 5
// import data jobs can run in parallel.
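//
// For example, a sketch of such a table URI (the `bq://` form and the
// identifiers here are illustrative assumptions, not defined in this file):
//
//     bq://my_project.my_dataset.my_table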
//
// </section>
// </div>
//
// **Input field definitions:**
//
// `ML_USE`
// : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED")
//   Describes how the given example (file) should be used for model
//   training. "UNASSIGNED" can be used when the user has no preference.
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/image1.png".
//
// `LABEL`
// : A display name of an object on an image, video etc., e.g. "dog".
//   Must be up to 32 characters long and can consist only of ASCII
//   Latin letters A-Z and a-z, underscores (_), and ASCII digits 0-9.
//   For each label, an AnnotationSpec is created whose display_name
//   becomes the label; AnnotationSpecs are given back in predictions.
//
// `INSTANCE_ID`
// : A positive integer that identifies a specific instance of a
//   labeled entity on an example. Used e.g. to track two cars on
//   a video while being able to tell apart which one is which.
//
// `BOUNDING_BOX`
// : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`)
//   A rectangle parallel to the frame of the example (image,
//   video). If 4 vertices are given they are connected by edges
//   in the order provided; if 2 are given they are recognized
//   as diagonally opposite vertices of the rectangle.
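//
//   For example, the same rectangle can be given either by all 4 vertices
//   or by 2 diagonally opposite ones:
//
//       0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
//       0.1,0.1,,,0.2,0.3,,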
//
// `VERTEX`
// : (`COORDINATE,COORDINATE`)
//   First coordinate is horizontal (x), the second is vertical (y).
//
// `COORDINATE`
// : A float in the 0 to 1 range, relative to the total length of the
//   image or video in the given dimension. For fractions the
//   leading non-decimal 0 can be omitted (i.e. 0.3 = .3).
//   Point 0,0 is in the top left.
//
// `TIME_SEGMENT_START`
// : (`TIME_OFFSET`)
//   Expresses a beginning, inclusive, of a time segment
//   within an example that has a time dimension
//   (e.g. video).
//
// `TIME_SEGMENT_END`
// : (`TIME_OFFSET`)
//   Expresses an end, exclusive, of a time segment within
//   an example that has a time dimension (e.g. video).
//
// `TIME_OFFSET`
// : A number of seconds as measured from the start of an
//   example (e.g. video). Fractions are allowed, up to a
//   microsecond precision. "inf" is allowed, and it means the end
//   of the example.
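//
//   For example, `120`, `60.5`, and `180.000021` are valid offsets (as in
//   the video sample rows above), and `inf` denotes the end of the example.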
//
// `TEXT_SNIPPET`
// : The content of a text snippet, UTF-8 encoded, enclosed within
//   double quotes ("").
//
// `DOCUMENT`
// : A field that provides the textual content of a document together with
//   the layout information.
//
// **Errors:**
//
// If any of the provided CSV files can't be parsed or if more than a certain
// percentage of CSV rows cannot be processed then the operation fails and
// nothing is imported. Regardless of overall success or failure, the
// per-row failures, up to a certain count cap, are listed in
// Operation.metadata.partial_failures.
//
message InputConfig {
  // The source of the input.
  oneof source {
    // The Google Cloud Storage location for the input content.
    // For [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData],
    // `gcs_source` points to a CSV file with a structure described in
    // [InputConfig][google.cloud.automl.v1.InputConfig].
    GcsSource gcs_source = 1;
  }

  // Additional domain-specific parameters describing the semantics of the
  // imported data; any string must be up to 25000 characters long.
  //
  // <h4>AutoML Tables</h4>
  //
  // `schema_inference_version`
  // : (integer) This value must be supplied.
  //   The version of the algorithm to use for the initial inference of the
  //   column data types of the imported table. Allowed values: "1".
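  //
  // For example, a sketch of the `params` map in JSON form, using the only
  // documented key and allowed value:
  //
  //     "params": { "schema_inference_version": "1" }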
  map<string, string> params = 2;
}

// Input configuration for BatchPredict Action.
//
// The format of input depends on the ML problem of the model used for
// prediction. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats
// are:
//
// <h4>AutoML Vision</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
//
// </section><section><h5>Object Detection</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
// </section>
// </div>
//
// <h4>AutoML Video Intelligence</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is in the following format:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a video up to
// 50GB in size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
//
// </section><section><h5>Object Tracking</h5>
//
// One or more CSV files where each line is in the following format:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a video up to
// 50GB in size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
// </section>
// </div>
//
// <h4>AutoML Natural Language</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 10MB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Sentiment Analysis</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 128kB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Entity Extraction</h5>
//
// One or more JSONL (JSON Lines) files that either provide inline text or
// documents. You can only use one format, either inline text or documents,
// for a single call to [AutoMl.BatchPredict].
//
// Each inline JSONL file contains, per line, a proto that
// wraps a temporary user-assigned TextSnippet ID (string up to 2000
// characters long) called "id", a TextSnippet proto (in
// JSON representation) and zero or more TextFeature protos. Any given
// text snippet content must have 30,000 characters or less, and also
// be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
// unique.
//
// Each document JSONL file contains, per line, a proto that wraps a Document
// proto with `input_config` set. Each document cannot exceed 2MB in size.
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// Each JSONL file must not exceed 100MB in size, and no more than 20
// JSONL files may be passed.
//
// Sample inline JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//     {
//       "id": "my_first_id",
//       "text_snippet": { "content": "dog car cat" },
//       "text_features": [
//         {
//           "text_segment": {"start_offset": 4, "end_offset": 6},
//           "structural_type": PARAGRAPH,
//           "bounding_poly": {
//             "normalized_vertices": [
//               {"x": 0.1, "y": 0.1},
//               {"x": 0.1, "y": 0.3},
//               {"x": 0.3, "y": 0.3},
//               {"x": 0.3, "y": 0.1}
//             ]
//           }
//         }
//       ]
//     }\n
//     {
//       "id": "2",
//       "text_snippet": {
//         "content": "Extended sample content",
//         "mime_type": "text/plain"
//       }
//     }
//
// Sample document JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ] }
//         }
//       }
//     }
// </section>
// </div>
//
// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
// class="selected">
//
// See [Preparing your training
// data](https://cloud.google.com/automl-tables/docs/predict-batch) for more
// information.
//
// You can use either
// [gcs_source][google.cloud.automl.v1.BatchPredictInputConfig.gcs_source]
// or
// [bigquery_source][google.cloud.automl.v1.BatchPredictInputConfig.bigquery_source].
//
// **For gcs_source:**
//
// CSV file(s), each by itself 10GB or smaller and with a total size of
// 100GB or smaller, where the first file must have a header containing
// column names. If the first row of a subsequent file is the same as
// the header, then it is also treated as a header. All other rows
// contain values for the corresponding columns.
//
// The column names must contain the model's
// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
// (order doesn't matter). The columns corresponding to the model's
// input feature column specs must contain values compatible with the
// column spec's data types. Prediction on all the rows, i.e. the CSV
// lines, will be attempted.
//
// Sample rows from a CSV file:
// <pre>
// "First Name","Last Name","Dob","Addresses"
// "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
// "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
// </pre>
//
// **For bigquery_source:**
//
// The URI of a BigQuery table. The user data size of the BigQuery
// table must be 100GB or smaller.
//
// The column names must contain the model's
// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
// (order doesn't matter). The columns corresponding to the model's
// input feature column specs must contain values compatible with the
// column spec's data types. Prediction on all the rows of the table
// will be attempted.
// </section>
// </div>
//
// **Input field definitions:**
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/video.avi".
//
// `TIME_SEGMENT_START`
// : (`TIME_OFFSET`)
//   Expresses a beginning, inclusive, of a time segment
//   within an example that has a time dimension
//   (e.g. video).
//
// `TIME_SEGMENT_END`
// : (`TIME_OFFSET`)
//   Expresses an end, exclusive, of a time segment within
//   an example that has a time dimension (e.g. video).
//
// `TIME_OFFSET`
// : A number of seconds as measured from the start of an
//   example (e.g. video). Fractions are allowed, up to a
//   microsecond precision. "inf" is allowed, and it means the end
//   of the example.
//
// **Errors:**
//
// If any of the provided CSV files can't be parsed or if more than a certain
// percentage of CSV rows cannot be processed then the operation fails and
// prediction does not happen. Regardless of overall success or failure, the
// per-row failures, up to a certain count cap, will be listed in
// Operation.metadata.partial_failures.
message BatchPredictInputConfig {
  // The source of the input.
  oneof source {
    // Required. The Google Cloud Storage location for the input content.
    GcsSource gcs_source = 1 [(google.api.field_behavior) = REQUIRED];
  }
}

// Input configuration of a [Document][google.cloud.automl.v1.Document].
message DocumentInputConfig {
  // The Google Cloud Storage location of the document file. Only a single
  // path should be given.
  //
  // Max supported size: 512MB.
  //
  // Supported extensions: .PDF.
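  //
  // For example, a sketch of this message in JSON form (the path is
  // illustrative):
  //
  //     { "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] } }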
  GcsSource gcs_source = 1;
}

// Output configuration for ExportData.
//
// *   For Translation:
//     CSV file `translation.csv`, with each line in format:
//
//         ML_USE,GCS_FILE_PATH
//
//     `GCS_FILE_PATH` leads to a .TSV file which describes examples that
//     have a given `ML_USE`, using the following row format per line:
//
//         TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target
//         language)
//
// *   For Tables:
//     Output depends on whether the dataset was imported from Google Cloud
//     Storage or BigQuery.
//     Google Cloud Storage case:
//     [gcs_destination][google.cloud.automl.v1p1beta.OutputConfig.gcs_destination]
//     must be set. Exported are CSV file(s) `tables_1.csv`,
//     `tables_2.csv`,...,`tables_N.csv` with each having as header line
//     the table's column names, and all other lines containing values for
//     the header columns.
//     BigQuery case:
//     [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
//     pointing to a BigQuery project must be set. In the given project a
//     new dataset will be created with name
//     `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>`
//     where <automl-dataset-display-name> will be made
//     BigQuery-dataset-name compatible (e.g. most special characters will
//     become underscores), and timestamp will be in
//     YYYY_MM_DDThh_mm_ss_sssZ format, based on ISO-8601. In that
//     dataset a new table called `primary_table` will be created, and
//     filled with precisely the same data as that obtained on import.
message OutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the output is to be
    // written to.
    // For Image Object Detection, Text Extraction, Video Classification and
    // Tables, in the given directory a new directory will be created with
    // name:
    // "export_data-<dataset-display-name>-<timestamp-of-export-call>",
    // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All
    // export output will be written into that directory.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = REQUIRED];
  }
}

// Output configuration for BatchPredict Action.
//
// As destination the
// [gcs_destination][google.cloud.automl.v1.BatchPredictOutputConfig.gcs_destination]
// must be set unless specified otherwise for a domain. If gcs_destination is
// set then in the given directory a new directory is created. Its name
// will be
// "prediction-<model-display-name>-<timestamp-of-prediction-call>",
// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Its
// contents depend on the ML problem the predictions are made for.
//
// *   For Image Classification:
//     In the created directory files `image_classification_1.jsonl`,
//     `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
//     will be created, where N may be 1, and depends on the
//     total number of the successfully predicted images and annotations.
//     A single image will be listed only once with all its annotations,
//     and its annotations will never be split across files.
//     Each .JSONL file will contain, per line, a JSON representation of a
//     proto that wraps the image's "ID" : "<id_value>" followed by a list
//     of zero or more AnnotationPayload protos (called annotations), which
//     have classification detail populated.
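//     A sketch of one such line, with a hypothetical ID value and the
//     annotation list elided:
//
//         { "ID": "gs://folder/image1.jpeg", "annotations": [ ... ] }
//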
//     If prediction for any image failed (partially or completely), then
//     additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
//     files will be created (N depends on the total number of failed
//     predictions). These files will have a JSON representation of a proto
//     that wraps the same "ID" : "<id_value>" but here followed by
//     exactly one
//     [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//     containing only `code` and `message` fields.
//
// *   For Image Object Detection:
//     In the created directory files `image_object_detection_1.jsonl`,
//     `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
//     will be created, where N may be 1, and depends on the
//     total number of the successfully predicted images and annotations.
//     Each .JSONL file will contain, per line, a JSON representation of a
//     proto that wraps the image's "ID" : "<id_value>" followed by a list
//     of zero or more AnnotationPayload protos (called annotations), which
//     have image_object_detection detail populated. A single image will
//     be listed only once with all its annotations, and its annotations
//     will never be split across files.
//     If prediction for any image failed (partially or completely), then
//     additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
//     files will be created (N depends on the total number of failed
//     predictions). These files will have a JSON representation of a proto
//     that wraps the same "ID" : "<id_value>" but here followed by
//     exactly one
//     [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//     containing only `code` and `message` fields.
//
// *   For Video Classification:
//     In the created directory a video_classification.csv file, and a .JSON
//     file for each video classification requested in the input (i.e. each
//     line in the given CSV(s)), will be created.
//
//     The format of video_classification.csv is:
//
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//
//     where:
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//     the prediction input lines (i.e. video_classification.csv has
//     precisely the same number of lines as the prediction input had).
//     JSON_FILE_NAME = Name of the .JSON file in the output directory,
//     which contains prediction responses for the video time segment.
//     STATUS = "OK" if prediction completed successfully, or an error code
//     with message otherwise. If STATUS is not "OK" then the .JSON file
//     for that line may not exist or be empty.
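//
//     For example, one row of video_classification.csv might look like
//     this (the JSON file name here is a hypothetical illustration):
//
//         gs://folder/video1.mp4,10,40,video_classification_1.json,OK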
//
//     Each .JSON file, assuming STATUS is "OK", will contain a list of
//     AnnotationPayload protos in JSON format, which are the predictions
//     for the video time segment the file is assigned to in the
//     video_classification.csv. All AnnotationPayload protos will have
//     the video_classification field set, and will be sorted by the
//     video_classification.type field (note that the returned types are
//     governed by the `classifaction_types` parameter in
//     [PredictService.BatchPredictRequest.params][]).
//
// *   For Video Object Tracking:
//     In the created directory a video_object_tracking.csv file will be
//     created, and multiple files video_object_tracking_1.json,
//     video_object_tracking_2.json,..., video_object_tracking_N.json,
//     where N is the number of requests in the input (i.e. the number of
//     lines in the given CSV(s)).
//
//     The format of video_object_tracking.csv is:
//
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//
//     where:
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//     the prediction input lines (i.e. video_object_tracking.csv has
//     precisely the same number of lines as the prediction input had).
//     JSON_FILE_NAME = Name of the .JSON file in the output directory,
//     which contains prediction responses for the video time segment.
//     STATUS = "OK" if prediction completed successfully, or an error
//     code with message otherwise. If STATUS is not "OK" then the .JSON
//     file for that line may not exist or be empty.
//
//     Each .JSON file, assuming STATUS is "OK", will contain a list of
//     AnnotationPayload protos in JSON format, which are the predictions
//     for each frame of the video time segment the file is assigned to in
//     video_object_tracking.csv. All AnnotationPayload protos will have
//     the video_object_tracking field set.
//
// *   For Text Classification:
//     In the created directory files `text_classification_1.jsonl`,
//     `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
//     will be created, where N may be 1, and depends on the
//     total number of inputs and annotations found.
//
//     Each .JSONL file will contain, per line, a JSON representation of a
//     proto that wraps the input text file (or document) in
//     a text snippet (or document) proto and a list of
//     zero or more AnnotationPayload protos (called annotations), which
//     have classification detail populated. A single text file (or
//     document) will be listed only once with all its annotations, and its
//     annotations will never be split across files.
//
//     If prediction for any input file (or document) failed (partially or
//     completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
//     `errors_N.jsonl` files will be created (N depends on the total number
//     of failed predictions). These files will have a JSON representation
//     of a proto that wraps the input file, followed by exactly one
//     [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//     containing only `code` and `message`.
//
// *   For Text Sentiment:
//     In the created directory files `text_sentiment_1.jsonl`,
//     `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
//     will be created, where N may be 1, and depends on the
//     total number of inputs and annotations found.
//
//     Each .JSONL file will contain, per line, a JSON representation of a
//     proto that wraps the input text file (or document) in
//     a text snippet (or document) proto and a list of
//     zero or more AnnotationPayload protos (called annotations), which
//     have text_sentiment detail populated. A single text file (or
//     document) will be listed only once with all its annotations, and its
//     annotations will never be split across files.
//
//     If prediction for any input file (or document) failed (partially or
//     completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
//     `errors_N.jsonl` files will be created (N depends on the total number
//     of failed predictions). These files will have a JSON representation
//     of a proto that wraps the input file, followed by exactly one
//     [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//     containing only `code` and `message`.
//
  1131. // * For Text Extraction:
  1132. // In the created directory files `text_extraction_1.jsonl`,
  1133. // `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
  1134. // will be created, where N may be 1, and depends on the
  1135. // total number of inputs and annotations found.
  1136. // The contents of these .JSONL file(s) depend on whether the input
  1137. // used inline text, or documents.
  1138. // If input was inline, then each .JSONL file will contain, per line,
  1139. // a JSON representation of a proto that wraps given in request text
  1140. // snippet's "id" (if specified), followed by input text snippet,
  1141. // and a list of zero or more
  1142. // AnnotationPayload protos (called annotations), which have
  1143. // text_extraction detail populated. A single text snippet will be
  1144. // listed only once with all its annotations, and its annotations will
  1145. // never be split across files.
  1146. // If input used documents, then each .JSONL file will contain, per
  1147. // line, a JSON representation of a proto that wraps given in request
  1148. // document proto, followed by its OCR-ed representation in the form
  1149. // of a text snippet, finally followed by a list of zero or more
  1150. // AnnotationPayload protos (called annotations), which have
  1151. // text_extraction detail populated and refer, via their indices, to
  1152. // the OCR-ed text snippet. A single document (and its text snippet)
  1153. // will be listed only once with all its annotations, and its
  1154. // annotations will never be split across files.
  1155. // If prediction for any text snippet failed (partially or completely),
  1156. // then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
  1157. // `errors_N.jsonl` files will be created (N depends on total number of
  1158. // failed predictions). These files will have a JSON representation of a
  1159. // proto that wraps either the "id" : "<id_value>" (in case of inline)
  1160. // or the document proto (in case of document) but here followed by
  1161. // exactly one
  1162. // [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
  1163. // containing only `code` and `message`.
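// For illustration only, a single line of `text_extraction_1.jsonl`
// in the inline-text case might look roughly like this (the "id",
// content, offsets, and score are made up; the exact JSON field names
// follow the proto JSON mapping of the wrapping proto):
//
//   { "id": "snippet-42",
//     "textSnippet": { "content": "Call Dr. Smith at 555-0100." },
//     "annotations": [ { "textExtraction": {
//       "textSegment": { "startOffset": "5", "endOffset": "14" },
//       "score": 0.97 } } ] }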
//
// * For Tables:
// Output depends on whether
// [gcs_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.gcs_destination]
// or
// [bigquery_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.bigquery_destination]
// is set (either is allowed).
// Google Cloud Storage case:
// In the created directory files `tables_1.csv`, `tables_2.csv`,...,
// `tables_N.csv` will be created, where N may be 1, and depends on
// the total number of successfully predicted rows.
// For all CLASSIFICATION
// [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
// Each .csv file will contain a header listing all the input columns'
// [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name],
// followed by M target column names in the format of
// "<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>_<target
// value>_score", where M is the number of distinct target values,
// i.e. the number of distinct values in the target column of the
// table used to train the model. Subsequent lines will contain the
// respective values of successfully predicted rows, with the last,
// i.e. the target, columns having the corresponding prediction
// [scores][google.cloud.automl.v1p1beta.TablesAnnotation.score].
// For REGRESSION and FORECASTING
// [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
// Each .csv file will contain a header listing all the input columns'
// [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name],
// followed by the predicted target column, with its name in the
// format of
// "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>".
// Subsequent lines will contain the respective values of
// successfully predicted rows, with the last, i.e. the target,
// column having the predicted target value.
// If prediction for any rows failed, then additional `errors_1.csv`,
// `errors_2.csv`,..., `errors_N.csv` files will be created (N depends
// on the total number of failed rows). These files will have a format
// analogous to `tables_*.csv`, but always with a single target column
// having a
// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
// represented as a JSON string, containing only `code` and `message`.
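// For illustration only, with a hypothetical target column named
// "churn" that has the two distinct values "yes" and "no", a
// classification `tables_1.csv` might begin like this (all column
// names and values are made up):
//
//   age,plan,churn_yes_score,churn_no_score
//   34,premium,0.18,0.82
//   61,basic,0.73,0.27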
// BigQuery case:
// [bigquery_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.bigquery_destination]
// pointing to a BigQuery project must be set. In the given project a
// new dataset will be created with the name
// `prediction_<model-display-name>_<timestamp-of-prediction-call>`,
// where <model-display-name> will be made
// BigQuery-dataset-name compatible (e.g. most special characters will
// become underscores), and the timestamp will be in
// YYYY_MM_DDThh_mm_ss_sssZ format (based on ISO-8601). In the dataset
// two tables will be created, `predictions` and `errors`.
// The `predictions` table's column names will be the input columns'
// [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
// followed by the target column, with its name in the format of
// "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>".
// The input feature columns will contain the respective values of
// successfully predicted rows, with the target column having an
// ARRAY of
// [AnnotationPayloads][google.cloud.automl.v1p1beta.AnnotationPayload],
// represented as STRUCT-s, containing
// [TablesAnnotation][google.cloud.automl.v1p1beta.TablesAnnotation].
// The `errors` table contains rows for which the prediction has
// failed; it has analogous input columns, while the target column
// name is in the format of
// "errors_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>",
// and as a value has a
// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
// represented as a STRUCT, containing only `code` and `message`.
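// For illustration only, a batch prediction call made at
// 2024-01-15T10:00:00.000Z against a model with the display name
// "My Model" would, under this naming scheme, create a dataset named
// roughly `prediction_My_Model_2024_01_15T10_00_00_000Z` (the exact
// character substitutions are an assumption), containing the two
// tables `predictions` and `errors`.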
message BatchPredictOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location of the directory
    // where the output is to be written.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }
}

// Output configuration for ModelExport Action.
message ModelExportOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the model is
    // to be written.
    // This location may only be set for the following model formats:
    // "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
    //
    // Under the directory given as the destination a new one with the
    // name "model-export-<model-display-name>-<timestamp-of-export-call>",
    // where the timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601
    // format, will be created. Inside it, the model and any of its
    // supporting files will be written.
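    // For illustration only, exporting a model with the display name
    // "my_model" at 2024-01-15T10:00:00.000Z to a destination of
    // gs://my-bucket/exports (a hypothetical bucket) would create a
    // directory such as
    // gs://my-bucket/exports/model-export-my_model-2024-01-15T10:00:00.000Z/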
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // The format in which the model must be exported. The available, and
  // default, formats depend on the problem and model type (if a given
  // problem and type combination doesn't have a format listed, its
  // models are not exportable):
  //
  // * For Image Classification mobile-low-latency-1, mobile-versatile-1,
  // mobile-high-accuracy-1:
  // "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js",
  // "docker".
  //
  // * For Image Classification mobile-core-ml-low-latency-1,
  // mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
  // "core_ml" (default).
  //
  // * For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
  // mobile-high-accuracy-1:
  // "tflite", "tf_saved_model", "tf_js".
  //
  // Formats description:
  //
  // * tflite - Used for Android mobile devices.
  // * edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
  // devices.
  // * tf_saved_model - A TensorFlow model in SavedModel format.
  // * tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
  // be used in the browser and in Node.js using JavaScript.
  // * docker - Used for Docker containers. Use the params field to customize
  // the container. The container is verified to work correctly on the
  // Ubuntu 16.04 operating system. See more at the
  // [containers
  // quickstart](https://cloud.google.com/vision/automl/docs/containers-gcs-quickstart).
  // * core_ml - Used for iOS mobile devices.
  string model_format = 4;

  // Additional model-type and format specific parameters describing the
  // requirements for the model files to be exported; any string must be
  // up to 25000 characters long.
  //
  // * For `docker` format:
  // `cpu_architecture` - (string) "x86_64" (default).
  // `gpu_architecture` - (string) "none" (default), "nvidia".
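  // For illustration only, a hypothetical export of a "docker" format
  // model targeting an NVIDIA GPU container could set this map to
  // (shown as JSON):
  //
  //   { "cpu_architecture": "x86_64", "gpu_architecture": "nvidia" }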
  map<string, string> params = 2;
}

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Google Cloud Storage URIs to input files, up to 2000
  // characters long. Accepted forms:
  // * Full object path, e.g. gs://bucket/directory/object.csv
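  // For illustration only (the bucket and object names are made up),
  // a populated GcsSource in proto text format might look like:
  //
  //   input_uris: "gs://my-bucket/batch/input_1.csv"
  //   input_uris: "gs://my-bucket/batch/input_2.csv"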
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output is to be written.
message GcsDestination {
  // Required. Google Cloud Storage URI to the output directory, up to
  // 2000 characters long.
  // Accepted forms:
  // * Prefix path: gs://bucket/directory
  // The requesting user must have write permission to the bucket.
  // The directory is created if it doesn't exist.
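  // For illustration only (the bucket name is made up), in proto text
  // format:
  //
  //   output_uri_prefix: "gs://my-bucket/predictions"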
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}