// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1p2beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p2beta1/geometry.proto";
import "google/cloud/vision/v1p2beta1/text_annotation.proto";
import "google/cloud/vision/v1p2beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p2beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p2beta1";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p2beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run async image detection and annotation for a list of generic files
  // (e.g. PDF) which may contain multiple pages and multiple images per page.
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p2beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}
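The `google.api.http` options above fully determine the REST surface, so `BatchAnnotateImages` can be exercised without a client library. A minimal sketch, assuming a bearer token with one of the OAuth scopes declared above; the `gs://` URI is a placeholder, and field names follow the standard proto3 JSON mapping (lowerCamelCase):

```python
# Sketch: POST /v1p2beta1/images:annotate on the declared default host.
import json
import urllib.request

ACCESS_TOKEN = "..."  # hypothetical: obtain via your own OAuth flow

body = {
    "requests": [{
        "image": {"source": {"imageUri": "gs://my-bucket/photo.jpg"}},
        "features": [{"type": "LABEL_DETECTION", "maxResults": 5}],
    }]
}
req = urllib.request.Request(
    "https://vision.googleapis.com/v1p2beta1/images:annotate",
    data=json.dumps(body).encode("utf-8"),
    headers={
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        "Content-Type": "application/json",
    },
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # a BatchAnnotateImagesResponse as JSON
```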
// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;
    // Run face detection.
    FACE_DETECTION = 1;
    // Run landmark detection.
    LANDMARK_DETECTION = 2;
    // Run logo detection.
    LOGO_DETECTION = 3;
    // Run label detection.
    LABEL_DETECTION = 4;
    // Run text detection / optical character recognition (OCR). Text
    // detection is optimized for areas of text within a larger image; if the
    // image is a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;
    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;
    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;
    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;
    // Run crop hints.
    CROP_HINTS = 9;
    // Run web detection.
    WEB_DETECTION = 10;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
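In the proto3 JSON mapping these fields appear as lowerCamelCase keys and enum values as their names, so a `features` list requesting two of the types above might look like this sketch (values are illustrative; `model` defaults to "builtin/stable" when unset, per the comment):

```python
# Sketch: Feature objects as they would appear in a JSON request body.
features = [
    {"type": "DOCUMENT_TEXT_DETECTION", "model": "builtin/stable"},
    {"type": "LABEL_DETECTION", "maxResults": 10},  # capped result count
]
```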
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported.
  //    See [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You
  //    should not depend on externally-hosted images for production
  //    applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri`
  // takes precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
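There are thus two ways to supply an image: inline bytes or a reference. A sketch of both variants in JSON form (the filename and URI are placeholders; in JSON, `content` must be base64 per the proto3 mapping for `bytes`, and `content` wins if both are set, as documented above):

```python
# Sketch: building the two Image variants for a JSON request.
import base64

with open("photo.jpg", "rb") as f:
    inline_image = {"content": base64.b64encode(f.read()).decode("ascii")}

referenced_image = {"source": {"imageUri": "gs://my-bucket/photo.jpg"}}
```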
// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So,
    // `LEFT_EYE`, typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;
      // Left eye.
      LEFT_EYE = 1;
      // Right eye.
      RIGHT_EYE = 2;
      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;
      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;
      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;
      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;
      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;
      // Nose tip.
      NOSE_TIP = 8;
      // Upper lip.
      UPPER_LIP = 9;
      // Lower lip.
      LOWER_LIP = 10;
      // Mouth left.
      MOUTH_LEFT = 11;
      // Mouth right.
      MOUTH_RIGHT = 12;
      // Mouth center.
      MOUTH_CENTER = 13;
      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;
      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;
      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;
      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;
      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;
      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;
      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;
      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;
      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;
      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;
      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;
      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;
      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;
      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;
      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;
      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;
      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;
      // Chin gnathion.
      CHIN_GNATHION = 32;
      // Chin left gonion.
      CHIN_LEFT_GONION = 33;
      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale, as returned in `ImageParams`.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically,
  // it is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image.
  // Range [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5;

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale, as returned in `ImageParams`.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the
  // original image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the
  // height of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set the flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty
  // value yields the best results since it enables automatic language
  // detection. For languages based on the Latin alphabet, setting
  // `language_hints` is not needed. In rare cases, when the language of the
  // text in the image is known, setting a hint will help get better results
  // (although it will be a significant hindrance if the hint is wrong). Text
  // detection returns an error if one or more of the specified languages is
  // not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
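Putting the parameter messages together, here is a sketch of an `ImageContext` in its JSON form (the Japanese language hint and the ratios are illustrative only; 4/3 evaluates to the 1.33333 value the `CropHintsParams` comment describes):

```python
# Sketch: an ImageContext combining the parameter messages defined above.
image_context = {
    "languageHints": ["ja"],                       # only when language is known
    "cropHintsParams": {"aspectRatios": [4 / 3, 1.0]},  # width / height
    "webDetectionParams": {"includeGeoResults": True},
}
```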
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information is needed to understand where this
  // image comes from.
  ImageAnnotationContext context = 21;
}
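Because `error` is populated per response while any filled-in annotations remain valid, batch consumers typically check both. A sketch of reading one `AnnotateImageResponse` decoded from JSON into a plain dict (`summarize` is a hypothetical helper, not part of any library):

```python
# Sketch: handle one AnnotateImageResponse from the batch.
def summarize(response: dict) -> None:
    if "error" in response:
        # error is a google.rpc.Status; present annotations are still valid.
        print("partial failure:", response["error"].get("message"))
    for label in response.get("labelAnnotations", []):
        print(label["description"], label["score"])
```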
// Response to a single file annotation request. A file may contain one or
// more images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file.
  repeated AnnotateImageResponse responses = 2;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}
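The async flow returns a `google.longrunning.Operation` that must be polled until `done`, at which point `response` carries the `AsyncBatchAnnotateFilesResponse`. A sketch assuming a bearer token and the conventional long-running-operations GET on the operation resource name (that operations path is an assumption here, not declared in this file):

```python
# Sketch: submit an async PDF annotation and poll the operation.
import json
import time
import urllib.request

ACCESS_TOKEN = "..."  # hypothetical: obtain via your own OAuth flow
HOST = "https://vision.googleapis.com"

def call(method, url, payload=None):
    req = urllib.request.Request(
        url,
        data=None if payload is None else json.dumps(payload).encode("utf-8"),
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}",
                 "Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

op = call("POST", f"{HOST}/v1p2beta1/files:asyncBatchAnnotate", {
    "requests": [{
        "inputConfig": {"gcsSource": {"uri": "gs://my-bucket/doc.pdf"},
                        "mimeType": "application/pdf"},
        "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
        "outputConfig": {"gcsDestination": {"uri": "gs://my-bucket/out/"},
                         "batchSize": 20},
    }]
})
while not op.get("done"):
    time.sleep(5)  # poll; metadata carries OperationMetadata.state
    op = call("GET", f"{HOST}/v1p2beta1/{op['name']}")  # assumed path
print(op.get("response") or op.get("error"))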
// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // The type of the file. Currently only "application/pdf" and "image/tiff"
  // are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // GCS. The valid range is [1, 100]. If not specified, the default value
  // is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, `batch_size` only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}
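The sharding arithmetic in the `batch_size` comment is a simple ceiling division; a sketch:

```python
# Sketch: 100 response protos at batch_size=20 shard into 5 output files.
import math

pages, batch_size = 100, 20
print(math.ceil(pages / batch_size))  # -> 5 JSON files under the prefix
```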
// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a GCS
  // object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI where the results will be stored. Results will
  // be in JSON format and preceded by its corresponding input URI. This field
  // can either represent a single file, or a prefix for multiple outputs.
  // Prefixes must end in a `/`.
  //
  // Examples:
  //
  // * File: gs://bucket-name/filename.json
  // * Prefix: gs://bucket-name/prefix/here/
  // * File: gs://bucket-name/prefix/here
  //
  // If multiple outputs, each response is still AnnotateFileResponse, each of
  // which contains some subset of the full list of AnnotateImageResponse.
  // Multiple outputs can happen if, for example, the output JSON is too large
  // and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;
    // Request is received.
    CREATED = 1;
    // Request is actively being processed.
    RUNNING = 2;
    // The batch processing is done.
    DONE = 3;
    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
// A bucketized representation of likelihood, which is intended to give
// clients highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely that the image belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // It is unlikely that the image belongs to the specified vertical.
  UNLIKELY = 2;

  // It is possible that the image belongs to the specified vertical.
  POSSIBLE = 3;

  // It is likely that the image belongs to the specified vertical.
  LIKELY = 4;

  // It is very likely that the image belongs to the specified vertical.
  VERY_LIKELY = 5;
}
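Since the buckets are ordered by their enum numbers (UNKNOWN=0 through VERY_LIKELY=5), clients can gate on a threshold by comparing positions in that order. A minimal sketch over `SafeSearchAnnotation` fields, where the dict literal stands in for a decoded JSON response:

```python
# Sketch: treat the Likelihood buckets as an ordered scale for gating.
LIKELIHOOD_ORDER = ["UNKNOWN", "VERY_UNLIKELY", "UNLIKELY",
                    "POSSIBLE", "LIKELY", "VERY_LIKELY"]

def at_least(value: str, threshold: str) -> bool:
    return LIKELIHOOD_ORDER.index(value) >= LIKELIHOOD_ORDER.index(threshold)

safe_search = {"adult": "VERY_UNLIKELY", "racy": "POSSIBLE"}  # example values
flagged = any(at_least(v, "LIKELY") for v in safe_search.values())
```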