image_annotator.proto

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Service that performs image detection and annotation for a batch of
  // files. Currently, only "application/pdf", "image/tiff" and "image/gif"
  // are supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and performs detection and annotation on each
  // extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's GCS bucket, each JSON file containing a
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}
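
// The HTTP bindings above expose each RPC at a global path and at
// parent-scoped paths. As an illustrative sketch (the project ID and location
// below are hypothetical placeholders, not values from this file):
//
//   POST https://vision.googleapis.com/v1/images:annotate
//   POST https://vision.googleapis.com/v1/projects/my-project/images:annotate
//   POST https://vision.googleapis.com/v1/projects/my-project/locations/eu/images:annotate
//
// All three accept the same BatchAnnotateImagesRequest as the JSON body; the
// two parent-scoped forms bind the request's `parent` field from the URL path.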

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;
  // It is very unlikely.
  VERY_UNLIKELY = 1;
  // It is unlikely.
  UNLIKELY = 2;
  // It is possible.
  POSSIBLE = 3;
  // It is likely.
  LIKELY = 4;
  // It is very likely.
  VERY_LIKELY = 5;
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;
    // Run face detection.
    FACE_DETECTION = 1;
    // Run landmark detection.
    LANDMARK_DETECTION = 2;
    // Run logo detection.
    LOGO_DETECTION = 3;
    // Run label detection.
    LABEL_DETECTION = 4;
    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;
    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;
    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;
    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;
    // Run crop hints.
    CROP_HINTS = 9;
    // Run web detection.
    WEB_DETECTION = 10;
    // Run Product Search.
    PRODUCT_SEARCH = 12;
    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }
  // The feature type.
  Type type = 1;
  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;
  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
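
// A minimal sketch of a `features` list in protobuf text format, combining a
// capped label request with OCR (the values are illustrative, not from this
// file):
//
//   features { type: LABEL_DETECTION max_results: 10 }
//   features { type: TEXT_DETECTION model: "builtin/stable" }
//
// `max_results` is omitted for TEXT_DETECTION because, per the field comment
// above, it does not apply to that feature type.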

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;
  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;
  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
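
// A sketch of the two ways to supply an image, in protobuf text format (the
// bucket and object names are hypothetical):
//
//   # By reference:
//   image { source { image_uri: "gs://my-bucket/photos/cat.jpg" } }
//
//   # Inline bytes; in the JSON mapping this field would be base64-encoded:
//   image { content: "\211PNG\r\n..." }
//
// Per the field comments above, if both `content` and `source` were set,
// `content` would take precedence.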

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;
      // Left eye.
      LEFT_EYE = 1;
      // Right eye.
      RIGHT_EYE = 2;
      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;
      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;
      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;
      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;
      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;
      // Nose tip.
      NOSE_TIP = 8;
      // Upper lip.
      UPPER_LIP = 9;
      // Lower lip.
      LOWER_LIP = 10;
      // Mouth left.
      MOUTH_LEFT = 11;
      // Mouth right.
      MOUTH_RIGHT = 12;
      // Mouth center.
      MOUTH_CENTER = 13;
      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;
      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;
      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;
      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;
      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;
      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;
      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;
      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;
      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;
      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;
      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;
      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;
      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;
      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;
      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;
      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;
      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;
      // Chin gnathion.
      CHIN_GNATHION = 32;
      // Chin left gonion.
      CHIN_LEFT_GONION = 33;
      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;
      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }
    // Face landmark type.
    Type type = 3;
    // Face landmark position.
    Position position = 4;
  }
  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;
  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;
  // Detected face landmarks.
  repeated Landmark landmarks = 3;
  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;
  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;
  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;
  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;
  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;
  // Joy likelihood.
  Likelihood joy_likelihood = 9;
  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;
  // Anger likelihood.
  Likelihood anger_likelihood = 11;
  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;
  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;
  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;
  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}
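
// A hypothetical FaceAnnotation sketch in protobuf text format (the values
// are invented for illustration, not real API output), showing how the angle
// and likelihood fields read together:
//
//   face_annotations {
//     roll_angle: -3.2              # slight rotation in the image plane
//     pan_angle: 15.0               # leftward/rightward turn of the face
//     tilt_angle: -8.5              # upward/downward pitch of the face
//     detection_confidence: 0.97
//     joy_likelihood: VERY_LIKELY
//     headwear_likelihood: UNLIKELY
//   }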

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;
  // Value of the property.
  string value = 2;
  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;
  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;
  // Entity textual description, expressed in its `locale` language.
  string description = 3;
  // Overall score of the result. Range [0, 1].
  float score = 4;
  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];
  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;
  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;
  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;
  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
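
// A hypothetical label annotation sketch in protobuf text format (the MID and
// values are illustrative), highlighting the fields discussed above: `score`
// rates the overall result, `topicality` rates the label's relevancy to the
// image, and the deprecated `confidence` field is superseded by `score`:
//
//   label_annotations {
//     mid: "/m/01yrx"
//     description: "Cat"
//     score: 0.98
//     topicality: 0.98
//   }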

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;
  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;
  // Object name, expressed in its `language_code` language.
  string name = 3;
  // Score of the result. Range [0, 1].
  float score = 4;
  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;
  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;
  // Likelihood that this is a medical image.
  Likelihood medical = 3;
  // Likelihood that this image contains violent content.
  Likelihood violence = 4;
  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;
  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;
  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;
  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant color detection completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;
  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;
  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
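
// A sketch of crop-hint aspect ratios in protobuf text format; as in the
// comment above, each float is width divided by height (values illustrative):
//
//   crop_hints_params {
//     aspect_ratios: 1.33333   # 4:3
//     aspect_ratios: 1.77778   # 16:9
//     aspect_ratios: 1.0       # square
//   }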

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set this flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;
  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;
  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;
  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;
  // Parameters for product search.
  ProductSearchParams product_search_params = 5;
  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;
  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
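
// A sketch of an ImageContext in protobuf text format, combining a language
// hint with the TEXT_DETECTION confidence flag documented above (the values
// are illustrative):
//
//   image_context {
//     language_hints: "ja"
//     text_detection_params { enable_text_detection_confidence_score: true }
//   }
//
// Per the `language_hints` comment, a hint helps only when the text's
// language is actually known; a wrong hint can significantly hurt results.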

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;
  // Requested features.
  repeated Feature features = 2;
  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;
  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;
  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;
  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;
  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;
  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;
  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;
  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;
  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;
  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;
  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;
  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;
  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;
  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;
  // If present, contextual information needed to understand where this image
  // comes from.
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //   `us`: USA country only,
  //   `asia`: East Asia areas, like Japan, Taiwan,
  //   `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}
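
// Putting the pieces together, a minimal end-to-end request sketch in
// protobuf text format (the project, location, and image URI are
// hypothetical):
//
//   parent: "projects/project-A/locations/eu"
//   requests {
//     image { source { image_uri: "gs://my-bucket/demo.jpg" } }
//     features { type: LABEL_DETECTION max_results: 5 }
//     features { type: SAFE_SEARCH_DETECTION }
//   }
//
// Sent over HTTP, this becomes
// POST /v1/projects/project-A/locations/eu/images:annotate with the JSON
// mapping of this message as the body (the `parent` component is carried in
// the URL path).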

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate one single file, e.g. a PDF, TIFF or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;
  // Required. Requested features.
  repeated Feature features = 2;
  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;
  // Pages of the file to perform image annotation on.
  //
  // Pages start from 1; the first page of the file is page 1.
  // At most 5 pages are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is GIF instead of PDF or TIFF, page refers to GIF frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}
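
// A sketch of a file annotation request in protobuf text format, using the
// negative-page convention above to grab the first two pages and the last
// page of a PDF (the GCS URI is hypothetical):
//
//   requests {
//     input_config {
//       gcs_source { uri: "gs://my-bucket/report.pdf" }
//       mime_type: "application/pdf"
//     }
//     features { type: DOCUMENT_TEXT_DETECTION }
//     pages: 1
//     pages: 2
//     pages: -1
//   }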

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;
  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;
  // This field gives the total number of pages in the file.
  int32 total_pages = 3;
  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //   `us`: USA country only,
  //   `asia`: East Asia areas, like Japan, Taiwan,
  //   `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;
  // Required. Requested features.
  repeated Feature features = 2;
  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;
  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];
  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //   `us`: USA country only,
  //   `asia`: East Asia areas, like Japan, Taiwan,
  //   `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //   `us`: USA country only,
  //   `asia`: East Asia areas, like Japan, Taiwan,
  //   `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;
  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;
  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;
  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}
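
// A sketch of an async output configuration in protobuf text format, matching
// the worked example above: 100 pages / batch_size 20 = 5 output shards (the
// destination prefix is hypothetical):
//
//   output_config {
//     gcs_destination { uri: "gs://my-bucket/vision-output/" }
//     batch_size: 20
//   }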

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a GCS file prefix or a GCS
  // directory. In either case, the URI should be unique, because in order to
  // get all of the output files you will need to do a wildcard GCS search on
  // the URI prefix you provide.
  //
  // Examples:
  //
  // * File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //   will be created in gs://bucket-name/here/ and the names of the
  //   output files will begin with "filenameprefix".
  //
  // * Directory Prefix: gs://bucket-name/some/location/   The output files
  //   will be created in gs://bucket-name/some/location/ and the names of the
  //   output files could be anything because there was no filename prefix
  //   specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;
    // Request is received.
    CREATED = 1;
    // Request is actively being processed.
    RUNNING = 2;
    // The batch processing is done.
    DONE = 3;
    // The batch processing was cancelled.
    CANCELLED = 4;
  }
  // Current state of the batch operation.
  State state = 1;
  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;
  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}