annotations.proto

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.visionai.v1alpha1;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.VisionAI.V1Alpha1";
option go_package = "google.golang.org/genproto/googleapis/cloud/visionai/v1alpha1;visionai";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1alpha1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1alpha1";
option ruby_package = "Google::Cloud::VisionAI::V1alpha1";

// Enum describing all possible types of a stream annotation.
enum StreamAnnotationType {
  // Type UNSPECIFIED.
  STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;

  // An active_zone annotation defines a polygon on top of the content from an
  // image/video based stream; subsequent processing will focus only on the
  // content inside the active zone.
  STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;

  // A crossing_line annotation defines a polyline on top of the content from
  // an image/video based Vision AI stream; events happening across the line
  // will be captured. For example, the counts of people who go across the
  // line in the Occupancy Analytic Processor.
  STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}
// Output format for the Personal Protective Equipment Detection Operator.
message PersonalProtectiveEquipmentDetectionOutput {
  // The entity info for annotations from the person detection prediction
  // result.
  message PersonEntity {
    // Entity id.
    int64 person_entity_id = 1;
  }

  // The entity info for annotations from the PPE detection prediction result.
  message PPEEntity {
    // Label id.
    int64 ppe_label_id = 1;

    // Human readable string of the label (examples: helmet, glove, mask).
    string ppe_label_string = 2;

    // Human readable string of the super category label (examples:
    // head_cover, hands_cover, face_cover).
    string ppe_supercategory_label_string = 3;

    // Entity id.
    int64 ppe_entity_id = 4;
  }

  // Bounding box in the normalized coordinates.
  message NormalizedBoundingBox {
    // Min in x coordinate.
    float xmin = 1;

    // Min in y coordinate.
    float ymin = 2;

    // Width of the bounding box.
    float width = 3;

    // Height of the bounding box.
    float height = 4;
  }
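
  // Illustrative sketch (not part of the API): converting a normalized box to
  // pixel coordinates. The frame size (here 1920x1080) is an assumption
  // supplied by the caller; the schema itself carries only normalized values.
  //
  //   left   = xmin * frame_width
  //   top    = ymin * frame_height
  //   right  = (xmin + width) * frame_width
  //   bottom = (ymin + height) * frame_height
  //
  // For example, { xmin: 0.25 ymin: 0.1 width: 0.5 height: 0.3 } on a
  // 1920x1080 frame covers pixels x in [480, 1440] and y in [108, 432].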
  // A PersonIdentifiedBox contains the location and the entity info of the
  // person.
  message PersonIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Person entity info.
    PersonEntity person_entity = 4;
  }

  // A PPEIdentifiedBox contains the location and the entity info of the PPE.
  message PPEIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // PPE entity info.
    PPEEntity ppe_entity = 4;
  }

  // A DetectedPerson contains the detected person, their associated PPE, and
  // the corresponding coverage information.
  message DetectedPerson {
    // The id of the detected person.
    int64 person_id = 1;

    // The identified box of the detected person.
    PersonIdentifiedBox detected_person_identified_box = 2;

    // The identified boxes of the PPE associated with the detected person.
    repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;

    // Coverage scores for each body part.

    // Coverage score for face.
    optional float face_coverage_score = 4;

    // Coverage score for eyes.
    optional float eyes_coverage_score = 5;

    // Coverage score for head.
    optional float head_coverage_score = 6;

    // Coverage score for hands.
    optional float hands_coverage_score = 7;

    // Coverage score for body.
    optional float body_coverage_score = 8;

    // Coverage score for feet.
    optional float feet_coverage_score = 9;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of DetectedPersons.
  repeated DetectedPerson detected_persons = 2;
}
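
// Illustrative sketch (not part of the API): a
// PersonalProtectiveEquipmentDetectionOutput in proto text format. All ids,
// labels, and scores below are made up for the example.
//
//   current_time { seconds: 1650000000 }
//   detected_persons {
//     person_id: 1
//     detected_person_identified_box {
//       box_id: 10
//       normalized_bounding_box { xmin: 0.4 ymin: 0.2 width: 0.1 height: 0.4 }
//       confidence_score: 0.92
//       person_entity { person_entity_id: 1 }
//     }
//     detected_ppe_identified_boxes {
//       box_id: 11
//       normalized_bounding_box { xmin: 0.42 ymin: 0.2 width: 0.05 height: 0.08 }
//       confidence_score: 0.88
//       ppe_entity {
//         ppe_label_id: 2
//         ppe_label_string: "helmet"
//         ppe_supercategory_label_string: "head_cover"
//         ppe_entity_id: 7
//       }
//     }
//     head_coverage_score: 0.95
//   }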
// Prediction output format for Generic Object Detection.
message ObjectDetectionPredictionResult {
  // The entity info for annotations from the object detection prediction
  // result.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // An identified box contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Entity of this box.
    Entity entity = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;
}
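
// Illustrative sketch (not part of the API): an ObjectDetectionPredictionResult
// in proto text format; all values are made up.
//
//   current_time { seconds: 1650000000 }
//   identified_boxes {
//     box_id: 1
//     normalized_bounding_box { xmin: 0.1 ymin: 0.3 width: 0.2 height: 0.2 }
//     confidence_score: 0.87
//     entity { label_id: 3 label_string: "person" }
//   }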
// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified, ordered
  // by confidence score in descending order. It is the id segment instead of
  // the full resource name.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified; the
  // order matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a higher
  // value means higher confidence. The order matches the IDs.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image, that pinpoint
  // the found AnnotationSpecs. Given in the order that matches the IDs. Each
  // bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
  // `yMax`, which represent the extremal coordinates of the box. They are
  // relative to the image size, and the point 0,0 is in the top left
  // of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}
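
// Illustrative sketch (not part of the API): the parallel arrays above line
// up by index, so entry i of `ids`, `display_names`, `confidences`, and
// `bboxes` all describe the same detection. Values below are made up.
//
//   ids: 123
//   ids: 456
//   display_names: "cat"
//   display_names: "dog"
//   confidences: 0.97
//   confidences: 0.83
//   // Each bboxes ListValue holds [xMin, xMax, yMin, yMax]; this one spans
//   // the left half of the image:
//   bboxes {
//     values { number_value: 0.0 }
//     values { number_value: 0.5 }
//     values { number_value: 0.0 }
//     values { number_value: 1.0 }
//   }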
// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified; the
  // order matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a higher
  // value means higher confidence. The order matches the IDs.
  repeated float confidences = 3;
}
// Prediction output format for Image Segmentation.
message ImageSegmentationPredictionResult {
  // A PNG image where each pixel in the mask represents the category to which
  // the pixel in the original image was predicted to belong. The size of this
  // image will be the same as the original image. The mapping between the
  // AnnotationSpec and the color can be found in the model's metadata. The
  // model will choose the most likely category, and if none of the categories
  // reaches the confidence threshold, the pixel will be marked as background.
  string category_mask = 1;

  // A one-channel image which is encoded as an 8-bit lossless PNG. The size
  // of the image will be the same as the original image. For a specific
  // pixel, a darker color means less confidence in the correctness of the
  // category in the categoryMask for the corresponding pixel. Black means no
  // confidence and white means complete confidence.
  string confidence_mask = 2;
}
// Prediction output format for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
  // Each IdentifiedAction is one particular identification of an action
  // specified with the AnnotationSpec id, display_name and the associated
  // confidence score.
  message IdentifiedAction {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // actions have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the actions have
  // been identified. In particular, if the end is the same as the start, it
  // means the identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the actions identified in the time range.
  repeated IdentifiedAction actions = 3;
}
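
// Illustrative sketch (not part of the API): a frame-level result, where
// segment_start_time equals segment_end_time. Values are made up.
//
//   segment_start_time { seconds: 100 }
//   segment_end_time { seconds: 100 }
//   actions { id: "42" display_name: "running" confidence: 0.91 }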
// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // A bounding box for the detected object, i.e. the rectangle over the video
  // frame pinpointing the found AnnotationSpec. The coordinates are relative
  // to the frame size, and the point 0,0 is in the top left of the frame.
  message BoundingBox {
    // The leftmost coordinate of the bounding box.
    float x_min = 1;

    // The rightmost coordinate of the bounding box.
    float x_max = 2;

    // The topmost coordinate of the bounding box.
    float y_min = 3;

    // The bottommost coordinate of the bounding box.
    float y_max = 4;
  }

  // Each DetectedObject is one particular identification of an object
  // specified with the AnnotationSpec id and display_name, the bounding box,
  // the associated confidence score and the corresponding track_id.
  message DetectedObject {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // Bounding box.
    BoundingBox bounding_box = 3;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 4;

    // The same object may be identified on multiple frames, which are
    // typically adjacent. The set of frames where a particular object has
    // been detected forms a track. This track_id can be used to trace down
    // all frames for a detected object.
    int64 track_id = 5;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // current identifications happen.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the current
  // identifications happen. In particular, if the end is the same as the
  // start, it means the identifications happen on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the objects detected in the specified time range.
  repeated DetectedObject objects = 3;
}
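
// Illustrative sketch (not part of the API): the same track_id appearing in
// results for consecutive frames identifies one physical object. Two made-up
// frame-level results for the same car:
//
//   // Frame at t=10s:
//   segment_start_time { seconds: 10 }
//   segment_end_time { seconds: 10 }
//   objects {
//     id: "7" display_name: "car" track_id: 99 confidence: 0.90
//     bounding_box { x_min: 0.10 x_max: 0.30 y_min: 0.50 y_max: 0.70 }
//   }
//
//   // Frame at t=11s; track_id 99 is the same car, slightly moved:
//   segment_start_time { seconds: 11 }
//   segment_end_time { seconds: 11 }
//   objects {
//     id: "7" display_name: "car" track_id: 99 confidence: 0.88
//     bounding_box { x_min: 0.12 x_max: 0.32 y_min: 0.50 y_max: 0.70 }
//   }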
// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
  // Each IdentifiedClassification is one particular identification of a
  // classification specified with the AnnotationSpec id and display_name,
  // and the associated confidence score.
  message IdentifiedClassification {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // classifications have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the
  // classifications have been identified. In particular, if the end is the
  // same as the start, it means the identification happens on a specific
  // video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the classifications identified in the time range.
  repeated IdentifiedClassification classifications = 3;
}
// The prediction result proto for occupancy counting.
message OccupancyCountingPredictionResult {
  // The entity info for annotations from the occupancy counting operator.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // An identified box contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float score = 3;

    // Entity of this box.
    Entity entity = 4;

    // A unique id to identify a track. It should be consistent across frames.
    // It only exists if tracking is enabled.
    int64 track_id = 5;
  }
  // The statistics info for annotations from the occupancy counting operator.
  message Stats {
    // The object info and count for annotations from the occupancy counting
    // operator.
    message ObjectCount {
      // Entity of this object.
      Entity entity = 1;

      // Count of the object.
      int32 count = 2;
    }

    // Message for the crossing line count.
    message CrossingLineCount {
      // Line annotation from the user.
      StreamAnnotation annotation = 1;

      // The direction that follows the right-hand rule.
      repeated ObjectCount positive_direction_counts = 2;

      // The direction that is opposite to the right-hand rule.
      repeated ObjectCount negative_direction_counts = 3;
    }

    // Message for the active zone count.
    message ActiveZoneCount {
      // Active zone annotation from the user.
      StreamAnnotation annotation = 1;

      // Counts in the zone.
      repeated ObjectCount counts = 2;
    }

    // Counts of the full frame.
    repeated ObjectCount full_frame_count = 1;

    // Crossing line counts.
    repeated CrossingLineCount crossing_line_counts = 2;

    // Active zone counts.
    repeated ActiveZoneCount active_zone_counts = 3;
  }
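
  // Illustrative sketch (not part of the API): a crossing-line Stats entry in
  // proto text format; the two count directions refer to the right-hand rule
  // noted above. All ids and counts are made up.
  //
  //   crossing_line_counts {
  //     annotation { id: "line-1" type: STREAM_ANNOTATION_TYPE_CROSSING_LINE }
  //     positive_direction_counts {
  //       entity { label_id: 1 label_string: "person" }
  //       count: 12
  //     }
  //     negative_direction_counts {
  //       entity { label_id: 1 label_string: "person" }
  //       count: 7
  //     }
  //   }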
  // The track info for annotations from the occupancy counting operator.
  message TrackInfo {
    // A unique id to identify a track. It should be consistent across frames.
    string track_id = 1;

    // Start timestamp of this track.
    google.protobuf.Timestamp start_time = 2;
  }

  // The dwell time info for annotations from the occupancy counting operator.
  message DwellTimeInfo {
    // A unique id to identify a track. It should be consistent across frames.
    string track_id = 1;

    // The unique id for the zone in which the object is dwelling/waiting.
    string zone_id = 2;

    // The beginning time when a dwelling object has been identified in a
    // zone.
    google.protobuf.Timestamp dwell_start_time = 3;

    // The end time when a dwelling object has exited a zone.
    google.protobuf.Timestamp dwell_end_time = 4;
  }
  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;

  // Detection statistics.
  Stats stats = 3;

  // Track related information. All the tracks that are live at this
  // timestamp. It only exists if tracking is enabled.
  repeated TrackInfo track_info = 4;

  // Dwell time related information. All the tracks that are live in a given
  // zone, with start and end dwell-time timestamps.
  repeated DwellTimeInfo dwell_time_info = 5;
}
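
// Illustrative sketch (not part of the API): the dwell duration of a track can
// be derived as dwell_end_time - dwell_start_time. In this made-up example the
// object on track "t-5" dwelt in zone "z-1" for 30 seconds.
//
//   dwell_time_info {
//     track_id: "t-5"
//     zone_id: "z-1"
//     dwell_start_time { seconds: 1650000000 }
//     dwell_end_time { seconds: 1650000030 }
//   }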
// Message describing annotations for a Vision AI stream resource.
message StreamAnnotation {
  oneof annotation_payload {
    // Annotation for type ACTIVE_ZONE.
    NormalizedPolygon active_zone = 5;

    // Annotation for type CROSSING_LINE.
    NormalizedPolyline crossing_line = 6;
  }

  // ID of the annotation. It must be unique within a given context, for
  // example, among all the annotations for one input stream of a Vision AI
  // application.
  string id = 1;

  // User-friendly name for the annotation.
  string display_name = 2;

  // The Vision AI stream resource name.
  string source_stream = 3;

  // The actual type of Annotation.
  StreamAnnotationType type = 4;
}
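
// Illustrative sketch (not part of the API): an active-zone annotation in
// proto text format, covering the lower-right quadrant of the frame. The id,
// name, and stream resource name are made up.
//
//   id: "zone-1"
//   display_name: "loading dock"
//   source_stream: "projects/p/locations/l/clusters/c/streams/s"
//   type: STREAM_ANNOTATION_TYPE_ACTIVE_ZONE
//   active_zone {
//     normalized_vertices { x: 0.5 y: 0.5 }
//     normalized_vertices { x: 1.0 y: 0.5 }
//     normalized_vertices { x: 1.0 y: 1.0 }
//     normalized_vertices { x: 0.5 y: 1.0 }
//   }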
// A wrapper of repeated StreamAnnotation.
message StreamAnnotations {
  // Multiple annotations.
  repeated StreamAnnotation stream_annotations = 1;
}

// Normalized Polygon.
message NormalizedPolygon {
  // The bounding polygon normalized vertices. The top left corner of the
  // image will be [0, 0].
  repeated NormalizedVertex normalized_vertices = 1;
}

// Normalized Polyline, which represents a curve consisting of connected
// straight-line segments.
message NormalizedPolyline {
  // A sequence of vertices connected by straight lines.
  repeated NormalizedVertex normalized_vertices = 1;
}

// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate.
  float x = 1;

  // Y coordinate.
  float y = 2;
}
// Message of essential metadata of App Platform.
// This message is usually attached to a certain processor output annotation
// so that the customer can identify the source of the data.
message AppPlatformMetadata {
  // The application resource name.
  string application = 1;

  // The instance resource id. An instance is a nested resource of an
  // application under the collection 'instances'.
  string instance_id = 2;

  // The node name of the application graph.
  string node = 3;

  // The referred processor resource name of the application node.
  string processor = 4;
}
// For any Cloud Function based customer processing logic, the customer's
// Cloud Function is expected to receive AppPlatformCloudFunctionRequest as
// the request and send back AppPlatformCloudFunctionResponse as the response.
// Message of the request from AppPlatform to the Cloud Function.
message AppPlatformCloudFunctionRequest {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedInputAnnotation {
    // The ingestion time of the current annotation.
    int64 ingestion_time_micros = 1;

    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 2;
  }

  // The metadata of the AppPlatform for the customer to identify the source
  // of the payload.
  AppPlatformMetadata app_platform_metadata = 1;

  // The actual annotations to be processed by the customized Cloud Function.
  repeated StructedInputAnnotation annotations = 2;
}

// Message of the response from the customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedOutputAnnotation {
    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 1;
  }

  // The modified annotations that are returned to AppPlatform.
  // If the annotation fields are empty, those annotations will be dropped by
  // AppPlatform.
  repeated StructedOutputAnnotation annotations = 2;
}
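
// Illustrative sketch (not part of the API): a minimal request/response round
// trip in proto text format, assuming the Cloud Function simply echoes the
// annotation back unchanged. All resource names and values are made up.
//
// Request sent to the customer's Cloud Function:
//
//   app_platform_metadata {
//     application: "projects/p/locations/l/applications/app"
//     instance_id: "instance-1"
//     node: "occupancy-count-node"
//     processor: "projects/p/locations/l/processors/occupancy-counting"
//   }
//   annotations {
//     ingestion_time_micros: 1650000000000000
//     annotation { fields { key: "stats" value { struct_value {} } } }
//   }
//
// Response returned by the Cloud Function (an annotation left empty here
// would be dropped by AppPlatform):
//
//   annotations {
//     annotation { fields { key: "stats" value { struct_value {} } } }
//   }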