123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617 |
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.visionai.v1alpha1;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

// Per-language settings for the generated code.
option csharp_namespace = "Google.Cloud.VisionAI.V1Alpha1";
option go_package = "google.golang.org/genproto/googleapis/cloud/visionai/v1alpha1;visionai";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1alpha1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1alpha1";
option ruby_package = "Google::Cloud::VisionAI::V1alpha1";
// The kinds of annotation that can be attached to a stream.
//
// NOTE(review): the value declarations were missing from the reviewed text
// (only their comments remained), which left the enum without any values and
// therefore uncompilable. The names below follow the
// `<ENUM_NAME>_<VALUE>` convention and the numbers are assigned in
// declaration order; confirm both against the published schema before
// relying on them.
enum StreamAnnotationType {
  // Default value; the annotation type has not been set.
  STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;

  // An active_zone annotation defines a polygon on top of the content of an
  // image/video based stream. Subsequent processing only focuses on the
  // content inside the active zone.
  STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;

  // A crossing_line annotation defines a polyline on top of the content of an
  // image/video based Vision AI stream. Events that happen across the line
  // are captured, for example the count of people who go across the line in
  // the Occupancy Analytic Processor.
  STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}
// Output produced by the Personal Protective Equipment (PPE) Detection
// Operator.
message PersonalProtectiveEquipmentDetectionOutput {
  // Entity information for annotations that come from the person detection
  // prediction result.
  message PersonEntity {
    // Id of the entity.
    int64 person_entity_id = 1;
  }

  // Entity information for annotations that come from the PPE detection
  // prediction result.
  message PPEEntity {
    // Id of the label.
    int64 ppe_label_id = 1;

    // Human-readable label, e.g. helmet, glove, mask.
    string ppe_label_string = 2;

    // Human-readable super category label, e.g. head_cover, hands_cover,
    // face_cover.
    string ppe_supercategory_label_string = 3;

    // Id of the entity.
    int64 ppe_entity_id = 4;
  }

  // A bounding box expressed in normalized coordinates.
  message NormalizedBoundingBox {
    // Minimum x coordinate.
    float xmin = 1;

    // Minimum y coordinate.
    float ymin = 2;

    // Box width.
    float width = 3;

    // Box height.
    float height = 4;
  }

  // Location and entity information of a detected person.
  message PersonIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Location of the box, in normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Information about the person entity.
    PersonEntity person_entity = 4;
  }

  // Location and entity information of a detected piece of PPE.
  message PPEIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Location of the box, in normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Information about the PPE entity.
    PPEEntity ppe_entity = 4;
  }

  // A detected person together with the PPE associated with them and the
  // related protection information.
  message DetectedPerson {
    // Id of the detected person.
    int64 person_id = 1;

    // Identified box of the detected person.
    PersonIdentifiedBox detected_person_identified_box = 2;

    // Identified boxes of the PPE associated with the detected person.
    repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;

    // Coverage scores, one per body part.

    // Coverage score for the face.
    optional float face_coverage_score = 4;

    // Coverage score for the eyes.
    optional float eyes_coverage_score = 5;

    // Coverage score for the head.
    optional float head_coverage_score = 6;

    // Coverage score for the hands.
    optional float hands_coverage_score = 7;

    // Coverage score for the body.
    optional float body_coverage_score = 8;

    // Coverage score for the feet.
    optional float feet_coverage_score = 9;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // The list of detected persons.
  repeated DetectedPerson detected_persons = 2;
}
// Format of the prediction output for Generic Object Detection.
message ObjectDetectionPredictionResult {
  // Entity information for annotations that come from the object detection
  // prediction result.
  message Entity {
    // Id of the label.
    int64 label_id = 1;

    // Human-readable label.
    string label_string = 2;
  }

  // Location of an object together with its entity.
  message IdentifiedBox {
    // A bounding box expressed in normalized coordinates.
    message NormalizedBoundingBox {
      // Minimum x coordinate.
      float xmin = 1;

      // Minimum y coordinate.
      float ymin = 2;

      // Box width.
      float width = 3;

      // Box height.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Location of the box, in normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // The entity this box refers to.
    Entity entity = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // The list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;
}
// Format of the prediction output for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // Resource IDs of the identified AnnotationSpecs, sorted by confidence
  // score in descending order. Each entry is the id segment, not the full
  // resource name.
  repeated int64 ids = 1;

  // Display names of the identified AnnotationSpecs, in the same order as
  // the IDs.
  repeated string display_names = 2;

  // The Model's confidence in the correctness of each predicted ID; a higher
  // value means higher confidence. Same order as the IDs.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image that pinpoint the
  // found AnnotationSpecs, in the same order as the IDs. Each bounding box is
  // an array of 4 numbers `xMin`, `xMax`, `yMin`, and `yMax`, which represent
  // the extremal coordinates of the box. They are relative to the image size,
  // and the point 0,0 is in the top left of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}
// Format of the prediction output for Image and Text Classification.
message ClassificationPredictionResult {
  // Resource IDs of the identified AnnotationSpecs.
  repeated int64 ids = 1;

  // Display names of the identified AnnotationSpecs, in the same order as
  // the IDs.
  repeated string display_names = 2;

  // The Model's confidence in the correctness of each predicted ID; a higher
  // value means higher confidence. Same order as the IDs.
  repeated float confidences = 3;
}
// Format of the prediction output for Image Segmentation.
message ImageSegmentationPredictionResult {
  // A PNG image in which each pixel of the mask represents the category the
  // corresponding pixel of the original image was predicted to belong to.
  // The image has the same size as the original image. The mapping between
  // the AnnotationSpec and the color can be found in the model's metadata.
  // The model picks the most likely category; if none of the categories
  // reaches the confidence threshold, the pixel is marked as background.
  string category_mask = 1;

  // A one-channel image encoded as an 8-bit lossless PNG, with the same size
  // as the original image. For a given pixel, a darker color means less
  // confidence in the correctness of the category in the categoryMask for
  // that pixel. Black means no confidence and white means complete
  // confidence.
  string confidence_mask = 2;
}
// Format of the prediction output for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
  // One particular identification of an action, described by the
  // AnnotationSpec id, its display_name and the associated confidence score.
  message IdentifiedAction {
    // Resource ID of the identified AnnotationSpec.
    string id = 1;

    // Display name of the identified AnnotationSpec.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // Inclusive start of the video time segment in which the actions have been
  // identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // Inclusive end of the video time segment in which the actions have been
  // identified. In particular, an end equal to the start means the
  // identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // Every action identified in the time range.
  repeated IdentifiedAction actions = 3;
}
// Format of the prediction output for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // Bounding box of a detected object, i.e. the rectangle over the video
  // frame that pinpoints the found AnnotationSpec. Coordinates are relative
  // to the frame size, and the point 0,0 is in the top left of the frame.
  message BoundingBox {
    // Leftmost coordinate of the bounding box.
    float x_min = 1;

    // Rightmost coordinate of the bounding box.
    float x_max = 2;

    // Topmost coordinate of the bounding box.
    float y_min = 3;

    // Bottommost coordinate of the bounding box.
    float y_max = 4;
  }

  // One particular identification of an object, described by the
  // AnnotationSpec id and display_name, the bounding box, the associated
  // confidence score and the corresponding track_id.
  message DetectedObject {
    // Resource ID of the identified AnnotationSpec.
    string id = 1;

    // Display name of the identified AnnotationSpec.
    string display_name = 2;

    // Bounding box of the object.
    BoundingBox bounding_box = 3;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 4;

    // The same object may be identified on multiple frames, which are
    // typically adjacent. The set of frames in which a particular object has
    // been detected forms a track. This track_id can be used to trace down
    // all frames for a detected object.
    int64 track_id = 5;
  }

  // Inclusive start of the video time segment in which the current
  // identifications happen.
  google.protobuf.Timestamp segment_start_time = 1;

  // Inclusive end of the video time segment in which the current
  // identifications happen. In particular, an end equal to the start means
  // the identifications happen on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // Every object detected in the specified time range.
  repeated DetectedObject objects = 3;
}
// Format of the prediction output for Video Classification.
message VideoClassificationPredictionResult {
  // One particular identification of a classification, described by the
  // AnnotationSpec id and display_name, and the associated confidence score.
  message IdentifiedClassification {
    // Resource ID of the identified AnnotationSpec.
    string id = 1;

    // Display name of the identified AnnotationSpec.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // Inclusive start of the video time segment in which the classifications
  // have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // Inclusive end of the video time segment in which the classifications
  // have been identified. In particular, an end equal to the start means the
  // identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // Every classification identified in the time range.
  repeated IdentifiedClassification classifications = 3;
}
// Prediction result for occupancy counting.
message OccupancyCountingPredictionResult {
  // Entity information for annotations that come from the occupancy counting
  // operator.
  message Entity {
    // Id of the label.
    int64 label_id = 1;

    // Human-readable label.
    string label_string = 2;
  }

  // Location of an object together with its entity.
  message IdentifiedBox {
    // A bounding box expressed in normalized coordinates.
    message NormalizedBoundingBox {
      // Minimum x coordinate.
      float xmin = 1;

      // Minimum y coordinate.
      float ymin = 2;

      // Box width.
      float width = 3;

      // Box height.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Location of the box, in normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float score = 3;

    // The entity this box refers to.
    Entity entity = 4;

    // A unique id that identifies a track. It should be consistent across
    // frames. It only exists if tracking is enabled.
    //
    // NOTE(review): this id is an int64, whereas TrackInfo.track_id and
    // DwellTimeInfo.track_id are strings; how the two representations
    // correspond is not stated here -- confirm with the producer.
    int64 track_id = 5;
  }

  // Statistics for annotations that come from the occupancy counting
  // operator.
  message Stats {
    // An object and its count, for annotations that come from the occupancy
    // counting operator.
    message ObjectCount {
      // The entity of the counted object.
      Entity entity = 1;

      // How many of the object were counted.
      int32 count = 2;
    }

    // Counts for a crossing line.
    message CrossingLineCount {
      // The line annotation supplied by the user.
      StreamAnnotation annotation = 1;

      // Counts for the direction that follows the right hand rule.
      repeated ObjectCount positive_direction_counts = 2;

      // Counts for the direction that is opposite to the right hand rule.
      repeated ObjectCount negative_direction_counts = 3;
    }

    // Counts for an active zone.
    message ActiveZoneCount {
      // The active zone annotation supplied by the user.
      StreamAnnotation annotation = 1;

      // Counts inside the zone.
      repeated ObjectCount counts = 2;
    }

    // Counts over the full frame.
    repeated ObjectCount full_frame_count = 1;

    // Counts per crossing line.
    repeated CrossingLineCount crossing_line_counts = 2;

    // Counts per active zone.
    repeated ActiveZoneCount active_zone_counts = 3;
  }

  // Track information for annotations that come from the occupancy counting
  // operator.
  message TrackInfo {
    // A unique id that identifies a track. It should be consistent across
    // frames.
    string track_id = 1;

    // Timestamp at which this track started.
    google.protobuf.Timestamp start_time = 2;
  }

  // Dwell time information for annotations that come from the occupancy
  // counting operator.
  message DwellTimeInfo {
    // A unique id that identifies a track. It should be consistent across
    // frames.
    string track_id = 1;

    // Unique id of the zone in which the object is dwelling/waiting.
    string zone_id = 2;

    // Time at which a dwelling object was first identified in a zone.
    google.protobuf.Timestamp dwell_start_time = 3;

    // Time at which the dwelling object exited the zone.
    google.protobuf.Timestamp dwell_end_time = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // The list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;

  // Detection statistics.
  Stats stats = 3;

  // Track related information: all the tracks that are live at this
  // timestamp. It only exists if tracking is enabled.
  repeated TrackInfo track_info = 4;

  // Dwell time related information: all the tracks that are live in a given
  // zone, each with a dwell start and dwell end timestamp.
  repeated DwellTimeInfo dwell_time_info = 5;
}
// An annotation attached to a Vision AI stream resource.
message StreamAnnotation {
  // The payload of the annotation.
  oneof annotation_payload {
    // Payload for an annotation of type ACTIVE_ZONE.
    NormalizedPolygon active_zone = 5;

    // Payload for an annotation of type CROSSING_LINE.
    NormalizedPolyline crossing_line = 6;
  }

  // ID of the annotation. It must be unique within the context it is used
  // in, for example among all the annotations of one input stream of a
  // Vision AI application.
  string id = 1;

  // User-friendly name of the annotation.
  string display_name = 2;

  // Resource name of the Vision AI stream.
  string source_stream = 3;

  // The actual type of the annotation.
  StreamAnnotationType type = 4;
}
// Wrapper around a repeated StreamAnnotation.
message StreamAnnotations {
  // The wrapped annotations.
  repeated StreamAnnotation stream_annotations = 1;
}
// A polygon described by normalized vertices.
message NormalizedPolygon {
  // Normalized vertices of the bounding polygon. The top left corner of the
  // image is [0, 0].
  repeated NormalizedVertex normalized_vertices = 1;
}
// A normalized polyline, i.e. a curve made of connected straight-line
// segments.
message NormalizedPolyline {
  // The sequence of vertices, connected by straight lines.
  repeated NormalizedVertex normalized_vertices = 1;
}
// A 2D point in the image.
// NOTE: normalized vertex coordinates are relative to the original image and
// range from 0 to 1.
message NormalizedVertex {
  // The x coordinate.
  float x = 1;

  // The y coordinate.
  float y = 2;
}
// Essential metadata of App Platform.
// It is usually attached to a processor's output annotation so that the
// customer can identify the source of the data.
message AppPlatformMetadata {
  // Resource name of the application.
  string application = 1;

  // Resource id of the instance. An instance is a nested resource of the
  // application, under the collection 'instances'.
  string instance_id = 2;

  // Name of the node in the application graph.
  string node = 3;

  // Resource name of the processor the application node refers to.
  string processor = 4;
}
// For any customer processing logic based on a cloud function, the
// customer's cloud function is expected to receive an
// AppPlatformCloudFunctionRequest as its request and to send back an
// AppPlatformCloudFunctionResponse as its response.

// The request sent from AppPlatform to the Cloud Function.
message AppPlatformCloudFunctionRequest {
  // A general annotation message that uses the struct format to represent
  // different concrete annotation protobufs.
  message StructedInputAnnotation {
    // Ingestion time of the current annotation.
    int64 ingestion_time_micros = 1;

    // The actual annotation, in struct format.
    google.protobuf.Struct annotation = 2;
  }

  // AppPlatform metadata that lets the customer identify the source of the
  // payload.
  AppPlatformMetadata app_platform_metadata = 1;

  // The annotations to be processed by the customized Cloud Function.
  repeated StructedInputAnnotation annotations = 2;
}
// The response sent from the customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
  // A general annotation message that uses the struct format to represent
  // different concrete annotation protobufs.
  message StructedOutputAnnotation {
    // The actual annotation, in struct format.
    google.protobuf.Struct annotation = 1;
  }

  // The modified annotations returned to AppPlatform. If the annotations
  // fields are empty, those annotations are dropped by AppPlatform.
  repeated StructedOutputAnnotation annotations = 2;
}