video_intelligence.proto

// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";

package google.cloud.videointelligence.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.VideoIntelligence.V1Beta2";
option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1beta2;videointelligence";
option java_multiple_files = true;
option java_outer_classname = "VideoIntelligenceServiceProto";
option java_package = "com.google.cloud.videointelligence.v1beta2";
option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1beta2";
option ruby_package = "Google::Cloud::VideoIntelligence::V1beta2";
// Service that implements the Google Cloud Video Intelligence API.
service VideoIntelligenceService {
  option (google.api.default_host) = "videointelligence.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Performs asynchronous video annotation. Progress and results can be
  // retrieved through the `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
  // `Operation.response` contains `AnnotateVideoResponse` (results).
  rpc AnnotateVideo(AnnotateVideoRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/videos:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "input_uri,features";
    option (google.longrunning.operation_info) = {
      response_type: "AnnotateVideoResponse"
      metadata_type: "AnnotateVideoProgress"
    };
  }
}
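
// A minimal sketch of the HTTP mapping declared above, for orientation only.
// The endpoint, path, and field names come from this file; the bucket and
// object names are hypothetical, and the JSON field names follow the standard
// proto3 JSON mapping (lowerCamelCase):
//
//   POST https://videointelligence.googleapis.com/v1beta2/videos:annotate
//   {
//     "inputUri": "gs://my-bucket/my-video.mp4",
//     "features": ["LABEL_DETECTION", "SHOT_CHANGE_DETECTION"]
//   }
//
// The call returns a `google.longrunning.Operation`; poll it with
// `Operations.GetOperation` until `done` is true, then read the
// `AnnotateVideoResponse` from `Operation.response`.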

// Video annotation request.
message AnnotateVideoRequest {
  // Input video location. Currently, only
  // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
  // supported, which must be specified in the following format:
  // `gs://bucket-id/object-id` (other URI formats return
  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  // more information, see [Request
  // URIs](https://cloud.google.com/storage/docs/request-endpoints). A video URI
  // may include wildcards in `object-id`, and thus identify multiple videos.
  // Supported wildcards: '*' to match 0 or more characters;
  // '?' to match 1 character. If unset, the input video should be embedded
  // in the request as `input_content`. If set, `input_content` should be unset.
  string input_uri = 1;

  // The video data bytes.
  // If unset, the input video(s) should be specified via `input_uri`.
  // If set, `input_uri` should be unset.
  bytes input_content = 6;

  // Required. Requested video annotation features.
  repeated Feature features = 2 [(google.api.field_behavior) = REQUIRED];

  // Additional video context and/or feature-specific parameters.
  VideoContext video_context = 3;

  // Optional. Location where the output (in JSON format) should be stored.
  // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
  // URIs are supported, which must be specified in the following format:
  // `gs://bucket-id/object-id` (other URI formats return
  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  // more information, see [Request
  // URIs](https://cloud.google.com/storage/docs/request-endpoints).
  string output_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Cloud region where annotation should take place. Supported cloud
  // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
  // is specified, a region will be determined based on video file location.
  string location_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
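
// To illustrate the wildcard rules above (hypothetical bucket and object
// names): an `input_uri` of `gs://my-bucket/clip-?.mp4` matches `clip-1.mp4`
// but not `clip-10.mp4`, while `gs://my-bucket/clip-*.mp4` matches both.
// Each matched object is annotated as a separate video, surfacing as its own
// `VideoAnnotationResults` entry in the response.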

// Video context and/or feature-specific parameters.
message VideoContext {
  // Video segments to annotate. The segments may overlap and are not required
  // to be contiguous or span the whole video. If unspecified, each video is
  // treated as a single segment.
  repeated VideoSegment segments = 1;

  // Config for LABEL_DETECTION.
  LabelDetectionConfig label_detection_config = 2;

  // Config for SHOT_CHANGE_DETECTION.
  ShotChangeDetectionConfig shot_change_detection_config = 3;

  // Config for EXPLICIT_CONTENT_DETECTION.
  ExplicitContentDetectionConfig explicit_content_detection_config = 4;

  // Config for FACE_DETECTION.
  FaceDetectionConfig face_detection_config = 5;
}
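
// A sketch (illustrative values only) of a `VideoContext` in text format that
// restricts annotation to the first minute and requests both shot- and
// frame-level labels from a fixed camera:
//
//   segments {
//     start_time_offset { seconds: 0 }
//     end_time_offset { seconds: 60 }
//   }
//   label_detection_config {
//     label_detection_mode: SHOT_AND_FRAME_MODE
//     stationary_camera: true
//   }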

// Config for LABEL_DETECTION.
message LabelDetectionConfig {
  // What labels should be detected with LABEL_DETECTION, in addition to
  // video-level labels or segment-level labels.
  // If unspecified, defaults to `SHOT_MODE`.
  LabelDetectionMode label_detection_mode = 1;

  // Whether the video has been shot from a stationary (i.e. non-moving)
  // camera. When set to true, might improve detection accuracy for moving
  // objects. Should be used with `SHOT_AND_FRAME_MODE` enabled.
  bool stationary_camera = 2;

  // Model to use for label detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}

// Config for SHOT_CHANGE_DETECTION.
message ShotChangeDetectionConfig {
  // Model to use for shot change detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;
}

// Config for EXPLICIT_CONTENT_DETECTION.
message ExplicitContentDetectionConfig {
  // Model to use for explicit content detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;
}

// Config for FACE_DETECTION.
message FaceDetectionConfig {
  // Model to use for face detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;

  // Whether bounding boxes should be included in the face annotation output.
  bool include_bounding_boxes = 2;
}

// Video segment.
message VideoSegment {
  // Time-offset, relative to the beginning of the video,
  // corresponding to the start of the segment (inclusive).
  google.protobuf.Duration start_time_offset = 1;

  // Time-offset, relative to the beginning of the video,
  // corresponding to the end of the segment (inclusive).
  google.protobuf.Duration end_time_offset = 2;
}
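
// As a worked example of the offsets above: a segment covering 5 s through
// 10.5 s of the video is the `google.protobuf.Duration` pair
// `start_time_offset { seconds: 5 }` and
// `end_time_offset { seconds: 10 nanos: 500000000 }` (a `Duration` carries
// whole seconds plus nanoseconds).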

// Video segment level annotation results for label detection.
message LabelSegment {
  // Video segment where a label was detected.
  VideoSegment segment = 1;

  // Confidence that the label is accurate. Range: [0, 1].
  float confidence = 2;
}

// Video frame level annotation results for label detection.
message LabelFrame {
  // Time-offset, relative to the beginning of the video, corresponding to the
  // video frame for this location.
  google.protobuf.Duration time_offset = 1;

  // Confidence that the label is accurate. Range: [0, 1].
  float confidence = 2;
}

// Detected entity from video analysis.
message Entity {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string entity_id = 1;

  // Textual description, e.g. `Fixed-gear bicycle`.
  string description = 2;

  // Language code for `description` in BCP-47 format.
  string language_code = 3;
}

// Label annotation.
message LabelAnnotation {
  // Detected entity.
  Entity entity = 1;

  // Common categories for the detected entity.
  // For example, when the label is `Terrier`, the category is likely `dog`.
  // In some cases there may be more than one category, e.g. `Terrier` could
  // also be a `pet`.
  repeated Entity category_entities = 2;

  // All video segments where a label was detected.
  repeated LabelSegment segments = 3;

  // All video frames where a label was detected.
  repeated LabelFrame frames = 4;
}
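
// A sketch of one `LabelAnnotation` in text format (illustrative values
// only), showing the entity/category split described above:
//
//   entity { description: "terrier" language_code: "en-US" }
//   category_entities { description: "dog" }
//   category_entities { description: "pet" }
//   segments {
//     segment {
//       start_time_offset { seconds: 0 }
//       end_time_offset { seconds: 30 }
//     }
//     confidence: 0.93
//   }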

// Video frame level annotation results for explicit content.
message ExplicitContentFrame {
  // Time-offset, relative to the beginning of the video, corresponding to the
  // video frame for this location.
  google.protobuf.Duration time_offset = 1;

  // Likelihood of the pornography content.
  Likelihood pornography_likelihood = 2;
}

// Explicit content annotation (based on per-frame visual signals only).
// If no explicit content has been detected in a frame, no annotations are
// present for that frame.
message ExplicitContentAnnotation {
  // All video frames where explicit content was detected.
  repeated ExplicitContentFrame frames = 1;
}
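
// A hedged reading of the annotation above (not a normative threshold): a
// client that wants to flag content could scan `frames` and treat any frame
// whose `pornography_likelihood` is `LIKELY` or `VERY_LIKELY` as a hit, using
// that frame's `time_offset` to locate it in the video.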

// Normalized bounding box.
// The normalized vertex coordinates are relative to the original image.
// Range: [0, 1].
message NormalizedBoundingBox {
  // Left X coordinate.
  float left = 1;

  // Top Y coordinate.
  float top = 2;

  // Right X coordinate.
  float right = 3;

  // Bottom Y coordinate.
  float bottom = 4;
}
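
// To recover pixel coordinates, multiply by the original frame size. For
// example (illustrative numbers), on a 1920x1080 frame a box of
// `left: 0.25, top: 0.1, right: 0.75, bottom: 0.9` spans pixels
// (480, 108) through (1440, 972).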

// Video segment level annotation results for face detection.
message FaceSegment {
  // Video segment where a face was detected.
  VideoSegment segment = 1;
}

// Video frame level annotation results for face detection.
message FaceFrame {
  // Normalized bounding boxes in a frame.
  // There can be more than one box if the same face is detected in multiple
  // locations within the current frame.
  repeated NormalizedBoundingBox normalized_bounding_boxes = 1;

  // Time-offset, relative to the beginning of the video,
  // corresponding to the video frame for this location.
  google.protobuf.Duration time_offset = 2;
}

// Face annotation.
message FaceAnnotation {
  // Thumbnail of a representative face view (in JPEG format).
  bytes thumbnail = 1;

  // All video segments where a face was detected.
  repeated FaceSegment segments = 2;

  // All video frames where a face was detected.
  repeated FaceFrame frames = 3;
}

// Annotation results for a single video.
message VideoAnnotationResults {
  // Video file location in
  // [Google Cloud Storage](https://cloud.google.com/storage/).
  string input_uri = 1;

  // Label annotations on video level or user-specified segment level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation segment_label_annotations = 2;

  // Label annotations on shot level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation shot_label_annotations = 3;

  // Label annotations on frame level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation frame_label_annotations = 4;

  // Face annotations. There is exactly one element for each unique face.
  repeated FaceAnnotation face_annotations = 5;

  // Shot annotations. Each shot is represented as a video segment.
  repeated VideoSegment shot_annotations = 6;

  // Explicit content annotation.
  ExplicitContentAnnotation explicit_annotation = 7;

  // If set, indicates an error. Note that for a single `AnnotateVideoRequest`
  // some videos may succeed and some may fail.
  google.rpc.Status error = 9;
}

// Video annotation response. Included in the `response`
// field of the `Operation` returned by the `GetOperation`
// call of the `google::longrunning::Operations` service.
message AnnotateVideoResponse {
  // Annotation results for all videos specified in `AnnotateVideoRequest`.
  repeated VideoAnnotationResults annotation_results = 1;
}

// Annotation progress for a single video.
message VideoAnnotationProgress {
  // Video file location in
  // [Google Cloud Storage](https://cloud.google.com/storage/).
  string input_uri = 1;

  // Approximate percentage processed thus far.
  // Guaranteed to be 100 when fully processed.
  int32 progress_percent = 2;

  // Time when the request was received.
  google.protobuf.Timestamp start_time = 3;

  // Time of the most recent update.
  google.protobuf.Timestamp update_time = 4;
}

// Video annotation progress. Included in the `metadata`
// field of the `Operation` returned by the `GetOperation`
// call of the `google::longrunning::Operations` service.
message AnnotateVideoProgress {
  // Progress metadata for all videos specified in `AnnotateVideoRequest`.
  repeated VideoAnnotationProgress annotation_progress = 1;
}
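
// A sketch of what polling looks like while the operation runs: each
// `Operations.GetOperation` call returns the `Operation` with `done` still
// false and `metadata` unpacking to an `AnnotateVideoProgress` such as
// (illustrative values, hypothetical object name):
//
//   annotation_progress {
//     input_uri: "gs://my-bucket/my-video.mp4"
//     progress_percent: 42
//   }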

// Video annotation feature.
enum Feature {
  // Unspecified.
  FEATURE_UNSPECIFIED = 0;

  // Label detection. Detect objects, such as dog or flower.
  LABEL_DETECTION = 1;

  // Shot change detection.
  SHOT_CHANGE_DETECTION = 2;

  // Explicit content detection.
  EXPLICIT_CONTENT_DETECTION = 3;

  // Human face detection and tracking.
  FACE_DETECTION = 4;
}

// Label detection mode.
enum LabelDetectionMode {
  // Unspecified.
  LABEL_DETECTION_MODE_UNSPECIFIED = 0;

  // Detect shot-level labels.
  SHOT_MODE = 1;

  // Detect frame-level labels.
  FRAME_MODE = 2;

  // Detect both shot-level and frame-level labels.
  SHOT_AND_FRAME_MODE = 3;
}

// Bucketized representation of likelihood.
enum Likelihood {
  // Unspecified likelihood.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Very unlikely.
  VERY_UNLIKELY = 1;

  // Unlikely.
  UNLIKELY = 2;

  // Possible.
  POSSIBLE = 3;

  // Likely.
  LIKELY = 4;

  // Very likely.
  VERY_LIKELY = 5;
}