stream.proto

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1/arrow.proto";
import "google/cloud/bigquery/storage/v1/avro.proto";
import "google/cloud/bigquery/storage/v1/table.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1;storage";
option java_multiple_files = true;
option java_outer_classname = "StreamProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// Data format for input or output data.
enum DataFormat {
  // Data format is unspecified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro is a standard open source row based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;

  // Arrow is a standard open source column-based message format.
  // See https://arrow.apache.org/ for more details.
  ARROW = 2;
}

// Information about the ReadSession.
message ReadSession {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadSession"
    pattern: "projects/{project}/locations/{location}/sessions/{session}"
  };

  // Additional attributes when reading a table.
  message TableModifiers {
    // The snapshot time of the table. If not set, interpreted as now.
    google.protobuf.Timestamp snapshot_time = 1;
  }

  // Options dictating how we read a table.
  message TableReadOptions {
    // Optional. The names of the fields in the table to be returned. If no
    // field names are specified, then all fields in the table are returned.
    //
    // Nested fields -- the child elements of a STRUCT field -- can be selected
    // individually using their fully-qualified names, and will be returned as
    // record fields containing only the selected nested fields. If a STRUCT
    // field is specified in the selected fields list, all of the child
    // elements will be returned.
    //
    // As an example, consider a table with the following schema:
    //
    //   {
    //     "name": "struct_field",
    //     "type": "RECORD",
    //     "mode": "NULLABLE",
    //     "fields": [
    //       {
    //         "name": "string_field1",
    //         "type": "STRING",
    //         "mode": "NULLABLE"
    //       },
    //       {
    //         "name": "string_field2",
    //         "type": "STRING",
    //         "mode": "NULLABLE"
    //       }
    //     ]
    //   }
    //
    // Specifying "struct_field" in the selected fields list will result in a
    // read session schema with the following logical structure:
    //
    //   struct_field {
    //     string_field1
    //     string_field2
    //   }
    //
    // Specifying "struct_field.string_field1" in the selected fields list will
    // result in a read session schema with the following logical structure:
    //
    //   struct_field {
    //     string_field1
    //   }
    //
    // The order of the fields in the read session schema is derived from the
    // table schema and does not correspond to the order in which the fields
    // are specified in this list.
    repeated string selected_fields = 1;

    // SQL text filtering statement, similar to a WHERE clause in a query.
    // Aggregates are not supported.
    //
    // Examples: "int_field > 5"
    //           "date_field = CAST('2014-9-27' as DATE)"
    //           "nullable_field is not NULL"
    //           "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
    //           "numeric_field BETWEEN 1.0 AND 5.0"
    //
    // Restricted to a maximum length of 1 MB.
    string row_restriction = 2;

    oneof output_format_serialization_options {
      // Optional. Options specific to the Apache Arrow output format.
      ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Options specific to the Apache Avro output format.
      AvroSerializationOptions avro_serialization_options = 4 [(google.api.field_behavior) = OPTIONAL];
    }
  }
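
  // Illustrative example (editor's sketch, not part of the generated API):
  // in text-format notation, a TableReadOptions for this session's
  // `read_options` field that reuses the field names from the comments above
  // to select one nested leaf and apply a row filter.
  //
  //   read_options {
  //     selected_fields: "struct_field.string_field1"
  //     selected_fields: "int_field"
  //     row_restriction: "int_field > 5"
  //   }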

  // Output only. Unique identifier for the session, in the form
  // `projects/{project_id}/locations/{location}/sessions/{session_id}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the session becomes invalid. After this time, subsequent
  // requests to read this Session will return errors. The expire_time is
  // automatically assigned and currently cannot be specified or updated.
  google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Data format of the output data. DATA_FORMAT_UNSPECIFIED is not supported.
  DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Immutable. Table that this ReadSession is reading from, in the form
  // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
  string table = 6 [
    (google.api.field_behavior) = IMMUTABLE,
    (google.api.resource_reference) = {
      type: "bigquery.googleapis.com/Table"
    }
  ];

  // Optional. Any modifiers which are applied when reading from the specified table.
  TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. A list of streams created with the session.
  //
  // At least one stream is created with the session. In the future, larger
  // request_stream_count values *may* result in this list being unpopulated;
  // in that case, the user will need to use a List method to get the streams
  // instead, which is not yet available.
  repeated ReadStream streams = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. An estimate of the number of bytes this session will scan when
  // all streams are completely consumed. This estimate is based on
  // metadata from the table which might be incomplete or stale.
  int64 estimated_total_bytes_scanned = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. ID set by client to annotate a session identity. This does not need
  // to be strictly unique; instead, the same ID should be used to group
  // logically connected sessions (e.g. using the same ID for all sessions
  // needed to complete a Spark SQL query is reasonable).
  //
  // Maximum length is 256 bytes.
  string trace_id = 13 [(google.api.field_behavior) = OPTIONAL];
}
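
// Illustrative example (editor's sketch, not part of the generated API):
// in text-format notation, roughly what a newly created ReadSession resource
// might look like. The project, dataset, table, and session IDs below are
// placeholders.
//
//   name: "projects/my-project/locations/us/sessions/CAFE0123"
//   data_format: ARROW
//   table: "projects/my-project/datasets/my_dataset/tables/my_table"
//   streams { name: "projects/my-project/locations/us/sessions/CAFE0123/streams/0" }
//   streams { name: "projects/my-project/locations/us/sessions/CAFE0123/streams/1" }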

// Information about a single stream that gets data out of the storage system.
// Most of the information about `ReadStream` instances is aggregated, making
// `ReadStream` lightweight.
message ReadStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/ReadStream"
    pattern: "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}"
  };

  // Output only. Name of the stream, in the form
  // `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
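
// Illustrative usage sketch (editor's note). A ReadStream is consumed by
// passing its `name` to the ReadRows RPC, assumed here to be defined in the
// companion storage.proto of this package; the lines below are pseudocode,
// not generated client code.
//
//   session = CreateReadSession(parent, read_session, max_stream_count)
//   for stream in session.streams:
//     for response in ReadRows(read_stream: stream.name, offset: 0):
//       decode(response)  # Avro or Arrow, per ReadSession.data_format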

// WriteStreamView is a view enum that controls what details about a write
// stream should be returned.
enum WriteStreamView {
  // The default / unset value.
  WRITE_STREAM_VIEW_UNSPECIFIED = 0;

  // The BASIC projection returns basic metadata about a write stream. The
  // basic view does not include schema information. This is the default view
  // returned by GetWriteStream.
  BASIC = 1;

  // The FULL projection returns all available write stream metadata, including
  // the schema. CreateWriteStream returns the full projection of write stream
  // metadata.
  FULL = 2;
}
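
// Illustrative usage sketch (editor's note). The view is assumed to be
// selectable on the GetWriteStream request defined in the companion
// storage.proto of this package; pseudocode only.
//
//   GetWriteStream(name: stream_name)              # returns the BASIC view
//   GetWriteStream(name: stream_name, view: FULL)  # also returns the schema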

// Information about a single stream that gets data inside the storage system.
message WriteStream {
  option (google.api.resource) = {
    type: "bigquerystorage.googleapis.com/WriteStream"
    pattern: "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}"
  };

  // Type enum of the stream.
  enum Type {
    // Unknown type.
    TYPE_UNSPECIFIED = 0;

    // Data will commit automatically and appear as soon as the write is
    // acknowledged.
    COMMITTED = 1;

    // Data is invisible until the stream is committed.
    PENDING = 2;

    // Data is only visible up to the offset to which it was flushed.
    BUFFERED = 3;
  }

  // Mode enum of the stream.
  enum WriteMode {
    // Unknown mode.
    WRITE_MODE_UNSPECIFIED = 0;

    // Insert new records into the table.
    // It is the default value if customers do not specify it.
    INSERT = 1;
  }

  // Output only. Name of the stream, in the form
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Type of the stream.
  Type type = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Output only. Create time of the stream. For the _default stream, this is the
  // creation_time of the table.
  google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Commit time of the stream.
  // If a stream is of `COMMITTED` type, its commit_time is the same as its
  // `create_time`. If the stream is of `PENDING` type, an empty commit_time
  // means it is not committed.
  google.protobuf.Timestamp commit_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The schema of the destination table. It is only returned in
  // the `CreateWriteStream` response. The caller should generate data that is
  // compatible with this schema to send in the initial `AppendRowsRequest`.
  // The table schema could go out of date during the lifetime of the stream.
  TableSchema table_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Immutable. Mode of the stream.
  WriteMode write_mode = 7 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The geographic location where the stream's dataset resides. See
  // https://cloud.google.com/bigquery/docs/locations for supported
  // locations.
  string location = 8 [(google.api.field_behavior) = IMMUTABLE];
}
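
// Illustrative lifecycle sketch (editor's note) for a `PENDING` write stream.
// The AppendRows, FinalizeWriteStream, and BatchCommitWriteStreams RPCs are
// assumed to be defined in the companion storage.proto of this package;
// pseudocode only, not generated client code.
//
//   stream = CreateWriteStream(parent: table, write_stream { type: PENDING })
//   AppendRows(write_stream: stream.name, rows)            # repeat as needed
//   FinalizeWriteStream(name: stream.name)                 # no further appends
//   BatchCommitWriteStreams(parent: table, write_streams: [stream.name])
//   # Rows become visible only after the commit, per the PENDING semantics above.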