// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/protobuf.proto";
import "google/cloud/bigquery/storage/v1beta2/stream.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage";
option java_multiple_files = true;
option java_outer_classname = "StorageProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";

// BigQuery Read API.
//
// The Read API can be used to read data from BigQuery.
//
// New code should use the v1 Read API going forward, if it does not also use
// the Write API at the same time.
service BigQueryRead {
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter describing
  // the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, then all the data in the
  // table has been read.
  //
  // Data is assigned to each stream such that roughly the same number of
  // rows can be read from each stream. Because the server-side unit for
  // assigning data is collections of rows, the API does not guarantee that
  // each stream will return the same number of rows. Additionally, the
  // limits are enforced based on the number of pre-filtered rows, so some
  // filters can lead to lopsided assignments.
  //
  // Read sessions automatically expire 6 hours after they are created and do
  // not require manual clean-up by the caller.
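  //
  // A minimal `CreateReadSessionRequest` might look like the following
  // (illustrative textproto; the project, dataset, and table names are
  // hypothetical, and `table`/`data_format` are `ReadSession` fields defined
  // in stream.proto):
  //
  //     parent: "projects/my-project"
  //     read_session {
  //       table: "projects/my-project/datasets/my_dataset/tables/my_table"
  //       data_format: ARROW
  //     }
  //     max_stream_count: 4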
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
    option (google.api.http) = {
      post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}"
      body: "*"
    };
    option (google.api.method_signature) = "parent,read_session,max_stream_count";
  }

  // Reads rows from the stream in the format prescribed by the ReadSession.
  // Each response contains one or more table rows, up to a maximum of 100 MiB
  // per response; read requests which attempt to read individual rows larger
  // than 100 MiB will fail.
  //
  // Each request also returns a set of stream statistics reflecting the current
  // state of the stream.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}"
    };
    option (google.api.method_signature) = "read_stream,offset";
  }

  // Splits a given `ReadStream` into two `ReadStream` objects. These
  // `ReadStream` objects are referred to as the primary and the residual
  // streams of the split. The original `ReadStream` can still be read from in
  // the same manner as before. Both of the returned `ReadStream` objects can
  // also be read from, and the rows returned by both child streams will be
  // the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back-to-back in the
  // original `ReadStream`. Concretely, it is guaranteed that for streams
  // original, primary, and residual, that original[0-j] = primary[0-j] and
  // original[j-n] = residual[0-m] once the streams have been read to
  // completion.
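  //
  // For example (illustrative): if reading the original stream to completion
  // would have returned rows [0, 100) and the split point falls at row 30,
  // then reading the primary stream to completion yields rows [0, 30) and
  // reading the residual stream to completion yields rows [30, 100).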
  rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}"
    };
  }
}

// BigQuery Write API.
//
// The Write API can be used to write data to BigQuery.
service BigQueryWrite {
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/bigquery.insertdata,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a write stream to the given table.
  // Additionally, every table has a special COMMITTED stream named '_default'
  // to which data can be written. This stream doesn't need to be created using
  // CreateWriteStream. It is a stream that can be used simultaneously by any
  // number of clients. Data written to this stream is considered committed as
  // soon as an acknowledgement is received.
  rpc CreateWriteStream(CreateWriteStreamRequest) returns (WriteStream) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
      body: "write_stream"
    };
    option (google.api.method_signature) = "parent,write_stream";
  }

  // Appends data to the given stream.
  //
  // If `offset` is specified, the `offset` is checked against the end of
  // stream. The server returns `OUT_OF_RANGE` in `AppendRowsResponse` if an
  // attempt is made to append to an offset beyond the current end of the stream
  // or `ALREADY_EXISTS` if the user provides an `offset` that has already been
  // written to. The user can retry with an adjusted offset within the same RPC
  // stream. If `offset` is not specified, the append happens at the end of the
  // stream.
  //
  // The response contains the offset at which the append happened. Responses
  // are received in the same order in which requests are sent. There will be
  // one response for each successful request. If the `offset` is not set in
  // the response, it means the append did not happen due to an error. If one
  // request fails, all subsequent requests will also fail until a successful
  // request is made again.
  //
  // If the stream is of `PENDING` type, data will only be available for read
  // operations after the stream is committed.
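  //
  // For example (illustrative): if a stream currently ends at offset 8, a
  // request with `offset: 8` appends rows starting at offset 8, a request
  // with `offset: 10` receives `OUT_OF_RANGE`, and a request with `offset: 5`
  // receives `ALREADY_EXISTS`.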
  rpc AppendRows(stream AppendRowsRequest) returns (stream AppendRowsResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }

  // Gets a write stream.
  rpc GetWriteStream(GetWriteStreamRequest) returns (WriteStream) {
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Finalize a write stream so that no new data can be appended to the
  // stream. Finalize is not supported on the '_default' stream.
  rpc FinalizeWriteStream(FinalizeWriteStreamRequest) returns (FinalizeWriteStreamResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Atomically commits a group of `PENDING` streams that belong to the same
  // `parent` table.
  // Streams must be finalized before commit and cannot be committed multiple
  // times. Once a stream is committed, data in the stream becomes available
  // for read operations.
  rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest) returns (BatchCommitWriteStreamsResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
    };
    option (google.api.method_signature) = "parent";
  }

  // Flushes rows to a BUFFERED stream.
  // If users are appending rows to a BUFFERED stream, a flush operation is
  // required in order for the rows to become available for reading. A
  // Flush operation flushes up to any previously flushed offset in a BUFFERED
  // stream, to the offset specified in the request.
  // Flush is not supported on the _default stream, since it is not BUFFERED.
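  //
  // For example (illustrative): after appending rows at offsets [0, 100) to a
  // BUFFERED stream, calling FlushRows with `offset: 49` makes rows 0 through
  // 49 (inclusive) available for reading.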
  rpc FlushRows(FlushRowsRequest) returns (FlushRowsResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }
}

// Request message for `CreateReadSession`.
message CreateReadSessionRequest {
  // Required. The request project that owns the session, in the form of
  // `projects/{project_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. Session to be created.
  ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED];

  // Max initial number of streams. If unset or zero, the server will
  // provide a number of streams so as to produce reasonable throughput. Must be
  // non-negative. The number of streams may be lower than the requested number,
  // depending on the amount of parallelism that is reasonable for the table.
  // An error will be returned if the max count is greater than the current
  // system max limit of 1,000.
  //
  // Streams must be read starting from offset 0.
  int32 max_stream_count = 3;
}

// Request message for `ReadRows`.
message ReadRowsRequest {
  // Required. Stream to read rows from.
  string read_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // The offset requested must be less than the last row read from Read.
  // Requesting a larger offset is undefined. If not specified, start reading
  // from offset zero.
  int64 offset = 2;
}

// Information on whether the current connection is being throttled.
message ThrottleState {
  // How much this connection is being throttled. Zero means no throttling,
  // 100 means fully throttled.
  int32 throttle_percent = 1;
}

// Estimated stream statistics for a given Stream.
message StreamStats {
  message Progress {
    // The fraction of rows assigned to the stream that have been processed by
    // the server so far, not including the rows in the current response
    // message.
    //
    // This value, along with `at_response_end`, can be used to interpolate
    // the progress made as the rows in the message are being processed using
    // the following formula: `at_response_start + (at_response_end -
    // at_response_start) * rows_processed_from_response / rows_in_response`.
    //
    // Note that if a filter is provided, the `at_response_end` value of the
    // previous response may not necessarily be equal to the
    // `at_response_start` value of the current response.
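    //
    // For example (illustrative): with `at_response_start` = 0.25,
    // `at_response_end` = 0.35, and 200 rows in the current response,
    // having processed 50 of those rows corresponds to an estimated progress
    // of 0.25 + (0.35 - 0.25) * 50 / 200 = 0.275.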
    double at_response_start = 1;

    // Similar to `at_response_start`, except that this value includes the
    // rows in the current response.
    double at_response_end = 2;
  }

  // Represents the progress of the current stream.
  Progress progress = 2;
}

// Response from calling `ReadRows` may include row data, progress and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in format specified during session creation.
  oneof rows {
    // Serialized row data in AVRO format.
    AvroRows avro_rows = 3;

    // Serialized row data in Arrow RecordBatch format.
    ArrowRecordBatch arrow_record_batch = 4;
  }

  // Number of serialized rows in the rows block.
  int64 row_count = 6;

  // Statistics for the stream.
  StreamStats stats = 2;

  // Throttling state. If unset, the latest response still describes
  // the current throttling status.
  ThrottleState throttle_state = 5;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields. This schema is equivalent to the one returned by
  // CreateReadSession. This field is only populated in the first
  // ReadRowsResponse RPC.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// Request message for `SplitReadStream`.
message SplitReadStreamRequest {
  // Required. Name of the stream to split.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // A value in the range (0.0, 1.0) that specifies the fractional point at
  // which the original stream should be split. The actual split point is
  // evaluated on pre-filtered rows, so if a filter is provided, then there is
  // no guarantee that the division of the rows between the new child streams
  // will be proportional to this fractional value. Additionally, because the
  // server-side unit for assigning data is collections of rows, this fraction
  // will always map to a data storage boundary on the server side.
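  //
  // For example (illustrative), `fraction: 0.5` requests a split at roughly
  // the midpoint of the stream's pre-filtered rows; the actual split point is
  // placed at the nearest data storage boundary on the server side.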
  double fraction = 2;
}

message SplitReadStreamResponse {
  // Primary stream, which contains the beginning portion of
  // |original_stream|. An empty value indicates that the original stream can no
  // longer be split.
  ReadStream primary_stream = 1;

  // Remainder stream, which contains the tail of |original_stream|. An empty
  // value indicates that the original stream can no longer be split.
  ReadStream remainder_stream = 2;
}

// Request message for `CreateWriteStream`.
message CreateWriteStreamRequest {
  // Required. Reference to the table to which the stream belongs, in the format
  // of `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquery.googleapis.com/Table"
    }
  ];

  // Required. Stream to be created.
  WriteStream write_stream = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request message for `AppendRows`.
message AppendRowsRequest {
  // Proto schema and data.
  message ProtoData {
    // Proto schema used to serialize the data.
    ProtoSchema writer_schema = 1;

    // Serialized row data in protobuf message format.
    ProtoRows rows = 2;
  }

  // Required. The stream that is the target of the append operation. This value must be
  // specified for the initial request. If subsequent requests specify the
  // stream name, it must equal the value provided in the first request.
  // To write to the _default stream, populate this field with a string in the
  // format `projects/{project}/datasets/{dataset}/tables/{table}/_default`.
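  // For example (illustrative, with hypothetical resource names), appends to
  // the default stream would set this field to
  // `projects/my-project/datasets/my_dataset/tables/my_table/_default`.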
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // If present, the write is only performed if the next append offset is the
  // same as the provided value. If not present, the write is performed at the
  // current end of stream. Specifying a value for this field is not allowed
  // when calling AppendRows for the '_default' stream.
  google.protobuf.Int64Value offset = 2;

  // Input rows. The `writer_schema` field must be specified in the initial
  // request and is currently ignored if specified in subsequent requests.
  // Subsequent requests must have data in the same format as the
  // initial request.
  oneof rows {
    // Rows in proto format.
    ProtoData proto_rows = 4;
  }

  // ID set by the client to annotate its identity. Only the setting in the
  // initial request is respected.
  string trace_id = 6;
}

// Response message for `AppendRows`.
message AppendRowsResponse {
  // AppendResult is returned for successful append requests.
  message AppendResult {
    // The row offset at which the last append occurred. The offset will not be
    // set if appending using default streams.
    google.protobuf.Int64Value offset = 1;
  }

  oneof response {
    // Result if the append is successful.
    AppendResult append_result = 1;

    // Error returned when problems were encountered. If present,
    // it indicates rows were not accepted into the system.
    // Users can retry or continue with other append requests within the
    // same connection.
    //
    // Additional information about error signalling:
    //
    // ALREADY_EXISTS: Happens when an append specified an offset, and the
    // backend already has received data at this offset. Typically encountered
    // in retry scenarios, and can be ignored.
    //
    // OUT_OF_RANGE: Returned when the specified offset in the stream is beyond
    // the current end of the stream.
    //
    // INVALID_ARGUMENT: Indicates a malformed request or data.
    //
    // ABORTED: Request processing is aborted because of prior failures. The
    // request can be retried if the previous failure is addressed.
    //
    // INTERNAL: Indicates server-side error(s) that can be retried.
    google.rpc.Status error = 2;
  }

  // If the backend detects a schema update, it is passed back to the user so
  // that the user can input messages of the new type. It will be empty when no
  // schema updates have occurred.
  TableSchema updated_schema = 3;
}

// Request message for `GetWriteStream`.
message GetWriteStreamRequest {
  // Required. Name of the stream to get, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
  // Required. Parent table that all the streams should belong to, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The group of streams that will be committed atomically.
  repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsResponse {
  // The time at which streams were committed, with microsecond granularity.
  // This field will only exist when there are no stream errors.
  // **Note**: if this field is not set, the commit was not successful.
  google.protobuf.Timestamp commit_time = 1;

  // Stream-level errors if the commit failed. Only streams with an error will
  // be in the list.
  // If empty, there is no error and all streams are committed successfully.
  // If non-empty, certain streams have errors and ZERO streams are committed
  // due to the atomicity guarantee.
  repeated StorageError stream_errors = 2;
}

// Request message for invoking `FinalizeWriteStream`.
message FinalizeWriteStreamRequest {
  // Required. Name of the stream to finalize, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Response message for `FinalizeWriteStream`.
message FinalizeWriteStreamResponse {
  // Number of rows in the finalized stream.
  int64 row_count = 1;
}

// Request message for `FlushRows`.
message FlushRowsRequest {
  // Required. The stream that is the target of the flush operation.
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // Ending offset of the flush operation. Rows before this offset (including
  // this offset) will be flushed.
  google.protobuf.Int64Value offset = 2;
}

// Response message for `FlushRows`.
message FlushRowsResponse {
  // The rows before this offset (including this offset) are flushed.
  int64 offset = 1;
}

// Structured custom BigQuery Storage error message. The error can be attached
// as error details in the returned rpc Status. In particular, the use of error
// codes allows more structured error handling, and reduces the need to evaluate
// unstructured error text strings.
message StorageError {
  // Error code for `StorageError`.
  enum StorageErrorCode {
    // Default error.
    STORAGE_ERROR_CODE_UNSPECIFIED = 0;

    // Table is not found in the system.
    TABLE_NOT_FOUND = 1;

    // Stream is already committed.
    STREAM_ALREADY_COMMITTED = 2;

    // Stream is not found.
    STREAM_NOT_FOUND = 3;

    // Invalid Stream type.
    // For example, you try to commit a stream that is not pending.
    INVALID_STREAM_TYPE = 4;

    // Invalid Stream state.
    // For example, you try to commit a stream that is not finalized or has
    // been garbage collected.
    INVALID_STREAM_STATE = 5;

    // Stream is finalized.
    STREAM_FINALIZED = 6;
  }

  // BigQuery Storage specific error code.
  StorageErrorCode code = 1;

  // Name of the failed entity.
  string entity = 2;

  // Message that describes the error.
  string error_message = 3;
}