// data.proto (22 KB)
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.bigtable.v2;
  16. option csharp_namespace = "Google.Cloud.Bigtable.V2";
  17. option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
  18. option java_multiple_files = true;
  19. option java_outer_classname = "DataProto";
  20. option java_package = "com.google.bigtable.v2";
  21. option php_namespace = "Google\\Cloud\\Bigtable\\V2";
  22. option ruby_package = "Google::Cloud::Bigtable::V2";
  23. // Specifies the complete (requested) contents of a single row of a table.
  24. // Rows which exceed 256MiB in size cannot be read in full.
  25. message Row {
  26. // The unique key which identifies this row within its table. This is the same
  27. // key that's used to identify the row in, for example, a MutateRowRequest.
  28. // May contain any non-empty byte string up to 4KiB in length.
  29. bytes key = 1;
  30. // May be empty, but only if the entire row is empty.
  31. // The mutual ordering of column families is not specified.
  32. repeated Family families = 2;
  33. }
  34. // Specifies (some of) the contents of a single row/column family intersection
  35. // of a table.
  36. message Family {
  37. // The unique key which identifies this family within its row. This is the
  38. // same key that's used to identify the family in, for example, a RowFilter
  39. // which sets its "family_name_regex_filter" field.
  40. // Must match `[-_.a-zA-Z0-9]+`, except that AggregatingRowProcessors may
  41. // produce cells in a sentinel family with an empty name.
  42. // Must be no greater than 64 characters in length.
  43. string name = 1;
  44. // Must not be empty. Sorted in order of increasing "qualifier".
  45. repeated Column columns = 2;
  46. }
  47. // Specifies (some of) the contents of a single row/column intersection of a
  48. // table.
  49. message Column {
  50. // The unique key which identifies this column within its family. This is the
  51. // same key that's used to identify the column in, for example, a RowFilter
  52. // which sets its `column_qualifier_regex_filter` field.
  53. // May contain any byte string, including the empty string, up to 16kiB in
  54. // length.
  55. bytes qualifier = 1;
  56. // Must not be empty. Sorted in order of decreasing "timestamp_micros".
  57. repeated Cell cells = 2;
  58. }
  59. // Specifies (some of) the contents of a single row/column/timestamp of a table.
  60. message Cell {
  61. // The cell's stored timestamp, which also uniquely identifies it within
  62. // its column.
  63. // Values are always expressed in microseconds, but individual tables may set
  64. // a coarser granularity to further restrict the allowed values. For
  65. // example, a table which specifies millisecond granularity will only allow
  66. // values of `timestamp_micros` which are multiples of 1000.
  67. int64 timestamp_micros = 1;
  68. // The value stored in the cell.
  69. // May contain any byte string, including the empty string, up to 100MiB in
  70. // length.
  71. bytes value = 2;
  72. // Labels applied to the cell by a [RowFilter][google.bigtable.v2.RowFilter].
  73. repeated string labels = 3;
  74. }
  75. // Specifies a contiguous range of rows.
  76. message RowRange {
  77. // The row key at which to start the range.
  78. // If neither field is set, interpreted as the empty string, inclusive.
  79. oneof start_key {
  80. // Used when giving an inclusive lower bound for the range.
  81. bytes start_key_closed = 1;
  82. // Used when giving an exclusive lower bound for the range.
  83. bytes start_key_open = 2;
  84. }
  85. // The row key at which to end the range.
  86. // If neither field is set, interpreted as the infinite row key, exclusive.
  87. oneof end_key {
  88. // Used when giving an exclusive upper bound for the range.
  89. bytes end_key_open = 3;
  90. // Used when giving an inclusive upper bound for the range.
  91. bytes end_key_closed = 4;
  92. }
  93. }
  94. // Specifies a non-contiguous set of rows.
  95. message RowSet {
  96. // Single rows included in the set.
  97. repeated bytes row_keys = 1;
  98. // Contiguous row ranges included in the set.
  99. repeated RowRange row_ranges = 2;
  100. }
  101. // Specifies a contiguous range of columns within a single column family.
  102. // The range spans from <column_family>:<start_qualifier> to
  103. // <column_family>:<end_qualifier>, where both bounds can be either
  104. // inclusive or exclusive.
  105. message ColumnRange {
  106. // The name of the column family within which this range falls.
  107. string family_name = 1;
  108. // The column qualifier at which to start the range (within `column_family`).
  109. // If neither field is set, interpreted as the empty string, inclusive.
  110. oneof start_qualifier {
  111. // Used when giving an inclusive lower bound for the range.
  112. bytes start_qualifier_closed = 2;
  113. // Used when giving an exclusive lower bound for the range.
  114. bytes start_qualifier_open = 3;
  115. }
  116. // The column qualifier at which to end the range (within `column_family`).
  117. // If neither field is set, interpreted as the infinite string, exclusive.
  118. oneof end_qualifier {
  119. // Used when giving an inclusive upper bound for the range.
  120. bytes end_qualifier_closed = 4;
  121. // Used when giving an exclusive upper bound for the range.
  122. bytes end_qualifier_open = 5;
  123. }
  124. }
  125. // Specified a contiguous range of microsecond timestamps.
  126. message TimestampRange {
  127. // Inclusive lower bound. If left empty, interpreted as 0.
  128. int64 start_timestamp_micros = 1;
  129. // Exclusive upper bound. If left empty, interpreted as infinity.
  130. int64 end_timestamp_micros = 2;
  131. }
  132. // Specifies a contiguous range of raw byte values.
  133. message ValueRange {
  134. // The value at which to start the range.
  135. // If neither field is set, interpreted as the empty string, inclusive.
  136. oneof start_value {
  137. // Used when giving an inclusive lower bound for the range.
  138. bytes start_value_closed = 1;
  139. // Used when giving an exclusive lower bound for the range.
  140. bytes start_value_open = 2;
  141. }
  142. // The value at which to end the range.
  143. // If neither field is set, interpreted as the infinite string, exclusive.
  144. oneof end_value {
  145. // Used when giving an inclusive upper bound for the range.
  146. bytes end_value_closed = 3;
  147. // Used when giving an exclusive upper bound for the range.
  148. bytes end_value_open = 4;
  149. }
  150. }
  151. // Takes a row as input and produces an alternate view of the row based on
  152. // specified rules. For example, a RowFilter might trim down a row to include
  153. // just the cells from columns matching a given regular expression, or might
  154. // return all the cells of a row but not their values. More complicated filters
  155. // can be composed out of these components to express requests such as, "within
  156. // every column of a particular family, give just the two most recent cells
  157. // which are older than timestamp X."
  158. //
  159. // There are two broad categories of RowFilters (true filters and transformers),
  160. // as well as two ways to compose simple filters into more complex ones
  161. // (chains and interleaves). They work as follows:
  162. //
  163. // * True filters alter the input row by excluding some of its cells wholesale
  164. // from the output row. An example of a true filter is the `value_regex_filter`,
  165. // which excludes cells whose values don't match the specified pattern. All
  166. // regex true filters use RE2 syntax (https://github.com/google/re2/wiki/Syntax)
  167. // in raw byte mode (RE2::Latin1), and are evaluated as full matches. An
  168. // important point to keep in mind is that `RE2(.)` is equivalent by default to
  169. // `RE2([^\n])`, meaning that it does not match newlines. When attempting to
  170. // match an arbitrary byte, you should therefore use the escape sequence `\C`,
  171. // which may need to be further escaped as `\\C` in your client language.
  172. //
  173. // * Transformers alter the input row by changing the values of some of its
  174. // cells in the output, without excluding them completely. Currently, the only
  175. // supported transformer is the `strip_value_transformer`, which replaces every
  176. // cell's value with the empty string.
  177. //
  178. // * Chains and interleaves are described in more detail in the
  179. // RowFilter.Chain and RowFilter.Interleave documentation.
  180. //
  181. // The total serialized size of a RowFilter message must not
  182. // exceed 20480 bytes, and RowFilters may not be nested within each other
  183. // (in Chains or Interleaves) to a depth of more than 20.
  184. message RowFilter {
  185. // A RowFilter which sends rows through several RowFilters in sequence.
  186. message Chain {
  187. // The elements of "filters" are chained together to process the input row:
  188. // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
  189. // The full chain is executed atomically.
  190. repeated RowFilter filters = 1;
  191. }
  192. // A RowFilter which sends each row to each of several component
  193. // RowFilters and interleaves the results.
  194. message Interleave {
  195. // The elements of "filters" all process a copy of the input row, and the
  196. // results are pooled, sorted, and combined into a single output row.
  197. // If multiple cells are produced with the same column and timestamp,
  198. // they will all appear in the output row in an unspecified mutual order.
  199. // Consider the following example, with three filters:
  200. //
  201. // input row
  202. // |
  203. // -----------------------------------------------------
  204. // | | |
  205. // f(0) f(1) f(2)
  206. // | | |
  207. // 1: foo,bar,10,x foo,bar,10,z far,bar,7,a
  208. // 2: foo,blah,11,z far,blah,5,x far,blah,5,x
  209. // | | |
  210. // -----------------------------------------------------
  211. // |
  212. // 1: foo,bar,10,z // could have switched with #2
  213. // 2: foo,bar,10,x // could have switched with #1
  214. // 3: foo,blah,11,z
  215. // 4: far,bar,7,a
  216. // 5: far,blah,5,x // identical to #6
  217. // 6: far,blah,5,x // identical to #5
  218. //
  219. // All interleaved filters are executed atomically.
  220. repeated RowFilter filters = 1;
  221. }
  222. // A RowFilter which evaluates one of two possible RowFilters, depending on
  223. // whether or not a predicate RowFilter outputs any cells from the input row.
  224. //
  225. // IMPORTANT NOTE: The predicate filter does not execute atomically with the
  226. // true and false filters, which may lead to inconsistent or unexpected
  227. // results. Additionally, Condition filters have poor performance, especially
  228. // when filters are set for the false condition.
  229. message Condition {
  230. // If `predicate_filter` outputs any cells, then `true_filter` will be
  231. // evaluated on the input row. Otherwise, `false_filter` will be evaluated.
  232. RowFilter predicate_filter = 1;
  233. // The filter to apply to the input row if `predicate_filter` returns any
  234. // results. If not provided, no results will be returned in the true case.
  235. RowFilter true_filter = 2;
  236. // The filter to apply to the input row if `predicate_filter` does not
  237. // return any results. If not provided, no results will be returned in the
  238. // false case.
  239. RowFilter false_filter = 3;
  240. }
  241. // Which of the possible RowFilter types to apply. If none are set, this
  242. // RowFilter returns all cells in the input row.
  243. oneof filter {
  244. // Applies several RowFilters to the data in sequence, progressively
  245. // narrowing the results.
  246. Chain chain = 1;
  247. // Applies several RowFilters to the data in parallel and combines the
  248. // results.
  249. Interleave interleave = 2;
  250. // Applies one of two possible RowFilters to the data based on the output of
  251. // a predicate RowFilter.
  252. Condition condition = 3;
  253. // ADVANCED USE ONLY.
  254. // Hook for introspection into the RowFilter. Outputs all cells directly to
  255. // the output of the read rather than to any parent filter. Consider the
  256. // following example:
  257. //
  258. // Chain(
  259. // FamilyRegex("A"),
  260. // Interleave(
  261. // All(),
  262. // Chain(Label("foo"), Sink())
  263. // ),
  264. // QualifierRegex("B")
  265. // )
  266. //
  267. // A,A,1,w
  268. // A,B,2,x
  269. // B,B,4,z
  270. // |
  271. // FamilyRegex("A")
  272. // |
  273. // A,A,1,w
  274. // A,B,2,x
  275. // |
  276. // +------------+-------------+
  277. // | |
  278. // All() Label(foo)
  279. // | |
  280. // A,A,1,w A,A,1,w,labels:[foo]
  281. // A,B,2,x A,B,2,x,labels:[foo]
  282. // | |
  283. // | Sink() --------------+
  284. // | | |
  285. // +------------+ x------+ A,A,1,w,labels:[foo]
  286. // | A,B,2,x,labels:[foo]
  287. // A,A,1,w |
  288. // A,B,2,x |
  289. // | |
  290. // QualifierRegex("B") |
  291. // | |
  292. // A,B,2,x |
  293. // | |
  294. // +--------------------------------+
  295. // |
  296. // A,A,1,w,labels:[foo]
  297. // A,B,2,x,labels:[foo] // could be switched
  298. // A,B,2,x // could be switched
  299. //
  300. // Despite being excluded by the qualifier filter, a copy of every cell
  301. // that reaches the sink is present in the final result.
  302. //
  303. // As with an [Interleave][google.bigtable.v2.RowFilter.Interleave],
  304. // duplicate cells are possible, and appear in an unspecified mutual order.
  305. // In this case we have a duplicate with column "A:B" and timestamp 2,
  306. // because one copy passed through the all filter while the other was
  307. // passed through the label and sink. Note that one copy has label "foo",
  308. // while the other does not.
  309. //
  310. // Cannot be used within the `predicate_filter`, `true_filter`, or
  311. // `false_filter` of a [Condition][google.bigtable.v2.RowFilter.Condition].
  312. bool sink = 16;
  313. // Matches all cells, regardless of input. Functionally equivalent to
  314. // leaving `filter` unset, but included for completeness.
  315. bool pass_all_filter = 17;
  316. // Does not match any cells, regardless of input. Useful for temporarily
  317. // disabling just part of a filter.
  318. bool block_all_filter = 18;
  319. // Matches only cells from rows whose keys satisfy the given RE2 regex. In
  320. // other words, passes through the entire row when the key matches, and
  321. // otherwise produces an empty row.
  322. // Note that, since row keys can contain arbitrary bytes, the `\C` escape
  323. // sequence must be used if a true wildcard is desired. The `.` character
  324. // will not match the new line character `\n`, which may be present in a
  325. // binary key.
  326. bytes row_key_regex_filter = 4;
  327. // Matches all cells from a row with probability p, and matches no cells
  328. // from the row with probability 1-p.
  329. double row_sample_filter = 14;
  330. // Matches only cells from columns whose families satisfy the given RE2
  331. // regex. For technical reasons, the regex must not contain the `:`
  332. // character, even if it is not being used as a literal.
  333. // Note that, since column families cannot contain the new line character
  334. // `\n`, it is sufficient to use `.` as a full wildcard when matching
  335. // column family names.
  336. string family_name_regex_filter = 5;
  337. // Matches only cells from columns whose qualifiers satisfy the given RE2
  338. // regex.
  339. // Note that, since column qualifiers can contain arbitrary bytes, the `\C`
  340. // escape sequence must be used if a true wildcard is desired. The `.`
  341. // character will not match the new line character `\n`, which may be
  342. // present in a binary qualifier.
  343. bytes column_qualifier_regex_filter = 6;
  344. // Matches only cells from columns within the given range.
  345. ColumnRange column_range_filter = 7;
  346. // Matches only cells with timestamps within the given range.
  347. TimestampRange timestamp_range_filter = 8;
  348. // Matches only cells with values that satisfy the given regular expression.
  349. // Note that, since cell values can contain arbitrary bytes, the `\C` escape
  350. // sequence must be used if a true wildcard is desired. The `.` character
  351. // will not match the new line character `\n`, which may be present in a
  352. // binary value.
  353. bytes value_regex_filter = 9;
  354. // Matches only cells with values that fall within the given range.
  355. ValueRange value_range_filter = 15;
  356. // Skips the first N cells of each row, matching all subsequent cells.
  357. // If duplicate cells are present, as is possible when using an Interleave,
  358. // each copy of the cell is counted separately.
  359. int32 cells_per_row_offset_filter = 10;
  360. // Matches only the first N cells of each row.
  361. // If duplicate cells are present, as is possible when using an Interleave,
  362. // each copy of the cell is counted separately.
  363. int32 cells_per_row_limit_filter = 11;
  364. // Matches only the most recent N cells within each column. For example,
  365. // if N=2, this filter would match column `foo:bar` at timestamps 10 and 9,
  366. // skip all earlier cells in `foo:bar`, and then begin matching again in
  367. // column `foo:bar2`.
  368. // If duplicate cells are present, as is possible when using an Interleave,
  369. // each copy of the cell is counted separately.
  370. int32 cells_per_column_limit_filter = 12;
  371. // Replaces each cell's value with the empty string.
  372. bool strip_value_transformer = 13;
  373. // Applies the given label to all cells in the output row. This allows
  374. // the client to determine which results were produced from which part of
  375. // the filter.
  376. //
  377. // Values must be at most 15 characters in length, and match the RE2
  378. // pattern `[a-z0-9\\-]+`
  379. //
  380. // Due to a technical limitation, it is not currently possible to apply
  381. // multiple labels to a cell. As a result, a Chain may have no more than
  382. // one sub-filter which contains a `apply_label_transformer`. It is okay for
  383. // an Interleave to contain multiple `apply_label_transformers`, as they
  384. // will be applied to separate copies of the input. This may be relaxed in
  385. // the future.
  386. string apply_label_transformer = 19;
  387. }
  388. }
  389. // Specifies a particular change to be made to the contents of a row.
  390. message Mutation {
  391. // A Mutation which sets the value of the specified cell.
  392. message SetCell {
  393. // The name of the family into which new data should be written.
  394. // Must match `[-_.a-zA-Z0-9]+`
  395. string family_name = 1;
  396. // The qualifier of the column into which new data should be written.
  397. // Can be any byte string, including the empty string.
  398. bytes column_qualifier = 2;
  399. // The timestamp of the cell into which new data should be written.
  400. // Use -1 for current Bigtable server time.
  401. // Otherwise, the client should set this value itself, noting that the
  402. // default value is a timestamp of zero if the field is left unspecified.
  403. // Values must match the granularity of the table (e.g. micros, millis).
  404. int64 timestamp_micros = 3;
  405. // The value to be written into the specified cell.
  406. bytes value = 4;
  407. }
  408. // A Mutation which deletes cells from the specified column, optionally
  409. // restricting the deletions to a given timestamp range.
  410. message DeleteFromColumn {
  411. // The name of the family from which cells should be deleted.
  412. // Must match `[-_.a-zA-Z0-9]+`
  413. string family_name = 1;
  414. // The qualifier of the column from which cells should be deleted.
  415. // Can be any byte string, including the empty string.
  416. bytes column_qualifier = 2;
  417. // The range of timestamps within which cells should be deleted.
  418. TimestampRange time_range = 3;
  419. }
  420. // A Mutation which deletes all cells from the specified column family.
  421. message DeleteFromFamily {
  422. // The name of the family from which cells should be deleted.
  423. // Must match `[-_.a-zA-Z0-9]+`
  424. string family_name = 1;
  425. }
  426. // A Mutation which deletes all cells from the containing row.
  427. message DeleteFromRow {
  428. }
  429. // Which of the possible Mutation types to apply.
  430. oneof mutation {
  431. // Set a cell's value.
  432. SetCell set_cell = 1;
  433. // Deletes cells from a column.
  434. DeleteFromColumn delete_from_column = 2;
  435. // Deletes cells from a column family.
  436. DeleteFromFamily delete_from_family = 3;
  437. // Deletes cells from the entire row.
  438. DeleteFromRow delete_from_row = 4;
  439. }
  440. }
  441. // Specifies an atomic read/modify/write operation on the latest value of the
  442. // specified column.
  443. message ReadModifyWriteRule {
  444. // The name of the family to which the read/modify/write should be applied.
  445. // Must match `[-_.a-zA-Z0-9]+`
  446. string family_name = 1;
  447. // The qualifier of the column to which the read/modify/write should be
  448. // applied.
  449. // Can be any byte string, including the empty string.
  450. bytes column_qualifier = 2;
  451. // The rule used to determine the column's new latest value from its current
  452. // latest value.
  453. oneof rule {
  454. // Rule specifying that `append_value` be appended to the existing value.
  455. // If the targeted cell is unset, it will be treated as containing the
  456. // empty string.
  457. bytes append_value = 3;
  458. // Rule specifying that `increment_amount` be added to the existing value.
  459. // If the targeted cell is unset, it will be treated as containing a zero.
  460. // Otherwise, the targeted cell must contain an 8-byte value (interpreted
  461. // as a 64-bit big-endian signed integer), or the entire request will fail.
  462. int64 increment_amount = 4;
  463. }
  464. }