// metadata.proto
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.dataplex.v1;
  16. import "google/api/annotations.proto";
  17. import "google/api/client.proto";
  18. import "google/api/field_behavior.proto";
  19. import "google/api/resource.proto";
  20. import "google/protobuf/empty.proto";
  21. import "google/protobuf/timestamp.proto";
  22. option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
  23. option java_multiple_files = true;
  24. option java_outer_classname = "MetadataProto";
  25. option java_package = "com.google.cloud.dataplex.v1";
  26. // Metadata service manages metadata resources such as tables, filesets and
  27. // partitions.
  28. service MetadataService {
  29. option (google.api.default_host) = "dataplex.googleapis.com";
  30. option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
  31. // Create a metadata entity.
  32. rpc CreateEntity(CreateEntityRequest) returns (Entity) {
  33. option (google.api.http) = {
  34. post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
  35. body: "entity"
  36. };
  37. option (google.api.method_signature) = "parent,entity";
  38. }
  39. // Update a metadata entity. Only supports full resource update.
  40. rpc UpdateEntity(UpdateEntityRequest) returns (Entity) {
  41. option (google.api.http) = {
  42. put: "/v1/{entity.name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
  43. body: "entity"
  44. };
  45. }
  46. // Delete a metadata entity.
  47. rpc DeleteEntity(DeleteEntityRequest) returns (google.protobuf.Empty) {
  48. option (google.api.http) = {
  49. delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
  50. };
  51. option (google.api.method_signature) = "name";
  52. }
  53. // Get a metadata entity.
  54. rpc GetEntity(GetEntityRequest) returns (Entity) {
  55. option (google.api.http) = {
  56. get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
  57. };
  58. option (google.api.method_signature) = "name";
  59. }
  60. // List metadata entities in a zone.
  61. rpc ListEntities(ListEntitiesRequest) returns (ListEntitiesResponse) {
  62. option (google.api.http) = {
  63. get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
  64. };
  65. option (google.api.method_signature) = "parent";
  66. }
  67. // Create a metadata partition.
  68. rpc CreatePartition(CreatePartitionRequest) returns (Partition) {
  69. option (google.api.http) = {
  70. post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
  71. body: "partition"
  72. };
  73. option (google.api.method_signature) = "parent,partition";
  74. }
  75. // Delete a metadata partition.
  76. rpc DeletePartition(DeletePartitionRequest) returns (google.protobuf.Empty) {
  77. option (google.api.http) = {
  78. delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
  79. };
  80. option (google.api.method_signature) = "name";
  81. }
  82. // Get a metadata partition of an entity.
  83. rpc GetPartition(GetPartitionRequest) returns (Partition) {
  84. option (google.api.http) = {
  85. get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
  86. };
  87. option (google.api.method_signature) = "name";
  88. }
  89. // List metadata partitions of an entity.
  90. rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse) {
  91. option (google.api.http) = {
  92. get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
  93. };
  94. option (google.api.method_signature) = "parent";
  95. }
  96. }
  97. // Create a metadata entity request.
  98. message CreateEntityRequest {
  99. // Required. The resource name of the parent zone:
  100. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  101. string parent = 1 [
  102. (google.api.field_behavior) = REQUIRED,
  103. (google.api.resource_reference) = {
  104. type: "dataplex.googleapis.com/Zone"
  105. }
  106. ];
  107. // Required. Entity resource.
  108. Entity entity = 3 [(google.api.field_behavior) = REQUIRED];
  109. // Optional. Only validate the request, but do not perform mutations.
  110. // The default is false.
  111. bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
  112. }
  113. // Update a metadata entity request.
  114. // The exiting entity will be fully replaced by the entity in the request.
  115. // The entity ID is mutable. To modify the ID, use the current entity ID in the
  116. // request URL and specify the new ID in the request body.
  117. message UpdateEntityRequest {
  118. // Required. Update description.
  119. Entity entity = 2 [(google.api.field_behavior) = REQUIRED];
  120. // Optional. Only validate the request, but do not perform mutations.
  121. // The default is false.
  122. bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
  123. }
  124. // Delete a metadata entity request.
  125. message DeleteEntityRequest {
  126. // Required. The resource name of the entity:
  127. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  128. string name = 1 [
  129. (google.api.field_behavior) = REQUIRED,
  130. (google.api.resource_reference) = {
  131. type: "dataplex.googleapis.com/Entity"
  132. }
  133. ];
  134. // Required. The etag associated with the entity, which can be retrieved with a
  135. // [GetEntity][] request.
  136. string etag = 2 [(google.api.field_behavior) = REQUIRED];
  137. }
  138. // List metadata entities request.
  139. message ListEntitiesRequest {
  140. // Entity views.
  141. enum EntityView {
  142. // The default unset value. Return both table and fileset entities
  143. // if unspecified.
  144. ENTITY_VIEW_UNSPECIFIED = 0;
  145. // Only list table entities.
  146. TABLES = 1;
  147. // Only list fileset entities.
  148. FILESETS = 2;
  149. }
  150. // Required. The resource name of the parent zone:
  151. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  152. string parent = 1 [
  153. (google.api.field_behavior) = REQUIRED,
  154. (google.api.resource_reference) = {
  155. type: "dataplex.googleapis.com/Zone"
  156. }
  157. ];
  158. // Required. Specify the entity view to make a partial list request.
  159. EntityView view = 2 [(google.api.field_behavior) = REQUIRED];
  160. // Optional. Maximum number of entities to return. The service may return fewer than
  161. // this value. If unspecified, 100 entities will be returned by default. The
  162. // maximum value is 500; larger values will will be truncated to 500.
  163. int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];
  164. // Optional. Page token received from a previous `ListEntities` call. Provide
  165. // this to retrieve the subsequent page. When paginating, all other parameters
  166. // provided to `ListEntities` must match the call that provided the
  167. // page token.
  168. string page_token = 4 [(google.api.field_behavior) = OPTIONAL];
  169. // Optional. The following filter parameters can be added to the URL to limit the
  170. // entities returned by the API:
  171. //
  172. // - Entity ID: ?filter="id=entityID"
  173. // - Asset ID: ?filter="asset=assetID"
  174. // - Data path ?filter="data_path=gs://my-bucket"
  175. // - Is HIVE compatible: ?filter="hive_compatible=true"
  176. // - Is BigQuery compatible: ?filter="bigquery_compatible=true"
  177. string filter = 5 [(google.api.field_behavior) = OPTIONAL];
  178. }
  179. // List metadata entities response.
  180. message ListEntitiesResponse {
  181. // Entities in the specified parent zone.
  182. repeated Entity entities = 1;
  183. // Token to retrieve the next page of results, or empty if there are no
  184. // remaining results in the list.
  185. string next_page_token = 2;
  186. }
  187. // Get metadata entity request.
  188. message GetEntityRequest {
  189. // Entity views for get entity partial result.
  190. enum EntityView {
  191. // The API will default to the `BASIC` view.
  192. ENTITY_VIEW_UNSPECIFIED = 0;
  193. // Minimal view that does not include the schema.
  194. BASIC = 1;
  195. // Include basic information and schema.
  196. SCHEMA = 2;
  197. // Include everything. Currently, this is the same as the SCHEMA view.
  198. FULL = 4;
  199. }
  200. // Required. The resource name of the entity:
  201. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}.`
  202. string name = 1 [
  203. (google.api.field_behavior) = REQUIRED,
  204. (google.api.resource_reference) = {
  205. type: "dataplex.googleapis.com/Entity"
  206. }
  207. ];
  208. // Optional. Used to select the subset of entity information to return.
  209. // Defaults to `BASIC`.
  210. EntityView view = 2 [(google.api.field_behavior) = OPTIONAL];
  211. }
  212. // List metadata partitions request.
  213. message ListPartitionsRequest {
  214. // Required. The resource name of the parent entity:
  215. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  216. string parent = 1 [
  217. (google.api.field_behavior) = REQUIRED,
  218. (google.api.resource_reference) = {
  219. type: "dataplex.googleapis.com/Entity"
  220. }
  221. ];
  222. // Optional. Maximum number of partitions to return. The service may return fewer than
  223. // this value. If unspecified, 100 partitions will be returned by default. The
  224. // maximum page size is 500; larger values will will be truncated to 500.
  225. int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
  226. // Optional. Page token received from a previous `ListPartitions` call. Provide
  227. // this to retrieve the subsequent page. When paginating, all other parameters
  228. // provided to `ListPartitions` must match the call that provided the
  229. // page token.
  230. string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
  231. // Optional. Filter the partitions returned to the caller using a key value pair
  232. // expression. Supported operators and syntax:
  233. //
  234. // - logic operators: AND, OR
  235. // - comparison operators: <, >, >=, <= ,=, !=
  236. // - LIKE operators:
  237. // - The right hand of a LIKE operator supports "." and
  238. // "*" for wildcard searches, for example "value1 LIKE ".*oo.*"
  239. // - parenthetical grouping: ( )
  240. //
  241. // Sample filter expression: `?filter="key1 < value1 OR key2 > value2"
  242. //
  243. // **Notes:**
  244. //
  245. // - Keys to the left of operators are case insensitive.
  246. // - Partition results are sorted first by creation time, then by
  247. // lexicographic order.
  248. // - Up to 20 key value filter pairs are allowed, but due to performance
  249. // considerations, only the first 10 will be used as a filter.
  250. string filter = 4 [(google.api.field_behavior) = OPTIONAL];
  251. }
  252. // Create metadata partition request.
  253. message CreatePartitionRequest {
  254. // Required. The resource name of the parent zone:
  255. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  256. string parent = 1 [
  257. (google.api.field_behavior) = REQUIRED,
  258. (google.api.resource_reference) = {
  259. type: "dataplex.googleapis.com/Entity"
  260. }
  261. ];
  262. // Required. Partition resource.
  263. Partition partition = 3 [(google.api.field_behavior) = REQUIRED];
  264. // Optional. Only validate the request, but do not perform mutations.
  265. // The default is false.
  266. bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
  267. }
  268. // Delete metadata partition request.
  269. message DeletePartitionRequest {
  270. // Required. The resource name of the partition.
  271. // format:
  272. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  273. // The {partition_value_path} segment consists of an ordered sequence of
  274. // partition values separated by "/". All values must be provided.
  275. string name = 1 [
  276. (google.api.field_behavior) = REQUIRED,
  277. (google.api.resource_reference) = {
  278. type: "dataplex.googleapis.com/Partition"
  279. }
  280. ];
  281. // Optional. The etag associated with the partition.
  282. string etag = 2 [
  283. deprecated = true,
  284. (google.api.field_behavior) = OPTIONAL
  285. ];
  286. }
  287. // List metadata partitions response.
  288. message ListPartitionsResponse {
  289. // Partitions under the specified parent entity.
  290. repeated Partition partitions = 1;
  291. // Token to retrieve the next page of results, or empty if there are no
  292. // remaining results in the list.
  293. string next_page_token = 2;
  294. }
  295. // Get metadata partition request.
  296. message GetPartitionRequest {
  297. // Required. The resource name of the partition:
  298. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  299. // The {partition_value_path} segment consists of an ordered sequence of
  300. // partition values separated by "/". All values must be provided.
  301. string name = 1 [
  302. (google.api.field_behavior) = REQUIRED,
  303. (google.api.resource_reference) = {
  304. type: "dataplex.googleapis.com/Partition"
  305. }
  306. ];
  307. }
  308. // Represents tables and fileset metadata contained within a zone.
  309. message Entity {
  310. option (google.api.resource) = {
  311. type: "dataplex.googleapis.com/Entity"
  312. pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}"
  313. };
  314. // The type of entity.
  315. enum Type {
  316. // Type unspecified.
  317. TYPE_UNSPECIFIED = 0;
  318. // Structured and semi-structured data.
  319. TABLE = 1;
  320. // Unstructured data.
  321. FILESET = 2;
  322. }
  323. // Provides compatibility information for various metadata stores.
  324. message CompatibilityStatus {
  325. // Provides compatibility information for a specific metadata store.
  326. message Compatibility {
  327. // Output only. Whether the entity is compatible and can be represented in the metadata
  328. // store.
  329. bool compatible = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  330. // Output only. Provides additional detail if the entity is incompatible with the
  331. // metadata store.
  332. string reason = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  333. }
  334. // Output only. Whether this entity is compatible with Hive Metastore.
  335. Compatibility hive_metastore = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  336. // Output only. Whether this entity is compatible with BigQuery.
  337. Compatibility bigquery = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  338. }
  339. // Output only. The resource name of the entity, of the form:
  340. // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{id}`.
  341. string name = 1 [
  342. (google.api.field_behavior) = OUTPUT_ONLY,
  343. (google.api.resource_reference) = {
  344. type: "dataplex.googleapis.com/Entity"
  345. }
  346. ];
  347. // Optional. Display name must be shorter than or equal to 256 characters.
  348. string display_name = 2 [(google.api.field_behavior) = OPTIONAL];
  349. // Optional. User friendly longer description text. Must be shorter than or equal to
  350. // 1024 characters.
  351. string description = 3 [(google.api.field_behavior) = OPTIONAL];
  352. // Output only. The time when the entity was created.
  353. google.protobuf.Timestamp create_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
  354. // Output only. The time when the entity was last updated.
  355. google.protobuf.Timestamp update_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
  356. // Required. A user-provided entity ID. It is mutable, and will be used as the
  357. // published table name. Specifying a new ID in an update entity
  358. // request will override the existing value.
  359. // The ID must contain only letters (a-z, A-Z), numbers (0-9), and
  360. // underscores. Must begin with a letter and consist of 256 or fewer
  361. // characters.
  362. string id = 7 [(google.api.field_behavior) = REQUIRED];
  363. // Optional. The etag associated with the entity, which can be retrieved with a
  364. // [GetEntity][] request. Required for update and delete requests.
  365. string etag = 8 [(google.api.field_behavior) = OPTIONAL];
  366. // Required. Immutable. The type of entity.
  367. Type type = 10 [
  368. (google.api.field_behavior) = REQUIRED,
  369. (google.api.field_behavior) = IMMUTABLE
  370. ];
  371. // Required. Immutable. The ID of the asset associated with the storage location containing the
  372. // entity data. The entity must be with in the same zone with the asset.
  373. string asset = 11 [
  374. (google.api.field_behavior) = REQUIRED,
  375. (google.api.field_behavior) = IMMUTABLE
  376. ];
  377. // Required. Immutable. The storage path of the entity data.
  378. // For Cloud Storage data, this is the fully-qualified path to the entity,
  379. // such as `gs://bucket/path/to/data`. For BigQuery data, this is the name of
  380. // the table resource, such as
  381. // `projects/project_id/datasets/dataset_id/tables/table_id`.
  382. string data_path = 12 [
  383. (google.api.field_behavior) = REQUIRED,
  384. (google.api.field_behavior) = IMMUTABLE
  385. ];
  386. // Optional. The set of items within the data path constituting the data in the entity,
  387. // represented as a glob path.
  388. // Example: `gs://bucket/path/to/data/**/*.csv`.
  389. string data_path_pattern = 13 [(google.api.field_behavior) = OPTIONAL];
  390. // Output only. The name of the associated Data Catalog entry.
  391. string catalog_entry = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
  392. // Required. Immutable. Identifies the storage system of the entity data.
  393. StorageSystem system = 15 [
  394. (google.api.field_behavior) = REQUIRED,
  395. (google.api.field_behavior) = IMMUTABLE
  396. ];
  397. // Required. Identifies the storage format of the entity data.
  398. // It does not apply to entities with data stored in BigQuery.
  399. StorageFormat format = 16 [(google.api.field_behavior) = REQUIRED];
  400. // Output only. Metadata stores that the entity is compatible with.
  401. CompatibilityStatus compatibility = 19 [(google.api.field_behavior) = OUTPUT_ONLY];
  402. // Required. The description of the data structure and layout.
  403. // The schema is not included in list responses. It is only included in
  404. // `SCHEMA` and `FULL` entity views of a `GetEntity` response.
  405. Schema schema = 50 [(google.api.field_behavior) = REQUIRED];
  406. }
  407. // Represents partition metadata contained within entity instances.
  408. message Partition {
  409. option (google.api.resource) = {
  410. type: "dataplex.googleapis.com/Partition"
  411. pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}/partitions/{partition}"
  412. };
  413. // Output only. Partition values used in the HTTP URL must be
  414. // double encoded. For example, `url_encode(url_encode(value))` can be used
  415. // to encode "US:CA/CA#Sunnyvale so that the request URL ends
  416. // with "/partitions/US%253ACA/CA%2523Sunnyvale".
  417. // The name field in the response retains the encoded format.
  418. string name = 1 [
  419. (google.api.field_behavior) = OUTPUT_ONLY,
  420. (google.api.resource_reference) = {
  421. type: "dataplex.googleapis.com/Partition"
  422. }
  423. ];
  424. // Required. Immutable. The set of values representing the partition, which correspond to the
  425. // partition schema defined in the parent entity.
  426. repeated string values = 2 [
  427. (google.api.field_behavior) = REQUIRED,
  428. (google.api.field_behavior) = IMMUTABLE
  429. ];
  430. // Required. Immutable. The location of the entity data within the partition, for example,
  431. // `gs://bucket/path/to/entity/key1=value1/key2=value2`.
  432. // Or `projects/<project_id>/datasets/<dataset_id>/tables/<table_id>`
  433. string location = 3 [
  434. (google.api.field_behavior) = REQUIRED,
  435. (google.api.field_behavior) = IMMUTABLE
  436. ];
  437. // Optional. The etag for this partition.
  438. string etag = 4 [
  439. deprecated = true,
  440. (google.api.field_behavior) = OPTIONAL
  441. ];
  442. }
  443. // Schema information describing the structure and layout of the data.
  444. message Schema {
  445. // Type information for fields in schemas and partition schemas.
  446. enum Type {
  447. // SchemaType unspecified.
  448. TYPE_UNSPECIFIED = 0;
  449. // Boolean field.
  450. BOOLEAN = 1;
  451. // Single byte numeric field.
  452. BYTE = 2;
  453. // 16-bit numeric field.
  454. INT16 = 3;
  455. // 32-bit numeric field.
  456. INT32 = 4;
  457. // 64-bit numeric field.
  458. INT64 = 5;
  459. // Floating point numeric field.
  460. FLOAT = 6;
  461. // Double precision numeric field.
  462. DOUBLE = 7;
  463. // Real value numeric field.
  464. DECIMAL = 8;
  465. // Sequence of characters field.
  466. STRING = 9;
  467. // Sequence of bytes field.
  468. BINARY = 10;
  469. // Date and time field.
  470. TIMESTAMP = 11;
  471. // Date field.
  472. DATE = 12;
  473. // Time field.
  474. TIME = 13;
  475. // Structured field. Nested fields that define the structure of the map.
  476. // If all nested fields are nullable, this field represents a union.
  477. RECORD = 14;
  478. // Null field that does not have values.
  479. NULL = 100;
  480. }
  481. // Additional qualifiers to define field semantics.
  482. enum Mode {
  483. // Mode unspecified.
  484. MODE_UNSPECIFIED = 0;
  485. // The field has required semantics.
  486. REQUIRED = 1;
  487. // The field has optional semantics, and may be null.
  488. NULLABLE = 2;
  489. // The field has repeated (0 or more) semantics, and is a list of values.
  490. REPEATED = 3;
  491. }
  492. // Represents a column field within a table schema.
  493. message SchemaField {
  494. // Required. The name of the field. Must contain only letters, numbers and
  495. // underscores, with a maximum length of 767 characters,
  496. // and must begin with a letter or underscore.
  497. string name = 1 [(google.api.field_behavior) = REQUIRED];
  498. // Optional. User friendly field description. Must be less than or equal to 1024
  499. // characters.
  500. string description = 2 [(google.api.field_behavior) = OPTIONAL];
  501. // Required. The type of field.
  502. Type type = 3 [(google.api.field_behavior) = REQUIRED];
  503. // Required. Additional field semantics.
  504. Mode mode = 4 [(google.api.field_behavior) = REQUIRED];
  505. // Optional. Any nested field for complex types.
  506. repeated SchemaField fields = 10 [(google.api.field_behavior) = OPTIONAL];
  507. }
  508. // Represents a key field within the entity's partition structure. You could
  509. // have up to 20 partition fields, but only the first 10 partitions have the
  510. // filtering ability due to performance consideration. **Note:**
  511. // Partition fields are immutable.
  512. message PartitionField {
  513. // Required. Partition field name must consist of letters, numbers, and underscores
  514. // only, with a maximum of length of 256 characters,
  515. // and must begin with a letter or underscore..
  516. string name = 1 [(google.api.field_behavior) = REQUIRED];
  517. // Required. Immutable. The type of field.
  518. Type type = 2 [
  519. (google.api.field_behavior) = REQUIRED,
  520. (google.api.field_behavior) = IMMUTABLE
  521. ];
  522. }
  523. // The structure of paths within the entity, which represent partitions.
  524. enum PartitionStyle {
  525. // PartitionStyle unspecified
  526. PARTITION_STYLE_UNSPECIFIED = 0;
  527. // Partitions are hive-compatible.
  528. // Examples: `gs://bucket/path/to/table/dt=2019-10-31/lang=en`,
  529. // `gs://bucket/path/to/table/dt=2019-10-31/lang=en/late`.
  530. HIVE_COMPATIBLE = 1;
  531. }
  532. // Required. Set to `true` if user-managed or `false` if managed by Dataplex. The
  533. // default is `false` (managed by Dataplex).
  534. //
  535. // - Set to `false`to enable Dataplex discovery to update the schema.
  536. // including new data discovery, schema inference, and schema evolution.
  537. // Users retain the ability to input and edit the schema. Dataplex
  538. // treats schema input by the user as though produced
  539. // by a previous Dataplex discovery operation, and it will
  540. // evolve the schema and take action based on that treatment.
  541. //
  542. // - Set to `true` to fully manage the entity
  543. // schema. This setting guarantees that Dataplex will not
  544. // change schema fields.
  545. bool user_managed = 1 [(google.api.field_behavior) = REQUIRED];
  546. // Optional. The sequence of fields describing data in table entities.
  547. // **Note:** BigQuery SchemaFields are immutable.
  548. repeated SchemaField fields = 2 [(google.api.field_behavior) = OPTIONAL];
  549. // Optional. The sequence of fields describing the partition structure in entities.
  550. // If this field is empty, there are no partitions within the data.
  551. repeated PartitionField partition_fields = 3 [(google.api.field_behavior) = OPTIONAL];
  552. // Optional. The structure of paths containing partition data within the entity.
  553. PartitionStyle partition_style = 4 [(google.api.field_behavior) = OPTIONAL];
  554. }
  555. // Describes the format of the data within its storage location.
  556. message StorageFormat {
  557. // Describes CSV and similar semi-structured data formats.
  558. message CsvOptions {
  559. // Optional. The character encoding of the data. Accepts "US-ASCII", "UTF-8", and
  560. // "ISO-8859-1". Defaults to UTF-8 if unspecified.
  561. string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  562. // Optional. The number of rows to interpret as header rows that should be skipped
  563. // when reading data rows. Defaults to 0.
  564. int32 header_rows = 2 [(google.api.field_behavior) = OPTIONAL];
  565. // Optional. The delimiter used to separate values. Defaults to ','.
  566. string delimiter = 3 [(google.api.field_behavior) = OPTIONAL];
  567. // Optional. The character used to quote column values. Accepts '"'
  568. // (double quotation mark) or ''' (single quotation mark). Defaults to
  569. // '"' (double quotation mark) if unspecified.
  570. string quote = 4 [(google.api.field_behavior) = OPTIONAL];
  571. }
  572. // Describes JSON data format.
  573. message JsonOptions {
  574. // Optional. The character encoding of the data. Accepts "US-ASCII", "UTF-8" and
  575. // "ISO-8859-1". Defaults to UTF-8 if not specified.
  576. string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  577. }
  578. // The specific file format of the data.
  579. enum Format {
  580. // Format unspecified.
  581. FORMAT_UNSPECIFIED = 0;
  582. // Parquet-formatted structured data.
  583. PARQUET = 1;
  584. // Avro-formatted structured data.
  585. AVRO = 2;
  586. // Orc-formatted structured data.
  587. ORC = 3;
  588. // Csv-formatted semi-structured data.
  589. CSV = 100;
  590. // Json-formatted semi-structured data.
  591. JSON = 101;
  592. // Image data formats (such as jpg and png).
  593. IMAGE = 200;
  594. // Audio data formats (such as mp3, and wav).
  595. AUDIO = 201;
  596. // Video data formats (such as mp4 and mpg).
  597. VIDEO = 202;
  598. // Textual data formats (such as txt and xml).
  599. TEXT = 203;
  600. // TensorFlow record format.
  601. TFRECORD = 204;
  602. // Data that doesn't match a specific format.
  603. OTHER = 1000;
  604. // Data of an unknown format.
  605. UNKNOWN = 1001;
  606. }
  607. // The specific compressed file format of the data.
  608. enum CompressionFormat {
  609. // CompressionFormat unspecified. Implies uncompressed data.
  610. COMPRESSION_FORMAT_UNSPECIFIED = 0;
  611. // GZip compressed set of files.
  612. GZIP = 2;
  613. // BZip2 compressed set of files.
  614. BZIP2 = 3;
  615. }
  616. // Output only. The data format associated with the stored data, which represents
  617. // content type values. The value is inferred from mime type.
  618. Format format = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  619. // Optional. The compression type associated with the stored data.
  620. // If unspecified, the data is uncompressed.
  621. CompressionFormat compression_format = 2 [(google.api.field_behavior) = OPTIONAL];
  622. // Required. The mime type descriptor for the data. Must match the pattern
  623. // {type}/{subtype}. Supported values:
  624. //
  625. // - application/x-parquet
  626. // - application/x-avro
  627. // - application/x-orc
  628. // - application/x-tfrecord
  629. // - application/json
  630. // - application/{subtypes}
  631. // - text/csv
  632. // - text/<subtypes>
  633. // - image/{image subtype}
  634. // - video/{video subtype}
  635. // - audio/{audio subtype}
  636. string mime_type = 3 [(google.api.field_behavior) = REQUIRED];
  637. // Additional format-specific options.
  638. oneof options {
  639. // Optional. Additional information about CSV formatted data.
  640. CsvOptions csv = 10 [(google.api.field_behavior) = OPTIONAL];
  641. // Optional. Additional information about CSV formatted data.
  642. JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];
  643. }
  644. }
  645. // Identifies the cloud system that manages the data storage.
  646. enum StorageSystem {
  647. // Storage system unspecified.
  648. STORAGE_SYSTEM_UNSPECIFIED = 0;
  649. // The entity data is contained within a Cloud Storage bucket.
  650. CLOUD_STORAGE = 1;
  651. // The entity data is contained within a BigQuery dataset.
  652. BIGQUERY = 2;
  653. }