// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1";

// Runtime configuration for a workload.
message RuntimeConfig {
  // Optional. Version of the batch runtime.
  string version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Custom container image for the job runtime environment. If
  // not specified, a default container image will be used.
  string container_image = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, which are used to
  // configure workload execution.
  map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
}

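// An illustrative RuntimeConfig in text format. This is a sketch only: the
// container image path is a placeholder, and the property key is an assumed
// Spark setting, not a value taken from this file's documentation.
//
//   version: "1.1"
//   container_image: "gcr.io/my-project/my-runtime-image:latest"
//   properties { key: "spark.executor.instances" value: "2" }
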
// Environment configuration for a workload.
message EnvironmentConfig {
  // Optional. Execution configuration for a workload.
  ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Peripherals configuration that the workload has access to.
  PeripheralsConfig peripherals_config = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Execution configuration for a workload.
message ExecutionConfig {
  // Optional. Service account used to execute the workload.
  string service_account = 2 [(google.api.field_behavior) = OPTIONAL];

  // Network configuration for workload execution.
  oneof network {
    // Optional. Network URI to connect workload to.
    string network_uri = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Subnetwork URI to connect workload to.
    string subnetwork_uri = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Tags used for network traffic control.
  repeated string network_tags = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud KMS key to use for encryption.
  string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
}

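// An illustrative ExecutionConfig in text format. The resource names are
// placeholders, and the URI and key formats are assumptions; this file does
// not document them.
//
//   service_account: "workload-sa@my-project.iam.gserviceaccount.com"
//   subnetwork_uri: "projects/my-project/regions/us-central1/subnetworks/default"
//   network_tags: "dataproc-workload"
//   kms_key: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key"
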
// Spark History Server configuration for the workload.
message SparkHistoryServerConfig {
  // Optional. Resource name of an existing Dataproc Cluster to act as a Spark
  // History Server for the workload.
  //
  // Example:
  //
  // * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
  string dataproc_cluster = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

// Auxiliary services configuration for a workload.
message PeripheralsConfig {
  // Optional. Resource name of an existing Dataproc Metastore service.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[region]/services/[service_id]`
  string metastore_service = 1 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2 [(google.api.field_behavior) = OPTIONAL];
}

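// An illustrative PeripheralsConfig in text format, following the resource
// name formats documented above (project, region, and service names are
// placeholders):
//
//   metastore_service: "projects/my-project/locations/us-central1/services/my-metastore"
//   spark_history_server_config {
//     dataproc_cluster: "projects/my-project/regions/us-central1/clusters/my-phs-cluster"
//   }
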
// Runtime information about workload execution.
message RuntimeInfo {
  // Output only. Map of remote access endpoints (such as web interfaces and
  // APIs) to their URIs.
  map<string, string> endpoints = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the stdout and stderr of
  // the workload.
  string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the diagnostics tarball.
  string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The cluster's GKE config.
message GkeClusterConfig {
  // Optional. A target GKE cluster to deploy to. It must be in the same
  // project and region as the Dataproc cluster (the GKE cluster can be zonal
  // or regional).
  // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
  string gke_cluster_target = 2 [
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. GKE NodePools where workloads will be scheduled. At least one
  // node pool must be assigned the 'default' role. Each role can be given to
  // only a single NodePoolTarget. All NodePools must have the same location
  // settings. If a nodePoolTarget is not specified, Dataproc constructs a
  // default nodePoolTarget.
  repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL];
}

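// An illustrative GkeClusterConfig in text format, following the formats
// documented above (project, location, cluster, and node pool names are
// placeholders):
//
//   gke_cluster_target: "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
//   node_pool_target {
//     node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-default"
//     roles: DEFAULT
//   }
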
// The configuration for running the Dataproc cluster on Kubernetes.
message KubernetesClusterConfig {
  // Optional. A namespace within the Kubernetes cluster to deploy into. If
  // this namespace does not exist, it is created. If it exists, Dataproc
  // verifies that another Dataproc VirtualCluster is not installed into it.
  // If not specified, the name of the Dataproc Cluster is used.
  string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];

  oneof config {
    // Required. The configuration for running the Dataproc cluster on GKE.
    GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. The software configuration for this Dataproc cluster running on
  // Kubernetes.
  KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL];
}

// The software configuration for this Dataproc cluster running on Kubernetes.
message KubernetesSoftwareConfig {
  // The components that should be installed in this Dataproc cluster. The key
  // must be a string from the KubernetesComponent enumeration. The value is
  // the version of the software to be installed.
  // At least one entry must be specified.
  map<string, string> component_version = 1;

  // The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `spark:spark.kubernetes.container.image`. The following are supported
  // prefixes and their mappings:
  //
  // * spark: `spark-defaults.conf`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2;
}

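// An illustrative KubernetesSoftwareConfig in text format. The property key
// is the example given above; the "SPARK" component key and both version
// values are assumptions, since the KubernetesComponent enumeration is not
// defined in this file.
//
//   component_version { key: "SPARK" value: "3.1" }
//   properties {
//     key: "spark:spark.kubernetes.container.image"
//     value: "gcr.io/my-project/my-spark-image:latest"
//   }
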
// GKE NodePools that Dataproc workloads run on.
message GkeNodePoolTarget {
  // `Role` specifies whose tasks will run on the NodePool. The roles can be
  // specific to workloads. Exactly one GkeNodePoolTarget within the
  // VirtualCluster must have the 'default' role, which is used to run all
  // workloads that are not associated with a NodePool.
  enum Role {
    // Role is unspecified.
    ROLE_UNSPECIFIED = 0;

    // Any roles that are not directly assigned to a NodePool run on the
    // `default` role's NodePool.
    DEFAULT = 1;

    // Run controllers and webhooks.
    CONTROLLER = 2;

    // Run the Spark driver.
    SPARK_DRIVER = 3;

    // Run Spark executors.
    SPARK_EXECUTOR = 4;
  }

  // Required. The target GKE NodePool.
  // Format:
  // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
  string node_pool = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The roles associated with the GKE NodePool.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The configuration for the GKE NodePool.
  //
  // If specified, Dataproc attempts to create a NodePool with the
  // specified shape. If one with the same name already exists, it is
  // verified against all specified fields. If a field differs, the
  // virtual cluster creation will fail.
  //
  // If omitted, any NodePool with the specified name is used. If a
  // NodePool with the specified name does not exist, Dataproc creates a
  // NodePool with default values.
  GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL];
}

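// An illustrative GkeNodePoolTarget in text format, following the node pool
// name format documented above. The machine type and node counts are
// placeholder values, not recommendations.
//
//   node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-drivers"
//   roles: SPARK_DRIVER
//   node_pool_config {
//     config { machine_type: "n1-standard-4" }
//     autoscaling { min_node_count: 1 max_node_count: 5 }
//   }
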
// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
message GkeNodePoolConfig {
  // Parameters that describe cluster nodes.
  message GkeNodeConfig {
    // Optional. The name of a Compute Engine [machine
    // type](https://cloud.google.com/compute/docs/machine-types).
    string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether the nodes are created as [preemptible VM
    // instances](https://cloud.google.com/compute/docs/instances/preemptible).
    bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of local SSD disks to attach to the node, which is
    // limited by the maximum number of disks allowable per zone (see [Adding
    // Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
    int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. A list of [hardware
    // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
    // each node.
    repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. [Minimum CPU
    // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
    // to be used by this instance. The instance may be scheduled on the
    // specified or a newer CPU platform. Specify the friendly names of CPU
    // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
    string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
  }

  // A GkeNodePoolAcceleratorConfig represents a Hardware Accelerator request
  // for a NodePool.
  message GkeNodePoolAcceleratorConfig {
    // The number of accelerator cards exposed to an instance.
    int64 accelerator_count = 1;

    // The accelerator type resource name (see GPUs on Compute Engine).
    string accelerator_type = 2;
  }

  // GkeNodePoolAutoscaling contains information the cluster autoscaler needs
  // to adjust the size of the node pool to the current cluster usage.
  message GkeNodePoolAutoscalingConfig {
    // The minimum number of nodes in the NodePool. Must be >= 0 and <=
    // max_node_count.
    int32 min_node_count = 2;

    // The maximum number of nodes in the NodePool. Must be >= min_node_count.
    // **Note:** Quota must be sufficient to scale up the cluster.
    int32 max_node_count = 3;
  }

  // Optional. The node pool configuration.
  GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The list of Compute Engine
  // [zones](https://cloud.google.com/compute/docs/zones#available) where
  // NodePool's nodes will be located.
  //
  // **Note:** Currently, only one zone may be specified.
  //
  // If a location is not specified during NodePool creation, Dataproc will
  // choose a location.
  repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The autoscaler configuration for this NodePool. The autoscaler
  // is enabled only when a valid configuration is present.
  GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL];
}

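// An illustrative GkeNodePoolConfig in text format. The machine type, zone,
// accelerator type, and node counts are placeholder values, not
// recommendations.
//
//   config {
//     machine_type: "n1-standard-8"
//     preemptible: false
//     accelerators { accelerator_count: 1 accelerator_type: "nvidia-tesla-t4" }
//   }
//   locations: "us-central1-a"
//   autoscaling { min_node_count: 0 max_node_count: 10 }
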
// Cluster components that can be activated.
enum Component {
  // Unspecified component. Specifying this will cause Cluster creation to
  // fail.
  COMPONENT_UNSPECIFIED = 0;

  // The Anaconda Python distribution. The Anaconda component is not supported
  // in the Dataproc [2.0
  // image](/dataproc/docs/concepts/versioning/dataproc-release-2.0). The 2.0
  // image is pre-installed with Miniconda.
  ANACONDA = 5;

  // Docker.
  DOCKER = 13;

  // The Druid query engine. (alpha)
  DRUID = 9;

  // Flink.
  FLINK = 14;

  // HBase. (beta)
  HBASE = 11;

  // The Hive Web HCatalog (the REST service for accessing HCatalog).
  HIVE_WEBHCAT = 3;

  // The Jupyter Notebook.
  JUPYTER = 1;

  // The Presto query engine.
  PRESTO = 6;

  // The Ranger service.
  RANGER = 12;

  // The Solr service.
  SOLR = 10;

  // The Zeppelin notebook.
  ZEPPELIN = 4;

  // The Zookeeper service.
  ZOOKEEPER = 8;
}

// Actions in response to failure of a resource associated with a cluster.
enum FailureAction {
  // When FailureAction is unspecified, failure action defaults to NO_ACTION.
  FAILURE_ACTION_UNSPECIFIED = 0;

  // Take no action on failure to create a cluster resource. NO_ACTION is the
  // default.
  NO_ACTION = 1;

  // Delete the failed cluster resource.
  DELETE = 2;
}