- // Copyright 2022 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- syntax = "proto3";
- package google.cloud.dataproc.v1;
- import "google/api/field_behavior.proto";
- option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
- option java_multiple_files = true;
- option java_outer_classname = "SharedProto";
- option java_package = "com.google.cloud.dataproc.v1";
- // Runtime configuration for a workload.
- message RuntimeConfig {
- // Optional. Version of the batch runtime.
- string version = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Custom container image for the job runtime environment. If
- // not specified, a default container image will be used.
- string container_image = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A mapping of property names to values, which are used to configure workload
- // execution.
- map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
- }
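- // Illustrative textproto sketch of a RuntimeConfig; the version, image, and
- // property values below are hypothetical placeholders:
- //
- //   version: "1.1"
- //   container_image: "gcr.io/my-project/custom-runtime:latest"
- //   properties { key: "spark.executor.cores" value: "4" }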
- // Environment configuration for a workload.
- message EnvironmentConfig {
- // Optional. Execution configuration for a workload.
- ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Peripherals configuration that the workload has access to.
- PeripheralsConfig peripherals_config = 2 [(google.api.field_behavior) = OPTIONAL];
- }
- // Execution configuration for a workload.
- message ExecutionConfig {
- // Optional. Service account used to execute the workload.
- string service_account = 2 [(google.api.field_behavior) = OPTIONAL];
- // Network configuration for workload execution.
- oneof network {
- // Optional. Network URI to connect the workload to.
- string network_uri = 4 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Subnetwork URI to connect the workload to.
- string subnetwork_uri = 5 [(google.api.field_behavior) = OPTIONAL];
- }
- // Optional. Tags used for network traffic control.
- repeated string network_tags = 6 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The Cloud KMS key to use for encryption.
- string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
- }
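- // Illustrative textproto sketch of an ExecutionConfig; because network_uri
- // and subnetwork_uri share a oneof, only one of them may be set. All values
- // are hypothetical:
- //
- //   service_account: "workload-sa@my-project.iam.gserviceaccount.com"
- //   subnetwork_uri: "projects/my-project/regions/us-central1/subnetworks/default"
- //   network_tags: "dataproc-workload"
- //   kms_key: "projects/my-project/locations/us-central1/keyRings/my-kr/cryptoKeys/my-key"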
- // Spark History Server configuration for the workload.
- message SparkHistoryServerConfig {
- // Optional. Resource name of an existing Dataproc Cluster to act as a Spark History
- // Server for the workload.
- //
- // Example:
- //
- // * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
- string dataproc_cluster = 1 [
- (google.api.field_behavior) = OPTIONAL
- ];
- }
- // Auxiliary services configuration for a workload.
- message PeripheralsConfig {
- // Optional. Resource name of an existing Dataproc Metastore service.
- //
- // Example:
- //
- // * `projects/[project_id]/locations/[region]/services/[service_id]`
- string metastore_service = 1 [
- (google.api.field_behavior) = OPTIONAL
- ];
- // Optional. The Spark History Server configuration for the workload.
- SparkHistoryServerConfig spark_history_server_config = 2 [(google.api.field_behavior) = OPTIONAL];
- }
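- // Illustrative textproto sketch of a PeripheralsConfig; resource names are
- // hypothetical:
- //
- //   metastore_service: "projects/my-project/locations/us-central1/services/my-metastore"
- //   spark_history_server_config {
- //     dataproc_cluster: "projects/my-project/regions/us-central1/clusters/my-phs-cluster"
- //   }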
- // Runtime information about workload execution.
- message RuntimeInfo {
- // Output only. Map of remote access endpoints (such as web interfaces and APIs) to their
- // URIs.
- map<string, string> endpoints = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. A URI pointing to the location of the stdout and stderr of the workload.
- string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
- // Output only. A URI pointing to the location of the diagnostics tarball.
- string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
- }
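- // Illustrative textproto sketch of a RuntimeInfo as the service might return
- // it; all URIs are hypothetical:
- //
- //   endpoints { key: "Spark History Server" value: "https://example.com/spark-history" }
- //   output_uri: "gs://my-bucket/workload-output/"
- //   diagnostic_output_uri: "gs://my-bucket/diagnostics.tar.gz"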
- // The cluster's GKE config.
- message GkeClusterConfig {
- // Optional. A target GKE cluster to deploy to. It must be in the same project and
- // region as the Dataproc cluster (the GKE cluster can be zonal or regional).
- // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
- string gke_cluster_target = 2 [
- (google.api.field_behavior) = OPTIONAL
- ];
- // Optional. GKE NodePools where workloads will be scheduled. At least one node pool
- // must be assigned the 'default' role. Each role can be given to only a
- // single NodePoolTarget. All NodePools must have the same location settings.
- // If a GkeNodePoolTarget is not specified, Dataproc constructs a default
- // GkeNodePoolTarget.
- repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL];
- }
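- // Illustrative textproto sketch of a GkeClusterConfig; resource names are
- // hypothetical:
- //
- //   gke_cluster_target: "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
- //   node_pool_target {
- //     node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-default"
- //     roles: DEFAULT
- //   }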
- // The configuration for running the Dataproc cluster on Kubernetes.
- message KubernetesClusterConfig {
- // Optional. A namespace within the Kubernetes cluster to deploy into. If this namespace
- // does not exist, it is created. If it exists, Dataproc
- // verifies that another Dataproc VirtualCluster is not installed
- // into it. If not specified, the name of the Dataproc Cluster is used.
- string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];
- oneof config {
- // Required. The configuration for running the Dataproc cluster on GKE.
- GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED];
- }
- // Optional. The software configuration for this Dataproc cluster running on Kubernetes.
- KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL];
- }
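- // Illustrative textproto sketch of a KubernetesClusterConfig; all values are
- // hypothetical:
- //
- //   kubernetes_namespace: "my-dataproc-namespace"
- //   gke_cluster_config {
- //     gke_cluster_target: "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
- //   }
- //   kubernetes_software_config {
- //     component_version { key: "SPARK" value: "3.1-dataproc-7" }
- //   }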
- // The software configuration for this Dataproc cluster running on Kubernetes.
- message KubernetesSoftwareConfig {
- // The components that should be installed in this Dataproc cluster. The key
- // must be a string from the KubernetesComponent enumeration. The value is
- // the version of the software to be installed.
- // At least one entry must be specified.
- map<string, string> component_version = 1;
- // The properties to set on daemon config files.
- //
- // Property keys are specified in `prefix:property` format, for example
- // `spark:spark.kubernetes.container.image`. The following are supported
- // prefixes and their mappings:
- //
- // * spark: `spark-defaults.conf`
- //
- // For more information, see [Cluster
- // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
- map<string, string> properties = 2;
- }
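- // Illustrative textproto sketch of a KubernetesSoftwareConfig showing the
- // `prefix:property` key format; the version and image values are
- // hypothetical:
- //
- //   component_version { key: "SPARK" value: "3.1-dataproc-7" }
- //   properties {
- //     key: "spark:spark.kubernetes.container.image"
- //     value: "gcr.io/my-project/my-spark-image:latest"
- //   }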
- // GKE NodePools that Dataproc workloads run on.
- message GkeNodePoolTarget {
- // `Role` specifies whose tasks will run on the NodePool. The roles can be
- // specific to workloads. Exactly one GkeNodePoolTarget within the
- // VirtualCluster must have the 'default' role, which is used to run all
- // workloads that are not associated with a NodePool.
- enum Role {
- // Role is unspecified.
- ROLE_UNSPECIFIED = 0;
- // Tasks for roles that are not directly assigned to a NodePool run on the
- // `default` role's NodePool.
- DEFAULT = 1;
- // Run controllers and webhooks.
- CONTROLLER = 2;
- // Run the Spark driver.
- SPARK_DRIVER = 3;
- // Run Spark executors.
- SPARK_EXECUTOR = 4;
- }
- // Required. The target GKE NodePool.
- // Format:
- // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
- string node_pool = 1 [
- (google.api.field_behavior) = REQUIRED
- ];
- // Required. The roles associated with the GKE NodePool.
- repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];
- // Optional. The configuration for the GKE NodePool.
- //
- // If specified, Dataproc attempts to create a NodePool with the
- // specified shape. If one with the same name already exists, it is
- // verified against all specified fields. If a field differs, the
- // virtual cluster creation will fail.
- //
- // If omitted, any NodePool with the specified name is used. If a
- // NodePool with the specified name does not exist, Dataproc creates a
- // NodePool with default values.
- GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL];
- }
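- // Illustrative textproto sketch of a GkeNodePoolTarget carrying the required
- // 'default' role; resource names and the machine type are hypothetical:
- //
- //   node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-default"
- //   roles: DEFAULT
- //   node_pool_config {
- //     config { machine_type: "n1-standard-4" }
- //   }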
- // The configuration of a GKE NodePool used by a [Dataproc-on-GKE
- // cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
- message GkeNodePoolConfig {
- // Parameters that describe cluster nodes.
- message GkeNodeConfig {
- // Optional. The name of a Compute Engine [machine
- // type](https://cloud.google.com/compute/docs/machine-types).
- string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];
- // Optional. Whether the nodes are created as [preemptible VM
- // instances](https://cloud.google.com/compute/docs/instances/preemptible).
- bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The number of local SSD disks to attach to the node, which is limited by
- // the maximum number of disks allowable per zone (see [Adding Local
- // SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
- int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];
- // Optional. A list of [hardware
- // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
- // each node.
- repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL];
- // Optional. [Minimum CPU
- // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
- // to be used by this instance. The instance may be scheduled on the
- // specified or a newer CPU platform. Specify the friendly names of CPU
- // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
- string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
- }
- // A GkeNodePoolAcceleratorConfig represents a hardware accelerator request
- // for a NodePool.
- message GkeNodePoolAcceleratorConfig {
- // The number of accelerator cards exposed to an instance.
- int64 accelerator_count = 1;
- // The accelerator type resource name (see [GPUs on Compute
- // Engine](https://cloud.google.com/compute/docs/gpus)).
- string accelerator_type = 2;
- }
- // GkeNodePoolAutoscalingConfig contains information the cluster autoscaler
- // needs to adjust the size of the node pool to the current cluster usage.
- message GkeNodePoolAutoscalingConfig {
- // The minimum number of nodes in the NodePool. Must be >= 0 and <=
- // max_node_count.
- int32 min_node_count = 2;
- // The maximum number of nodes in the NodePool. Must be >= min_node_count.
- // **Note:** Quota must be sufficient to scale up the cluster.
- int32 max_node_count = 3;
- }
- // Optional. The node pool configuration.
- GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The list of Compute Engine
- // [zones](https://cloud.google.com/compute/docs/zones#available) where
- // NodePool's nodes will be located.
- //
- // **Note:** Currently, only one zone may be specified.
- //
- // If a location is not specified during NodePool creation, Dataproc will
- // choose a location.
- repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];
- // Optional. The autoscaler configuration for this NodePool. The autoscaler is enabled
- // only when a valid configuration is present.
- GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL];
- }
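- // Illustrative textproto sketch of a GkeNodePoolConfig with autoscaling
- // enabled (0 <= min_node_count <= max_node_count); all values are
- // hypothetical:
- //
- //   config {
- //     machine_type: "n1-standard-8"
- //     preemptible: true
- //     local_ssd_count: 2
- //   }
- //   locations: "us-central1-a"
- //   autoscaling { min_node_count: 1 max_node_count: 10 }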
- // Cluster components that can be activated.
- enum Component {
- // Unspecified component. Specifying this will cause Cluster creation to fail.
- COMPONENT_UNSPECIFIED = 0;
- // The Anaconda python distribution. The Anaconda component is not supported
- // in the Dataproc [2.0
- // image](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.0).
- // The 2.0 image is pre-installed with Miniconda.
- ANACONDA = 5;
- // Docker
- DOCKER = 13;
- // The Druid query engine. (alpha)
- DRUID = 9;
- // Flink
- FLINK = 14;
- // HBase. (beta)
- HBASE = 11;
- // The Hive Web HCatalog (the REST service for accessing HCatalog).
- HIVE_WEBHCAT = 3;
- // The Jupyter Notebook.
- JUPYTER = 1;
- // The Presto query engine.
- PRESTO = 6;
- // The Ranger service.
- RANGER = 12;
- // The Solr service.
- SOLR = 10;
- // The Zeppelin notebook.
- ZEPPELIN = 4;
- // The Zookeeper service.
- ZOOKEEPER = 8;
- }
- // Actions in response to failure of a resource associated with a cluster.
- enum FailureAction {
- // When FailureAction is unspecified, failure action defaults to NO_ACTION.
- FAILURE_ACTION_UNSPECIFIED = 0;
- // Take no action on failure to create a cluster resource. NO_ACTION is the
- // default.
- NO_ACTION = 1;
- // Delete the failed cluster resource.
- DELETE = 2;
- }
|