// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1alpha2;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/code.proto";

// Code-generation options for the C++, Go, and Java targets. These are part
// of the published API surface and must not be changed casually.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics";
option java_multiple_files = true;
option java_outer_classname = "PipelinesProto";
option java_package = "com.google.genomics.v1a";

// Service for creating, managing, and running genomics pipelines.
service PipelinesV1Alpha2 {
  // Registers a pipeline so that it can be run later. The request carries a
  // Pipeline with every field except `pipelineId` populated; the response is
  // that same pipeline with `pipelineId` filled in. The returned id is what
  // callers use to run the pipeline.
  //
  // The caller needs WRITE permission on the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. When the request specifies `pipelineId`, a saved
  // pipeline is run. When it specifies `ephemeralPipeline`, that pipeline is
  // run once and no copy of it is saved.
  //
  // The caller needs READ permission on the project where the pipeline is
  // stored and WRITE permission on the project where it will run, because
  // VMs will be created and storage will be used there.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Looks up a pipeline by its ID.
  //
  // The caller needs READ permission on the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // The caller needs READ permission on the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes the pipeline with the given ID.
  //
  // The caller needs WRITE permission on the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Returns controller configuration information. Intended to be called only
  // by VMs created by the Pipelines Service, never by end users.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets the status of a given operation. Any new timestamps (as determined
  // by description) are appended to TimestampEvents. Intended to be called
  // only by VMs created by the Pipelines Service, never by end users.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}

// A Compute Engine resource managed on behalf of a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message ComputeEngine {
  // Name of the instance on which the operation is running.
  string instance_name = 1;

  // Availability zone in which the instance resides.
  string zone = 2;

  // Machine type of the instance.
  string machine_type = 3;

  // Names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}

// Runtime metadata reported for a RunPipeline execution. It is populated in
// the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with that execution.
message RuntimeMetadata {
  // Execution details that are specific to Google Compute Engine.
  ComputeEngine compute_engine = 1;
}

// A pipeline: a transformation from a set of input parameters to a set of
// output parameters, defined as a docker image plus a command to run inside
// that image. Every pipeline runs on a Google Compute Engine VM. A pipeline
// may be saved with the `create` method and run later with the `run` method,
// or defined and run in a single step with the `run` method.
message Pipeline {
  // Required. Project in which the pipeline is created. The caller must have
  // WRITE access.
  string project_id = 1;

  // Required. User-specified pipeline name; it does not have to be unique.
  // ListPipelines can filter pipelines by this name.
  string name = 2;

  // User-specified description.
  string description = 3;

  // The pipeline's input parameters.
  repeated PipelineParameter input_parameters = 8;

  // The pipeline's output parameters.
  repeated PipelineParameter output_parameters = 9;

  // Required. The environment in which the pipeline runs.
  oneof executor {
    // The docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id, generated by the service when CreatePipeline is
  // called. It cannot be set on the Pipeline passed in a
  // CreatePipelineRequest; it is populated in the CreatePipeline response and
  // in all subsequent Get and List calls. Its presence indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}

// Request message for CreatePipeline. The embedded pipeline should leave
// `pipelineId` empty, because the server populates it.
message CreatePipelineRequest {
  // The pipeline to create. `pipelineId` should not be populated.
  Pipeline pipeline = 1;
}

// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // Every input parameter that has no default value must be specified.
  // A value given here for a parameter that has a default overrides that
  // default.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. Every output parameter that has no default value must be
  // specified. A value given here for a parameter that has a default
  // overrides that default.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // Deprecated: use `labels` instead. Client-specified pipeline operation
  // identifier.
  //
  // Marked with the `deprecated` field option so that generated code carries
  // the deprecation; the field number and wire format are unchanged.
  string client_id = 5 [deprecated = true];

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;

  // How long to keep the VM up after a failure (for example docker command
  // failed, copying input or output files failed, etc). While the VM is up,
  // one can ssh into the VM to debug. Default is 0; maximum allowed value is
  // 1 day.
  google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;

  // Labels to apply to this pipeline run. Labels will also be applied to
  // compute resources (VM, disks) created by this pipeline run. When listing
  // operations, operations can be [filtered by labels]
  // [google.longrunning.ListOperationsRequest.filter].
  // Label keys may not be empty; label values may be empty. Non-empty labels
  // must be 1-63 characters long, and comply with [RFC1035]
  // (https://www.ietf.org/rfc/rfc1035.txt).
  // Specifically, the name must be 1-63 characters long and match the regular
  // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first
  // character must be a lowercase letter, and all following characters must
  // be a dash, lowercase letter, or digit, except the last character, which
  // cannot be a dash.
  map<string, string> labels = 9;
}

// Request message for RunPipeline. A `pipelineId` refers to a saved pipeline:
// one created with CreatePipeline, whose returned Pipeline object carried
// that `pipelineId`. An `ephemeralPipeline` is instead run once with the
// given args and is not saved. Specifying both `pipelineId` and
// `ephemeralPipeline` is an error. `pipelineArgs` must be specified.
message RunPipelineRequest {
  oneof pipeline {
    // Id of an already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // Arguments to use for this run of the pipeline.
  RunPipelineArgs pipeline_args = 3;
}

// Request message for fetching a saved pipeline by its id.
message GetPipelineRequest {
  // Id of the pipeline to fetch. The caller must have READ access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}

// Request message for listing the pipelines in a given project. Results can
// be narrowed by name with `namePrefix`: every pipeline whose name begins
// with `namePrefix` is returned. Standard pagination applies: `pageSize` is
// the number of pipelines to return, and `pageToken` is taken from a previous
// ListPipelinesResponse to indicate the offset.
message ListPipelinesRequest {
  // Required. Name of the project to search for pipelines. The caller must
  // have READ access to this project.
  string project_id = 1;

  // Only pipelines whose names match this prefix should be returned. When
  // unspecified, all pipelines in the project, up to `pageSize`, are
  // returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256; the maximum is
  // 2048.
  int32 page_size = 3;

  // Token indicating where to start getting results. When unspecified, the
  // first page of results is returned.
  string page_token = 4;
}

// Response message for ListPipelines. Holds at most `pageSize` pipelines.
// When it holds exactly `pageSize` pipelines and more exist, `nextPageToken`
// is populated and should be passed as the `pageToken` argument of a
// subsequent ListPipelines request.
message ListPipelinesResponse {
  // The pipelines that matched the request.
  repeated Pipeline pipelines = 1;

  // Token to use to fetch the next page of results.
  string next_page_token = 2;
}

// Request message for deleting a saved pipeline by its ID.
message DeletePipelineRequest {
  // Id of the pipeline to delete. The caller must have WRITE access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}

// Request message for fetching controller configuration. Intended only for
// VMs created by the Pipelines Service, not for end users.
message GetControllerConfigRequest {
  // The operation whose controller configuration is being retrieved.
  string operation_id = 1;

  // NOTE(review): undocumented in the schema. Presumably a token that lets
  // the service validate the calling VM; confirm with the service owners.
  uint64 validation_token = 2;
}

// The information that the controller fetches from the server in order to
// run. Intended only for VMs created by the Pipelines Service, not for end
// users.
//
// NOTE(review): none of the fields below are documented in the schema. The
// per-field notes are inferred from the field names only and must be
// confirmed before being relied on.
message ControllerConfig {
  // A list of strings. Used as the value type of the `gcs_sources` and
  // `gcs_sinks` maps, since a map value cannot itself be `repeated`.
  message RepeatedString {
    repeated string values = 1;
  }

  // Presumably the docker image to run; confirm.
  string image = 1;

  // Presumably the command to execute; confirm.
  string cmd = 2;

  // Presumably the Google Cloud Storage path for logs; confirm.
  string gcs_log_path = 3;

  // Presumably the Compute Engine machine type; confirm.
  string machine_type = 4;

  // String-to-string map; key and value semantics are not documented.
  map<string, string> vars = 5;

  // String-to-string map; key and value semantics are not documented.
  map<string, string> disks = 6;

  // Map from a string key to a list of strings; semantics are not documented.
  map<string, RepeatedString> gcs_sources = 7;

  // Map from a string key to a list of strings; semantics are not documented.
  map<string, RepeatedString> gcs_sinks = 8;
}

// A major event in job execution, together with the time at which it
// occurred.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time at which this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}

// Request message for setting the status of an operation. Intended only for
// VMs created by the Pipelines Service, not for end users.
//
// NOTE(review): the fields below are not documented in the schema; the
// per-field notes are inferred from names and types and must be confirmed.
message SetOperationStatusRequest {
  // Presumably the id of the operation whose status is being set; confirm.
  string operation_id = 1;

  // Timestamped events reported with this status update.
  repeated TimestampEvent timestamp_events = 2;

  // Presumably the error code describing a failure, if any; confirm.
  google.rpc.Code error_code = 3;

  // Presumably a human-readable message accompanying `error_code`; confirm.
  string error_message = 4;

  // Presumably a token that lets the service validate the calling VM;
  // confirm with the service owners.
  uint64 validation_token = 5;
}

// A Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`, which
  // selects the compute service account associated with the project.
  string email = 1;

  // Scopes to enable for this service account on the VM.
  // These scopes are always included automatically:
  //
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/logging.write
  // * https://www.googleapis.com/auth/monitoring.write
  repeated string scopes = 2;
}

// Logging options for a pipeline run.
message LoggingOptions {
  // Google Cloud Storage location to which the pipeline logs are copied.
  // Two forms are accepted:
  //
  // * a fully qualified directory path, in which case the logs are written
  //   into that directory with a unique identifier as the filename; or
  // * a fully specified path, which must end in `.log`, in which case that
  //   exact path is used and the user must ensure that logs are not
  //   overwritten.
  //
  // Stdout and stderr logs from the run are also generated and output as
  // `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}

// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // Deprecated. Disks created by the Pipelines API will be deleted at the
    // end of the pipeline run, regardless of what this field is set to.
    //
    // Marked with the `deprecated` field option so that generated code
    // carries the deprecation; the field number and wire format are
    // unchanged.
    bool auto_delete = 6 [deprecated = true];

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // Whether to use preemptible VMs. Defaults to `false`. In order to use this,
  // must be true for both create time and run time. Cannot be true at run time
  // if false at create time.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;

  // Controls assignment of an external IP to the instance. This is an
  // experimental feature that may go away. Defaults to false.
  // Corresponds to `--no_address` flag for [gcloud compute instances create]
  // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create).
  // NOTE(review): given the field name and the flag it maps to, `true`
  // presumably means the instance gets *no* external IP; confirm.
  // In order to use this, must be true for both create time and run time.
  // Cannot be true at run time if false at create time. If you need to ssh
  // into a private IP VM for debugging, you can ssh to a public VM and then
  // ssh into the private VM's Internal IP. If noAddress is set, this pipeline
  // run may only load docker images from Google Container Registry and not
  // Docker Hub.
  // ** Note: To use this option, your project must be in Google Access for
  // Private IPs Early Access Program.**
  bool no_address = 7;
}

// A parameter sets and delivers data into the pipeline's execution
// environment. Parameters are defined at create time, optionally with
// defaults, and may be overridden at run time.
//
// When `localCopy` is unset, the parameter is a plain string that is passed
// unchanged into the pipeline as the value of the environment variable with
// the given name. A default value may be supplied at create time and
// overridden at run time through the inputs map. Without a default, a value
// must be supplied at runtime.
//
// When `localCopy` is defined, the parameter names a data source or sink,
// both in Google Cloud Storage and on the Docker container in which the
// pipeline computation runs. The [service account associated with the
// Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
// default the project's Compute Engine service account) must have access to
// the Google Cloud Storage paths.
//
// At run time the Google Cloud Storage paths may be overridden when a default
// was provided at create time; otherwise they must be set. The pipeline
// runner should add a key/value pair to either the inputs or the outputs map.
// The indicated copies are carried out before/after pipeline execution, just
// as if the corresponding arguments were given to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// A disk named `pd1` is created and mounted on the host VM, and `/mnt/pd1` is
// mapped to `/mnt/disk` in the docker container. At runtime the inputs map
// would need an entry for `input_file`, such as:
//
// ```
//   inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// which results in this gsutil call:
//
// ```
//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Google Cloud storage path</th><th>Local path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr><td>glob</td><td>directory</td></tr>
//   </tbody>
// </table>
//
// For outputs, the copy runs in the opposite direction:
//
// ```
//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr>
//       <td>file</td>
//       <td>directory - directory must already exist</td>
//     </tr>
//     <tr>
//       <td>glob</td>
//       <td>directory - directory will be created if it doesn't exist</td>
//     </tr>
//   </tbody>
// </table>
//
// One restriction, due to docker limitations: for outputs located on the boot
// disk, the local path must be a file and cannot be a glob.
message PipelineParameter {
  // Describes how a remote file is copied to and from the VM.
  message LocalCopy {
    // Required. Path within the user's docker container that this input is
    // localized to and from, relative to the mount point of the specified
    // disk. For example: file.txt
    string path = 1;

    // Required. Name of the disk on which this parameter is located. Either
    // the name of one of the disks specified in the Resources field, or
    // "boot", which denotes the Docker instance's boot disk and has a mount
    // point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter. The pipeline runner uses this string as
  // the key into the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // Default value for this parameter; may be overridden at runtime. When
  // `localCopy` is present, this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // When present, marks this parameter for copying to and from the VM.
  // `LocalCopy` says where on the VM the file should be. The value given to
  // this parameter (either at runtime or through `defaultValue`) must be the
  // remote path where the file should be.
  LocalCopy local_copy = 6;
}

// The Docker executor specification.
message DockerExecutor {
  // Required. Name of the image, from either Docker Hub or Google Container
  // Registry. Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command, or newline-delimited script, to run. The command
  // string is executed within a bash shell.
  //
  // When the command exits with a non-zero exit code, output parameter
  // de-localization is skipped and the pipeline operation's
  // [`error`][google.longrunning.Operation.error] field is populated.
  //
  // Maximum command string length is 16384.
  string cmd = 2;
}