execution.proto
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.notebooks.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/notebooks/v1;notebooks";
option java_multiple_files = true;
option java_outer_classname = "ExecutionProto";
option java_package = "com.google.cloud.notebooks.v1";
option (google.api.resource_definition) = {
  type: "aiplatform.googleapis.com/Tensorboard"
  pattern: "projects/{project}/locations/{location}/tensorboards/{tensorboard}"
};
// The description of a notebook execution workload.
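//
// A minimal example in proto text format; the bucket, notebook, and machine
// names below are purely illustrative, not values defined by this API:
//
//     scale_tier: CUSTOM
//     master_type: "n1-standard-4"
//     input_notebook_file: "gs://my-bucket/notebooks/analysis.ipynb"
//     container_image_uri: "gcr.io/deeplearning-platform-release/base-cu100"
//     output_notebook_folder: "gs://my-bucket/results"
//     job_type: VERTEX_AI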
message ExecutionTemplate {
  // Required. Specifies the machine types, the number of replicas for workers
  // and parameter servers.
  enum ScaleTier {
    // Unspecified Scale Tier.
    SCALE_TIER_UNSPECIFIED = 0;

    // A single worker instance. This tier is suitable for learning how to use
    // Cloud ML, and for experimenting with new models using small datasets.
    BASIC = 1;

    // Many workers and a few parameter servers.
    STANDARD_1 = 2;

    // A large number of workers with many parameter servers.
    PREMIUM_1 = 3;

    // A single worker instance with a K80 GPU.
    BASIC_GPU = 4;

    // A single worker instance with a Cloud TPU.
    BASIC_TPU = 5;

    // The CUSTOM tier is not a set tier, but rather enables you to use your
    // own cluster specification. When you use this tier, set values to
    // configure your processing cluster according to these guidelines:
    //
    // * You _must_ set `ExecutionTemplate.masterType` to specify the type
    //   of machine to use for your master node. This is the only required
    //   setting.
    CUSTOM = 6;
  }

  // Hardware accelerator types for AI Platform Training jobs.
  enum SchedulerAcceleratorType {
    // Unspecified accelerator type. Default to no GPU.
    SCHEDULER_ACCELERATOR_TYPE_UNSPECIFIED = 0;

    // Nvidia Tesla K80 GPU.
    NVIDIA_TESLA_K80 = 1;

    // Nvidia Tesla P100 GPU.
    NVIDIA_TESLA_P100 = 2;

    // Nvidia Tesla V100 GPU.
    NVIDIA_TESLA_V100 = 3;

    // Nvidia Tesla P4 GPU.
    NVIDIA_TESLA_P4 = 4;

    // Nvidia Tesla T4 GPU.
    NVIDIA_TESLA_T4 = 5;

    // Nvidia Tesla A100 GPU.
    NVIDIA_TESLA_A100 = 10;

    // TPU v2.
    TPU_V2 = 6;

    // TPU v3.
    TPU_V3 = 7;
  }

  // Definition of a hardware accelerator. Note that not all combinations
  // of `type` and `core_count` are valid. Check [GPUs on
  // Compute Engine](https://cloud.google.com/compute/docs/gpus) to find a valid
  // combination. TPUs are not supported.
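  //
  // For example (illustrative values; consult the link above for combinations
  // that are actually valid in your region):
  //
  //     type: NVIDIA_TESLA_T4
  //     core_count: 1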
  message SchedulerAcceleratorConfig {
    // Type of this accelerator.
    SchedulerAcceleratorType type = 1;

    // Count of cores of this accelerator.
    int64 core_count = 2;
  }

  // The backend used for this execution.
  enum JobType {
    // No type specified.
    JOB_TYPE_UNSPECIFIED = 0;

    // Custom Job in `aiplatform.googleapis.com`.
    // Default value for an execution.
    VERTEX_AI = 1;

    // Run execution on a cluster with Dataproc as a job.
    // https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs
    DATAPROC = 2;
  }

  // Parameters used in Dataproc JobType executions.
  message DataprocParameters {
    // URI for cluster used to run Dataproc execution.
    // Format: `projects/{PROJECT_ID}/regions/{REGION}/clusters/{CLUSTER_NAME}`
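    //
    // For example (the project, region, and cluster names are illustrative):
    // `projects/my-project/regions/us-central1/clusters/my-cluster`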
    string cluster = 1;
  }

  // Parameters used in Vertex AI JobType executions.
  message VertexAIParameters {
    // The full name of the Compute Engine
    // [network](/compute/docs/networks-and-firewalls#networks) to which the Job
    // should be peered. For example, `projects/12345/global/networks/myVPC`.
    // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert)
    // is of the form `projects/{project}/global/networks/{network}`.
    // Where {project} is a project number, as in `12345`, and {network} is a
    // network name.
    //
    // Private services access must already be configured for the network. If
    // left unspecified, the job is not peered with any network.
    string network = 1;
    // Environment variables.
    // At most 100 environment variables can be specified, and each must be
    // unique.
    // Example: GCP_BUCKET=gs://my-bucket/samples/
    map<string, string> env = 2;
  }
  // Required. Scale tier of the hardware used for notebook execution.
  // DEPRECATED: will be discontinued. Currently only CUSTOM is supported.
  ScaleTier scale_tier = 1 [
    deprecated = true,
    (google.api.field_behavior) = REQUIRED
  ];
  // Specifies the type of virtual machine to use for your training
  // job's master worker. You must specify this field when `scaleTier` is set to
  // `CUSTOM`.
  //
  // You can use certain Compute Engine machine types directly in this field.
  // The following types are supported:
  //
  // - `n1-standard-4`
  // - `n1-standard-8`
  // - `n1-standard-16`
  // - `n1-standard-32`
  // - `n1-standard-64`
  // - `n1-standard-96`
  // - `n1-highmem-2`
  // - `n1-highmem-4`
  // - `n1-highmem-8`
  // - `n1-highmem-16`
  // - `n1-highmem-32`
  // - `n1-highmem-64`
  // - `n1-highmem-96`
  // - `n1-highcpu-16`
  // - `n1-highcpu-32`
  // - `n1-highcpu-64`
  // - `n1-highcpu-96`
  //
  // Alternatively, you can use the following legacy machine types:
  //
  // - `standard`
  // - `large_model`
  // - `complex_model_s`
  // - `complex_model_m`
  // - `complex_model_l`
  // - `standard_gpu`
  // - `complex_model_m_gpu`
  // - `complex_model_l_gpu`
  // - `standard_p100`
  // - `complex_model_m_p100`
  // - `standard_v100`
  // - `large_model_v100`
  // - `complex_model_m_v100`
  // - `complex_model_l_v100`
  //
  // Finally, if you want to use a TPU for training, specify `cloud_tpu` in this
  // field. Learn more about the [special configuration options for training
  // with
  // TPU](https://cloud.google.com/ai-platform/training/docs/using-tpus#configuring_a_custom_tpu_machine).
  string master_type = 2;

  // Configuration (count and accelerator type) for hardware running notebook
  // execution.
  SchedulerAcceleratorConfig accelerator_config = 3;
  // Labels for execution.
  // If the execution is scheduled, its labels will include 'nbs-scheduled'.
  // Otherwise, for an immediate execution, they will include 'nbs-immediate'.
  // Use these labels to efficiently index the various types of executions.
  map<string, string> labels = 4;
  // Path to the notebook file to execute.
  // Must be in a Google Cloud Storage bucket.
  // Format: `gs://{bucket_name}/{folder}/{notebook_file_name}`
  // Ex: `gs://notebook_user/scheduled_notebooks/sentiment_notebook.ipynb`
  string input_notebook_file = 5;
  // Container Image URI to a DLVM.
  // Example: `gcr.io/deeplearning-platform-release/base-cu100`
  // More examples can be found at:
  // https://cloud.google.com/ai-platform/deep-learning-containers/docs/choosing-container
  string container_image_uri = 6;
  // Path to the notebook folder to write to.
  // Must be a path in a Google Cloud Storage bucket.
  // Format: `gs://{bucket_name}/{folder}`
  // Ex: `gs://notebook_user/scheduled_notebooks`
  string output_notebook_folder = 7;
  // Parameters to be overridden in the notebook during execution.
  // See https://papermill.readthedocs.io/en/latest/usage-parameterize.html for
  // how to specify parameters in the input notebook and pass them here
  // in a YAML file.
  // Ex: `gs://notebook_user/scheduled_notebooks/sentiment_notebook_params.yaml`
  string params_yaml_file = 8;
  // Parameters used within the 'input_notebook_file' notebook.
  string parameters = 9;

  // The email address of a service account to use when running the execution.
  // You must have the `iam.serviceAccounts.actAs` permission for the specified
  // service account.
  string service_account = 10;

  // The type of Job to be used on this execution.
  JobType job_type = 11;

  // Parameters for an execution type.
  // NOTE: There are currently no extra parameters for VertexAI jobs.
  oneof job_parameters {
    // Parameters used in Dataproc JobType executions.
    DataprocParameters dataproc_parameters = 12;

    // Parameters used in Vertex AI JobType executions.
    VertexAIParameters vertex_ai_parameters = 13;
  }

  // Name of the kernel spec to use. This must be specified if the
  // kernel spec name on the execution target does not match the name in the
  // input notebook file.
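  // Ex: `python3` (illustrative; the kernel spec must exist on the execution
  // target).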
  string kernel_spec = 14;

  // The name of a Vertex AI [Tensorboard] resource to which this execution
  // will upload Tensorboard logs.
  // Format:
  // `projects/{project}/locations/{location}/tensorboards/{tensorboard}`
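  // Ex: `projects/my-project/locations/us-central1/tensorboards/my-tensorboard`
  // (project, location, and Tensorboard names here are illustrative only).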
  string tensorboard = 15 [(google.api.resource_reference) = {
    type: "aiplatform.googleapis.com/Tensorboard"
  }];
}

// The definition of a single executed notebook.
message Execution {
  option (google.api.resource) = {
    type: "notebooks.googleapis.com/Execution"
    pattern: "projects/{project}/location/{location}/executions/{execution}"
  };
  // Enum description of the state of the underlying AI Platform job.
  enum State {
    // The job state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The job has just been created and processing has not yet begun.
    QUEUED = 1;
    // The service is preparing to execute the job.
    PREPARING = 2;
    // The job is in progress.
    RUNNING = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job failed.
    // `error_message` should contain the details of the failure.
    FAILED = 5;

    // The job is being cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLING = 6;

    // The job has been cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLED = 7;

    // The job has become expired (relevant to Vertex AI jobs)
    // https://cloud.google.com/vertex-ai/docs/reference/rest/v1/JobState
    EXPIRED = 9;

    // The Execution is being created.
    INITIALIZING = 10;
  }
  // Execution metadata, including name, hardware spec, region, labels, etc.
  ExecutionTemplate execution_template = 1;

  // Output only. The resource name of the execution. Format:
  // `projects/{project_id}/locations/{location}/executions/{execution_id}`
  string name = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  // Output only. Name used for UI purposes.
  // Name can only contain alphanumeric characters and underscores '_'.
  string display_name = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A brief description of this execution.
  string description = 4;

  // Output only. Time the Execution was instantiated.
  google.protobuf.Timestamp create_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time the Execution was last updated.
  google.protobuf.Timestamp update_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. State of the underlying AI Platform job.
  State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
  // Output notebook file generated by this execution.
  string output_notebook_file = 8;

  // Output only. The URI of the external job used to execute the notebook.
  string job_uri = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}