// job.proto
  // Copyright 2022 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.batch.v1alpha;

  import "google/api/field_behavior.proto";
  import "google/api/resource.proto";
  import "google/cloud/batch/v1alpha/task.proto";
  import "google/protobuf/duration.proto";
  import "google/protobuf/timestamp.proto";

  option csharp_namespace = "Google.Cloud.Batch.V1Alpha";
  option go_package = "google.golang.org/genproto/googleapis/cloud/batch/v1alpha;batch";
  option java_multiple_files = true;
  option java_outer_classname = "JobProto";
  option java_package = "com.google.cloud.batch.v1alpha";
  option objc_class_prefix = "GCB";
  option php_namespace = "Google\\Cloud\\Batch\\V1alpha";
  option ruby_package = "Google::Cloud::Batch::V1alpha";
  // The Cloud Batch Job description.
  message Job {
    option (google.api.resource) = {
      type: "batch.googleapis.com/Job"
      pattern: "projects/{project}/locations/{location}/jobs/{job}"
    };

    // The order that TaskGroups are scheduled relative to each other.
    //
    // Not yet implemented.
    enum SchedulingPolicy {
      // Unspecified.
      SCHEDULING_POLICY_UNSPECIFIED = 0;

      // Run all TaskGroups as soon as possible.
      AS_SOON_AS_POSSIBLE = 1;
    }

    // Output only. Job name.
    // For example: "projects/123456/locations/us-central1/jobs/job01".
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A system generated unique ID (in UUID4 format) for the Job.
    string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Priority of the Job.
    // The valid value range is [0, 100).
    // A job with higher priority value is more likely to run earlier if all
    // other requirements are satisfied.
    int64 priority = 3;

    // Required. TaskGroups in the Job. Only one TaskGroup is supported now.
    repeated TaskGroup task_groups = 4
        [(google.api.field_behavior) = REQUIRED];

    // Scheduling policy for TaskGroups in the job.
    SchedulingPolicy scheduling_policy = 5;

    // At least one of the dependencies must be satisfied before the Job is
    // scheduled to run.
    // Only one JobDependency is supported now.
    // Not yet implemented.
    repeated JobDependency dependencies = 6;

    // Compute resource allocation for all TaskGroups in the Job.
    AllocationPolicy allocation_policy = 7;

    // Labels for the Job. Labels could be user provided or system generated.
    // For example,
    //   "labels": {
    //     "department": "finance",
    //     "environment": "test"
    //   }
    // You can assign up to 64 labels. [Google Compute Engine label
    // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
    // apply.
    // Label names that start with "goog-" or "google-" are reserved.
    map<string, string> labels = 8;

    // Output only. Job status. It is read only for users.
    JobStatus status = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Deprecated: please use notifications instead.
    JobNotification notification = 10 [deprecated = true];

    // Output only. When the Job was created.
    google.protobuf.Timestamp create_time = 11
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The last time the Job was updated.
    google.protobuf.Timestamp update_time = 12
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Log preservation policy for the Job.
    LogsPolicy logs_policy = 13;

    // Notification configurations.
    repeated JobNotification notifications = 14;
  }
  // LogsPolicy describes how outputs from a Job's Tasks (stdout/stderr) will
  // be preserved.
  message LogsPolicy {
    // The destination (if any) for logs.
    enum Destination {
      // Logs are not preserved.
      DESTINATION_UNSPECIFIED = 0;

      // Logs are streamed to Cloud Logging.
      CLOUD_LOGGING = 1;

      // Logs are saved to a file path.
      PATH = 2;
    }

    // Where logs should be saved.
    Destination destination = 1;

    // The path to which logs are saved when `destination` is `PATH`. This can
    // be a local file path on the VM, or under the mount point of a Persistent
    // Disk or Filestore, or a Cloud Storage path.
    string logs_path = 2;
  }
  // JobDependency describes the state of other Jobs that the start of this Job
  // depends on.
  // All dependent Jobs must have been submitted in the same region.
  message JobDependency {
    // Dependency type.
    enum Type {
      // Unspecified.
      TYPE_UNSPECIFIED = 0;

      // The dependent Job has succeeded.
      SUCCEEDED = 1;

      // The dependent Job has failed.
      FAILED = 2;

      // SUCCEEDED or FAILED.
      FINISHED = 3;
    }

    // Each item maps a Job name to a Type.
    // All items must be satisfied for the JobDependency to be satisfied (the
    // AND operation).
    // Once a condition for one item becomes true, it won't go back to false
    // even if the dependent Job state changes again.
    map<string, Type> items = 1;
  }
  // Job status.
  message JobStatus {
    // VM instance status.
    message InstanceStatus {
      // The Compute Engine machine type.
      string machine_type = 1;

      // The VM instance provisioning model.
      AllocationPolicy.ProvisioningModel provisioning_model = 2;

      // The max number of tasks that can be assigned to this instance type.
      int64 task_pack = 3;
    }

    // Aggregated task status for a TaskGroup.
    message TaskGroupStatus {
      // Count of tasks in each state in the TaskGroup.
      // The map key is the task state name.
      map<string, int64> counts = 1;

      // Status of instances allocated for the TaskGroup.
      repeated InstanceStatus instances = 2;
    }

    // Valid Job states.
    enum State {
      // Job state is unspecified.
      STATE_UNSPECIFIED = 0;

      // Job is admitted (validated and persisted) and waiting for resources.
      QUEUED = 1;

      // Job is scheduled to run as soon as resource allocation is ready.
      // The resource allocation may happen at a later time but with a high
      // chance to succeed.
      SCHEDULED = 2;

      // Resource allocation has been successful. At least one Task in the Job
      // is RUNNING.
      RUNNING = 3;

      // All Tasks in the Job have finished successfully.
      SUCCEEDED = 4;

      // At least one Task in the Job has failed.
      FAILED = 5;

      // The Job will be deleted, but has not been deleted yet. Typically this
      // is because resources used by the Job are still being cleaned up.
      DELETION_IN_PROGRESS = 6;
    }

    // Job state.
    State state = 1;

    // Job status events.
    repeated StatusEvent status_events = 2;

    // NOTE(review): field number 3 is not declared in this message and is not
    // marked `reserved`. Confirm whether it belonged to a removed field before
    // assigning it to anything new.

    // Aggregated task status for each TaskGroup in the Job.
    // The map key is TaskGroup ID.
    map<string, TaskGroupStatus> task_groups = 4;

    // The duration of time that the Job spent in status RUNNING.
    google.protobuf.Duration run_duration = 5;
  }
  // Notification configurations.
  message JobNotification {
    // Message details.
    // Describes the attributes that a message should have.
    // Without specified message attributes, no message will be sent by
    // default.
    message Message {
      // The message type.
      Type type = 1;

      // The new job state.
      JobStatus.State new_job_state = 2;

      // The new task state.
      TaskStatus.State new_task_state = 3;
    }

    // The message type.
    enum Type {
      // Unspecified.
      TYPE_UNSPECIFIED = 0;

      // Notify users that the job state has changed.
      JOB_STATE_CHANGED = 1;

      // Notify users that the task state has changed.
      TASK_STATE_CHANGED = 2;
    }

    // The Pub/Sub topic where notifications like the job state changes
    // will be published. This topic exists in the same project as the job,
    // and billing will be charged to this project.
    // If not specified, no Pub/Sub messages will be sent.
    // Topic format: `projects/{project}/topics/{topic}`.
    string pubsub_topic = 1;

    // The attribute requirements of messages to be sent to this Pub/Sub topic.
    // Without this field, no message will be sent.
    Message message = 2;
  }
  // A Job's resource allocation policy describes when, where, and how compute
  // resources should be allocated for the Job.
  message AllocationPolicy {
    // Location constraints for the compute resources allocated for a Job.
    message LocationPolicy {
      // A list of allowed location names represented by internal URLs.
      // Each location can be a region or a zone.
      // Only one region or multiple zones in one region is supported now.
      // For example,
      // ["regions/us-central1"] allows VMs in any zone in region us-central1.
      // ["zones/us-central1-a", "zones/us-central1-c"] only allows VMs
      // in zones us-central1-a and us-central1-c.
      // Locations that end up in different regions cause an error.
      // For example,
      // ["regions/us-central1", "zones/us-central1-a", "zones/us-central1-b",
      // "zones/us-west1-a"] contains 2 regions "us-central1" and
      // "us-west1". An error is expected in this case.
      repeated string allowed_locations = 1;

      // A list of denied location names.
      //
      // Not yet implemented.
      repeated string denied_locations = 2;
    }

    // A new persistent disk or a local ssd.
    // A VM can only have one local SSD setting but multiple local SSD
    // partitions.
    // https://cloud.google.com/compute/docs/disks#pdspecs.
    // https://cloud.google.com/compute/docs/disks#localssds.
    message Disk {
      // A data source from which a PD will be created.
      oneof data_source {
        // Name of a public or custom image used as the data source.
        string image = 4;

        // Name of a snapshot used as the data source.
        string snapshot = 5;
      }

      // Disk type as shown in `gcloud compute disk-types list`.
      // For example, "pd-ssd", "pd-standard", "pd-balanced", "local-ssd".
      string type = 1;

      // Disk size in GB.
      // This field is ignored if `data_source` is `image` or `snapshot`.
      // If `type` is `local-ssd`, size_gb should be a multiple of 375 GB,
      // otherwise, the final size will be the next greater multiple of 375 GB.
      int64 size_gb = 2;

      // NOTE(review): field number 3 is not declared in this message and is
      // not marked `reserved`. Confirm whether it belonged to a removed field
      // before assigning it to anything new.

      // Local SSDs are available through both "SCSI" and "NVMe" interfaces.
      // If not indicated, "NVMe" will be the default one for local ssds.
      // We only support "SCSI" for persistent disks now.
      string disk_interface = 6;
    }

    // A new or an existing persistent disk (PD) or a local ssd attached to a
    // VM instance.
    message AttachedDisk {
      // The disk to attach: either a new disk or an existing PD.
      oneof attached {
        // Specification of a new disk to attach.
        Disk new_disk = 1;

        // Name of an existing PD.
        string existing_disk = 2;
      }

      // Device name that the guest operating system will see.
      // It is used by Runnable.volumes field to mount disks. So please specify
      // the device_name if you want Batch to help mount the disk, and it
      // should match the device_name field in volumes.
      string device_name = 3;
    }

    // Accelerator describes Compute Engine accelerators to be attached to the
    // VM.
    message Accelerator {
      // The accelerator type. For example, "nvidia-tesla-t4".
      // See `gcloud compute accelerator-types list`.
      string type = 1;

      // The number of accelerators of this type.
      int64 count = 2;

      // Deprecated: please use instances[0].install_gpu_drivers instead.
      bool install_gpu_drivers = 3 [deprecated = true];
    }

    // InstancePolicy describes an instance type and resources attached to each
    // VM created by this InstancePolicy.
    message InstancePolicy {
      // Deprecated: please use machine_type instead.
      repeated string allowed_machine_types = 1 [deprecated = true];

      // The Compute Engine machine type.
      string machine_type = 2;

      // The minimum CPU platform.
      // See
      // `https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform`.
      // Not yet implemented.
      string min_cpu_platform = 3;

      // The provisioning model.
      ProvisioningModel provisioning_model = 4;

      // The accelerators attached to each VM instance.
      repeated Accelerator accelerators = 5;

      // Non-boot disks to be attached for each VM created by this
      // InstancePolicy. New disks will be deleted when the VM is deleted.
      repeated AttachedDisk disks = 6;

      // If specified, VMs will be allocated only inside the matching
      // reservation.
      string reservation = 7;
    }

    // Either an InstancePolicy or an instance template.
    message InstancePolicyOrTemplate {
      // How the VMs are described: an inline InstancePolicy or the name of an
      // instance template.
      oneof policy_template {
        // InstancePolicy.
        InstancePolicy policy = 1;

        // Name of an instance template used to create VMs.
        // Named the field as 'instance_template' instead of 'template' to
        // avoid c++ keyword conflict.
        string instance_template = 2;
      }

      // Set this field true if users want Batch to help fetch drivers from a
      // third party location and install them for GPUs specified in
      // policy.accelerators or instance_template on their behalf. Default is
      // false.
      bool install_gpu_drivers = 3;
    }

    // A network interface.
    message NetworkInterface {
      // The URL of the network resource.
      string network = 1;

      // The URL of the Subnetwork resource.
      string subnetwork = 2;

      // Default is false (with an external IP address). Required if
      // no external public IP address is attached to the VM. If no external
      // public IP address, additional configuration is required to allow the
      // VM to access Google Services. See
      // https://cloud.google.com/vpc/docs/configure-private-google-access and
      // https://cloud.google.com/nat/docs/gce-example#create-nat for more
      // information.
      bool no_external_ip_address = 3;
    }

    // NetworkPolicy describes VM instance network configurations.
    message NetworkPolicy {
      // Network configurations.
      repeated NetworkInterface network_interfaces = 1;
    }

    // Compute Engine VM instance provisioning model.
    enum ProvisioningModel {
      // Unspecified.
      PROVISIONING_MODEL_UNSPECIFIED = 0;

      // Standard VM.
      STANDARD = 1;

      // SPOT VM.
      SPOT = 2;

      // Preemptible VM (PVM).
      //
      // SPOT (above) is the preferable model for preemptible VM instances: the
      // preemptible VM model indicated by this value is the older model, and
      // has been migrated to use the SPOT model as the underlying technology.
      // This old model will still be supported.
      PREEMPTIBLE = 3;
    }

    // Location where compute resources should be allocated for the Job.
    LocationPolicy location = 1;

    // Deprecated: please use instances[0].policy instead.
    InstancePolicy instance = 2 [deprecated = true];

    // Describe instances that can be created by this AllocationPolicy.
    // Only instances[0] is supported now.
    repeated InstancePolicyOrTemplate instances = 8;

    // Deprecated: please use instances[0].instance_template instead.
    repeated string instance_templates = 3 [deprecated = true];

    // Deprecated: please use instances[0].policy.provisioning_model instead.
    repeated ProvisioningModel provisioning_models = 4 [deprecated = true];

    // Deprecated: please use service_account instead.
    string service_account_email = 5 [deprecated = true];

    // Service account that VMs will run as.
    ServiceAccount service_account = 9;

    // Labels applied to all VM instances and other resources
    // created by AllocationPolicy.
    // Labels could be user provided or system generated.
    // You can assign up to 64 labels. [Google Compute Engine label
    // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
    // apply.
    // Label names that start with "goog-" or "google-" are reserved.
    map<string, string> labels = 6;

    // The network policy.
    NetworkPolicy network = 7;
  }
  // A TaskGroup contains one or multiple Tasks that share the same
  // Runnable but with different runtime parameters.
  message TaskGroup {
    option (google.api.resource) = {
      type: "batch.googleapis.com/TaskGroup"
      pattern: "projects/{project}/locations/{location}/jobs/{job}/taskGroups/{task_group}"
    };

    // How Tasks in the TaskGroup should be scheduled relative to each other.
    enum SchedulingPolicy {
      // Unspecified.
      SCHEDULING_POLICY_UNSPECIFIED = 0;

      // Run Tasks as soon as resources are available.
      AS_SOON_AS_POSSIBLE = 1;
    }

    // Output only. TaskGroup name.
    // The system generates this field based on parent Job name.
    // For example:
    // "projects/123456/locations/us-west1/jobs/job01/taskGroups/group01".
    string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // NOTE(review): field number 2 is not declared in this message and is not
    // marked `reserved`. Confirm whether it belonged to a removed field before
    // assigning it to anything new.

    // Required. Tasks in the group share the same task spec.
    TaskSpec task_spec = 3 [(google.api.field_behavior) = REQUIRED];

    // Number of Tasks in the TaskGroup.
    // Defaults to 1.
    int64 task_count = 4;

    // Max number of tasks that can run in parallel.
    // Defaults to min(task_count, 1000).
    int64 parallelism = 5;

    // Scheduling policy for Tasks in the TaskGroup.
    SchedulingPolicy scheduling_policy = 6;

    // Compute resource allocation for the TaskGroup.
    // If specified, it overrides resources in Job.
    AllocationPolicy allocation_policy = 7;

    // Labels for the TaskGroup.
    // Labels could be user provided or system generated.
    // You can assign up to 64 labels. [Google Compute Engine label
    // restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
    // apply.
    // Label names that start with "goog-" or "google-" are reserved.
    map<string, string> labels = 8;

    // An array of environment variable mappings, which are passed to Tasks
    // with matching indices. If task_environments is used then task_count
    // should not be specified in the request (and will be ignored). Task count
    // will be the length of task_environments.
    //
    // In addition to any environment variables set in task_environments, each
    // Task gets two environment variables:
    // BATCH_TASK_INDEX, the specific Task's index in the TaskGroup
    // (0 through BATCH_TASK_COUNT - 1), and
    // BATCH_TASK_COUNT, the number of Tasks in the Task's parent TaskGroup.
    //
    // task_environments supports up to 200 entries.
    repeated Environment task_environments = 9;

    // Max number of tasks that can be run on a VM at the same time.
    // If not specified, the system will decide a value based on available
    // compute resources on a VM and task requirements.
    int64 task_count_per_node = 10;

    // When true, Batch will populate a file with a list of all VMs assigned to
    // the TaskGroup and set the BATCH_HOSTS_FILE environment variable to the
    // path of that file. Defaults to false.
    bool require_hosts_file = 11;

    // When true, Batch will configure SSH to allow passwordless login between
    // VMs running the Batch tasks in the same TaskGroup.
    bool permissive_ssh = 12;
  }
  // Carries information about a Google Cloud service account.
  message ServiceAccount {
    // Email address of the service account. If not specified, the default
    // Compute Engine service account for the project will be used. If instance
    // template is being used, the service account has to be specified in the
    // instance template and it has to match the email field here.
    string email = 1;

    // Deprecated.
    // List of scopes to be enabled for this service account on the VM, in
    // addition to the cloud-platform API scope that will be added by default.
    // NOTE(review): this field is marked deprecated but no replacement is
    // named in this file; confirm what callers should use instead.
    repeated string scopes = 2 [deprecated = true];
  }