// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/dataflow/v1beta3/environment.proto";
import "google/dataflow/v1beta3/snapshots.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Provides a method to create and modify Google Cloud Dataflow jobs.
// A Job is a multi-stage computation graph run by the Cloud Dataflow service.
service JobsV1Beta3 {
  option (google.api.default_host) = "dataflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/compute,"
      "https://www.googleapis.com/auth/compute.readonly,"
      "https://www.googleapis.com/auth/userinfo.email";

  // Creates a Cloud Dataflow job.
  //
  // To create a job, we recommend using `projects.locations.jobs.create` with a
  // [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.create` is not recommended, as your job will always start
  // in `us-central1`.
  rpc CreateJob(CreateJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1b3/projects/{project_id}/locations/{location}/jobs"
      body: "job"
      additional_bindings {
        post: "/v1b3/projects/{project_id}/jobs"
        body: "job"
      }
    };
  }
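
  // Illustrative sketch (an editorial example, not part of the original
  // file's documentation): a minimal REST call against the
  // `projects.locations.jobs.create` binding above, assuming a hypothetical
  // project `my-project`, the `us-central1` region, and an OAuth token from
  // `gcloud auth print-access-token`:
  //
  //   curl -X POST \
  //     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  //     -H "Content-Type: application/json" \
  //     -d '{"name": "example-job"}' \
  //     "https://dataflow.googleapis.com/v1b3/projects/my-project/locations/us-central1/jobs"
  //
  // Per `body: "job"`, the HTTP body is the `Job` message in JSON form, and
  // the response is the created `Job`. A real job requires more fields than
  // shown here.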

  // Gets the state of the specified Cloud Dataflow job.
  //
  // To get the state of a job, we recommend using `projects.locations.jobs.get`
  // with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.get` is not recommended, as you can only get the state of
  // jobs that are running in `us-central1`.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1b3/projects/{project_id}/locations/{location}/jobs/{job_id}"
      additional_bindings {
        get: "/v1b3/projects/{project_id}/jobs/{job_id}"
      }
    };
  }

  // Updates the state of an existing Cloud Dataflow job.
  //
  // To update the state of an existing job, we recommend using
  // `projects.locations.jobs.update` with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.update` is not recommended, as you can only update the state
  // of jobs that are running in `us-central1`.
  rpc UpdateJob(UpdateJobRequest) returns (Job) {
    option (google.api.http) = {
      put: "/v1b3/projects/{project_id}/locations/{location}/jobs/{job_id}"
      body: "job"
      additional_bindings {
        put: "/v1b3/projects/{project_id}/jobs/{job_id}"
        body: "job"
      }
    };
  }
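
  // Illustrative sketch (editorial): per the `Job.requested_state`
  // documentation below, a running job can be cancelled by sending a Job
  // whose `requestedState` is `JOB_STATE_CANCELLED`; per `UpdateJobRequest`,
  // only the job state is updatable. Project, region, and job ID here are
  // hypothetical:
  //
  //   curl -X PUT \
  //     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  //     -H "Content-Type: application/json" \
  //     -d '{"requestedState": "JOB_STATE_CANCELLED"}' \
  //     "https://dataflow.googleapis.com/v1b3/projects/my-project/locations/us-central1/jobs/my-job-id"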

  // List the jobs of a project.
  //
  // To list the jobs of a project in a region, we recommend using
  // `projects.locations.jobs.list` with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
  // list all jobs across all regions, use `projects.jobs.aggregated`. Using
  // `projects.jobs.list` is not recommended, as you can only get the list of
  // jobs that are running in `us-central1`.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1b3/projects/{project_id}/locations/{location}/jobs"
      additional_bindings {
        get: "/v1b3/projects/{project_id}/jobs"
      }
    };
  }

  // List the jobs of a project across all regions.
  rpc AggregatedListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1b3/projects/{project_id}/jobs:aggregated"
    };
  }

  // Check for existence of active jobs in the given project across all regions.
  rpc CheckActiveJobs(CheckActiveJobsRequest) returns (CheckActiveJobsResponse) {
  }

  // Snapshot the state of a streaming job.
  rpc SnapshotJob(SnapshotJobRequest) returns (Snapshot) {
    option (google.api.http) = {
      post: "/v1b3/projects/{project_id}/locations/{location}/jobs/{job_id}:snapshot"
      body: "*"
      additional_bindings {
        post: "/v1b3/projects/{project_id}/jobs/{job_id}:snapshot"
        body: "*"
      }
    };
  }
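
  // Illustrative sketch (editorial): with `body: "*"`, the remaining
  // `SnapshotJobRequest` fields travel in the request body. In proto3 JSON,
  // a `google.protobuf.Duration` such as `ttl` is encoded as a string of
  // seconds, e.g. "3600s". Project, region, and job ID are hypothetical:
  //
  //   curl -X POST \
  //     -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  //     -H "Content-Type: application/json" \
  //     -d '{"ttl": "3600s", "snapshotSources": true}' \
  //     "https://dataflow.googleapis.com/v1b3/projects/my-project/locations/us-central1/jobs/my-job-id:snapshot"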
}

// Defines a job to be run by the Cloud Dataflow service.
message Job {
  // The unique ID of this job.
  //
  // This field is set by the Cloud Dataflow service when the Job is
  // created, and is immutable for the life of the job.
  string id = 1;

  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 2;

  // The user-specified Cloud Dataflow job name.
  //
  // Only one Job with a given name may exist in a project at any
  // given time. If a caller attempts to create a Job with the same
  // name as an already-existing Job, the attempt returns the
  // existing Job.
  //
  // The name must match the regular expression
  // `[a-z]([-a-z0-9]{0,1022}[a-z0-9])?`
  string name = 3;
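
  // For illustration (an editorial example, not from the original comment):
  // under the expression above, `wordcount-2024` and `a` are valid names,
  // while `WordCount` (uppercase), `2024-job` (leading digit), and `job-`
  // (trailing hyphen) are not.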

  // The type of Cloud Dataflow job.
  JobType type = 4;

  // The environment for the job.
  Environment environment = 5;

  // Exactly one of steps or steps_location should be specified.
  //
  // The top-level steps that constitute the entire job. Only retrieved with
  // JOB_VIEW_ALL.
  repeated Step steps = 6;

  // The Cloud Storage location where the steps are stored.
  string steps_location = 24;

  // The current state of the job.
  //
  // Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
  // specified.
  //
  // A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
  // terminal state. After a job has reached a terminal state, no
  // further state updates may be made.
  //
  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  JobState current_state = 7;

  // The timestamp associated with the current state.
  google.protobuf.Timestamp current_state_time = 8;

  // The job's requested state.
  //
  // `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
  // `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
  // also be used to directly set a job's requested state to
  // `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
  // job if it has not already reached a terminal state.
  JobState requested_state = 9;

  // Deprecated.
  JobExecutionInfo execution_info = 10;

  // The timestamp when the job was initially created. Immutable and set by the
  // Cloud Dataflow service.
  google.protobuf.Timestamp create_time = 11;

  // If this job is an update of an existing job, this field is the job ID
  // of the job it replaced.
  //
  // When sending a `CreateJobRequest`, you can update a job by specifying it
  // here. The job named here is stopped, and its intermediate state is
  // transferred to this job.
  string replace_job_id = 12;

  // The map of transform name prefixes of the job to be replaced to the
  // corresponding name prefixes of the new job.
  map<string, string> transform_name_mapping = 13;

  // The client's unique identifier of the job, re-used across retried attempts.
  // If this field is set, the service will ensure its uniqueness.
  // The request to create a job will fail if the service has knowledge of a
  // previously submitted job with the same client's ID and job name.
  // The caller may use this field to ensure idempotence of job
  // creation across retried attempts to create a job.
  // By default, the field is empty and, in that case, the service ignores it.
  string client_request_id = 14;

  // If another job is an update of this job (and thus, this job is in
  // `JOB_STATE_UPDATED`), this field contains the ID of that job.
  string replaced_by_job_id = 15;

  // A set of files the system should be aware of that are used
  // for temporary storage. These temporary files will be
  // removed on job completion.
  // No duplicates are allowed.
  // No file patterns are supported.
  //
  // The supported files are:
  //
  // Google Cloud Storage:
  //
  //    storage.googleapis.com/{bucket}/{object}
  //    bucket.storage.googleapis.com/{object}
  repeated string temp_files = 16;

  // User-defined labels for this job.
  //
  // The labels map can contain no more than 64 entries. Entries of the labels
  // map are UTF8 strings that comply with the following restrictions:
  //
  // * Keys must conform to regexp: [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
  // * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
  // * Both keys and values are additionally constrained to be <= 128 bytes in
  //   size.
  map<string, string> labels = 17;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 18;

  // Preliminary field: The format of this data may change at any time.
  // A description of the user pipeline and stages through which it is executed.
  // Created by the Cloud Dataflow service. Only retrieved with
  // JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
  PipelineDescription pipeline_description = 19;

  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  repeated ExecutionStageState stage_states = 20;

  // This field is populated by the Dataflow service to support filtering jobs
  // by the metadata values provided here. Populated for ListJobs and all GetJob
  // views SUMMARY and higher.
  JobMetadata job_metadata = 21;

  // The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
  // Flexible resource scheduling jobs are started with some delay after job
  // creation, so start_time is unset before start and is updated when the
  // job is started by the Cloud Dataflow service. For other jobs, start_time
  // always equals create_time and is immutable and set by the Cloud Dataflow
  // service.
  google.protobuf.Timestamp start_time = 22;

  // If this is specified, the job's initial state is populated from the given
  // snapshot.
  string created_from_snapshot_id = 23;

  // Reserved for future use. This field is set only in responses from the
  // server; it is ignored if it is set in any requests.
  bool satisfies_pzs = 25;
}

// Metadata for a Datastore connector used by the job.
message DatastoreIODetails {
  // Namespace used in the connection.
  string namespace = 1;

  // ProjectId accessed in the connection.
  string project_id = 2;
}

// Metadata for a Pub/Sub connector used by the job.
message PubSubIODetails {
  // Topic accessed in the connection.
  string topic = 1;

  // Subscription used in the connection.
  string subscription = 2;
}

// Metadata for a File connector used by the job.
message FileIODetails {
  // File Pattern used to access files by the connector.
  string file_pattern = 1;
}

// Metadata for a Cloud Bigtable connector used by the job.
message BigTableIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // TableId accessed in the connection.
  string table_id = 3;
}

// Metadata for a BigQuery connector used by the job.
message BigQueryIODetails {
  // Table accessed in the connection.
  string table = 1;

  // Dataset accessed in the connection.
  string dataset = 2;

  // Project accessed in the connection.
  string project_id = 3;

  // Query used to access data in the connection.
  string query = 4;
}

// Metadata for a Spanner connector used by the job.
message SpannerIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // DatabaseId accessed in the connection.
  string database_id = 3;
}

// The version of the SDK used to run the job.
message SdkVersion {
  // The support status of the SDK used to run the job.
  enum SdkSupportStatus {
    // Cloud Dataflow is unaware of this version.
    UNKNOWN = 0;

    // This is a known version of an SDK, and is supported.
    SUPPORTED = 1;

    // A newer version of the SDK family exists, and an update is recommended.
    STALE = 2;

    // This version of the SDK is deprecated and will eventually be
    // unsupported.
    DEPRECATED = 3;

    // Support for this SDK version has ended and it should no longer be used.
    UNSUPPORTED = 4;
  }

  // The version of the SDK used to run the job.
  string version = 1;

  // A readable string describing the version of the SDK.
  string version_display_name = 2;

  // The support status for this SDK version.
  SdkSupportStatus sdk_support_status = 3;
}

// Metadata available primarily for filtering jobs. Will be included in the
// ListJob response and Job SUMMARY view.
message JobMetadata {
  // The SDK version used to run the job.
  SdkVersion sdk_version = 1;

  // Identification of a Spanner source used in the Dataflow job.
  repeated SpannerIODetails spanner_details = 2;

  // Identification of a BigQuery source used in the Dataflow job.
  repeated BigQueryIODetails bigquery_details = 3;

  // Identification of a Cloud Bigtable source used in the Dataflow job.
  repeated BigTableIODetails big_table_details = 4;

  // Identification of a Pub/Sub source used in the Dataflow job.
  repeated PubSubIODetails pubsub_details = 5;

  // Identification of a File source used in the Dataflow job.
  repeated FileIODetails file_details = 6;

  // Identification of a Datastore source used in the Dataflow job.
  repeated DatastoreIODetails datastore_details = 7;
}

// A message describing the state of a particular execution stage.
message ExecutionStageState {
  // The name of the execution stage.
  string execution_stage_name = 1;

  // Execution stage states allow the same set of values as JobState.
  JobState execution_stage_state = 2;

  // The time at which the stage transitioned to this state.
  google.protobuf.Timestamp current_state_time = 3;
}

// A descriptive representation of a submitted pipeline as well as the executed
// form. This data is provided by the Dataflow service for ease of visualizing
// the pipeline and interpreting Dataflow provided metrics.
message PipelineDescription {
  // Description of each transform in the pipeline and collections between them.
  repeated TransformSummary original_pipeline_transform = 1;

  // Description of each stage of execution of the pipeline.
  repeated ExecutionStageSummary execution_pipeline_stage = 2;

  // Pipeline level display data.
  repeated DisplayData display_data = 3;
}

// Type of transform or stage operation.
enum KindType {
  // Unrecognized transform type.
  UNKNOWN_KIND = 0;

  // ParDo transform.
  PAR_DO_KIND = 1;

  // Group By Key transform.
  GROUP_BY_KEY_KIND = 2;

  // Flatten transform.
  FLATTEN_KIND = 3;

  // Read transform.
  READ_KIND = 4;

  // Write transform.
  WRITE_KIND = 5;

  // Constructs from a constant value, such as with Create.of.
  CONSTANT_KIND = 6;

  // Creates a Singleton view of a collection.
  SINGLETON_KIND = 7;

  // Opening or closing a shuffle session, often as part of a GroupByKey.
  SHUFFLE_KIND = 8;
}

// Description of the type, names/ids, and input/outputs for a transform.
message TransformSummary {
  // Type of transform.
  KindType kind = 1;

  // SDK generated id of this transform instance.
  string id = 2;

  // User provided name for this transform instance.
  string name = 3;

  // Transform-specific display data.
  repeated DisplayData display_data = 4;

  // User names for all collection outputs of this transform.
  repeated string output_collection_name = 5;

  // User names for all collection inputs of this transform.
  repeated string input_collection_name = 6;
}

// Description of the composing transforms, names/ids, and input/outputs of a
// stage of execution. Some composing transforms and sources may have been
// generated by the Dataflow service during execution planning.
message ExecutionStageSummary {
  // Description of an input or output of an execution stage.
  message StageSource {
    // Human-readable name for this source; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this source.
    string name = 2;

    // User name for the original user transform or collection with which this
    // source is most closely associated.
    string original_transform_or_collection = 3;

    // Size of the source, if measurable.
    int64 size_bytes = 4;
  }

  // Description of a transform executed as part of an execution stage.
  message ComponentTransform {
    // Human-readable name for this transform; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this transform.
    string name = 2;

    // User name for the original user transform with which this transform is
    // most closely associated.
    string original_transform = 3;
  }

  // Description of an interstitial value between transforms in an execution
  // stage.
  message ComponentSource {
    // Human-readable name for this source; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this source.
    string name = 2;

    // User name for the original user transform or collection with which this
    // source is most closely associated.
    string original_transform_or_collection = 3;
  }

  // Dataflow service generated name for this stage.
  string name = 1;

  // Dataflow service generated id for this stage.
  string id = 2;

  // Type of transform this stage is executing.
  KindType kind = 3;

  // Input sources for this stage.
  repeated StageSource input_source = 4;

  // Output sources for this stage.
  repeated StageSource output_source = 5;

  // Other stages that must complete before this stage can run.
  repeated string prerequisite_stage = 8;

  // Transforms that comprise this execution stage.
  repeated ComponentTransform component_transform = 6;

  // Collections produced and consumed by component transforms of this stage.
  repeated ComponentSource component_source = 7;
}

// Data provided with a pipeline or transform to provide descriptive info.
message DisplayData {
  // The key identifying the display data.
  // This is intended to be used as a label for the display data
  // when viewed in a dax monitoring system.
  string key = 1;

  // The namespace for the key. This is usually a class name or programming
  // language namespace (e.g. a Python module) which defines the display data.
  // This allows a dax monitoring system to specially handle the data
  // and perform custom rendering.
  string namespace = 2;

  // Various value types which can be used for display data. Only one will be
  // set.
  oneof Value {
    // Contains value if the data is of string type.
    string str_value = 4;

    // Contains value if the data is of int64 type.
    int64 int64_value = 5;

    // Contains value if the data is of float type.
    float float_value = 6;

    // Contains value if the data is of java class type.
    string java_class_value = 7;

    // Contains value if the data is of timestamp type.
    google.protobuf.Timestamp timestamp_value = 8;

    // Contains value if the data is of duration type.
    google.protobuf.Duration duration_value = 9;

    // Contains value if the data is of a boolean type.
    bool bool_value = 10;
  }

  // A possible additional shorter value to display.
  // For example, a java_class_value of com.mypackage.MyDoFn
  // will be stored with MyDoFn as the short_str_value and
  // com.mypackage.MyDoFn as the java_class_value.
  // short_str_value can be displayed and java_class_value
  // will be displayed as a tooltip.
  string short_str_value = 11;

  // An optional full URL.
  string url = 12;

  // An optional label to display in a dax UI for the element.
  string label = 13;
}

// Defines a particular step within a Cloud Dataflow job.
//
// A job consists of multiple steps, each of which performs some
// specific operation as part of the overall job. Data is typically
// passed from one step to another as part of the job.
//
// Here's an example of a sequence of steps which together implement a
// Map-Reduce job:
//
//   * Read a collection of data from some source, parsing the
//     collection's elements.
//
//   * Validate the elements.
//
//   * Apply a user-defined function to map each element to some value
//     and extract an element-specific key value.
//
//   * Group elements with the same key into a single element with
//     that key, transforming a multiply-keyed collection into a
//     uniquely-keyed collection.
//
//   * Write the elements out to some data sink.
//
// Note that the Cloud Dataflow service may be used to run many different
// types of jobs, not just Map-Reduce.
message Step {
  // The kind of step in the Cloud Dataflow job.
  string kind = 1;

  // The name that identifies the step. This must be unique for each
  // step with respect to all other steps in the Cloud Dataflow job.
  string name = 2;

  // Named properties associated with the step. Each kind of
  // predefined step has its own required set of properties.
  // Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
  google.protobuf.Struct properties = 3;
}

// Describes the overall state of a [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job].
enum JobState {
  // The job's run state isn't specified.
  JOB_STATE_UNKNOWN = 0;

  // `JOB_STATE_STOPPED` indicates that the job has not
  // yet started to run.
  JOB_STATE_STOPPED = 1;

  // `JOB_STATE_RUNNING` indicates that the job is currently running.
  JOB_STATE_RUNNING = 2;

  // `JOB_STATE_DONE` indicates that the job has successfully completed.
  // This is a terminal job state. This state may be set by the Cloud Dataflow
  // service, as a transition from `JOB_STATE_RUNNING`. It may also be set via a
  // Cloud Dataflow `UpdateJob` call, if the job has not yet reached a terminal
  // state.
  JOB_STATE_DONE = 3;

  // `JOB_STATE_FAILED` indicates that the job has failed. This is a
  // terminal job state. This state may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_RUNNING`.
  JOB_STATE_FAILED = 4;

  // `JOB_STATE_CANCELLED` indicates that the job has been explicitly
  // cancelled. This is a terminal job state. This state may only be
  // set via a Cloud Dataflow `UpdateJob` call, and only if the job has not
  // yet reached another terminal state.
  JOB_STATE_CANCELLED = 5;

  // `JOB_STATE_UPDATED` indicates that the job was successfully updated,
  // meaning that this job was stopped and another job was started, inheriting
  // state from this one. This is a terminal job state. This state may only be
  // set by the Cloud Dataflow service, and only as a transition from
  // `JOB_STATE_RUNNING`.
  JOB_STATE_UPDATED = 6;

  // `JOB_STATE_DRAINING` indicates that the job is in the process of draining.
  // A draining job has stopped pulling from its input sources and is processing
  // any data that remains in-flight. This state may be set via a Cloud Dataflow
  // `UpdateJob` call, but only as a transition from `JOB_STATE_RUNNING`. Jobs
  // that are draining may only transition to `JOB_STATE_DRAINED`,
  // `JOB_STATE_CANCELLED`, or `JOB_STATE_FAILED`.
  JOB_STATE_DRAINING = 7;

  // `JOB_STATE_DRAINED` indicates that the job has been drained.
  // A drained job terminated by stopping pulling from its input sources and
  // processing any data that remained in-flight when draining was requested.
  // This state is a terminal state, may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_DRAINING`.
  JOB_STATE_DRAINED = 8;

  // `JOB_STATE_PENDING` indicates that the job has been created but is not yet
  // running. Jobs that are pending may only transition to `JOB_STATE_RUNNING`
  // or `JOB_STATE_FAILED`.
  JOB_STATE_PENDING = 9;

  // `JOB_STATE_CANCELLING` indicates that the job has been explicitly cancelled
  // and is in the process of stopping. Jobs that are cancelling may only
  // transition to `JOB_STATE_CANCELLED` or `JOB_STATE_FAILED`.
  JOB_STATE_CANCELLING = 10;

  // `JOB_STATE_QUEUED` indicates that the job has been created but is being
  // delayed until launch. Jobs that are queued may only transition to
  // `JOB_STATE_PENDING` or `JOB_STATE_CANCELLED`.
  JOB_STATE_QUEUED = 11;

  // `JOB_STATE_RESOURCE_CLEANING_UP` indicates that the batch job's associated
  // resources are currently being cleaned up after a successful run.
  // Currently, this is an opt-in feature; please contact the Cloud support
  // team if you are interested.
  JOB_STATE_RESOURCE_CLEANING_UP = 12;
}

// Additional information about how a Cloud Dataflow job will be executed that
// isn't contained in the submitted job.
message JobExecutionInfo {
  // A mapping from each stage to the information about that stage.
  map<string, JobExecutionStageInfo> stages = 1;
}

// Contains information about how a particular
// [google.dataflow.v1beta3.Step][google.dataflow.v1beta3.Step] will be executed.
message JobExecutionStageInfo {
  // The steps associated with the execution stage.
  // Note that stages may have several steps, and that a given step
  // might be run by more than one stage.
  repeated string step_name = 1;
}

// Selector for how much information is returned in Job responses.
enum JobView {
  // The job view to return isn't specified, or is unknown.
  // Responses will contain at least the `JOB_VIEW_SUMMARY` information,
  // and may contain additional information.
  JOB_VIEW_UNKNOWN = 0;

  // Request summary information only:
  // Project ID, Job ID, job name, job type, job status, start/end time,
  // and Cloud SDK version details.
  JOB_VIEW_SUMMARY = 1;

  // Request all information available for this job.
  JOB_VIEW_ALL = 2;

  // Request summary info and limited job description data for steps, labels and
  // environment.
  JOB_VIEW_DESCRIPTION = 3;
}

// Request to create a Cloud Dataflow job.
message CreateJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job to create.
  Job job = 2;

  // The level of information requested in response.
  JobView view = 3;

  // Deprecated. This field is now in the Job message.
  string replace_job_id = 4;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 5;
}

// Request to get the state of a Cloud Dataflow job.
message GetJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job ID.
  string job_id = 2;

  // The level of information requested in response.
  JobView view = 3;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 4;
}

// Request to update a Cloud Dataflow job.
message UpdateJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job ID.
  string job_id = 2;

  // The updated job.
  // Only the job state is updatable; other fields will be ignored.
  Job job = 3;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 4;
}

// Request to list Cloud Dataflow jobs.
message ListJobsRequest {
  // This field filters out and returns jobs in the specified job state. The
  // order of data returned is determined by the filter used, and is subject to
  // change.
  enum Filter {
    // The filter isn't specified, or is unknown. This returns all jobs ordered
    // on descending `JobUuid`.
    UNKNOWN = 0;

    // Returns all running jobs first ordered on creation timestamp, then
    // returns all terminated jobs ordered on the termination timestamp.
    ALL = 1;

    // Filters the jobs that have a terminated state, ordered on the
    // termination timestamp. Example terminated states: `JOB_STATE_STOPPED`,
    // `JOB_STATE_UPDATED`, `JOB_STATE_DRAINED`, etc.
    TERMINATED = 2;

    // Filters the jobs that are running, ordered on the creation timestamp.
    ACTIVE = 3;
  }

  // The kind of filter to use.
  Filter filter = 5;

  // The project which owns the jobs.
  string project_id = 1;

  // Deprecated. ListJobs always returns summaries now.
  // Use GetJob for other JobViews.
  JobView view = 2 [deprecated = true];

  // If there are many jobs, limit response to at most this many.
  // The actual number of jobs returned will be the lesser of page_size
  // and an unspecified server-defined limit.
  int32 page_size = 3;

  // Set this to the 'next_page_token' field of a previous response
  // to request additional results in a long list.
  string page_token = 4;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 17;
}
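
// Illustrative paging sketch (editorial, hypothetical values): issue the
// first `ListJobs` request with `pageSize`; while the response carries
// `next_page_token`, repeat the request passing that value as `pageToken`:
//
//   GET /v1b3/projects/my-project/locations/us-central1/jobs?pageSize=50
//   GET /v1b3/projects/my-project/locations/us-central1/jobs?pageSize=50&pageToken=<token>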

// Indicates which [regional endpoint]
// (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
// to respond to a request for data.
message FailedLocation {
  // The name of the [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // failed to respond.
  string name = 1;
}

// Response to a request to list Cloud Dataflow jobs in a project. This might
// be a partial response, depending on the page size in the ListJobsRequest.
// However, if the project does not have any jobs, an instance of
// ListJobsResponse is not returned and the request's response
// body is empty {}.
message ListJobsResponse {
  // A subset of the requested job information.
  repeated Job jobs = 1;

  // Set if there may be more results than fit in this response.
  string next_page_token = 2;

  // Zero or more messages describing the [regional endpoints]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // failed to respond.
  repeated FailedLocation failed_location = 3;
}

// Request to create a snapshot of a job.
message SnapshotJobRequest {
  // The project which owns the job to be snapshotted.
  string project_id = 1;

  // The job to be snapshotted.
  string job_id = 2;

  // TTL for the snapshot.
  google.protobuf.Duration ttl = 3;

  // The location that contains this job.
  string location = 4;

  // If true, perform snapshots for sources which support this.
  bool snapshot_sources = 5;

  // User-specified description of the snapshot. May be empty.
  string description = 6;
}

// Request to check whether active jobs exist for a project.
message CheckActiveJobsRequest {
  // The project which owns the jobs.
  string project_id = 1;
}

// Response for CheckActiveJobsRequest.
message CheckActiveJobsResponse {
  // If true, active jobs exist for the project; false otherwise.
  bool active_jobs_exist = 1;
}