123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

// Per-language code generation settings for this file.
option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Describes the global topology of a streaming Dataflow job: all of its
// computations together with their sharded locations.
message TopologyConfig {
  // Computations that belong to the streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // Data disks assigned to the streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Lookup from user stage name to stable computation name.
  map<string, string> user_stage_to_computation_name_map = 3;

  // Size, in bits, of the keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number of the persistent state.
  int32 persistent_state_version = 5;
}

// Names the pubsub location used to transfer data into or out of a
// streaming Dataflow job.
message PubsubLocation {
  // Pubsub topic, written as
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // Pubsub subscription, written as
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // Pubsub label from which record timestamps are extracted, when set.
  // When left empty, record timestamps are generated upon arrival.
  string timestamp_label = 3;

  // Pubsub label from which record ids are extracted, when set.
  // When left empty, record deduplication is strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  // NOTE(review): the field name reads as "drop late data" while this
  // description speaks of allowing it; confirm which meaning `true` carries.
  bool drop_late_data = 5;

  // Pubsub subscription used for tracking custom time timestamps for
  // watermark estimation, when set.
  string tracking_subscription = 6;

  // True when the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}

// Names the location of a streaming computation stage; used for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifier of the particular stream within the streaming Dataflow job.
  string stream_id = 1;
}

// Names the location of a streaming side input.
message StreamingSideInputLocation {
  // Tag identifying the particular side input within the streaming Dataflow
  // job.
  string tag = 1;

  // State family in which this side input is stored.
  string state_family = 2;
}

// Names the location of a custom source.
message CustomSourceLocation {
  // True when this source is stateful.
  bool stateful = 1;
}

// A stream of data that is either an input to be processed or an output of a
// streaming Dataflow job.
message StreamLocation {
  // Where the stream lives; exactly one of the alternatives below is set.
  oneof location {
    // The stream belongs to another computation within the current streaming
    // Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}

// Configuration of a single state family.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // True when this family corresponds to a read operation.
  bool is_read = 2;
}

// Holds all configuration data for one particular Computation.
//
// NOTE(review): field number 6 is not used by this message. If it belonged to
// a removed field, consider declaring `reserved 6;` so it cannot be reused --
// confirm against the schema history.
message ComputationTopology {
  // System stage name.
  string system_stage_name = 1;

  // ID of the computation.
  string computation_id = 5;

  // Key ranges that the computation processes.
  repeated KeyRangeLocation key_ranges = 2;

  // Input streams of the computation.
  repeated StreamLocation inputs = 3;

  // Output streams of the computation.
  repeated StreamLocation outputs = 4;

  // State family values.
  repeated StateFamilyConfig state_families = 7;
}

// Location information for one key range of a sharded computation.
// Only UTF-8 character splits are currently supported, which simplifies
// encoding into JSON.
message KeyRangeLocation {
  // Start of the key range (inclusive).
  string start = 1;

  // End of the key range (exclusive).
  string end = 2;

  // Physical location of this range assignment, to be used for streaming
  // computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // Name of the data disk on which data for this range is stored.
  // The name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. Location of the persistent state for this range, given as a
  // persistent directory in the worker local filesystem.
  // NOTE(review): presumably superseded by `data_disk`; confirm before
  // documenting a replacement.
  string deprecated_persistent_directory = 4 [deprecated = true];
}

// Describes a mounted data disk.
message MountedDataDisk {
  // Name of the data disk.
  // The name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}

// Assignment of data disks to a given VM instance.
message DataDiskAssignment {
  // Name of the VM instance that the data disks are mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. Order matters: a data disk's 0-based index in this
  // list defines which persistent directory the disk is mounted to. An
  // example list is { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}

// Data disk assignment information for one key range of a sharded
// computation.
// Only UTF-8 character splits are currently supported, which simplifies
// encoding into JSON.
message KeyRangeDataDiskAssignment {
  // Start of the key range (inclusive).
  string start = 1;

  // End of the key range (exclusive).
  string end = 2;

  // Name of the data disk on which data for this range is stored.
  // The name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}

// Full or partial data disk assignment information for the ranges of a
// computation.
message StreamingComputationRanges {
  // ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges that belong to this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}

// Snapshot configuration for the streaming appliance.
message StreamingApplianceSnapshotConfig {
  // Snapshot id of the snapshot being performed, when set.
  string snapshot_id = 1;

  // Endpoint that is used to import appliance state.
  string import_state_endpoint = 2;
}
|