12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808 |
- // Copyright 2022 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- syntax = "proto3";
- package google.privacy.dlp.v2;
- import "google/api/resource.proto"; // NOTE(review): no `google.api.resource` annotation is visible in this view of the file; confirm this import is still needed.
- import "google/protobuf/timestamp.proto";
- option csharp_namespace = "Google.Cloud.Dlp.V2";
- option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
- option java_multiple_files = true;
- option java_outer_classname = "DlpStorage";
- option java_package = "com.google.privacy.dlp.v2";
- option php_namespace = "Google\\Cloud\\Dlp\\V2";
- option ruby_package = "Google::Cloud::Dlp::V2";
- // Type of information detected by the API, identified by `name` and `version`.
- message InfoType {
- // Name of the information type. Either a name of your choosing when
- // creating a CustomInfoType, or one of the names listed
- // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- // a built-in type. When sending Cloud DLP results to Data Catalog, infoType
- // names should conform to the pattern `[A-Za-z0-9$_-]{1,64}`.
- string name = 1;
- // Optional version name for this InfoType.
- string version = 2;
- }
- // Score is a summary of all elements in the data profile.
- // A higher number means more sensitive.
- message SensitivityScore {
- // Various score levels for resources.
- enum SensitivityScoreLevel {
- // Unused.
- SENSITIVITY_SCORE_UNSPECIFIED = 0;
- // No sensitive information detected. Limited access.
- SENSITIVITY_LOW = 10;
- // Medium risk - PII, potentially sensitive data, or fields with free-text
- // data that are at higher risk of having intermittent sensitive data.
- // Consider limiting access.
- SENSITIVITY_MODERATE = 20;
- // High risk - SPII may be present. Exfiltration of data may lead to user
- // data loss. Re-identification of users may be possible. Consider limiting
- // usage and/or removing SPII.
- SENSITIVITY_HIGH = 30;
- }
- // The score applied to the resource.
- SensitivityScoreLevel score = 1;
- }
- // Categorization of results based on how likely they are to represent a match,
- // based on the number of elements they contain which imply a match.
- enum Likelihood {
- // Default value; same as POSSIBLE.
- LIKELIHOOD_UNSPECIFIED = 0;
- // Few matching elements.
- VERY_UNLIKELY = 1;
- UNLIKELY = 2; // One level above `VERY_UNLIKELY` and one below `POSSIBLE`.
- // Some matching elements.
- POSSIBLE = 3;
- LIKELY = 4; // One level above `POSSIBLE` and one below `VERY_LIKELY`.
- // Many matching elements.
- VERY_LIKELY = 5;
- }
- // A reference to a `StoredInfoType` to use with scanning.
- message StoredType {
- // Resource name of the requested `StoredInfoType`, for example
- // `organizations/433245324/storedInfoTypes/432452342` or
- // `projects/project-id/storedInfoTypes/432452342`.
- string name = 1;
- // Timestamp indicating when the version of the `StoredInfoType` used for
- // inspection was created. Output-only field, populated by the system.
- google.protobuf.Timestamp create_time = 2;
- }
- // Custom information type provided by the user. Used to find domain-specific
- // sensitive information configurable to the data in question.
- message CustomInfoType {
- // Custom information type based on a dictionary of words or phrases. This can
- // be used to match sensitive information specific to the data, such as a list
- // of employee IDs or job titles.
- //
- // Dictionary words are case-insensitive and all characters other than letters
- // and digits in the Unicode [Basic Multilingual
- // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- // will be replaced with whitespace when scanning for matches, so the
- // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- // surrounding any match must be of a different type than the adjacent
- // characters within the word, so letters must be next to non-letters and
- // digits next to non-digits. For example, the dictionary word "jen" will
- // match the first three letters of the text "jen123" but will return no
- // matches for "jennifer".
- //
- // Dictionary words containing a large number of characters that are not
- // letters or digits may result in unexpected findings because such characters
- // are treated as whitespace. The
- // [limits](https://cloud.google.com/dlp/limits) page contains details about
- // the size limits of dictionaries. For dictionaries that do not fit within
- // these constraints, consider using `LargeCustomDictionaryConfig` in the
- // `StoredInfoType` API.
- message Dictionary {
- // Message defining a list of words or phrases to search for in the data.
- message WordList {
- // Words or phrases defining the dictionary. The dictionary must contain
- // at least one phrase and every phrase must contain at least 2 characters
- // that are letters or digits. [required]
- repeated string words = 1;
- }
- oneof source { // Where the dictionary words come from; at most one member may be set.
- // List of words or phrases to search for.
- WordList word_list = 1;
- // Newline-delimited file of words in Cloud Storage. Only a single file
- // is accepted.
- CloudStoragePath cloud_storage_path = 3;
- }
- }
- // Message defining a custom regular expression.
- message Regex {
- // Pattern defining the regular expression. Its syntax
- // (https://github.com/google/re2/wiki/Syntax) can be found under the
- // google/re2 repository on GitHub.
- string pattern = 1;
- // The index of the submatch to extract as findings. When not
- // specified, the entire match is returned. No more than 3 may be included.
- repeated int32 group_indexes = 2;
- }
- // Message for detecting output from deidentification transformations
- // such as
- // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- // These types of transformations are
- // those that perform pseudonymization, thereby producing a "surrogate" as
- // output. This should be used in conjunction with a field on the
- // transformation such as `surrogate_info_type`. This CustomInfoType does
- // not support the use of `detection_rules`.
- message SurrogateType { // This message declares no fields.
- }
- // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- // `CustomInfoType` to alter behavior under certain circumstances, depending
- // on the specific details of the rule. Not supported for the `surrogate_type`
- // custom infoType.
- message DetectionRule { // NOTE(review): described as deprecated but carries no `option deprecated = true;` - confirm this is intentional.
- // Message for specifying a window around a finding to apply a detection
- // rule.
- message Proximity {
- // Number of characters before the finding to consider. For tabular data,
- // if you want to modify the likelihood of an entire column of findings,
- // set this to 1. For more information, see
- // [Hotword example: Set the match likelihood of a table
- // column](https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
- int32 window_before = 1;
- // Number of characters after the finding to consider.
- int32 window_after = 2;
- }
- // Message for specifying an adjustment to the likelihood of a finding as
- // part of a detection rule.
- message LikelihoodAdjustment {
- oneof adjustment { // Either a fixed or a relative adjustment; at most one member may be set.
- // Set the likelihood of a finding to a fixed value.
- Likelihood fixed_likelihood = 1;
- // Increase or decrease the likelihood by the specified number of
- // levels. For example, if a finding would be `POSSIBLE` without the
- // detection rule and `relative_likelihood` is 1, then it is upgraded to
- // `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- // Likelihood may never drop below `VERY_UNLIKELY` or exceed
- // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- // adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- // a final likelihood of `LIKELY`.
- int32 relative_likelihood = 2;
- }
- }
- // The rule that adjusts the likelihood of findings within a certain
- // proximity of hotwords.
- message HotwordRule {
- // Regular expression pattern defining what qualifies as a hotword.
- Regex hotword_regex = 1;
- // Range of characters within which the entire hotword must reside.
- // The total length of the window cannot exceed 1000 characters.
- // The finding itself will be included in the window, so that hotwords can
- // be used to match substrings of the finding itself. Suppose you
- // want Cloud DLP to promote the likelihood of the phone number
- // regex "\(\d{3}\) \d{3}-\d{4}" if the area code is known to be the
- // area code of a company's office. In this case, use the hotword regex
- // "\(xxx\)", where "xxx" is the area code in question.
- //
- // For tabular data, if you want to modify the likelihood of an entire
- // column of findings, see
- // [Hotword example: Set the match likelihood of a table
- // column](https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
- Proximity proximity = 2;
- // Likelihood adjustment to apply to all matching findings.
- LikelihoodAdjustment likelihood_adjustment = 3;
- }
- oneof type { // The kind of detection rule; `hotword_rule` is the only member defined here.
- // Hotword-based detection rule.
- HotwordRule hotword_rule = 1;
- }
- }
- enum ExclusionType { // Whether findings of this custom infoType are excluded from results.
- // A finding of this custom info type will not be excluded from results.
- EXCLUSION_TYPE_UNSPECIFIED = 0;
- // A finding of this custom info type will be excluded from final results,
- // but can still affect rule execution.
- EXCLUSION_TYPE_EXCLUDE = 1;
- }
- // CustomInfoType can either be a new infoType, or an extension of built-in
- // infoType, when the name matches one of existing infoTypes and that infoType
- // is specified in `InspectContent.info_types` field. Specifying the latter
- // adds findings to the ones detected by the system. If built-in info type is
- // not specified in `InspectContent.info_types` list then the name is treated
- // as a custom info type.
- InfoType info_type = 1;
- // Likelihood to return for this CustomInfoType. This base value can be
- // altered by a detection rule if the finding meets the criteria specified by
- // the rule. Defaults to `VERY_LIKELY` if not specified.
- Likelihood likelihood = 6;
- oneof type { // How this CustomInfoType is detected; at most one member may be set.
- // A list of phrases to detect as a CustomInfoType.
- Dictionary dictionary = 2;
- // Regular expression based CustomInfoType.
- Regex regex = 3;
- // Message for detecting output from deidentification transformations that
- // support reversing.
- SurrogateType surrogate_type = 4;
- // Load an existing `StoredInfoType` resource for use in
- // `InspectDataSource`. Not currently supported in `InspectContent`.
- StoredType stored_type = 5;
- }
- // Set of detection rules to apply to all findings of this CustomInfoType.
- // Rules are applied in order that they are specified. Not supported for the
- // `surrogate_type` CustomInfoType.
- repeated DetectionRule detection_rules = 7;
- // If set to `EXCLUSION_TYPE_EXCLUDE` this infoType will not cause a finding
- // to be returned. It still can be used for rules matching.
- ExclusionType exclusion_type = 8;
- }
- // General identifier of a data field (for example, a table column) in a storage service.
- message FieldId {
- // Name describing the field.
- string name = 1;
- }
- // Datastore partition ID.
- // A partition ID identifies a grouping of entities. The grouping is always
- // by project and namespace, however the namespace ID may be empty.
- //
- // A partition ID contains several dimensions:
- // project ID and namespace ID.
- message PartitionId { // Only field numbers 2 and 4 are assigned in this message.
- // The ID of the project to which the entities belong.
- string project_id = 2;
- // If not empty, the ID of the namespace to which the entities belong.
- string namespace_id = 4;
- }
- // A representation of a Datastore kind.
- message KindExpression {
- // The name of the Datastore kind.
- string name = 1;
- }
- // Options defining a data set within Google Cloud Datastore.
- message DatastoreOptions {
- // A partition ID identifies a grouping of entities. The grouping is always
- // by project and namespace, however the namespace ID may be empty.
- PartitionId partition_id = 1;
- // The Datastore kind to process.
- KindExpression kind = 2;
- }
- // Definitions of file type groups to scan. New types will be added to this
- // list.
- enum FileType {
- // Includes all files.
- FILE_TYPE_UNSPECIFIED = 0;
- // Includes all file extensions not covered by another entry. Binary
- // scanning attempts to convert the content of the file to UTF-8 to scan
- // the file.
- // If you wish to avoid this fall back, specify one or more of the other
- // `FileType` values in your storage scan.
- BINARY_FILE = 1;
- // Included file extensions:
- // asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
- // dat, dot, eml, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
- // mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
- // properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
- // shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
- // txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
- TEXT_FILE = 2; // NOTE(review): `epbub` in the list above may be a typo for `epub` - confirm.
- // Included file extensions:
- // bmp, gif, jpg, jpeg, jpe, png.
- // bytes_limit_per_file has no effect on image files.
- // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- IMAGE = 3;
- // Word files >30 MB will be scanned as binary files.
- // Included file extensions:
- // docx, dotx, docm, dotm
- WORD = 5;
- // PDF files >30 MB will be scanned as binary files.
- // Included file extensions:
- // pdf
- PDF = 6;
- // Included file extensions:
- // avro
- AVRO = 7;
- // Included file extensions:
- // csv
- CSV = 8;
- // Included file extensions:
- // tsv
- TSV = 9;
- // Powerpoint files >30 MB will be scanned as binary files.
- // Included file extensions:
- // pptx, pptm, potx, potm, pot
- POWERPOINT = 11;
- // Excel files >30 MB will be scanned as binary files.
- // Included file extensions:
- // xlsx, xlsm, xltx, xltm
- EXCEL = 12;
- }
- // Message representing a set of files in a Cloud Storage bucket. Regular
- // expressions are used to allow fine-grained control over which files in the
- // bucket to include.
- //
- // Included files are those that match at least one item in `include_regex` and
- // do not match any items in `exclude_regex`. Note that a file that matches
- // items from both lists will _not_ be included. For a match to occur, the
- // entire file path (i.e., everything in the URL after the bucket name) must
- // match the regular expression.
- //
- // For example, given the input `{bucket_name: "mybucket", include_regex:
- // ["directory1/.*"], exclude_regex:
- // ["directory1/excluded.*"]}`:
- //
- // * `gs://mybucket/directory1/myfile` will be included
- // * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- // across `/`)
- // * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- // full path doesn't match any items in `include_regex`)
- // * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- // matches an item in `exclude_regex`)
- //
- // If `include_regex` is left empty, it will match all files by default
- // (this is equivalent to setting `include_regex: [".*"]`).
- //
- // Some other common use cases:
- //
- // * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- // files in `mybucket` except for .pdf files
- // * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- // include all files directly under `gs://mybucket/directory/`, without matching
- // across `/`
- message CloudStorageRegexFileSet {
- // The name of a Cloud Storage bucket. Required.
- string bucket_name = 1;
- // A list of regular expressions matching file paths to include. All files in
- // the bucket that match at least one of these regular expressions will be
- // included in the set of files, except for those that also match an item in
- // `exclude_regex`. Leaving this field empty will match all files by default
- // (this is equivalent to including `.*` in the list).
- //
- // Regular expressions use RE2
- // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- // under the google/re2 repository on GitHub.
- repeated string include_regex = 2;
- // A list of regular expressions matching file paths to exclude. All files in
- // the bucket that match at least one of these regular expressions will be
- // excluded from the scan.
- //
- // Regular expressions use RE2
- // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- // under the google/re2 repository on GitHub.
- repeated string exclude_regex = 3;
- }
- // Options defining a file or a set of files within a Cloud Storage
- // bucket.
- message CloudStorageOptions {
- // Set of files to scan.
- message FileSet {
- // The Cloud Storage URL of the file(s) to scan, in the format
- // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- //
- // If the URL ends in a trailing slash, the bucket or directory represented
- // by the URL will be scanned non-recursively (content in sub-directories
- // will not be scanned). This means that `gs://mybucket/` is equivalent to
- // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- // `gs://mybucket/directory/*`.
- //
- // Exactly one of `url` or `regex_file_set` must be set.
- string url = 1;
- // The regex-filtered set of files to scan. Exactly one of `url` or
- // `regex_file_set` must be set.
- CloudStorageRegexFileSet regex_file_set = 2;
- }
- // How to sample bytes if not all bytes are scanned. Meaningful only when used
- // in conjunction with bytes_limit_per_file. If not specified, scanning would
- // start from the top.
- enum SampleMethod {
- SAMPLE_METHOD_UNSPECIFIED = 0; // No method specified; scanning starts from the top.
- // Scan from the top (default).
- TOP = 1;
- // For each file larger than bytes_limit_per_file, randomly pick the offset
- // to start scanning. The scanned bytes are contiguous.
- RANDOM_START = 2;
- }
- // The set of one or more files to scan.
- FileSet file_set = 1;
- // Max number of bytes to scan from a file. If a scanned file's size is bigger
- // than this value then the rest of the bytes are omitted. Only one
- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- // Cannot be set if de-identification is requested.
- int64 bytes_limit_per_file = 4;
- // Max percentage of bytes to scan from a file. The rest are omitted. The
- // number of bytes scanned is rounded down. Must be between 0 and 100,
- // inclusively. Both 0 and 100 mean no limit. Defaults to 0. Only one
- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- // Cannot be set if de-identification is requested.
- int32 bytes_limit_per_file_percent = 8;
- // List of file type groups to include in the scan.
- // If empty, all files are scanned and available data format processors
- // are applied. In addition, the binary content of the selected files
- // is always scanned as well.
- // Images are scanned only as binary if the specified region
- // does not support image inspection and no file_types were specified.
- // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- repeated FileType file_types = 5;
- SampleMethod sample_method = 6; // How to sample bytes when not all bytes are scanned; see `SampleMethod`.
- // Limits the number of files to scan to this percentage of the input FileSet.
- // Number of files scanned is rounded down. Must be between 0 and 100,
- // inclusively. Both 0 and 100 mean no limit. Defaults to 0.
- int32 files_limit_percent = 7;
- }
- // Message representing a set of files in Cloud Storage.
- message CloudStorageFileSet {
- // The URL, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
- // path is allowed.
- string url = 1;
- }
- // Message representing a single file or path in Cloud Storage.
- message CloudStoragePath {
- // A URL representing a file or path (no wildcards) in Cloud Storage.
- // Example: `gs://[BUCKET_NAME]/dictionary.txt`
- string path = 1;
- }
- // Options defining BigQuery table and row identifiers.
- message BigQueryOptions {
- // How to sample rows if not all rows are scanned. Meaningful only when used
- // in conjunction with either rows_limit or rows_limit_percent. If not
- // specified, rows are scanned in the order BigQuery reads them.
- enum SampleMethod {
- SAMPLE_METHOD_UNSPECIFIED = 0; // No method specified; rows are scanned in the order BigQuery reads them.
- // Scan groups of rows in the order BigQuery provides (default). Multiple
- // groups of rows may be scanned in parallel, so results may not appear in
- // the same order the rows are read.
- TOP = 1;
- // Randomly pick groups of rows to scan.
- RANDOM_START = 2;
- }
- // Complete BigQuery table reference.
- BigQueryTable table_reference = 1;
- // Table fields that may uniquely identify a row within the table. When
- // `actions.saveFindings.outputConfig.table` is specified, the values of
- // columns specified here are available in the output table under
- // `location.content_locations.record_location.record_key.id_values`. Nested
- // fields such as `person.birthdate.year` are allowed.
- repeated FieldId identifying_fields = 2;
- // Max number of rows to scan. If the table has more rows than this value, the
- // rest of the rows are omitted. If not set, or if set to 0, all rows will be
- // scanned. Only one of rows_limit and rows_limit_percent can be specified.
- // Cannot be used in conjunction with TimespanConfig.
- int64 rows_limit = 3;
- // Max percentage of rows to scan. The rest are omitted. The number of rows
- // scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- // 100 mean no limit. Defaults to 0. Only one of rows_limit and
- // rows_limit_percent can be specified. Cannot be used in conjunction with
- // TimespanConfig.
- int32 rows_limit_percent = 6;
- SampleMethod sample_method = 4; // How to sample rows when not all rows are scanned; see `SampleMethod`.
- // References to fields excluded from scanning. This allows you to skip
- // inspection of entire columns which you know have no findings.
- repeated FieldId excluded_fields = 5;
- // Limit scanning only to these fields.
- repeated FieldId included_fields = 7;
- }
- // Shared message indicating Cloud storage type.
- message StorageConfig {
- // Configuration of the timespan of the items to include in scanning.
- // Currently only supported when inspecting Cloud Storage and BigQuery.
- message TimespanConfig {
- // Exclude files, tables, or rows older than this value.
- // If not set, no lower time limit is applied.
- google.protobuf.Timestamp start_time = 1;
- // Exclude files, tables, or rows newer than this value.
- // If not set, no upper time limit is applied.
- google.protobuf.Timestamp end_time = 2;
- // Specification of the field containing the timestamp of scanned items.
- // Used for data sources like Datastore and BigQuery.
- //
- // <b>For BigQuery</b>
- //
- // If this value is not specified and the table was modified between the
- // given start and end times, the entire table will be scanned. If this
- // value is specified, then rows are filtered based on the given start and
- // end times. Rows with a `NULL` value in the provided BigQuery column are
- // skipped.
- // Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
- // `TIMESTAMP`, and `DATETIME`.
- //
- // If your BigQuery table is [partitioned at ingestion
- // time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
- // you can use any of the following pseudo-columns as your timestamp field.
- // When used with Cloud DLP, these pseudo-column names are case sensitive.
- //
- // <ul>
- // <li><code>_PARTITIONTIME</code></li>
- // <li><code>_PARTITIONDATE</code></li>
- // <li><code>_PARTITION_LOAD_TIME</code></li>
- // </ul>
- //
- // <b>For Datastore</b>
- //
- // If this value is specified, then entities are filtered based on the given
- // start and end times. If an entity does not contain the provided timestamp
- // property or contains empty or invalid values, then it is included.
- // Valid data types of the provided timestamp property are: `TIMESTAMP`.
- //
- // See the
- // [known issue](https://cloud.google.com/dlp/docs/known-issues#bq-timespan)
- // related to this operation.
- FieldId timestamp_field = 3;
- // When the job is started by a JobTrigger we will automatically figure out
- // a valid start_time to avoid scanning files that have not been modified
- // since the last time the JobTrigger executed. This will be based on the
- // time of the execution of the last run of the JobTrigger or the timespan
- // end_time used in the last run of the JobTrigger.
- bool enable_auto_population_of_timespan_config = 4;
- }
- oneof type { // The storage system to scan; at most one member may be set.
- // Google Cloud Datastore options.
- DatastoreOptions datastore_options = 2;
- // Cloud Storage options.
- CloudStorageOptions cloud_storage_options = 3;
- // BigQuery options.
- BigQueryOptions big_query_options = 4;
- // Hybrid inspection options.
- HybridOptions hybrid_options = 9;
- }
- TimespanConfig timespan_config = 6; // Timespan of the items to include in scanning; see `TimespanConfig`.
- }
- // Configuration to control jobs where the content being inspected is outside
- // of Google Cloud Platform.
- message HybridOptions {
- // A short description of where the data is coming from. Will be stored once
- // in the job. 256 max length.
- string description = 1;
- // These are labels that each inspection request must include within their
- // `finding_labels` map. Requests may contain other labels, but any request
- // missing one of these will be rejected.
- //
- // Label keys must be between 1 and 63 characters long and must conform
- // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
- //
- // No more than 10 keys can be required.
- repeated string required_finding_label_keys = 2;
- // To organize findings, these labels will be added to each finding.
- //
- // Label keys must be between 1 and 63 characters long and must conform
- // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
- //
- // Label values must be between 0 and 63 characters long and must conform
- // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
- //
- // No more than 10 labels can be associated with a given finding.
- //
- // Examples:
- // * `"environment" : "production"`
- // * `"pipeline" : "etl"`
- map<string, string> labels = 3;
- // If the container is a table, additional information to make findings
- // meaningful such as the columns that are primary keys.
- TableOptions table_options = 4;
- }
- // Row key for identifying a record in a BigQuery table.
- message BigQueryKey {
- // Complete BigQuery table reference.
- BigQueryTable table_reference = 1;
- // Row number inferred at the time the table was scanned. This value is
- // nondeterministic, cannot be queried, and may be null for inspection
- // jobs. To locate findings within a table, specify
- // `inspect_job.storage_config.big_query_options.identifying_fields` in
- // `CreateDlpJobRequest`.
- int64 row_number = 2;
- }
- // Record key for a finding in Cloud Datastore.
- message DatastoreKey {
- // Datastore entity key; see `Key`.
- Key entity_key = 1;
- }
- // A unique identifier for a Datastore entity.
- // If a key's partition ID or any of its path kinds or names are
- // reserved/read-only, the key is reserved/read-only.
- // A reserved/read-only key is forbidden in certain documented contexts.
- message Key {
- // A (kind, ID/name) pair used to construct a key path.
- //
- // If either name or ID is set, the element is complete.
- // If neither is set, the element is incomplete.
- message PathElement {
- // The kind of the entity.
- // A kind matching regex `__.*__` is reserved/read-only.
- // A kind must not contain more than 1500 bytes when UTF-8 encoded.
- // Cannot be `""`.
- string kind = 1;
- // The type of ID; at most one of `id` or `name` may be set.
- oneof id_type {
- // The auto-allocated ID of the entity.
- // Never equal to zero. Values less than zero are discouraged and may not
- // be supported in the future.
- int64 id = 2;
- // The name of the entity.
- // A name matching regex `__.*__` is reserved/read-only.
- // A name must not be more than 1500 bytes when UTF-8 encoded.
- // Cannot be `""`.
- string name = 3;
- }
- }
- // Entities are partitioned into subsets, currently identified by a project
- // ID and namespace ID.
- // Queries are scoped to a single partition.
- PartitionId partition_id = 1;
- // The entity path.
- // An entity path consists of one or more elements composed of a kind and a
- // string or numerical identifier, which identify entities. The first
- // element identifies a _root entity_, the second element identifies
- // a _child_ of the root entity, the third element identifies a child of the
- // second entity, and so forth. The entities identified by all prefixes of
- // the path are called the element's _ancestors_.
- //
- // A path can never be empty, and a path can have at most 100 elements.
- repeated PathElement path = 2;
- }
- // Message for a unique key indicating a record that contains a finding.
- message RecordKey {
- oneof type { // The storage-specific key of the record; at most one member may be set.
- DatastoreKey datastore_key = 2; // Record key for a finding in Cloud Datastore.
- BigQueryKey big_query_key = 3; // Row key for a record in a BigQuery table.
- }
- // Values of identifying columns in the given row. Order of values matches
- // the order of `identifying_fields` specified in the scanning request.
- repeated string id_values = 5;
- }
- // Message defining the location of a BigQuery table. A table is uniquely
- // identified by its project_id, dataset_id, and table_id. Within a query
- // a table is often referenced with a string in the format of:
- // `<project_id>:<dataset_id>.<table_id>` or
- // `<project_id>.<dataset_id>.<table_id>`.
- message BigQueryTable {
- // The Google Cloud Platform project ID of the project containing the table.
- // If omitted, project ID is inferred from the API call.
- string project_id = 1;
- // Dataset ID of the table.
- string dataset_id = 2;
- // Name of the table.
- string table_id = 3;
- }
- // Message defining a field of a BigQuery table.
- message BigQueryField {
- // Source table of the field.
- BigQueryTable table = 1;
- // Designated field (column) in the BigQuery table.
- FieldId field = 2;
- }
- // An entity in a dataset is a field or set of fields that correspond to a
- // single person. For example, in medical records the `EntityId` might be a
- // patient identifier, or for financial records it might be an account
- // identifier. This message is used when generalizations or analysis must take
- // into account that multiple rows correspond to the same entity.
- message EntityId {
- // Composite key indicating which field contains the entity identifier.
- FieldId field = 1; // NOTE(review): described as a composite key, but this is a single `FieldId` - confirm the wording.
- }
- // Instructions regarding the table content being inspected.
- message TableOptions {
- // The columns that are the primary keys for table objects included in
- // ContentItem. A copy of this cell's value will be stored alongside
- // each finding so that the finding can be traced to the specific row it came
- // from. No more than 3 may be provided.
- repeated FieldId identifying_fields = 1;
- }
|