data_stats.proto 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. // Copyright 2020 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  syntax = "proto3";

  package google.cloud.automl.v1beta1;

  // Per-language settings for the code generated from this file.
  option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
  option java_multiple_files = true;
  option java_package = "com.google.cloud.automl.v1beta1";
  option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
  option ruby_package = "Google::Cloud::AutoML::V1beta1";
  // Statistics describing a series of values that all share one DataType.
  message DataStats {
    // Statistics specific to the DataType of the series. At most one member
    // is set.
    oneof stats {
      // Statistics for the FLOAT64 DataType.
      Float64Stats float64_stats = 3;

      // Statistics for the STRING DataType.
      StringStats string_stats = 4;

      // Statistics for the TIMESTAMP DataType.
      TimestampStats timestamp_stats = 5;

      // Statistics for the ARRAY DataType.
      ArrayStats array_stats = 6;

      // Statistics for the STRUCT DataType.
      StructStats struct_stats = 7;

      // Statistics for the CATEGORY DataType.
      CategoryStats category_stats = 8;
    }

    // How many distinct values the series contains.
    int64 distinct_value_count = 1;

    // How many values in the series are null.
    int64 null_value_count = 2;

    // How many values in the series are valid.
    int64 valid_value_count = 9;
  }
  // Statistics describing a series of FLOAT64 values.
  message Float64Stats {
    // One bucket of a histogram.
    message HistogramBucket {
      // Lower bound of the bucket (inclusive).
      double min = 1;

      // Upper bound of the bucket. Exclusive, except when max = `"Infinity"`,
      // in which case it is inclusive.
      double max = 2;

      // How many data values fall into this bucket, i.e. lie between the
      // min and max values.
      int64 count = 3;
    }

    // Mean of the series.
    double mean = 1;

    // Standard deviation of the series.
    double standard_deviation = 2;

    // The k-quantile values of a data series of n values, listed in order
    // from index 0 to index k. The entry at index i is approximately the
    // i*n/k-th smallest value in the series; index 0 holds the min value and
    // index k holds the max value.
    repeated double quantiles = 3;

    // Histogram buckets of the data series, sorted in ascending order of
    // each bucket's min value. The number of buckets is generated
    // dynamically. Buckets do not overlap and together cover the whole
    // FLOAT64 range: the min of the first bucket is `"-Infinity"` and the
    // max of the last bucket is `"Infinity"`.
    repeated HistogramBucket histogram_buckets = 4;
  }
  // Statistics describing a series of STRING values.
  message StringStats {
    // Statistics for a single unigram.
    message UnigramStats {
      // The unigram itself.
      string value = 1;

      // How many times this unigram occurs in the series.
      int64 count = 2;
    }

    // Statistics for the top 20 unigrams, ordered by
    // [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
    repeated UnigramStats top_unigram_stats = 1;
  }
  // Statistics describing a series of TIMESTAMP values.
  message TimestampStats {
    // Statistics broken down by a granularity that is defined by context.
    message GranularStats {
      // Maps a granularity key to the number of examples for that key.
      // For example, for hour_of_day the key `13` means 1pm, and for
      // month_of_year the key `5` means May.
      map<int32, int64> buckets = 1;
    }

    // Keyed by the name of a pre-defined granularity. Currently supported:
    // hour_of_day, day_of_week, month_of_year.
    // Granularities finer than the granularity of the timestamp data are not
    // populated (e.g. if timestamps are at day granularity, then hour_of_day
    // is not populated).
    map<string, GranularStats> granular_stats = 1;
  }
  // Statistics describing a series of ARRAY values.
  message ArrayStats {
    // Statistics over the values of all arrays taken together, as if they
    // formed a single long series of data. The type depends on the element
    // type of the array.
    DataStats member_stats = 2;
  }
  // Statistics describing a series of STRUCT values.
  message StructStats {
    // Maps each struct field name to the data stats aggregated over the
    // series of all data in that field across all the structs.
    map<string, DataStats> field_stats = 1;
  }
  // Statistics describing a series of CATEGORY values.
  message CategoryStats {
    // Statistics for a single CATEGORY value.
    message SingleCategoryStats {
      // The CATEGORY value itself.
      string value = 1;

      // How many times this value occurs in the series.
      int64 count = 2;
    }

    // Statistics for the top 20 CATEGORY values, ordered by
    // [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
    repeated SingleCategoryStats top_category_stats = 1;
  }
  // Correlation statistics between two series of DataType values. The two
  // series may have different DataTypes, but within a single series the
  // DataType must be the same.
  message CorrelationStats {
    // The correlation value, measured with Cramer's V.
    double cramers_v = 1;
  }