// references.proto (11 KB)
  // Copyright 2016 Google Inc.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.genomics.v1;

  // Provides the `google.api.http` option used by the RPC bindings below.
  import "google/api/annotations.proto";

  // Code-generation options for the C++, Go, and Java targets.
  option cc_enable_arenas = true;
  option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
  option java_multiple_files = true;
  option java_outer_classname = "ReferencesProto";
  option java_package = "com.google.genomics.v1";
  // Service for searching and retrieving reference sets, references, and the
  // bases of a reference.
  service ReferenceServiceV1 {
    // Searches for reference sets which match the given criteria.
    //
    // For the definitions of references and other genomics resources, see
    // [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    //
    // Implements
    // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
    rpc SearchReferenceSets(SearchReferenceSetsRequest)
        returns (SearchReferenceSetsResponse) {
      option (google.api.http) = {
        post: "/v1/referencesets/search"
        body: "*"
      };
    }

    // Gets a reference set.
    //
    // For the definitions of references and other genomics resources, see
    // [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    //
    // Implements
    // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
    rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
      option (google.api.http) = {
        get: "/v1/referencesets/{reference_set_id}"
      };
    }

    // Searches for references which match the given criteria.
    //
    // For the definitions of references and other genomics resources, see
    // [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    //
    // Implements
    // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
    rpc SearchReferences(SearchReferencesRequest)
        returns (SearchReferencesResponse) {
      option (google.api.http) = {
        post: "/v1/references/search"
        body: "*"
      };
    }

    // Gets a reference.
    //
    // For the definitions of references and other genomics resources, see
    // [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    //
    // Implements
    // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
    rpc GetReference(GetReferenceRequest) returns (Reference) {
      option (google.api.http) = {
        get: "/v1/references/{reference_id}"
      };
    }

    // Lists the bases in a reference, optionally restricted to a range.
    //
    // For the definitions of references and other genomics resources, see
    // [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    //
    // Implements
    // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
    rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
      option (google.api.http) = {
        get: "/v1/references/{reference_id}/bases"
      };
    }
  }
  // A reference is a canonical assembled DNA sequence, intended to act as a
  // reference coordinate space for other genomic annotations. A single reference
  // might represent the human chromosome 1 or mitochondrial DNA, for instance. A
  // reference belongs to one or more reference sets.
  //
  // For more genomics resource definitions, see [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  message Reference {
    // The server-generated reference ID, unique across all references.
    string id = 1;

    // The length of this reference's sequence.
    int64 length = 2;

    // MD5 of the upper-case sequence excluding all whitespace characters (this
    // is equivalent to SQ:M5 in SAM). This value is represented in lower case
    // hexadecimal format.
    string md5checksum = 3;

    // The name of this reference, for example `22`.
    string name = 4;

    // The URI from which the sequence was obtained. Typically specifies a FASTA
    // format file.
    string source_uri = 5;

    // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
    // with a version number, for example `GCF_000001405.26`.
    repeated string source_accessions = 6;

    // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
    int32 ncbi_taxon_id = 7;
  }
  // A reference set is a set of references which typically comprise a reference
  // assembly for a species, such as `GRCh38` which is representative
  // of the human genome. A reference set defines a common coordinate space for
  // comparing reference-aligned experimental data. A reference set contains 1 or
  // more references.
  //
  // For more genomics resource definitions, see [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  message ReferenceSet {
    // The server-generated reference set ID, unique across all reference sets.
    string id = 1;

    // The IDs of the reference objects that are part of this set.
    // `Reference.md5checksum` must be unique within this set.
    repeated string reference_ids = 2;

    // Order-independent MD5 checksum which identifies this reference set. The
    // checksum is computed by sorting all lower case hexadecimal string
    // `reference.md5checksum` values (for all references in this set) in
    // ascending lexicographic order, concatenating, and taking the MD5 of that
    // value. The resulting value is represented in lower case hexadecimal format.
    string md5checksum = 3;

    // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
    // indicating the species which this reference set is intended to model. Note
    // that contained references may specify a different `ncbiTaxonId`, as
    // assemblies may contain reference sequences which do not belong to the
    // modeled species, for example EBV in a human reference genome.
    int32 ncbi_taxon_id = 4;

    // Free text description of this reference set.
    string description = 5;

    // Public ID of this reference set, such as `GRCh37`.
    string assembly_id = 6;

    // The URI from which the references were obtained.
    string source_uri = 7;

    // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
    // with a version number, for example `NC_000001.11`.
    repeated string source_accessions = 8;
  }
  // Request message for the `ReferenceServiceV1.SearchReferenceSets` RPC.
  message SearchReferenceSetsRequest {
    // If present, return reference sets for which the
    // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
    repeated string md5checksums = 1;

    // If present, return reference sets for which a prefix of any of
    // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
    // match any of these strings. Accession numbers typically have a main number
    // and a version, for example `NC_000001.11`.
    repeated string accessions = 2;

    // If present, return reference sets for which a substring of their
    // `assemblyId` matches this string (case insensitive).
    string assembly_id = 3;

    // The continuation token, which is used to page through large result sets.
    // To get the next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // The maximum number of results to return in a single page. If unspecified,
    // defaults to 1024. The maximum value is 4096.
    int32 page_size = 5;
  }
  // Response message for the `ReferenceServiceV1.SearchReferenceSets` RPC.
  message SearchReferenceSetsResponse {
    // The matching reference sets.
    repeated ReferenceSet reference_sets = 1;

    // The continuation token, which is used to page through large result sets.
    // Provide this value in a subsequent request to return the next page of
    // results. This field will be empty if there aren't any additional results.
    string next_page_token = 2;
  }
  // Request message for the `ReferenceServiceV1.GetReferenceSet` RPC.
  message GetReferenceSetRequest {
    // The ID of the reference set.
    string reference_set_id = 1;
  }
  // Request message for the `ReferenceServiceV1.SearchReferences` RPC.
  message SearchReferencesRequest {
    // If present, return references for which the
    // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
    repeated string md5checksums = 1;

    // If present, return references for which a prefix of any of
    // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
    // any of these strings. Accession numbers typically have a main number and a
    // version, for example `GCF_000001405.26`.
    repeated string accessions = 2;

    // If present, return only references which belong to this reference set.
    string reference_set_id = 3;

    // The continuation token, which is used to page through large result sets.
    // To get the next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // The maximum number of results to return in a single page. If unspecified,
    // defaults to 1024. The maximum value is 4096.
    int32 page_size = 5;
  }
  // Response message for the `ReferenceServiceV1.SearchReferences` RPC.
  message SearchReferencesResponse {
    // The matching references.
    repeated Reference references = 1;

    // The continuation token, which is used to page through large result sets.
    // Provide this value in a subsequent request to return the next page of
    // results. This field will be empty if there aren't any additional results.
    string next_page_token = 2;
  }
  // Request message for the `ReferenceServiceV1.GetReference` RPC.
  message GetReferenceRequest {
    // The ID of the reference.
    string reference_id = 1;
  }
  // Request message for the `ReferenceServiceV1.ListBases` RPC.
  message ListBasesRequest {
    // The ID of the reference.
    string reference_id = 1;

    // The start position (0-based) of this query. Defaults to 0.
    int64 start = 2;

    // The end position (0-based, exclusive) of this query. Defaults to the length
    // of this reference.
    int64 end = 3;

    // The continuation token, which is used to page through large result sets.
    // To get the next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // The maximum number of bases to return in a single page. If unspecified,
    // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
    // pairs).
    int32 page_size = 5;
  }
  // Response message for the `ReferenceServiceV1.ListBases` RPC.
  message ListBasesResponse {
    // The offset position (0-based) of the given `sequence` from the
    // start of this `Reference`. This value will differ for each page
    // in a paginated request.
    int64 offset = 1;

    // A substring of the bases that make up this reference.
    string sequence = 2;

    // The continuation token, which is used to page through large result sets.
    // Provide this value in a subsequent request to return the next page of
    // results. This field will be empty if there aren't any additional results.
    string next_page_token = 3;
  }