cigar.proto 3.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. // Copyright 2016 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.genomics.v1;
  16. import "google/api/annotations.proto";
  17. option cc_enable_arenas = true;
  18. option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
  19. option java_multiple_files = true;
  20. option java_outer_classname = "CigarProto";
  21. option java_package = "com.google.genomics.v1";
  22. // A single CIGAR operation.
  23. message CigarUnit {
  24. // Describes the different types of CIGAR alignment operations that exist.
  25. // Used wherever CIGAR alignments are used.
  26. enum Operation {
  27. OPERATION_UNSPECIFIED = 0;
  28. // An alignment match indicates that a sequence can be aligned to the
  29. // reference without evidence of an INDEL. Unlike the
  30. // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
  31. // the `ALIGNMENT_MATCH` operator does not indicate whether the
  32. // reference and read sequences are an exact match. This operator is
  33. // equivalent to SAM's `M`.
  34. ALIGNMENT_MATCH = 1;
  35. // The insert operator indicates that the read contains evidence of bases
  36. // being inserted into the reference. This operator is equivalent to SAM's
  37. // `I`.
  38. INSERT = 2;
  39. // The delete operator indicates that the read contains evidence of bases
  40. // being deleted from the reference. This operator is equivalent to SAM's
  41. // `D`.
  42. DELETE = 3;
  43. // The skip operator indicates that this read skips a long segment of the
  44. // reference, but the bases have not been deleted. This operator is commonly
  45. // used when working with RNA-seq data, where reads may skip long segments
  46. // of the reference between exons. This operator is equivalent to SAM's
  47. // `N`.
  48. SKIP = 4;
  49. // The soft clip operator indicates that bases at the start/end of a read
  50. // have not been considered during alignment. This may occur if the majority
  51. // of a read maps, except for low quality bases at the start/end of a read.
  52. // This operator is equivalent to SAM's `S`. Bases that are soft
  53. // clipped will still be stored in the read.
  54. CLIP_SOFT = 5;
  55. // The hard clip operator indicates that bases at the start/end of a read
  56. // have been omitted from this alignment. This may occur if this linear
  57. // alignment is part of a chimeric alignment, or if the read has been
  58. // trimmed (for example, during error correction or to trim poly-A tails for
  59. // RNA-seq). This operator is equivalent to SAM's `H`.
  60. CLIP_HARD = 6;
  61. // The pad operator indicates that there is padding in an alignment. This
  62. // operator is equivalent to SAM's `P`.
  63. PAD = 7;
  64. // This operator indicates that this portion of the aligned sequence exactly
  65. // matches the reference. This operator is equivalent to SAM's `=`.
  66. SEQUENCE_MATCH = 8;
  67. // This operator indicates that this portion of the aligned sequence is an
  68. // alignment match to the reference, but a sequence mismatch. This can
  69. // indicate a SNP or a read error. This operator is equivalent to SAM's
  70. // `X`.
  71. SEQUENCE_MISMATCH = 9;
  72. }
  73. Operation operation = 1;
  74. // The number of genomic bases that the operation runs for. Required.
  75. int64 operation_length = 2;
  76. // `referenceSequence` is only used at mismatches
  77. // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
  78. // Filling this field replaces SAM's MD tag. If the relevant information is
  79. // not available, this field is unset.
  80. string reference_sequence = 3;
  81. }