1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- // Copyright 2016 Google Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
syntax = "proto3";

package google.genomics.v1;

// NOTE(review): nothing in the visible part of this file references this
// import; it is retained unchanged.
import "google/api/annotations.proto";

// Code-generation settings for the C++, Go and Java outputs.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "CigarProto";
option java_package = "com.google.genomics.v1";

// One operation within a CIGAR alignment.
message CigarUnit {
  // The kinds of CIGAR alignment operation that exist. This enum is used
  // wherever CIGAR alignments are used.
  enum Operation {
    // Zero (default) value: no operation has been specified.
    OPERATION_UNSPECIFIED = 0;

    // Equivalent to SAM's `M`. The sequence can be aligned to the reference
    // with no evidence of an INDEL. In contrast to `SEQUENCE_MATCH` and
    // `SEQUENCE_MISMATCH`, `ALIGNMENT_MATCH` says nothing about whether the
    // read and reference sequences are an exact match.
    ALIGNMENT_MATCH = 1;

    // Equivalent to SAM's `I`. The read contains evidence of bases being
    // inserted into the reference.
    INSERT = 2;

    // Equivalent to SAM's `D`. The read contains evidence of bases being
    // deleted from the reference.
    DELETE = 3;

    // Equivalent to SAM's `N`. The read skips a long segment of the
    // reference, but those bases have not been deleted. Commonly seen with
    // RNA-seq data, where reads may skip long segments of the reference
    // between exons.
    SKIP = 4;

    // Equivalent to SAM's `S`. Bases at the start/end of the read were not
    // considered during alignment, which may happen when most of a read maps
    // except for low quality bases at its start/end. Soft-clipped bases are
    // still stored in the read.
    CLIP_SOFT = 5;

    // Equivalent to SAM's `H`. Bases at the start/end of the read have been
    // omitted from this alignment. This may happen when this linear alignment
    // is part of a chimeric alignment, or when the read has been trimmed (for
    // example, during error correction or to trim poly-A tails for RNA-seq).
    CLIP_HARD = 6;

    // Equivalent to SAM's `P`. There is padding in the alignment.
    PAD = 7;

    // Equivalent to SAM's `=`. This portion of the aligned sequence exactly
    // matches the reference.
    SEQUENCE_MATCH = 8;

    // Equivalent to SAM's `X`. This portion of the aligned sequence is an
    // alignment match to the reference but a sequence mismatch, which can
    // indicate a SNP or a read error.
    SEQUENCE_MISMATCH = 9;
  }

  // The kind of operation this unit represents.
  Operation operation = 1;

  // Required. The number of genomic bases that the operation runs for.
  int64 operation_length = 2;

  // Used only at mismatches (`SEQUENCE_MISMATCH`) and deletions (`DELETE`);
  // appears as `referenceSequence` in JSON. Filling this field replaces SAM's
  // MD tag. Left unset when the relevant information is not available.
  string reference_sequence = 3;
}
|