Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/insdc.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 /*========================================================================== | |
| 28 * INSDC types, constants | |
| 29 */ | |
| 30 version 1; // NOTE(review): presumably declares the schema-language version for this file — confirm | |
| 31 | |
| 32 | |
| 33 /*-------------------------------------------------------------------------- | |
| 34 * dna | |
| 35 * represented in IUPAC characters | |
| 36 */ | |
| 37 typedef ascii INSDC:dna:text; // DNA text type, declared as an alias of ascii | |
| 38 | |
| 39 | |
| 40 /*-------------------------------------------------------------------------- | |
| 41 * 4na | |
| 42 * nucleotide data with all possible ambiguity | |
| 43 * does not represent all possible EVENTS | |
| 44 * | |
| 45 * text encodings use the IUPAC character set | |
| 46 * legal values: [ACMGRSVTWYHKDBNacmgrsvtwyhkdbn.] | |
| 47 * canonical values: [ACMGRSVTWYHKDBN] | |
| 48 * | |
| 49 * binary values are 0..15 = { NACMGRSVTWYHKDBN } | |
| 50 * | |
| 51 * 4na values use bits for each letter: | |
| 52 * | |
| 53 * A | C | G | T | |
| 54 * ================= | |
| 55 * N | | | | |
| 56 * A * | | | | |
| 57 * C | * | | | |
| 58 * M * | * | | | |
| 59 * G | | * | | |
| 60 * R * | | * | | |
| 61 * S | * | * | | |
| 62 * V * | * | * | | |
| 63 * T | | | * | |
| 64 * W * | | | * | |
| 65 * Y | * | | * | |
| 66 * H * | * | | * | |
| 67 * K | | * | * | |
| 68 * D * | | * | * | |
| 69 * B | * | * | * | |
| 70 * N * | * | * | * | |
| 71 */ | |
| 72 typedef U8 INSDC:4na:bin; // binary 4na code held in a U8; values 0..15 per the comment above | |
| 73 typedef B1 INSDC:4na:packed [ 4 ]; // packed form: 4 B1 elements per code (presumably 1 bit each — confirm) | |
| 74 | |
| 75 const INSDC:4na:bin INSDC:4na:map:BINSET | |
| 76 = [ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ]; // all 16 binary codes in ascending order | |
| 77 const INSDC:dna:text INSDC:4na:map:CHARSET | |
| 78 = ".ACMGRSVTWYHKDBN"; // 16 characters; presumably paired index-for-index with map:BINSET (0 <-> '.') — confirm at the use site | |
| 79 const INSDC:dna:text INSDC:4na:accept:CHARSET | |
| 80 = ".ACMGRSVTWYHKDBNacmgrsvtwyhkdbn"; // '.', the 15 upper-case letters, then their lower-case forms | |
| 81 | |
| 82 | |
| 83 /*-------------------------------------------------------------------------- | |
| 84 * 2na - nucleotide data A,T,G,C | |
| 85 * x2na - nucleotide data extended with single ambiguity value (N) | |
| 86 * | |
| 87 * text encodings use the IUPAC character set | |
| 88 * legal values: [ACGTNacgtn.] | |
| 89 * canonical values: [ACGTN] | |
| 90 * | |
| 91 * x2na values are 0..4 = { ACGTN } | |
| 92 * | |
| 93 * 2na values exclude N: | |
| 94 * A = 0 | |
| 95 * C = 1 | |
| 96 * G = 2 | |
| 97 * T = 3 | |
| 98 */ | |
| 99 typedef U8 INSDC:2na:bin; // binary 2na code held in a U8; A=0, C=1, G=2, T=3 per the comment above | |
| 100 typedef U8 INSDC:x2na:bin; // extended code held in a U8; values 0..4 = { ACGTN } per the comment above | |
| 101 typedef B1 INSDC:2na:packed [ 2 ]; // packed form: 2 B1 elements per base (presumably 1 bit each — confirm) | |
| 102 | |
| 103 const INSDC:2na:bin INSDC:2na:map:BINSET = [ 0,1,2,3 ]; // the four 2na binary codes | |
| 104 const INSDC:dna:text INSDC:2na:map:CHARSET = "ACGT"; // four characters, same order as the A..T = 0..3 listing above | |
| 105 const INSDC:dna:text INSDC:2na:accept:CHARSET = "ACGTacgt"; // upper- and lower-case forms; no 'N' or '.' | |
| 106 const INSDC:x2na:bin INSDC:x2na:map:BINSET = [ 0,1,2,3,4 ]; // the five x2na binary codes | |
| 107 const INSDC:dna:text INSDC:x2na:map:CHARSET = "ACGTN"; // five characters; 'N' is last, matching 0..4 = { ACGTN } | |
| 108 const INSDC:dna:text INSDC:x2na:accept:CHARSET = "ACGTNacgtn."; // both cases plus '.', the legal values listed above | |
| 109 | |
| 110 | |
| 111 /*-------------------------------------------------------------------------- | |
| 112 * color - color-space text | |
| 113 * 2cs - color-space data 0,1,2,3 | |
| 114 * x2cs - color-space data extended with single ambiguity value (.) | |
| 115 * | |
| 116 * text encodings use the ASCII numeric character set | |
| 117 * values: [0123.] | |
| 118 * | |
| 119 * x2cs values are 0..4 = { 0123. } | |
| 120 * | |
| 121 * 2cs values exclude '.': | |
| 122 * '0' = 0 | |
| 123 * '1' = 1 | |
| 124 * '2' = 2 | |
| 125 * '3' = 3 | |
| 126 */ | |
| 127 typedef ascii INSDC:color:text; // color-space text type, declared as an alias of ascii | |
| 128 typedef U8 INSDC:2cs:bin; // binary color code held in a U8; values 0..3 per the comment above | |
| 129 typedef U8 INSDC:x2cs:bin; // extended color code held in a U8; values 0..4 = { 0123. } per the comment above | |
| 130 typedef B1 INSDC:2cs:packed [ 2 ]; // packed form: 2 B1 elements per color (presumably 1 bit each — confirm) | |
| 131 | |
| 132 const INSDC:2cs:bin INSDC:2cs:map:BINSET = [ 0,1,2,3 ]; // the four 2cs binary codes | |
| 133 const INSDC:color:text INSDC:2cs:map:CHARSET = "0123"; // four digit characters in ascending order | |
| 134 const INSDC:color:text INSDC:2cs:accept:CHARSET = "0123"; // identical to the map charset; '.' is not included | |
| 135 const INSDC:x2cs:bin INSDC:x2cs:map:BINSET = [ 0,1,2,3,4 ]; // the five x2cs binary codes | |
| 136 const INSDC:color:text INSDC:x2cs:map:CHARSET = "0123."; // five characters; '.' is last, matching 0..4 = { 0123. } | |
| 137 const INSDC:color:text INSDC:x2cs:accept:CHARSET = "0123."; // identical to the x2cs map charset | |
| 138 | |
| 139 const U8 INSDC:color:default_matrix = // 25 values written as 5 rows of 5; the matrix is symmetric | |
| 140 [ | |
| 141 0, 1, 2, 3, 4, | |
| 142 1, 0, 3, 2, 4, | |
| 143 2, 3, 0, 1, 4, | |
| 144 3, 2, 1, 0, 4, | |
| 145 4, 4, 4, 4, 4 | |
| 146 ]; // upper-left 4x4 entries equal (row XOR column); last row and column are all 4. NOTE(review): what the row/column indices represent is not shown here — confirm at the use site | |
| 147 | |
| 148 | |
| 149 /*-------------------------------------------------------------------------- | |
| 150 * protein | |
| 151 * represented in IUPAC characters | |
| 152 */ | |
| 153 typedef ascii INSDC:protein:text; // protein text type, declared as an alias of ascii | |
| 154 | |
| 155 | |
| 156 /*-------------------------------------------------------------------------- | |
| 157 * aa | |
| 158 * protein data | |
| 159 * text encodings use the IUPAC character set | |
| 160 */ | |
| 161 typedef U8 INSDC:aa:bin; // binary amino-acid code held in a U8 | |
| 162 | |
| 163 const INSDC:aa:bin INSDC:aa:map:BINSET | |
| 164 = [ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 ]; // 27 codes, starting at 1 rather than 0 | |
| 165 const INSDC:protein:text INSDC:aa:map:CHARSET | |
| 166 = "ABCDEFGHIKLMNPQRSTVWXYZU*OJ"; // 27 characters, not strictly alphabetical ('U', '*', 'O', 'J' come last); presumably paired index-for-index with map:BINSET — confirm | |
| 167 const INSDC:protein:text INSDC:aa:accept:CHARSET | |
| 168 = "ABCDEFGHIJKLMNOPQRSTVWXYZU*abcdefghijklmnopqrstvwxyzu"; // the same 27 characters in a different order, followed by lower-case forms of the 26 letters | |
| 169 | |
| 170 | |
| 171 /*-------------------------------------------------------------------------- | |
| 172 * quality | |
| 173 * quality scoring values | |
| 174 * | |
| 175 * phred legal values: 0..63 | |
| 176 */ | |
| 177 typedef U8 INSDC:quality:phred; // phred score held in a U8; legal values 0..63 per the comment above | |
| 178 typedef I8 INSDC:quality:log_odds; // log-odds score held in an I8 (presumably signed 8-bit — confirm) | |
| 179 | |
| 180 // text-encoding of quality scores | |
| 181 // offsets are 33 = '!' and 64 = '@' | |
| 182 typedef ascii INSDC:quality:text:phred_33; // ascii text; the _33 suffix matches the 33 = '!' offset noted above | |
| 183 typedef ascii INSDC:quality:text:phred_64; // ascii text; the _64 suffix matches the 64 = '@' offset noted above | |
| 184 typedef ascii INSDC:quality:text:log_odds_64; // ascii text for log-odds scores; the _64 suffix matches the 64 = '@' offset | |
| 185 | |
| 186 | |
| 187 /*-------------------------------------------------------------------------- | |
| 188 * coordinate | |
| 189 * zero and one based coordinates | |
| 190 */ | |
| 191 | |
| 192 // 32 bit coordinates | |
| 193 typedef I32 INSDC:coord:val; // coordinate value stored as I32 (presumably signed 32-bit — confirm) | |
| 194 typedef U32 INSDC:coord:len; // length stored as U32 (presumably unsigned 32-bit — confirm) | |
| 195 | |
| 196 // zero or one based coordinate system | |
| 197 typedef INSDC:coord:val INSDC:coord:zero; // zero-based coordinate, an alias of coord:val | |
| 198 typedef INSDC:coord:val INSDC:coord:one; // one-based coordinate, an alias of coord:val | |
| 199 | |
| 200 // POSITION types for relating bases to their location in signal | |
| 201 typedef INSDC:coord:zero INSDC:position:zero; // zero-based position, an alias of coord:zero | |
| 202 typedef INSDC:coord:one INSDC:position:one; // one-based position, an alias of coord:one | |
| 203 | |
| 204 // one-based coordinate limits | |
| 205 const INSDC:coord:one INSDC:coord:min:one = 0x80000001; // NOTE(review): literal exceeds 0x7FFFFFFF; presumably read as a negative I32 bit pattern — confirm | |
| 206 const INSDC:coord:one INSDC:coord:max:one = 0x3FFFFFFF; | |
| 207 | |
| 208 // zero-based coordinate limits | |
| 209 const INSDC:coord:zero INSDC:coord:min:zero = 0x80000000; // one less than coord:min:one | |
| 210 const INSDC:coord:zero INSDC:coord:max:zero = 0x3FFFFFFE; // one less than coord:max:one | |
| 211 | |
| 212 /*------------------------------------------------------------------------- | |
| 213 * read filter bits | |
| 214 */ | |
| 215 typedef U8 INSDC:SRA:read_filter; // read-filter code held in a U8 | |
| 216 const INSDC:SRA:read_filter SRA_READ_FILTER_PASS = 0; // the four filter constants are the consecutive values 0..3 | |
| 217 const INSDC:SRA:read_filter SRA_READ_FILTER_REJECT = 1; | |
| 218 const INSDC:SRA:read_filter SRA_READ_FILTER_CRITERIA = 2; | |
| 219 const INSDC:SRA:read_filter SRA_READ_FILTER_REDACTED = 3; // 3 equals (REJECT | CRITERIA) numerically; NOTE(review): whether that is intentional is not shown here | |
| 220 | |
| 221 /*------------------------------------------------------------------------- | |
| 222 * read type bits | |
| 223 */ | |
| 224 typedef U8 INSDC:SRA:xread_type; // extended read-type value held in a U8 | |
| 225 const INSDC:SRA:xread_type SRA_READ_TYPE_TECHNICAL = 0; | |
| 226 const INSDC:SRA:xread_type SRA_READ_TYPE_BIOLOGICAL = 1; | |
| 227 const INSDC:SRA:xread_type SRA_READ_TYPE_FORWARD = 2; // 2 and 4 are distinct single-bit values; presumably OR-ed with the technical/biological value — confirm | |
| 228 const INSDC:SRA:xread_type SRA_READ_TYPE_REVERSE = 4; | |
| 229 | |
| 230 // original read-types included only technical and biological | |
| 231 typedef INSDC:SRA:xread_type INSDC:SRA:read_type; // read_type is declared as an alias of xread_type | |
| 232 |
