Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/insdc.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 /*=========================================================================== | |
2 * | |
3 * PUBLIC DOMAIN NOTICE | |
4 * National Center for Biotechnology Information | |
5 * | |
6 * This software/database is a "United States Government Work" under the | |
7 * terms of the United States Copyright Act. It was written as part of | |
8 * the author's official duties as a United States Government employee and | |
9 * thus cannot be copyrighted. This software/database is freely available | |
10 * to the public for use. The National Library of Medicine and the U.S. | |
11 * Government have not placed any restriction on its use or reproduction. | |
12 * | |
13 * Although all reasonable efforts have been taken to ensure the accuracy | |
14 * and reliability of the software and data, the NLM and the U.S. | |
15 * Government do not and cannot warrant the performance or results that | |
16 * may be obtained by using this software or data. The NLM and the U.S. | |
17 * Government disclaim all warranties, express or implied, including | |
18 * warranties of performance, merchantability or fitness for any particular | |
19 * purpose. | |
20 * | |
21 * Please cite the author in any work or product based on this material. | |
22 * | |
23 * =========================================================================== | |
24 * | |
25 */ | |
26 | |
27 /*========================================================================== | |
28 * INSDC types, constants | |
29 */ | |
30 version 1; /* presumably the schema-language version this file is written against — confirm */ | |
31 | |
32 | |
33 /*-------------------------------------------------------------------------- | |
34 * dna | |
35 * represented in IUPAC characters | |
36 */ | |
37 typedef ascii INSDC:dna:text; /* ascii-based text type; used as the type of the 4na, 2na and x2na CHARSET constants */ | |
38 | |
39 | |
40 /*-------------------------------------------------------------------------- | |
41 * 4na | |
42 * nucleotide data with all possible ambiguity | |
43 * does not represent all possible EVENTS | |
44 * | |
45 * text encodings use the IUPAC character set | |
46 * legal values: [ACMGRSVTWYHKDBNacmgrsvtwyhkdbn.] | |
47 * canonical values: [ACMGRSVTWYHKDBN] | |
48 * | |
49 * binary values are 0..15 = { NACMGRSVTWYHKDBN } | |
50 * | |
51 * 4na values use bits for each letter: | |
52 * | |
53 * A | C | G | T | |
54 * ================= | |
55 * N | | | | |
56 * A * | | | | |
57 * C | * | | | |
58 * M * | * | | | |
59 * G | | * | | |
60 * R * | | * | | |
61 * S | * | * | | |
62 * V * | * | * | | |
63 * T | | | * | |
64 * W * | | | * | |
65 * Y | * | | * | |
66 * H * | * | | * | |
67 * K | | * | * | |
68 * D * | | * | * | |
69 * B | * | * | * | |
70 * N * | * | * | * | |
71 */ | |
72 typedef U8 INSDC:4na:bin; /* unpacked 4na code, values 0..15, held in a U8 */ | |
73 typedef B1 INSDC:4na:packed [ 4 ]; /* packed form: 4 B1 elements per code — presumably one bit each for A, C, G, T; confirm */ | |
74 | |
75 const INSDC:4na:bin INSDC:4na:map:BINSET /* the 16 binary codes, in ascending order */ | |
76 = [ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ]; | |
77 const INSDC:dna:text INSDC:4na:map:CHARSET /* 16 characters, one per BINSET entry — presumably paired by index; confirm */ | |
78 = ".ACMGRSVTWYHKDBN"; /* NOTE(review): index 0 is '.', whereas the header comment lists N for value 0 — confirm which is intended */ | |
79 const INSDC:dna:text INSDC:4na:accept:CHARSET /* map:CHARSET followed by the lower-case letters; presumably the characters accepted as input */ | |
80 = ".ACMGRSVTWYHKDBNacmgrsvtwyhkdbn"; | |
81 | |
82 | |
83 /*-------------------------------------------------------------------------- | |
84 * 2na - nucleotide data A,T,G,C | |
85 * x2na - nucleotide data extended with single ambiguity value (N) | |
86 * | |
87 * text encodings use the IUPAC character set | |
88 * legal values: [ACGTNacgtn.] | |
89 * canonical values: [ACGTN] | |
90 * | |
91 * x2na values are 0..4 = { ACGTN } | |
92 * | |
93 * 2na values exclude N: | |
94 * A = 0 | |
95 * C = 1 | |
96 * G = 2 | |
97 * T = 3 | |
98 */ | |
99 typedef U8 INSDC:2na:bin; /* unpacked 2na code: A=0, C=1, G=2, T=3 */ | |
100 typedef U8 INSDC:x2na:bin; /* unpacked x2na code: the 2na codes plus 4 for N */ | |
101 typedef B1 INSDC:2na:packed [ 2 ]; /* packed form: 2 B1 elements per base; no packed x2na type is declared in this section */ | |
102 | |
103 const INSDC:2na:bin INSDC:2na:map:BINSET = [ 0,1,2,3 ]; /* the four 2na codes */ | |
104 const INSDC:dna:text INSDC:2na:map:CHARSET = "ACGT"; /* canonical characters, listed in BINSET order */ | |
105 const INSDC:dna:text INSDC:2na:accept:CHARSET = "ACGTacgt"; /* upper- and lower-case letters only; neither N nor '.' is listed */ | |
106 const INSDC:x2na:bin INSDC:x2na:map:BINSET = [ 0,1,2,3,4 ]; /* the 2na codes plus 4 */ | |
107 const INSDC:dna:text INSDC:x2na:map:CHARSET = "ACGTN"; /* N occupies index 4 */ | |
108 const INSDC:dna:text INSDC:x2na:accept:CHARSET = "ACGTNacgtn."; /* also lists '.', which has no entry of its own in map:CHARSET; how it is mapped is not shown here */ | |
109 | |
110 | |
111 /*-------------------------------------------------------------------------- | |
112 * color - color-space text | |
113 * 2cs - color-space data 0,1,2,3 | |
114 * x2cs - color-space data extended with single ambiguity value (.) | |
115 * | |
116 * text encodings use the ASCII numeric character set | |
117 * values: [0123.] | |
118 * | |
119 * x2cs values are 0..4 = { 0123. } | |
120 * | |
121 * 2cs values exclude '.': | |
122 * '0' = 0 | |
123 * '1' = 1 | |
124 * '2' = 2 | |
125 * '3' = 3 | |
126 */ | |
127 typedef ascii INSDC:color:text; /* ascii-based text type; used as the type of the 2cs and x2cs CHARSET constants */ | |
128 typedef U8 INSDC:2cs:bin; /* unpacked 2cs code: '0'..'3' stored as 0..3 */ | |
129 typedef U8 INSDC:x2cs:bin; /* unpacked x2cs code: the 2cs codes plus 4 for '.' */ | |
130 typedef B1 INSDC:2cs:packed [ 2 ]; /* packed form: 2 B1 elements per color; no packed x2cs type is declared in this section */ | |
131 | |
132 const INSDC:2cs:bin INSDC:2cs:map:BINSET = [ 0,1,2,3 ]; /* the four 2cs codes */ | |
133 const INSDC:color:text INSDC:2cs:map:CHARSET = "0123"; /* canonical characters, listed in BINSET order */ | |
134 const INSDC:color:text INSDC:2cs:accept:CHARSET = "0123"; /* identical to map:CHARSET */ | |
135 const INSDC:x2cs:bin INSDC:x2cs:map:BINSET = [ 0,1,2,3,4 ]; /* the 2cs codes plus 4 */ | |
136 const INSDC:color:text INSDC:x2cs:map:CHARSET = "0123."; /* '.' occupies index 4 */ | |
137 const INSDC:color:text INSDC:x2cs:accept:CHARSET = "0123."; /* identical to map:CHARSET */ | |
138 | |
139 const U8 INSDC:color:default_matrix = /* 25 entries written as 5 rows of 5; presumably indexed by codes 0..4 with 4 = ambiguous — confirm against the consumer */ | |
140 [ | |
141 0, 1, 2, 3, 4, /* within the first 4 rows and columns, each entry equals row XOR column */ | |
142 1, 0, 3, 2, 4, | |
143 2, 3, 0, 1, 4, | |
144 3, 2, 1, 0, 4, | |
145 4, 4, 4, 4, 4 /* every entry in row 4 or column 4 is 4 */ | |
146 ]; | |
147 | |
148 | |
149 /*-------------------------------------------------------------------------- | |
150 * protein | |
151 * represented in IUPAC characters | |
152 */ | |
153 typedef ascii INSDC:protein:text; /* ascii-based text type; used as the type of the aa CHARSET constants */ | |
154 | |
155 | |
156 /*-------------------------------------------------------------------------- | |
157 * aa | |
158 * protein data | |
159 * text encodings use the IUPAC character set | |
160 */ | |
161 typedef U8 INSDC:aa:bin; /* binary amino-acid code held in a U8 */ | |
162 | |
163 const INSDC:aa:bin INSDC:aa:map:BINSET /* 27 codes; numbering starts at 1, unlike the nucleotide and color sets, which start at 0 */ | |
164 = [ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 ]; | |
165 const INSDC:protein:text INSDC:aa:map:CHARSET /* 27 characters, one per BINSET entry — presumably paired by index; confirm */ | |
166 = "ABCDEFGHIKLMNPQRSTVWXYZU*OJ"; /* not alphabetical: U, '*', O and J come last, so the order matters if pairing is by index */ | |
167 const INSDC:protein:text INSDC:aa:accept:CHARSET /* upper-case set (with '*') followed by lower-case letters; presumably the characters accepted as input */ | |
168 = "ABCDEFGHIJKLMNOPQRSTVWXYZU*abcdefghijklmnopqrstvwxyzu"; | |
169 | |
170 | |
171 /*-------------------------------------------------------------------------- | |
172 * quality | |
173 * quality scoring values | |
174 * | |
175 * phred legal values: 0..63 | |
176 */ | |
177 typedef U8 INSDC:quality:phred; /* binary phred score; the comment above gives the legal range as 0..63 */ | |
178 typedef I8 INSDC:quality:log_odds; /* binary log-odds score; I8 rather than U8 — presumably because these scores can be negative; confirm */ | |
179 | |
180 // text-encoding of quality scores | |
181 // offsets are 33 = '!' and 64 = '@' | |
182 typedef ascii INSDC:quality:text:phred_33; /* phred text; by its name, the offset-33 ('!') encoding */ | |
183 typedef ascii INSDC:quality:text:phred_64; /* phred text; by its name, the offset-64 ('@') encoding */ | |
184 typedef ascii INSDC:quality:text:log_odds_64; /* log-odds text; by its name, the offset-64 ('@') encoding */ | |
185 | |
186 | |
187 /*-------------------------------------------------------------------------- | |
188 * coordinate | |
189 * zero and one based coordinates | |
190 */ | |
191 | |
192 // 32 bit coordinates | |
193 typedef I32 INSDC:coord:val; /* coordinate value, based on I32 */ | |
194 typedef U32 INSDC:coord:len; /* length, based on U32 */ | |
195 | |
196 // zero or one based coordinate system | |
197 typedef INSDC:coord:val INSDC:coord:zero; /* zero-based coordinate; same underlying type as coord:one, distinguished by name */ | |
198 typedef INSDC:coord:val INSDC:coord:one; /* one-based coordinate */ | |
199 | |
200 // POSITION types for relating bases to their location in signal | |
201 typedef INSDC:coord:zero INSDC:position:zero; /* position expressed as a zero-based coordinate */ | |
202 typedef INSDC:coord:one INSDC:position:one; /* position expressed as a one-based coordinate */ | |
203 | |
204 // one-based coordinate limits | |
205 const INSDC:coord:one INSDC:coord:min:one = 0x80000001; /* bit pattern of -2147483647 when read as a two's-complement 32-bit value — presumably the intent; confirm how hex literals are typed */ | |
206 const INSDC:coord:one INSDC:coord:max:one = 0x3FFFFFFF; /* 1073741823 (2^30 - 1), not the largest 32-bit signed value; the reason is not stated here */ | |
207 | |
208 // zero-based coordinate limits | |
209 const INSDC:coord:zero INSDC:coord:min:zero = 0x80000000; /* one less than min:one (bit pattern of -2147483648 under the same reading) */ | |
210 const INSDC:coord:zero INSDC:coord:max:zero = 0x3FFFFFFE; /* one less than max:one: 1073741822 */ | |
211 | |
212 /*------------------------------------------------------------------------- | |
213 * read filters bits | |
214 */ | |
215 typedef U8 INSDC:SRA:read_filter; /* NOTE(review): the heading says "bits", but the four values below are consecutive codes 0..3, not distinct single-bit flags — confirm */ | |
216 const INSDC:SRA:read_filter SRA_READ_FILTER_PASS = 0; /* code 0 */ | |
217 const INSDC:SRA:read_filter SRA_READ_FILTER_REJECT = 1; /* code 1 */ | |
218 const INSDC:SRA:read_filter SRA_READ_FILTER_CRITERIA = 2; /* code 2 */ | |
219 const INSDC:SRA:read_filter SRA_READ_FILTER_REDACTED = 3; /* code 3; numerically equal to 1|2, so it cannot be told apart from REJECT combined with CRITERIA if values were OR-ed */ | |
220 | |
221 /*------------------------------------------------------------------------- | |
222 * read type bits | |
223 */ | |
224 typedef U8 INSDC:SRA:xread_type; /* extended read type; the older name read_type is aliased to it below */ | |
225 const INSDC:SRA:xread_type SRA_READ_TYPE_TECHNICAL = 0; /* 0: no bits set */ | |
226 const INSDC:SRA:xread_type SRA_READ_TYPE_BIOLOGICAL = 1; /* bit 0 */ | |
227 const INSDC:SRA:xread_type SRA_READ_TYPE_FORWARD = 2; /* bit 1 */ | |
228 const INSDC:SRA:xread_type SRA_READ_TYPE_REVERSE = 4; /* bit 2; 1, 2 and 4 are distinct single bits, so presumably combinable with OR — confirm */ | |
229 | |
230 // original read-types included only technical and biological | |
231 typedef INSDC:SRA:xread_type INSDC:SRA:read_type; /* alias: read_type is declared in terms of xread_type, so both share the U8 representation */ | |
232 |