comparison libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/insdc.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /*==========================================================================
28 * INSDC types, constants
29 */
30 version 1; // NOTE(review): presumably the schema-language version this file is written against - confirm with the VDB schema docs
31
32
33 /*--------------------------------------------------------------------------
34 * dna
35 * represented in IUPAC characters
36 */
37 typedef ascii INSDC:dna:text; // text type of the 4na, 2na and x2na CHARSET constants declared below
38
39
40 /*--------------------------------------------------------------------------
41 * 4na
42 * nucleotide data with all possible ambiguity
43 * does not represent all possible EVENTS
44 *
45 * text encodings use the IUPAC character set
46 * legal values: [ACMGRSVTWYHKDBNacmgrsvtwyhkdbn.]
47 * canonical values: [ACMGRSVTWYHKDBN]
48 *
49 * binary values are 0..15 = { NACMGRSVTWYHKDBN }
50 *
51 * 4na values use bits for each letter:
52 *
53 *   A | C | G | T
54 * =================
55 * N   |   |   |
56 * A * |   |   |
57 * C   | * |   |
58 * M * | * |   |
59 * G   |   | * |
60 * R * |   | * |
61 * S   | * | * |
62 * V * | * | * |
63 * T   |   |   | *
64 * W * |   |   | *
65 * Y   | * |   | *
66 * H * | * |   | *
67 * K   |   | * | *
68 * D * |   | * | *
69 * B   | * | * | *
70 * N * | * | * | *
71 */
72 typedef U8 INSDC:4na:bin; // unpacked 4na code; the comment above gives the value range as 0..15
73 typedef B1 INSDC:4na:packed [ 4 ]; // 4-element array of B1 - NOTE(review): presumably 4 bits per base; confirm B1 is a 1-bit type
74
75 const INSDC:4na:bin INSDC:4na:map:BINSET
76 = [ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ]; // 16 binary codes
77 const INSDC:dna:text INSDC:4na:map:CHARSET
78 = ".ACMGRSVTWYHKDBN"; // 16 characters - NOTE(review): presumably position i pairs with BINSET[i], which would make code 0 print as '.'
79 const INSDC:dna:text INSDC:4na:accept:CHARSET
80 = ".ACMGRSVTWYHKDBNacmgrsvtwyhkdbn"; // same set as "legal values" above: '.', the 15 upper-case letters and their lower-case forms
81
82
83 /*--------------------------------------------------------------------------
84 * 2na - nucleotide data A,T,G,C
85 * x2na - nucleotide data extended with single ambiguity value (N)
86 *
87 * text encodings use the IUPAC character set
88 * legal values: [ACGTNacgtn.]
89 * canonical values: [ACGTN]
90 *
91 * x2na values are 0..4 = { ACGTN }
92 *
93 * 2na values exclude N:
94 * A = 0
95 * C = 1
96 * G = 2
97 * T = 3
98 */
99 typedef U8 INSDC:2na:bin; // values 0..3 = A, C, G, T per the comment above
100 typedef U8 INSDC:x2na:bin; // values 0..4 per the comment above; 4 is the single ambiguity value N
101 typedef B1 INSDC:2na:packed [ 2 ]; // 2-element array of B1 - NOTE(review): presumably 2 bits per base; confirm B1 is a 1-bit type
102
103 const INSDC:2na:bin INSDC:2na:map:BINSET = [ 0,1,2,3 ]; // 4 binary codes
104 const INSDC:dna:text INSDC:2na:map:CHARSET = "ACGT"; // 4 characters, same order as the A=0..T=3 list above
105 const INSDC:dna:text INSDC:2na:accept:CHARSET = "ACGTacgt"; // either case; 'N' and '.' are not in this set
106 const INSDC:x2na:bin INSDC:x2na:map:BINSET = [ 0,1,2,3,4 ]; // 5 binary codes
107 const INSDC:dna:text INSDC:x2na:map:CHARSET = "ACGTN"; // 5 characters
108 const INSDC:dna:text INSDC:x2na:accept:CHARSET = "ACGTNacgtn."; // '.' is accepted but absent from map:CHARSET - NOTE(review): presumably read as N; confirm
109
110
111 /*--------------------------------------------------------------------------
112 * color - color-space text
113 * 2cs - color-space data 0,1,2,3
114 * x2cs - color-space data extended with single ambiguity value (.)
115 *
116 * text encodings use the ASCII numeric character set
117 * values: [0123.]
118 *
119 * x2cs values are 0..4 = { 0123. }
120 *
121 * 2cs values exclude '.':
122 * '0' = 0
123 * '1' = 1
124 * '2' = 2
125 * '3' = 3
126 */
127 typedef ascii INSDC:color:text; // text type of the 2cs and x2cs CHARSET constants below
128 typedef U8 INSDC:2cs:bin; // values 0..3 per the comment above
129 typedef U8 INSDC:x2cs:bin; // values 0..4 per the comment above; 4 corresponds to '.'
130 typedef B1 INSDC:2cs:packed [ 2 ]; // 2-element array of B1 - NOTE(review): presumably 2 bits per color; confirm B1 is a 1-bit type
131
132 const INSDC:2cs:bin INSDC:2cs:map:BINSET = [ 0,1,2,3 ]; // 4 binary codes
133 const INSDC:color:text INSDC:2cs:map:CHARSET = "0123"; // 4 characters
134 const INSDC:color:text INSDC:2cs:accept:CHARSET = "0123"; // identical to map:CHARSET (digits have no case variants)
135 const INSDC:x2cs:bin INSDC:x2cs:map:BINSET = [ 0,1,2,3,4 ]; // 5 binary codes
136 const INSDC:color:text INSDC:x2cs:map:CHARSET = "0123."; // 5 characters
137 const INSDC:color:text INSDC:x2cs:accept:CHARSET = "0123."; // identical to map:CHARSET
138
139 const U8 INSDC:color:default_matrix = // 25 entries, written here as 5 rows of 5
140 [ // for row and column 0..3 the entry equals (row XOR column)
141 0, 1, 2, 3, 4,
142 1, 0, 3, 2, 4,
143 2, 3, 0, 1, 4,
144 3, 2, 1, 0, 4,
145 4, 4, 4, 4, 4 // every entry in row 4 or column 4 is 4
146 ]; // NOTE(review): presumably indexed by x2na / x2cs codes when converting color space - confirm against the consumer
147
148
149 /*--------------------------------------------------------------------------
150 * protein
151 * represented in IUPAC characters
152 */
153 typedef ascii INSDC:protein:text; // text type of the aa CHARSET constants below
154
155
156 /*--------------------------------------------------------------------------
157 * aa
158 * protein data
159 * text encodings use the IUPAC character set
160 */
161 typedef U8 INSDC:aa:bin;
162
163 const INSDC:aa:bin INSDC:aa:map:BINSET
164 = [ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 ]; // 27 codes starting at 1; 0 is not listed
165 const INSDC:protein:text INSDC:aa:map:CHARSET
166 = "ABCDEFGHIKLMNPQRSTVWXYZU*OJ"; // 27 characters, not alphabetical: 'U', '*', 'O', 'J' come last - NOTE(review): presumably position i pairs with BINSET[i]
167 const INSDC:protein:text INSDC:aa:accept:CHARSET
168 = "ABCDEFGHIJKLMNOPQRSTVWXYZU*abcdefghijklmnopqrstvwxyzu"; // the 27 mapped characters plus lower-case forms of the 26 letters
169
170
171 /*--------------------------------------------------------------------------
172 * quality
173 * quality scoring values
174 *
175 * phred legal values: 0..63
176 */
177 typedef U8 INSDC:quality:phred; // the comment above gives the legal range as 0..63
178 typedef I8 INSDC:quality:log_odds; // I8 rather than U8 - NOTE(review): presumably because log-odds scores can be negative
179
180 // text-encoding of quality scores
181 // offsets are 33 = '!' and 64 = '@'
182 typedef ascii INSDC:quality:text:phred_33; // phred text, offset 33 ('!')
183 typedef ascii INSDC:quality:text:phred_64; // phred text, offset 64 ('@')
184 typedef ascii INSDC:quality:text:log_odds_64; // log-odds text, offset 64 ('@')
185
186
187 /*--------------------------------------------------------------------------
188 * coordinate
189 * zero and one based coordinates
190 */
191
192 // 32 bit coordinates
193 typedef I32 INSDC:coord:val; // coordinate value; the min limits below have the top bit set, i.e. negative as I32
194 typedef U32 INSDC:coord:len; // length
195
196 // zero or one based coordinate system
197 typedef INSDC:coord:val INSDC:coord:zero; // both are aliases of coord:val; the name alone records the base
198 typedef INSDC:coord:val INSDC:coord:one;
199
200 // POSITION types for relating bases to their location in signal
201 typedef INSDC:coord:zero INSDC:position:zero;
202 typedef INSDC:coord:one INSDC:position:one;
203
204 // one-based coordinate limits
205 const INSDC:coord:one INSDC:coord:min:one = 0x80000001; // -2147483647 when read as a 32-bit two's-complement pattern - NOTE(review): confirm the schema compiler reads it that way
206 const INSDC:coord:one INSDC:coord:max:one = 0x3FFFFFFF; // 1073741823 (2^30 - 1) - NOTE(review): why the cap is 2^30 rather than 2^31 is not visible here
207
208 // zero-based coordinate limits
209 const INSDC:coord:zero INSDC:coord:min:zero = 0x80000000; // bit pattern one below min:one
210 const INSDC:coord:zero INSDC:coord:max:zero = 0x3FFFFFFE; // 1073741822, one below max:one
211
212 /*-------------------------------------------------------------------------
213 * read filter values
214 */
215 typedef U8 INSDC:SRA:read_filter;
216 const INSDC:SRA:read_filter SRA_READ_FILTER_PASS = 0; // the four values are the consecutive codes 0..3, not independent bit flags
217 const INSDC:SRA:read_filter SRA_READ_FILTER_REJECT = 1;
218 const INSDC:SRA:read_filter SRA_READ_FILTER_CRITERIA = 2;
219 const INSDC:SRA:read_filter SRA_READ_FILTER_REDACTED = 3; // equals 1 + 2 numerically, but is its own code
220
221 /*-------------------------------------------------------------------------
222 * read type bits
223 */
224 typedef U8 INSDC:SRA:xread_type;
225 const INSDC:SRA:xread_type SRA_READ_TYPE_TECHNICAL = 0; // no bits set
226 const INSDC:SRA:xread_type SRA_READ_TYPE_BIOLOGICAL = 1; // bit 0
227 const INSDC:SRA:xread_type SRA_READ_TYPE_FORWARD = 2; // bit 1
228 const INSDC:SRA:xread_type SRA_READ_TYPE_REVERSE = 4; // bit 2 - NOTE(review): presumably OR-ed with bit 0 to add orientation; confirm with consumers
229
230 // original read-types included only technical and biological
231 typedef INSDC:SRA:xread_type INSDC:SRA:read_type; // plain alias of xread_type, so both names share one representation
232