comparison libs/sratoolkit.2.8.0-centos_linux64/schema/align/seq.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /*==========================================================================
28 * Sequence schema
29 */
30 version 1;
31
32 include 'vdb/vdb.vschema';
33 include 'ncbi/seq.vschema';
34
35
36 /* cmp_base_space
37 * table representing compressed reads in base space,
38 * where the bases are only stored for unaligned reads
39 */
40 table NCBI:align:tbl:cmp_base_space #1
41 = INSDC:tbl:sequence #1.0.1
42 , NCBI:tbl:dcmp_base_space #1
43 {
44 /* CMP_READ
45 * read compressed against a reference sequence
46 */
47
48 // default is IUPAC character representation; the validate rule compares in_cmp_dna_text with out_cmp_dna_text
49 extern default column INSDC:dna:text CMP_READ
50 {
51 read = out_cmp_dna_text;
52 validate = < INSDC:dna:text > compare ( in_cmp_dna_text, out_cmp_dna_text );
53 }
54
55 // 4na representation - unpacked ( bin ) and packed forms
56 extern column INSDC:4na:bin CMP_READ = out_cmp_4na_bin;
57 extern column INSDC:4na:packed CMP_READ = out_cmp_4na_packed;
58
59 // x2na representation - 2na with ambiguity
60 extern column INSDC:x2na:bin CMP_READ = out_cmp_x2na_bin;
61
62 // 2na representation - 2na with no ambiguity
63 extern column INSDC:2na:bin CMP_READ = out_cmp_2na_bin;
64 extern column INSDC:2na:packed CMP_READ = out_cmp_2na_packed;
65
66
67 /* input processing rules
68 */
69
70 // compressed input text - the map rewrites '.' as 'N' and the listed lower-case IUPAC letters as upper-case
71 INSDC:dna:text in_cmp_dna_text
72 = < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( CMP_READ );
73
74 // compressed input 4na bin - alternatives: range-checked CMP_READ ( 0..15 ), unpacked 4na, mapped text, or mapped x2na ( BINSET entries -> 1, 2, 4, 8, 15 )
75 INSDC:4na:bin in_cmp_4na_bin
76 = < INSDC:4na:bin > range_validate < 0, 15 > ( CMP_READ )
77 | ( INSDC:4na:bin ) unpack ( in_cmp_4na_packed )
78 | < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_cmp_dna_text )
79 | < INSDC:x2na:bin, INSDC:4na:bin > map < INSDC:x2na:map:BINSET, [ 1, 2, 4, 8, 15 ] > ( in_cmp_x2na_bin );
80
81 // compressed input 4na packed
82 INSDC:4na:packed in_cmp_4na_packed = CMP_READ;
83
84 // compressed input x2na bin - alternatives: range-checked CMP_READ ( 0..4 ), or mapped from 4na; the map table holds 0..3 at zero-based positions 1, 2, 4, 8 and 4 everywhere else
85 INSDC:x2na:bin in_cmp_x2na_bin
86 = < INSDC:x2na:bin > range_validate < 0, 4 > ( CMP_READ )
87 | < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( in_cmp_4na_bin );
88
89 // compressed input 2na bin - alternatives: range-checked CMP_READ ( 0..3 ), unpacked 2na, or derived from 4na via INSDC:SEQ:rand_4na_2na
90 INSDC:2na:bin in_cmp_2na_bin
91 = < INSDC:2na:bin > range_validate < 0, 3 > ( CMP_READ )
92 | ( INSDC:2na:bin ) unpack ( in_cmp_2na_packed )
93 | INSDC:SEQ:rand_4na_2na ( in_cmp_4na_bin );
94
95 // compressed input 2na packed
96 INSDC:2na:packed in_cmp_2na_packed = CMP_READ;
97
98 // input 4na alt-read ( ambiguities ) - the map table is 15 at zero-based position 0, is 0 at positions 1, 2, 4, 8, and equals the position everywhere else
99 INSDC:4na:bin in_cmp_alt_4na_bin
100 = < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_cmp_4na_bin );
101
102 // preparing a feed into stats column - U8 view of in_cmp_2na_bin
103 U8 in_cmp_stats_bin = in_cmp_2na_bin;
104
105
106 /* physical columns - .CMP_READ holds packed 2na; .CMP_ALTREAD holds the zip-encoded, trimmed alt-read 4na
107 */
108
109 physical column INSDC:2na:packed .CMP_READ
110 = in_cmp_2na_packed
111 | ( INSDC:2na:packed ) pack ( in_cmp_2na_bin );
112
113 physical column < INSDC:4na:bin > zip_encoding .CMP_ALTREAD
114 = < INSDC:4na:bin > trim < 0, 0 > ( in_cmp_alt_4na_bin );
115
116
117 /* output processing rules
118 */
119
120 // output 2na packed - read straight from physical .CMP_READ
121 INSDC:2na:packed out_cmp_2na_packed = .CMP_READ;
122
123 // unambiguous unpacked 2na
124 INSDC:2na:bin out_cmp_2na_bin
125 = ( INSDC:2na:bin ) unpack ( out_cmp_2na_packed );
126
127 // output x2na bin - mapped from out_cmp_4na_bin with the same table used for in_cmp_x2na_bin
128 INSDC:x2na:bin out_cmp_x2na_bin
129 = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_cmp_4na_bin );
130
131 // output 2na->4na bin - 2na BINSET entries -> 1, 2, 4, 8
132 INSDC:4na:bin out_cmp_2na_4na_bin
133 = < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_cmp_2na_bin );
134
135 // output 4na bin - 2na-derived 4na combined with .CMP_ALTREAD via bit_or; the alternative is the 2na-derived 4na alone
136 INSDC:4na:bin out_cmp_4na_bin
137 = < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_cmp_2na_4na_bin, .CMP_ALTREAD )
138 | out_cmp_2na_4na_bin;
139
140 // synthesized packed 4na
141 INSDC:4na:packed out_cmp_4na_packed
142 = ( INSDC:4na:packed ) pack ( out_cmp_4na_bin );
143
144 // output text - 4na BINSET -> 4na CHARSET applied to out_cmp_4na_bin
145 INSDC:dna:text out_cmp_dna_text
146 = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_cmp_4na_bin );
147
148
149 /* decompressed sequences
150 * source is out_dcmp_4na_bin - a virtual production
151 */
152
153 // synthesize x2na_bin, 2na_bin and 2na_packed; in the x2na -> 2na map, x2na code 4 becomes 2na code 0
154 INSDC:x2na:bin out_dcmp_x2na_bin
155 = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_dcmp_4na_bin );
156 INSDC:2na:bin out_dcmp_2na_bin
157 = < INSDC:x2na:bin, INSDC:2na:bin > map < [ 0,1,2,3,4 ], [ 0,1,2,3,0 ] > ( out_dcmp_x2na_bin );
158 INSDC:2na:packed out_dcmp_2na_packed
159 = ( INSDC:2na:packed ) pack ( out_dcmp_2na_bin );
160
161
162 /* INSDC:tbl:sequence inherited productions
163 * cs_native
164 * out_cs_key
165 * out_signal
166 * out_2cs_bin
167 * out_2na_bin
168 * out_4na_bin
169 * out_dna_text
170 * out_x2cs_bin
171 * out_x2na_bin
172 * out_2cs_packed
173 * out_2na_packed
174 * out_4na_packed
175 * out_color_text
176 * out_color_matrix
177 */
178
179 /* NCBI:tbl:dcmp_base_space inherited productions
180 * out_dcmp_4na_bin
181 */
182 }
183
184
185 /* cmp_color_space
186 * table representing compressed reads in color space,
187 * where the colors are only stored for unaligned reads
188 */
189 table NCBI:align:tbl:cmp_color_space #1 =
190 INSDC:tbl:sequence #1.0.1, NCBI:tbl:dcmp_color_space #1
191 {
192 /* CMP_CSREAD
193 * read compressed against a reference sequence
194 */
195
196 // default is color-space text representation
197 extern default column INSDC:color:text CMP_CSREAD = out_cmp_color_text;
198
199 // x2cs representation - 2cs with ambiguity
200 extern column INSDC:x2cs:bin CMP_CSREAD = out_cmp_x2cs_bin;
201
202 // 2cs representation - 2cs with no ambiguity
203 extern column INSDC:2cs:bin CMP_CSREAD = out_cmp_2cs_bin;
204 extern column INSDC:2cs:packed CMP_CSREAD = out_cmp_2cs_packed;
205
206
207 /* input processing rules
208 */
209
210 // compressed input text - taken from CMP_CSREAD without any mapping
211 INSDC:color:text in_cmp_color_text = CMP_CSREAD;
212
213 // compressed input x2cs bin - alternatives: range-checked CMP_CSREAD ( 0..4 ), or mapped from color text ( x2cs CHARSET -> x2cs BINSET )
214 INSDC:x2cs:bin in_cmp_x2cs_bin
215 = < INSDC:x2cs:bin > range_validate < 0, 4 > ( CMP_CSREAD )
216 | < INSDC:color:text, INSDC:x2cs:bin > map < INSDC:x2cs:map:CHARSET, INSDC:x2cs:map:BINSET > ( in_cmp_color_text );
217
218 // compressed input 2cs bin - alternatives: range-checked CMP_CSREAD ( 0..3 ), unpacked 2cs, or mapped from x2cs ( BINSET entries -> 0, 1, 2, 3, 0 )
219 INSDC:2cs:bin in_cmp_2cs_bin
220 = < INSDC:2cs:bin > range_validate < 0, 3 > ( CMP_CSREAD )
221 | ( INSDC:2cs:bin ) unpack ( in_cmp_2cs_packed )
222 | < INSDC:x2cs:bin, INSDC:2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( in_cmp_x2cs_bin );
223
224 // compressed input 2cs packed
225 INSDC:2cs:packed in_cmp_2cs_packed = CMP_CSREAD;
226
227 // compressed input x2cs alt-read ( ambiguities ) - x2cs BINSET entries -> 0, 0, 0, 0, 4
228 INSDC:x2cs:bin in_cmp_alt_x2cs_bin
229 = < INSDC:x2cs:bin, INSDC:x2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 0, 0, 0, 4 ] > ( in_cmp_x2cs_bin );
230
231 // preparing a feed into stats column - U8 view of in_cmp_2cs_bin
232 U8 in_cmp_stats_bin = in_cmp_2cs_bin;
233
234
235 /* physical columns - .CMP_CSREAD holds packed 2cs; .CMP_ALTCSREAD holds the zip-encoded, trimmed alt-read x2cs
236 */
237
238 physical column INSDC:2cs:packed .CMP_CSREAD
239 = in_cmp_2cs_packed
240 | ( INSDC:2cs:packed ) pack ( in_cmp_2cs_bin );
241
242 physical column < INSDC:x2cs:bin > zip_encoding .CMP_ALTCSREAD
243 = < INSDC:x2cs:bin > trim < 0, 0 > ( in_cmp_alt_x2cs_bin );
244
245
246 /* output processing rules
247 */
248
249 // compressed output 2cs packed - read straight from physical .CMP_CSREAD
250 INSDC:2cs:packed out_cmp_2cs_packed = .CMP_CSREAD;
251
252 // unambiguous unpacked 2cs
253 INSDC:2cs:bin out_cmp_2cs_bin
254 = ( INSDC:2cs:bin ) unpack ( out_cmp_2cs_packed );
255
256 // unpacked 2cs with ambiguity - 2cs combined with .CMP_ALTCSREAD via bit_or; the alternative is 2cs cast to x2cs
257 INSDC:x2cs:bin out_cmp_x2cs_bin
258 = ( INSDC:x2cs:bin ) < U8 > bit_or < ALIGN_RIGHT > ( out_cmp_2cs_bin, .CMP_ALTCSREAD )
259 | ( INSDC:x2cs:bin ) out_cmp_2cs_bin;
260
261 // output text - x2cs BINSET -> x2cs CHARSET applied to out_cmp_x2cs_bin
262 INSDC:color:text out_cmp_color_text
263 = < INSDC:x2cs:bin, INSDC:color:text > map < INSDC:x2cs:map:BINSET, INSDC:x2cs:map:CHARSET > ( out_cmp_x2cs_bin );
264
265
266 /* decompressed sequences
267 * source is out_dcmp_x2cs_bin - a virtual production
268 */
269
270 // synthesize 2cs_bin and 2cs_packed; in the x2cs -> 2cs map, x2cs code 4 becomes 2cs code 0
271 INSDC:2cs:bin out_dcmp_2cs_bin
272 = < INSDC:x2cs:bin, INSDC:2cs:bin > map < [ 0,1,2,3,4 ], [ 0,1,2,3,0 ] > ( out_dcmp_x2cs_bin );
273 INSDC:2cs:packed out_dcmp_2cs_packed
274 = ( INSDC:2cs:packed ) pack ( out_dcmp_2cs_bin );
275
276
277 /* INSDC:tbl:sequence inherited productions
278 * cs_native
279 * out_cs_key
280 * out_signal
281 * out_2cs_bin
282 * out_2na_bin
283 * out_4na_bin
284 * out_dna_text
285 * out_x2cs_bin
286 * out_x2na_bin
287 * out_2cs_packed
288 * out_2na_packed
289 * out_4na_packed
290 * out_color_text
291 * out_qual_phred
292 * out_color_matrix
293 * out_qual_text_phred_33
294 * out_qual_text_phred_64
295 */
296
297 /* NCBI:tbl:dcmp_color_space inherited productions
298 * out_dcmp_x2cs_bin
299 */
300 }
291 * out_qual_phred
292 * out_color_matrix
293 * out_qual_text_phred_33
294 * out_qual_text_phred_64
295 */
296
297 /* NCBI:tbl:dcmp_color_space inherited productions
298 * out_dcmp_x2cs_bin
299 */
300 }