Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/align/seq.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 /*=========================================================================== | |
2 * | |
3 * PUBLIC DOMAIN NOTICE | |
4 * National Center for Biotechnology Information | |
5 * | |
6 * This software/database is a "United States Government Work" under the | |
7 * terms of the United States Copyright Act. It was written as part of | |
8 * the author's official duties as a United States Government employee and | |
9 * thus cannot be copyrighted. This software/database is freely available | |
10 * to the public for use. The National Library of Medicine and the U.S. | |
11 * Government have not placed any restriction on its use or reproduction. | |
12 * | |
13 * Although all reasonable efforts have been taken to ensure the accuracy | |
14 * and reliability of the software and data, the NLM and the U.S. | |
15 * Government do not and cannot warrant the performance or results that | |
16 * may be obtained by using this software or data. The NLM and the U.S. | |
17 * Government disclaim all warranties, express or implied, including | |
18 * warranties of performance, merchantability or fitness for any particular | |
19 * purpose. | |
20 * | |
21 * Please cite the author in any work or product based on this material. | |
22 * | |
23 * =========================================================================== | |
24 * | |
25 */ | |
26 | |
27 /*========================================================================== | |
28 * Sequence schema | |
29 */ | |
30 version 1; | |
31 | |
32 include 'vdb/vdb.vschema'; | |
33 include 'ncbi/seq.vschema'; | |
34 | |
35 | |
36 /* cmp_base_space | |
37 * table representing compressed reads in base space, | |
38 * where the bases are only stored for unaligned reads; physically kept as 2na-packed .CMP_READ plus the zip-encoded 4na column .CMP_ALTREAD | |
39 */ | |
40 table NCBI:align:tbl:cmp_base_space #1 | |
41 = INSDC:tbl:sequence #1.0.1 | |
42 , NCBI:tbl:dcmp_base_space #1 | |
43 { | |
44 /* CMP_READ | |
45 * read compressed against a reference sequence; exposed below as text, 4na, x2na and 2na columns | |
46 */ | |
47 | |
48 // default is IUPAC character representation; its validate rule compares in_cmp_dna_text against out_cmp_dna_text | |
49 extern default column INSDC:dna:text CMP_READ | |
50 { | |
51 read = out_cmp_dna_text; | |
52 validate = < INSDC:dna:text > compare ( in_cmp_dna_text, out_cmp_dna_text ); | |
53 } | |
54 | |
55 // 4na representation - unpacked ( bin ) and packed | |
56 extern column INSDC:4na:bin CMP_READ = out_cmp_4na_bin; | |
57 extern column INSDC:4na:packed CMP_READ = out_cmp_4na_packed; | |
58 | |
59 // x2na representation - 2na with ambiguity | |
60 extern column INSDC:x2na:bin CMP_READ = out_cmp_x2na_bin; | |
61 | |
62 // 2na representation - 2na with no ambiguity, unpacked ( bin ) and packed | |
63 extern column INSDC:2na:bin CMP_READ = out_cmp_2na_bin; | |
64 extern column INSDC:2na:packed CMP_READ = out_cmp_2na_packed; | |
65 | |
66 | |
67 /* input processing rules | |
68 */ | |
69 | |
70 // compressed input text - the character map turns '.' into 'N' and lowercase codes into uppercase | |
71 INSDC:dna:text in_cmp_dna_text | |
72 = < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( CMP_READ ); | |
73 | |
74 // compressed input 4na bin - range-validated CMP_READ, or unpacked from packed 4na, or mapped from text, or mapped from x2na | |
75 INSDC:4na:bin in_cmp_4na_bin | |
76 = < INSDC:4na:bin > range_validate < 0, 15 > ( CMP_READ ) | |
77 | ( INSDC:4na:bin ) unpack ( in_cmp_4na_packed ) | |
78 | < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_cmp_dna_text ) | |
79 | < INSDC:x2na:bin, INSDC:4na:bin > map < INSDC:x2na:map:BINSET, [ 1, 2, 4, 8, 15 ] > ( in_cmp_x2na_bin ); | |
80 | |
81 // compressed input 4na packed | |
82 INSDC:4na:packed in_cmp_4na_packed = CMP_READ; | |
83 | |
84 // compressed input x2na bin - range-validated CMP_READ ( 0..4 ), or mapped from in_cmp_4na_bin | |
85 INSDC:x2na:bin in_cmp_x2na_bin | |
86 = < INSDC:x2na:bin > range_validate < 0, 4 > ( CMP_READ ) | |
87 | < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( in_cmp_4na_bin ); | |
88 | |
89 // compressed input 2na bin - range-validated CMP_READ ( 0..3 ), or unpacked from packed 2na, or derived from 4na by INSDC:SEQ:rand_4na_2na | |
90 INSDC:2na:bin in_cmp_2na_bin | |
91 = < INSDC:2na:bin > range_validate < 0, 3 > ( CMP_READ ) | |
92 | ( INSDC:2na:bin ) unpack ( in_cmp_2na_packed ) | |
93 | INSDC:SEQ:rand_4na_2na ( in_cmp_4na_bin ); | |
94 | |
95 // compressed input 2na packed | |
96 INSDC:2na:packed in_cmp_2na_packed = CMP_READ; | |
97 | |
98 // input 4na alt-read ( ambiguities ) - in_cmp_4na_bin remapped for storage in .CMP_ALTREAD | |
99 INSDC:4na:bin in_cmp_alt_4na_bin | |
100 = < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_cmp_4na_bin ); | |
101 | |
102 // preparing a feed into stats column - taken from the unambiguous 2na input | |
103 U8 in_cmp_stats_bin = in_cmp_2na_bin; | |
104 | |
105 | |
106 /* physical columns | |
107 */ | |
108 | |
109 physical column INSDC:2na:packed .CMP_READ | |
110 = in_cmp_2na_packed | |
111 | ( INSDC:2na:packed ) pack ( in_cmp_2na_bin ); | |
112 | |
113 physical column < INSDC:4na:bin > zip_encoding .CMP_ALTREAD | |
114 = < INSDC:4na:bin > trim < 0, 0 > ( in_cmp_alt_4na_bin ); | |
115 | |
116 | |
117 /* output processing rules | |
118 */ | |
119 | |
120 // output 2na packed - read straight from the physical column .CMP_READ | |
121 INSDC:2na:packed out_cmp_2na_packed = .CMP_READ; | |
122 | |
123 // unambiguous unpacked 2na | |
124 INSDC:2na:bin out_cmp_2na_bin | |
125 = ( INSDC:2na:bin ) unpack ( out_cmp_2na_packed ); | |
126 | |
127 // output x2na bin - mapped from out_cmp_4na_bin | |
128 INSDC:x2na:bin out_cmp_x2na_bin | |
129 = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_cmp_4na_bin ); | |
130 | |
131 // output 2na->4na bin - 2na codes mapped onto the 4na values [ 1, 2, 4, 8 ] | |
132 INSDC:4na:bin out_cmp_2na_4na_bin | |
133 = < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_cmp_2na_bin ); | |
134 | |
135 // output 4na bin - out_cmp_2na_4na_bin bit_or'ed with .CMP_ALTREAD, with out_cmp_2na_4na_bin alone as the second alternative | |
136 INSDC:4na:bin out_cmp_4na_bin | |
137 = < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_cmp_2na_4na_bin, .CMP_ALTREAD ) | |
138 | out_cmp_2na_4na_bin; | |
139 | |
140 // synthesized packed 4na - packed from out_cmp_4na_bin | |
141 INSDC:4na:packed out_cmp_4na_packed | |
142 = ( INSDC:4na:packed ) pack ( out_cmp_4na_bin ); | |
143 | |
144 // output text - mapped from out_cmp_4na_bin | |
145 INSDC:dna:text out_cmp_dna_text | |
146 = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_cmp_4na_bin ); | |
147 | |
148 | |
149 /* decompressed sequences | |
150 * source is out_dcmp_4na_bin - a virtual production | |
151 */ | |
152 | |
153 // synthesize x2na_bin, 2na_bin and 2na_packed - x2na value 4 is mapped to 0 when reducing to 2na | |
154 INSDC:x2na:bin out_dcmp_x2na_bin | |
155 = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_dcmp_4na_bin ); | |
156 INSDC:2na:bin out_dcmp_2na_bin | |
157 = < INSDC:x2na:bin, INSDC:2na:bin > map < [ 0,1,2,3,4 ], [ 0,1,2,3,0 ] > ( out_dcmp_x2na_bin ); | |
158 INSDC:2na:packed out_dcmp_2na_packed | |
159 = ( INSDC:2na:packed ) pack ( out_dcmp_2na_bin ); | |
160 | |
161 | |
162 /* INSDC:tbl:sequence inherited productions | |
163 * cs_native | |
164 * out_cs_key | |
165 * out_signal | |
166 * out_2cs_bin | |
167 * out_2na_bin | |
168 * out_4na_bin | |
169 * out_dna_text | |
170 * out_x2cs_bin | |
171 * out_x2na_bin | |
172 * out_2cs_packed | |
173 * out_2na_packed | |
174 * out_4na_packed | |
175 * out_color_text | |
176 * out_color_matrix | |
177 */ | |
178 | |
179 /* NCBI:tbl:dcmp_base_space inherited productions | |
180 * out_dcmp_4na_bin | |
181 */ | |
182 } | |
183 | |
184 | |
185 /* cmp_color_space | |
186 * table representing compressed reads in color space, | |
187 * where the colors are only stored for unaligned reads; physically kept as 2cs-packed .CMP_CSREAD plus the zip-encoded x2cs column .CMP_ALTCSREAD | |
188 */ | |
189 table NCBI:align:tbl:cmp_color_space #1 = | |
190 INSDC:tbl:sequence #1.0.1, NCBI:tbl:dcmp_color_space #1 | |
191 { | |
192 /* CMP_CSREAD | |
193 * read compressed against a reference sequence; exposed below as color text, x2cs and 2cs columns | |
194 */ | |
195 | |
196 // default is color text representation | |
197 extern default column INSDC:color:text CMP_CSREAD = out_cmp_color_text; | |
198 | |
199 // x2cs representation - 2cs with ambiguity | |
200 extern column INSDC:x2cs:bin CMP_CSREAD = out_cmp_x2cs_bin; | |
201 | |
202 // 2cs representation - 2cs with no ambiguity, unpacked ( bin ) and packed | |
203 extern column INSDC:2cs:bin CMP_CSREAD = out_cmp_2cs_bin; | |
204 extern column INSDC:2cs:packed CMP_CSREAD = out_cmp_2cs_packed; | |
205 | |
206 | |
207 /* input processing rules | |
208 */ | |
209 | |
210 // compressed input text - taken from CMP_CSREAD as is | |
211 INSDC:color:text in_cmp_color_text = CMP_CSREAD; | |
212 | |
213 // compressed input x2cs bin - range-validated CMP_CSREAD ( 0..4 ), or mapped from the color text input | |
214 INSDC:x2cs:bin in_cmp_x2cs_bin | |
215 = < INSDC:x2cs:bin > range_validate < 0, 4 > ( CMP_CSREAD ) | |
216 | < INSDC:color:text, INSDC:x2cs:bin > map < INSDC:x2cs:map:CHARSET, INSDC:x2cs:map:BINSET > ( in_cmp_color_text ); | |
217 | |
218 // compressed input 2cs bin - range-validated CMP_CSREAD ( 0..3 ), or unpacked from packed 2cs, or mapped from in_cmp_x2cs_bin | |
219 INSDC:2cs:bin in_cmp_2cs_bin | |
220 = < INSDC:2cs:bin > range_validate < 0, 3 > ( CMP_CSREAD ) | |
221 | ( INSDC:2cs:bin ) unpack ( in_cmp_2cs_packed ) | |
222 | < INSDC:x2cs:bin, INSDC:2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( in_cmp_x2cs_bin ); | |
223 | |
224 // compressed input 2cs packed | |
225 INSDC:2cs:packed in_cmp_2cs_packed = CMP_CSREAD; | |
226 | |
227 // compressed input x2cs alt-read ( ambiguities ) - in_cmp_x2cs_bin mapped through [ 0, 0, 0, 0, 4 ] for storage in .CMP_ALTCSREAD | |
228 INSDC:x2cs:bin in_cmp_alt_x2cs_bin | |
229 = < INSDC:x2cs:bin, INSDC:x2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 0, 0, 0, 4 ] > ( in_cmp_x2cs_bin ); | |
230 | |
231 // preparing a feed into stats column - taken from the unambiguous 2cs input | |
232 U8 in_cmp_stats_bin = in_cmp_2cs_bin; | |
233 | |
234 | |
235 /* physical columns | |
236 */ | |
237 | |
238 physical column INSDC:2cs:packed .CMP_CSREAD | |
239 = in_cmp_2cs_packed | |
240 | ( INSDC:2cs:packed ) pack ( in_cmp_2cs_bin ); | |
241 | |
242 physical column < INSDC:x2cs:bin > zip_encoding .CMP_ALTCSREAD | |
243 = < INSDC:x2cs:bin > trim < 0, 0 > ( in_cmp_alt_x2cs_bin ); | |
244 | |
245 | |
246 /* output processing rules | |
247 */ | |
248 | |
249 // compressed output 2cs packed - read straight from the physical column .CMP_CSREAD | |
250 INSDC:2cs:packed out_cmp_2cs_packed = .CMP_CSREAD; | |
251 | |
252 // unambiguous unpacked 2cs | |
253 INSDC:2cs:bin out_cmp_2cs_bin | |
254 = ( INSDC:2cs:bin ) unpack ( out_cmp_2cs_packed ); | |
255 | |
256 // unpacked 2cs with ambiguity - out_cmp_2cs_bin bit_or'ed with .CMP_ALTCSREAD, with out_cmp_2cs_bin alone ( cast to x2cs ) as the second alternative | |
257 INSDC:x2cs:bin out_cmp_x2cs_bin | |
258 = ( INSDC:x2cs:bin ) < U8 > bit_or < ALIGN_RIGHT > ( out_cmp_2cs_bin, .CMP_ALTCSREAD ) | |
259 | ( INSDC:x2cs:bin ) out_cmp_2cs_bin; | |
260 | |
261 // output text - mapped from out_cmp_x2cs_bin | |
262 INSDC:color:text out_cmp_color_text | |
263 = < INSDC:x2cs:bin, INSDC:color:text > map < INSDC:x2cs:map:BINSET, INSDC:x2cs:map:CHARSET > ( out_cmp_x2cs_bin ); | |
264 | |
265 | |
266 /* decompressed sequences | |
267 * source is out_dcmp_x2cs_bin - a virtual production | |
268 */ | |
269 | |
270 // synthesize 2cs_bin and 2cs_packed - x2cs value 4 is mapped to 0 when reducing to 2cs | |
271 INSDC:2cs:bin out_dcmp_2cs_bin | |
272 = < INSDC:x2cs:bin, INSDC:2cs:bin > map < [ 0,1,2,3,4 ], [ 0,1,2,3,0 ] > ( out_dcmp_x2cs_bin ); | |
273 INSDC:2cs:packed out_dcmp_2cs_packed | |
274 = ( INSDC:2cs:packed ) pack ( out_dcmp_2cs_bin ); | |
275 | |
276 | |
277 /* INSDC:tbl:sequence inherited productions | |
278 * cs_native | |
279 * out_cs_key | |
280 * out_signal | |
281 * out_2cs_bin | |
282 * out_2na_bin | |
283 * out_4na_bin | |
284 * out_dna_text | |
285 * out_x2cs_bin | |
286 * out_x2na_bin | |
287 * out_2cs_packed | |
288 * out_2na_packed | |
289 * out_4na_packed | |
290 * out_color_text | |
291 * out_qual_phred | |
292 * out_color_matrix | |
293 * out_qual_text_phred_33 | |
294 * out_qual_text_phred_64 | |
295 */ | |
296 | |
297 /* NCBI:tbl:dcmp_color_space inherited productions | |
298 * out_dcmp_x2cs_bin | |
299 */ | |
300 } |