Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/read.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 /*=========================================================================== | |
2 * | |
3 * PUBLIC DOMAIN NOTICE | |
4 * National Center for Biotechnology Information | |
5 * | |
6 * This software/database is a "United States Government Work" under the | |
7 * terms of the United States Copyright Act. It was written as part of | |
8 * the author's official duties as a United States Government employee and | |
9 * thus cannot be copyrighted. This software/database is freely available | |
10 * to the public for use. The National Library of Medicine and the U.S. | |
11 * Government have not placed any restriction on its use or reproduction. | |
12 * | |
13 * Although all reasonable efforts have been taken to ensure the accuracy | |
14 * and reliability of the software and data, the NLM and the U.S. | |
15 * Government do not and cannot warrant the performance or results that | |
16 * may be obtained by using this software or data. The NLM and the U.S. | |
17 * Government disclaim all warranties, express or implied, including | |
18 * warranties of performance, merchantability or fitness for any particular | |
19 * purpose. | |
20 * | |
21 * Please cite the author in any work or product based on this material. | |
22 * | |
23 * =========================================================================== | |
24 * | |
25 */ | |
26 | |
27 /*========================================================================== | |
28 * General read table which will be inherited by others | |
29 */ | |
30 version 1; | |
31 | |
32 include 'vdb/vdb.vschema'; | |
33 include 'insdc/insdc.vschema'; | |
34 include 'csra2/stats.vschema'; | |
35 | |
36 | |
37 /*-------------------------------------------------------------------------- | |
38 * tables | |
39 */ | |
40 table NCBI:csra2:tbl:read #1.0 = NCBI:csra2:tbl:read_stats #1 | |
41 { | |
42 /* CHUNK_SZ | |
43 * describes the maximum number of bases in any row | |
44 * | |
45 * if present, allows a single sequence to be broken into multiple rows | |
46 * where this value gives the limit on the number of bases in any row. | |
47 * | |
48 * the sequence will be split across some number of rows, depending upon | |
49 * the value of CHUNK_SZ. if length ( seq ) > CHUNK_SZ, then there will | |
50 * be multiple rows, where all but the last will have a length of CHUNK_SZ. | |
51 * the last ( or only ) row will have a length of length(seq)%CHUNK_SZ ( NOTE(review): presumably a full CHUNK_SZ when the length is an exact multiple - confirm ). | |
52 */ | |
53 extern column INSDC:coord:len CHUNK_SZ; | |
54 | |
55 | |
56 /* READ | |
57 * base calls, exposed as text ( the default ) and as unpacked 4na | |
58 */ | |
59 | |
60 // textual representation - read from out_dna_text; validated by comparing in_dna_text with out_dna_text | |
61 extern default column INSDC:dna:text READ | |
62 { | |
63 read = out_dna_text; | |
64 validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text ); | |
65 } | |
66 | |
67 // 4na representation - unpacked, read from out_4na_bin | |
68 extern column INSDC:4na:bin READ | |
69 = out_4na_bin | |
70 ; | |
71 | |
72 | |
73 /* QUALITY | |
74 * phred-score quality values: binary phred is the default; the phred_33 and phred_64 text forms add 33 and 64 to out_qual_phred | |
75 */ | |
76 extern default column INSDC:quality:phred QUALITY | |
77 = out_qual_phred | |
78 ; | |
79 extern column INSDC:quality:text:phred_33 QUALITY | |
80 = ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred ) | |
81 ; | |
82 extern column INSDC:quality:text:phred_64 QUALITY | |
83 = ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred ) | |
84 ; | |
85 | |
86 /* ---------------------------- optional columns ---------------------------- */ | |
87 | |
88 /* RD_ID | |
89 * RD_GROUP | |
90 * reports group and id of current row | |
91 */ | |
92 extern column I64 RD_ID; | |
93 extern column ascii RD_GROUP; | |
94 | |
95 /* RD_FILTER | |
96 * records filter value if used | |
97 */ | |
98 extern column INSDC:SRA:read_filter RD_FILTER; | |
99 | |
100 | |
101 /* ---------------------------- input rules ---------------------------- */ | |
102 | |
103 // input text - maps '.' to 'N' and lower-case base codes to upper case | |
104 INSDC:dna:text in_dna_text | |
105 = < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( READ ) | |
106 ; | |
107 | |
108 // input 4na bin - READ range-validated to 0..15, with the alternative of converting in_dna_text through the 4na CHARSET to BINSET map | |
109 INSDC:4na:bin in_4na_bin | |
110 = < INSDC:4na:bin > range_validate < 0, 15 > ( READ ) | |
111 | < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_dna_text ) | |
112 ; | |
113 | |
114 // input 2na bin - produced from in_4na_bin by INSDC:SEQ:rand_4na_2na ( NOTE(review): presumably picks a random compatible base for ambiguity codes - confirm ) | |
115 INSDC:2na:bin in_2na_bin | |
116 = INSDC:SEQ:rand_4na_2na ( in_4na_bin ) | |
117 ; | |
118 | |
119 // input 4na alt-read ( ambiguities ) - assuming BINSET enumerates 0..15 in order: 0 maps to 15, the codes 1, 2, 4 and 8 map to 0, every other code is kept | |
120 INSDC:4na:bin in_alt_4na_bin | |
121 = < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_4na_bin ) | |
122 ; | |
123 | |
124 // feed the statistics with the 4na input sequence | |
125 INSDC:4na:bin in_stats_seq = in_4na_bin; | |
126 | |
127 // quality - binary phred is taken as is; the text forms have their offset of 33 or 64 subtracted | |
128 INSDC:quality:text:phred_33 in_qual_text_phred_33 = QUALITY; | |
129 INSDC:quality:text:phred_64 in_qual_text_phred_64 = QUALITY; | |
130 | |
131 INSDC:quality:phred in_qual_phred | |
132 = QUALITY | |
133 | ( INSDC:quality:phred ) < B8 > diff < 33 > ( in_qual_text_phred_33 ) | |
134 | ( INSDC:quality:phred ) < B8 > diff < 64 > ( in_qual_text_phred_64 ) | |
135 ; | |
136 | |
137 // feed the statistics with the binary phred input quality | |
138 INSDC:quality:phred in_stats_qual_phred = in_qual_phred; | |
139 | |
140 ascii in_stats_read_group | |
141 = in_stats_spot_group | |
142 | RD_GROUP | |
143 ; | |
144 | |
145 | |
146 /* ---------------------------- physical columns ---------------------------- */ | |
147 | |
148 physical column INSDC:2na:packed .READ | |
149 = ( INSDC:2na:packed ) pack ( in_2na_bin ) | |
150 ; | |
151 | |
152 physical column < INSDC:4na:bin > zip_encoding .ALTREAD | |
153 = < INSDC:4na:bin > trim < 0, 0 > ( in_alt_4na_bin ) | |
154 ; | |
155 | |
156 physical column < INSDC:quality:phred > delta_average_zip_encoding .QUALITY | |
157 = in_qual_phred | |
158 ; | |
159 | |
160 | |
161 /* ---------------------------- output rules ---------------------------- */ | |
162 | |
163 // output 2na packed - the stored .READ column | |
164 INSDC:2na:packed out_2na_packed | |
165 = .READ | |
166 ; | |
167 | |
168 // output 2na bin - out_2na_packed unpacked | |
169 INSDC:2na:bin out_2na_bin | |
170 = ( INSDC:2na:bin ) unpack ( out_2na_packed ) | |
171 ; | |
172 | |
173 // output 2na->4na bin - the 2na BINSET codes map to the 4na values 1, 2, 4, 8 | |
174 INSDC:4na:bin out_2na_4na_bin | |
175 = < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_2na_bin ) | |
176 ; | |
177 | |
178 // output 4na bin - bit_or of out_2na_4na_bin with .ALTREAD, with out_2na_4na_bin alone as the alternative | |
179 INSDC:4na:bin out_4na_bin | |
180 = < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_2na_4na_bin, .ALTREAD ) | |
181 | out_2na_4na_bin | |
182 ; | |
183 | |
184 // output text - out_4na_bin converted through the 4na BINSET to CHARSET map | |
185 INSDC:dna:text out_dna_text | |
186 = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_4na_bin ) | |
187 ; | |
188 | |
189 // output quality - the stored .QUALITY column, with the constant 30 echoed over out_4na_bin as the alternative | |
190 INSDC:quality:phred out_qual_phred | |
191 = .QUALITY | |
192 | < INSDC:quality:phred > echo < 30 > ( out_4na_bin ) | |
193 ; | |
194 } | |
195 | |
196 | |
197 /*-------------------------------------------------------------------------- | |
198 * views | |
199 */ | |
200 table NCBI:csra2:view:read #1.0 = | |
201 NCBI:csra2:tbl:read #1.0 | |
202 { | |
203 /* CHUNK_SIZE | |
204 * describes the maximum number of bases in any row; read from .CHUNK_SZ, with the constant 0xFFFFFFFF as the alternative | |
205 * | |
206 * if present, allows a single sequence to be broken into multiple rows | |
207 * where this value gives the limit on the number of bases in any row. | |
208 * | |
209 * the sequence will be split across some number of rows, depending upon | |
210 * the value of CHUNK_SIZE. if length ( seq ) > CHUNK_SIZE, then there will | |
211 * be multiple rows, where all but the last will have a length of CHUNK_SIZE. | |
212 * the last ( or only ) row will have a length of length(seq)%CHUNK_SIZE ( NOTE(review): presumably a full CHUNK_SIZE when the length is an exact multiple - confirm ). | |
213 */ | |
214 readonly column INSDC:coord:len CHUNK_SIZE | |
215 = .CHUNK_SZ | |
216 | < INSDC:coord:len > echo < 0xFFFFFFFF > () | |
217 ; | |
218 | |
219 /* READ | |
220 * generate remaining 4 types: 4na packed, x2na bin, 2na bin and 2na packed | |
221 */ | |
222 readonly column INSDC:4na:packed READ | |
223 = ( INSDC:4na:packed ) pack ( out_4na_bin ) | |
224 ; | |
225 readonly column INSDC:x2na:bin READ | |
226 = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_4na_bin ) | |
227 ; | |
228 readonly column INSDC:2na:bin READ | |
229 = out_2na_bin | |
230 ; | |
231 readonly column INSDC:2na:packed READ | |
232 = out_2na_packed | |
233 ; | |
234 | |
235 /* READ_ID | |
236 * READ_GROUP | |
237 * reports group and id of current row; READ_ID reads .RD_ID with row_id () as the alternative, READ_GROUP reads .RD_GROUP with '' as the alternative | |
238 */ | |
239 readonly column I64 READ_ID | |
240 = .RD_ID | |
241 | row_id () | |
242 ; | |
243 readonly column ascii READ_GROUP | |
244 = .RD_GROUP | |
245 | < ascii > echo < '' > () | |
246 ; | |
247 | |
248 /* READ_FILTER | |
249 * records filter value if used; reads .RD_FILTER with SRA_READ_FILTER_PASS as the alternative | |
250 */ | |
251 readonly column INSDC:SRA:read_filter READ_FILTER | |
252 = .RD_FILTER | |
253 | < INSDC:SRA:read_filter > echo < SRA_READ_FILTER_PASS > () | |
254 ; | |
255 } |