Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/sra.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 /*========================================================================== | |
| 28 * NCBI Sequence Read Archive schema | |
| 29 */ | |
| 30 version 1; | |
| 31 | |
| 32 include 'vdb/vdb.vschema'; | |
| 33 include 'ncbi/seq.vschema'; | |
| 34 include 'ncbi/spotname.vschema'; | |
| 35 include 'insdc/sra.vschema'; | |
| 36 include 'ncbi/stats.vschema'; | |
| 37 | |
| 38 | |
| 39 /*-------------------------------------------------------------------------- | |
| 40 * types | |
| 41 */ | |
| 42 | |
| 43 | |
| 44 /* Segment - DEPRECATED | |
| 45 * a ( start, len ) pair where start is a zero-based, unsigned coordinate | |
| 46 * both elements are stored as U16 */ | |
| 47 typedef U16 NCBI:SRA:Segment [ 2 ]; | |
| 48 | |
| 49 | |
| 50 /* SpotDesc - DEPRECATED | |
| 51 * uint16_t spot_len; | |
| 52 * uint16_t fixed_len; | |
| 53 * uint16_t signal_len; | |
| 54 * uint16_t clip_qual_right; | |
| 55 * uint8_t num_reads; | |
| 56 * uint8_t align [ 7 ]; | |
| 57 * the fields above total 16 bytes ( 4 * 2 + 1 + 7 ), matching B8 [ 16 ] */ | |
| 58 typedef B8 NCBI:SRA:SpotDesc [ 16 ]; | |
| 59 | |
| 60 | |
| 61 /* ReadDesc - DEPRECATED | |
| 62 * SRASegment { uint16_t start, len; } seg; | |
| 63 * uint8_t type; | |
| 64 * char cs_key; | |
| 65 * char label [ 74 ]; | |
| 66 * the fields above total 80 bytes ( 4 + 1 + 1 + 74 ), matching B8 [ 80 ] */ | |
| 67 typedef B8 NCBI:SRA:ReadDesc [ 80 ]; | |
| 68 | |
| 69 | |
| 70 // some types have been moved to INSDC; the NCBI:SRA names remain as aliases | |
| 71 alias INSDC:SRA:platform_id NCBI:SRA:platform_id; | |
| 72 alias INSDC:SRA:read_type NCBI:SRA:read_type; | |
| 73 alias INSDC:SRA:read_filter NCBI:SRA:read_filter; | |
| 74 | |
| 75 typedef NCBI:fsamp4 NCBI:SRA:rotated_fsamp4, NCBI:SRA:swapped_fsamp4; /* two names derived from the same NCBI:fsamp4 type */ | |
| 76 | |
| 77 // 16-bit POSITION type | |
| 78 typedef U16 NCBI:SRA:pos16; | |
| 79 | |
| 80 | |
| 81 /*-------------------------------------------------------------------------- | |
| 82 * functions | |
| 83 */ | |
| 84 | |
| 85 /* bio_start | |
| 86 * searches through the read_type vector | |
| 87 * returns the 0-based starting coordinate ( INSDC:coord:zero ) of the first biological read | |
| 88 * | |
| 89 * "read_start" [ DATA ] - vector of read start coordinates ( INSDC:coord:zero ) | |
| 90 * | |
| 91 * "read_type" [ DATA ] - vector of read types ( INSDC:SRA:xread_type ) | |
| 92 * NOTE(review): the result when no read is biological is not stated here - confirm against the implementation */ | |
| 93 extern function INSDC:coord:zero | |
| 94 NCBI:SRA:bio_start #1 ( INSDC:coord:zero read_start, INSDC:SRA:xread_type read_type ); | |
| 95 | |
| 96 | |
| 97 /* bio_end | |
| 98 * searches through the read_type vector | |
| 99 * returns the 0-based ending coordinate ( either inclusive or exclusive ) of the last | |
| 100 * biological read | |
| 101 * | |
| 102 * "read_start" [ DATA ] - vector of read start coordinates | |
| 103 * | |
| 104 * "read_type" [ DATA ] - vector of read types | |
| 105 * | |
| 106 * "read_len" [ DATA ] - vector of read lengths | |
| 107 * "inclusive" [ CONST ] - chooses between the inclusive and the exclusive ending coordinate; presumably true means inclusive - confirm against the implementation */ | |
| 108 extern function INSDC:coord:zero | |
| 109 NCBI:SRA:bio_end #1 < bool inclusive > ( INSDC:coord:zero read_start, INSDC:SRA:xread_type read_type, INSDC:coord:len read_len ); | |
| 110 | |
| 111 | |
| 112 /* fix_read_seg | |
| 113 * takes a U16 [ 2 ] read segment "rd_seg" and the spot length, returns a two-element INSDC:coord:len vector; NOTE(review): the correction itself is implemented externally and is not described here - confirm before relying on it */ | |
| 114 extern function INSDC:coord:len [ 2 ] | |
| 115 NCBI:SRA:fix_read_seg #1 ( U16 [ 2 ] rd_seg, INSDC:coord:len spot_len ); | |
| 116 | |
| 117 | |
| 118 /* make_spot_desc | |
| 119 * assembles several bits of information together into a "C" structure, returned as NCBI:SRA:SpotDesc | |
| 120 * | |
| 121 * "spot_len" [ DATA ] - computed spot length value | |
| 122 * | |
| 123 * "fixed_len" [ DATA, DFLT ZERO ] - the stated fixed length of all spots | |
| 124 * or zero if not fixed length | |
| 125 * | |
| 126 * "sig_len" [ DATA, DFLT ZERO ] - the length of signal/intensity data | |
| 127 * or zero if not present | |
| 128 * | |
| 129 * "trim_start" [ DATA ] - the first base included in the trim segment ( 0-based, INSDC:coord:zero ) | |
| 130 * | |
| 131 * "trim_len" [ DATA ] - the length of the trim segment | |
| 132 * | |
| 133 * "num_reads" [ DATA ] - 1..n value, passed as U8 | |
| 134 */ | |
| 135 extern function NCBI:SRA:SpotDesc NCBI:SRA:make_spot_desc #1 ( INSDC:coord:len spot_len, | |
| 136 INSDC:coord:len fixed_len, INSDC:coord:len sig_len, INSDC:coord:zero trim_start, | |
| 137 INSDC:coord:len trim_len, U8 num_reads ); | |
| 138 | |
| 139 | |
| 140 /* make_read_desc | |
| 141 * assembles several bits of information together into a "C" structure, returned as NCBI:SRA:ReadDesc | |
| 142 * in theory resultant segments may intersect other read segments or leave holes in spot. | |
| 143 * | |
| 144 * "num_reads" [ DATA ] - value indicating the resulting row-length of output | |
| 145 * | |
| 146 * "read_start" [ DATA ] - ordered starting coordinates for each read | |
| 147 * not required to be sequential. | |
| 148 * | |
| 149 * "read_len" [ DATA ] - ordered lengths of each read. may be zero when | |
| 150 * read has been described but is not identified in spot. | |
| 151 * | |
| 152 * "read_type" [ DATA ] - ordered type id describing each read | |
| 153 * | |
| 154 * "read_filt" [ DATA ] - ordered read filters | |
| 155 * | |
| 156 * "cs_key" [ DATA ] - ordered color-space keys, typed INSDC:dna:text | |
| 157 * | |
| 158 * "label_start" [ DATA ] - ordered starting coordinates for each label ( 0-based, INSDC:coord:zero ) | |
| 159 * "label_len" [ DATA ] - ordered lengths of each label | |
| 160 * | |
| 161 * "label" [ DATA ] - complete sequence of label characters, possibly empty | |
| 162 * individual read labels are identified as {start,len} pairs taken from label_start and label_len | |
| 163 */ | |
| 164 extern function NCBI:SRA:ReadDesc NCBI:SRA:make_read_desc #1 ( U8 num_reads, | |
| 165 INSDC:coord:zero read_start, INSDC:coord:len read_len, INSDC:SRA:xread_type read_type, | |
| 166 INSDC:SRA:read_filter read_filt, INSDC:dna:text cs_key, | |
| 167 INSDC:coord:zero label_start, INSDC:coord:len label_len, ascii label ); | |
| 168 | |
| 169 | |
| 170 /* rotate | |
| 171 * rotate a quadruple by called base; the result has the same type T as the input | |
| 172 * now normally replaced by swap | |
| 173 * | |
| 174 * "T" [ TYPE ] - element type of quadruple to be rotated | |
| 175 * | |
| 176 * "encoding" [ CONST ] - when true, rotate input left until corresponding | |
| 177 * element is in slot 0. when false, rotate input right to restore original | |
| 178 * order. | |
| 179 * | |
| 180 * "in" [ DATA ] - data to be rotated, qualities, signal, intensities... | |
| 181 * | |
| 182 * "called" [ DATA ] - {0..3} or {0..4} binary representation of called bases or colors, passed as U8 | |
| 183 */ | |
| 184 extern function < type T > | |
| 185 T NCBI:SRA:rotate #1 < bool encoding > ( T in, U8 called ); | |
| 186 | |
| 187 | |
| 188 /* swap | |
| 189 * swap element 0 and the called element; the result has the same type T as the input | |
| 190 * used to ensure that the called element is in slot 0 | |
| 191 * | |
| 192 * "T" [ TYPE ] - element type of quadruple to be swapped | |
| 193 * | |
| 194 * "in" [ DATA ] - data to be swapped, qualities, signal, intensities... | |
| 195 * | |
| 196 * "called" [ DATA ] - {0..3} or {0..4} binary representation of called bases or colors, passed as U8 | |
| 197 */ | |
| 198 extern function < type T > | |
| 199 T NCBI:SRA:swap #1 ( T in, U8 called ); | |
| 200 | |
| 201 | |
| 202 /* normalize | |
| 203 * denormalize | |
| 204 * two functions with the same signature: each takes intensity data plus the called values and returns type T | |
| 205 * "T" [ TYPE ] - element type of quadruple to be [de]normalized | |
| 206 * | |
| 207 * "intensity" [ DATA ] - intensity data | |
| 208 * | |
| 209 * "called" [ DATA ] - {0..3} or {0..4} binary representation of called bases or colors | |
| 210 * NOTE(review): the transform itself is not described here; presumably denormalize reverses normalize - confirm against the implementation */ | |
| 211 extern function < type T > | |
| 212 T NCBI:SRA:normalize #1 ( T intensity, U8 called ); | |
| 213 | |
| 214 extern function < type T > | |
| 215 T NCBI:SRA:denormalize #1 ( T intensity, U8 called ); | |
| 216 | |
| 217 | |
| 218 /* make_position | |
| 219 * return a synthesized position row with 1-1 correspondence | |
| 220 * | |
| 221 * "T" [ TYPE ] - position type being generated; also the type of the "start" constant and of the result | |
| 222 * | |
| 223 * "start" [ CONST ] - either 0 or 1, depending upon the coordinate system | |
| 224 * | |
| 225 * "bases" [ DATA ] - the actual row of bases. the output row | |
| 226 * will be the same length, but with synthesized data | |
| 227 * "bases" is declared with type any, so no particular base representation is required by the signature */ | |
| 228 extern function < type T > | |
| 229 T NCBI:SRA:make_position #1 < T start > ( any bases ); | |
| 230 | |
| 231 /* fsamp4 compression | |
| 232 * performs compression individually | |
| 233 * on called channel and alternate channels | |
| 234 * decode: split the merged input into stream 0 and stream 1, funzip each to F32, regroup the second into F32[3], then paste it after channel 0 and cast to NCBI:SRA:swapped_fsamp4 */ | |
| 235 function NCBI:SRA:swapped_fsamp4 NCBI:SRA:fsamp4:decode #2 ( merged_fmt in ) | |
| 236 { | |
| 237 fzip_fmt cmp0 = split < 0 > ( in ); | |
| 238 fzip_fmt cmp123 = split < 1 > ( in ); | |
| 239 F32 ch0 = funzip ( cmp0 ); | |
| 240 F32 ch123a = funzip ( cmp123 ); | |
| 241 F32[3] ch123 = redimension ( ch123a ); | |
| 242 return ( NCBI:SRA:swapped_fsamp4 ) < F32 > paste ( ch0, ch123 ); | |
| 243 } | |
| 244 | |
| 245 function merged_fmt NCBI:SRA:fsamp4:encode #2 < U32 called, U32 alt > ( NCBI:SRA:swapped_fsamp4 in ) | |
| 246 { /* cut element 0 and elements 1, 2, 3 apart, fzip element 0 with the called parameter and the flattened remainder with the alt parameter, then merge both streams */ | |
| 247 F32 ch0 = < F32 > cut < 0 > ( in ); | |
| 248 F32[3] ch123 = < F32 > cut < 1, 2, 3 > ( in ); | |
| 249 fzip_fmt cmp0 = fzip < called > ( ch0 ); | |
| 250 F32 ch123a = redimension ( ch123 ); | |
| 251 fzip_fmt cmp123 = fzip < alt > ( ch123a ); | |
| 252 return merge ( cmp0, cmp123 ); | |
| 253 } | |
| 254 | |
| 255 | |
| 256 /*-------------------------------------------------------------------------- | |
| 257 * spotdesc | |
| 258 * NCBI implementation productions | |
| 259 */ | |
| 260 | |
| 261 /* history: | |
| 262 * 1.0.1 - base explicitly upon sequence #1.0.1, spotdesc #1.0.1 | |
| 263 * 1.0.2 - spotdesc #1.0.2 | |
| 264 * this table adds the readonly columns LABEL_SEG, READ_SEG, READ_DESC, SPOT_DESC and SIGNAL_LEN; no physical columns are declared here */ | |
| 265 table NCBI:SRA:tbl:spotdesc_nocol #1.0.2 = INSDC:tbl:sequence #1.0.1, INSDC:SRA:tbl:spotdesc #1.0.2 | |
| 266 { | |
| 267 /* LABEL_SEG | |
| 268 * alternatives, in the order listed: out_label_seg, a cast of out_label_seg32, a cast of the U32 pair pasted from out_label_start and out_label_len */ | |
| 269 readonly column NCBI:SRA:Segment LABEL_SEG | |
| 270 = out_label_seg | |
| 271 | cast ( out_label_seg32 ) | |
| 272 | cast ( _out_label_seg32 ); | |
| 273 U32 _out_label_startU32 = ( U32 ) out_label_start; | |
| 274 U32 [ 2 ] _out_label_seg32 = < U32 > paste ( _out_label_startU32, out_label_len ); | |
| 275 | |
| 276 | |
| 277 /* READ_SEG | |
| 278 * alternatives, in the order listed: out_read_seg, a cast of out_read_seg32, a cast of the U32 pair pasted from out_read_start and out_read_len */ | |
| 279 readonly column NCBI:SRA:Segment READ_SEG | |
| 280 = out_read_seg | |
| 281 | cast ( out_read_seg32 ) | |
| 282 | cast ( _out_read_seg32 ); | |
| 283 U32 _out_read_startU32 = ( U32 ) out_read_start; | |
| 284 U32 [ 2 ] _out_read_seg32 = < U32 > paste ( _out_read_startU32, out_read_len ); | |
| 285 | |
| 286 | |
| 287 /* READ_DESC | |
| 288 * built by NCBI:SRA:make_read_desc; each _out_* production below lists the out_* production first and an echo of a constant second ( SRA_READ_FILTER_PASS, empty label, 0 ) */ | |
| 289 readonly column NCBI:SRA:ReadDesc READ_DESC | |
| 290 = NCBI:SRA:make_read_desc ( out_nreads, out_read_start, out_read_len, | |
| 291 out_read_type, _out_rd_filter, out_cs_key, _out_label_start, _out_label_len, _out_label ); | |
| 292 INSDC:SRA:read_filter _out_rd_filter | |
| 293 = out_rd_filter | |
| 294 | < INSDC:SRA:read_filter > echo < SRA_READ_FILTER_PASS > ( out_read_start ); | |
| 295 ascii _out_label | |
| 296 = out_label | |
| 297 | < ascii > echo < '' > (); | |
| 298 INSDC:coord:zero _out_label_start | |
| 299 = out_label_start | |
| 300 | < INSDC:coord:zero > echo < 0 > ( out_read_start ); | |
| 301 INSDC:coord:len _out_label_len | |
| 302 = out_label_len | |
| 303 | < INSDC:coord:len > echo < 0 > ( out_read_start ); | |
| 304 | |
| 305 /* SPOT_DESC | |
| 306 * built by NCBI:SRA:make_spot_desc from spot_len, fixed_spot_len, signal_len, trim_start, trim_len and out_nreads */ | |
| 307 readonly column NCBI:SRA:SpotDesc SPOT_DESC | |
| 308 = NCBI:SRA:make_spot_desc ( spot_len, fixed_spot_len, signal_len, | |
| 309 trim_start, trim_len, out_nreads ); | |
| 310 | |
| 311 /* SIGNAL_LEN | |
| 312 * normally the same as spot length when present, | |
| 313 * but in some cases ( e.g. 454 ) it may be different | |
| 314 * offered as INSDC:coord:len and, through a cast, as U16 */ | |
| 315 readonly column INSDC:coord:len SIGNAL_LEN | |
| 316 = signal_len; | |
| 317 readonly column U16 SIGNAL_LEN | |
| 318 = cast ( signal_len ); | |
| 319 | |
| 320 | |
| 321 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 322 * trim_len | |
| 323 * out_label | |
| 324 * out_nreads | |
| 325 * trim_start | |
| 326 * out_read_len | |
| 327 * out_label_len | |
| 328 * out_rd_filter | |
| 329 * out_read_type | |
| 330 * out_read_start | |
| 331 * out_label_start | |
| 332 * static_fixed_spot_len | |
| 333 */ | |
| 334 | |
| 335 /* NCBI:SRA:tbl:spotdesc_nocol productions | |
| 336 * out_read_seg | |
| 337 * out_label_seg | |
| 338 * out_read_seg32 | |
| 339 * out_label_seg32 | |
| 340 */ | |
| 341 }; | |
| 342 | |
| 343 /* history: | |
| 344 * 1.0.1 - base explicitly upon spotdesc_nocol #1.0.1 | |
| 345 * 1.0.2 - base explicitly upon spotdesc_nocol #1.0.2 | |
| 346 * this table defines the out_* productions in terms of dotted names ( .NREADS, .LABEL, ... ) without declaring those columns itself */ | |
| 347 table NCBI:SRA:tbl:spotdesc_nophys #1.0.2 = NCBI:SRA:tbl:spotdesc_nocol #1.0.2 | |
| 348 { | |
| 349 // resolve virtual productions; start and len list the dedicated column first, then element 0 or 1 cut from the 32-bit segment pair | |
| 350 U8 out_nreads = .NREADS; | |
| 351 ascii out_label = .LABEL; | |
| 352 INSDC:SRA:xread_type out_read_type = .READ_TYPE; | |
| 353 INSDC:SRA:read_filter out_rd_filter = .RD_FILTER; | |
| 354 | |
| 355 INSDC:coord:zero out_label_start | |
| 356 = .LABEL_START | |
| 357 | ( INSDC:coord:zero ) < U32 > cut < 0 > ( out_label_seg32 ); | |
| 358 INSDC:coord:len out_label_len | |
| 359 = .LABEL_LEN | |
| 360 | ( INSDC:coord:len ) < U32 > cut < 1 > ( out_label_seg32 ); | |
| 361 U32 [ 2 ] out_label_seg32 | |
| 362 = cast ( .LABEL_SEG ); | |
| 363 | |
| 364 INSDC:coord:zero out_read_start | |
| 365 = .READ_START | |
| 366 | ( INSDC:coord:zero ) < U32 > cut < 0 > ( out_read_seg32 ); | |
| 367 INSDC:coord:len out_read_len | |
| 368 = .READ_LEN | |
| 369 | ( INSDC:coord:len ) < U32 > cut < 1 > ( out_read_seg32 ); | |
| 370 U32 [ 2 ] out_read_seg32 | |
| 371 = NCBI:SRA:fix_read_seg ( .READ_SEG, spot_len ); | |
| 372 | |
| 373 | |
| 374 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 375 * trim_len | |
| 376 * trim_start | |
| 377 * out_read_type | |
| 378 * static_fixed_spot_len | |
| 379 */ | |
| 380 | |
| 381 /* NCBI:SRA:tbl:spotdesc_nocol inherited productions | |
| 382 * out_read_seg | |
| 383 * out_label_seg | |
| 384 */ | |
| 385 | |
| 386 /* NCBI:SRA:tbl:spotdesc_nophys productions | |
| 387 * .LABEL | |
| 388 * .NREADS | |
| 389 * .READ_LEN | |
| 390 * .READ_SEG | |
| 391 * .LABEL_LEN | |
| 392 * .LABEL_SEG | |
| 393 * .RD_FILTER | |
| 394 * .READ_TYPE | |
| 395 * .READ_START | |
| 396 * .LABEL_START | |
| 397 */ | |
| 398 } | |
| 399 | |
| 400 /* history: | |
| 401 * 1.0.1 - base explicitly upon spotdesc_nophys #1.0.1 | |
| 402 * 1.0.2 - base explicitly upon spotdesc_nophys #1.0.2 | |
| 403 * this table declares the physical columns: zip_encoding for .NREADS, .LABEL, .READ_TYPE and .RD_FILTER, izip_encoding for the start and length columns */ | |
| 404 table NCBI:SRA:tbl:spotdesc #1.0.2 = NCBI:SRA:tbl:spotdesc_nophys #1.0.2 | |
| 405 { | |
| 406 // physical column encodings; .READ_LEN and .READ_TYPE are assigned from in_read_len and in_read_type, the others from upper-case names | |
| 407 // TBD - this has to be looked at, where dynamic segmentation is involved | |
| 408 physical column < U8 > zip_encoding .NREADS = NREADS; | |
| 409 physical column < ascii > zip_encoding .LABEL = LABEL; | |
| 410 physical column < INSDC:coord:zero > izip_encoding .LABEL_START = LABEL_START; | |
| 411 physical column < INSDC:coord:len > izip_encoding .LABEL_LEN = LABEL_LEN; | |
| 412 physical column < INSDC:coord:zero > izip_encoding .READ_START = READ_START; | |
| 413 physical column < INSDC:coord:len > izip_encoding .READ_LEN = in_read_len; | |
| 414 physical column < INSDC:SRA:xread_type > zip_encoding .READ_TYPE = in_read_type; | |
| 415 physical column < INSDC:SRA:read_filter > zip_encoding .RD_FILTER = READ_FILTER; | |
| 416 | |
| 417 | |
| 418 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 419 * trim_len | |
| 420 * trim_start | |
| 421 * out_read_type | |
| 422 * static_fixed_spot_len | |
| 423 */ | |
| 424 | |
| 425 /* NCBI:SRA:tbl:spotdesc_nocol inherited productions | |
| 426 * out_read_seg | |
| 427 * out_label_seg | |
| 428 */ | |
| 429 }; | |
| 430 | |
| 431 | |
| 432 /*-------------------------------------------------------------------------- | |
| 433 * pos | |
| 434 * synthetic POSITION column on read | |
| 435 * out_position and the 16-bit out_position16 are generated by NCBI:SRA:make_position with start 1, listing out_2na_packed first and out_2cs_packed second | |
| 436 * history: | |
| 437 * 1.0.1 - base explicitly upon sequence #1.0.1 | |
| 438 */ | |
| 439 | |
| 440 table NCBI:SRA:tbl:pos #1.0.1 = INSDC:tbl:sequence #1.0.1 | |
| 441 { | |
| 442 INSDC:position:one out_position | |
| 443 = < INSDC:position:one > NCBI:SRA:make_position < 1 > ( out_2na_packed ) | |
| 444 | < INSDC:position:one > NCBI:SRA:make_position < 1 > ( out_2cs_packed ); | |
| 445 NCBI:SRA:pos16 out_position16 | |
| 446 = < NCBI:SRA:pos16 > NCBI:SRA:make_position < 1 > ( out_2na_packed ) | |
| 447 | < NCBI:SRA:pos16 > NCBI:SRA:make_position < 1 > ( out_2cs_packed ); | |
| 448 }; | |
| 449 | |
| 450 | |
| 451 /*-------------------------------------------------------------------------- | |
| 452 * sra | |
| 453 * the NCBI SRA table | |
| 454 */ | |
| 455 | |
| 456 /* history: | |
| 457 * 1.0.1 - base explicitly upon sra #1.0.1 | |
| 458 * 1.0.2 - base explicitly upon sra #1.0.2, spotdesc_nocol #1.0.1 | |
| 459 * 1.0.3 - base explicitly upon sra #1.0.3, spotdesc_nocol #1.0.2 | |
| 460 * this table declares POSITION in three types but does not define out_position or out_position16; both are listed as its own productions at the end of the body */ | |
| 461 table NCBI:SRA:tbl:sra_nopos #1.0.3 = INSDC:SRA:tbl:sra #1.0.3, NCBI:SRA:tbl:spotdesc_nocol #1.0.2 | |
| 462 { | |
| 463 // v1 declares the POSITION column for all tables | |
| 464 // but leaves all physical columns unstated | |
| 465 | |
| 466 /* POSITION | |
| 467 * 1-based coordinates | |
| 468 * describes a base's position on signal | |
| 469 * the INSDC:position:one form is the only POSITION declaration here without the readonly keyword */ | |
| 470 column INSDC:position:one POSITION = out_position; | |
| 471 readonly column NCBI:SRA:pos16 POSITION = out_position16; | |
| 472 | |
| 473 // zero-based coordinates available upon request, derived from out_position through diff < 1 > | |
| 474 readonly column INSDC:position:zero POSITION | |
| 475 = ( INSDC:position:zero ) < I32 > diff < 1 > ( out_position ); | |
| 476 | |
| 477 // statistics obtained through meta:value; spot_count lists node .seq/spot first and node .seq second | |
| 478 U64 base_count | |
| 479 = < U64 > meta:value < "BASE_COUNT" > (); | |
| 480 U64 spot_count | |
| 481 = < U64 > meta:value < ".seq/spot" > () | |
| 482 | < U64 > meta:value < ".seq" > () ; | |
| 483 | |
| 484 | |
| 485 /* INSDC:tbl:sequence inherited productions | |
| 486 * cs_native | |
| 487 * in_cs_key | |
| 488 * out_cs_key | |
| 489 * out_signal | |
| 490 * in_dna_text | |
| 491 * out_2cs_bin | |
| 492 * out_2na_bin | |
| 493 * out_4na_bin | |
| 494 * out_dna_text | |
| 495 * out_x2cs_bin | |
| 496 * out_x2na_bin | |
| 497 * in_color_text | |
| 498 * out_2cs_packed | |
| 499 * out_2na_packed | |
| 500 * out_4na_packed | |
| 501 * out_color_text | |
| 502 * out_qual_phred | |
| 503 * out_color_matrix | |
| 504 */ | |
| 505 | |
| 506 /* INSDC:SRA:tbl:spotname inherited productions | |
| 507 * out_x_coord | |
| 508 * out_y_coord | |
| 509 * out_name_fmt | |
| 510 * out_spot_name | |
| 511 * spot_ids_found | |
| 512 */ | |
| 513 | |
| 514 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 515 * trim_len | |
| 516 * out_label | |
| 517 * out_nreads | |
| 518 * trim_start | |
| 519 * out_read_len | |
| 520 * out_label_len | |
| 521 * out_rd_filter | |
| 522 * out_read_type | |
| 523 * out_read_start | |
| 524 * out_label_start | |
| 525 * static_fixed_spot_len | |
| 526 */ | |
| 527 | |
| 528 /* INSDC:SRA:tbl:stats inherited productions | |
| 529 * max_spot_id | |
| 530 * min_spot_id | |
| 531 * in_stats_bin | |
| 532 * bio_base_count | |
| 533 */ | |
| 534 | |
| 535 /* INSDC:SRA:tbl:sra inherited productions | |
| 536 * out_platform | |
| 537 * platform_name | |
| 538 */ | |
| 539 | |
| 540 /* NCBI:SRA:tbl:spotdesc_nocol inherited productions | |
| 541 * out_read_seg | |
| 542 * out_label_seg | |
| 543 * out_read_seg32 | |
| 544 * out_label_seg32 | |
| 545 */ | |
| 546 | |
| 547 /* NCBI:SRA:tbl:sra_nopos productions | |
| 548 * out_position | |
| 549 * out_position16 | |
| 550 */ | |
| 551 }; | |
| 552 | |
| 553 /* history: | |
| 554 * 1.0.1 - base explicitly upon sra #1.0.1 | |
| 555 * 1.0.2 - base explicitly upon sra_nopos #1.0.2, pos #1.0.1 | |
| 556 * 1.0.3 - base explicitly upon sra_nopos #1.0.3 | |
| 557 * the body holds comments only; the table combines sra_nopos #1.0.3 with pos #1.0.1, which defines out_position and out_position16 */ | |
| 558 table NCBI:SRA:tbl:sra #1.0.3 = NCBI:SRA:tbl:sra_nopos #1.0.3, NCBI:SRA:tbl:pos #1.0.1 | |
| 559 { | |
| 560 // the POSITION column is synthesized for all contemporary platforms but 454 | |
| 561 | |
| 562 /* INSDC:tbl:sequence inherited productions | |
| 563 * cs_native | |
| 564 * in_cs_key | |
| 565 * out_cs_key | |
| 566 * out_signal | |
| 567 * in_dna_text | |
| 568 * out_2cs_bin | |
| 569 * out_2na_bin | |
| 570 * out_4na_bin | |
| 571 * out_dna_text | |
| 572 * out_x2cs_bin | |
| 573 * out_x2na_bin | |
| 574 * in_color_text | |
| 575 * out_2cs_packed | |
| 576 * out_2na_packed | |
| 577 * out_4na_packed | |
| 578 * out_color_text | |
| 579 * out_qual_phred | |
| 580 * out_color_matrix | |
| 581 */ | |
| 582 | |
| 583 /* INSDC:SRA:tbl:spotname inherited productions | |
| 584 * out_x_coord | |
| 585 * out_y_coord | |
| 586 * out_name_fmt | |
| 587 * out_spot_name | |
| 588 * spot_ids_found | |
| 589 */ | |
| 590 | |
| 591 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 592 * trim_len | |
| 593 * out_label | |
| 594 * out_nreads | |
| 595 * trim_start | |
| 596 * out_read_len | |
| 597 * out_label_len | |
| 598 * out_rd_filter | |
| 599 * out_read_type | |
| 600 * out_read_start | |
| 601 * out_label_start | |
| 602 * static_fixed_spot_len | |
| 603 */ | |
| 604 | |
| 605 /* INSDC:SRA:tbl:stats inherited productions | |
| 606 * max_spot_id | |
| 607 * min_spot_id | |
| 608 * in_stats_bin | |
| 609 * bio_base_count | |
| 610 */ | |
| 611 | |
| 612 /* INSDC:SRA:tbl:sra inherited productions | |
| 613 * out_platform | |
| 614 * platform_name | |
| 615 */ | |
| 616 | |
| 617 /* NCBI:SRA:tbl:spotdesc_nocol inherited productions | |
| 618 * out_read_seg | |
| 619 * out_label_seg | |
| 620 * out_read_seg32 | |
| 621 * out_label_seg32 | |
| 622 */ | |
| 623 }; | |
| 624 | |
| 625 | |
| 626 /* v2 consolidates many of the auxiliary columns into a single treatment | |
| 627 * left out are reads, qualities and platform-specific columns | |
| 628 * the 16-bit POSITION column is out_position passed through clip < 0, 0xFFFF > and then cast | |
| 629 * history: | |
| 630 * 2.1.2 - base upon sra #1.0.3, spotdesc #1.0.2, stats #1.1.2 | |
| 631 * NOTE(review): the table below is declared as #2.1.3 and inherits stats #1.2.0, but the history stops at 2.1.2 - add the missing entry */ | |
| 632 table NCBI:SRA:tbl:sra_nopos #2.1.3 = INSDC:SRA:tbl:sra #1.0.3, | |
| 633 NCBI:SRA:tbl:skeyname #3.0.1, NCBI:SRA:tbl:spotdesc #1.0.2, NCBI:SRA:tbl:stats #1.2.0 | |
| 634 { | |
| 635 // this is already specified in INSDC:SRA:tbl:sra #1 | |
| 636 // but putting it here will quiet down outputs | |
| 637 INSDC:SRA:platform_id out_platform = .PLATFORM; | |
| 638 | |
| 639 column INSDC:position:one POSITION | |
| 640 = out_position; | |
| 641 readonly column NCBI:SRA:pos16 POSITION | |
| 642 = cast ( _clip_position ); | |
| 643 INSDC:position:one _clip_position | |
| 644 = < INSDC:position:one > clip < 0, 0xFFFF > ( out_position ); | |
| 645 readonly column INSDC:position:zero POSITION | |
| 646 = ( INSDC:position:zero ) < I32 > diff < 1 > ( out_position ); | |
| 647 | |
| 648 | |
| 649 /* INSDC:tbl:sequence inherited productions | |
| 650 * cs_native | |
| 651 * in_cs_key | |
| 652 * out_cs_key | |
| 653 * out_signal | |
| 654 * in_dna_text | |
| 655 * out_2cs_bin | |
| 656 * out_2na_bin | |
| 657 * out_4na_bin | |
| 658 * out_dna_text | |
| 659 * out_x2cs_bin | |
| 660 * out_x2na_bin | |
| 661 * in_color_text | |
| 662 * out_2cs_packed | |
| 663 * out_2na_packed | |
| 664 * out_4na_packed | |
| 665 * out_color_text | |
| 666 * out_qual_phred | |
| 667 * out_color_matrix | |
| 668 */ | |
| 669 | |
| 670 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 671 * trim_len | |
| 672 * trim_start | |
| 673 * out_read_type | |
| 674 * static_fixed_spot_len | |
| 675 */ | |
| 676 | |
| 677 /* INSDC:SRA:tbl:stats inherited productions | |
| 678 * in_stats_bin | |
| 679 */ | |
| 680 | |
| 681 /* INSDC:SRA:tbl:sra inherited productions | |
| 682 * out_platform | |
| 683 * platform_name | |
| 684 */ | |
| 685 | |
| 686 /* NCBI:SRA:tbl:skeyname inherited productions | |
| 687 * in_spot_name_tok | |
| 688 */ | |
| 689 | |
| 690 /* NCBI:SRA:tbl:spotdesc_nocol inherited productions | |
| 691 * out_read_seg | |
| 692 * out_label_seg | |
| 693 */ | |
| 694 | |
| 695 /* NCBI:SRA:tbl:sra_nopos productions | |
| 696 * out_position | |
| 697 */ | |
| 698 }; | |
| 699 | |
| 700 /* most platforms don't have a native POSITION | |
| 701 * mix in "pos" table to synthesize it | |
| 702 * all three POSITION declarations below are readonly; out_position and out_position16 are defined by pos #1.0.1 | |
| 703 * history: | |
| 704 * 2.1.2 - base upon sra #1.0.3, spotdesc #1.0.2, stats #1.1.2 | |
| 705 * NOTE(review): the table below is declared as #2.1.3 and inherits stats #1.2.0, but the history stops at 2.1.2 - add the missing entry */ | |
| 706 table NCBI:SRA:tbl:sra #2.1.3 = INSDC:SRA:tbl:sra #1.0.3, | |
| 707 NCBI:SRA:tbl:skeyname #3.0.1, NCBI:SRA:tbl:spotdesc #1.0.2, | |
| 708 NCBI:SRA:tbl:stats #1.2.0, NCBI:SRA:tbl:pos #1.0.1 | |
| 709 { | |
| 710 readonly column INSDC:position:one POSITION | |
| 711 = out_position; | |
| 712 readonly column NCBI:SRA:pos16 POSITION | |
| 713 = out_position16; | |
| 714 readonly column INSDC:position:zero POSITION | |
| 715 = ( INSDC:position:zero ) < I32 > diff < 1 > ( out_position ); | |
| 716 | |
| 717 | |
| 718 /* INSDC:tbl:sequence inherited productions | |
| 719 * cs_native | |
| 720 * in_cs_key | |
| 721 * out_cs_key | |
| 722 * out_signal | |
| 723 * in_dna_text | |
| 724 * out_2cs_bin | |
| 725 * out_2na_bin | |
| 726 * out_4na_bin | |
| 727 * out_dna_text | |
| 728 * out_x2cs_bin | |
| 729 * out_x2na_bin | |
| 730 * in_color_text | |
| 731 * out_2cs_packed | |
| 732 * out_2na_packed | |
| 733 * out_4na_packed | |
| 734 * out_color_text | |
| 735 * out_qual_phred | |
| 736 * out_color_matrix | |
| 737 */ | |
| 738 | |
| 739 /* INSDC:SRA:tbl:spotdesc inherited productions | |
| 740 * trim_len | |
| 741 * trim_start | |
| 742 * out_read_type | |
| 743 * static_fixed_spot_len | |
| 744 */ | |
| 745 | |
| 746 /* INSDC:SRA:tbl:stats inherited productions | |
| 747 * in_stats_bin | |
| 748 */ | |
| 749 | |
| 750 /* INSDC:SRA:tbl:sra inherited productions | |
| 751 * out_platform | |
| 752 * platform_name | |
| 753 */ | |
| 754 | |
| 755 /* NCBI:SRA:tbl:skeyname inherited productions | |
| 756 * in_spot_name_tok | |
| 757 */ | |
| 758 }; | |
