Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/seq.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 /*========================================================================== | |
| 28 * Sequence schema | |
| 29 */ | |
| 30 version 1; | |
| 31 | |
| 32 include 'vdb/vdb.vschema'; | |
| 33 include 'insdc/insdc.vschema'; | |
| 34 | |
| 35 | |
| 36 /*-------------------------------------------------------------------------- | |
| 37 * rand_4na_2na | |
| 38 * converts 4na to 2na | |
| 39 * | |
| 40 * substitutes a random base for ambiguities | |
| 41 * from the bases allowed in the 4na. | |
| 42 * | |
| 43 * A | C | G | T | |
| 44 * ================= | |
| 45 * N | | | # no base marked: any base may be substituted | |
| 46 * A * | | | # always A | |
| 47 * C | * | | # always C | |
| 48 * M * | * | | # A or C | |
| 49 * G | | * | # always G | |
| 50 * R * | | * | # A or G | |
| 51 * S | * | * | # C or G | |
| 52 * V * | * | * | # A, C or G | |
| 53 * T | | | * # always T | |
| 54 * W * | | | * # A or T | |
| 55 * Y | * | | * # C or T | |
| 56 * H * | * | | * # A, C or T | |
| 57 * K | | * | * # G or T | |
| 58 * D * | | * | * # A, G or T | |
| 59 * B | * | * | * # C, G or T | |
| 60 * N * | * | * | * # any base may be substituted | |
| 61 */ | |
| 62 extern function | |
| 63 INSDC:2na:bin INSDC:SEQ:rand_4na_2na #1 ( INSDC:4na:bin rd_bin ); | |
| 64 | |
| 65 | |
| 66 /*-------------------------------------------------------------------------- | |
| 67 * sequence | |
| 68 * basic sequence table | |
| 69 * | |
| 70 * history: | |
| 71 * 1.0.1 - introduced text-mode QUALITY columns | |
| 72 */ | |
| 73 table INSDC:tbl:sequence #1.0.1 | |
| 74 { | |
| 75 /* READ | |
| 76 * native or converted DNA sequence | |
| 77 */ | |
| 78 | |
| 79 // default is IUPAC character representation | |
| 80 extern default column INSDC:dna:text READ | |
| 81 { | |
| 82 read = out_dna_text; | |
| 83 validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text ); | |
| 84 } | |
| 85 | |
| 86 // 4na representation - unpacked and packed | |
| 87 extern column INSDC:4na:bin READ = out_4na_bin; | |
| 88 extern column INSDC:4na:packed READ = out_4na_packed; | |
| 89 | |
| 90 // x2na representation - 2na with ambiguity | |
| 91 extern column INSDC:x2na:bin READ = out_x2na_bin; | |
| 92 | |
| 93 // 2na representation - 2na with no ambiguity - unpacked and packed | |
| 94 extern column INSDC:2na:bin READ = out_2na_bin; | |
| 95 extern column INSDC:2na:packed READ = out_2na_packed; | |
| 96 | |
| 97 | |
| 98 | |
| 99 /* CSREAD | |
| 100 * native or converted color-space sequence | |
| 101 */ | |
| 102 | |
| 103 // default is ASCII character representation | |
| 104 extern default column INSDC:color:text CSREAD | |
| 105 { | |
| 106 read = out_color_text; | |
| 107 validate = < INSDC:color:text > compare ( in_color_text, out_color_text ); | |
| 108 } | |
| 109 | |
| 110 // x2cs representation - 2cs with ambiguity | |
| 111 extern column INSDC:x2cs:bin CSREAD = out_x2cs_bin; | |
| 112 | |
| 113 // 2cs representation - 2cs with no ambiguity - unpacked and packed | |
| 114 extern column INSDC:2cs:bin CSREAD = out_2cs_bin; | |
| 115 extern column INSDC:2cs:packed CSREAD = out_2cs_packed; | |
| 116 | |
| 117 /* CS_NATIVE | |
| 118 * true if color-space is the native sequence space | |
| 119 */ | |
| 120 readonly column bool CS_NATIVE = cs_native; | |
| 121 | |
| 122 /* CS_KEY | |
| 123 * leading call given in base-space | |
| 124 */ | |
| 125 extern column INSDC:dna:text CS_KEY | |
| 126 { | |
| 127 read = out_cs_key; | |
| 128 validate = < INSDC:dna:text > compare ( in_cs_key, out_cs_key ); | |
| 129 } | |
| 130 | |
| 131 /* COLOR_MATRIX | |
| 132 * matrix used for color-space conversions | |
| 133 */ | |
| 134 extern column U8 COLOR_MATRIX = out_color_matrix; | |
| 135 | |
| 136 | |
| 137 /* QUALITY | |
| 138 * base or color call qualities | |
| 139 */ | |
| 140 | |
| 141 // PHRED is default | |
| 142 extern default column INSDC:quality:phred QUALITY = out_qual_phred; | |
| 143 | |
| 144 // textual encodings | |
| 145 extern column INSDC:quality:text:phred_33 QUALITY | |
| 146 = out_qual_text_phred_33 | |
| 147 | ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred ); | |
| 148 extern column INSDC:quality:text:phred_64 QUALITY | |
| 149 = out_qual_text_phred_64 | |
| 150 | ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred ); | |
| 151 | |
| 152 | |
| 153 /* SIGNAL | |
| 154 * signal and intensity information is unspecified | |
| 155 */ | |
| 156 INSDC:coord:len signal_len | |
| 157 = ( INSDC:coord:len ) row_len ( out_signal ) | |
| 158 | < INSDC:coord:len > echo < 0 > (); | |
| 159 | |
| 160 | |
| 161 /* VIRTUAL PRODUCTIONS | |
| 162 * cs_native | |
| 163 * in_cs_key | |
| 164 * out_cs_key | |
| 165 * out_signal | |
| 166 * in_dna_text | |
| 167 * out_2cs_bin | |
| 168 * out_2na_bin | |
| 169 * out_4na_bin | |
| 170 * out_dna_text | |
| 171 * out_x2cs_bin | |
| 172 * out_x2na_bin | |
| 173 * in_color_text | |
| 174 * out_2cs_packed | |
| 175 * out_2na_packed | |
| 176 * out_4na_packed | |
| 177 * out_color_text | |
| 178 * out_qual_phred | |
| 179 * out_color_matrix | |
| 180 */ | |
| 181 }; | |
| 182 | |
| 183 | |
| 184 /*-------------------------------------------------------------------------- | |
| 185 * protein | |
| 186 * basic protein sequence table | |
| 187 */ | |
| 188 table INSDC:tbl:protein #1 | |
| 189 { | |
| 190 /* PROTEIN | |
| 191 * native or converted protein sequence | |
| 192 */ | |
| 193 | |
| 194 // default is IUPAC character representation | |
| 195 extern default column INSDC:protein:text PROTEIN | |
| 196 { | |
| 197 read = out_protein_text; | |
| 198 validate = < INSDC:protein:text > compare ( in_protein_text, out_protein_text ); | |
| 199 } | |
| 200 | |
| 201 // aa representation | |
| 202 extern column INSDC:aa:bin PROTEIN = out_aa_bin; | |
| 203 | |
| 204 | |
| 205 /* INSDC:tbl:protein productions | |
| 206 * out_aa_bin | |
| 207 * in_protein_text | |
| 208 * out_protein_text | |
| 209 */ | |
| 210 }; |
