comparison libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/seq.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /*==========================================================================
28 * Sequence schema
29 */
30 version 1;
31
32 include 'vdb/vdb.vschema';
33 include 'insdc/insdc.vschema';
34
35
36 /*--------------------------------------------------------------------------
37 * rand_4na_2na
38 * converts unpacked 4na ( INSDC:4na:bin ) to unpacked 2na ( INSDC:2na:bin )
39 *
40 * substitutes a random base for ambiguities
41 * from the bases allowed in the 4na ( marked '*' in the table below ).
42 *
43 *       A | C | G | T
44 *      =================
45 *    N    |   |   |      # none marked - any base may be substituted
46 *    A  * |   |   |      # always A
47 *    C    | * |   |      # always C
48 *    M  * | * |   |      # A or C
49 *    G    |   | * |      # always G
50 *    R  * |   | * |      # A or G
51 *    S    | * | * |      # C or G
52 *    V  * | * | * |      # A, C or G
53 *    T    |   |   | *    # always T
54 *    W  * |   |   | *    # A or T
55 *    Y    | * |   | *    # C or T
56 *    H  * | * |   | *    # A, C or T
57 *    K    |   | * | *    # G or T
58 *    D  * |   | * | *    # A, G or T
59 *    B    | * | * | *    # C, G or T
60 *    N  * | * | * | *    # all marked - any base may be substituted
61 */
62 extern function
63 INSDC:2na:bin INSDC:SEQ:rand_4na_2na #1 ( INSDC:4na:bin rd_bin );
64
65
66 /*--------------------------------------------------------------------------
67 * sequence
68 * basic sequence table
69 *
70 * history:
71 * 1.0.1 - introduced text-mode QUALITY columns
72 */
73 table INSDC:tbl:sequence #1.0.1
74 {
75 /* READ
76 * native or converted DNA sequence
77 */
78
79 // default is IUPAC character representation
80 extern default column INSDC:dna:text READ
81 {
82 read = out_dna_text;
83 validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text ); // NOTE(review): presumably checks that text written matches text read back - confirm
84 }
85
86 // 4na representation - unpacked and packed
87 extern column INSDC:4na:bin READ = out_4na_bin;
88 extern column INSDC:4na:packed READ = out_4na_packed;
89
90 // x2na representation - 2na with ambiguity
91 extern column INSDC:x2na:bin READ = out_x2na_bin;
92
93 // 2na representation - 2na with no ambiguity - unpacked and packed
94 extern column INSDC:2na:bin READ = out_2na_bin;
95 extern column INSDC:2na:packed READ = out_2na_packed;
96
97
98
99 /* CSREAD
100 * native or converted color-space sequence
101 */
102
103 // default is ASCII character representation
104 extern default column INSDC:color:text CSREAD
105 {
106 read = out_color_text;
107 validate = < INSDC:color:text > compare ( in_color_text, out_color_text );
108 }
109
110 // x2cs representation - 2cs with ambiguity
111 extern column INSDC:x2cs:bin CSREAD = out_x2cs_bin;
112
113 // 2cs representation - 2cs with no ambiguity - unpacked and packed
114 extern column INSDC:2cs:bin CSREAD = out_2cs_bin;
115 extern column INSDC:2cs:packed CSREAD = out_2cs_packed;
116
117 /* CS_NATIVE
118 * is color-space the native sequence space
119 */
120 readonly column bool CS_NATIVE = cs_native;
121
122 /* CS_KEY
123 * leading call given in base-space
124 */
125 extern column INSDC:dna:text CS_KEY
126 {
127 read = out_cs_key;
128 validate = < INSDC:dna:text > compare ( in_cs_key, out_cs_key );
129 }
130
131 /* COLOR_MATRIX
132 * matrix used for color-space conversions
133 */
134 extern column U8 COLOR_MATRIX = out_color_matrix;
135
136
137 /* QUALITY
138 * base or color call qualities
139 */
140
141 // PHRED is default
142 extern default column INSDC:quality:phred QUALITY = out_qual_phred;
143
144 // textual encodings ( the text-mode QUALITY columns introduced in 1.0.1 )
145 extern column INSDC:quality:text:phred_33 QUALITY
146 = out_qual_text_phred_33
147 | ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred ); // alternative built from out_qual_phred - presumably offset by 33, confirm against the "sum" function
148 extern column INSDC:quality:text:phred_64 QUALITY
149 = out_qual_text_phred_64
150 | ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred ); // alternative built from out_qual_phred - presumably offset by 64, confirm against the "sum" function
151
152
153 /* SIGNAL
154 * signal and intensity information is unspecified
155 */
156 INSDC:coord:len signal_len
157 = ( INSDC:coord:len ) row_len ( out_signal )
158 | < INSDC:coord:len > echo < 0 > (); // alternative: constant 0 - presumably used when out_signal is not available, confirm
159
160
161 /* VIRTUAL PRODUCTIONS
162 * cs_native
163 * in_cs_key
164 * out_cs_key
165 * out_signal
166 * in_dna_text
167 * out_2cs_bin
168 * out_2na_bin
169 * out_4na_bin
170 * out_dna_text
171 * out_x2cs_bin
172 * out_x2na_bin
173 * in_color_text
174 * out_2cs_packed
175 * out_2na_packed
176 * out_4na_packed
177 * out_color_text
178 * out_qual_phred
179 * out_color_matrix
180 * out_qual_text_phred_33, out_qual_text_phred_64 ( referenced by the text-mode QUALITY columns ) */
181 };
182
183
184 /*--------------------------------------------------------------------------
185 * protein
186 * basic protein sequence table
187 */
188 table INSDC:tbl:protein #1
189 {
190 /* PROTEIN
191 * native or converted protein sequence
192 */
193
194 // default is IUPAC character representation
195 extern default column INSDC:protein:text PROTEIN
196 {
197 read = out_protein_text;
198 validate = < INSDC:protein:text > compare ( in_protein_text, out_protein_text ); // NOTE(review): presumably checks that text written matches text read back - confirm
199 }
200
201 // aa representation
202 extern column INSDC:aa:bin PROTEIN = out_aa_bin;
203
204
205 /* VIRTUAL PRODUCTIONS of INSDC:tbl:protein ( referenced above, not defined in this table )
206 * out_aa_bin
207 * in_protein_text
208 * out_protein_text
209 */
210 };