Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/varloc.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:0d65b71ff8df | 3:38ad1130d077 |
---|---|
1 /*=========================================================================== | |
2 * | |
3 * PUBLIC DOMAIN NOTICE | |
4 * National Center for Biotechnology Information | |
5 * | |
6 * This software/database is a "United States Government Work" under the | |
7 * terms of the United States Copyright Act. It was written as part of | |
8 * the author's official duties as a United States Government employee and | |
9 * thus cannot be copyrighted. This software/database is freely available | |
10 * to the public for use. The National Library of Medicine and the U.S. | |
11 * Government have not placed any restriction on its use or reproduction. | |
12 * | |
13 * Although all reasonable efforts have been taken to ensure the accuracy | |
14 * and reliability of the software and data, the NLM and the U.S. | |
15 * Government do not and cannot warrant the performance or results that | |
16 * may be obtained by using this software or data. The NLM and the U.S. | |
17 * Government disclaim all warranties, express or implied, including | |
18 * warranties of performance, merchantability or fitness for any particular | |
19 * purpose. | |
20 * | |
21 * Please cite the author in any work or product based on this material. | |
22 * | |
23 * =========================================================================== | |
24 * | |
25 */ | |
26 | |
27 /*========================================================================== | |
28 * VarLoc table | |
29 */ | |
30 version 1; | |
31 | |
32 include 'vdb/vdb.vschema'; | |
33 include 'insdc/insdc.vschema'; | |
34 include 'ncbi/ncbi.vschema'; | |
35 | |
36 | |
37 /*-------------------------------------------------------------------------- | |
38 * types: U8 codes for the kind of variation ( NCBI:var:inst:* ) and for its source ( NCBI:var:source:* ); NOTE(review): inst codes appear to follow the spec linked on the next line - confirm before adding values | |
39 * http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/asn_spec/Variation-inst.html | |
40 */ | |
41 typedef U8 NCBI:var:inst:type; | |
42 const NCBI:var:inst:type NCBI:var:inst:value:unknown = 0; | |
43 const NCBI:var:inst:type NCBI:var:inst:value:identity = 1; | |
44 const NCBI:var:inst:type NCBI:var:inst:value:inv = 2; | |
45 const NCBI:var:inst:type NCBI:var:inst:value:snv = 3; | |
46 const NCBI:var:inst:type NCBI:var:inst:value:mnp = 4; | |
47 const NCBI:var:inst:type NCBI:var:inst:value:delins = 5; | |
48 const NCBI:var:inst:type NCBI:var:inst:value:del = 6; | |
49 const NCBI:var:inst:type NCBI:var:inst:value:ins = 7; | |
50 const NCBI:var:inst:type NCBI:var:inst:value:microsatellite = 8; | |
51 const NCBI:var:inst:type NCBI:var:inst:value:transposon = 9; | |
52 const NCBI:var:inst:type NCBI:var:inst:value:cnv = 10; | |
53 const NCBI:var:inst:type NCBI:var:inst:value:direct_copy = 11; | |
54 const NCBI:var:inst:type NCBI:var:inst:value:rev_direct_copy = 12; | |
55 const NCBI:var:inst:type NCBI:var:inst:value:inverted_copy = 13; | |
56 const NCBI:var:inst:type NCBI:var:inst:value:everted_copy = 14; | |
57 const NCBI:var:inst:type NCBI:var:inst:value:translocation = 15; | |
58 const NCBI:var:inst:type NCBI:var:inst:value:prot_missense = 16; | |
59 const NCBI:var:inst:type NCBI:var:inst:value:prot_nonsense = 17; | |
60 const NCBI:var:inst:type NCBI:var:inst:value:prot_neutral = 18; | |
61 const NCBI:var:inst:type NCBI:var:inst:value:prot_silent = 19; | |
62 const NCBI:var:inst:type NCBI:var:inst:value:prot_other = 20; | |
63 const NCBI:var:inst:type NCBI:var:inst:value:other = 255; | |
64 | |
65 typedef U8 NCBI:var:source:type; | |
66 const NCBI:var:source:type NCBI:var:source:value:dbSNP = 1; | |
67 const NCBI:var:source:type NCBI:var:source:value:dbVar = 2; | |
68 const NCBI:var:source:type NCBI:var:source:value:ClinVar = 3; | |
69 const NCBI:var:source:type NCBI:var:source:value:other = 10; | |
70 | |
71 | |
72 /*-------------------------------------------------------------------------- | |
73 * functions | |
74 */ | |
75 | |
76 /* tokenize_var_id | |
77 * splits an ascii variation id into 2 tokens, which are then picked out with extract_token < N > | |
78 * 0 - prefix ( NOTE(review): presumably the leading non-numeric text, e.g. "rs" - confirm against the implementation ) | |
79 * 1 - suffix ( the varloc table passes this token to strtonum, so presumably the numeric tail - confirm ) | |
80 */ | |
81 extern function | |
82 text:token NCBI:var:tokenize_var_id #1 ( ascii var_id ); | |
83 | |
84 | |
85 /*-------------------------------------------------------------------------- | |
86 * varloc - table of variation locations; its columns mirror the SQL schema quoted inside the table body | |
87 * this name is questionable | |
88 */ | |
89 table NCBI:var:tbl:varloc #1 | |
90 { | |
91 /* SQL schema: | |
92 var_id varchar(50), | |
93 parent_var_id varchar(50) NULL OKAY, | |
94 var_type int, | |
95 var_source int, | |
96 gi int, | |
97 pos_from int, | |
98 pos_to int, | |
99 entrez_id int, | |
100 score int | |
101 */ | |
102 | |
103 /* VAR_ID | |
104 * example: "rs5852452" | |
105 */ | |
106 extern column ascii VAR_ID = out_var_id; | |
107 | |
108 // on input, split the id into prefix text, suffix text and numeric suffix; these feed the 3 physical columns below | |
109 ascii in_var_id = VAR_ID; | |
110 text:token in_var_id_tok = NCBI:var:tokenize_var_id ( in_var_id ); | |
111 ascii in_var_id_prefix = extract_token < 0 > ( in_var_id, in_var_id_tok ); | |
112 ascii in_var_id_suffix_text = extract_token < 1 > ( in_var_id, in_var_id_tok ); | |
113 U32 in_var_id_suffix = strtonum ( in_var_id_suffix_text ); | |
114 | |
115 // physical storage: prefix text, length of the suffix text ( reused as the zero-pad width on output ), numeric suffix value | |
116 physical column < ascii > zip_encoding .VAR_ID_PREFIX = in_var_id_prefix; | |
117 physical column < U32 > izip_encoding .VAR_ID_SUFFIX_LEN = row_len ( in_var_id_suffix_text ); | |
118 physical column < U32 > izip_encoding .VAR_ID_SUFFIX = in_var_id_suffix; | |
119 | |
120 // on output, restore original id: prefix followed by the suffix zero-padded to the stored suffix length | |
121 U32 out_var_id_suffix = .VAR_ID_SUFFIX; | |
122 U32 out_var_id_suffix_len = .VAR_ID_SUFFIX_LEN; | |
123 ascii out_var_id_prefix = .VAR_ID_PREFIX; | |
124 ascii out_var_id = sprintf < "%s%0*u" > ( out_var_id_prefix, out_var_id_suffix_len, out_var_id_suffix ); | |
125 | |
126 /* PARENT_VAR_ID | |
127 * example: "rs5852452" | |
128 * may be EMPTY | |
129 */ | |
130 extern column ascii PARENT_VAR_ID = out_parent_var_id; | |
131 | |
132 // same treatment as VAR_ID, but output uses "%.*u" ( precision ) where VAR_ID uses "%0*u" ( width ); NOTE(review): presumably so an EMPTY id ( length 0, suffix 0 ) prints no digits, as in C printf - confirm against VDB sprintf before unifying the two formats | |
133 ascii in_parent_var_id = PARENT_VAR_ID; | |
134 text:token in_parent_var_id_tok = NCBI:var:tokenize_var_id ( in_parent_var_id ); | |
135 ascii in_parent_var_id_prefix = extract_token < 0 > ( in_parent_var_id, in_parent_var_id_tok ); | |
136 ascii in_parent_var_id_suffix_text = extract_token < 1 > ( in_parent_var_id, in_parent_var_id_tok ); | |
137 U32 in_parent_var_id_suffix = strtonum ( in_parent_var_id_suffix_text ); | |
138 physical column < ascii > zip_encoding .PARENT_VAR_ID_PREFIX = in_parent_var_id_prefix; | |
139 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX_LEN = row_len ( in_parent_var_id_suffix_text ); | |
140 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX = in_parent_var_id_suffix; | |
141 U32 out_parent_var_id_suffix = .PARENT_VAR_ID_SUFFIX; | |
142 U32 out_parent_var_id_suffix_len = .PARENT_VAR_ID_SUFFIX_LEN; | |
143 ascii out_parent_var_id_prefix = .PARENT_VAR_ID_PREFIX; | |
144 ascii out_parent_var_id = sprintf < "%s%.*u" > ( out_parent_var_id_prefix, out_parent_var_id_suffix_len, out_parent_var_id_suffix ); | |
145 | |
146 /* VAR_TYPE | |
147 */ | |
148 extern column < NCBI:var:inst:type > zip_encoding VAR_TYPE; | |
149 | |
150 /* VAR_SOURCE | |
151 */ | |
152 extern column < NCBI:var:source:type > zip_encoding VAR_SOURCE; | |
153 | |
154 /* GI | |
155 */ | |
156 extern column < NCBI:gi > izip_encoding GI; | |
157 | |
158 /* POS_FROM | |
159 * starting position, typed INSDC:coord:zero; in_pos_from / out_pos_from below are also used to derive POS_TO | |
160 */ | |
161 extern column < INSDC:coord:zero > izip_encoding POS_FROM; | |
162 | |
163 INSDC:coord:zero in_pos_from = POS_FROM; | |
164 INSDC:coord:zero out_pos_from = .POS_FROM; | |
165 | |
166 /* POS_TO | |
167 * ending position; not stored itself - written as .POS_LEN = diff < -1 > ( to, from ) and read back as sum < -1 > ( from, len ); NOTE(review): presumably len = to - from + 1 for a fully-closed interval - confirm against vdb:diff / vdb:sum | |
168 */ | |
169 extern column INSDC:coord:zero POS_TO = out_pos_to; | |
170 | |
171 INSDC:coord:zero in_pos_to = POS_TO; | |
172 INSDC:coord:len in_pos_len = ( INSDC:coord:len ) < I32 > diff < -1 > ( in_pos_to, in_pos_from ); | |
173 | |
174 physical column < INSDC:coord:len > izip_encoding .POS_LEN = in_pos_len; | |
175 | |
176 INSDC:coord:zero out_pos_len = ( INSDC:coord:zero ) .POS_LEN; | |
177 INSDC:coord:zero out_pos_to = < INSDC:coord:zero > sum < -1 > ( out_pos_from, out_pos_len ); | |
178 | |
179 /* ENTREZ_ID | |
180 * do we need this? | |
181 */ | |
182 extern column < I32 > izip_encoding ENTREZ_ID; | |
183 | |
184 /* SCORE | |
185 */ | |
186 extern column < I32 > izip_encoding SCORE; | |
187 }; | |
188 | |
189 table NCBI:var:tbl:hitmap #1 | |
190 { | |
191 extern column U32 MAX_SEQ_LEN; /* must be static; NOTE(review): presumably means one value shared by every row - confirm */ | |
192 extern column bool_encoding HITS; /* boolean-encoded flags: places on the reference with variations */ | |
193 }; | |
194 | |
195 | |
196 /*-------------------------------------------------------------------------- | |
197 * varloc database | |
198 * contains the varloc table ( member VARLOC ) and the hit map table ( member HITMAP ) | |
199 */ | |
200 database NCBI:var:db:varloc #1 | |
201 { | |
202 table NCBI:var:tbl:varloc VARLOC; | |
203 table NCBI:var:tbl:hitmap HITMAP; | |
204 }; | |