comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/varloc.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
comparison
equal deleted inserted replaced
2:0d65b71ff8df 3:38ad1130d077
1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /*==========================================================================
28 * VarLoc table
29 */
30 version 1;
31
32 include 'vdb/vdb.vschema';
33 include 'insdc/insdc.vschema';
34 include 'ncbi/ncbi.vschema';
35
36
37 /*--------------------------------------------------------------------------
38 * types - two U8 enumerations: variation instance type and variation source
39 * http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/asn_spec/Variation-inst.html
40 */
41 typedef U8 NCBI:var:inst:type; // element type of the VAR_TYPE column; named values follow
42 const NCBI:var:inst:type NCBI:var:inst:value:unknown = 0;
43 const NCBI:var:inst:type NCBI:var:inst:value:identity = 1;
44 const NCBI:var:inst:type NCBI:var:inst:value:inv = 2;
45 const NCBI:var:inst:type NCBI:var:inst:value:snv = 3;
46 const NCBI:var:inst:type NCBI:var:inst:value:mnp = 4;
47 const NCBI:var:inst:type NCBI:var:inst:value:delins = 5;
48 const NCBI:var:inst:type NCBI:var:inst:value:del = 6;
49 const NCBI:var:inst:type NCBI:var:inst:value:ins = 7;
50 const NCBI:var:inst:type NCBI:var:inst:value:microsatellite = 8;
51 const NCBI:var:inst:type NCBI:var:inst:value:transposon = 9;
52 const NCBI:var:inst:type NCBI:var:inst:value:cnv = 10;
53 const NCBI:var:inst:type NCBI:var:inst:value:direct_copy = 11;
54 const NCBI:var:inst:type NCBI:var:inst:value:rev_direct_copy = 12;
55 const NCBI:var:inst:type NCBI:var:inst:value:inverted_copy = 13;
56 const NCBI:var:inst:type NCBI:var:inst:value:everted_copy = 14;
57 const NCBI:var:inst:type NCBI:var:inst:value:translocation = 15;
58 const NCBI:var:inst:type NCBI:var:inst:value:prot_missense = 16;
59 const NCBI:var:inst:type NCBI:var:inst:value:prot_nonsense = 17;
60 const NCBI:var:inst:type NCBI:var:inst:value:prot_neutral = 18;
61 const NCBI:var:inst:type NCBI:var:inst:value:prot_silent = 19;
62 const NCBI:var:inst:type NCBI:var:inst:value:prot_other = 20;
63 const NCBI:var:inst:type NCBI:var:inst:value:other = 255; // 255 is the largest U8 value; 21..254 are unassigned here
64
65 typedef U8 NCBI:var:source:type; // element type of the VAR_SOURCE column; no constant is defined for 0
66 const NCBI:var:source:type NCBI:var:source:value:dbSNP = 1;
67 const NCBI:var:source:type NCBI:var:source:value:dbVar = 2;
68 const NCBI:var:source:type NCBI:var:source:value:ClinVar = 3;
69 const NCBI:var:source:type NCBI:var:source:value:other = 10; // 4..9 are unassigned here
70
71
72 /*--------------------------------------------------------------------------
73 * functions
74 */
75
76 /* tokenize_var_id
77 * splits an ascii variation id into 2 tokens, returned as text:token
78 * 0 - prefix
79 * 1 - suffix (the varloc table converts this token to a U32 with strtonum)
80 */
81 extern function
82 text:token NCBI:var:tokenize_var_id #1 ( ascii var_id );
83
84
85 /*--------------------------------------------------------------------------
86 * varloc - table whose columns mirror the SQL schema quoted in the table body
87 * NOTE from the original author: this name is questionable
88 */
89 table NCBI:var:tbl:varloc #1
90 {
91 /* SQL schema (same field order as the columns declared below):
92 var_id varchar(50),
93 parent_var_id varchar(50) NULL OKAY,
94 var_type int,
95 var_source int,
96 gi int,
97 pos_from int,
98 pos_to int,
99 entrez_id int,
100 score int
101 */
102
103 /* VAR_ID - not stored as text; read back through out_var_id, which is rebuilt from the physical columns below
104 * example: "rs5852452"
105 */
106 extern column ascii VAR_ID = out_var_id;
107
108 // on input, split the id into a text prefix and a numeric suffix; stored as 3 physical columns (prefix, suffix length, suffix value)
109 ascii in_var_id = VAR_ID;
110 text:token in_var_id_tok = NCBI:var:tokenize_var_id ( in_var_id );
111 ascii in_var_id_prefix = extract_token < 0 > ( in_var_id, in_var_id_tok );
112 ascii in_var_id_suffix_text = extract_token < 1 > ( in_var_id, in_var_id_tok );
113 U32 in_var_id_suffix = strtonum ( in_var_id_suffix_text ); // numeric value only; the text length is kept separately in .VAR_ID_SUFFIX_LEN
114
115 // physical storage: prefix text (zip_encoding), suffix text length and suffix value (both izip_encoding)
116 physical column < ascii > zip_encoding .VAR_ID_PREFIX = in_var_id_prefix;
117 physical column < U32 > izip_encoding .VAR_ID_SUFFIX_LEN = row_len ( in_var_id_suffix_text );
118 physical column < U32 > izip_encoding .VAR_ID_SUFFIX = in_var_id_suffix;
119
120 // on output, restore original id: prefix followed by the suffix value zero-padded to the stored suffix length
121 U32 out_var_id_suffix = .VAR_ID_SUFFIX;
122 U32 out_var_id_suffix_len = .VAR_ID_SUFFIX_LEN;
123 ascii out_var_id_prefix = .VAR_ID_PREFIX;
124 ascii out_var_id = sprintf < "%s%0*u" > ( out_var_id_prefix, out_var_id_suffix_len, out_var_id_suffix );
125
126 /* PARENT_VAR_ID - split and restored the same way as VAR_ID, using its own three physical columns
127 * example: "rs5852452"
128 * may be EMPTY
129 */
130 extern column ascii PARENT_VAR_ID = out_parent_var_id;
131
132 // same treatment as VAR_ID, except the output format uses precision (%.*u) instead of zero-padded width (%0*u) - NOTE(review): presumably so an EMPTY id (length 0, value 0) restores to an empty string; confirm against the VDB sprintf implementation
133 ascii in_parent_var_id = PARENT_VAR_ID;
134 text:token in_parent_var_id_tok = NCBI:var:tokenize_var_id ( in_parent_var_id );
135 ascii in_parent_var_id_prefix = extract_token < 0 > ( in_parent_var_id, in_parent_var_id_tok );
136 ascii in_parent_var_id_suffix_text = extract_token < 1 > ( in_parent_var_id, in_parent_var_id_tok );
137 U32 in_parent_var_id_suffix = strtonum ( in_parent_var_id_suffix_text );
138 physical column < ascii > zip_encoding .PARENT_VAR_ID_PREFIX = in_parent_var_id_prefix;
139 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX_LEN = row_len ( in_parent_var_id_suffix_text );
140 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX = in_parent_var_id_suffix;
141 U32 out_parent_var_id_suffix = .PARENT_VAR_ID_SUFFIX;
142 U32 out_parent_var_id_suffix_len = .PARENT_VAR_ID_SUFFIX_LEN;
143 ascii out_parent_var_id_prefix = .PARENT_VAR_ID_PREFIX;
144 ascii out_parent_var_id = sprintf < "%s%.*u" > ( out_parent_var_id_prefix, out_parent_var_id_suffix_len, out_parent_var_id_suffix );
145
146 /* VAR_TYPE - one of the NCBI:var:inst:value constants, zip-encoded
147 */
148 extern column < NCBI:var:inst:type > zip_encoding VAR_TYPE;
149
150 /* VAR_SOURCE - one of the NCBI:var:source:value constants, zip-encoded
151 */
152 extern column < NCBI:var:source:type > zip_encoding VAR_SOURCE;
153
154 /* GI - typed NCBI:gi, izip-encoded
155 */
156 extern column < NCBI:gi > izip_encoding GI;
157
158 /* POS_FROM
159 * starting position, stored directly; also feeds the POS_TO length calculation below
160 */
161 extern column < INSDC:coord:zero > izip_encoding POS_FROM;
162
163 INSDC:coord:zero in_pos_from = POS_FROM;
164 INSDC:coord:zero out_pos_from = .POS_FROM;
165
166 /* POS_TO
167 * ending position; not stored directly - written as .POS_LEN relative to POS_FROM and rebuilt on read via out_pos_to
168 */
169 extern column INSDC:coord:zero POS_TO = out_pos_to;
170
171 INSDC:coord:zero in_pos_to = POS_TO;
172 INSDC:coord:len in_pos_len = ( INSDC:coord:len ) < I32 > diff < -1 > ( in_pos_to, in_pos_from ); // presumably pos_to - pos_from + 1, i.e. an inclusive length - TODO confirm diff semantics
173
174 physical column < INSDC:coord:len > izip_encoding .POS_LEN = in_pos_len;
175
176 INSDC:coord:zero out_pos_len = ( INSDC:coord:zero ) .POS_LEN;
177 INSDC:coord:zero out_pos_to = < INSDC:coord:zero > sum < -1 > ( out_pos_from, out_pos_len ); // presumably pos_from + pos_len - 1, the inverse of the diff above - TODO confirm sum semantics
178
179 /* ENTREZ_ID - I32, izip-encoded
180 * open question from the original author: do we need this?
181 */
182 extern column < I32 > izip_encoding ENTREZ_ID;
183
184 /* SCORE - I32, izip-encoded
185 */
186 extern column < I32 > izip_encoding SCORE;
187 };
188
189 table NCBI:var:tbl:hitmap #1
190 { /* hit map table; used as member HITMAP of database NCBI:var:db:varloc */
191 extern column U32 MAX_SEQ_LEN; /* must be static - presumably the same value for every row; TODO confirm */
192 extern column bool_encoding HITS; /* places on the reference with variations; NOTE(review): declared with bool_encoding only, element type presumably bool - confirm */
193 };
194
195
196 /*--------------------------------------------------------------------------
197 * varloc database
198 * contains the varloc table (member VARLOC) and the hit table (member HITMAP)
199 */
200 database NCBI:var:db:varloc #1
201 {
202 table NCBI:var:tbl:varloc VARLOC;
203 table NCBI:var:tbl:hitmap HITMAP;
204 };