Mercurial > repos > charles_s_test > seqsero2
comparison libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/varloc.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
| author | charles_s_test |
|---|---|
| date | Mon, 27 Nov 2017 11:21:07 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:0d65b71ff8df | 3:38ad1130d077 |
|---|---|
| 1 /*=========================================================================== | |
| 2 * | |
| 3 * PUBLIC DOMAIN NOTICE | |
| 4 * National Center for Biotechnology Information | |
| 5 * | |
| 6 * This software/database is a "United States Government Work" under the | |
| 7 * terms of the United States Copyright Act. It was written as part of | |
| 8 * the author's official duties as a United States Government employee and | |
| 9 * thus cannot be copyrighted. This software/database is freely available | |
| 10 * to the public for use. The National Library of Medicine and the U.S. | |
| 11 * Government have not placed any restriction on its use or reproduction. | |
| 12 * | |
| 13 * Although all reasonable efforts have been taken to ensure the accuracy | |
| 14 * and reliability of the software and data, the NLM and the U.S. | |
| 15 * Government do not and cannot warrant the performance or results that | |
| 16 * may be obtained by using this software or data. The NLM and the U.S. | |
| 17 * Government disclaim all warranties, express or implied, including | |
| 18 * warranties of performance, merchantability or fitness for any particular | |
| 19 * purpose. | |
| 20 * | |
| 21 * Please cite the author in any work or product based on this material. | |
| 22 * | |
| 23 * =========================================================================== | |
| 24 * | |
| 25 */ | |
| 26 | |
| 27 /*========================================================================== | |
| 28 * VarLoc table | |
| 29 */ | |
| 30 version 1; | |
| 31 | |
| 32 include 'vdb/vdb.vschema'; | |
| 33 include 'insdc/insdc.vschema'; | |
| 34 include 'ncbi/ncbi.vschema'; | |
| 35 | |
| 36 | |
| 37 /*-------------------------------------------------------------------------- | |
| 38 * types - two U8 code sets: NCBI:var:inst:type ( variation instance type ) and NCBI:var:source:type ( variation source ) | |
| 39 * http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/asn_spec/Variation-inst.html | |
| 40 */ | |
| 41 typedef U8 NCBI:var:inst:type; | |
| 42 const NCBI:var:inst:type NCBI:var:inst:value:unknown = 0; | |
| 43 const NCBI:var:inst:type NCBI:var:inst:value:identity = 1; | |
| 44 const NCBI:var:inst:type NCBI:var:inst:value:inv = 2; | |
| 45 const NCBI:var:inst:type NCBI:var:inst:value:snv = 3; | |
| 46 const NCBI:var:inst:type NCBI:var:inst:value:mnp = 4; | |
| 47 const NCBI:var:inst:type NCBI:var:inst:value:delins = 5; | |
| 48 const NCBI:var:inst:type NCBI:var:inst:value:del = 6; | |
| 49 const NCBI:var:inst:type NCBI:var:inst:value:ins = 7; | |
| 50 const NCBI:var:inst:type NCBI:var:inst:value:microsatellite = 8; | |
| 51 const NCBI:var:inst:type NCBI:var:inst:value:transposon = 9; | |
| 52 const NCBI:var:inst:type NCBI:var:inst:value:cnv = 10; | |
| 53 const NCBI:var:inst:type NCBI:var:inst:value:direct_copy = 11; | |
| 54 const NCBI:var:inst:type NCBI:var:inst:value:rev_direct_copy = 12; | |
| 55 const NCBI:var:inst:type NCBI:var:inst:value:inverted_copy = 13; | |
| 56 const NCBI:var:inst:type NCBI:var:inst:value:everted_copy = 14; | |
| 57 const NCBI:var:inst:type NCBI:var:inst:value:translocation = 15; | |
| 58 const NCBI:var:inst:type NCBI:var:inst:value:prot_missense = 16; | |
| 59 const NCBI:var:inst:type NCBI:var:inst:value:prot_nonsense = 17; | |
| 60 const NCBI:var:inst:type NCBI:var:inst:value:prot_neutral = 18; | |
| 61 const NCBI:var:inst:type NCBI:var:inst:value:prot_silent = 19; | |
| 62 const NCBI:var:inst:type NCBI:var:inst:value:prot_other = 20; | |
| 63 const NCBI:var:inst:type NCBI:var:inst:value:other = 255; | |
| 64 | |
| 65 typedef U8 NCBI:var:source:type; | |
| 66 const NCBI:var:source:type NCBI:var:source:value:dbSNP = 1; | |
| 67 const NCBI:var:source:type NCBI:var:source:value:dbVar = 2; | |
| 68 const NCBI:var:source:type NCBI:var:source:value:ClinVar = 3; | |
| 69 const NCBI:var:source:type NCBI:var:source:value:other = 10; | |
| 70 | |
| 71 | |
| 72 /*-------------------------------------------------------------------------- | |
| 73 * functions | |
| 74 */ | |
| 75 | |
| 76 /* tokenize_var_id | |
| 77 * splits the ascii "var_id" into 2 tokens, returned as text:token | |
| 78 * 0 - prefix | |
| 79 * 1 - suffix ( NCBI:var:tbl:varloc converts this part with strtonum ) | |
| 80 */ | |
| 81 extern function | |
| 82 text:token NCBI:var:tokenize_var_id #1 ( ascii var_id ); | |
| 83 | |
| 84 | |
| 85 /*-------------------------------------------------------------------------- | |
| 86 * varloc - columns: VAR_ID, PARENT_VAR_ID, VAR_TYPE, VAR_SOURCE, GI, POS_FROM, POS_TO, ENTREZ_ID, SCORE | |
| 87 * this name is questionable | |
| 88 */ | |
| 89 table NCBI:var:tbl:varloc #1 | |
| 90 { | |
| 91 /* SQL schema: | |
| 92 var_id varchar(50), | |
| 93 parent_var_id varchar(50) NULL OKAY, | |
| 94 var_type int, | |
| 95 var_source int, | |
| 96 gi int, | |
| 97 pos_from int, | |
| 98 pos_to int, | |
| 99 entrez_id int, | |
| 100 score int | |
| 101 */ | |
| 102 | |
| 103 /* VAR_ID | |
| 104 * example: "rs5852452" | |
| 105 */ | |
| 106 extern column ascii VAR_ID = out_var_id; | |
| 107 | |
| 108 // on input, split the id into prefix text, suffix text length and numeric suffix ( 3 physical columns ) | |
| 109 ascii in_var_id = VAR_ID; | |
| 110 text:token in_var_id_tok = NCBI:var:tokenize_var_id ( in_var_id ); | |
| 111 ascii in_var_id_prefix = extract_token < 0 > ( in_var_id, in_var_id_tok ); | |
| 112 ascii in_var_id_suffix_text = extract_token < 1 > ( in_var_id, in_var_id_tok ); | |
| 113 U32 in_var_id_suffix = strtonum ( in_var_id_suffix_text ); | |
| 114 | |
| 115 // physical columns: prefix, suffix text length, numeric suffix | |
| 116 physical column < ascii > zip_encoding .VAR_ID_PREFIX = in_var_id_prefix; | |
| 117 physical column < U32 > izip_encoding .VAR_ID_SUFFIX_LEN = row_len ( in_var_id_suffix_text ); | |
| 118 physical column < U32 > izip_encoding .VAR_ID_SUFFIX = in_var_id_suffix; | |
| 119 | |
| 120 // on output, restore original id: prefix followed by the numeric suffix formatted with "%0*u", using the stored suffix length as the field width | |
| 121 U32 out_var_id_suffix = .VAR_ID_SUFFIX; | |
| 122 U32 out_var_id_suffix_len = .VAR_ID_SUFFIX_LEN; | |
| 123 ascii out_var_id_prefix = .VAR_ID_PREFIX; | |
| 124 ascii out_var_id = sprintf < "%s%0*u" > ( out_var_id_prefix, out_var_id_suffix_len, out_var_id_suffix ); | |
| 125 | |
| 126 /* PARENT_VAR_ID | |
| 127 * example: "rs5852452" | |
| 128 * may be EMPTY | |
| 129 */ | |
| 130 extern column ascii PARENT_VAR_ID = out_parent_var_id; | |
| 131 | |
| 132 // same treatment as VAR_ID, except the output format is "%s%.*u" ( stored length used as precision, not width ) - NOTE(review): presumably so that an EMPTY id, stored with suffix length 0, is restored as empty rather than "0"; confirm against the VDB sprintf implementation | |
| 133 ascii in_parent_var_id = PARENT_VAR_ID; | |
| 134 text:token in_parent_var_id_tok = NCBI:var:tokenize_var_id ( in_parent_var_id ); | |
| 135 ascii in_parent_var_id_prefix = extract_token < 0 > ( in_parent_var_id, in_parent_var_id_tok ); | |
| 136 ascii in_parent_var_id_suffix_text = extract_token < 1 > ( in_parent_var_id, in_parent_var_id_tok ); | |
| 137 U32 in_parent_var_id_suffix = strtonum ( in_parent_var_id_suffix_text ); | |
| 138 physical column < ascii > zip_encoding .PARENT_VAR_ID_PREFIX = in_parent_var_id_prefix; | |
| 139 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX_LEN = row_len ( in_parent_var_id_suffix_text ); | |
| 140 physical column < U32 > izip_encoding .PARENT_VAR_ID_SUFFIX = in_parent_var_id_suffix; | |
| 141 U32 out_parent_var_id_suffix = .PARENT_VAR_ID_SUFFIX; | |
| 142 U32 out_parent_var_id_suffix_len = .PARENT_VAR_ID_SUFFIX_LEN; | |
| 143 ascii out_parent_var_id_prefix = .PARENT_VAR_ID_PREFIX; | |
| 144 ascii out_parent_var_id = sprintf < "%s%.*u" > ( out_parent_var_id_prefix, out_parent_var_id_suffix_len, out_parent_var_id_suffix ); | |
| 145 | |
| 146 /* VAR_TYPE - typed NCBI:var:inst:type, zip-encoded | |
| 147 */ | |
| 148 extern column < NCBI:var:inst:type > zip_encoding VAR_TYPE; | |
| 149 | |
| 150 /* VAR_SOURCE - typed NCBI:var:source:type, zip-encoded | |
| 151 */ | |
| 152 extern column < NCBI:var:source:type > zip_encoding VAR_SOURCE; | |
| 153 | |
| 154 /* GI - typed NCBI:gi, izip-encoded | |
| 155 */ | |
| 156 extern column < NCBI:gi > izip_encoding GI; | |
| 157 | |
| 158 /* POS_FROM | |
| 159 * starting position, typed INSDC:coord:zero, kept as its own izip-encoded column | |
| 160 */ | |
| 161 extern column < INSDC:coord:zero > izip_encoding POS_FROM; | |
| 162 | |
| 163 INSDC:coord:zero in_pos_from = POS_FROM; | |
| 164 INSDC:coord:zero out_pos_from = .POS_FROM; | |
| 165 | |
| 166 /* POS_TO | |
| 167 * ending position - not stored itself: written as .POS_LEN computed from ( POS_TO, POS_FROM ), and rebuilt on read from .POS_FROM and .POS_LEN | |
| 168 */ | |
| 169 extern column INSDC:coord:zero POS_TO = out_pos_to; | |
| 170 | |
| 171 INSDC:coord:zero in_pos_to = POS_TO; | |
| 172 INSDC:coord:len in_pos_len = ( INSDC:coord:len ) < I32 > diff < -1 > ( in_pos_to, in_pos_from ); | |
| 173 | |
| 174 physical column < INSDC:coord:len > izip_encoding .POS_LEN = in_pos_len; | |
| 175 | |
| 176 INSDC:coord:zero out_pos_len = ( INSDC:coord:zero ) .POS_LEN; | |
| 177 INSDC:coord:zero out_pos_to = < INSDC:coord:zero > sum < -1 > ( out_pos_from, out_pos_len ); | |
| 178 | |
| 179 /* ENTREZ_ID | |
| 180 * do we need this? | |
| 181 */ | |
| 182 extern column < I32 > izip_encoding ENTREZ_ID; | |
| 183 | |
| 184 /* SCORE | |
| 185 */ | |
| 186 extern column < I32 > izip_encoding SCORE; | |
| 187 }; | |
| 188 | |
| 189 table NCBI:var:tbl:hitmap #1 | |
| 190 { | |
| 191 extern column U32 MAX_SEQ_LEN; /* must be static - NOTE(review): presumably a single value shared by all rows; confirm */ | |
| 192 extern column bool_encoding HITS; /* places on the reference with variations */ | |
| 193 }; | |
| 194 | |
| 195 | |
| 196 /*-------------------------------------------------------------------------- | |
| 197 * varloc database | |
| 198 * contains the varloc table ( member VARLOC ) and the hit table ( member HITMAP ) | |
| 199 */ | |
| 200 database NCBI:var:db:varloc #1 | |
| 201 { | |
| 202 table NCBI:var:tbl:varloc VARLOC; | |
| 203 table NCBI:var:tbl:hitmap HITMAP; | |
| 204 }; |
