Mercurial > repos > charles_s_test > seqsero2
diff libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/seq.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/seq.vschema Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,210 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * Sequence schema - declares rand_4na_2na, INSDC:tbl:sequence and INSDC:tbl:protein
+ */
+version 1;
+
+include 'vdb/vdb.vschema';
+include 'insdc/insdc.vschema';
+
+
+/*--------------------------------------------------------------------------
+ * rand_4na_2na
+ *  converts 4na ( INSDC:4na:bin input ) to 2na ( INSDC:2na:bin output )
+ *
+ *  substitutes a random base for ambiguities
+ *  from the bases allowed in the 4na.
+ *
+ *       A | C | G | T
+ *     =================
+ *   N   |   |   |     # any base may be substituted
+ *   A * |   |   |     # always A
+ *   C   | * |   |     # always C
+ *   M * | * |   |     # A or C
+ *   G   |   | * |     # always G
+ *   R * |   | * |     # A or G
+ *   S   | * | * |     # C or G
+ *   V * | * | * |     # A, C or G
+ *   T   |   |   | *   # always T
+ *   W * |   |   | *   # A or T
+ *   Y   | * |   | *   # C or T
+ *   H * | * |   | *   # A, C or T
+ *   K   |   | * | *   # G or T
+ *   D * |   | * | *   # A, G or T
+ *   B   | * | * | *   # C, G or T
+ *   N * | * | * | *   # any base may be substituted
+ */
+extern function
+    INSDC:2na:bin INSDC:SEQ:rand_4na_2na #1 ( INSDC:4na:bin rd_bin );
+
+
+/*--------------------------------------------------------------------------
+ * sequence
+ *  basic sequence table
+ *
+ * history:
+ *  1.0.1 - introduced text-mode QUALITY columns
+ */
+table INSDC:tbl:sequence #1.0.1
+{
+    /* READ
+     *  native or converted DNA sequence
+     */
+
+    // default is IUPAC character representation; validate compares in_dna_text with out_dna_text
+    extern default column INSDC:dna:text READ
+    {
+        read = out_dna_text;
+        validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text );
+    }
+
+    // 4na representation - unpacked and packed
+    extern column INSDC:4na:bin READ = out_4na_bin;
+    extern column INSDC:4na:packed READ = out_4na_packed;
+
+    // x2na representation - 2na with ambiguity
+    extern column INSDC:x2na:bin READ = out_x2na_bin;
+
+    // 2na representation - 2na with no ambiguity - unpacked and packed
+    extern column INSDC:2na:bin READ = out_2na_bin;
+    extern column INSDC:2na:packed READ = out_2na_packed;
+
+
+
+    /* CSREAD
+     *  native or converted color-space sequence
+     */
+
+    // default is ASCII character representation; validate compares in_color_text with out_color_text
+    extern default column INSDC:color:text CSREAD
+    {
+        read = out_color_text;
+        validate = < INSDC:color:text > compare ( in_color_text, out_color_text );
+    }
+
+    // x2cs representation - 2cs with ambiguity
+    extern column INSDC:x2cs:bin CSREAD = out_x2cs_bin;
+
+    // 2cs representation - 2cs with no ambiguity - unpacked and packed
+    extern column INSDC:2cs:bin CSREAD = out_2cs_bin;
+    extern column INSDC:2cs:packed CSREAD = out_2cs_packed;
+
+    /* CS_NATIVE
+     *  whether color-space is the native sequence space ( readonly column, read from cs_native )
+     */
+    readonly column bool CS_NATIVE = cs_native;
+
+    /* CS_KEY
+     *  leading call given in base-space; validate compares in_cs_key with out_cs_key
+     */
+    extern column INSDC:dna:text CS_KEY
+    {
+        read = out_cs_key;
+        validate = < INSDC:dna:text > compare ( in_cs_key, out_cs_key );
+    }
+
+    /* COLOR_MATRIX
+     *  matrix used for color-space conversions
+     */
+    extern column U8 COLOR_MATRIX = out_color_matrix;
+
+
+    /* QUALITY
+     *  base or color call qualities
+     */
+
+    // PHRED is default
+    extern default column INSDC:quality:phred QUALITY = out_qual_phred;
+
+    // textual encodings - from out_qual_text_phred_NN, or alternatively sum < NN > ( out_qual_phred )
+    extern column INSDC:quality:text:phred_33 QUALITY
+        = out_qual_text_phred_33
+        | ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred );
+    extern column INSDC:quality:text:phred_64 QUALITY
+        = out_qual_text_phred_64
+        | ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred );
+
+
+    /* SIGNAL
+     *  signal and intensity information is unspecified; signal_len is row_len ( out_signal ), or alternatively echo < 0 >
+     */
+    INSDC:coord:len signal_len
+        = ( INSDC:coord:len ) row_len ( out_signal )
+        | < INSDC:coord:len > echo < 0 > ();
+
+
+    /* INSDC:tbl:sequence virtual productions ( referenced above, not defined in this table )
+     *  cs_native
+     *  in_cs_key
+     *  out_cs_key
+     *  out_signal
+     *  in_dna_text
+     *  out_2cs_bin
+     *  out_2na_bin
+     *  out_4na_bin
+     *  out_dna_text
+     *  out_x2cs_bin
+     *  out_x2na_bin
+     *  in_color_text
+     *  out_2cs_packed
+     *  out_2na_packed
+     *  out_4na_packed
+     *  out_color_text
+     *  out_qual_phred
+     *  out_color_matrix
+     *  out_qual_text_phred_33, out_qual_text_phred_64 ( first source of the text-mode QUALITY columns ) */
+};
+
+
+/*--------------------------------------------------------------------------
+ * protein
+ *  basic protein sequence table
+ */
+table INSDC:tbl:protein #1
+{
+    /* PROTEIN
+     *  native or converted protein sequence
+     */
+
+    // default is IUPAC character representation; validate compares in_protein_text with out_protein_text
+    extern default column INSDC:protein:text PROTEIN
+    {
+        read = out_protein_text;
+        validate = < INSDC:protein:text > compare ( in_protein_text, out_protein_text );
+    }
+
+    // aa representation - read from out_aa_bin
+    extern column INSDC:aa:bin PROTEIN = out_aa_bin;
+
+
+    /* INSDC:tbl:protein productions ( referenced above, not defined in this table )
+     *  out_aa_bin
+     *  in_protein_text
+     *  out_protein_text
+     */
+};