diff libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/insdc.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/insdc/insdc.vschema	Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,232 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * INSDC types, constants
+ */
+version 1;  // NOTE(review): presumably the schema-language version this file targets - confirm against the VDB schema docs
+
+
+/*--------------------------------------------------------------------------
+ * dna
+ *  represented in IUPAC characters
+ */
+typedef ascii INSDC:dna:text;  // declared type of the 4na/2na/x2na *:CHARSET constants in this file
+
+
+/*--------------------------------------------------------------------------
+ * 4na
+ *  nucleotide data with all possible ambiguity
+ *  does not represent all possible EVENTS
+ *
+ *  text encodings use the IUPAC character set
+ *  legal values: [ACMGRSVTWYHKDBNacmgrsvtwyhkdbn.]
+ *  canonical values: [ACMGRSVTWYHKDBN]
+ *
+ *  binary values are 0..15 = { NACMGRSVTWYHKDBN } (0 is spelled '.' in map:CHARSET)
+ *
+ *  4na values use bits for each letter:
+ *
+ *       A | C | G | T
+ *    =================
+ *    N    |   |   |
+ *    A  * |   |   |
+ *    C    | * |   |
+ *    M  * | * |   |
+ *    G    |   | * |
+ *    R  * |   | * |
+ *    S    | * | * |
+ *    V  * | * | * |
+ *    T    |   |   | *
+ *    W  * |   |   | *
+ *    Y    | * |   | *
+ *    H  * | * |   | *
+ *    K    |   | * | *
+ *    D  * |   | * | *
+ *    B    | * | * | *
+ *    N  * | * | * | *
+ */
+typedef	U8 INSDC:4na:bin;           // binary 4na code held in a U8; values 0..15 per the header comment
+typedef	B1 INSDC:4na:packed [ 4 ];  // 4 elements of B1 (presumably 4 bits per base - confirm B1 width)
+
+const INSDC:4na:bin INSDC:4na:map:BINSET
+    = [ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ];  // all 16 codes in ascending order
+const INSDC:dna:text INSDC:4na:map:CHARSET
+    = ".ACMGRSVTWYHKDBN";  // 16 characters, same count as BINSET; index 0 is '.'; NOTE(review): presumably paired with BINSET by position - confirm with the consumer
+const INSDC:dna:text INSDC:4na:accept:CHARSET
+    = ".ACMGRSVTWYHKDBNacmgrsvtwyhkdbn";  // map:CHARSET followed by the same 15 letters in lower case
+
+
+/*--------------------------------------------------------------------------
+ * 2na  - nucleotide data A,T,G,C
+ * x2na - nucleotide data extended with single ambiguity value (N)
+ *
+ *  text encodings use the IUPAC character set
+ *  legal values: [ACGTNacgtn.]
+ *  canonical values: [ACGTN]
+ *
+ *  x2na values are 0..4 = { ACGTN }
+ *
+ *  2na values exclude N:
+ *    A = 0
+ *    C = 1
+ *    G = 2
+ *    T = 3
+ */
+typedef U8 INSDC:2na:bin;           // binary 2na code: A,C,G,T = 0..3 per the header comment
+typedef U8 INSDC:x2na:bin;          // binary x2na code: 0..4, where 4 pairs with 'N' in x2na:map:CHARSET
+typedef B1 INSDC:2na:packed [ 2 ];  // 2 elements of B1 (presumably 2 bits per base - confirm B1 width)
+
+const INSDC:2na:bin  INSDC:2na:map:BINSET      = [ 0,1,2,3 ];      // the four 2na codes
+const INSDC:dna:text INSDC:2na:map:CHARSET     = "ACGT";           // four characters, same count as 2na BINSET
+const INSDC:dna:text INSDC:2na:accept:CHARSET  = "ACGTacgt";       // map characters in upper and lower case; no 'N' or '.'
+const INSDC:x2na:bin INSDC:x2na:map:BINSET     = [ 0,1,2,3,4 ];    // the 2na codes plus 4
+const INSDC:dna:text INSDC:x2na:map:CHARSET    = "ACGTN";          // five characters, same count as x2na BINSET
+const INSDC:dna:text INSDC:x2na:accept:CHARSET = "ACGTNacgtn.";    // both cases plus '.', matching the header's legal values
+
+
+/*--------------------------------------------------------------------------
+ * color - color-space text
+ * 2cs   - color-space data 0,1,2,3
+ * x2cs  - color-space data extended with single ambiguity value (.)
+ *
+ *  text encodings use the ASCII numeric character set
+ *  values: [0123.]
+ *
+ *  x2cs values are 0..4 = { 0123. }
+ *
+ *  2cs values exclude '.':
+ *    '0' = 0
+ *    '1' = 1
+ *    '2' = 2
+ *    '3' = 3
+ */
+typedef ascii INSDC:color:text;     // declared type of the 2cs/x2cs *:CHARSET constants below
+typedef U8 INSDC:2cs:bin;           // binary 2cs code: '0'..'3' = 0..3 per the header comment
+typedef U8 INSDC:x2cs:bin;          // binary x2cs code: 0..4, where 4 pairs with '.' in x2cs:map:CHARSET
+typedef B1 INSDC:2cs:packed [ 2 ];  // 2 elements of B1 (presumably 2 bits per color - confirm B1 width)
+
+const INSDC:2cs:bin  INSDC:2cs:map:BINSET        = [ 0,1,2,3 ];      // the four 2cs codes
+const INSDC:color:text INSDC:2cs:map:CHARSET     = "0123";           // four characters, same count as 2cs BINSET
+const INSDC:color:text INSDC:2cs:accept:CHARSET  = "0123";           // identical to 2cs:map:CHARSET
+const INSDC:x2cs:bin INSDC:x2cs:map:BINSET       = [ 0,1,2,3,4 ];    // the 2cs codes plus 4
+const INSDC:color:text INSDC:x2cs:map:CHARSET    = "0123.";          // five characters, same count as x2cs BINSET
+const INSDC:color:text INSDC:x2cs:accept:CHARSET = "0123.";          // identical to x2cs:map:CHARSET
+
+const U8 INSDC:color:default_matrix =  // 25 entries, written below as 5 rows of 5
+[
+    0, 1, 2, 3, 4,  // within rows 0..3 and columns 0..3 each entry equals row XOR column
+    1, 0, 3, 2, 4,
+    2, 3, 0, 1, 4,
+    3, 2, 1, 0, 4,  // the last column is 4 in every row
+    4, 4, 4, 4, 4   // the last row is all 4
+];  // NOTE(review): presumably indexed by a pair of x2cs/x2na codes for color-space translation - confirm with the consumer
+
+
+/*--------------------------------------------------------------------------
+ * protein
+ *  represented in IUPAC characters
+ */
+typedef ascii INSDC:protein:text;  // declared type of the aa *:CHARSET constants in this file
+
+
+/*--------------------------------------------------------------------------
+ * aa
+ *  protein data
+ *  text encodings use the IUPAC character set
+ */
+typedef	U8 INSDC:aa:bin;  // binary amino-acid code held in a U8
+
+const INSDC:aa:bin INSDC:aa:map:BINSET
+= [ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27 ];  // 27 codes, 1..27; 0 is not listed
+const INSDC:protein:text INSDC:aa:map:CHARSET
+    = "ABCDEFGHIKLMNPQRSTVWXYZU*OJ";  // 27 characters, same count as BINSET; U, '*', O and J follow Z instead of sitting in alphabetical position
+const INSDC:protein:text INSDC:aa:accept:CHARSET
+    = "ABCDEFGHIJKLMNOPQRSTVWXYZU*abcdefghijklmnopqrstvwxyzu";  // the 27 map characters (J and O in alphabetical position here), then the 26 lower-case letters; no lower-case form of '*'
+
+
+/*--------------------------------------------------------------------------
+ * quality
+ *  quality scoring values
+ *
+ *  phred legal values: 0..63
+ */
+typedef U8 INSDC:quality:phred;     // phred score held in a U8; the header gives the legal range as 0..63
+typedef I8 INSDC:quality:log_odds;  // declared on I8 rather than U8 (presumably signed so scores can be negative - confirm)
+
+// text-encoding of quality scores
+// offsets are 33 = '!' and 64 = '@'
+typedef ascii INSDC:quality:text:phred_33;     // per the type name: phred text with offset 33 ('!')
+typedef ascii INSDC:quality:text:phred_64;     // per the type name: phred text with offset 64 ('@')
+typedef ascii INSDC:quality:text:log_odds_64;  // per the type name: log-odds text with offset 64 ('@')
+
+
+/*--------------------------------------------------------------------------
+ * coordinate
+ *  zero- and one-based coordinates
+ */
+
+// 32 bit coordinates
+typedef I32 INSDC:coord:val;
+typedef U32 INSDC:coord:len;
+
+// zero or one based coordinate system
+typedef INSDC:coord:val INSDC:coord:zero;
+typedef INSDC:coord:val INSDC:coord:one;
+
+// POSITION types for relating bases to their location in signal
+typedef INSDC:coord:zero INSDC:position:zero;
+typedef INSDC:coord:one INSDC:position:one;
+
+// one-based coordinate limits
+const INSDC:coord:one INSDC:coord:min:one = 0x80000001;
+const INSDC:coord:one INSDC:coord:max:one = 0x3FFFFFFF;
+
+// zero-based coordinate limits
+const INSDC:coord:zero INSDC:coord:min:zero = 0x80000000;
+const INSDC:coord:zero INSDC:coord:max:zero = 0x3FFFFFFE;
+
+/*-------------------------------------------------------------------------
+ * read filter bits
+ */
+typedef U8 INSDC:SRA:read_filter;  // filter value held in a U8
+const INSDC:SRA:read_filter SRA_READ_FILTER_PASS = 0;      // the four constants take consecutive values 0..3
+const INSDC:SRA:read_filter SRA_READ_FILTER_REJECT = 1;
+const INSDC:SRA:read_filter SRA_READ_FILTER_CRITERIA = 2;
+const INSDC:SRA:read_filter SRA_READ_FILTER_REDACTED = 3;  // NOTE(review): 3 also equals 1|2 - confirm these are enumerated values and not combinable flags
+
+/*-------------------------------------------------------------------------
+ * read type bits
+ */
+typedef U8 INSDC:SRA:xread_type;  // read-type value held in a U8
+const INSDC:SRA:xread_type SRA_READ_TYPE_TECHNICAL  = 0;  // no bit set
+const INSDC:SRA:xread_type SRA_READ_TYPE_BIOLOGICAL = 1;  // bit 0
+const INSDC:SRA:xread_type SRA_READ_TYPE_FORWARD    = 2;  // bit 1
+const INSDC:SRA:xread_type SRA_READ_TYPE_REVERSE    = 4;  // bit 2; NOTE(review): presumably OR-ed with TECHNICAL/BIOLOGICAL - confirm with users of xread_type
+
+// original read-types included only technical and biological
+typedef INSDC:SRA:xread_type INSDC:SRA:read_type;  // alias of xread_type, so both names share one underlying type
+