diff libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/seq.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/seq.vschema	Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,894 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * Sequence schema implementation tables
+ */
+version 1;
+
+include 'vdb/vdb.vschema';
+include 'ncbi/ncbi.vschema';
+include 'insdc/sra.vschema';
+
+
+/*--------------------------------------------------------------------------
+ * n_encoding - implementation
+ *  introduces the common virtual productions "read_unpack" and "read_ndecode"
+ *  through a dummy rule; tables built on this one supply the definitions */
+table NCBI:tbl:n_encoding #1
+{
+    U8 n_encoding_dummy
+        = read_unpack
+        | read_ndecode;
+};
+
+
+/*--------------------------------------------------------------------------
+ * seqloc
+ *  NCBI sequence locator table: SEQ_ID, SEQ_START and SEQ_LEN columns
+ */
+table NCBI:tbl:seqloc #1.0
+{
+    /* SEQ_ID
+     *  a FASTA-style SeqId, typed as ascii and declared with zip_encoding
+     */
+    extern column < ascii > zip_encoding SEQ_ID;
+
+    /* SEQ_START
+     *  stored 1-based ( default ); the readonly 0-based view is "diff < 1 >" of it
+     */
+    extern default column < INSDC:coord:one > izip_encoding SEQ_START;
+    readonly column INSDC:coord:zero SEQ_START
+        = ( INSDC:coord:zero ) < INSDC:coord:one > diff < 1 > ( .SEQ_START );
+
+    /* SEQ_LEN
+     *  typed as INSDC:coord:len and declared with izip_encoding */
+    extern column < INSDC:coord:len > izip_encoding SEQ_LEN;
+};
+
+
+/*--------------------------------------------------------------------------
+ * base_space - implementation
+ *  READ column rules
+ */
+
+/* color_from_dna
+ *  use starting keys and color matrix to convert individual reads
+ *  from base space ( x2na bin ) to color space ( x2cs bin ).
+ */
+extern function
+INSDC:x2cs:bin NCBI:color_from_dna #1 ( INSDC:x2na:bin bin_x2na,
+    INSDC:coord:zero read_start, INSDC:coord:len read_len,
+    INSDC:dna:text cs_key, U8 color_matrix );
+
+
+/* dcmp_base_space
+ *  table to introduce the common virtual "out_dcmp_*" base-space productions
+ */
+table NCBI:tbl:dcmp_base_space #1
+{
+    // this rule exists only to name the purely virtual productions below;
+    // it is never expected to resolve...
+    INSDC:dna:text dcmp_virtual_productions
+        = out_dcmp_4na_bin
+        | out_dcmp_x2na_bin
+        | out_dcmp_2na_bin
+        | out_dcmp_2na_packed;
+}
+
+/* base_space_common - history:
+ *  1.0.1 - base explicitly upon sequence #1.0.1, spotdesc #1.0.1
+ *  1.0.2 - spotdesc #1.0.2
+ *  1.0.3 - base upon dcmp_base_space for "out_dcmp_2na_bin"
+ */
+table NCBI:tbl:base_space_common #1.0.3
+    = INSDC:tbl:sequence #1.0.1
+    , INSDC:SRA:tbl:spotdesc #1.0.2
+    , INSDC:SRA:tbl:stats #1.1.0
+    , NCBI:tbl:dcmp_base_space #1.0.0
+{
+    /* definitions for virtual productions inherited from INSDC:tbl:sequence
+     */
+
+    // cs_native - tells user color space is not native
+    bool cs_native = < bool > echo < false > ();
+
+    // in_cs_key is not writable in base_space
+
+    // color-space key is artificial: .CS_KEY is listed first, else 'T' is echoed
+    INSDC:dna:text out_cs_key
+        = .CS_KEY
+        | < INSDC:dna:text > echo < 'T' > ( out_read_type )
+        | < INSDC:dna:text > echo < 'T' > ( out_read_len )
+        | < INSDC:dna:text > echo < 'T' > ();
+
+    // unambiguous synthesized 2cs ( the fifth BINSET entry is mapped to 0 )
+    INSDC:2cs:bin out_2cs_bin
+        = < INSDC:x2cs:bin, INSDC:2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( out_x2cs_bin );
+
+    // unambiguous unpacked 2na: decompressed form first, else unpack of packed 2na
+    INSDC:2na:bin out_2na_bin
+        = out_dcmp_2na_bin
+        | ( INSDC:2na:bin ) unpack ( out_2na_packed );
+
+    // synthesized color sequence, computed from bases by NCBI:color_from_dna
+    INSDC:x2cs:bin out_x2cs_bin
+        = NCBI:color_from_dna ( out_x2na_bin, out_read_start, out_read_len, out_cs_key, out_color_matrix );
+
+    // synthesized packed 2cs
+    INSDC:2cs:packed out_2cs_packed
+        = ( INSDC:2cs:packed ) pack ( out_2cs_bin );
+
+    // synthesized packed 4na
+    INSDC:4na:packed out_4na_packed
+        = ( INSDC:4na:packed ) pack ( out_4na_bin );
+
+    // synthesized color text, mapped from x2cs bin via BINSET -> CHARSET
+    INSDC:color:text out_color_text
+        = < INSDC:x2cs:bin, INSDC:color:text > map <  INSDC:x2cs:map:BINSET, INSDC:x2cs:map:CHARSET > ( out_x2cs_bin );
+
+    // published color matrix: always INSDC:color:default_matrix in this table
+    U8 out_color_matrix
+        = < U8 > echo < INSDC:color:default_matrix > ();
+
+    // spot_len and fixed_spot_len, both measured on out_2na_packed
+    INSDC:coord:len base_space_spot_len
+        = ( INSDC:coord:len ) row_len ( out_2na_packed );
+    INSDC:coord:len base_space_fixed_spot_len
+        = ( INSDC:coord:len ) fixed_row_len ( out_2na_packed );
+
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 *  in_dna_text
+	 *  out_4na_bin
+	 *  out_dna_text
+	 *  out_x2na_bin
+	 *  out_2na_packed
+	 */
+
+	/* INSDC:SRA:tbl:stats inherited productions
+	 *  in_stats_bin
+	 */
+
+	/* NCBI:tbl:dcmp_base_space inherited productions
+	 *  out_dcmp_2na_bin
+	 *  out_dcmp_4na_bin
+	 *  out_dcmp_x2na_bin
+	 *  out_dcmp_2na_packed
+	 */
+};
+
+
+/* base_space_nocol
+ *  this table describes viewing rules
+ *  but omits writing rules and physical column description
+ *  in order to support older tables
+ *
+ * history:
+ *  1.0.1 - base explicitly upon base_space_common #1.0.1
+ *  1.0.2 - base explicitly upon base_space_common #1.0.2
+ *  1.0.3 - base explicitly upon base_space_common #1.0.3
+ */
+table NCBI:tbl:base_space_nocol #1.0.3
+    = NCBI:tbl:base_space_common #1.0.3
+    , NCBI:tbl:n_encoding #1
+{
+    // incoming is disabled ( this table defines no in_* rules )
+
+    // synthesized dna text, mapped from x2na bin via BINSET -> CHARSET
+    INSDC:dna:text out_dna_text
+        = < INSDC:x2na:bin, INSDC:dna:text > map < INSDC:x2na:map:BINSET, INSDC:x2na:map:CHARSET > ( out_x2na_bin );
+
+    // synthesized 4na, mapped from x2na bin onto [ 1, 2, 4, 8, 15 ]
+    INSDC:4na:bin out_4na_bin
+        = < INSDC:x2na:bin, INSDC:4na:bin > map < INSDC:x2na:map:BINSET, [ 1, 2, 4, 8, 15 ] > ( out_x2na_bin );
+
+    // unpacked 2na with ambiguities, taken from the n-decoded read
+    INSDC:x2na:bin out_x2na_bin
+        = ( INSDC:x2na:bin ) read_ndecode;
+
+    // interface with n-encoded qualities: supplies n_encoding's "read_unpack"
+    U8 read_unpack = out_2na_bin;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 *  out_2na_packed
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_ndecode
+	 */
+};
+
+/* base_space #1
+ *  this schema brings in standard .READ column for v1 tables
+ *
+ * history:
+ *  1.0.1 - base explicitly upon base_space_nocol #1.0.1
+ *  1.0.2 - base explicitly upon base_space_nocol #1.0.2
+ *  1.0.3 - base explicitly upon base_space_nocol #1.0.3
+ */
+table NCBI:tbl:base_space #1.0.3 = NCBI:tbl:base_space_nocol #1.0.3
+{
+    // 2-bit 2na representation (0..3), read straight from physical .READ
+    INSDC:2na:packed out_2na_packed = .READ;
+
+    // no rules for writing to .READ ( v1 tables are view-only here )
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_ndecode
+	 */
+};
+
+
+/* base_space #2
+ *  standard current base-space table
+ *
+ * history:
+ *  2.0.2 - base_space_common #1.0.2
+ *  2.0.3 - base_space_common #1.0.3 now has dcmp_base_space as well
+ */
+table NCBI:tbl:base_space #2.0.3
+    = NCBI:tbl:base_space_common #1.0.3
+    , NCBI:tbl:dcmp_base_space #1
+{
+    /* input rules - every rule below takes its raw input from READ
+     */
+
+    // input text: '.' becomes 'N', lower-case IUPAC letters become upper-case
+    INSDC:dna:text in_dna_text
+        = < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( READ );
+
+    // input 4na bin: validated 0..15, or unpacked, or mapped from text / x2na
+    INSDC:4na:bin in_4na_bin
+        = < INSDC:4na:bin > range_validate < 0, 15 > ( READ )
+        | ( INSDC:4na:bin ) unpack ( in_4na_packed )
+        | < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_dna_text )
+        | < INSDC:x2na:bin, INSDC:4na:bin > map < INSDC:x2na:map:BINSET, [ 1, 2, 4, 8, 15 ] > ( in_x2na_bin );
+
+    // input 4na packed
+    INSDC:4na:packed in_4na_packed = READ;
+
+    // input x2na bin: validated 0..4, or mapped down from 4na
+    INSDC:x2na:bin in_x2na_bin
+        = < INSDC:x2na:bin > range_validate < 0, 4 > ( READ )
+        | < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( in_4na_bin );
+
+    // input 2na bin: validated 0..3, or unpacked, or via INSDC:SEQ:rand_4na_2na
+    INSDC:2na:bin in_2na_bin
+        = < INSDC:2na:bin > range_validate < 0, 3 > ( READ )
+        | ( INSDC:2na:bin ) unpack ( in_2na_packed )
+        | INSDC:SEQ:rand_4na_2na ( in_4na_bin );
+
+    // input 2na packed
+    INSDC:2na:packed in_2na_packed = READ;
+
+    // input 4na alt-read ( ambiguities ): table slots 1, 2, 4, 8 give 0; slot 0 gives 15
+    INSDC:4na:bin in_alt_4na_bin
+        = < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_4na_bin );
+
+    // preparing a feed into stats column
+    U8 in_stats_bin = in_2na_bin;
+
+
+    /* physical columns - .READ holds packed 2na, .ALTREAD holds the alt 4na
+     */
+
+    physical column INSDC:2na:packed .READ
+        = in_2na_packed
+        | ( INSDC:2na:packed ) pack ( in_2na_bin );
+
+    physical column < INSDC:4na:bin > zip_encoding .ALTREAD
+        = < INSDC:4na:bin > trim < 0, 0 > ( in_alt_4na_bin );
+
+
+    /* output rules
+     */
+
+    // output 2na packed: physical .READ first, else the decompressed production
+    INSDC:2na:packed out_2na_packed
+        = .READ
+        | out_dcmp_2na_packed;
+
+    // output x2na bin: decompressed production first, else mapped from 4na
+    INSDC:x2na:bin out_x2na_bin
+        = out_dcmp_x2na_bin
+        | < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_4na_bin );
+
+    // output 2na->4na bin, mapped onto [ 1, 2, 4, 8 ]
+    INSDC:4na:bin out_2na_4na_bin
+        = < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_2na_bin );
+
+    // output 4na bin: 2na-derived 4na bit_or'ed with .ALTREAD, then fallbacks
+    INSDC:4na:bin out_4na_bin
+        = < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_2na_4na_bin, .ALTREAD )
+        | out_dcmp_4na_bin
+        | out_2na_4na_bin;
+
+    // output text, mapped from 4na bin via BINSET -> CHARSET
+    INSDC:dna:text out_dna_text
+        = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_4na_bin );
+
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 */
+
+	/* NCBI:tbl:dcmp_base_space inherited productions
+	 *  out_dcmp_2na_bin
+	 *  out_dcmp_4na_bin
+	 *  out_dcmp_x2na_bin
+	 *  out_dcmp_2na_packed
+	 */
+};
+
+
+
+
+/*--------------------------------------------------------------------------
+ * color_space - implementation
+ *  nucleotide sequences in color space
+ */
+// dna_from_color - takes x2cs color bins, read bounds, cs key and color matrix; yields x2na bins
+extern function
+INSDC:x2na:bin NCBI:dna_from_color #1 ( INSDC:x2cs:bin color_bin,
+     INSDC:coord:zero read_start, INSDC:coord:len read_len,
+     INSDC:dna:text cs_key, U8 color_matrix );
+
+
+/* dcmp_color_space
+ *  declares the common virtual "out_dcmp_*" color-space productions
+ */
+table NCBI:tbl:dcmp_color_space #1
+{
+    // this rule exists only to name the purely virtual productions below;
+    // it is never expected to resolve...
+    INSDC:dna:text dcmp_virtual_productions
+        = out_dcmp_x2cs_bin
+        | out_dcmp_2cs_bin
+        | out_dcmp_2cs_packed;
+}
+
+/* color_space_common - history:
+ *  1.0.1 - base explicitly upon sequence #1.0.1, spotdesc #1.0.1
+ *  1.0.2 - spotdesc #1.0.2
+ *  1.0.3 - base upon dcmp_color_space for "out_dcmp_2cs_bin"
+ */
+table NCBI:tbl:color_space_common #1.0.3
+    = INSDC:tbl:sequence #1.0.1
+    , INSDC:SRA:tbl:spotdesc #1.0.2
+    , INSDC:SRA:tbl:stats #1.1.0
+    , NCBI:tbl:dcmp_color_space #1.0.0
+{
+    // cs_native - tells user color space is native
+    bool cs_native = < bool > echo < true > ();
+
+    // unambiguous unpacked 2cs: decompressed form first, else unpack of packed 2cs
+    INSDC:2cs:bin out_2cs_bin
+        = out_dcmp_2cs_bin
+        | ( INSDC:2cs:bin ) unpack ( out_2cs_packed );
+
+    // unambiguous synthesized 2na ( the fifth BINSET entry is mapped to 0 )
+    INSDC:2na:bin out_2na_bin
+        = < INSDC:x2na:bin, INSDC:2na:bin > map < INSDC:x2na:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( out_x2na_bin );
+
+    // synthesized unpacked 4na, mapped from x2na bin onto [ 1, 2, 4, 8, 15 ]
+    INSDC:4na:bin out_4na_bin
+        = < INSDC:x2na:bin, INSDC:4na:bin > map < INSDC:x2na:map:BINSET, [ 1, 2, 4, 8, 15 ] > ( out_x2na_bin );
+
+    // synthesized dna text, mapped from x2na bin via BINSET -> CHARSET
+    INSDC:dna:text out_dna_text
+        = < INSDC:x2na:bin, INSDC:dna:text > map < INSDC:x2na:map:BINSET, INSDC:x2na:map:CHARSET > ( out_x2na_bin );
+
+    // synthesized dna sequence, computed from colors by NCBI:dna_from_color
+    INSDC:x2na:bin out_x2na_bin
+        = NCBI:dna_from_color ( out_x2cs_bin, out_read_start, out_read_len, out_cs_key, out_color_matrix );
+
+    // synthesized packed 2na
+    INSDC:2na:packed out_2na_packed
+        = ( INSDC:2na:packed ) pack ( out_2na_bin );
+
+    // synthesized packed 4na
+    INSDC:4na:packed out_4na_packed
+        = ( INSDC:4na:packed ) pack ( out_4na_bin );
+
+    // color text, mapped from x2cs bin via BINSET -> CHARSET
+    INSDC:color:text out_color_text
+        = < INSDC:x2cs:bin, INSDC:color:text > map <  INSDC:x2cs:map:BINSET, INSDC:x2cs:map:CHARSET > ( out_x2cs_bin );
+
+    // spot_len and fixed_spot_len, both measured on out_2cs_packed
+    INSDC:coord:len color_space_spot_len
+        = ( INSDC:coord:len ) row_len ( out_2cs_packed );
+    INSDC:coord:len color_space_fixed_spot_len
+        = ( INSDC:coord:len ) fixed_row_len ( out_2cs_packed );
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  in_cs_key
+	 *  out_cs_key
+	 *  out_signal
+	 *  out_x2cs_bin
+	 *  in_color_text
+	 *  out_2cs_packed
+	 *  out_color_matrix
+	 */
+
+	/* INSDC:SRA:tbl:stats inherited productions
+	 *  in_stats_bin
+	 */
+
+	/* NCBI:tbl:dcmp_color_space inherited productions
+	 *  out_dcmp_2cs_bin
+	 *  out_dcmp_x2cs_bin
+	 *  out_dcmp_2cs_packed
+	 */
+};
+
+/* color_space_nocol
+ *  this table describes viewing rules
+ *  but omits writing rules and physical column description
+ *  in order to support older tables
+ *
+ * history:
+ *  1.0.1 - base explicitly upon color_space_common #1.0.1
+ *  1.0.2 - color_space_common #1.0.2
+ *  1.0.3 - color_space_common #1.0.3
+ */
+table NCBI:tbl:color_space_nocol #1.0.3
+    = NCBI:tbl:color_space_common #1.0.3
+    , NCBI:tbl:n_encoding #1
+{
+    // incoming is disabled ( this table defines no in_* rules )
+
+    // v1 color matrix was stored in metadata node "COLOR_MATRIX"; else the default is echoed
+    U8 out_color_matrix
+        = < U8 > meta:read < "COLOR_MATRIX" > ()
+        | < U8 > echo < INSDC:color:default_matrix > ();
+
+    // unpacked 2cs with ambiguities, taken from the n-decoded read
+    INSDC:x2cs:bin out_x2cs_bin
+        = ( INSDC:x2cs:bin ) read_ndecode;
+
+    // interface with n-encoded qualities: supplies n_encoding's "read_unpack"
+    U8 read_unpack = out_2cs_bin;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_cs_key
+	 *  out_signal
+	 *  out_2cs_packed
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_ndecode
+	 */
+};
+
+/* color_space #1
+ *  this schema brings in .CSREAD and .CS_KEY columns for v1 tables
+ *
+ * history:
+ *  1.0.1 - base explicitly upon color_space_nocol #1.0.1
+ *  1.0.2 - color_space_nocol #1.0.2
+ *  1.0.3 - color_space_nocol #1.0.3
+ */
+table NCBI:tbl:color_space #1.0.3 = NCBI:tbl:color_space_nocol #1.0.3
+{
+    // color-space key is stored as text in physical .CS_KEY
+    INSDC:dna:text out_cs_key = .CS_KEY;
+
+    // stored color sequence, read as packed 2cs from physical .CSREAD
+    INSDC:2cs:packed out_2cs_packed = .CSREAD;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_ndecode
+	 */
+};
+
+/* color_space #2
+ *  standard current color-space table
+ *
+ * history:
+ *  2.0.1 - base explicitly upon color_space_common #1.0.1
+ *  2.0.2 - base explicitly upon color_space_common #1.0.2
+ *  2.1.0 - introduce hooks for compressed color space
+ */
+table NCBI:tbl:color_space #2.1
+    = NCBI:tbl:color_space_common #1.0.3
+    , NCBI:tbl:dcmp_color_space #1.0.0
+{
+    /* input rules - read input comes from CSREAD, key from CS_KEY, matrix from COLOR_MATRIX
+     */
+
+    // input text is not modified
+    // illegal values are not detected here
+    INSDC:color:text in_color_text = CSREAD;
+
+    // input x2cs bin
+    // illegal values will be caught here ( range 0..4, or CHARSET -> BINSET map )
+    INSDC:x2cs:bin in_x2cs_bin
+        = < INSDC:x2cs:bin > range_validate < 0, 4 > ( CSREAD )
+        | < INSDC:color:text, INSDC:x2cs:bin > map < INSDC:x2cs:map:CHARSET, INSDC:x2cs:map:BINSET > ( in_color_text );
+
+    // input 2cs bin: validated 0..3, or unpacked, or mapped down from x2cs
+    INSDC:2cs:bin in_2cs_bin
+        = < INSDC:2cs:bin > range_validate < 0, 3 > ( CSREAD )
+        | ( INSDC:2cs:bin ) unpack ( in_2cs_packed )
+        | < INSDC:x2cs:bin, INSDC:2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 1, 2, 3, 0 ] > ( in_x2cs_bin );
+
+    // input 2cs packed
+    INSDC:2cs:packed in_2cs_packed = CSREAD;
+
+    // input x2cs alt-csread ( ambiguity ): only the fifth BINSET entry survives, as 4
+    INSDC:x2cs:bin in_alt_x2cs_bin
+        = < INSDC:x2cs:bin, INSDC:x2cs:bin > map < INSDC:x2cs:map:BINSET, [ 0, 0, 0, 0, 4 ] > ( in_x2cs_bin );
+
+    // color-space keys ARE modified on input ( 'acgt' mapped to 'ACGT' )
+    INSDC:dna:text in_cs_key
+        = < INSDC:dna:text, INSDC:dna:text > map < 'acgt', 'ACGT' > ( CS_KEY );
+
+    // color matrix: input values are range-validated to 0..4
+    U8 in_color_matrix = < U8 > range_validate < 0, 4 > ( COLOR_MATRIX );
+
+    // preparing a feed into stats column
+    U8 in_stats_bin = in_2cs_bin;
+
+
+    /* physical columns - .CSREAD, .ALTCSREAD, .CS_KEY and .COLOR_MATRIX
+     */
+
+    physical column INSDC:2cs:packed .CSREAD
+        = in_2cs_packed
+        | ( INSDC:2cs:packed ) pack ( in_2cs_bin );
+
+    physical column < INSDC:x2cs:bin > zip_encoding .ALTCSREAD
+        = < INSDC:x2cs:bin > trim < 0, 0 > ( in_alt_x2cs_bin );
+
+    physical column < INSDC:dna:text > zip_encoding .CS_KEY = in_cs_key;
+
+    physical column < U8 > zip_encoding .COLOR_MATRIX = in_color_matrix;
+
+
+    /* output rules
+     */
+
+    // output 2cs packed: physical .CSREAD first, else the decompressed production
+    INSDC:2cs:packed out_2cs_packed
+        = .CSREAD
+        | out_dcmp_2cs_packed;
+
+    // unpacked 2cs with ambiguity: 2cs bit_or'ed with .ALTCSREAD, then fallbacks
+    INSDC:x2cs:bin out_x2cs_bin
+        = ( INSDC:x2cs:bin ) < U8 > bit_or < ALIGN_RIGHT > ( out_2cs_bin, .ALTCSREAD )
+        | out_dcmp_x2cs_bin
+        | ( INSDC:x2cs:bin ) out_2cs_bin;
+
+    // read directly from physical column
+    INSDC:dna:text out_cs_key = .CS_KEY;
+
+    // color matrix may be synthesized: .COLOR_MATRIX first, else the default matrix
+    U8 out_color_matrix
+        = .COLOR_MATRIX
+        | < U8 > echo < INSDC:color:default_matrix > ();
+
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_signal
+	 */
+
+	/* NCBI:tbl:dcmp_color_space inherited productions
+	 *  out_dcmp_2cs_bin
+	 *  out_dcmp_x2cs_bin
+	 *  out_dcmp_2cs_packed
+	 */
+};
+
+
+/*--------------------------------------------------------------------------
+ * protein - input, storage and output rules for the PROTEIN column
+ */
+table NCBI:tbl:protein #1 = INSDC:tbl:protein
+{
+    /* input text: lower-case letters are mapped to upper-case */
+    INSDC:protein:text in_protein_text = < INSDC:protein:text, INSDC:protein:text >
+        map < 'abcdefghijklmnopqrstvwxyzu','ABCDEFGHIJKLMNOPQRSTVWXYZU' > ( PROTEIN );
+
+    /* std aa: binary input validated to 1..27, or mapped from input text */
+    INSDC:aa:bin in_aa_bin
+        = < INSDC:aa:bin > range_validate < 1, 27 > ( PROTEIN )
+        | < INSDC:protein:text, INSDC:aa:bin > map < INSDC:aa:map:CHARSET, INSDC:aa:map:BINSET > ( in_protein_text );
+
+    /* physical column: aa bin, declared with zip_encoding */
+    physical column < INSDC:aa:bin > zip_encoding .PROTEIN = in_aa_bin;
+
+    /* output rules: aa bin comes from .PROTEIN; text is mapped BINSET -> CHARSET */
+    INSDC:aa:bin out_aa_bin = .PROTEIN;
+    INSDC:protein:text out_protein_text = < INSDC:aa:bin, INSDC:protein:text >
+        map < INSDC:aa:map:BINSET, INSDC:aa:map:CHARSET > ( out_aa_bin );
+};
+
+
+/*--------------------------------------------------------------------------
+ * phred
+ *  standard phred quality representation
+ *  limits values on input to 1..63
+ *  reserves value 0 as ambiguity symbol for reads
+ */
+
+
+/* phred_quality_nocol - history:
+ *  1.0.1 - base explicitly upon sequence #1.0.1
+ */
+table NCBI:tbl:phred_quality_nocol #1.0.1 = INSDC:tbl:sequence #1.0.1, NCBI:tbl:n_encoding #1
+{
+    /* [CS]READ - decoding
+     *  NOTE(review): presumably yields 4 where quality is 0, else read_unpack - confirm */
+    U8 read_ndecode
+        = < INSDC:quality:phred, U8 > map < 0, 4 > ( out_qual_phred, read_unpack );
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_phred
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_unpack
+	 */
+};
+
+/* phred_quality #1 - history:
+ *  1.0.1 - base explicitly upon phred_quality_nocol #1.0.1
+ */
+table NCBI:tbl:phred_quality #1.0.1 = NCBI:tbl:phred_quality_nocol #1.0.1
+{
+    // read directly from .QUALITY, as n-encoded phred is compatible with phred
+    NCBI:quality:n_encoded:phred out_qual_phred = .QUALITY;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_unpack
+	 */
+};
+
+/* phred_quality #2 - history:
+ *  2.0.1 - added feed of in_stats_qual
+ *  2.0.2 - added input of text encodings
+ *  2.0.3 - base explicitly upon sequence #1.0.1
+ *  2.0.4 - change compression from izip to zip
+ *  2.0.5 - ( planned, not yet active below ) change from zip to delta_average_zip
+ */
+table NCBI:tbl:phred_quality #2.0.4 = INSDC:tbl:sequence #1.0.1
+{
+    // read quality directly as phred from physical .QUALITY
+    INSDC:quality:phred out_qual_phred = .QUALITY;
+
+    // input rules: QUALITY accepted as phred text ( offset 33 or 64 ) or binary phred
+    INSDC:quality:text:phred_33 in_qual_text_phred_33 = QUALITY;
+    INSDC:quality:text:phred_64 in_qual_text_phred_64 = QUALITY;
+
+    INSDC:quality:phred in_qual_phred
+        = QUALITY
+        | ( INSDC:quality:phred ) < B8 > diff < 33 > ( in_qual_text_phred_33 )
+        | ( INSDC:quality:phred ) < B8 > diff < 64 > ( in_qual_text_phred_64 );
+
+    // physical storage
+/*** next line is  for future change in production, but we have to wait until supporting code is released to the public ***/
+// physical column < INSDC:quality:phred > delta_average_zip_encoding .QUALITY = in_qual_phred;
+/*** NB *** MUST change table version to 2.0.5 and propagate to all derived tables ***/
+    physical column < INSDC:quality:phred > zip_encoding .QUALITY = in_qual_phred;
+
+    // feed to compressed statistics
+    INSDC:quality:phred in_stats_qual = in_qual_phred;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+};
+
+
+
+/*--------------------------------------------------------------------------
+ * log_odds
+ *  log-odds quality score support
+ *
+ *  conversion from log-odds to phred is via formula
+ *    10 * log ( 1 + pow ( 10, x / 10 ) ) / log ( 10 ) + 0.499
+ *  for x = -5..40 : when x = -6, phred = 0
+ */
+
+// the map function requires two lookup tables of equal length:
+// the first table detects every legal value ( -6..40, 47 entries )...
+const INSDC:quality:log_odds NCBI:quality:from:log_odds =
+[
+             -6,-5,-4,-3,-2,-1, 0,
+     1, 2, 3, 4, 5, 6, 7, 8, 9,10,
+    11,12,13,14,15,16,17,18,19,20,
+    21,22,23,24,25,26,27,28,29,30,
+    31,32,33,34,35,36,37,38,39,40
+];
+
+// ...the second table gives positional translations ( 47 entries; -6 -> 0, 40 -> 40 )
+const INSDC:quality:phred NCBI:quality:to:phred =
+[
+              0, 1, 1, 2, 2, 3, 3,
+     4, 4, 5, 5, 6, 7, 8, 9,10,10,
+    11,12,13,14,15,16,17,18,19,20,
+    21,22,23,24,25,26,27,28,29,30,
+    31,32,33,34,35,36,37,38,39,40
+];
+
+function
+INSDC:quality:phred NCBI:log_odds_to_phred #1 ( INSDC:quality:log_odds qual_log_odds )
+{
+    // clip to -6..40, the span of NCBI:quality:from:log_odds; may not be required
+    INSDC:quality:log_odds log_odds_clip
+        = < INSDC:quality:log_odds > clip < -6, 40 > ( qual_log_odds );
+
+    // map the clipped value through the from:log_odds / to:phred lookup tables
+    return < INSDC:quality:log_odds, INSDC:quality:phred >
+        map < NCBI:quality:from:log_odds, NCBI:quality:to:phred > ( log_odds_clip );
+}
+
+/* log_odds_quality_nocol #1 - history:
+ *  1.0.1 - base explicitly upon sequence #1.0.1
+ */
+table NCBI:tbl:log_odds_quality_nocol #1.0.1 = INSDC:tbl:sequence #1.0.1, NCBI:tbl:n_encoding #1
+{
+    /* READ - decoding
+     *  NOTE(review): presumably yields 4 where quality is -6, else read_unpack - confirm */
+    U8 read_ndecode
+        = < INSDC:quality:log_odds, U8 > map < -6, 4 > ( out_qual_log_odds, read_unpack );
+
+    /* QUALITY
+     *  declared in INSDC:tbl:sequence as phred
+     *  introduce here as log-odds
+     */
+    extern column INSDC:quality:log_odds QUALITY = out_qual_log_odds;
+
+    // resolve for phred: out_qual2_phred is listed first, else convert from log-odds
+    INSDC:quality:phred out_qual_phred
+        = out_qual2_phred
+        | NCBI:log_odds_to_phred ( out_qual_log_odds );
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_unpack
+	 */
+
+	/* NCBI:tbl:log_odds_quality_nocol productions
+	 *  out_qual2_phred
+	 *  out_qual_log_odds
+	 */
+};
+
+/* log_odds_quality #1 - history:
+ *  1.0.1 - base explicitly upon log_odds_quality_nocol #1.0.1
+ */
+table NCBI:tbl:log_odds_quality #1.0.1 = NCBI:tbl:log_odds_quality_nocol #1.0.1
+{
+    // read directly from .QUALITY, as n-encoded log_odds is compatible with log_odds
+    NCBI:quality:n_encoded:log_odds out_qual_log_odds = .QUALITY;
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_unpack
+	 */
+
+	/* NCBI:tbl:log_odds_quality_nocol inherited productions
+	 *  out_qual2_phred
+	 */
+};
+
+/* log_odds_quality_nocol #2 - history:
+ *  2.0.1 - base explicitly upon sequence #1.0.1
+ *  2.1.0 - added production of in_qual_phred
+ */
+table NCBI:tbl:log_odds_quality_nocol #2.1.0 = INSDC:tbl:sequence #1.0.1
+{
+    /* QUALITY
+     *  declared in INSDC:tbl:sequence as phred
+     *  introduce here as log-odds
+     */
+    extern column INSDC:quality:log_odds QUALITY
+        = out_qual_log_odds;
+
+    // resolve for phred on both the input and the output side
+    INSDC:quality:phred in_qual_phred
+        = NCBI:log_odds_to_phred ( in_qual_log_odds );
+
+    INSDC:quality:phred out_qual_phred
+        = NCBI:log_odds_to_phred ( out_qual_log_odds );
+
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:log_odds_quality_nocol productions
+	 *  out_qual_log_odds ( in_qual_log_odds is also used above but not defined here )
+	 */
+};
+
+/* log_odds_quality #2 - history:
+ *  2.0.1 - added feed of in_stats_qual
+ *  2.0.2 - added input of text encodings
+ *  2.0.3 - base explicitly upon log_odds_quality_nocol #2.0.1
+ *  2.0.4 - changed compression from izip to zip
+ *  2.1.0 - base explicitly upon log_odds_quality_nocol #2.1.0
+ */
+table NCBI:tbl:log_odds_quality #2.1.0 = NCBI:tbl:log_odds_quality_nocol #2.1.0
+{
+    INSDC:quality:log_odds out_qual_log_odds= .QUALITY;
+
+    extern column INSDC:quality:text:log_odds_64 QUALITY
+        = out_qual_text_log_odds_64
+        | ( INSDC:quality:text:log_odds_64 ) < B8 > sum < 64 > ( out_qual_log_odds );
+
+    // input rules: QUALITY accepted as binary log-odds or as log-odds text ( offset 64 )
+    INSDC:quality:text:log_odds_64 in_qual_text_log_odds_64 = QUALITY;
+
+    INSDC:quality:log_odds in_qual_log_odds
+        = QUALITY
+        | ( INSDC:quality:log_odds ) < B8 > diff < 64 > ( in_qual_text_log_odds_64 );
+
+    physical column < INSDC:quality:log_odds > zip_encoding .QUALITY
+        = in_qual_log_odds;
+
+    // feed to compressed statistics ( same values as written to .QUALITY )
+    INSDC:quality:log_odds in_stats_qual = in_qual_log_odds;
+
+
+	/* INSDC:tbl:sequence inherited productions
+	 *  out_qual_text_phred_33
+	 *  out_qual_text_phred_64
+	 */
+
+	/* NCBI:tbl:log_odds_quality productions
+	 *  out_qual_text_log_odds_64
+	 */
+};