seqsero2: libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/read.vschema comparison

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/read.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty

author	charles_s_test
date	Mon, 27 Nov 2017 11:21:07 -0500
parents
children

comparison

equal deleted inserted replaced

-:0d65b71ff8df
+:38ad1130d077
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+/*==========================================================================
+* General read table which will be inherited by others
+*/
+version 1;
+include 'vdb/vdb.vschema';
+include 'insdc/insdc.vschema';
+include 'csra2/stats.vschema';
+/*--------------------------------------------------------------------------
+* tables
+*/
+table NCBI:csra2:tbl:read #1.0 = NCBI:csra2:tbl:read_stats #1
+{
+/* NCBI:csra2:tbl:read #1.0
+*  read table declared on top of NCBI:csra2:tbl:read_stats #1.
+*  it declares the READ and QUALITY columns in several representations,
+*  the input rules that normalize written data, the physical columns
+*  ( .READ, .ALTREAD, .QUALITY ) and the output rules that rebuild each
+*  representation from those physical columns.
+*
+* CHUNK_SZ
+*  describes the maximum number of bases in any row
+*
+*  if present, allows a single sequence to be broken into multiple rows
+*  where this value gives the limit on the number of bases in any row.
+*
+*  the sequence will be split across some number of rows, depending upon
+*  the value of CHUNK_SZ. if length ( seq ) > CHUNK_SZ, then there will
+*  be multiple rows, where all but the last will have a length of CHUNK_SZ.
+*  the last ( or only ) row will have a length of length(seq)%CHUNK_SZ.
+*
+*  NOTE(review): when length ( seq ) is an exact multiple of CHUNK_SZ the
+*  remainder above is 0; presumably the last row then holds a full
+*  CHUNK_SZ bases - confirm against the loader.
+*/
+extern column INSDC:coord:len CHUNK_SZ;
+/* READ
+*  base calls
+*
+*  two representations are declared here: text ( the default column ) and
+*  unpacked 4na. both are read back through the output rules further down
+*  ( out_dna_text and out_4na_bin ).
+*/
+// textual representation: reads out_dna_text; the validate rule compares in_dna_text with out_dna_text
+extern default column INSDC:dna:text READ
+{
+read = out_dna_text;
+validate = < INSDC:dna:text > compare ( in_dna_text, out_dna_text );
+}
+// 4na representation - unpacked: reads out_4na_bin
+extern column INSDC:4na:bin READ
+= out_4na_bin
+;
+/* QUALITY
+*  phred-score quality values
+*
+*  the default representation is binary phred ( out_qual_phred ).
+*  the two text representations apply sum < 33 > or sum < 64 > to
+*  out_qual_phred and cast the result to the matching text type.
+*/
+extern default column INSDC:quality:phred QUALITY
+= out_qual_phred
+;
+extern column INSDC:quality:text:phred_33 QUALITY
+= ( INSDC:quality:text:phred_33 ) < B8 > sum < 33 > ( out_qual_phred )
+;
+extern column INSDC:quality:text:phred_64 QUALITY
+= ( INSDC:quality:text:phred_64 ) < B8 > sum < 64 > ( out_qual_phred )
+;
+/* ---------------------------- optional columns ---------------------------- */
+/* RD_ID
+* RD_GROUP
+*  reports group and id of current row
+*
+*  both are declared without an expression in this table.
+*/
+extern column I64 RD_ID;
+extern column ascii RD_GROUP;
+/* RD_FILTER
+*  records filter value if used
+*/
+extern column INSDC:SRA:read_filter RD_FILTER;
+/* ---------------------------- input rules ---------------------------- */
+// input text: maps '.' to 'N' and lower-case base codes to upper case
+INSDC:dna:text in_dna_text
+= < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( READ )
+;
+// input 4na bin: READ range-validated to 0..15, or else in_dna_text mapped from CHARSET to BINSET
+INSDC:4na:bin in_4na_bin
+= < INSDC:4na:bin > range_validate < 0, 15 > ( READ )
+| < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_dna_text )
+;
+// input 2na bin: in_4na_bin passed through INSDC:SEQ:rand_4na_2na ( presumably picks a random base for ambiguity codes - confirm )
+INSDC:2na:bin in_2na_bin
+= INSDC:SEQ:rand_4na_2na ( in_4na_bin )
+;
+// input 4na alt-read ( ambiguities ): 1, 2, 4, 8 map to 0; 0 maps to 15; other codes are kept ( assumes INSDC:4na:map:BINSET lists 0..15 in order - confirm )
+INSDC:4na:bin in_alt_4na_bin
+= < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_4na_bin )
+;
+// feed the statistics: in_stats_seq is in_4na_bin ( presumably consumed by the read_stats parent table - confirm )
+INSDC:4na:bin in_stats_seq = in_4na_bin;
+// quality: binary phred is QUALITY itself, or else a text form passed through diff < 33 > / diff < 64 >
+INSDC:quality:text:phred_33 in_qual_text_phred_33 = QUALITY;
+INSDC:quality:text:phred_64 in_qual_text_phred_64 = QUALITY;
+INSDC:quality:phred in_qual_phred
+= QUALITY
+| ( INSDC:quality:phred ) < B8 > diff < 33 > ( in_qual_text_phred_33 )
+| ( INSDC:quality:phred ) < B8 > diff < 64 > ( in_qual_text_phred_64 )
+;
+// feed the statistics: quality is in_qual_phred; the read group is in_stats_spot_group, with RD_GROUP as the second alternative
+INSDC:quality:phred in_stats_qual_phred = in_qual_phred;
+ascii in_stats_read_group
+= in_stats_spot_group
+| RD_GROUP
+;
+/* ---------------------------- physical columns ----------------------------
+*  .READ     packed 2na built from in_2na_bin
+*  .ALTREAD  zip_encoding of in_alt_4na_bin after trim < 0, 0 >
+*  .QUALITY  delta_average_zip_encoding of in_qual_phred
+*/
+physical column INSDC:2na:packed .READ
+= ( INSDC:2na:packed ) pack ( in_2na_bin )
+;
+physical column < INSDC:4na:bin > zip_encoding .ALTREAD
+= < INSDC:4na:bin > trim < 0, 0 > ( in_alt_4na_bin )
+;
+physical column < INSDC:quality:phred > delta_average_zip_encoding .QUALITY
+= in_qual_phred
+;
+/* ---------------------------- output rules ---------------------------- */
+// output 2na packed: the stored .READ column
+INSDC:2na:packed out_2na_packed
+= .READ
+;
+// output 2na bin: unpack of out_2na_packed
+INSDC:2na:bin out_2na_bin
+= ( INSDC:2na:bin ) unpack ( out_2na_packed )
+;
+// output 2na->4na bin: each INSDC:2na:map:BINSET entry maps to the matching entry of [ 1, 2, 4, 8 ]
+INSDC:4na:bin out_2na_4na_bin
+= < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_2na_bin )
+;
+// output 4na bin: bit_or of the 2na-derived bases with .ALTREAD; the second alternative is the 2na-derived bases alone ( presumably used when .ALTREAD is absent - confirm )
+INSDC:4na:bin out_4na_bin
+= < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_2na_4na_bin, .ALTREAD )
+| out_2na_4na_bin
+;
+// output text: out_4na_bin mapped from BINSET to CHARSET
+INSDC:dna:text out_dna_text
+= < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_4na_bin )
+;
+// output quality: the stored .QUALITY, or else echo < 30 > over out_4na_bin ( presumably a constant 30 per base - confirm )
+INSDC:quality:phred out_qual_phred
+= .QUALITY
+| < INSDC:quality:phred > echo < 30 > ( out_4na_bin )
+;
+}
+/*--------------------------------------------------------------------------
+* views
+*/
+table NCBI:csra2:view:read #1.0 =
+NCBI:csra2:tbl:read #1.0
+{
+/* NCBI:csra2:view:read #1.0
+*  declared with the "table" keyword on top of NCBI:csra2:tbl:read #1.0.
+*  every column declared here is readonly. each one reads either a stored
+*  column ( .CHUNK_SZ, .RD_ID, .RD_GROUP, .RD_FILTER ) followed by a second
+*  alternative, or one of the out_* rules of the parent table.
+*
+* CHUNK_SIZE
+*  describes the maximum number of bases in any row
+*
+*  if present, allows a single sequence to be broken into multiple rows
+*  where this value gives the limit on the number of bases in any row.
+*
+*  the sequence will be split across some number of rows, depending upon
+*  the value of CHUNK_SIZE. if length ( seq ) > CHUNK_SIZE, then there will
+*  be multiple rows, where all but the last will have a length of CHUNK_SIZE.
+*  the last ( or only ) row will have a length of length(seq)%CHUNK_SIZE.
+*
+*  the value is read from .CHUNK_SZ; the second alternative is the
+*  constant 0xFFFFFFFF ( presumably used when .CHUNK_SZ is absent, i.e.
+*  no chunking limit - confirm ).
+*
+*  NOTE(review): when length ( seq ) is an exact multiple of CHUNK_SIZE
+*  the remainder above is 0; presumably the last row then holds a full
+*  CHUNK_SIZE bases - confirm against the loader.
+*/
+readonly column INSDC:coord:len CHUNK_SIZE
+= .CHUNK_SZ
+| < INSDC:coord:len > echo < 0xFFFFFFFF > ()
+;
+/* READ
+*  generate remaining 4 types
+*
+*  4na packed  pack of out_4na_bin
+*  x2na bin    out_4na_bin mapped through the table below: 1, 2, 4, 8
+*              become 0, 1, 2, 3 and every other code becomes 4
+*              ( assumes INSDC:4na:map:BINSET lists 0..15 in order - confirm )
+*  2na bin     out_2na_bin
+*  2na packed  out_2na_packed
+*/
+readonly column INSDC:4na:packed READ
+= ( INSDC:4na:packed ) pack ( out_4na_bin )
+;
+readonly column INSDC:x2na:bin READ
+= < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_4na_bin )
+;
+readonly column INSDC:2na:bin READ
+= out_2na_bin
+;
+readonly column INSDC:2na:packed READ
+= out_2na_packed
+;
+/* READ_ID
+* READ_GROUP
+*  reports group and id of current row
+*
+*  READ_ID reads .RD_ID, with row_id () as the second alternative.
+*  READ_GROUP reads .RD_GROUP, with the empty string as the second
+*  alternative.
+*/
+readonly column I64 READ_ID
+= .RD_ID
+| row_id ()
+;
+readonly column ascii READ_GROUP
+= .RD_GROUP
+| < ascii > echo < '' > ()
+;
+/* READ_FILTER
+*  records filter value if used
+*
+*  reads .RD_FILTER, with SRA_READ_FILTER_PASS as the second alternative.
+*/
+readonly column INSDC:SRA:read_filter READ_FILTER
+= .RD_FILTER
+| < INSDC:SRA:read_filter > echo < SRA_READ_FILTER_PASS > ()
+;
+}

Mercurial > repos > charles_s_test > seqsero2

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/read.vschema @ 3:38ad1130d077 draft