Mercurial > repos > charles_s_test > seqsero2
diff libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/reference.vschema @ 3:38ad1130d077 draft
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Mon, 27 Nov 2017 11:21:07 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/csra2/reference.vschema Mon Nov 27 11:21:07 2017 -0500 @@ -0,0 +1,245 @@ +/*=========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. 
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * VDB Alignment types, functions and tables
+ */
+version 1;
+
+include 'vdb/vdb.vschema';
+include 'csra2/stats.vschema';
+
+
+/*--------------------------------------------------------------------------
+ * tables
+ */
+
+/* NCBI:csra2:tbl:reference
+ *  chunked reference sequence table
+ *  inherits from NCBI:csra2:tbl:read_stats
+ */
+table NCBI:csra2:tbl:reference #1.0 = NCBI:csra2:tbl:read_stats #1
+{
+    /* CHUNK_SIZE
+     *  upper bound on the number of bases held by any single cell
+     */
+    extern column INSDC:coord:len CHUNK_SIZE;
+
+    /* CIRCULAR
+     *  true when the reference is circular
+     */
+    extern column bool CIRCULAR;
+
+    /* CANONICAL_NAME
+     *  expected to be a proper, accessioned name
+     */
+    extern column utf8 CANONICAL_NAME;
+
+    /* COMMON_NAME
+     *  may be ambiguous, or missing altogether
+     */
+    extern column utf8 COMMON_NAME;
+
+    /* LOCAL_SEQUENCE
+     *  a single column name overloaded by type:
+     *  dna text ( the default ) and 4na binary
+     */
+    extern default column INSDC:dna:text LOCAL_SEQUENCE
+    {
+        read = out_local_dna_text;
+        validate = < INSDC:dna:text > compare ( in_local_dna_text, out_local_dna_text );
+    }
+    extern column INSDC:4na:bin LOCAL_SEQUENCE = out_local_4na_bin;
+
+    /* PRIMARY_ALIGNMENT_IDS
+     * SECONDARY_ALIGNMENT_IDS
+     *  indices of the rows in the PRIMARY_ALIGNMENT and
+     *  SECONDARY_ALIGNMENT tables whose alignments START
+     *  within this chunk
+     *
+     *  the indices MUST be sorted in clustered order,
+     *  i.e. ascending numeric order
+     */
+    extern column < I64 > izip_encoding PRIMARY_ALIGNMENT_IDS;
+    extern column < I64 > izip_encoding SECONDARY_ALIGNMENT_IDS;
+
+    /* OVERLAP_REF_POS
+     *  min ( REF_POS ) over all alignments that intersect this chunk
+     *  but start in a previous chunk; the stored position is given
+     *  in reference coordinates.
+     *
+     *  a value of 0 means that no alignment starting to the left of
+     *  this chunk also intersects with it.
+     */
+    extern column < INSDC:coord:zero > izip_encoding OVERLAP_REF_POS;
+
+    /* OVERLAP_REF_LEN
+     *  max ( REF_POS + REF_LEN - CHUNK_START ) % CHUNK_SIZE
+     *  over all alignments that intersect this chunk but start
+     *  in a previous chunk.
+     *
+     *  tells how much of this chunk is needed by alignments that
+     *  do not start within this chunk. for example, if a slice on
+     *  this reference starts 100 bases into this chunk and
+     *  OVERLAP_REF_LEN is 100 or less, then no alignments from
+     *  prior chunks need to be considered.
+     */
+    extern column < INSDC:coord:len > izip_encoding OVERLAP_REF_LEN;
+
+    /* COVERAGE
+     *  graphing statistics for the chunk
+     */
+
+    // values are clipped at 255
+    extern column < U8 > izip_encoding CGRAPH_HIGH;
+    extern column < U8 > izip_encoding CGRAPH_LOW;
+
+    // number of mismatches in the chunk
+    extern column < U32 > izip_encoding CGRAPH_MISMATCHES;
+
+    // number of inserts and deletes in the chunk
+    extern column < U32 > izip_encoding CGRAPH_INDELS;
+
+
+    /* writing rules */
+
+    // text input: '.' is rewritten to 'N', lowercase codes to uppercase
+    INSDC:dna:text in_local_dna_text
+        = < INSDC:dna:text, INSDC:dna:text > map < '.acmgrsvtwyhkdbn','NACMGRSVTWYHKDBN' > ( LOCAL_SEQUENCE );
+
+    // 4na input: either range-validated binary, or mapped from the text above
+    INSDC:4na:bin in_local_4na_bin
+        = < INSDC:4na:bin > range_validate < 0, 15 > ( LOCAL_SEQUENCE )
+        | < INSDC:dna:text, INSDC:4na:bin > map < INSDC:4na:map:CHARSET, INSDC:4na:map:BINSET > ( in_local_dna_text );
+
+    INSDC:2na:bin in_local_2na_bin
+        = INSDC:SEQ:rand_4na_2na ( in_local_4na_bin );
+
+    // NOTE(review): assuming INSDC:4na:map:BINSET lists 0..15 in order,
+    // this maps 0 to 15, maps 1, 2, 4 and 8 to 0, and leaves the
+    // remaining codes unchanged - confirm against the BINSET definition
+    INSDC:4na:bin in_ambig_4na_bin
+        = < INSDC:4na:bin, INSDC:4na:bin > map < INSDC:4na:map:BINSET, [ 15,0,0,3,0,5,6,7,0,9,10,11,12,13,14,15 ] > ( in_local_4na_bin );
+
+    // NOTE(review): presumably consumed by the inherited read_stats
+    // table - confirm against csra2/stats.vschema
+    INSDC:4na:bin in_stats_seq = in_local_4na_bin;
+
+    /* physical columns for sequence */
+    physical column INSDC:2na:packed .LOCAL_SEQUENCE
+        = ( INSDC:2na:packed ) pack ( in_local_2na_bin );
+    physical column < INSDC:4na:bin > zip_encoding .LOCAL_AMBIGUITY
+        = < INSDC:4na:bin > trim < 0, 0 > ( in_ambig_4na_bin );
+
+    /* reading rules */
+    INSDC:2na:packed out_local_2na_packed = .LOCAL_SEQUENCE;
+
+    INSDC:2na:bin out_local_2na_bin
+        = ( INSDC:2na:bin ) unpack ( out_local_2na_packed );
+
+    // 2na codes are mapped onto the 4na values 1, 2, 4, 8
+    INSDC:4na:bin out_local_2na_4na_bin
+        = < INSDC:2na:bin, INSDC:4na:bin > map < INSDC:2na:map:BINSET, [ 1, 2, 4, 8 ] > ( out_local_2na_bin );
+
+    // the 2na-derived 4na is combined with the stored ambiguity via bit_or
+    INSDC:4na:bin out_local_4na_bin
+        = < INSDC:4na:bin > bit_or < ALIGN_RIGHT > ( out_local_2na_4na_bin, .LOCAL_AMBIGUITY );
+
+    INSDC:dna:text out_local_dna_text
+        = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_local_4na_bin );
+
+
+    INSDC:coord:len in_local_read_len
+        = ( INSDC:coord:len ) row_len ( in_local_2na_bin );
+
+    INSDC:SRA:xread_type in_local_read_type
+        = < INSDC:SRA:xread_type > echo < SRA_READ_TYPE_BIOLOGICAL > ();
+}
+
+
+/*--------------------------------------------------------------------------
+ * "views"
+ */
+
+/* NCBI:csra2:view:reference
+ *  read-only columns layered on top of NCBI:csra2:tbl:reference
+ */
+table NCBI:csra2:view:reference #1.0 = NCBI:csra2:tbl:reference #1.0
+{
+    /* EXTERNAL
+     *  may need to become a function
+     *  it could test the CANONICAL_NAME as in cSRA.v1,
+     *  but when internal it can also check row_length of bases
+     */
+    readonly column bool EXTERNAL
+        = < bool > exists < false > ( .LOCAL_SEQUENCE )
+        | < bool > echo < true > ();
+
+    /* SEQUENCE
+     *  offered as text, 4na, x2na, 2na
+     */
+    default readonly column INSDC:dna:text SEQUENCE = out_dna_text;
+    readonly column INSDC:4na:bin SEQUENCE = out_4na_bin;
+    readonly column INSDC:4na:packed SEQUENCE
+        = ( INSDC:4na:packed ) pack ( out_4na_bin );
+    readonly column INSDC:x2na:bin SEQUENCE
+        = < INSDC:4na:bin, INSDC:x2na:bin > map < INSDC:4na:map:BINSET, [ 4,0,1,4,2,4,4,4,3,4,4,4,4,4,4,4 ] > ( out_4na_bin );
+    readonly column INSDC:2na:bin SEQUENCE = out_2na_bin;
+    readonly column INSDC:2na:packed SEQUENCE = pack ( out_2na_bin );
+
+    /* QUALITY
+     *  this is a fake column, kept for compatibility
+     */
+    readonly column INSDC:quality:phred QUALITY = out_qual_phred;
+
+    /* column aliases */
+    readonly column INSDC:coord:len MAX_SEQ_LEN = .CHUNK_SIZE;
+    readonly column ascii SEQ_ID = cast ( .CANONICAL_NAME );
+
+    /* sequence productions */
+    INSDC:4na:bin out_4na_bin
+        = out_local_4na_bin
+        // TODO: | sub-select from external table
+        ;
+
+    INSDC:dna:text out_dna_text
+        = < INSDC:4na:bin, INSDC:dna:text > map < INSDC:4na:map:BINSET, INSDC:4na:map:CHARSET > ( out_4na_bin );
+
+    INSDC:2na:bin out_2na_bin
+        = INSDC:SEQ:rand_4na_2na ( out_4na_bin );
+
+    /* quality productions */
+
+    // produced by echo with the constant 30 over SEQUENCE
+    INSDC:quality:phred out_qual_phred
+        = < INSDC:quality:phred > echo < 30 > ( SEQUENCE );
+
+    INSDC:quality:phred in_stats_qual_phred = out_qual_phred;
+}
+