seqsero2: libs/sratoolkit.2.8.0-centos_linux64/schema/sra/pacbio.vschema comparison

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/sra/pacbio.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty

author	charles_s_test
date	Mon, 27 Nov 2017 11:21:07 -0500
parents
children

comparison

equal deleted inserted replaced

-:0d65b71ff8df
+:38ad1130d077
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+/*==========================================================================
+* NCBI PacBio Fastq Sequence Read Archive schema
+*/
+version 1;
+include 'insdc/sra.vschema';
+include 'ncbi/sra.vschema';
+/*--------------------------------------------------------------------------
+* NCBI:SRA:PacBio
+*  Pacific Biosciences SRA Platform
+*
+* history:
+*  1.0.2 - updated ancestry
+*  1.0.3 - updated ancestry
+*/
+// Common PacBio ancestor table: declares no members of its own,
+// only the parent table NCBI:SRA:tbl:sra #2.1.3.
+table NCBI:SRA:PacBio:common #1.0.3
+    = NCBI:SRA:tbl:sra #2.1.3
+{
+}
+/* history:
+*  1.0.2 - updated ancestry
+*  1.0.3 - updated ancestry
+*/
+// PacBio SMRT fastq table: built from the PacBio common table plus the
+// base_space and phred_quality tables.
+table NCBI:SRA:PacBio:smrt:fastq #1.0.3
+    = NCBI:SRA:PacBio:common #1.0.3
+    , NCBI:tbl:base_space #2.0.3
+    , NCBI:tbl:phred_quality #2.0.3
+{
+    // PLATFORM: the platform name is always the constant "PACBIO_SMRT"
+    ascii platform_name = < ascii > echo < "PACBIO_SMRT" > ();
+    // TRIMMED SEQUENCE: produce the 0-based trim_start and trim_len.
+    // trim_start is taken directly from bio_start.
+    INSDC:coord:zero bio_start = NCBI:SRA:bio_start ( out_read_start, out_read_type );
+    INSDC:coord:zero trim_start = bio_start;
+    // trim_len is the diff of spot_len and the U32 form of trim_start
+    U32 trim_left = ( U32 ) trim_start;
+    INSDC:coord:len trim_len = ( INSDC:coord:len ) < U32 > diff ( spot_len, trim_left );
+}
+/*--------------------------------------------------------------------------
+* NCBI:SRA:PacBio:smrt:db
+*  Pacific Biosciences SRA Platform
+*/
+// Per-base insertion / deletion / substitution columns.
+// The three *_QV columns hold U8 values; the two *_TAG columns hold INSDC:dna:text.
+table NCBI:SRA:PacBio:smrt:indelsubst #1
+{
+    // INSERTION_QV: probability that the current base is an insertion
+    column < U8 > zip_encoding INSERTION_QV;
+    // DELETION_QV: probability of a deletion error following the current base
+    // DELETION_TAG: identity of the deleted base, if it exists
+    column < U8 > zip_encoding DELETION_QV;
+    column < INSDC:dna:text > zip_encoding DELETION_TAG;
+    // SUBSTITUTION_QV: probability of a substitution error
+    // SUBSTITUTION_TAG: most likely alternative base call
+    column < U8 > zip_encoding SUBSTITUTION_QV;
+    column < INSDC:dna:text > zip_encoding SUBSTITUTION_TAG;
+};
+// Hole status type (a U8) and its named values, numbered 0 through 8.
+typedef U8 PacBio:hole:status;
+const PacBio:hole:status PacBio:hole:SEQUENCING = 0;
+const PacBio:hole:status PacBio:hole:ANTIHOLE = 1;
+const PacBio:hole:status PacBio:hole:FIDUCIAL = 2;
+const PacBio:hole:status PacBio:hole:SUSPECT = 3;
+const PacBio:hole:status PacBio:hole:ANTIMIRROR = 4;
+const PacBio:hole:status PacBio:hole:FDZMW = 5;
+const PacBio:hole:status PacBio:hole:FBZMW = 6;
+const PacBio:hole:status PacBio:hole:ANTIBEAMLET = 7;
+const PacBio:hole:status PacBio:hole:OUTSIDEFOV = 8;
+/* history:
+*  1.0.1 - updated ancestry
+*  1.0.2 - updated ancestry
+*/
+// Base-call table: the parent of the smrt:sequence and smrt:cons tables
+// declared further down in this schema.
+table NCBI:SRA:PacBio:smrt:basecalls #1.0.2
+    = INSDC:SRA:tbl:spotcoord #1
+    , NCBI:tbl:base_space #2.0.3
+    , NCBI:tbl:phred_quality #2.0.3
+    , NCBI:SRA:PacBio:smrt:indelsubst #1
+{
+    // PLATFORM: the platform name is always the constant "PACBIO_SMRT"
+    ascii platform_name = < ascii > echo < "PACBIO_SMRT" > ();
+    // BASECALL: read-only, taken from out_dna_text
+    // ( basecalls themselves are routed to the READ column )
+    readonly column INSDC:dna:text BASECALL = out_dna_text;
+    // QUALITY_VALUE: read-only quality value for each base, taken from out_qual_phred
+    readonly column INSDC:quality:phred QUALITY_VALUE = out_qual_phred;
+    // HOLE_NUMBER: zero-based hole number
+    column < U32 > izip_encoding HOLE_NUMBER;
+    // HOLE_STATUS: hole status, typed as PacBio:hole:status
+    column < PacBio:hole:status > zip_encoding HOLE_STATUS;
+    // HOLE_STATUS_VALUE / HOLE_STATUS_VALUE_LEN: optional column pair describing
+    // the hole status when/if it does not line up with the PacBio:hole:* constants
+    column < ascii > zip_encoding HOLE_STATUS_VALUE;
+    column < INSDC:coord:len > izip_encoding HOLE_STATUS_VALUE_LEN;
+    // HOLE_XY: the hole ( X,Y ) pair as two I16 values; it is split and sent
+    // to the X and Y columns.
+    // The column value is pasted together from I16 casts of out_x_coord and out_y_coord.
+    column I16 [ 2 ] HOLE_XY = < I16 > paste ( x_clip_I16, y_clip_I16 );
+    I16 x_clip_I16 = cast ( out_x_coord );
+    I16 y_clip_I16 = cast ( out_y_coord );
+    // Element 0 and element 1 of HOLE_XY are cut out and cast to
+    // INSDC:coord:val to give in_x_coord and in_y_coord.
+    I16 in_x16_coord = < I16 > cut < 0 > ( HOLE_XY );
+    I16 in_y16_coord = < I16 > cut < 1 > ( HOLE_XY );
+    INSDC:coord:val in_x_coord = cast ( in_x16_coord );
+    INSDC:coord:val in_y_coord = cast ( in_y16_coord );
+    // NUM_EVENT: read-only number of bases in the ZMW, taken from base_space_spot_len
+    readonly column INSDC:coord:len NUM_EVENT = base_space_spot_len;
+};
+/* history:
+*  1.0.1 - updated ancestry
+*  1.0.2 - updated ancestry
+*/
+// Sequence table: the basecalls table plus pulse information, the
+// spot-to-pulse map and quality clipping.
+table NCBI:SRA:PacBio:smrt:sequence #1.0.2
+    = NCBI:SRA:PacBio:smrt:basecalls #1.0.2
+    , NCBI:SRA:tbl:sra_nopos #2.1.3
+{
+    // pulse information
+    column < U16 > izip_encoding PRE_BASE_FRAMES;
+    column < U16 > izip_encoding WIDTH_IN_FRAMES;
+    // spot to pulse map: PULSE_INDEX is declared with three types, all tied
+    // to the physical column .PULSE_INDEX
+    //  - default, INSDC:position:zero: the physical column as-is
+    default column INSDC:position:zero PULSE_INDEX = .PULSE_INDEX;
+    //  - read-only, INSDC:position:one: the physical column passed through sum < 1 >
+    readonly column INSDC:position:one PULSE_INDEX = out_position;
+    INSDC:position:one out_position = ( INSDC:position:one ) < INSDC:position:zero > sum < 1 > ( .PULSE_INDEX );
+    //  - NCBI:SRA:pos16: a cast of the physical column
+    column NCBI:SRA:pos16 PULSE_INDEX = cast ( .PULSE_INDEX );
+    // The physical column is assigned from in_pulse_index32, whose two
+    // alternatives are PULSE_INDEX itself or a cast of the pos16 form.
+    NCBI:SRA:pos16 in_pulse_index16 = PULSE_INDEX;
+    INSDC:position:zero in_pulse_index32 = PULSE_INDEX | cast ( in_pulse_index16 );
+    physical column < INSDC:position:zero > izip_encoding .PULSE_INDEX = in_pulse_index32;
+    // clip quality
+    extern column < INSDC:coord:zero > izip_encoding CLIP_QUALITY_LEFT;
+    extern column < INSDC:coord:one > izip_encoding CLIP_QUALITY_RIGHT;
+    // TRIMMED SEQUENCE: produce the 0-based trim_start and trim_len.
+    // trim_start alternatives: .CLIP_QUALITY_LEFT, or the NCBI:SRA:bio_start result
+    INSDC:coord:zero trim_start = .CLIP_QUALITY_LEFT | NCBI:SRA:bio_start ( out_read_start, out_read_type );
+    // trim_right alternatives: .CLIP_QUALITY_RIGHT as U32, or spot_len
+    U32 trim_right = ( U32 ) .CLIP_QUALITY_RIGHT | spot_len;
+    // trim_len is the diff of trim_right and the U32 form of trim_start
+    U32 trim_left = ( U32 ) trim_start;
+    INSDC:coord:len trim_len = ( INSDC:coord:len ) < U32 > diff ( trim_right, trim_left );
+};
+/* history:
+*  1.0.1 - updated ancestry
+*  1.0.2 - updated ancestry
+*/
+// Consensus table: the basecalls table plus NUM_PASSES and trimming productions.
+table NCBI:SRA:PacBio:smrt:cons #1.0.2
+    = NCBI:SRA:PacBio:smrt:basecalls #1.0.2
+    , NCBI:SRA:tbl:sra #2.1.3
+{
+    // NUM_PASSES is kept signed ( I32 ) because both the hdf5 and the xsd
+    // document it as signed
+    column < I32 > izip_encoding NUM_PASSES;
+    // TRIMMED SEQUENCE: produce the 0-based trim_start and trim_len.
+    INSDC:coord:zero trim_start = NCBI:SRA:bio_start ( out_read_start, out_read_type );
+    // trim_len is the diff of spot_len and the U32 form of trim_start
+    U32 trim_left = ( U32 ) trim_start;
+    INSDC:coord:len trim_len = ( INSDC:coord:len ) < U32 > diff ( spot_len, trim_left );
+};
+/* these encoding rules attempt to compress the channels individually,
+although they may compress fine interleaved as they are... */
+// Physical encoding for F32 [ 4 ] values that handles each of the four
+// elements separately. The "mantissa" parameter is passed to fzip for
+// every channel when encoding.
+physical F32 [ 4 ] NCBI:SRA:PacBio:smrt:F32_4ch_encoding #1.0 < U32 mantissa >
+{
+    // decode: split the stored data into four parts, funzip each part,
+    // then paste the four F32 results together
+    decode
+    {
+        fzip_fmt packed0 = split < 0 > ( @ );
+        F32 chan0 = funzip ( packed0 );
+        fzip_fmt packed1 = split < 1 > ( @ );
+        F32 chan1 = funzip ( packed1 );
+        fzip_fmt packed2 = split < 2 > ( @ );
+        F32 chan2 = funzip ( packed2 );
+        fzip_fmt packed3 = split < 3 > ( @ );
+        F32 chan3 = funzip ( packed3 );
+        return < F32 > paste ( chan0, chan1, chan2, chan3 );
+    }
+    // encode: cut out each of the four elements, fzip each one with the
+    // given mantissa, then merge the four results
+    encode
+    {
+        F32 chan0 = < F32 > cut < 0 > ( @ );
+        fzip_fmt packed0 = fzip < mantissa > ( chan0 );
+        F32 chan1 = < F32 > cut < 1 > ( @ );
+        fzip_fmt packed1 = fzip < mantissa > ( chan1 );
+        F32 chan2 = < F32 > cut < 2 > ( @ );
+        fzip_fmt packed2 = fzip < mantissa > ( chan2 );
+        F32 chan3 = < F32 > cut < 3 > ( @ );
+        fzip_fmt packed3 = fzip < mantissa > ( chan3 );
+        return merge ( packed0, packed1, packed2, packed3 );
+    }
+}
+// ZMW metrics table. Columns using NCBI:SRA:PacBio:smrt:F32_4ch_encoding
+// hold F32 [ 4 ] values and pass 24 as that encoding's mantissa parameter.
+// The remaining F32 columns use fzip_encoding < 24 >
+// ( NOTE(review): presumably also a mantissa setting - confirm against fzip_encoding ).
+table NCBI:SRA:PacBio:smrt:zmw_metrics #1
+{
+    // BASE_* columns
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > BASE_FRACTION;
+    column < F32 > fzip_encoding < 24 > BASE_IPD;
+    column < F32 > fzip_encoding < 24 > BASE_RATE;
+    column < F32 > fzip_encoding < 24 > BASE_WIDTH;
+    // CHAN_* columns, all four-element
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > CHAN_BASE_QV;
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > CHAN_DEL_QV;
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > CHAN_INS_QV;
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > CHAN_SUB_QV;
+    // local and dark base rates
+    column < F32 > fzip_encoding < 24 > LOCAL_BASE_RATE;
+    column < F32 > fzip_encoding < 24 > DARK_BASE_RATE;
+    // HQ_RGN_* columns
+    column < F32 > fzip_encoding < 24 > HQ_RGN_START_TIME;
+    column < F32 > fzip_encoding < 24 > HQ_RGN_END_TIME;
+    column NCBI:SRA:PacBio:smrt:F32_4ch_encoding < 24 > HQ_RGN_SNR;
+    // PRODUCTIVITY is the only non-F32 column here ( I8, zip_encoding )
+    column < I8 > zip_encoding PRODUCTIVITY;
+    // READ_* columns
+    column < F32 > fzip_encoding < 24 > READ_SCORE;
+    column < F32 > fzip_encoding < 24 > READ_BASE_QV;
+    column < F32 > fzip_encoding < 24 > READ_DEL_QV;
+    column < F32 > fzip_encoding < 24 > READ_INS_QV;
+    column < F32 > fzip_encoding < 24 > READ_SUB_QV;
+};
+// Passes table: three U8 columns ( zip_encoding ) and two I32 columns ( izip_encoding ).
+table NCBI:SRA:PacBio:smrt:passes #1
+{
+    // U8 columns
+    column < U8 > zip_encoding ADAPTER_HIT_BEFORE;
+    column < U8 > zip_encoding ADAPTER_HIT_AFTER;
+    column < U8 > zip_encoding PASS_DIRECTION;
+    // I32 columns
+    column < I32 > izip_encoding PASS_NUM_BASES;
+    column < I32 > izip_encoding PASS_START_BASE;
+};
+// PacBio SMRT database: four member tables named SEQUENCE, CONSENSUS,
+// PASSES and ZMW_METRICS, each referenced at version #1.0 of its table type.
+database NCBI:SRA:PacBio:smrt:db #1.0.1
+{
+    // sequence and consensus tables
+    table NCBI:SRA:PacBio:smrt:sequence #1.0 SEQUENCE;
+    table NCBI:SRA:PacBio:smrt:cons #1.0 CONSENSUS;
+    // passes and ZMW metrics tables
+    table NCBI:SRA:PacBio:smrt:passes #1.0 PASSES;
+    table NCBI:SRA:PacBio:smrt:zmw_metrics #1.0 ZMW_METRICS;
+};

Mercurial > repos > charles_s_test > seqsero2

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/sra/pacbio.vschema @ 3:38ad1130d077 draft