seqsero2: libs/sratoolkit.2.8.0-centos_linux64/schema/sra/454.vschema comparison

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/sra/454.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty

author	charles_s_test
date	Mon, 27 Nov 2017 11:21:07 -0500
parents
children

comparison

equal deleted inserted replaced

-:0d65b71ff8df
+:38ad1130d077
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+/*==========================================================================
+* NCBI 454 Sequence Read Archive schema
+*/
+version 1;
+include 'ncbi/sra.vschema';
+include 'ncbi/spotname.vschema';
+include 'ncbi/clip.vschema';
+/*--------------------------------------------------------------------------
+* functions
+*/
+/* dynamic_read_desc
+*  uses inputs to determine read type and segmentation
+*
+*  "edit_distance" [ CONST, OPTIONAL ] - a tolerance figure for
+*  linker matching, where 0 requires exact match, 5 is default.
+*
+*  "spot" [ DATA ] - bases for entire spot
+*
+*  "key" [ DATA, CONTROL ] - bases for key sequence. for version 1,
+*  the first base following key is taken as biological start
+*
+*  "linker" [ DATA, CONTROL, OPTIONAL ] - if present, is used to separate
+*  all bases following "key" into mate pair biological reads
+*
+*  returns a trio for each identified read, with read type, start and length
+*
+*  notes:
+*   - "spot", "key" and "linker" are all declared with the typeset
+*     NCBI:SRA:_454_:drdparam_set, i.e. ascii, U8 or INSDC:2na:packed
+*   - the two parameters documented as OPTIONAL ( "edit_distance" and
+*     "linker" ) are the ones written after the '*' in the signature below
+*   - the dyn_read_* constants are 0, 1 and 2, in the same order as the
+*     trio described above; presumably they index the U32 [ 3 ] result
+*     ( TODO confirm against the code that consumes them )
+*/
+typeset NCBI:SRA:_454_:drdparam_set { ascii, U8, INSDC:2na:packed }; // element types accepted for spot / key / linker
+extern function
+U32 [ 3 ] NCBI:SRA:_454_:dynamic_read_desc #1 < * U32 edit_distance >
+( NCBI:SRA:_454_:drdparam_set spot, NCBI:SRA:_454_:drdparam_set key
+* NCBI:SRA:_454_:drdparam_set linker );
+const U32 NCBI:SRA:_454_:dyn_read_type  = 0; // presumably: slot of the read type in the trio - confirm
+const U32 NCBI:SRA:_454_:dyn_read_start = 1; // presumably: slot of the read start in the trio - confirm
+const U32 NCBI:SRA:_454_:dyn_read_len   = 2; // presumably: slot of the read length in the trio - confirm
+/* tokenize_spot_name
+*  scans name on input
+*  tokenizes into parts
+*
+*  "name" - the spot name, as ascii text
+*
+*  returns NCBI:SRA:spot_name_token; that type is not declared in this
+*  file ( presumably it comes from 'ncbi/spotname.vschema' - TODO confirm )
+*/
+extern function NCBI:SRA:spot_name_token
+NCBI:SRA:_454_:tokenize_spot_name #1 ( ascii name );
+/*--------------------------------------------------------------------------
+* NCBI:SRA:_454_:common
+*  Roche 454 SRA Platform
+*
+*  declares the 454-specific columns FLOW_CHARS, KEY_SEQUENCE,
+*  LINKER_SEQUENCE and SIGNAL, together with a constant platform name.
+*  the productions out_flow_chars, out_key_sequence and out_linker_sequence
+*  are used here but not defined here; a table that inherits from this one
+*  has to supply them ( NCBI:SRA:_454_:tbl:v2 in this file does ).
+*
+* history:
+*  1.0.1 - explicitly base upon sra #1.0.1
+*  1.0.2 - bring in clip processing from external table
+*  1.0.3 - base explicitly upon sra #1.0.2, clip #1.0.1
+*  1.0.4 - base explicitly upon sra #1.0.3, clip #1.0.2
+*/
+table NCBI:SRA:_454_:common #1.0.4 = INSDC:SRA:tbl:sra #1.0.3, NCBI:SRA:tbl:clip #1.0.2
+{
+/* PLATFORM
+*  platform name is always 454
+*  ( produced by echoing the constant string "454" )
+*/
+ascii platform_name
+= < ascii > echo < "454" > ();
+/* 454 TECHNICAL SEQUENCES
+*  each column is read from the matching out_* production.
+*  each in_* production passes the column through a character map from
+*  'acgtn.' to 'ACGTNN': lower case bases become upper case and '.'
+*  becomes 'N'.
+*/
+column INSDC:dna:text FLOW_CHARS = out_flow_chars;
+INSDC:dna:text in_flow_chars
+= < INSDC:dna:text, INSDC:dna:text > map < 'acgtn.', 'ACGTNN' > ( FLOW_CHARS );
+column INSDC:dna:text KEY_SEQUENCE = out_key_sequence;
+INSDC:dna:text in_key_sequence
+= < INSDC:dna:text, INSDC:dna:text > map < 'acgtn.', 'ACGTNN' > ( KEY_SEQUENCE );
+column INSDC:dna:text LINKER_SEQUENCE = out_linker_sequence;
+INSDC:dna:text in_linker_sequence
+= < INSDC:dna:text, INSDC:dna:text > map < 'acgtn.', 'ACGTNN' > ( LINKER_SEQUENCE );
+// binary technical sequences: each out_* text production mapped from INSDC:x2na:map:CHARSET to INSDC:x2na:map:BINSET
+INSDC:x2na:bin out_flow_bin
+= < INSDC:dna:text, INSDC:x2na:bin > map < INSDC:x2na:map:CHARSET, INSDC:x2na:map:BINSET > ( out_flow_chars );
+INSDC:x2na:bin out_key_bin
+= < INSDC:dna:text, INSDC:x2na:bin > map < INSDC:x2na:map:CHARSET, INSDC:x2na:map:BINSET > ( out_key_sequence );
+INSDC:x2na:bin out_linker_bin
+= < INSDC:dna:text, INSDC:x2na:bin > map < INSDC:x2na:map:CHARSET, INSDC:x2na:map:BINSET > ( out_linker_sequence );
+/* SIGNAL
+*  single channel integer
+*  the SIGNAL column reads out_signal, which in turn is taken directly
+*  from the physical column .SIGNAL
+*/
+column NCBI:isamp1 SIGNAL = out_signal;
+NCBI:isamp1 out_signal = .SIGNAL;
+	/* INSDC:tbl:sequence inherited productions
+	 *  cs_native
+	 *  out_cs_key
+	 *  in_dna_text
+	 *  out_2cs_bin
+	 *  out_2na_bin
+	 *  out_4na_bin
+	 *  out_dna_text
+	 *  out_x2cs_bin
+	 *  out_x2na_bin
+	 *  out_2cs_packed
+	 *  out_2na_packed
+	 *  out_4na_packed
+	 *  out_color_text
+	 *  out_qual_phred
+	 *  out_color_matrix
+	 */
+	/* INSDC:SRA:tbl:spotname inherited productions
+	 *  out_x_coord
+	 *  out_y_coord
+	 *  out_name_fmt
+	 *  out_spot_name
+	 *  spot_ids_found
+	 */
+	/* INSDC:SRA:tbl:spotdesc inherited productions
+	 *  trim_len
+	 *  out_label
+	 *  out_nreads
+	 *  trim_start
+	 *  out_read_len
+	 *  out_label_len
+	 *  out_rd_filter
+	 *  out_read_type
+	 *  out_read_start
+	 *  out_label_start
+	 *  static_fixed_spot_len
+	 */
+	/* INSDC:SRA:tbl:stats inherited productions
+	 *  base_count
+	 *  spot_count
+	 *  max_spot_id
+	 *  min_spot_id
+	 *  in_stats_bin
+	 *  bio_base_count
+	 */
+	/* NCBI:tbl:n_encoding inherited productions
+	 *  read_unpack
+	 */
+	/* NCBI:SRA:_454_:common productions
+	 *  .SIGNAL
+	 *  .CLIP_ADAPTER_LEFT
+	 *  .CLIP_QUALITY_LEFT
+	 *  .CLIP_ADAPTER_RIGHT
+	 *  .CLIP_QUALITY_RIGHT
+	 *  out_flow_chars
+	 *  out_key_sequence
+	 *  out_linker_sequence
+	 */
+};
+/*--------------------------------------------------------------------------
+* NCBI:SRA:_454_:tbl:v2
+*  Roche 454 SRA Platform
+*
+* history:
+*  1.0.1 - explicitly base upon sra #1.0.1 and related changes
+*  1.0.2 - respond to change to 454:common base table #1.0.2
+*/
+// encodings are declared to have their own version ( the "#2" after each name )
+// so that they may be changed over time independently of the tables that use them
+physical INSDC:coord:one NCBI:SRA:_454_:encoding:CLIP #2 // storage rule for the clip coordinate columns
+{
+decode { return ( INSDC:coord:one ) iunzip ( @ ); } // read: run iunzip on the stored data, cast to INSDC:coord:one
+encode { return izip ( @ ); } // write: run izip on the incoming data
+}
+physical NCBI:isamp1 NCBI:SRA:_454_:encoding:SIGNAL #2 // storage rule for the SIGNAL column
+{
+decode { return ( NCBI:isamp1 ) iunzip ( @ ); } // read: run iunzip on the stored data, cast to NCBI:isamp1
+encode { return izip ( @ ); } // write: run izip on the incoming data
+}
+physical INSDC:position:one NCBI:SRA:_454_:encoding:POSITION #2 // storage rule for POSITION: izip applied to the output of deriv
+{
+decode
+{
+I32 pos_1st_deriv = iunzip ( @ ); // read: unpack the stored I32 values ( named as a first derivative )
+return ( INSDC:position:one ) < I32 > integral ( pos_1st_deriv ); // undo the deriv step with integral, then cast
+}
+encode
+{
+I32 pos_1st_deriv = < I32 > deriv ( @ ); // write: apply deriv to the incoming positions
+return izip ( pos_1st_deriv ); // then pack the result with izip
+}
+}
+/* normalized v2 table
+*
+*  inherits from NCBI:SRA:tbl:sra_nopos, NCBI:tbl:base_space,
+*  NCBI:tbl:phred_quality and NCBI:SRA:_454_:common, and supplies the
+*  physical columns and the out_* productions for the 454 columns.
+*
+*  defaults visible in this table:
+*   - out_flow_chars: .FLOW_CHARS, else an echo of 'TACG' that takes
+*     .SIGNAL as input, else an echo of a long literal of repeated 'TACG'
+*   - out_key_sequence: .KEY_SEQUENCE, else an echo of 'TCAG'
+*  ( the '|' separated alternatives are presumably tried in order until
+*  one resolves - TODO confirm against the VDB schema documentation )
+*
+* history:
+*  1.0.6 - base upon updated ancestry
+*  1.0.7 - base upon updated ancestry
+*/
+table NCBI:SRA:_454_:tbl:v2 #1.0.7
+= NCBI:SRA:tbl:sra_nopos #2.1.3
+, NCBI:tbl:base_space #2.0.3
+, NCBI:tbl:phred_quality #2.0.3
+, NCBI:SRA:_454_:common #1.0.4
+{
+/* NAME tokenizing and coordinates
+*  most work happens in skeyname table
+*  we still obtain REGION from name
+*
+*  REGION is a readonly column: it is computed by extract_name_coord from
+*  _out_name and its tokens, selecting NCBI:SRA:name_token:T.
+*  the same tokenizer is applied to _out_name ( out_spot_name_tok ) and to
+*  NAME ( in_spot_name_tok ).
+*/
+readonly column INSDC:coord:val REGION = ( INSDC:coord:val )
+NCBI:SRA:extract_name_coord < NCBI:SRA:name_token:T > ( _out_name, out_spot_name_tok );
+NCBI:SRA:spot_name_token out_spot_name_tok
+= NCBI:SRA:_454_:tokenize_spot_name ( _out_name ); // tokens of _out_name
+NCBI:SRA:spot_name_token in_spot_name_tok
+= NCBI:SRA:_454_:tokenize_spot_name ( NAME ); // tokens of the NAME column
+// special sequences
+INSDC:dna:text out_flow_chars
+= .FLOW_CHARS
+| < INSDC:dna:text > echo < 'TACG' > ( .SIGNAL )
+| < INSDC:dna:text > echo < 'TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG' > ();
+physical column < INSDC:dna:text > zip_encoding // flow chars are stored with zip_encoding
+.FLOW_CHARS = in_flow_chars;
+INSDC:dna:text out_key_sequence
+= .KEY_SEQUENCE
+| < INSDC:dna:text > echo < 'TCAG' > (); // alternative when .KEY_SEQUENCE is not used: the constant 'TCAG'
+physical column < INSDC:dna:text > zip_encoding // key sequence is stored with zip_encoding
+.KEY_SEQUENCE = in_key_sequence;
+INSDC:dna:text out_linker_sequence = .LINKER_SEQUENCE; // no alternative is given for the linker
+physical column < INSDC:dna:text > zip_encoding // linker sequence is stored with zip_encoding
+.LINKER_SEQUENCE = in_linker_sequence;
+// linker needs to be representable by its own table
+// either in metadata or somewhere else
+// position stored as normal 1-based coordinate
+INSDC:position:one out_position = .POSITION;
+physical column NCBI:SRA:_454_:encoding:POSITION #2 // uses the POSITION encoding
+.POSITION = POSITION;
+// clips: all four clip columns use the CLIP encoding
+physical column NCBI:SRA:_454_:encoding:CLIP #2
+.CLIP_ADAPTER_LEFT = CLIP_ADAPTER_LEFT;
+physical column NCBI:SRA:_454_:encoding:CLIP #2
+.CLIP_ADAPTER_RIGHT = CLIP_ADAPTER_RIGHT;
+physical column NCBI:SRA:_454_:encoding:CLIP #2
+.CLIP_QUALITY_LEFT = CLIP_QUALITY_LEFT;
+physical column NCBI:SRA:_454_:encoding:CLIP #2
+.CLIP_QUALITY_RIGHT = CLIP_QUALITY_RIGHT;
+// signal: stored with the SIGNAL encoding
+physical column NCBI:SRA:_454_:encoding:SIGNAL #2
+.SIGNAL = SIGNAL;
+};

Mercurial > repos > charles_s_test > seqsero2

comparison libs/sratoolkit.2.8.0-centos_linux64/schema/sra/454.vschema @ 3:38ad1130d077 draft