diff libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/clip.vschema @ 3:38ad1130d077 draft

planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author charles_s_test
date Mon, 27 Nov 2017 11:21:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libs/sratoolkit.2.8.0-centos_linux64/schema/ncbi/clip.vschema	Mon Nov 27 11:21:07 2017 -0500
@@ -0,0 +1,147 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/*==========================================================================
+ * NCBI Sequence Read Archive schema
+ */
+version 1;
+
+include 'ncbi/sra.vschema';
+include 'ncbi/spotname.vschema';
+
+
+/*--------------------------------------------------------------------------
+ * NCBI:SRA:tbl:clip
+ *  common clip column processing
+ *  shared by 454 and ion-torrent
+ *
+ *  uses spotdesc because it has a dependency upon spot_len
+ *
+ * history:
+ *  1.0.1 - base explicitly upon spotdesc #1.0.1
+ *  1.0.2 - base explicitly upon spotdesc #1.0.2
+ */
+table NCBI:SRA:tbl:clip #1.0.2 = INSDC:SRA:tbl:spotdesc #1.0.2
+{
+    /* CLIP_ADAPTER_LEFT, CLIP_ADAPTER_RIGHT
+     *  adapter clips in 1-based coordinates
+     *  when value is 0, implies that they are NOT SET
+     */
+    column INSDC:coord:one CLIP_ADAPTER_LEFT = out_clip_adapt_left;
+    column INSDC:coord:one CLIP_ADAPTER_RIGHT = out_clip_adapt_right;
+
+    // casts are required to allow multple storage formats
+    INSDC:coord:one out_clip_adapt_left = cast ( .CLIP_ADAPTER_LEFT );
+    INSDC:coord:one out_clip_adapt_right = cast ( .CLIP_ADAPTER_RIGHT );
+
+
+    /* CLIP_QUALITY_LEFT, CLIP_QUALITY_RIGHT
+     *  quality clips in 1-based coordinates
+     *  when value is 0, implies that they are NOT SET
+     */
+    column INSDC:coord:one CLIP_QUALITY_LEFT = out_clip_qual_left;
+    column INSDC:coord:one CLIP_QUALITY_RIGHT = out_clip_qual_right;
+
+    // casts are required to allow multple storage formats
+    INSDC:coord:one out_clip_qual_left
+        = cast ( .CLIP_QUALITY_LEFT )
+        | < INSDC:coord:one > echo < 1 > ();
+    INSDC:coord:one out_clip_qual_right
+        = cast ( .CLIP_QUALITY_RIGHT )
+        | cast ( spot_len );
+
+    // support for reading 16-bit clips, as in v1 schema
+    readonly column U16 CLIP_ADAPTER_LEFT
+        = .CLIP_ADAPTER_LEFT
+        | cast ( out_clip_adapt_left );
+    readonly column U16 CLIP_ADAPTER_RIGHT
+        = .CLIP_ADAPTER_RIGHT
+        | cast ( out_clip_adapt_right );
+    readonly column U16 CLIP_QUALITY_LEFT = cast ( out_clip_qual_left );
+    readonly column U16 CLIP_QUALITY_RIGHT = cast ( out_clip_qual_right );
+
+
+    /* CLIP-MANIA
+     */
+
+    // 1-based fully-closed right edge is row-length
+    INSDC:coord:one spot_right = ( INSDC:coord:one ) spot_len;
+
+    // processed 1-based coordinates >= 1
+    INSDC:coord:one lim_clip_adapt_left
+        = < INSDC:coord:one > clip < 1, 0x7FFFFFFF > ( out_clip_adapt_left );
+    INSDC:coord:one max_clip_adapt_right
+        = < INSDC:coord:one, INSDC:coord:one > map < 0, 0x7FFFFFFF > ( out_clip_adapt_right );
+    INSDC:coord:one lim_clip_adapt_right
+        = < INSDC:coord:one > min ( spot_right, max_clip_adapt_right );
+    INSDC:coord:one lim_clip_qual_left
+        = < INSDC:coord:one > clip < 1, 0x7FFFFFFF > ( out_clip_qual_left );
+    INSDC:coord:one max_clip_qual_right
+        = < INSDC:coord:one, INSDC:coord:one > map < 0, 0x7FFFFFFF > ( out_clip_qual_right );
+    INSDC:coord:one lim_clip_qual_right
+        = < INSDC:coord:one > min ( spot_right, max_clip_qual_right );
+
+    // read-only columns with 0-based coordinates
+    readonly column INSDC:coord:zero CLIP_ADAPTER_LEFT
+        = ( INSDC:coord:zero ) < I32 > diff < 1 > ( lim_clip_adapt_left );
+    readonly column INSDC:coord:zero CLIP_ADAPTER_RIGHT
+        = ( INSDC:coord:zero ) < I32 > diff < 1 > ( lim_clip_adapt_right );
+    readonly column INSDC:coord:zero CLIP_QUALITY_LEFT
+        = ( INSDC:coord:zero ) < I32 > diff < 1 > ( lim_clip_qual_left );
+    readonly column INSDC:coord:zero CLIP_QUALITY_RIGHT
+        = ( INSDC:coord:zero ) < I32 > diff < 1 > ( lim_clip_qual_right );
+
+    // combined clips
+    INSDC:coord:one max_clip_left
+        = < INSDC:coord:one > max ( lim_clip_adapt_left, lim_clip_qual_left )
+        | lim_clip_adapt_left
+        | lim_clip_qual_left;
+    INSDC:coord:zero max_zclip_left
+        = ( INSDC:coord:zero ) < I32 > diff < 1 > ( max_clip_left );
+    INSDC:coord:one min_clip_right
+        = < INSDC:coord:one > min ( lim_clip_adapt_right, lim_clip_qual_right )
+        | lim_clip_adapt_right
+        | lim_clip_qual_right;
+
+
+    /* TRIMMED SEQUENCE
+     *  need to find the 0-based trim_start and trim_len
+     */
+    INSDC:coord:zero bio_start
+        = NCBI:SRA:bio_start ( out_read_start, out_read_type );
+    INSDC:coord:zero trim_start
+        = < INSDC:coord:zero > max ( bio_start, max_zclip_left )
+        | bio_start;
+
+    INSDC:coord:zero bio_end
+        = NCBI:SRA:bio_end < false > ( out_read_start, out_read_type, out_read_len );
+
+    I32 trim_stop
+        = < I32 > max ( min_clip_right, trim_start )
+        | spot_right;
+    INSDC:coord:len trim_len
+        = ( INSDC:coord:len ) < I32 > diff ( trim_stop, trim_start );
+};