changeset 0:75064adad442 draft default tip

Uploaded
author takadonet
date Thu, 16 Jan 2014 10:57:12 -0500
parents
children
files bgzip.xml tabix.sh tabix.xml tool_dependencies.xml
diffstat 4 files changed, 178 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bgzip.xml	Thu Jan 16 10:57:12 2014 -0500
@@ -0,0 +1,50 @@
+<tool id="bgzip" name="bgzip" version="0.0.2">
+    <!-- Galaxy wrapper for bgzip (shipped in the tabix package): writes bgzip's standard output to the output dataset. -->
+    <description>Block compression/decompression utility. Required for use of tabix.</description>
+    <requirements>
+        <requirement type="package" version="0.2.6">tabix</requirement>
+    </requirements>
+    <command>
+        ## Options are emitted before the input path: only a permuting (GNU) getopt accepts options after an operand.
+        bgzip -c
+        #if str($virtualOffset) != ""
+            -b $virtualOffset
+        #end if
+        #if str($size) != ""
+            -s $size
+        #end if
+        ## Paths are quoted so that whitespace in a dataset path cannot split them into several words.
+        "$input" > "$output"
+    </command>
+    <inputs>
+        <!-- NOTE(review): the bgzip usage text lists -b and -s as decompression options; confirm that pairing them with a "gz" output is intended. -->
+        <param name="input" type="data" label="Input file" />
+        <param name="virtualOffset" type="text" optional="true" label="Virtual Offset" />
+        <param name="size" type="text" optional="true" label="Size" />
+    </inputs>
+
+    <outputs>
+        <data format="gz" name="output"/>
+    </outputs>
+
+    <help>
+**What it does:** 
+
+The input data file is sorted and compressed by bgzip which has a gzip(1) like interface. Tabix requires the file to be compressed using this tool first.
+
+**Citation:**
+
+Tabix was written by Heng Li. The BGZF library was originally implemented by Bob Handsaker and modified by Heng Li for remote file access and in-memory caching.
+
+http://samtools.sourceforge.net/tabix.shtml
+
+**Example:**
+
+(grep ^"#" in.gff; grep -v ^"#" in.gff | sort -k1,1 -k4,4n) | bgzip > sorted.gff.gz;
+
+tabix -p gff sorted.gff.gz;
+
+tabix sorted.gff.gz chr1:10,000,000-20,000,000;
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabix.sh	Thu Jan 16 10:57:12 2014 -0500
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Galaxy wrapper for tabix: the first argument is the output file, all remaining ones go to tabix.
+
+output=$1
+shift
+
+# Run the tabix binary found on PATH. The previous command invoked $CUR_DIR/tabix.sh, i.e. a
+# script named like this wrapper in the working directory, rather than tabix itself.
+# "$@" and "$output" are quoted so arguments containing whitespace are passed through intact.
+tabix "$@" > "$output"
+
+exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabix.xml	Thu Jan 16 10:57:12 2014 -0500
@@ -0,0 +1,89 @@
+<tool id="tabix" name="tabix" version="0.0.2">
+    <!-- Galaxy wrapper for tabix: tabix.sh gets the output path first, then the tabix arguments built below. NOTE(review): confirm tabix.sh is on PATH at job time. -->
+    <description>Generic indexer for TAB-delimited genome position files.</description>
+    <requirements>
+        <requirement type="package" version="0.2.6">tabix</requirement>
+    </requirements>
+    <command>
+        tabix.sh $output
+    #if str($optional.extension) == "tabular"
+        ## The column layout options are only offered for generic tabular input.
+        #if str($optional.position) == "no"
+            -0
+        #end if
+        #if str($optional.columnseq) != ""
+            -s $optional.columnseq
+        #end if
+        #if str($optional.columnstart) != ""
+            -b $optional.columnstart
+        #end if
+        #if str($optional.columnend) != ""
+            -e $optional.columnend
+        #end if
+        #if str($optional.skiplines) != ""
+            -S $optional.skiplines
+        #end if
+        #if str($optional.skipchar) != ""
+            -c $optional.skipchar
+        #end if
+    #else
+        ## -p takes the preset name (gff, bed, sam or vcf); a bare -p would consume the input path as its argument.
+        -p $optional.extension
+    #end if
+    $input $region
+    </command>
+    <inputs>
+        <param name="input" type="data" label="Input file" format="gff,bed,sam,vcf,tabular" />
+        <param name="region" type="text" optional="true" label="Regions (seperate with spaces)" />
+        <conditional name="optional">
+            <param name="extension" type="select" value="tabular" label="Input Extension">
+                <option value="tabular">tabular</option>
+                <option value="gff">gff</option>
+                <option value="bed">bed</option>
+                <option value="sam">sam</option>
+                <option value="vcf">vcf</option>
+            </param>
+            <when value="tabular">
+                <param name="columnseq" type="integer" optional="true" label="Column of sequence name" />
+                <param name="columnstart" type="integer" optional="true" label="Column of start chromosomal position" />
+                <param name="columnend" type="integer" optional="true" label="Column of end chromosomal position" />
+                <param name="skiplines" type="integer" optional="true" label="Skip first INT lines" />
+                <param name="skipchar" type="text" optional="true" label="Skip lines started with CHAR" />
+                <param name="position" type="select" value="yes" label="1-based? (if not, 0-based)">
+                    <option value="yes">yes</option>
+                    <option value="no">no</option>
+                </param>
+            </when>
+            <!-- The presets take no extra parameters; empty cases are declared so that every select option has a matching when block. -->
+            <when value="gff" />
+            <when value="bed" />
+            <when value="sam" />
+            <when value="vcf" />
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" />
+    </outputs>
+
+    <help>
+**What it does:** 
+
+Tabix indexes a TAB-delimited genome position file in.tab.bgz and creates an index file in.tab.bgz.tbi when region is absent from the command-line. The input data file must be position sorted and compressed by bgzip which has a gzip(1) like interface. After indexing, tabix is able to quickly retrieve data lines overlapping regions specified in the format "chr:beginPos-endPos". Fast data retrieval also works over network if URI is given as a file name and in this case the index file will be downloaded if it is not present locally.
+
+**Citation:**
+
+Tabix was written by Heng Li. The BGZF library was originally implemented by Bob Handsaker and modified by Heng Li for remote file access and in-memory caching.
+
+http://samtools.sourceforge.net/tabix.shtml
+
+**Example:**
+
+(grep ^"#" in.gff; grep -v ^"#" in.gff | sort -k1,1 -k4,4n) | bgzip > sorted.gff.gz;
+
+tabix -p gff sorted.gff.gz;
+
+tabix sorted.gff.gz chr1:10,000,000-20,000,000;
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jan 16 10:57:12 2014 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="tabix" version="0.2.6">
+        <install version="1.0">
+            <actions>
+                <!-- Fetch the tabix 0.2.6 source archive, build it with make, then move the two executables into the bin directory. -->
+                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/tabix/tabix-0.2.6.tar.bz2</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>tabix</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>bgzip</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <!-- Prepend that bin directory to PATH. -->
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+        	Tabix version 0.2.6. Tabix was written by Heng Li. The BGZF library was originally implemented by Bob Handsaker and modified by Heng Li for remote file access and in-memory caching.
+        </readme>
+    </package>
+</tool_dependency>