changeset 0:cfb4fc1e820d draft

Revamp and renaming of previous tools. Many of the tools are new versions. Adding ctat_metagenomics tool.
author trinity_ctat
date Thu, 12 Apr 2018 10:26:28 -0400
parents
children 1c8d5eb15ed1
files ctat_metagenomics.xml tool-data/ctat_centrifuge_indexes.loc.sample tool_data_table_conf.xml.sample
diffstat 3 files changed, 170 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_metagenomics.xml	Thu Apr 12 10:26:28 2018 -0400
@@ -0,0 +1,145 @@
+<tool id="ctat_metagenomics" name="ctat_metagenomics" version="1.0.0" profile="17.05">
+    <!-- Tool declaration; requires package ctat-metagenomics 1.0.1. Attribute values must be quoted for the XML to parse. -->
+    <description>Centrifuge - Classifier for metagenomic sequences (RNA-Seq)</description>
+    <requirements>
+        <requirement type="package" version="1.0.1">ctat-metagenomics</requirement>
+    </requirements>
+    <command detect_errors="default">
+      <![CDATA[
+      python metagenomics.py
+      ## The index location is the "path" field of the entry selected in the index parameter.
+      --index "${index.fields.path}"
+      --out_dir "centrifuge"
+      ## Dataset paths are single-quoted so the shell passes each one as exactly one argument.
+      #if $format_type.format == "fasta"
+          --format fasta --unpaired_reads '$format_type.fasta_file'
+      #end if
+
+      #if $format_type.format == "fastq"
+          --format fastq
+          #if $format_type.read_type.type == "single"
+              --read_type "single" --unpaired_reads '$format_type.read_type.left_fq_single'
+          #end if
+
+          #if $format_type.read_type.type == "paired"
+              --read_type "paired" --left_fq '$format_type.read_type.left_fq' --right_fq '$format_type.read_type.right_fq'
+          #end if
+      #end if
+      ## Use the slot count Galaxy allocates to the job; fall back to the previous fixed value of 4.
+      --threads \${GALAXY_SLOTS:-4}
+      ]]>
+    </command>
+    <stdio> <!-- Exit codes in the range "1:" are reported as fatal errors. -->
+        <exit_code level="fatal" range="1:" description="Error running centrifuge"/>
+    </stdio>
+
+    <inputs>
+        <!-- Input format selector: FASTA takes one file; FASTQ adds a nested single/paired read-type choice. -->
+        <conditional name="format_type">
+            <param name="format" type="select" label="Choose input format" help="Choose fasta for Trinity assembled reads">
+                <option value="fasta" selected="true">FASTA</option>
+                <option value="fastq" selected="false">FASTQ</option>
+            </param>
+            <when value="fasta">
+                <param name="fasta_file" type="data" format="fasta" label="Fasta file:" help="Trinity assembled reads in fasta format"/>
+            </when>
+            <when value="fastq">
+                <conditional name="read_type">
+                    <param name="type" type="select" label="Choose read type" help="Choose read type">
+                        <option value="single" selected="true">SINGLE END DATA</option>
+                        <option value="paired" selected="false">PAIRED END DATA</option>
+                    </param>
+                    <when value="single">
+                        <param name="left_fq_single" type="data" format="fastq" label="Left_fq:" help="Left fastq"/>
+                    </when>
+                    <when value="paired">
+                        <param name="left_fq" type="data" format="fastq" label="Left_fq:" help="Left fastq"/>
+                        <param name="right_fq" type="data" format="fastq" label="Right_fq:" help="Right fastq"/>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <!-- Index choices are populated from the ctat_centrifuge_indexes data table; the options element must be closed. -->
+        <param name="index" type="select" label="Choose reference genome index :" help="Select genome index">
+            <options from_data_table="ctat_centrifuge_indexes"/>
+        </param>
+
+    </inputs>
+
+    <outputs> <!-- Each dataset is collected from the "centrifuge" directory passed to the command as out_dir. -->
+        <data name="classification_results" format="txt" from_work_dir="centrifuge/classification.results.txt" label="Centrifuge classification output"/>
+        <data name="classification_report" format="txt" from_work_dir="centrifuge/classification.report.txt" label="Centrifuge summary output"/>
+        <data name="kraken_style_report" format="txt" from_work_dir="centrifuge/kraken_style_report.txt" label="Kraken-style report"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- The following test uses one file that is unpaired reads.
+            -->
+            <param name="format" value="fastq" />
+            <param name="type" value="single" />
+            <param name="left_fq_single" value="centrifuge/SRR2219890_1.adj.fastq" />
+            <param name="index" value="/N/dc2/projects/galaxyshared/trinity/dev/Trinity_CTAT/metagenomics/phv" />
+            <output name="classification_results" >
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line line="readID&#009;seqID&#009;taxID&#009;score&#009;2ndBestScore&#009;hitLength&#009;queryLength&#009;numMatches" />
+                </assert_contents>
+            </output>
+            <output name="classification_report" file="centrifuge/SRR2219890_1.classification.report.txt" />
+            <output name="kraken_style_report" file="centrifuge/SRR2219890_1.kraken_style_report.txt" />
+        </test>
+        <test>
+            <!-- The following test uses two files that are left/right paired reads.
+            -->
+            <param name="format" value="fastq" />
+            <param name="type" value="paired" />
+            <param name="left_fq" value="centrifuge/SRR073747_1.fastq" />
+            <param name="right_fq" value="centrifuge/SRR073747_2.fastq" />
+            <!-- FIX - How are we going to set the index value when we don't know what is in the table? How do we find it for testing?
+            <param name="index.fields.path" value="/N/dc2/projects/galaxyshared/trinity/dev/Trinity_CTAT/metagenomics/phv" />
+            -->
+            <output name="classification_results" >
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line line="readID&#009;seqID&#009;taxID&#009;score&#009;2ndBestScore&#009;hitLength&#009;queryLength&#009;numMatches" />
+                </assert_contents>
+            </output>
+            <output name="classification_report" file="centrifuge/SRR073747_1_2.classification.report.txt" />
+            <output name="kraken_style_report" file="centrifuge/SRR073747_1_2.kraken_style_report.txt" />
+        </test>
+        <test>
+            <!-- The following test uses an unpaired Trinity fasta file.
+            -->
+            <param name="format" value="fasta" />
+            <param name="fasta_file" value="centrifuge/TrinitySRR2219890.fasta" />
+            <!-- FIX - How are we going to set the index value when we don't know what is in the table? How do we find it for testing?
+            <param name="index" value="/N/dc2/projects/galaxyshared/trinity/dev/Trinity_CTAT/metagenomics/phv" />
+            -->
+            <output name="classification_results" >
+                <assert_contents>
+                    <has_line_matching expression=".+" />
+                    <has_line line="readID&#009;seqID&#009;taxID&#009;score&#009;2ndBestScore&#009;hitLength&#009;queryLength&#009;numMatches" />
+                </assert_contents>
+            </output>
+            <output name="classification_report" file="centrifuge/TrinitySRR2219890.classification.report.txt" />
+            <output name="kraken_style_report" file="centrifuge/TrinitySRR2219890.kraken_style_report.txt" />
+        </test>
+    </tests>
+
+    <help>
+.. class:: infomark
+ 
+**Centrifuge is a novel microbial classification engine that enables rapid, accurate, and sensitive labeling of reads and quantification of species**
+
+For more information:
+
+https://ccb.jhu.edu/software/centrifuge/manual.shtml#what-is-centrifuge
+
+The publication documenting Centrifuge can be found here:
+
+http://genome.cshlp.org/content/early/2016/11/16/gr.210641.116.abstract
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_centrifuge_indexes.loc.sample	Thu Apr 12 10:26:28 2018 -0400
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Centrifuge Indexes
+# Usually there will only be one index, but it is conceivable
+# that there could be multiple indexes.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the index files are stored
+#
+#ctat_centrifuge_indexes.loc could look like:
+#
+#p_compressed+h+v CTAT_CentrifugeIndex_p_compressed+h+v  /ctat/centrifuge/index/path/p_compressed+h+v
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Apr 12 10:26:28 2018 -0400
@@ -0,0 +1,10 @@
+<tables> <!-- Each table declares the columns value, name, path and the .loc file its rows are read from. -->
+    <table name="ctat_genome_ref_libs" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_ref_libs.loc"/>
+    </table>
+    <table name="ctat_centrifuge_indexes" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_centrifuge_indexes.loc"/>
+    </table>
+</tables>