Mercurial > repos > jeanfred > sortmerna

--- a/merge_paired_reads.xml	Wed Apr 10 10:25:15 2013 -0400
+++ b/merge_paired_reads.xml	Tue Apr 30 11:17:15 2013 -0400
@@ -3,7 +3,7 @@
     <requirement type='package' version="1.7">sortmerna</requirement>
   </requirements>
   <description>Merges two fastq paired-reads files into one file.</description>
-  <command interpreter="bash">
+  <command>
 merge-paired-reads.sh $input_reads1 $input_reads2 output_file.txt
   </command>
   <inputs>
@@ -32,6 +32,7 @@
 Merges two fastq paired-reads files into one file.

 To run merge-paired-reads::
-  bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq
+
+    bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq
   </help>
 </tool>
\ No newline at end of file
--- a/sortmerna_wrapper.xml	Wed Apr 10 10:25:15 2013 -0400
+++ b/sortmerna_wrapper.xml	Tue Apr 30 11:17:15 2013 -0400
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
 <tool id="sortmerna_wrapper" version="1.0" name="Filter with SortMeRNA">
   <requirements>
     <requirement type='package' version="1.7">sortmerna</requirement>
@@ -5,11 +6,16 @@
   <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
   <command interpreter="python">
 sortmerna_wrapper.py
-
 --sortmerna "
-$read_family $input_reads
 $strand_search
-$sequencing_type
+#if str( $read_family.read_family_selector ) == 'other':
+        --I $input_reads -r $read_family.ratio_parameter
+#else:
+	$read_family.read_family_selector $input_reads
+#end if
+#if str( $sequencing_type.sequencing_type_selector ) == 'paired':
+    $sequencing_type.paired_type
+#end if

 #if $outputs_selected:
     #if 'accept' in $outputs_selected.value:
@@ -36,22 +42,42 @@
 #end if
   </command>
   <inputs>
-    <param name="read_family" type="select" format="text"
-	   help="Currently only Solexa Illumina and Roche 454 technologies are supported.
-		 The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
-      <label>Sequencing technology of querying sequences (reads)</label>
-      <option value="--I">Illumina Solexa</option>
-      <option value="--454">Roche 454</option>
-    </param>
+    <conditional name="read_family"> <!-- technology preset; "other" exposes a user-supplied -r ratio -->
+      <param name="read_family_selector" type="select" format="text"
+	   help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
+	<label>Sequencing technology of querying sequences (reads)</label>
+	<option value="--I">Illumina Solexa</option>
+	<option value="--454">454 Roche</option>
+	<option value="other">Other</option>
+      </param>
+      <when value="--I"/> <!-- preset: explicit empty case, no extra inputs -->
+      <when value="--454"/> <!-- preset: explicit empty case, no extra inputs -->
+      <when value="other">
+	<param name="ratio_parameter" type="float" value="1"  min="0" max="1"
+	       label="Ratio parameter (the number of hits on the read / read length)"
+	       help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads.
+		     For other read types, if the sequencing technology produces high quality reads with a low substitution error rate (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27].
+		     If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), then the ratio parameter can be set to r=[0.13,0.17]."/>
+      </when>
+    </conditional>
     <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/>

-    <param name="sequencing_type" type="select" label="Sequencing type" display="radio"
-	   help="The first option filters reads independently of each other
-		 (paired reads can be separated between accept and rejected files).">
-      <option value="">Reads are not paired</option>
-      <option value="--paired-in">Reads are paired: preserve order in output files</option>
-      <option value="--paired-out">Reads are paired: order does not matter in output files</option>
-    </param>
+    <conditional name="sequencing_type"> <!-- the pairing flag is only requested when reads are paired -->
+      <param name="sequencing_type_selector" type="select" label="Sequencing type">
+	<option value="not_paired">Reads are not paired</option>
+	<option value="paired">Reads are paired</option>
+      </param>
+      <when value="not_paired"/> <!-- explicit empty case: no pairing flag is emitted -->
+      <when value="paired">
+	<param name="paired_type" type="select" label="If one read of a pair is accepted and the other not, output both reads" display="radio"
+	       help="SortMeRNA does not use the pairing information for filtering RNA, however if one read of a pair is accepted and the other is not,
+		     the resulting output may break apart the pair into two separate files.
+		     The purpose of 'Reads are paired' option is to preserve the pairing of the reads.">
+	  <option value="--paired-in">to accepted file</option>
+	  <option value="--paired-out">to rejected file</option>
+	</param>
+      </when>
+    </conditional>

     <param name="strand_search" type="select" label="Which strands to search" display="radio">
       <option value="">Search both strands</option>
@@ -83,8 +109,7 @@
     </conditional>

     <!-- Outputs -->
-    <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"
-	   help="SortMeRNA can output matching reads by database ; this will be made available in a future version of the wrapper too.">
+    <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options">
       <option value="accept" selected="True">Reads matching to at least one database</option>
       <option value="other">Reads not found in any database</option>
     </param>
@@ -131,15 +156,15 @@
   </stdio>
   <tests>
     <test>
-      <param name="read_family" value="I" />
+      <param name="read_family_selector" value="--I" /> <!-- must equal an option value of the selector -->
       <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
-      <param name="sequencing_type" value ="" />
+      <param name="sequencing_type_selector" value="not_paired" />
       <param name="strand_search" value="" />
       <param name="databases_selector" value="cached" />
+      <param name="input_databases" value="rfam-5.8s,rfam-5s" />
       <param name="outputs_selected" value="accept,other" />
       <param name="log" value="" />
       <param name="options_type_selector" value="less" />
-      <param name="input_databases" value="rfam-5.8s,rfam-5s" />
       <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
       <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
     </test>
@@ -180,6 +205,8 @@

 The output will follow the same format (FASTA or FASTQ) as the reads.

+In the standalone version of SortMeRNA, the user may output the matching reads in a separate file per database (--bydbs option). This option will be made available in a future version of this Galaxy wrapper.
+
 ------

 **rRNA databases**
@@ -230,6 +257,8 @@

 **SortMeRNA parameter list**

+The standalone, command-line version of SortMeRNA uses the following parameters.
+
 For indexing (buildtrie):

 This program builds a Burst trie on an input rRNA database file in fasta format
@@ -310,5 +339,16 @@

         --version     version number

+------
+
+**Bibliography**
+
+[1] Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO (2013) The SILVA ribosomal RNA gene database project: improved data processing and web-based tools, Nucleic Acids Research, 41 (D1): D590-D596.
+
+[2] Burge SW, Daub J, Eberhardt R, Tate J, Barquist L, Nawrocki EP, Eddy SR, Gardner PP, Bateman A (2012) Rfam 11.0: 10 years of RNA families, Nucleic Acids Research, doi: 10.1093/nar/gks1005
+
+[3] Edgar, R.C. (2010) Search and clustering orders of magnitude faster than BLAST, Bioinformatics 26(19), 2460-2461, doi: 10.1093/bioinformatics/btq461
+
+[4] Loman NJ, Misra RV, Dallman TJ, Constantinidou C, Gharbia SE, Wain J, Pallen MJ (2012) Performance comparison of benchtop high-throughput sequencing platforms, Nature Biotechnology, 30 (5): 434-439.
   </help>
 </tool>