changeset 2:5048c53a90f4 draft

Uploaded
author jeanfred
date Wed, 10 Apr 2013 10:25:15 -0400
parents 2acc5190be4c
children 9809be9b7567
files merge_paired_reads.xml sortmerna_wrapper.xml test-data/merged-paired-reads_output.fastq tool-data/rRNA_databases.loc.sample tool_dependencies.xml
diffstat 5 files changed, 378 insertions(+), 118 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_paired_reads.xml	Wed Apr 10 10:25:15 2013 -0400
@@ -0,0 +1,37 @@
+<tool id="merged_paired_reads_wrapper" version="1.0" name="Merge paired reads">
+  <requirements>
+    <requirement type='package' version="1.7">sortmerna</requirement>
+  </requirements>
+  <description>Merges two fastq paired-reads files into one file.</description>
+  <command interpreter="bash">
+merge-paired-reads.sh $input_reads1 $input_reads2 output_file.txt
+  </command>
+  <inputs>
+    <param format="fastq" name="input_reads1" type="data"
+	   label="First reads file (fastq)" help=""/>
+    <param format="fastq" name="input_reads2" type="data"
+	   label="Second reads file (fastq)" help=""/>
+  </inputs>
+  <outputs>
+    <data format="fastq" name="output" from_work_dir="output_file.txt"
+	  label="Merged reads from ${on_string} (fastq)">
+    </data>
+  </outputs>
+  <stdio>
+    <exit_code range="2" level="fatal"
+	       description="Too few or too many arguments provided" />
+  </stdio>
+  <tests>
+    <test>
+      <param name="input_reads1" value="sortmerna_wrapper_accept1.fastq" />
+      <param name="input_reads2" value="sortmerna_wrapper_other1.fastq" />
+      <output name="output" file="merged-paired-reads_output.fastq" />
+    </test>
+  </tests>
+  <help>
+Merges two fastq paired-reads files into one file.
+
+To run merge-paired-reads::
+  bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq
+  </help>
+</tool>
\ No newline at end of file
--- a/sortmerna_wrapper.xml	Fri Apr 05 13:07:39 2013 -0400
+++ b/sortmerna_wrapper.xml	Wed Apr 10 10:25:15 2013 -0400
@@ -1,7 +1,9 @@
-<tool id="sortmerna_wrapper" version="1.0" name="SortMeRNA">
-	<requirements><requirement type='package' version="1.7">sortmerna</requirement></requirements>
-	<description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
-	<command interpreter="python">
+<tool id="sortmerna_wrapper" version="1.0" name="Filter with SortMeRNA">
+  <requirements>
+    <requirement type='package' version="1.7">sortmerna</requirement>
+  </requirements>
+  <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
+  <command interpreter="python">
 sortmerna_wrapper.py
 
 --sortmerna "
@@ -32,114 +34,281 @@
     ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y]
                for y in  $databases_type.input_databases.value])}
 #end if
-</command>
-	<inputs>
-		<param name="read_family" type="select" format="text"
-		       help="Currently only Solexa Illumina and Roche 454 technologies are supported.
-			     The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
-			<label>Sequencing technology of querying sequences (reads)</label>
-			<option value="--I">Illumina Solexa</option>
-			<option value="--454">Roche 454</option>
-		</param>
-		<param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/>
+  </command>
+  <inputs>
+    <param name="read_family" type="select" format="text"
+	   help="Currently only Solexa Illumina and Roche 454 technologies are supported.
+		 The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
+      <label>Sequencing technology of querying sequences (reads)</label>
+      <option value="--I">Illumina Solexa</option>
+      <option value="--454">Roche 454</option>
+    </param>
+    <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/>
+
+    <param name="sequencing_type" type="select" label="Sequencing type" display="radio"
+	   help="The first option filters reads independently of each other
+		 (paired reads can be separated between accept and rejected files).">
+      <option value="">Reads are not paired</option>
+      <option value="--paired-in">Reads are paired: preserve order in output files</option>
+      <option value="--paired-out">Reads are paired: order does not matter in output files</option>
+    </param>
+
+    <param name="strand_search" type="select" label="Which strands to search" display="radio">
+      <option value="">Search both strands</option>
+      <option value="-F">Search only the forward strand</option>
+      <option value="-R">Search only the reverse-complementary strand</option>
+    </param>
 
-		<param name="sequencing_type" type="select" label="Sequencing type" display="radio"
-		       help="Unpaired mode filters reads independently of each other
-			     (paired reads can be separated between accept and rejected files).">
-		  <option value="">Reads are not paired</option>
-		  <option value="--paired-in">paired-end</option>
-		  <option value="--paired-out">unpaired-end</option>
-		</param>
+    <conditional name="databases_type">
+      <param name="databases_selector" type="select" label="Databases to query"
+	     help="Public rRNA databases provided with SortMeRNA have been indexed.
+		   On the contrary, personal databases must be indexed each time SortMeRNA is launched.
+		   Please be patient, this may take some time depending on the size of the given database.">
+	<option value="cached" selected="true">Public ribosomal databases</option>
+	<option value="history">Databases from your history</option>
+      </param>
+      <when value="cached">
+	<param name="input_databases" label="rRNA database"
+	       type="select" display="checkboxes" multiple="true">
+	  <options from_data_table="rRNA_databases" />
+	  <validator type="no_options" message="Select at least one database"/>
+	</param>
+      </when>
+      <when value="history">
+	<repeat name="input_databases" title="Database" min="1">
+	  <param name="database_name" type="data" format="fasta" label="rRNA database"
+		 help="Your database will be indexed first, which may take up to several minutes."/>
+	</repeat>
+      </when>
+    </conditional>
 
-		<param name="strand_search" type="select" label="Which strands to search" display="radio">
-		  <option value="">Search both strands</option>
-		  <option value="-F">Search only the forward strand</option>
-		  <option value="-R">Search only the reverse-complementary strand</option>
-		</param>
+    <!-- Outputs -->
+    <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"
+	   help="SortMeRNA can output matching reads by database; this will be made available in a future version of the wrapper too.">
+      <option value="accept" selected="True">Reads matching to at least one database</option>
+      <option value="other">Reads not found in any database</option>
+    </param>
+    <param  name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file"
+	    help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution.">
+    </param>
 
-		<conditional name="databases_type">
-			<param name="databases_selector" type="select" label="Databases to query">
-				<option value="cached" selected="true">Public ribosomal databases</option>
-				<option value="history">Databases from your history</option>
-			</param>
-			<when value="cached">
-			  <param name="input_databases" label="rRNA database"
-				 type="select" display="checkboxes" multiple="true">
-			    <options from_data_table="rRNA_databases" />
-			    <validator type="no_options" message="Select at least one database"/>
-			  </param>
-			</when>
-			<when value="history">
-				<repeat name="input_databases" title="Database" min="1">
-					<param name="database_name" type="data" format="fasta" label="rRNA database"
-					       help="Your database will be indexed first, which may take up to several minutes."/>
-				</repeat>
-			</when>
-		</conditional>
+    <!-- Advanced options -->
+    <conditional name="options">
+      <param name="options_type_selector" type="select" label="Advanced Options">
+	<option value="less" selected="True">Less options</option>
+	<option value="more">More options</option>
+      </param>
+      <when value="less">
+	<!-- no options -->
+      </when>
+      <when value="more">
+	<param name="number_of_threads" type="integer" label="Number of threads to use" value="1" min="1"/>
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="input" format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat"
+	  label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})">
+      <filter>outputs_selected and 'accept' in outputs_selected</filter>
+    </data>
+    <data format="input" format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
+	  label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})">
+      <filter>outputs_selected and 'other' in outputs_selected</filter>
+    </data>
+    <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log">
+      <filter>log</filter>
+    </data>
+  </outputs>
+  <stdio>
+    <regex match="This program builds a Burst trie on an input rRNA database"
+	   source="both"
+	   level="fatal"
+	   description="Buildtrie program failed to execute." />
+    <regex match="The database name"
+	   source="both"
+	   level="fatal"
+	   description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." />
+  </stdio>
+  <tests>
+    <test>
+      <param name="read_family" value="I" />
+      <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
+      <param name="sequencing_type" value ="" />
+      <param name="strand_search" value="" />
+      <param name="databases_selector" value="cached" />
+      <param name="outputs_selected" value="accept,other" />
+      <param name="log" value="" />
+      <param name="options_type_selector" value="less" />
+      <param name="input_databases" value="rfam-5.8s,rfam-5s" />
+      <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
+      <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
+    </test>
+  </tests>
+  <help>
+**Overview**
 
-		<!-- Outputs -->
-		<param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options">
-			<option value="accept" selected="True">Reads matching at least one database</option>
-			<option value="other">Reads not found in any database</option>
-		</param>
-		<param  name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file"
-			help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution.">
-		</param>
+SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments
+from metatranscriptomic data produced by next-generation sequencers.
+It is capable of handling large RNA databases and sorting out all fragments
+matching to the database with high accuracy and specificity.
+
+.. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/
+
+If you use this tool, please cite Kopylova E., Noé L. and Touzet H.,
+`"SortMeRNA: Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data"`__,
+Bioinformatics (2012), doi: 10.1093/bioinformatics/bts611.
+
+.. __: http://bioinformatics.oxfordjournals.org/content/28/24/3211
+
+------
+
+**Input**
+
+The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against.
+If the user has two forward-reverse paired-sequencing reads files, they may use
+the script "merge-paired-reads.sh" to interleave the reads into one file, preserving their order.
+
+If the sequencing type for the reads is paired-end, the user has two options under
+"Sequencing type" to filter the reads and preserve their order in the file.
+For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_.
 
-		<!-- Advanced options -->
-		<conditional name="options">
-			<param name="options_type_selector" type="select" label="Advanced Options">
-				<option value="less" selected="True">Less options</option>
-				<option value="more">More options</option>
-			</param>
-			<when value="less">
-			  <!-- no options -->
-			</when>
-			<when value="more">
-				<param name="number_of_threads" type="integer" label="Number of threads to use" value="1" min="1"/>
-		   	</when>
-		</conditional>
-	  </inputs>
-	<outputs>
-		<data format="input" format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat"
-		      label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})">
-		    <filter>outputs_selected and 'accept' in outputs_selected</filter>
-		</data>
-		<data format="input" format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
-		      label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})">
-		    <filter>outputs_selected and 'other' in outputs_selected</filter>
-		</data>
-		<data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log">
-		    <filter>log</filter>
-		</data>
-	</outputs>
-	<stdio>
-		<regex match="This program builds a Burst trie on an input rRNA database" 
-		       source="both" 
-		       level="fatal" 
-		       description="Buildtrie program failed to execute." />
-		<regex match="The database name" 
-		       source="both" 
-		       level="fatal" 
-		       description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." />
-	</stdio>
-	<tests>
-	  <test>
-	    <param name="read_family" value="I" />
-	    <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
-	    <param name="sequencing_type" value ="" />
-	    <param name="strand_search" value="" />
-	    <param name="databases_selector" value="cached" />
-	    <param name="outputs_selected" value="accept,other" />
-	    <param name="log" value="" />
-	    <param name="options_type_selector" value="less" />
-	    <param name="input_databases" value="rfam-5.8s,rfam-5s" />
-	    <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
-	    <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
-	  </test>
-	</tests>
-	<help>
-If you use this tool, please cite Kopylova E., Noé L. and Touzet H., `"SortMeRNA: Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data" &lt;http://bioinformatics.oxfordjournals.org/content/28/24/3211&gt;`_, Bioinformatics (2012), doi: 10.1093/bioinformatics/bts611.
-	</help>
+.. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf
+
+------
+
+**Output**
+
+The output will follow the same format (FASTA or FASTQ) as the reads.
+
+------
+
+**rRNA databases**
+
+SortMeRNA is distributed with 8 representative rRNA databases, which were
+all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S
+(version 11.0) databases using the tool UCLUST.
+
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Representative database  | id % | average id % | # seq | Origin                 |  # seq | filtered to remove |
++==========================+======+==============+=======+========================+========+====================+
+| SILVA 16S bacteria       |   85 |         91.6 |  8174 | SILVA SSU Ref NR v.111 | 244077 | 23s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 16S archaea        |   95 |         96.7 |  3845 | SILVA SSU Ref NR v.111 |  10919 | 23s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 18S eukarya        |   95 |         96.7 |  4512 | SILVA SSU Ref NR v.111 |  31862 | 26s,28s,23s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+|                                                                                                               |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 23S bacteria       |   98 |         99.4 |  3055 | SILVA LSU Ref v.111    |  19580 | 16s,26s,28s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 23S archaea        |   98 |         99.5 |   164 | SILVA LSU Ref v.111    |    405 | 16s,26s,28s        |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| SILVA 28S eukarya        |   98 |         99.1 |  4578 | SILVA LSU Ref v.111    |   9321 | 18s                |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+|                                                                                                               |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Rfam 5S archaea/bacteria |   98 |         99.2 | 59513 | RFAM                   | 116760 |                    |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+| Rfam 5.8S eukarya        |   98 |         98.9 | 13034 | RFAM                   | 225185 |                    |
++--------------------------+------+--------------+-------+------------------------+--------+--------------------+
+
+
+id % :
+    members of the cluster must have at least 'id %' identity with the representative sequence
+
+average id % :
+    average identity of a cluster member to the representative sequence
+
+The user may also choose to use their own rRNA databases.
+
+.. class:: warningmark
+
+Note that your personal databases are indexed each time, and that
+this may take some time depending on the size of the given database.
+
+------
+
+**SortMeRNA parameter list**
+
+For indexing (buildtrie):
+
+This program builds a Burst trie on an input rRNA database file in fasta format
+and stores the material in binary files under the folder '/automata'::
+
+    ./buildtrie --db [path to rrnas database file name {.fasta}]  {OPTIONS}
+
+The list of OPTIONS can be left blank, the default values will be used::
+
+    -L  length of the sliding window (the seed)
+        (default: 18)
 
+    -F  search only the forward strand
+    -R  search only the reverse-complementary strand
+        (default: both strands are searched)
+
+    -h  help
+
+
+
+
+For sorting (sortmerna):
+
+To run SortMeRNA, type in any order after 'sortmerna'::
+
+        --I      [illumina reads file name {fasta/fastq}]
+
+        --454    [roche 454 reads file name {fasta/fastq}]
+
+        -n       number of databases to use (must precede --db)
+
+        --db     [rrnas database name(s)]
+
+                 One database,
+                 ex 1. -n 1 --db /path1/database1.fasta
+
+                 Multiple databases,
+                 ex 2. -n 2 --db /path2/database2.fasta /path3/database3.fasta
+
+        {OPTIONS}
+
+The list of OPTIONS can be left blank, the default values will be used::
+
+        --accept      [accepted reads file name]
+        --other       [rejected reads file name]
+                      (default: no output file is created)
+
+        --bydbs       output the accepted reads by database
+                      (default: concatenated file of reads)
+
+        --log         [overall statistics file name]
+                      (default: no statistics file created)
+
+        --paired-in   put both paired-end reads into --accept file
+        --paired-out  put both paired-end reads into --other file
+                      (default: if one read is accepted and the other is not,
+                      separate the reads into --accept and --other files)
+
+        -r            ratio of the number of hits on the read / read length
+                      (default Illumina: 0.25, Roche 454: 0.15)
+
+        -F            search only the forward strand
+        -R            search only the reverse-complementary strand
+                      (default: both strands are searched)
+
+        -a            number of threads to use
+                      (default: 1)
+
+        -m            (m x 4096 bytes) for loading the reads into memory
+                      ex. '-m 4' means 4*4096 = 16384 bytes will be allocated for the reads
+                      note: maximum -m is 1020039
+                      (default: m = 262144 = 1GB)
+
+        -v            verbose
+                      (default: deactivated)
+
+        -h            help
+
+        --version     version number
+
+  </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged-paired-reads_output.fastq	Wed Apr 10 10:25:15 2013 -0400
@@ -0,0 +1,50 @@
+@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
+CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC
++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
+___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__
+@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
+CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC
++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
+__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa
+@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
+GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT
++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
+bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T
+@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
+ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG
++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
+bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__
+@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
+GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT
++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
+___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_
+@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
+TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC
++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
+_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b
+@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
+CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC
++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
+bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b
+@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
+GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA
++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
+bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb
+@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
+AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC
++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
+bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb
+@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
+GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC
++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
+baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
+@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
+GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT
++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
+bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q
+
+@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
+GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG
++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
+Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
+
--- a/tool-data/rRNA_databases.loc.sample	Fri Apr 05 13:07:39 2013 -0400
+++ b/tool-data/rRNA_databases.loc.sample	Wed Apr 10 10:25:15 2013 -0400
@@ -10,17 +10,17 @@
 #So, for example, if your database is rfam-5.8s and the path to your base name
 #is /data/rRNA_databases/rfam-5.8s, then the rRNA_databases.loc entry would look like this:
 #
-#rfam-5.8s          Database Rfam 5.8s            /data/rRNA_databases/rfam-5.8s
+#rfam-5.8s          Rfam 5.8S eukarya            /data/rRNA_databases/rfam-5.8s
 #
 #Since SortMeRNA comes bundled with eight ribosomal databases, which are ready
 #for use after the tool installation, this sample file is in fact an actual file
 #to save the user the trouble of setting it.
 #
-rfam-5.8s	Database Rfam 5.8s	$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
-rfam-5s	Database Rfam 5s	$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
-silva-arc-16s	Database Silva-Arc 16s	$SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta
-silva-arc-23s	Database Silva-Arc 23s	$SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta
-silva-bac-16s	Database Silva-Bac 16s	$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
-silva-bac-23s	Database Silva-Bac 23s	$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
-silva-euk-18s	Databse Silva-Euk 18s	$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
-silva-euk-28s	Database Silva-Euk 28s	$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
+rfam-5.8s	Rfam 5.8S eukarya	$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
+rfam-5s	Rfam 5S archaea/bacteria	$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
+silva-arc-16s	SILVA 16S archaea	$SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta
+silva-arc-23s	SILVA 23S archaea	$SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta
+silva-bac-16s	SILVA 16S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
+silva-bac-23s	SILVA 23S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
+silva-euk-18s	SILVA 18S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
+silva-euk-28s	SILVA 28S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
--- a/tool_dependencies.xml	Fri Apr 05 13:07:39 2013 -0400
+++ b/tool_dependencies.xml	Wed Apr 10 10:25:15 2013 -0400
@@ -15,6 +15,10 @@
                     <source>buildtrie</source>
                     <destination>$INSTALL_DIR/bin</destination>
                 </action>
+                <action type="move_file">
+                    <source>scripts/merge-paired-reads.sh</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
                 <action type="move_directory_files">
                     <source_directory>rRNA_databases</source_directory>
                     <destination_directory>$INSTALL_DIR/rRNA_databases</destination_directory>
@@ -32,7 +36,7 @@
             </actions>
         </install>
         <readme>
-SortMeRNA requires g++ 4.3 and later.
+SortMeRNA requires g++ 4.3 or later. Installation may take a moment since ribosomal databases have to be indexed.
 	</readme>
     </package>
 </tool_dependency>