Mercurial > repos > jeanfred > sortmerna
changeset 3:9809be9b7567 draft default tip
Uploaded
author | jeanfred |
---|---|
date | Tue, 30 Apr 2013 11:17:15 -0400 |
parents | 5048c53a90f4 |
children | |
files | merge_paired_reads.xml sortmerna_wrapper.xml |
diffstat | 2 files changed, 65 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_paired_reads.xml Wed Apr 10 10:25:15 2013 -0400 +++ b/merge_paired_reads.xml Tue Apr 30 11:17:15 2013 -0400 @@ -3,7 +3,7 @@ <requirement type='package' version="1.7">sortmerna</requirement> </requirements> <description>Merges two fastq paired-reads files into one file.</description> - <command interpreter="bash"> + <command> merge-paired-reads.sh $input_reads1 $input_reads2 output_file.txt </command> <inputs> @@ -32,6 +32,7 @@ Merges two fastq paired-reads files into one file. To run merge-paired-reads:: - bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq + + bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq </help> </tool> \ No newline at end of file
--- a/sortmerna_wrapper.xml Wed Apr 10 10:25:15 2013 -0400 +++ b/sortmerna_wrapper.xml Tue Apr 30 11:17:15 2013 -0400 @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="utf-8"?> <tool id="sortmerna_wrapper" version="1.0" name="Filter with SortMeRNA"> <requirements> <requirement type='package' version="1.7">sortmerna</requirement> @@ -5,11 +6,16 @@ <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> <command interpreter="python"> sortmerna_wrapper.py - --sortmerna " -$read_family $input_reads $strand_search -$sequencing_type +#if str( $read_family.read_family_selector ) == 'other': + --I $input_reads -r $read_family.ratio_parameter +#else: + $read_family.read_family_selector $input_reads +#end if +#if str( $sequencing_type.sequencing_type_selector ) == 'paired': + $sequencing_type.paired_type +#end if #if $outputs_selected: #if 'accept' in $outputs_selected.value: @@ -36,22 +42,42 @@ #end if </command> <inputs> - <param name="read_family" type="select" format="text" - help="Currently only Solexa Illumina and Roche 454 technologies are supported. 
- The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> - <label>Sequencing technology of querying sequences (reads)</label> - <option value="--I">Illumina Solexa</option> - <option value="--454">Roche 454</option> - </param> + <conditional name="read_family"> + <param name="read_family_selector" type="select" format="text" + help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> + <label>Sequencing technology of querying sequences (reads)</label> + <option value="--I">Illumina Solexa</option> + <option value="--454">454 Roche</option> + <option value="other">Other</option> + </param> + <when value="other"> + <param name="ratio_parameter" type="float" value="1" min="0" max="1" + label="Ratio parameter (the number of hits on the read / read length)" + help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads. + For other read types, if the sequencing technology produces high quality reads with a low substitution error rate + (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27]. 
+ If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), + then the ratio parameter can be set to r=[0.13,0.17]."/> + </when> + </conditional> <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/> - <param name="sequencing_type" type="select" label="Sequencing type" display="radio" - help="The first option filters reads independently of each other - (paired reads can be separated between accept and rejected files)."> - <option value="">Reads are not paired</option> - <option value="--paired-in">Reads are paired: preserve order in output files</option> - <option value="--paired-out">Reads are paired: order does not matter in output files</option> - </param> + <conditional name="sequencing_type"> + <param name="sequencing_type_selector" type="select" label="Sequencing type"> + <option value="not_paired">Reads are not paired</option> + <option value="paired">Reads are paired</option> + </param> + <when value="paired"> + <param name="paired_type" type="select" label="If one read of a pair is accepted and the other not, output both reads" display="radio" + help="SortMeRNA does not use the pairing information for filtering RNA, + however if one read of a pair is accepted and the other is not, + the resulting output may break apart the pair into two separate files. 
+ The purpose of 'Reads are paired' option is to preserve the pairing of the reads."> + <option value="--paired-in">to accepted file</option> + <option value="--paired-out">to rejected file</option> + </param> + </when> + </conditional> <param name="strand_search" type="select" label="Which strands to search" display="radio"> <option value="">Search both strands</option> @@ -83,8 +109,7 @@ </conditional> <!-- Outputs --> - <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options" - help="SortMeRNA can output matching reads by database ; this will be made available in a future version of the wrapper too."> + <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"> <option value="accept" selected="True">Reads matching to at least one database</option> <option value="other">Reads not found in any database</option> </param> @@ -131,15 +156,15 @@ </stdio> <tests> <test> - <param name="read_family" value="I" /> + <param name="read_family_selector" value="I" /> <param name="input_reads" value="sortmerna_wrapper_in1.fastq" /> - <param name="sequencing_type" value ="" /> + <param name="sequencing_type_selector" value ="not_paired" /> <param name="strand_search" value="" /> <param name="databases_selector" value="cached" /> + <param name="input_databases" value="rfam-5.8s,rfam-5s" /> <param name="outputs_selected" value="accept,other" /> <param name="log" value="" /> <param name="options_type_selector" value="less" /> - <param name="input_databases" value="rfam-5.8s,rfam-5s" /> <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" /> <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> </test> @@ -180,6 +205,8 @@ The output will follow the same format (FASTA or FASTQ) as the reads. +In the standalone version of SortMeRNA, the user may output the matching reads in a separate file per database (--bydbs option). 
This option will be made available in a future version of Galaxy. + ------ **rRNA databases** @@ -230,6 +257,8 @@ **SortMeRNA parameter list** +The standalone, command-line version of SortMeRNA uses the following parameters. + For indexing (buildtrie): This program builds a Burst trie on an input rRNA database file in fasta format @@ -310,5 +339,16 @@ --version version number +------ + +**Bibliography** + +[1] Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO (2013) The SILVA ribosomal RNA gene database project: improved data processing and web-based tools, Nucleic Acids Research, 41 (D1): D590-D596. + +[2] Rfam 11.0: 10 years of RNA families. S.W. Burge, J. Daub, R. Eberhardt, J. Tate, L. Barquist, E.P. Nawrocki, S.R. Eddy, P.P. Gardner, A. Bateman. Nucleic Acids Research (2012), doi: 10.1093/nar/gks1005 + +[3] Edgar, R.C. (2010) Search and clustering orders of magnitude faster than BLAST, Bioinformatics 26(19), 2460-2461, doi: 10.1093/bioinformatics/btq461 + +[4] Loman NJ, Misra RV, Dallman TJ, Constantinidou C, Gharbia SE, Wain J, Pallen MJ (2012) Performance comparison of benchtop high-throughput sequencing platforms, Nature Biotechnology, 30 (5): 434-439. </help> </tool>