changeset 1:361d2cb38860 draft

Uploaded
author iuc
date Wed, 25 Feb 2015 22:42:34 -0500
parents b0e3a2df7897
children 048f294031c7
files .shed.yml pear.xml
diffstat 2 files changed, 45 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/.shed.yml	Wed Feb 11 07:52:43 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/pear
-owner: iuc
-name: pear
--- a/pear.xml	Wed Feb 11 07:52:43 2015 -0500
+++ b/pear.xml	Wed Feb 25 22:42:34 2015 -0500
@@ -13,8 +13,15 @@
     <command>
 <![CDATA[
     pear
-        -f $forward
-        -r $reverse
+        #if str( $library.type ) == "paired":
+            -f "$library.forward"
+            -r "$library.reverse"
+        #else
+            ## paired collection: pass its forward/reverse members, quoted like the branch above
+            -f "$library.input_collection.forward"
+            -r "$library.input_collection.reverse"
+        #end if
+
         --output pear
         --p-value $pvalue
         --min-overlap $min_overlap
@@ -34,37 +41,50 @@
 ]]>
     </command>
     <inputs>
-        <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq"
-            label="Specify the name of file that contains the forward paired-end reads" help="-f" />
-        <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq"
-            label="Specify the name of file that contains the reverse paired-end reads" help="-r" />
+        <conditional name="library"><!-- how the paired-end reads are supplied; the command template branches on $library.type -->
+            <param name="type" type="select" label="Dataset type">
+              <option value="paired">Paired-end</option>
+              <option value="paired_collection">Paired-end Dataset Collection</option>
+            </param>
+            <when value="paired"><!-- two separate datasets, passed to pear as -f / -r -->
+                <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq"
+                    label="Name of file that contains the forward paired-end reads" help="-f" />
+                <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq"
+                    label="Name of file that contains the reverse paired-end reads" help="-r" />
+            </when>
+            <when value="paired_collection"><!-- one paired collection; its forward/reverse members are passed as -f / -r -->
+                <param name="input_collection" format="fastqillumina, fastqsanger, fastq"
+                    type="data_collection" collection_type="paired"
+                    label="FASTQ Paired Dataset" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33. (-f and -r)" />
+            </when>
+        </conditional>
 
         <!-- optional -->
         <param name="pvalue" type="float" value="0.01" min="0" optional="True" max="1" label="Specify a p-value for the statistical test"
             help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Setting 1.0 disables the test. (--p-value)" />
 
-        <param name="min_overlap" type="integer" value="10" optional="True" label="Specify the minimum overlap size"
+        <param name="min_overlap" type="integer" value="10" optional="True" label="Minimum overlap size"
             help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" />
 
-        <param name="max_assembly_length" type="integer" value="0" optional="True" label="Specify the maximum possible length of the assembled sequences"
+        <param name="max_assembly_length" type="integer" value="0" optional="True" label="Maximum possible length of the assembled sequences"
             help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" />
 
-        <param name="min_assembly_length" type="integer" value="50" optional="True" label="Specify the minimum possible length of the assembled sequences"
+        <param name="min_assembly_length" type="integer" value="50" optional="True" label="Minimum possible length of the assembled sequences"
             help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" />
 
-        <param name="min_trim_length" type="integer" value="1" optional="True" label="Specify the minimum length of reads after trimming the low quality part"
+        <param name="min_trim_length" type="integer" value="1" optional="True" label="Minimum length of reads after trimming the low quality part"
             help="See option -q. (--min-trim-length)" />
 
-        <param name="quality_threshold" type="integer" value="0" optional="True" label="Specify the quality score threshold for trimming the low quality part of a read"
+        <param name="quality_threshold" type="integer" value="0" optional="True" label="Quality score threshold for trimming the low quality part of a read"
                 help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" />
 
-        <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1" label="Specify the maximal proportion of uncalled bases in a read"
+        <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1" label="Maximal proportion of uncalled bases in a read"
             help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" />
 
         <param name="cap" type="integer" value="40" optional="True" label="Specify  the upper bound for the resulting quality score"
             help="If set to zero, capping is disabled. (--cap)" />
 
-        <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)">
+        <param name="test_method" type="select" label="Type of statistical test" help="(--test-method)">
             <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option>
             <option value="2">Use the acceptance probability (2)</option>
         </param>
@@ -74,19 +94,17 @@
         <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false"
             label="Use N base if uncertain" help="When  merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" />
 
-        <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)">
+        <param name="score_method" type="select" label="Scoring method" help="(--score-method)">
             <option value="1">OES with +1 for match and -1 for mismatch</option>
             <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option>
             <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option>
         </param>
 
-        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files">
+        <param name="outputs" type="select" display="checkboxes" optional="False" multiple="True" label="Output files">
             <option value="assembled" selected="True">Assembled reads</option>
             <option value="forward">Forward unassembled reads</option>
             <option value="reverse">Reverse unassembled reads</option>
             <option value="discarded">Discarded reads</option>
-
-            <validator type="no_options" message="Please select at least one output file." />
         </param>
     </inputs>
     <outputs>
@@ -118,82 +136,20 @@
     <help>
 <![CDATA[
 
+
 **What it does**
 
-PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger.
-It is fully parallelized and can run with as low as just a few kilobytes of memory.
-
-
-**Parameters**
-
-
-Standard (mandatory):
-  -f, --forward-fastq         <str>     Forward paired-end FASTQ file.
-  -r, --reverse-fastq         <str>     Reverse paired-end FASTQ file.
+PEAR_ is an ultrafast, memory-efficient and highly accurate paired-end read merger.
+PEAR evaluates all possible paired-end read overlaps without requiring the target fragment
+size as input. In addition, it implements a statistical test for minimizing false-positive results.
+Together with a highly optimized implementation, it can merge millions of paired-end reads within a couple of minutes
+on a standard desktop computer.
 
-Optional:
-  -p, --p-value               <float>   Specify  a p-value for the statistical test. If the computed
-                                        p-value of a possible assembly exceeds the specified p-value
-                                        then  paired-end  read  will not be assembled. Valid options
-                                        are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables
-                                        the test. (default: 0.01)
-  -v, --min-overlap           <int>     Specify the minimum overlap size. The minimum overlap may be
-                                        set to 1 when the statistical test is used. However, further
-                                        restricting  the  minimum overlap size to a proper value may
-                                        reduce false-positive assembles. (default: 10)
-  -m, --max-assembly-length   <int>     Specify   the  maximum  possible  length  of  the  assembled
-                                        sequences.  Setting this value to 0 disables the restriction
-                                        and assembled sequences may be arbitrary long. (default: 0)
-  -n, --min-assembly-length   <int>     Specify   the  minimum  possible  length  of  the  assembled
-                                        sequences.  Setting this value to 0 disables the restriction
-                                        and  assembled  sequences  may be arbitrary short. (default:
-                                        50)
-  -t, --min-trim-length       <int>     Specify  the  minimum length of reads after trimming the low
-                                        quality part (see option -q). (default: 1)
-  -q, --quality-threshold     <int>     Specify  the  quality  score  threshold for trimming the low
-                                        quality  part  of  a  read.  If  the  quality  scores of two
-                                        consecutive  bases  are  strictly  less  than  the specified
-                                        threshold,  the  rest of the read will be trimmed. (default:
-                                        0)
-  -u, --max-uncalled-base     <float>   Specify  the maximal proportion of uncalled bases in a read.
-                                        Setting this value to 0 will cause PEAR to discard all reads
-                                        containing  uncalled  bases.  The other extreme setting is 1
-                                        which  causes  PEAR  to process all reads independent on the
-                                        number of uncalled bases. (default: 1)
-  -g, --test-method           <int>     Specify  the  type  of  statistical  test.  Two  options are
-                                        available. (default: 1)
-                                        1: Given the minimum allowed overlap, test using the highest
-                                        OES. Note that due to its discrete nature, this test usually
-                                        yields  a lower p-value for the assembled read than the cut-
-                                        off  (specified  by -p). For example, setting the cut-off to
-                                        0.05  using  this  test,  the  assembled reads might have an
-                                        actual p-value of 0.02.
+For more information please look at the documentation_ and `github repository`_.
 
-                                        2. Use the acceptance probability (m.a.p). This test methods
-                                        computes  the same probability as test method 1. However, it
-                                        assumes  that  the  minimal  overlap is the observed overlap
-                                        with  the  highest  OES, instead of the one specified by -v.
-                                        Therefore,  this  is  not  a  valid statistical test and the
-                                        'p-value'  is  in fact the maximal probability for accepting
-                                        the assembly. Nevertheless, we observed in practice that for
-                                        the case the actual overlap sizes are relatively small, test
-                                        2  can  correctly  assemble  more  reads  with only slightly
-                                        higher false-positive rate.
-  -e, --empirical-freqs                 Disable  empirical base frequencies. (default: use empirical
-                                        base frequencies)
-  -s, --score-method          <int>     Specify the scoring method. (default: 2)
-                                        1. OES with +1 for match and -1 for mismatch.
-                                        2: Assembly score (AS). Use +1 for match and -1 for mismatch
-                                        multiplied by base quality scores.
-                                        3: Ignore quality scores and use +1 for a match and -1 for a
-                                        mismatch.
-  -c, --cap                   <int>     Specify  the upper bound for the resulting quality score. If
-                                        set to zero, capping is disabled. (default: 40)
-  -z, --nbase                           When  merging  a  base-pair  that  consists of two non-equal
-                                        bases  out  of which none is degenerate, set the merged base
-                                        to N and use the highest quality score of the two bases
-
-
+.. _PEAR: http://sco.h-its.org/exelixis/web/software/pear/
+.. _documentation: http://sco.h-its.org/exelixis/web/software/pear/doc.html
+.. _github repository: https://github.com/xflouris/PEAR
 
 
 ]]>