Mercurial > repos > iuc > pear
changeset 1:361d2cb38860 draft
Uploaded
| author | iuc | 
|---|---|
| date | Wed, 25 Feb 2015 22:42:34 -0500 | 
| parents | b0e3a2df7897 | 
| children | 048f294031c7 | 
| files | .shed.yml pear.xml | 
| diffstat | 2 files changed, 45 insertions(+), 92 deletions(-) [+] | 
line wrap: on
 line diff
--- a/.shed.yml Wed Feb 11 07:52:43 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/pear -owner: iuc -name: pear
--- a/pear.xml Wed Feb 11 07:52:43 2015 -0500 +++ b/pear.xml Wed Feb 25 22:42:34 2015 -0500 @@ -13,8 +13,15 @@ <command> <![CDATA[ pear - -f $forward - -r $reverse + #if str( $library.type ) == "paired": + -f "$library.forward" + -r "$library.reverse" + #else + ## prepare collection + -f $library.input_collection.forward + -r $library.input_collection.reverse + #end if + --output pear --p-value $pvalue --min-overlap $min_overlap @@ -34,37 +41,50 @@ ]]> </command> <inputs> - <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq" - label="Specify the name of file that contains the forward paired-end reads" help="-f" /> - <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq" - label="Specify the name of file that contains the reverse paired-end reads" help="-r" /> + <conditional name="library"> + <param name="type" type="select" label="Dataset type"> + <option value="paired">Paired-end</option> + <option value="paired_collection">Paired-end Dataset Collection</option> + </param> + <when value="paired"> + <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq" + label="Name of file that contains the forward paired-end reads" help="-f" /> + <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq" + label="Name of file that contains the reverse paired-end reads" help="-r" /> + </when> + <when value="paired_collection"> + <param name="input_collection" format="fastqillumina, fastqsanger, fastq" + type="data_collection" collection_type="paired" + label="FASTQ Paired Dataset" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33. (-f and -r)" /> + </when> + </conditional> <!-- optional --> <param name="pvalue" type="float" value="0.01" min="0" optional="True" max="1" label="Specify a p-value for the statistical test" help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. 
Setting 1.0 disables the test. (--p-value)" /> - <param name="min_overlap" type="integer" value="10" optional="True" label="Specify the minimum overlap size" + <param name="min_overlap" type="integer" value="10" optional="True" label="Minimum overlap size" help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" /> - <param name="max_assembly_length" type="integer" value="0" optional="True" label="Specify the maximum possible length of the assembled sequences" + <param name="max_assembly_length" type="integer" value="0" optional="True" label="Maximum possible length of the assembled sequences" help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" /> - <param name="min_assembly_length" type="integer" value="50" optional="True" label="Specify the minimum possible length of the assembled sequences" + <param name="min_assembly_length" type="integer" value="50" optional="True" label="Minimum possible length of the assembled sequences" help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" /> - <param name="min_trim_length" type="integer" value="1" optional="True" label="Specify the minimum length of reads after trimming the low quality part" + <param name="min_trim_length" type="integer" value="1" optional="True" label="Minimum length of reads after trimming the low quality part" help="See option -q. 
(--min-trim-length)" /> - <param name="quality_threshold" type="integer" value="0" optional="True" label="Specify the quality score threshold for trimming the low quality part of a read" + <param name="quality_threshold" type="integer" value="0" optional="True" label="Quality score threshold for trimming the low quality part of a read" help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" /> - <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1" label="Specify the maximal proportion of uncalled bases in a read" + <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1" label="Maximal proportion of uncalled bases in a read" help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" /> <param name="cap" type="integer" value="40" optional="True" label="Specify the upper bound for the resulting quality score" help="If set to zero, capping is disabled. (--cap)" /> - <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)"> + <param name="test_method" type="select" label="Type of statistical test" help="(--test-method)"> <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option> <option value="2">Use the acceptance probability (2)</option> </param> @@ -74,19 +94,17 @@ <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false" label="Use N base if uncertain" help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. 
(--nbase)" /> - <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)"> + <param name="score_method" type="select" label="Scoring method" help="(--score-method)"> <option value="1">OES with +1 for match and -1 for mismatch</option> <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option> <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option> </param> - <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files"> + <param name="outputs" type="select" display="checkboxes" optional="False" multiple="True" label="Output files"> <option value="assembled" selected="True">Assembled reads</option> <option value="forward">Forward unassembled reads</option> <option value="reverse">Reverse unassembled reads</option> <option value="discarded">Discarded reads</option> - - <validator type="no_options" message="Please select at least one output file." /> </param> </inputs> <outputs> @@ -118,82 +136,20 @@ <help> <![CDATA[ + **What it does** -PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger. -It is fully parallelized and can run with as low as just a few kilobytes of memory. - - -**Parameters** - - -Standard (mandatory): - -f, --forward-fastq <str> Forward paired-end FASTQ file. - -r, --reverse-fastq <str> Reverse paired-end FASTQ file. +PEAR_ is an ultrafast, memory-efficient and highly accurate paired-end read merger. +PEAR evaluates all possible paired-end read overlaps without requiring the target fragment +size as input. In addition, it implements a statistical test for minimizing false-positive results. +Together with a highly optimized implementation, it can merge millions of paired-end reads within a couple of minutes +on a standard desktop computer. -Optional: - -p, --p-value <float> Specify a p-value for the statistical test. 
If the computed - p-value of a possible assembly exceeds the specified p-value - then paired-end read will not be assembled. Valid options - are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables - the test. (default: 0.01) - -v, --min-overlap <int> Specify the minimum overlap size. The minimum overlap may be - set to 1 when the statistical test is used. However, further - restricting the minimum overlap size to a proper value may - reduce false-positive assembles. (default: 10) - -m, --max-assembly-length <int> Specify the maximum possible length of the assembled - sequences. Setting this value to 0 disables the restriction - and assembled sequences may be arbitrary long. (default: 0) - -n, --min-assembly-length <int> Specify the minimum possible length of the assembled - sequences. Setting this value to 0 disables the restriction - and assembled sequences may be arbitrary short. (default: - 50) - -t, --min-trim-length <int> Specify the minimum length of reads after trimming the low - quality part (see option -q). (default: 1) - -q, --quality-threshold <int> Specify the quality score threshold for trimming the low - quality part of a read. If the quality scores of two - consecutive bases are strictly less than the specified - threshold, the rest of the read will be trimmed. (default: - 0) - -u, --max-uncalled-base <float> Specify the maximal proportion of uncalled bases in a read. - Setting this value to 0 will cause PEAR to discard all reads - containing uncalled bases. The other extreme setting is 1 - which causes PEAR to process all reads independent on the - number of uncalled bases. (default: 1) - -g, --test-method <int> Specify the type of statistical test. Two options are - available. (default: 1) - 1: Given the minimum allowed overlap, test using the highest - OES. Note that due to its discrete nature, this test usually - yields a lower p-value for the assembled read than the cut- - off (specified by -p). 
For example, setting the cut-off to - 0.05 using this test, the assembled reads might have an - actual p-value of 0.02. +For more information please look at the documentation_ and `github repository`_. - 2. Use the acceptance probability (m.a.p). This test methods - computes the same probability as test method 1. However, it - assumes that the minimal overlap is the observed overlap - with the highest OES, instead of the one specified by -v. - Therefore, this is not a valid statistical test and the - 'p-value' is in fact the maximal probability for accepting - the assembly. Nevertheless, we observed in practice that for - the case the actual overlap sizes are relatively small, test - 2 can correctly assemble more reads with only slightly - higher false-positive rate. - -e, --empirical-freqs Disable empirical base frequencies. (default: use empirical - base frequencies) - -s, --score-method <int> Specify the scoring method. (default: 2) - 1. OES with +1 for match and -1 for mismatch. - 2: Assembly score (AS). Use +1 for match and -1 for mismatch - multiplied by base quality scores. - 3: Ignore quality scores and use +1 for a match and -1 for a - mismatch. - -c, --cap <int> Specify the upper bound for the resulting quality score. If - set to zero, capping is disabled. (default: 40) - -z, --nbase When merging a base-pair that consists of two non-equal - bases out of which none is degenerate, set the merged base - to N and use the highest quality score of the two bases - - +.. _PEAR: http://sco.h-its.org/exelixis/web/software/pear/ +.. _documentation: http://sco.h-its.org/exelixis/web/software/pear/doc.html +.. _github repository: https://github.com/xflouris/PEAR ]]>
