Mercurial > repos > veg > qfilt

diff qfilt.xml @ 1:2ab93e1952c5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qfilt commit c9f251e99e052ab4d87ed5fd89e466cdf387742b-dirty
author: veg
date: Wed, 18 Apr 2018 16:53:05 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qfilt.xml	Wed Apr 18 16:53:05 2018 -0400
@@ -0,0 +1,170 @@
+<?xml version="1.0"?>
+<tool id="qfilt" version="1.0.0" name="qfilt">
+    <description>filter sequencing data using simple heuristics</description>
+    <requirements>
+        <requirement type="package" version="0.0.1">qfilt</requirement>
+    </requirements>
+    <!-- Any exit code of 1 or higher from qfilt is reported as a job error. -->
+    <stdio>
+        <exit_code range="1:"/>
+    </stdio>
+    <command><![CDATA[
+        ## The advanced flags below are emitted only when the user opts in to them.
+        qfilt -o '$filtered_output'
+        #if str($options.advanced) == 'advanced':
+            -q $options.qscore -l $options.length
+            ## The split switch is a bare flag: the boolean expands to -s or to nothing.
+            $options.split
+            ## -P overrides every -m mode; -R is only meaningful together with -P.
+            #if $options.replace:
+                -P '$options.replace'
+                #if $options.remove:
+                    -R '$options.remove'
+                #end if
+            #elif $options.mode:
+                -m $options.mode
+            #end if
+            #if $options.prefix:
+                -t $options.mismatch
+                -T '$options.prefix'
+            #end if
+            -f $options.format
+            $options.tolerate_homopolymeric $options.tolerate_ambiguous
+        #end if
+        -Q '$input_fastq'
+    ]]></command>
+    <inputs>
+
+        <param name="input_fastq" argument="-Q" type="data" format="fastq" label="Input FASTQ" help="FASTQ File" />
+
+        <conditional name="options">
+
+            <param name="advanced" type="select" label="Additional options">
+                <option value="defaults">Use defaults</option>
+                <option value="advanced">Specify additional parameters</option>
+            </param>
+
+            <when value="defaults"/>
+
+            <when value="advanced">
+                <param name="qscore" argument="-q" type="integer" value="20" label="QScore" help="minimum per-base quality score below which a read will be split or truncated" />
+                <param name="length" argument="-l" type="integer" value="50" label="Length" help="minimum retained fragment LENGTH" />
+                <param name="mode" argument="-m" type="integer" value="0" min="0" max="7" label="Mode" help="MODE is a 3-bitmask (an integer in [0-7], default=0)" />
+                <!-- The split switch takes no value on the command line, so it is modelled as a checkbox. -->
+                <param name="split" argument="-s" type="boolean" truevalue="-s" falsevalue="" label="Split" help="when encountering a low q-score, split instead of truncate" />
+                <param name="tolerate_homopolymeric" argument="-p" type="boolean" truevalue="-p" falsevalue="" label="Tolerate Homopolymeric" help="tolerate low q-score homopolymeric regions" />
+                <param name="tolerate_ambiguous" argument="-a" type="boolean" truevalue="-a" falsevalue="" label="Tolerate Ambiguous" help="tolerate low q-score ambiguous nucleotides" />
+                <param name="replace" argument="-P" type="text" label="Replace" help="rather than splitting or truncating, replace low quality bases with CHAR this option OVERRIDES all -m mode options" />
+                <param name="remove" argument="-R" type="text" label="Remove" help="rather than splitting or truncating, remove reads which contain more than COUNT low quality bases this option only works in COMBINATION with the -P (punch) option" />
+                <param name="prefix" argument="-T" type="text" label="Prefix" help="if supplied, only reads with this PREFIX are retained, and the PREFIX is stripped from each contributing read" />
+                <param name="mismatch" argument="-t" type="integer" value="0" label="Mismatch" help="if PREFIX is supplied, prefix matching tolerates at most MISMATCH mismatches" />
+                <param name="format" argument="-f" type="select" label="Format" help="output in FASTA or FASTQ format (default=FASTA)" >
+                    <option value="FASTA">FASTA</option>
+                    <option value="FASTQ">FASTQ</option>
+                </param>
+            </when>
+
+        </conditional>
+
+    </inputs>
+    <outputs>
+        <!-- FASTA by default; the datatype follows the Format selector when FASTQ is requested. -->
+        <data format="fasta" name="filtered_output">
+            <change_format>
+                <when input="options.format" value="FASTQ" format="fastq"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_fastq" value="qfilt-in1.fastq" />
+            <output file="qfilt-out1.fa" ftype="fasta" name="filtered_output" />
+        </test>
+        <test>
+            <param name="input_fastq" value="qfilt-in1.fastq" />
+            <conditional name="options">
+                <param name="advanced" value="advanced" />
+                <!-- Advanced mode used to emit the split flag unconditionally; keep it on so qfilt-out2.fa stays valid. -->
+                <param name="split" value="true" />
+                <param name="tolerate_homopolymeric" value="False" />
+            </conditional>
+            <output file="qfilt-out2.fa" ftype="fasta" name="filtered_output" />
+        </test>
+    </tests>
+    <help><![CDATA[
+qfilt
+=====
+
+This simple program is meant to filter sequencing data,
+optionally removing or splitting reads with poor quality scores
+and to optionally *only* retain fragments from reads that are tagged with a given 5' sequence.
+
+Output
+------
+
+**stderr**::
+
+    run settings:
+        input fasta:         data/test.fna
+        input qual:          data/test.qual
+        min q-score:         15
+        min fragment length: 30
+        run mode:            0 (truncate/don't tolerate homopolymers/don't tolerate ambigs)
+        5' tag:              ATATCGCGAGGA
+        max tag mismatches:  0
+
+    run summary:
+        total bases       :  29570
+        original reads    :  305
+        q10               :  0.995502
+        q20               :  0.860298
+        q30               :  0.728745
+        mean q-score      :  33.2273
+        contributing reads:  5
+        retained fragments:  5
+
+    original read length distribution:
+        mean:                96.9508
+        median:              77
+        variance             3743.03
+        standard deviation:  61.1803
+        min:                 49
+        2.5%:                54
+        97.5%:               332
+        max:                 497
+
+    retained fragment length distribution:
+        mean:                41
+        median:              37
+        variance             72.5
+        standard deviation:  8.51469
+        min:                 33
+        2.5%:                33
+        97.5%:               54
+        max:                 54
+
+**stdout**::
+
+    >GM98SRO01B77KU rank=0000671 x=796.0 y=1996.0 length=58
+    CCACGCGTATCGATGTCGACTTTTTTTTCTTTTCTTACATAGTAG
+    >GM98SRO01BA3RP rank=0000953 x=419.5 y=1603.5 length=87
+    CTGATGCTGCACCAACTGTACTCCCTCGCGATA
+    >GM98SRO01E1BKW rank=0001233 x=1948.0 y=846.0 length=66
+    TACAGTTGGTGCAGCATCAGAAAAGTACGACATCGATACGCGTGGTCCTCGCGA
+    >GM98SRO01DVVNY rank=0001304 x=1476.0 y=636.5 length=84
+    ACGGCTGATGCTGCACCAACTGTACTCCCTCGCGATA
+    >GM98SRO01D6FIX rank=0001416 x=1596.0 y=1415.0 length=91
+    CGGCTGATGCTGCACCAACTGTACTCCCTCGCGATA
+    ]]></help>
+    <citations>
+        <!-- NOTE(review): this entry cites HyPhy rather than qfilt itself; confirm the intended reference. -->
+        <citation type="bibtex">
+            @UNPUBLISHED{spond,
+                author = "Sergei Kosakovsky Pond",
+                title = "HyPhy: Hypothesis Testing using Phylogenies",
+                year = "2000",
+                note = "http://hyphy.org/",
+                url = "http://hyphy.org/"}
+        </citation>
+    </citations>
+</tool>