Mercurial > repos > matthias > testtool

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gccontent.pl	Tue Jan 08 09:43:35 2019 -0500
@@ -0,0 +1,49 @@
+#!/usr/bin/perl -w
+
+# usage : perl gccontent.pl <FASTA file> <output file>
+#
+# For every record in the FASTA file, writes one line to the output file
+# holding the fraction of G/C bases (upper or lower case) among all bases
+# of that record, formatted with three decimals.
+
+use strict;
+use warnings;
+
+die "usage: perl gccontent.pl <FASTA file> <output file>\n" unless @ARGV == 2;
+my ($fasta_path, $out_path) = @ARGV;
+
+# Three-argument open: the file name can never be mistaken for an open mode,
+# and a failure stops the script instead of silently producing no data.
+open(my $in,  '<', $fasta_path) or die "Cannot read '$fasta_path': $!\n";
+open(my $out, '>', $out_path)   or die "Cannot write '$out_path': $!\n";
+
+# Running totals for the current record; $in_record becomes true once a
+# header or any sequence data has been seen.
+my ($gc, $length, $in_record) = (0, 0, 0);
+
+# Print the GC fraction of the current record. A record with no bases is
+# reported as 0.000 instead of dying on a division by zero.
+sub emit_record {
+    printf {$out} "%.3f\n", $length ? $gc / $length : 0;
+}
+
+while (my $line = <$in>) {
+    if ($line =~ /^>/) {
+        # A new header closes the previous record, if there was one.
+        emit_record() if $in_record;
+        ($gc, $length, $in_record) = (0, 0, 1);
+        next;
+    }
+    # Drop line endings (LF or CRLF) and blanks so they are not counted as
+    # bases; unlike chop, this never eats the last base of an unterminated
+    # final line.
+    $line =~ s/\s+//g;
+    next if $line eq '';
+    $in_record = 1;
+    $gc     += ($line =~ tr/GCgc//);
+    $length += length $line;
+}
+emit_record() if $in_record;
+
+close($in);
+close($out) or die "Cannot finish writing '$out_path': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gccontent.xml	Tue Jan 08 09:43:35 2019 -0500
@@ -0,0 +1,20 @@
+<tool id="gc_content" name="Compute GC content" version="1">
+  <description>for each sequence in a file</description>
+  <!-- Both paths are single-quoted so that a tool directory or dataset path
+       containing spaces or shell metacharacters reaches perl as one argument.
+       The script writes to output.tsv in the job working directory, which
+       the output below collects through from_work_dir. -->
+  <command>perl '$__tool_directory__/gccontent.pl' '$input' output.tsv</command>
+  <inputs>
+    <param format="fasta" name="input" type="data" label="Source file"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" from_work_dir="output.tsv" />
+  </outputs>
+  <help>
+This tool computes GC content from a FASTA file.
+
+The output contains one line per sequence, in input order, holding the
+fraction of G and C bases in that sequence rounded to three decimals.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqtk_seq.xml	Tue Jan 08 09:43:35 2019 -0500
@@ -0,0 +1,106 @@
+<tool id="seqtk_seq" name="Convert to FASTA (seqtk)" version="0.1.0">
+    <!-- Wraps "seqtk seq": converts a FASTQ dataset to FASTA, with optional
+         quality-based masking, BED-region masking and random subsampling. -->
+    <requirements>
+        <requirement type="package" version="1.2">seqtk</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+seqtk seq
+-a
+$shift_quality
+-q $quality_min
+-X $quality_max
+## mask_regions is optional; -M is only passed when a BED dataset was chosen.
+#if $mask_regions
+    -M '$mask_regions'
+#end if
+## Subsampling options only exist inside the "true" branch of the conditional.
+#if $sample.sample_selector
+    -f $sample.fraction
+    ## The seed is optional; -s is skipped when it was left empty.
+    #if str($sample.seed)!=''
+        -s $sample.seed
+    #end if
+#end if
+
+'$input1'
+>
+'$output1'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input1" format="fastq" label="Input FASTQ file" />
+        <param name="shift_quality" type="boolean" label="Shift quality"
+               truevalue="-V" falsevalue=""
+               help="shift quality by '(-Q) - 33' (-V)" />
+        <param name="quality_min" type="integer" label="Mask bases with quality lower than"
+               value="0" min="0" max="255" help="(-q)" />
+        <param name="quality_max" type="integer" label="Mask bases with quality higher than"
+               value="255" min="0" max="255" help="(-X)" />
+        <param name="mask_regions" type="data" label="Mask regions in BED"
+               format="bed" help="(-M)" optional="true" />
+        <conditional name="sample">
+            <param name="sample_selector" type="boolean" label="Sample fraction of sequences" />
+            <when value="true">
+                <param name="fraction" label="Fraction" type="float" value="1.0"
+                       help="(-f)" />
+                <param name="seed" label="Random seed" type="integer" value=""
+                       help="(-s)" optional="true" />
+            </when>
+            <when value="false">
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output1" format="fasta" />
+    </outputs>
+    <tests>
+        <!-- Defaults only: plain FASTQ to FASTA conversion. -->
+        <test>
+            <param name="input1" value="2.fastq"/>
+            <output name="output1" file="2.fasta"/>
+        </test>
+        <!-- NOTE(review): with quality_min=30 and quality_max=31 seqtk masks
+             out-of-range bases (lowercase by default, see -n in the help), so the
+             expected output may differ from the unmasked 2.fasta. Please confirm. -->
+        <test>
+            <param name="input1" value="2.fastq"/>
+            <!-- Boolean test values are given as true/false, not as the truevalue. -->
+            <param name="shift_quality" value="true"/>
+            <param name="quality_min" value="30"/>
+            <param name="quality_max" value="31"/>
+            <output name="output1" file="2.fasta"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Usage:   seqtk seq [options] <in.fq>|<in.fa>
+
+Options: -q INT    mask bases with quality lower than INT [0]
+         -X INT    mask bases with quality higher than INT [255]
+         -n CHAR   masked bases converted to CHAR; 0 for lowercase [0]
+         -l INT    number of residues per line; 0 for 2^32-1 [0]
+         -Q INT    quality shift: ASCII-INT gives base quality [33]
+         -s INT    random seed (effective with -f) [11]
+         -f FLOAT  sample FLOAT fraction of sequences [1]
+         -M FILE   mask regions in BED or name list FILE [null]
+         -L INT    drop sequences with length shorter than INT [0]
+         -c        mask complement region (effective with -M)
+         -r        reverse complement
+         -A        force FASTA output (discard quality)
+         -C        drop comments at the header lines
+         -V        shift quality by '(-Q) - 33'
+
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubseqtk,
+  author = {Li, Heng},
+  title = {seqtk},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/lh3/seqtk},
+}</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.fasta	Tue Jan 08 09:43:35 2019 -0500
@@ -0,0 +1,6 @@
+>EAS54_6_R1_2_1_413_324
+CCCTTCTTGTCTTCAGCGTTTCTCC
+>EAS54_6_R1_2_1_540_792
+TTGGCAGGCCAAGGCCGATGGATCA
+>EAS54_6_R1_2_1_443_348
+GTTGCTTCTGGCGTGGGTGGGGGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.fastq	Tue Jan 08 09:43:35 2019 -0500
@@ -0,0 +1,12 @@
+@EAS54_6_R1_2_1_413_324
+CCCTTCTTGTCTTCAGCGTTTCTCC
++
+;;3;;;;;;;;;;;;7;;;;;;;88
+@EAS54_6_R1_2_1_540_792
+TTGGCAGGCCAAGGCCGATGGATCA
++
+;;;;;;;;;;;7;;;;;-;;;3;83
+@EAS54_6_R1_2_1_443_348
+GTTGCTTCTGGCGTGGGTGGGGGGG
++EAS54_6_R1_2_1_443_348
+;;;;;;;;;;;9;7;;.7;393333
\ No newline at end of file