Galaxy |

Changeset 45:0b238b083f77 (2014-07-12)

Previous changeset 44:46d13bbb21f2 (2014-06-30) Next changeset 46:471f3e085664 (2014-07-12)

Commit message:
2 more tools

modified:
count-median.xml
filter-abund.xml
macros.xml
normalize-by-median.xml

added:
README.txt
do-partition.xml
filter-below-abund.py
filter-below-abund.xml
test-data/test-abund-read-2.fa.ct

diff -r 46d13bbb21f2 -r 0b238b083f77 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Sat Jul 12 11:13:21 2014 -0400

@@ -0,0 +1,5 @@
+Requires galaxy-central dating from 2014-06-30 or newer
+
+https://bitbucket.org/galaxy/galaxy-central/commits/4de240e5a7819c768b8267c19e477530dab54370
+
+

diff -r 46d13bbb21f2 -r 0b238b083f77 count-median.xml
--- a/count-median.xml Mon Jun 30 16:51:11 2014 -0400
+++ b/count-median.xml Sat Jul 12 11:13:21 2014 -0400

@@ -37,7 +37,7 @@
</stdio>

<tests>
- <test>
+ <test interactor="api">
                         <param name="input_sequence_filename" value="test-abund-read-2.fa" />
                         <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
                         <output name="output_summary_filename">

diff -r 46d13bbb21f2 -r 0b238b083f77 do-partition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/do-partition.xml Sat Jul 12 11:13:21 2014 -0400

[

@@ -0,0 +1,93 @@
+<tool id="gedlab-khmer-do-partition"
+      name="Sequence partition all-in-one"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Load, partition, and annotate FAST[AQ] sequences
+    </description>
+    <macros>
+        <!-- @BINARY@ is substituted into the command below, so it must
+             spell the script name exactly. -->
+        <token name="@BINARY@">do-partition.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs the script on every selected input with "output" as its
+         positional argument, then moves output.info onto the "information"
+         dataset and the *.part files into ./output.
+         @TABLEPARAMS@ and @THREADS@ are presumably tokens supplied by
+         macros.xml (confirm there).
+         The ';' that terminates the script invocation sits on its own line
+         so that it is emitted as template text and cannot be consumed as
+         part of the '#end for' directive. -->
+    <command>
+mkdir -p output;
+@BINARY@
+@TABLEPARAMS@
+@THREADS@
+output
+#for input in $inputs
+$input
+#end for
+;
+mv output.info $information ;
+mv *.part output/
+    </command>
+
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+    </inputs>
+    <outputs>
+        <!-- Filled by the "mv output.info $information" step of the command.
+             "txt" is Galaxy's extension for plain text. -->
+        <data name="information"
+              format="txt"
+              label="${tool.name} summary for #echo ','.join(map(str, $inputs ))#" />
+        <expand macro="output_sequences" />
+    </outputs>
+    <stdio>
+        <!-- Any non-zero exit status marks the job as failed. -->
+        <exit_code range="1:"
+                   level="fatal" />
+    </stdio>
+
+</tool>

diff -r 46d13bbb21f2 -r 0b238b083f77 filter-abund.xml
--- a/filter-abund.xml Mon Jun 30 16:51:11 2014 -0400
+++ b/filter-abund.xml Sat Jul 12 11:13:21 2014 -0400

[

@@ -14,7 +14,7 @@
<expand macro="requirements" />
<command>
mkdir output; cd output;
-filter-abund.py
+@BINARY@
#if $cutoff != 2
   --cutoff=$cutoff
#fi
@@ -43,7 +43,8 @@
<expand macro="input_counting_table_filename" />
</inputs>
<outputs>
- <expand macro="output_sequences" />
+ 
+ <expand macro="output_sequences_single" />
</outputs>
   <stdio>

diff -r 46d13bbb21f2 -r 0b238b083f77 filter-below-abund.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.py Sat Jul 12 11:13:21 2014 -0400

[

@@ -0,0 +1,59 @@
+#! /usr/bin/env python2
+#
+# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2013. It is licensed under
+# the three-clause BSD license; see doc/LICENSE.txt.
+# Contact: khmer-project@idyll.org
+#
+import sys
+import screed.fasta
+import os
+import khmer
+from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
+
+# Passed as the second argument to ThreadedSequenceProcessor and reported
+# as 'N THREADS' at startup.
+WORKER_THREADS = 8
+# Passed as the third argument to ThreadedSequenceProcessor; presumably the
+# number of records handed to a worker at a time (TODO confirm).
+GROUPSIZE = 100
+
+# Abundance threshold given to ht.trim_below_abundance() for every read.
+CUTOFF = 50
+
+###
+
+
+def main():
+    """Trim each input sequence file against a saved k-mer counting table.
+
+    Command line: <counting table> <sequence file> [<sequence file> ...]
+
+    For every sequence file the results are written to '<basename>.below'
+    in the current working directory.
+    """
+    table_path = sys.argv[1]
+    seq_paths = sys.argv[2:]
+
+    print 'file with ht: %s' % table_path
+    print '-- settings:'
+    print 'N THREADS', WORKER_THREADS
+    print '--'
+
+    print 'making hashtable'
+    ht = khmer.load_counting_hash(table_path)
+    K = ht.ksize()
+
+    def process_fn(record, ht=ht):
+        # Per-read callback handed to ThreadedSequenceProcessor. Returns
+        # (name, trimmed sequence) for a read to keep and (None, None)
+        # otherwise (presumably the processor drops those; confirm there).
+        name, seq = record['name'], record['sequence']
+        if 'N' not in seq:
+            trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)
+            # Only keep reads whose trim position is at least K.
+            if trim_at >= K:
+                return name, trim_seq
+        return None, None
+
+    for seq_path in seq_paths:
+        print 'filtering', seq_path
+        out_path = os.path.basename(seq_path) + '.below'
+        out_handle = open(out_path, 'w')
+
+        processor = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
+                                              GROUPSIZE)
+        processor.start(verbose_fasta_iter(seq_path), out_handle)
+
+if __name__ == '__main__':
+    main()

diff -r 46d13bbb21f2 -r 0b238b083f77 filter-below-abund.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-below-abund.xml Sat Jul 12 11:13:21 2014 -0400

[

@@ -0,0 +1,64 @@
+<tool id="gedlab-khmer-filter-below-abund"
+      name="Filter below abundance cutoff of 50"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Trims fastq/fasta sequences at k-mers with abundance below 50
+        based on a provided k-mer counting table.
+    </description>
+    <macros>
+        <!-- @BINARY@ is substituted into the command below. -->
+        <token name="@BINARY@">filter-below-abund.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs the script inside ./output with the counting table first and
+         every selected sequence file after it. The loop iterates $inputs
+         (with the '$'), the same form do-partition.xml uses.
+         NOTE(review): $countingtable_to_load must be the parameter name
+         that the input_counting_table_filename macro defines; confirm
+         against macros.xml. -->
+    <command>
+mkdir output; cd output;
+@BINARY@
+$countingtable_to_load
+#for input in $inputs
+ $input
+#end for
+    </command>
+
+    <inputs>
+        <expand macro="input_sequences_filenames" />
+        <expand macro="input_counting_table_filename" />
+    </inputs>
+    <outputs>
+        <!-- NOTE(review): the command never names this dataset; confirm
+             that the files the script leaves in ./output end up here. -->
+        <expand macro="output_sequences_single" />
+    </outputs>
+    <stdio>
+        <!-- Any non-zero exit status marks the job as failed. -->
+        <exit_code range="1:"
+                   level="fatal" />
+    </stdio>
+
+</tool>

diff -r 46d13bbb21f2 -r 0b238b083f77 macros.xml
--- a/macros.xml Mon Jun 30 16:51:11 2014 -0400
+++ b/macros.xml Sat Jul 12 11:13:21 2014 -0400

@@ -106,6 +106,11 @@
                         <discover_datasets pattern="__name__" directory="output" visible="true"/>
                 </data>
</xml>
+ <xml name="output_sequences_single">
+                <!-- One result dataset for tools that write a single
+                     sequence file. The name contains no whitespace so a
+                     command template can refer to it as $output.
+                     format="input" asks Galaxy to reuse the datatype of
+                     the tool's input dataset. -->
+                <data   name="output"
+                        format="input"
+                        label="${tool.name} processed nucleotide sequence file" />
+ </xml>
<xml name="input_zero">
                 <param  name="zero"
                         type="boolean"

diff -r 46d13bbb21f2 -r 0b238b083f77 normalize-by-median.xml
--- a/normalize-by-median.xml Mon Jun 30 16:51:11 2014 -0400
+++ b/normalize-by-median.xml Sat Jul 12 11:13:21 2014 -0400

[

@@ -26,13 +26,18 @@
--loadtable=$countingtable_to_load
#end if
--report-total-kmers
-#for input in $inputs
+#for entry in $many_inputs
+#for input in $entry.inputs
$input
#end for
+#end for
+--out=$output
</command>

<inputs>
- <expand macro="input_sequences_filenames" />
+ <repeat name="many_inputs" title="input(s) set" min="1" default="1">
+ <expand macro="input_sequences_filenames" />
+ </repeat>
<param name="paired_switch"
type="boolean"
checked="false"
@@ -43,6 +48,7 @@

<param name="countingtable_to_load"
type="data"
+ format="ct"
optional="true"
label="an optional k-mer counting table to load"
help="The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." />
@@ -60,11 +66,12 @@
</inputs>
<outputs>
<data name="countingtable"
- format="data"
+ format="ct"
label="${tool.name} k-mer counting table from  #echo ', '.join(map(str, $inputs ))#">
<filter>save_countingtable == True</filter>
</data>
- <expand macro="output_sequences" />
+ 
+ <expand macro="output_sequences_single" />
</outputs>
   <stdio>