# HG changeset patch # User Michael R. Crusoe # Date 1405178001 14400 # Node ID 0b238b083f7723692874b039b56050f0b4e28b0d # Parent 46d13bbb21f2dc1be0796e2af4b142e89fcbdfda 2 more tools diff -r 46d13bbb21f2 -r 0b238b083f77 README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Sat Jul 12 11:13:21 2014 -0400 @@ -0,0 +1,5 @@ +Requires galaxy-central dating from 2014-06-30 or newer + +https://bitbucket.org/galaxy/galaxy-central/commits/4de240e5a7819c768b8267c19e477530dab54370 + + diff -r 46d13bbb21f2 -r 0b238b083f77 count-median.xml --- a/count-median.xml Mon Jun 30 16:51:11 2014 -0400 +++ b/count-median.xml Sat Jul 12 11:13:21 2014 -0400 @@ -37,7 +37,7 @@ - + diff -r 46d13bbb21f2 -r 0b238b083f77 do-partition.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/do-partition.xml Sat Jul 12 11:13:21 2014 -0400 @@ -0,0 +1,93 @@ + + + + Load, partition, and annotate FAST[AQ] sequences + + + do-parition.py + macros.xml + + + +mkdir -p output; +@BINARY@ +@TABLEPARAMS@ +@THREADS@ +output +#for input in $inputs +$input +#end for ; +mv output.info $infomation ; +mv *.part output/ + + + + + + + + + + + + + + + + + + diff -r 46d13bbb21f2 -r 0b238b083f77 filter-abund.xml --- a/filter-abund.xml Mon Jun 30 16:51:11 2014 -0400 +++ b/filter-abund.xml Sat Jul 12 11:13:21 2014 -0400 @@ -14,7 +14,7 @@ mkdir output; cd output; -filter-abund.py +@BINARY@ #if $cutoff != 2 --cutoff=$cutoff #fi @@ -43,7 +43,8 @@ - + + diff -r 46d13bbb21f2 -r 0b238b083f77 filter-below-abund.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-below-abund.py Sat Jul 12 11:13:21 2014 -0400 @@ -0,0 +1,59 @@ +#! /usr/bin/env python2 +# +# This file is part of khmer, http://github.com/ged-lab/khmer/, and is +# Copyright (C) Michigan State University, 2009-2013. It is licensed under +# the three-clause BSD license; see doc/LICENSE.txt. +# Contact: khmer-project@idyll.org +# +import sys +import screed.fasta +import os +import khmer +from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter + +WORKER_THREADS = 8 +GROUPSIZE = 100 + +CUTOFF = 50 + +### + + +def main(): + counting_ht = sys.argv[1] + infiles = sys.argv[2:] + + print 'file with ht: %s' % counting_ht + print '-- settings:' + print 'N THREADS', WORKER_THREADS + print '--' + + print 'making hashtable' + ht = khmer.load_counting_hash(counting_ht) + K = ht.ksize() + + for infile in infiles: + print 'filtering', infile + outfile = os.path.basename(infile) + '.below' + + outfp = open(outfile, 'w') + + def process_fn(record, ht=ht): + name = record['name'] + seq = record['sequence'] + if 'N' in seq: + return None, None + + trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF) + + if trim_at >= K: + return name, trim_seq + + return None, None + + tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE) + + tsp.start(verbose_fasta_iter(infile), outfp) + +if __name__ == '__main__': + main() diff -r 46d13bbb21f2 -r 0b238b083f77 filter-below-abund.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-below-abund.xml Sat Jul 12 11:13:21 2014 -0400 @@ -0,0 +1,64 @@ + + + + Trims fastq/fasta sequences at k-mers with abundance below 50 + based on a provided k-mer counting table. + + + filter-below-abund.py + macros.xml + + + +mkdir output; cd output; +@BINARY@ +$countingtable_to_load +#for input in inputs + $input +#end for + + + + + + + + + + + + + + + + + + diff -r 46d13bbb21f2 -r 0b238b083f77 macros.xml --- a/macros.xml Mon Jun 30 16:51:11 2014 -0400 +++ b/macros.xml Sat Jul 12 11:13:21 2014 -0400 @@ -106,6 +106,11 @@ + + + - + + + @@ -60,11 +66,12 @@ save_countingtable == True - + + diff -r 46d13bbb21f2 -r 0b238b083f77 test-data/test-abund-read-2.fa.ct