Mercurial > repos > crusoe > khmer
annotate filter-below-abund.py @ 60:fe697e0cb24a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
| author | crusoe |
|---|---|
| date | Tue, 07 Jul 2015 11:59:39 -0400 |
| parents | 0b238b083f77 |
| children |
| rev | line source |
|---|---|
|
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
1 #! /usr/bin/env python |
| 45 | 2 # |
|
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
3 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is |
|
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
4 # Copyright (C) Michigan State University, 2009-2015. It is licensed under |
|
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
5 # the three-clause BSD license; see LICENSE. |
| 45 | 6 # Contact: khmer-project@idyll.org |
| 7 # | |
|
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
8 from __future__ import print_function |
| 45 | 9 import sys |
| 10 import os | |
| 11 import khmer | |
| 12 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter | |
| 13 | |
# Tuning knobs handed to ThreadedSequenceProcessor in main().
# WORKER_THREADS is reported to the user as 'N THREADS'.
WORKER_THREADS = 8
# Third constructor argument of ThreadedSequenceProcessor; presumably the
# number of records batched per unit of work -- TODO confirm against khmer.
GROUPSIZE = 100

# Abundance cutoff passed to trim_below_abundance() for every read.
CUTOFF = 50
| 18 | |
| 19 ### | |
| 20 | |
| 21 | |
def main(argv=None):
    """Filter each input reads file against a saved counting table.

    For every input file, reads are passed through
    ``trim_below_abundance(seq, CUTOFF)`` on the loaded counting table and
    the surviving (name, trimmed sequence) pairs are written to
    ``<basename of input>.below`` in the current working directory.

    Args:
        argv: Optional list of command-line arguments *without* the program
            name: ``[counting_table, reads_file, ...]``.  When ``None``
            (the default) ``sys.argv[1:]`` is used, which matches the
            original command-line behaviour.

    Raises:
        SystemExit: with a usage message when no counting table is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # A missing argument used to surface as a bare IndexError traceback;
        # report it as a usage error instead.
        sys.exit('usage: filter-below-abund.py <counting table> '
                 '[<reads file> ...]')

    counting_ht = args[0]
    infiles = args[1:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_counting_hash(counting_ht)
    K = ht.ksize()

    # The per-record callback depends only on the table and its k-mer size,
    # so it is built once rather than once per input file.
    def process_fn(record, ht=ht):
        name = record['name']
        seq = record['sequence']
        # Reads containing ambiguous bases are rejected outright.
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        # Keep the read only if at least one full k-mer survives trimming.
        if trim_at >= K:
            return name, trim_seq

        # (None, None) presumably tells the processor to drop the record --
        # TODO confirm against ThreadedSequenceProcessor.
        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # Close (and flush) each output file as soon as its input is done
        # instead of leaking one handle per input file.
        # NOTE(review): this relies on tsp.start() returning only after all
        # output has been written -- confirm for the khmer version in use.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)


if __name__ == '__main__':
    main()
