Mercurial > repos > crusoe > khmer
annotate filter-below-abund.py @ 60:fe697e0cb24a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
author | crusoe |
---|---|
date | Tue, 07 Jul 2015 11:59:39 -0400 |
parents | 0b238b083f77 |
children |
rev | line source |
---|---|
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
1 #! /usr/bin/env python |
45 | 2 # |
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
3 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is |
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
4 # Copyright (C) Michigan State University, 2009-2015. It is licensed under |
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
5 # the three-clause BSD license; see LICENSE. |
45 | 6 # Contact: khmer-project@idyll.org |
7 # | |
60
fe697e0cb24a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
crusoe
parents:
45
diff
changeset
|
8 from __future__ import print_function |
45 | 9 import sys |
10 import os | |
11 import khmer | |
12 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter | |
13 | |
14 WORKER_THREADS = 8 | |
15 GROUPSIZE = 100 | |
16 | |
17 CUTOFF = 50 | |
18 | |
19 ### | |
20 | |
21 | |
def main():
    """Filter each input sequence file against a saved counting table.

    Command line: ``<counting table> [<sequence file> ...]``.

    The counting table named by the first argument is loaded once.  Each
    remaining argument is read with ``verbose_fasta_iter`` and processed by
    a ``ThreadedSequenceProcessor``; the result for an input is written to
    ``<basename of input>.below`` in the current working directory.

    Exits with status 1 and a usage message when the counting table
    argument is missing.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        print('usage: filter-below-abund.py <counting.ct> '
              '[<data1> <data2> ...]', file=sys.stderr)
        sys.exit(1)

    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_counting_hash(counting_ht)
    K = ht.ksize()

    # The per-record callback depends only on the loaded table, K and
    # CUTOFF, so it is defined once instead of once per input file.
    def process_fn(record, ht=ht):
        """Return ``(name, trimmed sequence)`` to keep, else ``(None, None)``.

        NOTE(review): presumably the sequence processor drops records whose
        returned name is None -- confirm against khmer.thread_utils.
        """
        name = record['name']
        seq = record['sequence']

        # Sequences containing an 'N' are rejected outright.
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        # Keep the trimmed sequence only if the trim position is at least K.
        if trim_at >= K:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # The context manager guarantees the output file is flushed and
        # closed for every input, including when processing raises.
        # NOTE(review): this assumes tsp.start() returns only after all
        # output has been written -- confirm against khmer.thread_utils.
        with open(outfile, 'w') as outfp:
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
57 | |
58 if __name__ == '__main__': | |
59 main() |