Mercurial > repos > nick > duplex
comparison misc/msa_sscs_matcher.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | af383638de66 |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #!/usr/bin/env python | |
2 from __future__ import division | |
3 import sys | |
4 import argparse | |
5 | |
# Defaults applied to the argument parser via parser.set_defaults(); currently none.
OPT_DEFAULTS = {}
# Usage line shown by argparse, which expands %(prog)s to the program name.
# The "|" here is a real shell pipe: the MSA table is streamed in on stdin.
USAGE = "gunzip -c families.msa.tsv.gz | %(prog)s sscs.set.fa"
# Help-text description handed to argparse.ArgumentParser.
DESCRIPTION = """Find the input MSA's which produced a given set of SSCS's. Pipe the full set of
MSA's to stdin and it will filter them to the matching MSA's on stdout."""
10 | |
11 | |
def main(argv):
    """Filter the MSA rows on stdin down to those named in an SSCS FASTA file.

    Each stdin row is expected to hold six tab-separated fields (barcode,
    order, mate, read name, sequence, quality). A row is echoed to stdout,
    unmodified, when "barcode.order.mate" matches the first
    whitespace-delimited token of some ">" header line in the SSCS file.

    Args:
        argv: Full argument vector, program name first; the single positional
            argument is the path to the SSCS FASTA file.

    Returns:
        None, so ``sys.exit(main(...))`` exits with status 0 on success.

    Raises:
        ValueError: If a non-blank stdin row does not have exactly six
            tab-separated fields.
    """
    parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)
    parser.add_argument('sscs', metavar='sscs.set.fa',
        help='A set of SSCS\'s, as output from the duplex.py script with the --sscs-file option.')
    args = parser.parse_args(argv[1:])

    sscs = _read_sscs_names(args.sscs)
    for line in _matching_msa_lines(sys.stdin, sscs):
        sys.stdout.write(line)


def _read_sscs_names(path):
    """Return the set of names taken from the ">" header lines of the file at path."""
    names = set()
    with open(path) as sscs_file:
        for line in sscs_file:
            if not line.startswith('>'):
                continue
            tokens = line.lstrip('>').split()
            # A bare ">" header carries no name; skip it rather than crash.
            if tokens:
                names.add(tokens[0])
    return names


def _matching_msa_lines(lines, names):
    """Yield, unchanged, each MSA row whose "barcode.order.mate" key is in names."""
    for line_num, line in enumerate(lines, 1):
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of input).
            continue
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 6:
            raise ValueError(
                'Line {} of the MSA input has {} tab-separated fields; expected 6.'
                .format(line_num, len(fields))
            )
        barcode, order, mate = fields[:3]
        if '.'.join((barcode, order, mate)) in names:
            yield line
34 | |
35 | |
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the exit status.
    sys.exit(main(sys.argv))