Mercurial > repos > nick > duplex
annotate misc/msa_sscs_matcher.py @ 4:af383638de66 draft
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
author | nick |
---|---|
date | Mon, 23 Nov 2015 18:44:23 -0500 |
parents | |
children |
rev | line source |
---|---|
4
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
1 #!/usr/bin/env python |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
2 from __future__ import division |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
3 import sys |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
4 import argparse |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
5 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
# Defaults handed to the argument parser via set_defaults(); none are defined yet.
OPT_DEFAULTS = dict()

# Usage line passed to argparse.
USAGE = "gunzip -c families.msa.tsv.gz | %(prog)s sscs.set.fa"

# Program description passed to argparse.
DESCRIPTION = (
    "Find the input MSA's which produced a given set of SSCS's. Pipe the full set of\n"
    "MSA's to stdin and it will filter them to the matching MSA's on stdout."
)
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
10 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
11 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
def main(argv):
    """Filter the MSA rows on stdin down to those named in an SSCS FASTA file.

    The single positional argument is the path to a FASTA file. The first
    whitespace-delimited token of every '>' header line in it is collected as
    a wanted name. Each stdin row must have six tab-separated columns; the
    first three are joined with '.' and the row is echoed unchanged to stdout
    when that joined name is one of the wanted names.

    Args:
        argv: The full argument vector, including the program name at index 0.

    Returns:
        None, so the caller's sys.exit() reports success.

    Raises:
        ValueError: If a non-blank stdin row does not have exactly six
            tab-separated columns.
    """
    parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('sscs', metavar='sscs.set.fa',
        help='A set of SSCS\'s, as output from the duplex.py script with the --sscs-file option.')

    args = parser.parse_args(argv[1:])

    # Collect the names of the wanted SSCS's from the FASTA header lines.
    sscs = set()
    with open(args.sscs) as sscs_file:
        for line in sscs_file:
            if not line.startswith('>'):
                continue
            header_fields = line.lstrip('>').split()
            # A bare ">" header carries no name; skip it instead of raising
            # IndexError on the empty field list.
            if header_fields:
                sscs.add(header_fields[0])

    for line_num, line in enumerate(sys.stdin, 1):
        row = line.rstrip('\r\n')
        # Tolerate blank lines (e.g. a trailing empty line) rather than crash.
        if not row:
            continue
        fields = row.split('\t')
        if len(fields) != 6:
            # Same exception type the tuple unpacking used to raise, but with
            # enough context to locate the bad row.
            raise ValueError('Line {} of stdin has {} tab-separated fields; expected 6.'
                             .format(line_num, len(fields)))
        # Only the first three columns take part in the name.
        name = '.'.join(fields[:3])
        if name in sscs:
            # Write the original line so its line ending is preserved.
            sys.stdout.write(line)
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
34 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
35 |
af383638de66
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
nick
parents:
diff
changeset
|
if __name__ == '__main__':
    # Run the tool and pass main()'s return value on as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)