Mercurial > repos > nick > duplex
view misc/msa_sscs_matcher.py @ 4:af383638de66 draft
planemo upload commit 022984f323d3da44f70b3bf79c684cfd8dda3f61-dirty
author | nick |
---|---|
date | Mon, 23 Nov 2015 18:44:23 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Filter a stream of MSA rows down to the ones matching a set of SSCS names.

The SSCS names are read from the header lines of a FASTA file given on the
command line. MSA rows are read from stdin as tab-separated lines and the
matching rows are echoed, unmodified, to stdout.
"""
from __future__ import division
import sys
import argparse

OPT_DEFAULTS = {}
USAGE = "gunzip -c families.msa.tsv.gz | %(prog)s sscs.set.fa"
DESCRIPTION = (
    "Find the input MSA's which produced a given set of SSCS's. "
    "Pipe the full set of MSA's to stdin and it will filter them to the "
    "matching MSA's on stdout."
)

# Number of tab-separated columns in an MSA row:
# barcode, order, mate, rname, seq, qual.
MSA_FIELD_COUNT = 6


def _read_sscs_names(fasta_lines):
    """Collect the sequence names from the header lines of a FASTA stream.

    Args:
        fasta_lines: An iterable of text lines (e.g. an open file).

    Returns:
        A set holding the first whitespace-delimited token of every line that
        starts with '>', with the leading '>' characters removed.
    """
    names = set()
    for line in fasta_lines:
        if not line.startswith('>'):
            continue
        tokens = line.lstrip('>').split()
        # A header that is nothing but '>' carries no name; ignore it instead
        # of failing with an IndexError.
        if tokens:
            names.add(tokens[0])
    return names


def _matching_lines(msa_lines, names):
    """Yield, unmodified, each MSA line whose name is in `names`.

    The name of a row is its first three columns (barcode, order, mate)
    joined with '.'.

    Args:
        msa_lines: An iterable of tab-separated text lines.
        names: A container of names to keep.

    Yields:
        The original line, including its line ending, for every match.

    Raises:
        ValueError: If a non-empty line does not have exactly
            MSA_FIELD_COUNT tab-separated fields.
    """
    for line_num, line in enumerate(msa_lines, 1):
        row = line.rstrip('\r\n')
        # Tolerate completely empty lines (e.g. a trailing blank line).
        if not row:
            continue
        fields = row.split('\t')
        if len(fields) != MSA_FIELD_COUNT:
            raise ValueError(
                'Line %d of the MSA input has %d tab-separated fields; expected %d.'
                % (line_num, len(fields), MSA_FIELD_COUNT)
            )
        # Only the first three columns identify the row.
        if '.'.join(fields[:3]) in names:
            yield line


def main(argv):
    """Run the filter as a command-line program.

    Args:
        argv: The full argument vector, including the program name at
            index 0 (as in sys.argv).

    Returns:
        None, which sys.exit() treats as a successful exit.
    """
    parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)
    parser.add_argument('sscs', metavar='sscs.set.fa',
                        help='A set of SSCS\'s, as output from the duplex.py script with the '
                             '--sscs-file option.')
    args = parser.parse_args(argv[1:])

    with open(args.sscs) as sscs_file:
        sscs = _read_sscs_names(sscs_file)

    # Stream stdin to stdout so the full MSA set is never held in memory.
    for line in _matching_lines(sys.stdin, sscs):
        sys.stdout.write(line)


if __name__ == '__main__':
    sys.exit(main(sys.argv))