Mercurial > repos > nick > duplex
view misc/msa_sscs_matcher.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | af383638de66 |
children |
line wrap: on
line source
#!/usr/bin/env python
# Filter a stream of MSA records (stdin) down to the ones whose family name
# appears as a record name in a FASTA file of SSCS's, echoing them to stdout.
from __future__ import division
import sys
import argparse

OPT_DEFAULTS = {}
USAGE = "gunzip -c families.msa.tsv.gz | %(prog)s sscs.set.fa"
DESCRIPTION = (
    "Find the input MSA's which produced a given set of SSCS's. Pipe the full set of MSA's "
    "to stdin and it will filter them to the matching MSA's on stdout."
)

# Each MSA record is expected to hold: barcode, order, mate, read name,
# sequence, quality (tab-delimited).
_MSA_FIELD_COUNT = 6


def _read_sscs_names(fasta_lines):
    """Return the set of record names found on the '>' header lines.

    The name is the first whitespace-delimited token after the leading '>'.
    Header lines with nothing after the '>' are ignored instead of raising
    IndexError; an empty name could never match an MSA record anyway, since
    MSA names always contain the '.' separators.
    """
    names = set()
    for line in fasta_lines:
        if not line.startswith('>'):
            continue
        tokens = line.lstrip('>').split()
        if tokens:
            names.add(tokens[0])
    return names


def _filter_msa_lines(msa_lines, sscs_names):
    """Yield, unmodified, each MSA line whose family name is in sscs_names.

    The family name is "barcode.order.mate", built from the first three
    tab-delimited fields. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not have exactly six fields.
            The message includes the 1-based line number.
    """
    for line_num, line in enumerate(msa_lines, 1):
        record = line.rstrip('\r\n')
        if not record:
            # E.g. a stray trailing newline; not a record, so nothing to emit.
            continue
        fields = record.split('\t')
        if len(fields) != _MSA_FIELD_COUNT:
            raise ValueError(
                'Line {} of the MSA input has {} tab-delimited fields (expected {}).'
                .format(line_num, len(fields), _MSA_FIELD_COUNT)
            )
        if '.'.join(fields[:3]) in sscs_names:
            yield line


def main(argv):
    """Command-line entry point.

    Args:
        argv: The full argument vector, including the program name at index 0.

    Reads the SSCS FASTA file named on the command line, then copies every
    matching MSA line from stdin to stdout. Returns None (exit status 0).
    """
    parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)
    parser.add_argument('sscs', metavar='sscs.set.fa',
                        help='A set of SSCS\'s, as output from the duplex.py script with the --sscs-file option.')
    args = parser.parse_args(argv[1:])

    with open(args.sscs) as sscs_file:
        sscs_names = _read_sscs_names(sscs_file)

    for line in _filter_msa_lines(sys.stdin, sscs_names):
        sys.stdout.write(line)


if __name__ == '__main__':
    sys.exit(main(sys.argv))