Mercurial > repos > nick > duplex
view misc/sscs_diff.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | af383638de66 |
| children |
line wrap: on
line source
#!/usr/bin/env python
"""Write out the SSCSs whose sequences differ between two pipeline versions."""
from __future__ import division
import sys
import argparse
import gzip

OPT_DEFAULTS = {}
USAGE = "%(prog)s [options]"
DESCRIPTION = ("""Find differences between the SSCS produced by one version of the pipeline """
               """and another, when working on the same input MSA's.""")
EPILOG = """Input files whose names end in ".gz" are read as gzip-compressed. Output records are
written in byte-wise sorted order, independent of the locale."""


def main(argv):
    """Compare two SSCS FASTA files and write the records whose sequences differ.

    Both inputs are read as two-line records (a header line followed by a
    sequence line). A record from the "after" file is reported when a record
    with the same name exists in the "before" file and the two sequences are
    not equal. The reported header uses the name and family size from the
    "after" file in both output files.

    Args:
        argv: The full argument vector, including the program name at
            index 0 (only ``argv[1:]`` is parsed).

    Returns:
        None. Malformed headers terminate the program through ``fail()``.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('sscs_before', metavar='sscs.all.before.fa',
                        help='SSCSs from earlier version (can be gzipped).')
    parser.add_argument('sscs_after', metavar='sscs.all.after.fa',
                        help='SSCSs from later version (can be gzipped).')
    parser.add_argument('-b', '--before', metavar='sscs.all.before.diffs.fa', required=True,
                        help='Output SSCSs from earlier version that differ from the SSCS in the '
                             'later version here.')
    parser.add_argument('-a', '--after', metavar='sscs.all.after.diffs.fa', required=True,
                        help='Output SSCSs from later version that differ from the SSCS in the '
                             'earlier version here.')

    args = parser.parse_args(argv[1:])

    # Map each SSCS name to its sequence in the earlier version. Records are
    # visited in sorted order, so for a repeated name the last one wins.
    sscs_before = {}
    for header_field, seq in _read_sorted_records(args.sscs_before):
        header_parts = header_field.lstrip(b'>').split()
        if not header_parts:
            fail('Error: empty header line in {!r}.'.format(args.sscs_before))
        sscs_before[header_parts[0]] = seq

    # Data is handled as bytes throughout, so the outputs are opened in binary
    # mode; "with" guarantees both files are closed even if an error occurs.
    with open(args.before, 'wb') as before_fh, open(args.after, 'wb') as after_fh:
        for header_field, seq_after in _read_sorted_records(args.sscs_after):
            header_parts = header_field.lstrip(b'>').split()
            if len(header_parts) != 2:
                fail('Error: expected a header of the form ">name family_size" in {!r}, '
                     'but found {!r}.'.format(args.sscs_after, header_field))
            name, fam_size = header_parts
            if name not in sscs_before:
                continue
            seq_before = sscs_before[name]
            if seq_before == seq_after:
                continue
            title = b'>' + name + b' ' + fam_size + b'\n'
            before_fh.write(title + seq_before + b'\n')
            after_fh.write(title + seq_after + b'\n')


def _read_sorted_records(path):
    """Yield (header, sequence) byte-string pairs from a FASTA file, sorted.

    This is an in-process equivalent of ``paste - - | sort`` (in the C
    locale): consecutive lines are paired, joined with a tab, sorted
    byte-wise, and the first two tab-separated fields of each joined line
    are yielded. A trailing unpaired line gets an empty second field.

    Args:
        path: Input file name; opened with gzip when it ends in ".gz".
    """
    opener = gzip.open if path.endswith('.gz') else open
    pasted = []
    with opener(path, 'rb') as handle:
        lines = iter(handle)
        for first in lines:
            # Consume the partner line from the same iterator.
            second = next(lines, b'')
            pasted.append(first.rstrip(b'\r\n') + b'\t' + second.rstrip(b'\r\n'))
    pasted.sort()
    for record in pasted:
        fields = record.split(b'\t')
        yield fields[0], fields[1]


def fail(message):
    """Print ``message`` to stderr and exit with status 1."""
    sys.stderr.write(message + "\n")
    sys.exit(1)


if __name__ == '__main__':
    sys.exit(main(sys.argv))
