Mercurial > repos > nick > duplex
comparison misc/sscs_diff.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | af383638de66 |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from __future__ import division | |
| 3 import sys | |
| 4 import argparse | |
| 5 import subprocess | |
| 6 | |
OPT_DEFAULTS = {}
USAGE = "%(prog)s [options]"
DESCRIPTION = """Find differences between the SSCS produced by one version of the pipeline and
another, when working on the same input MSA's."""
EPILOG = """Requires the `gunzip`, `paste`, and `sort` commands on the PATH. Input file names are
never passed through a shell.
"""


def _sorted_records(path):
    """Yield (header_tokens, sequence) for each record of a FASTA file, in `sort` order.

    The file must hold one header line followed by one sequence line per record.
    It is streamed through `[gunzip -c |] paste - - | sort`, exactly as before,
    but the commands are started without a shell and the input is handed over
    as an already-open file, so the path is never interpreted by a shell or
    parsed as a command-line option.

    All values are bytes, so the code behaves the same on Python 2 and 3 and
    passes sequence data through unmodified.

    Args:
        path: Path to the FASTA file; decompressed first if it ends in ".gz".

    Yields:
        (tokens, seq): `tokens` is the whitespace-split header with the leading
        '>' removed (tokens[0] is the SSCS name); `seq` is the sequence line.
        Records with an empty header (e.g. trailing blank lines) are skipped.

    Raises:
        IOError/OSError: If the file cannot be opened or a command is missing.
        subprocess.CalledProcessError: If any stage of the pipeline exits non-zero.
    """
    commands = [['paste', '-', '-'], ['sort']]
    if path.endswith('.gz'):
        commands.insert(0, ['gunzip', '-c'])
    stages = []
    feed = open(path, 'rb')
    try:
        for command in commands:
            proc = subprocess.Popen(command, stdin=feed, stdout=subprocess.PIPE)
            # The child owns its copy now; closing ours lets EOF/SIGPIPE propagate.
            feed.close()
            feed = proc.stdout
            stages.append((command, proc))
        for raw_line in feed:
            fields = raw_line.rstrip(b'\r\n').split(b'\t')
            tokens = fields[0].lstrip(b'>').split()
            if not tokens:
                continue
            yield tokens, (fields[1] if len(fields) > 1 else b'')
    finally:
        feed.close()
        for _, proc in stages:
            proc.wait()
    # Only reached when the output was read to the end: surface tool failures
    # (e.g. a corrupt gzip file) instead of silently producing partial results.
    for command, proc in stages:
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, command)


def main(argv):
    """Write the SSCSs whose sequence differs between two pipeline runs.

    Every SSCS name present in both inputs with a different sequence is written
    to the `--before` file (with the earlier sequence) and to the `--after` file
    (with the later sequence), using the header from the later file. Records are
    emitted in the order `sort` gives the later file.

    Args:
        argv: Full argument vector, including the program name at index 0.

    Returns:
        None (so `sys.exit(main(...))` exits with status 0).

    Raises:
        IOError/OSError: If an input or output file cannot be opened.
        subprocess.CalledProcessError: If `gunzip`, `paste`, or `sort` fails.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('sscs_before', metavar='sscs.all.before.fa',
        help='SSCSs from earlier version (can be gzipped).')
    parser.add_argument('sscs_after', metavar='sscs.all.after.fa',
        help='SSCSs from later version (can be gzipped).')
    parser.add_argument('-b', '--before', metavar='sscs.all.before.diffs.fa', required=True,
        help='Output SSCSs from earlier version that differ from the SSCS in the later version here.')
    parser.add_argument('-a', '--after', metavar='sscs.all.after.diffs.fa', required=True,
        help='Output SSCSs from later version that differ from the SSCS in the earlier version here.')

    args = parser.parse_args(argv[1:])

    # Map SSCS name -> sequence for the earlier run.
    sscs_before = {}
    for tokens, seq in _sorted_records(args.sscs_before):
        sscs_before[tokens[0]] = seq

    # Binary mode matches the bytes read from the pipeline on both Python 2 and 3.
    with open(args.before, 'wb') as before_fh, open(args.after, 'wb') as after_fh:
        for tokens, seq_after in _sorted_records(args.sscs_after):
            seq_before = sscs_before.get(tokens[0])
            if seq_before is None or seq_before == seq_after:
                continue
            # Re-joining the tokens gives ">name fam_size" for normal headers and
            # still works when the family size is missing.
            header = b'>' + b' '.join(tokens) + b'\n'
            before_fh.write(header)
            before_fh.write(seq_before + b'\n')
            after_fh.write(header)
            after_fh.write(seq_after + b'\n')
| 66 | |
| 67 | |
def fail(message):
    """Report `message` on stderr (newline-terminated) and exit with status 1."""
    error_line = message + "\n"
    sys.stderr.write(error_line)
    raise SystemExit(1)
| 71 | |
| 72 | |
# Run as a script: hand the full argument vector to main() and use its return
# value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)
