Mercurial > repos > nick > duplex
comparison utils/sim-label.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from __future__ import division | |
| 3 from __future__ import print_function | |
| 4 import sys | |
| 5 import argparse | |
| 6 import fastareader | |
| 7 | |
| 8 ARG_DEFAULTS = {} | |
| 9 USAGE = "%(prog)s [options]" | |
| 10 DESCRIPTION = """Convert the results of the duplex pipeline on simulated data to a single-line tsv | |
| 11 format, and label them with their original fragments.""" | |
| 12 | |
| 13 | |
| 14 def main(argv): | |
| 15 | |
| 16 parser = argparse.ArgumentParser(description=DESCRIPTION) | |
| 17 parser.set_defaults(**ARG_DEFAULTS) | |
| 18 | |
| 19 parser.add_argument('reads', type=fastareader.FastaReadGenerator, | |
| 20 help='Output fasta file from duplex pipeline. This script depends on their names being the ' | |
| 21 'exact format produced by sim.py.') | |
| 22 parser.add_argument('families', type=argparse.FileType('r'), | |
| 23 help='Families tsv file (output of make-barcodes.awk).') | |
| 24 | |
| 25 args = parser.parse_args(argv[1:]) | |
| 26 | |
| 27 bars_to_frags = {} | |
| 28 for line in args.families: | |
| 29 barcode, order, read_name = line.rstrip('\r\n').split('\t')[:3] | |
| 30 if read_name.startswith('@'): | |
| 31 read_name = read_name[1:] | |
| 32 chrom, frag_id, read_num = read_name.split('-') | |
| 33 bars_to_frags[barcode] = (chrom, frag_id) | |
| 34 | |
| 35 reads = iter(args.reads) | |
| 36 while True: | |
| 37 try: | |
| 38 read = next(reads) | |
| 39 except StopIteration: | |
| 40 break | |
| 41 barcode = read.id | |
| 42 try: | |
| 43 chrom, frag_id = bars_to_frags[barcode] | |
| 44 except KeyError: | |
| 45 sys.stderr.write('Missing barcode: {}\n'.format(barcode)) | |
| 46 continue | |
| 47 try: | |
| 48 frag_num = int(frag_id, 16) | |
| 49 except ValueError: | |
| 50 sys.stderr.write('Invalid fragment id: {}\n'.format(frag_id)) | |
| 51 continue | |
| 52 fam1size, fam2size = get_famsizes(read.name) | |
| 53 print(chrom, frag_num, frag_id, barcode, fam1size, fam2size, read.seq, sep='\t') | |
| 54 | |
| 55 | |
| 56 def get_famsizes(read_name): | |
| 57 try: | |
| 58 faminfo = read_name.split()[1] | |
| 59 except IndexError: | |
| 60 faminfo = '' | |
| 61 famsizes = faminfo.split('-') | |
| 62 try: | |
| 63 fam1size = famsizes[0] | |
| 64 except IndexError: | |
| 65 fam1size = '' | |
| 66 try: | |
| 67 fam2size = famsizes[1] | |
| 68 except IndexError: | |
| 69 fam2size = '' | |
| 70 return fam1size, fam2size | |
| 71 | |
| 72 | |
| 73 def fail(message): | |
| 74 sys.stderr.write(message+"\n") | |
| 75 sys.exit(1) | |
| 76 | |
| 77 if __name__ == '__main__': | |
| 78 sys.exit(main(sys.argv)) |
