Mercurial > repos > nick > duplex
comparison utils/sim-label.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #!/usr/bin/env python | |
2 from __future__ import division | |
3 from __future__ import print_function | |
4 import sys | |
5 import argparse | |
6 import fastareader | |
7 | |
8 ARG_DEFAULTS = {} | |
9 USAGE = "%(prog)s [options]" | |
10 DESCRIPTION = """Convert the results of the duplex pipeline on simulated data to a single-line tsv | |
11 format, and label them with their original fragments.""" | |
12 | |
13 | |
14 def main(argv): | |
15 | |
16 parser = argparse.ArgumentParser(description=DESCRIPTION) | |
17 parser.set_defaults(**ARG_DEFAULTS) | |
18 | |
19 parser.add_argument('reads', type=fastareader.FastaReadGenerator, | |
20 help='Output fasta file from duplex pipeline. This script depends on their names being the ' | |
21 'exact format produced by sim.py.') | |
22 parser.add_argument('families', type=argparse.FileType('r'), | |
23 help='Families tsv file (output of make-barcodes.awk).') | |
24 | |
25 args = parser.parse_args(argv[1:]) | |
26 | |
27 bars_to_frags = {} | |
28 for line in args.families: | |
29 barcode, order, read_name = line.rstrip('\r\n').split('\t')[:3] | |
30 if read_name.startswith('@'): | |
31 read_name = read_name[1:] | |
32 chrom, frag_id, read_num = read_name.split('-') | |
33 bars_to_frags[barcode] = (chrom, frag_id) | |
34 | |
35 reads = iter(args.reads) | |
36 while True: | |
37 try: | |
38 read = next(reads) | |
39 except StopIteration: | |
40 break | |
41 barcode = read.id | |
42 try: | |
43 chrom, frag_id = bars_to_frags[barcode] | |
44 except KeyError: | |
45 sys.stderr.write('Missing barcode: {}\n'.format(barcode)) | |
46 continue | |
47 try: | |
48 frag_num = int(frag_id, 16) | |
49 except ValueError: | |
50 sys.stderr.write('Invalid fragment id: {}\n'.format(frag_id)) | |
51 continue | |
52 fam1size, fam2size = get_famsizes(read.name) | |
53 print(chrom, frag_num, frag_id, barcode, fam1size, fam2size, read.seq, sep='\t') | |
54 | |
55 | |
56 def get_famsizes(read_name): | |
57 try: | |
58 faminfo = read_name.split()[1] | |
59 except IndexError: | |
60 faminfo = '' | |
61 famsizes = faminfo.split('-') | |
62 try: | |
63 fam1size = famsizes[0] | |
64 except IndexError: | |
65 fam1size = '' | |
66 try: | |
67 fam2size = famsizes[1] | |
68 except IndexError: | |
69 fam2size = '' | |
70 return fam1size, fam2size | |
71 | |
72 | |
73 def fail(message): | |
74 sys.stderr.write(message+"\n") | |
75 sys.exit(1) | |
76 | |
77 if __name__ == '__main__': | |
78 sys.exit(main(sys.argv)) |