comparison utils/sim-label.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #!/usr/bin/env python
2 from __future__ import division
3 from __future__ import print_function
4 import sys
5 import argparse
6 import fastareader
7
8 ARG_DEFAULTS = {}
9 USAGE = "%(prog)s [options]"
10 DESCRIPTION = """Convert the results of the duplex pipeline on simulated data to a single-line tsv
11 format, and label them with their original fragments."""
12
13
14 def main(argv):
15
16 parser = argparse.ArgumentParser(description=DESCRIPTION)
17 parser.set_defaults(**ARG_DEFAULTS)
18
19 parser.add_argument('reads', type=fastareader.FastaReadGenerator,
20 help='Output fasta file from duplex pipeline. This script depends on their names being the '
21 'exact format produced by sim.py.')
22 parser.add_argument('families', type=argparse.FileType('r'),
23 help='Families tsv file (output of make-barcodes.awk).')
24
25 args = parser.parse_args(argv[1:])
26
27 bars_to_frags = {}
28 for line in args.families:
29 barcode, order, read_name = line.rstrip('\r\n').split('\t')[:3]
30 if read_name.startswith('@'):
31 read_name = read_name[1:]
32 chrom, frag_id, read_num = read_name.split('-')
33 bars_to_frags[barcode] = (chrom, frag_id)
34
35 reads = iter(args.reads)
36 while True:
37 try:
38 read = next(reads)
39 except StopIteration:
40 break
41 barcode = read.id
42 try:
43 chrom, frag_id = bars_to_frags[barcode]
44 except KeyError:
45 sys.stderr.write('Missing barcode: {}\n'.format(barcode))
46 continue
47 try:
48 frag_num = int(frag_id, 16)
49 except ValueError:
50 sys.stderr.write('Invalid fragment id: {}\n'.format(frag_id))
51 continue
52 fam1size, fam2size = get_famsizes(read.name)
53 print(chrom, frag_num, frag_id, barcode, fam1size, fam2size, read.seq, sep='\t')
54
55
56 def get_famsizes(read_name):
57 try:
58 faminfo = read_name.split()[1]
59 except IndexError:
60 faminfo = ''
61 famsizes = faminfo.split('-')
62 try:
63 fam1size = famsizes[0]
64 except IndexError:
65 fam1size = ''
66 try:
67 fam2size = famsizes[1]
68 except IndexError:
69 fam2size = ''
70 return fam1size, fam2size
71
72
73 def fail(message):
74 sys.stderr.write(message+"\n")
75 sys.exit(1)
76
77 if __name__ == '__main__':
78 sys.exit(main(sys.argv))