Mercurial > repos > jeremie > breakdancer_test
comparison breakdancer2vcf.py @ 15:743d75da4658 draft
Uploaded
author | jeremie |
---|---|
date | Fri, 27 Jun 2014 08:43:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:2dd38b423eae | 15:743d75da4658 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 __copyright__ = """ | |
4 Copyright (C) 2013 - Tim te Beek | |
5 Copyright (C) 2013 - Wai Yi Leung | |
6 Copyright (C) 2013 AllBio (see AUTHORS file) | |
7 """ | |
8 | |
9 __desc__ = """Convert breakdancer output to pseudo .vcf file format.""" | |
10 __created__ = "Mar 18, 2013" | |
11 __author__ = "tbeek" | |
12 | |
13 import argparse | |
14 import csv | |
15 import os.path | |
16 import sys | |
17 | |
18 | |
19 def main(tsvfile, vcffile): | |
20 ''' | |
21 :param tsvfile: filename of input file.tsv | |
22 :type tsvfile: string | |
23 :param vcffile: filename of output file.vcf | |
24 :type vcffile: string | |
25 ''' | |
26 with open(tsvfile) as reader: | |
27 # Parse file | |
28 dictreader = _parse_tsvfile(reader) | |
29 print dictreader.fieldnames | |
30 | |
31 # Write out file | |
32 _format_vcffile(dictreader, vcffile) | |
33 | |
34 # Quick output | |
35 with open(vcffile) as reader: | |
36 print reader.read(1000) | |
37 | |
38 | |
39 def _parse_tsvfile(readable): | |
40 ''' | |
41 Read readable using csv.Sniffer and csv.DictReader | |
42 :param readable: open file.tsv handle to read with csv.DictReader | |
43 :type readable: file | |
44 ''' | |
45 prev, curr = 0, 0 | |
46 while True: | |
47 line = readable.readline() | |
48 if not line.startswith('#'): | |
49 # lets start from prev # line, without the hash sign | |
50 readable.seek(prev + 1) | |
51 break | |
52 else: | |
53 prev = curr | |
54 curr = readable.tell() | |
55 | |
56 # Determine dialect | |
57 curr = readable.tell() | |
58 #dialect = csv.Sniffer().sniff(readable.read(3000)) | |
59 dialect = 'excel-tab' | |
60 readable.seek(curr) | |
61 | |
62 # Read file | |
63 dictreader = csv.DictReader(readable, dialect=dialect) | |
64 return dictreader | |
65 | |
66 | |
67 _tsv_fields = ('Chr1', 'Pos1', 'Orientation1', | |
68 'Chr2', 'Pos2', 'Orientation2', | |
69 'Type', 'Size', 'Score', | |
70 'num_Reads', 'num_Reads_lib', | |
71 'ERR031544.sort.bam') | |
72 # 'Chr1': '1', | |
73 # 'Pos1': '269907', | |
74 # 'Orientation1': '39+39-', | |
75 # 'Chr2': '1', | |
76 # 'Pos2': '270919', | |
77 # 'Orientation2': '39+39-', | |
78 # 'Type': 'DEL', | |
79 # 'Size': '99', | |
80 # 'Score': '99', | |
81 # 'num_Reads': '38', | |
82 # 'num_Reads_lib': '/home/allbio/ERR031544.sort.bam|38', | |
83 # 'ERR031544.sort.bam': 'NA' | |
84 | |
85 _vcf_fields = ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO') | |
86 | |
87 | |
88 def _format_vcffile(dictreader, vcffile): | |
89 ''' | |
90 Create a pseudo .vcf file based on values read from DictReader instance. | |
91 :param dictreader: DictReader instance to read data from | |
92 :type dictreader: csv.DictRedaer | |
93 :param vcffile: output file.vcf filename | |
94 :type vcffile: string | |
95 ''' | |
96 with open(vcffile, mode='w') as writer: | |
97 writer.write('#{}\n'.format('\t'.join(_vcf_fields))) | |
98 output_vcf = [] | |
99 for line in dictreader: | |
100 CHROM = line['Chr1'] | |
101 # TODO Figure out whether we have zero or one based positioning | |
102 POS = int(line['Pos1']) | |
103 SVEND = int(line['Pos2']) | |
104 INFO = 'PROGRAM=breakdancer;SVTYPE={};SVLEN={}'.format(line['Type'], | |
105 0 - int(line['Size'])) | |
106 if line['Type'] not in ['CTX']: | |
107 INFO += ";SVEND={}".format(SVEND) | |
108 | |
109 # Create record | |
110 output_vcf.append([CHROM, POS, '.', '.', '.', '.', 'PASS', INFO]) | |
111 | |
112 # Sort all results | |
113 output_vcf.sort() | |
114 output = "\n".join(["\t".join(map(str,vcf_row)) for vcf_row in output_vcf]) | |
115 # Write record | |
116 writer.write(output) | |
117 | |
118 | |
119 if __name__ == '__main__': | |
120 parser = argparse.ArgumentParser() | |
121 parser.add_argument('-i', '--breakdancertsv', dest='breakdancertsv', type=str, | |
122 help='Breakdancer TSV outputfile') | |
123 parser.add_argument('-o', '--outputvcf', dest='outputvcf', type=str, | |
124 help='Output vcf to') | |
125 | |
126 args = parser.parse_args() | |
127 main(args.breakdancertsv, args.outputvcf) |