Mercurial > repos > devteam > getindels_2way
comparison getIndels.py @ 0:d6d2391a1cbb
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 09:12:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d6d2391a1cbb |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Estimate INDELs for pair-wise alignments. | |
5 | |
6 usage: %prog maf_input out_file1 out_file2 | |
7 """ | |
8 | |
9 from __future__ import division | |
10 import sys | |
11 from bx.cookbook import doc_optparse | |
12 import bx.align.maf | |
13 | |
14 assert sys.version_info[:2] >= ( 2, 4 ) | |
15 | |
16 def main(): | |
17 # Parsing Command Line here | |
18 options, args = doc_optparse.parse( __doc__ ) | |
19 | |
20 try: | |
21 inp_file, out_file1 = args | |
22 except: | |
23 print >> sys.stderr, "Tool initialization error." | |
24 sys.exit() | |
25 | |
26 try: | |
27 open(inp_file, 'r') | |
28 except: | |
29 print >> sys.stderr, "Unable to open input file" | |
30 sys.exit() | |
31 try: | |
32 fout1 = open(out_file1, 'w') | |
33 #fout2 = open(out_file2, 'w') | |
34 except: | |
35 print >> sys.stderr, "Unable to open output file" | |
36 sys.exit() | |
37 | |
38 try: | |
39 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') ) | |
40 except: | |
41 print >> sys.stderr, "Your MAF file appears to be malformed." | |
42 sys.exit() | |
43 | |
44 print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length" | |
45 for block_ind, block in enumerate(maf_reader): | |
46 if len(block.components) < 2: | |
47 continue | |
48 seq1 = block.components[0].text | |
49 src1 = block.components[0].src | |
50 start1 = block.components[0].start | |
51 if len(block.components) == 2: | |
52 seq2 = block.components[1].text | |
53 src2 = block.components[1].src | |
54 start2 = block.components[1].start | |
55 #for pos in range(len(seq1)): | |
56 nt_pos1 = start1-1 #position of the nucleotide (without counting gaps) | |
57 nt_pos2 = start2-1 | |
58 pos = 0 #character column position | |
59 gaplen1 = 0 | |
60 gaplen2 = 0 | |
61 prev_pos_gap1 = 0 | |
62 prev_pos_gap2 = 0 | |
63 while pos < len(seq1): | |
64 if prev_pos_gap1 == 0: | |
65 gaplen1 = 0 | |
66 if prev_pos_gap2 == 0: | |
67 gaplen2 = 0 | |
68 | |
69 if seq1[pos] == '-': | |
70 if seq2[pos] != '-': | |
71 nt_pos2 += 1 | |
72 gaplen1 += 1 | |
73 prev_pos_gap1 = 1 | |
74 #write 2 | |
75 if prev_pos_gap2 == 1: | |
76 prev_pos_gap2 = 0 | |
77 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 ) | |
78 if pos == len(seq1)-1: | |
79 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 ) | |
80 else: | |
81 prev_pos_gap1 = 0 | |
82 prev_pos_gap2 = 0 | |
83 """ | |
84 if prev_pos_gap1 == 1: | |
85 prev_pos_gap1 = 0 | |
86 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 ) | |
87 elif prev_pos_gap2 == 1: | |
88 prev_pos_gap2 = 0 | |
89 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 ) | |
90 """ | |
91 else: | |
92 nt_pos1 += 1 | |
93 if seq2[pos] != '-': | |
94 nt_pos2 += 1 | |
95 #write both | |
96 if prev_pos_gap1 == 1: | |
97 prev_pos_gap1 = 0 | |
98 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 ) | |
99 elif prev_pos_gap2 == 1: | |
100 prev_pos_gap2 = 0 | |
101 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 ) | |
102 else: | |
103 gaplen2 += 1 | |
104 prev_pos_gap2 = 1 | |
105 #write 1 | |
106 if prev_pos_gap1 == 1: | |
107 prev_pos_gap1 = 0 | |
108 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 ) | |
109 if pos == len(seq1)-1: | |
110 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 ) | |
111 pos += 1 | |
112 | |
113 | |
114 if __name__ == "__main__": | |
115 main() |