annotate getIndels.py @ 1:51ee76168a2d default tip

Corrected version string.
author devteam <devteam@galaxyproject.org>
date Thu, 10 Apr 2014 13:47:13 -0400
parents d6d2391a1cbb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
2
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
3 """
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
4 Estimate INDELs for pair-wise alignments.
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
5
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
6 usage: %prog maf_input out_file1 out_file2
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
7 """
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
8
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
9 from __future__ import division
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
10 import sys
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
11 from bx.cookbook import doc_optparse
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
12 import bx.align.maf
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
13
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
14 assert sys.version_info[:2] >= ( 2, 4 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
15
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
16 def main():
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
17 # Parsing Command Line here
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
18 options, args = doc_optparse.parse( __doc__ )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
19
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
20 try:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
21 inp_file, out_file1 = args
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
22 except:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
23 print >> sys.stderr, "Tool initialization error."
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
24 sys.exit()
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
25
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
26 try:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
27 open(inp_file, 'r')
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
28 except:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
29 print >> sys.stderr, "Unable to open input file"
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
30 sys.exit()
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
31 try:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
32 fout1 = open(out_file1, 'w')
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
33 #fout2 = open(out_file2, 'w')
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
34 except:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
35 print >> sys.stderr, "Unable to open output file"
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
36 sys.exit()
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
37
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
38 try:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
39 maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
40 except:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
41 print >> sys.stderr, "Your MAF file appears to be malformed."
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
42 sys.exit()
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
43
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
44 print >> fout1, "#Block\tSource\tSeq1_Start\tSeq1_End\tSeq2_Start\tSeq2_End\tIndel_length"
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
45 for block_ind, block in enumerate(maf_reader):
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
46 if len(block.components) < 2:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
47 continue
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
48 seq1 = block.components[0].text
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
49 src1 = block.components[0].src
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
50 start1 = block.components[0].start
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
51 if len(block.components) == 2:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
52 seq2 = block.components[1].text
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
53 src2 = block.components[1].src
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
54 start2 = block.components[1].start
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
55 #for pos in range(len(seq1)):
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
56 nt_pos1 = start1-1 #position of the nucleotide (without counting gaps)
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
57 nt_pos2 = start2-1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
58 pos = 0 #character column position
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
59 gaplen1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
60 gaplen2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
61 prev_pos_gap1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
62 prev_pos_gap2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
63 while pos < len(seq1):
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
64 if prev_pos_gap1 == 0:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
65 gaplen1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
66 if prev_pos_gap2 == 0:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
67 gaplen2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
68
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
69 if seq1[pos] == '-':
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
70 if seq2[pos] != '-':
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
71 nt_pos2 += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
72 gaplen1 += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
73 prev_pos_gap1 = 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
74 #write 2
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
75 if prev_pos_gap2 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
76 prev_pos_gap2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
77 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1, nt_pos1+1, nt_pos2-1, nt_pos2-1+gaplen2, gaplen2 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
78 if pos == len(seq1)-1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
79 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1, nt_pos1+1, nt_pos2+1-gaplen1, nt_pos2+1, gaplen1 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
80 else:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
81 prev_pos_gap1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
82 prev_pos_gap2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
83 """
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
84 if prev_pos_gap1 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
85 prev_pos_gap1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
86 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, gaplen1 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
87 elif prev_pos_gap2 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
88 prev_pos_gap2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
89 print >> fout1, "%d\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos2-1, nt_pos2, gaplen2 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
90 """
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
91 else:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
92 nt_pos1 += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
93 if seq2[pos] != '-':
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
94 nt_pos2 += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
95 #write both
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
96 if prev_pos_gap1 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
97 prev_pos_gap1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
98 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2-gaplen1, nt_pos2, gaplen1 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
99 elif prev_pos_gap2 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
100 prev_pos_gap2 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
101 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1-gaplen2, nt_pos1, nt_pos2-1, nt_pos2, gaplen2 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
102 else:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
103 gaplen2 += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
104 prev_pos_gap2 = 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
105 #write 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
106 if prev_pos_gap1 == 1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
107 prev_pos_gap1 = 0
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
108 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src1, nt_pos1-1, nt_pos1, nt_pos2, nt_pos2+gaplen1, gaplen1 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
109 if pos == len(seq1)-1:
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
110 print >> fout1, "%d\t%s\t%s\t%s\t%s\t%s\t%s" % ( block_ind+1, src2, nt_pos1+1-gaplen2, nt_pos1+1, nt_pos2, nt_pos2+1, gaplen2 )
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
111 pos += 1
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
112
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
113
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
114 if __name__ == "__main__":
d6d2391a1cbb Imported from capsule None
devteam
parents:
diff changeset
115 main()