0
|
1 #!/usr/bin/env python
|
|
2 #Guruprasad ANanda
|
|
3 """
|
|
4 Fetches substitutions from pairwise alignments.
|
|
5 """
|
|
6
|
|
7 from galaxy import eggs
|
|
8
|
|
9 from galaxy.tools.util import maf_utilities
|
|
10
|
|
11 import bx.align.maf
|
|
12 import sys
|
|
13
|
|
def stop_err(msg):
    """Write *msg* to standard error and terminate the script.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so the failure is visible from the exit code as
    well as from the stderr output.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit(1)
|
|
17
|
|
18
|
|
# Expected invocation: <script> <input MAF file> <output file>
if not len(sys.argv) >= 3:
    stop_err("Incorrect number of arguments.")

inp_file, out_file = sys.argv[1], sys.argv[2]
# Module-level output handle; the functions below write their results to it.
fout = open(out_file, 'w')
|
|
25
|
|
# Alignment characters that are never compared: the gap ('-') and the
# masked/placeholder characters the original comparison skipped.
_NON_BASE_CHARS = '-#$^*?'


def _write_sub_interval(out, src, text, start, first_col, last_col):
    """Write one ``src<TAB>begin<TAB>end`` line for a run of alignment columns."""
    # Alignment columns count gap characters but sequence coordinates do not,
    # so subtract the number of '-' preceding each column.
    begin = start + first_col - text[0:first_col].count('-')
    end = start + last_col - text[0:last_col].count('-')
    out.write("%s\t%s\t%s\n" % (src, begin, end))


def fetchSubs(block, out=None):
    """Report runs of substituted columns between the first component and the others.

    The first component of *block* is compared column by column against
    every other component.  Consecutive columns where both sequences hold
    a comparable character and the characters differ (case-insensitively)
    form one run.  For each run two tab-separated lines are written: one
    for the first component and one for the compared component, each as
    ``src``, first substituted position, last substituted position.

    Args:
        block: object whose ``components`` sequence holds items exposing
            ``src``, ``text`` and ``start``.
        out: object with a ``write`` method that receives the lines.
            Defaults to the module-level ``fout`` handle.

    Raises:
        IndexError: if a compared component's text is shorter than the
            first component's text.
    """
    if out is None:
        out = fout

    reference = block.components[0]
    src1 = reference.src
    sequence1 = reference.text
    start1 = reference.start

    for other in block.components[1:]:
        src2 = other.src
        sequence2 = other.text
        start2 = other.start
        run_first = None  # first column of the open run, None when no run is open
        run_last = None   # last substituted column seen in the open run

        for col, base1 in enumerate(sequence1):
            base2 = sequence2[col]
            substituted = (
                base1 not in _NON_BASE_CHARS
                and base2 not in _NON_BASE_CHARS
                and base1.upper() != base2.upper()
            )
            if substituted:
                if run_first is None:
                    run_first = col
                run_last = col
            elif run_first is not None:
                # A match, gap or masked column closes the run.  Both
                # coordinates are offsets from the component start; the end
                # must not be derived from the component's end coordinate.
                _write_sub_interval(out, src1, sequence1, start1, run_first, run_last)
                _write_sub_interval(out, src2, sequence2, start2, run_first, run_last)
                run_first = None

        if run_first is not None:
            # The run reached the last alignment column; flush it so that
            # trailing substitutions are not silently dropped.
            _write_sub_interval(out, src1, sequence1, start1, run_first, run_last)
            _write_sub_interval(out, src2, sequence2, start2, run_first, run_last)
|
|
59
|
|
60
|
|
def main():
    """Read the input MAF file and write substitution intervals to ``fout``.

    A ``#Chr<TAB>Start<TAB>End`` header line is written first.  Blocks that
    do not have exactly two components are counted and skipped; blocks for
    which ``fetchSubs`` raises are counted and skipped as well.  Non-zero
    counts are reported on standard output once all blocks are processed.
    If the MAF reader cannot be created, ``stop_err`` is called with an
    error message.
    """
    skipped = 0
    not_pairwise = 0
    maf_handle = None
    try:
        try:
            maf_handle = open(inp_file, 'r')
            maf_reader = bx.align.maf.Reader(maf_handle)
        except Exception:
            # Narrower than a bare except, so KeyboardInterrupt and
            # SystemExit are not mistaken for a malformed file.
            stop_err("Your MAF file appears to be malformed.")

        fout.write("#Chr\tStart\tEnd\n")
        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is counted
                # and skipped rather than aborting the whole run.
                skipped += 1
    finally:
        # Release both file handles even if reading fails part-way through.
        if maf_handle is not None:
            maf_handle.close()
        fout.close()

    if not_pairwise:
        sys.stdout.write("Skipped %d non-pairwise blocks\n" % (not_pairwise))
    if skipped:
        sys.stdout.write("Skipped %d blocks\n" % (skipped))
|
|
82
|
|
83
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|