comparison substitutions.py @ 1:385a5c3d7244 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
author devteam
date Mon, 06 Jul 2020 18:13:37 +0000
parents 190735ce4c2b
children
comparison
equal deleted inserted replaced
0:190735ce4c2b 1:385a5c3d7244
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Guruprasad ANanda
3 """ 2 """
4 Fetches substitutions from pairwise alignments. 3 Fetches substitutions from pairwise alignments.
4
5 Guruprasad ANanda
5 """ 6 """
7 from __future__ import print_function
6 8
7 from galaxy import eggs 9 import sys
8
9 from galaxy.tools.util import maf_utilities
10 10
11 import bx.align.maf 11 import bx.align.maf
12 import sys
13
14 def stop_err(msg):
15 sys.stderr.write(msg)
16 sys.exit()
17 12
18 13
# Two positional arguments are required: the input MAF path followed by the
# path of the tab-separated output file.
if len(sys.argv) < 3:
    sys.exit("Incorrect number of arguments.")

inp_file, out_file = sys.argv[1], sys.argv[2]
# Opened at module level; fetchSubs() and main() write to this handle.
fout = open(out_file, 'w')
20
25 21
# Characters treated as "gap or masked": a column holding any of these in
# either sequence is never part of a substitution run.
_GAP_OR_MASKED = '-#$^*?'


def _substitution_runs(text1, text2):
    """Yield (first, last) alignment columns of each substitution run.

    A run is a maximal stretch of consecutive columns in which neither
    sequence holds a gap/masked character and the two bases differ
    (compared case-insensitively). Both bounds are inclusive column
    indices into the gapped alignment text.
    """
    run_first = None
    run_last = None
    for col, base1 in enumerate(text1):
        base2 = text2[col]
        comparable = (base1 not in _GAP_OR_MASKED
                      and base2 not in _GAP_OR_MASKED)
        if comparable and base1.upper() != base2.upper():
            if run_first is None:
                run_first = col
            run_last = col
        elif run_first is not None:
            # A matching, gapped or masked column closes the open run.
            yield run_first, run_last
            run_first = None
    if run_first is not None:
        # The run reached the end of the alignment; emit it as well.
        yield run_first, run_last


def _ungapped_position(component, col):
    """Map alignment column *col* to a coordinate on *component*.

    The coordinate is the component start plus the column index, minus
    the number of '-' characters that precede the column in its text.
    """
    return component.start + col - component.text[:col].count('-')


def fetchSubs(block, out=None):
    """Write the substitution intervals found in an alignment block.

    The first component of *block* is compared against every other
    component. For each run of consecutive substituted columns, two
    tab-separated lines are written (first component, then the compared
    component), each holding the source name and the coordinates of the
    first and last substituted column of the run on that component.

    Args:
        block: object with a ``components`` sequence; each component
            provides ``src``, ``text`` and ``start``.
        out: writable text handle. Defaults to the module-level ``fout``.

    Raises:
        ValueError: if a compared component's text is shorter than the
            first component's text. Raised before anything is written
            for that component, so no partial output is produced.
    """
    if out is None:
        out = fout
    reference = block.components[0]

    for idx in range(1, len(block.components)):
        other = block.components[idx]
        if len(other.text) < len(reference.text):
            raise ValueError(
                "Aligned text of %s is shorter than that of %s"
                % (other.src, reference.src))
        for first, last in _substitution_runs(reference.text, other.text):
            # Both ends are derived from the component start; the end is
            # never offset from the component end.
            for component in (reference, other):
                out.write("%s\t%s\t%s\n" % (
                    component.src,
                    _ungapped_position(component, first),
                    _ungapped_position(component, last)))
59 63
60 64
def main():
    """Read the input MAF file and write substitutions of pairwise blocks.

    Writes a "#Chr\\tStart\\tEnd" header to the module-level ``fout`` and
    then calls ``fetchSubs`` on every block that has exactly two
    components. Blocks with any other number of components, and blocks
    for which ``fetchSubs`` raises, are counted and the counts are
    printed to stdout at the end.

    Exits with an error message if the input cannot be opened or the MAF
    reader cannot be constructed. The input handle and ``fout`` are
    always closed before returning or exiting.
    """
    skipped = 0
    not_pairwise = 0
    maf_handle = None
    try:
        try:
            maf_handle = open(inp_file, 'r')
            maf_reader = bx.align.maf.Reader(maf_handle)
        except Exception:
            sys.exit("Your MAF file appears to be malformed.")
        fout.write("#Chr\tStart\tEnd\n")
        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is
                # counted and reported below rather than aborting the run.
                skipped += 1
    finally:
        # Release the input handle and make sure all output is flushed.
        if maf_handle is not None:
            maf_handle.close()
        fout.close()

    if not_pairwise:
        print("Skipped %d non-pairwise blocks" % (not_pairwise))
    if skipped:
        print("Skipped %d blocks" % (skipped))
82 86
83 87
84 if __name__ == "__main__": 88 if __name__ == "__main__":
85 main() 89 main()