annotate substitutions.py @ 1:385a5c3d7244 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
author devteam
date Mon, 06 Jul 2020 18:13:37 +0000
parents 190735ce4c2b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
2 """
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
3 Fetches substitutions from pairwise alignments.
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
4
1
385a5c3d7244 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents: 0
diff changeset
5 Guruprasad Ananda
385a5c3d7244 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents: 0
diff changeset
6 """
385a5c3d7244 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents: 0
diff changeset
7 from __future__ import print_function
0
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
8
1
385a5c3d7244 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents: 0
diff changeset
9 import sys
0
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
10
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
11 import bx.align.maf
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
12
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
13
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
# Two positional arguments are required: the input MAF path and the output path.
if not len(sys.argv) >= 3:
    sys.exit("Incorrect number of arguments.")

inp_file, out_file = sys.argv[1:3]
# Opened at module load; fetchSubs() and main() both write through this handle.
fout = open(out_file, mode='w')
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
20
1
385a5c3d7244 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents: 0
diff changeset
21
0
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
def fetchSubs(block, out=None):
    """Write the substituted intervals found in one alignment block.

    The first component of ``block`` is compared, column by column, with
    every other component.  A column counts as a substitution when neither
    character is a gap/masked character (one of ``-#$^*?``) and the two
    characters differ case-insensitively.  Each maximal run of consecutive
    substituted columns produces two tab-separated lines (``src``, start,
    end): one for the first component and one for the compared component.

    Coordinates are computed as ``component.start + column`` minus the
    number of ``-`` characters that precede that column in the component's
    text.  Both start and end refer to substituted columns (the first and
    the last column of the run, respectively).

    Args:
        block: object exposing ``components``, a sequence whose items have
            ``src``, ``text`` and ``start`` attributes.
        out: writable text stream.  Defaults to the module-level ``fout``.

    Raises:
        IndexError: if a compared component's text is shorter than the
            first component's text.
    """
    if out is None:
        out = fout

    # Gap or masked characters; columns containing one are never compared.
    skip_chars = '-#$^*?'

    def write_run(component, first_col, last_col):
        # Translate alignment columns into sequence coordinates by removing
        # the '-' characters that occur before each column.
        text = component.text
        out.write("%s\t%s\t%s\n" % (
            component.src,
            component.start + first_col - text[0:first_col].count('-'),
            component.start + last_col - text[0:last_col].count('-')))

    reference = block.components[0]
    ref_text = reference.text

    for idx in range(1, len(block.components)):
        other = block.components[idx]
        other_text = other.text
        run_first = None  # first column of the open run, None when no run is open
        run_last = None

        for col in range(len(ref_text)):
            substituted = (ref_text[col] not in skip_chars
                           and other_text[col] not in skip_chars
                           and ref_text[col].upper() != other_text[col].upper())
            if substituted:
                if run_first is None:
                    run_first = col
                run_last = col
            elif run_first is not None:
                # A match and a gap/masked column close the run identically;
                # both ends are measured from the component start.
                write_run(reference, run_first, run_last)
                write_run(other, run_first, run_last)
                run_first = None

        # A run that reaches the last column has nothing after it to close
        # it, so it is written here.
        if run_first is not None:
            write_run(reference, run_first, run_last)
            write_run(other, run_first, run_last)
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
63
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
64
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
def main():
    """Read the input MAF file and write substitution intervals.

    Opens ``inp_file`` with ``bx.align.maf.Reader``, writes the
    ``#Chr  Start  End`` header to ``fout`` and calls ``fetchSubs`` on every
    block that has exactly two components.  Blocks with a different number
    of components, and blocks for which ``fetchSubs`` raises, are counted
    and the counts are printed at the end.

    Exits with an error message if the input cannot be opened or the reader
    cannot be created.  The input handle and ``fout`` are closed before
    returning, including on error.
    """
    skipped = 0
    not_pairwise = 0
    maf_handle = None
    try:
        try:
            maf_handle = open(inp_file, 'r')
            maf_reader = bx.align.maf.Reader(maf_handle)
        except Exception:
            sys.exit("Your MAF file appears to be malformed.")
        fout.write("#Chr\tStart\tEnd\n")
        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is counted
                # and reported below rather than aborting the whole run.
                skipped += 1
    finally:
        # Release both file handles so the output is flushed to disk.
        if maf_handle is not None:
            maf_handle.close()
        fout.close()

    if not_pairwise:
        print("Skipped %d non-pairwise blocks" % (not_pairwise))
    if skipped:
        print("Skipped %d blocks" % (skipped))
0
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
86
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
87
190735ce4c2b Imported from capsule None
devteam
parents:
diff changeset
# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()