Mercurial > repos > devteam > substitutions
annotate substitutions.py @ 1:385a5c3d7244 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
| author | devteam |
|---|---|
| date | Mon, 06 Jul 2020 18:13:37 +0000 |
| parents | 190735ce4c2b |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 """ | |
| 3 Fetches substitutions from pairwise alignments. | |
| 4 | |
|
1
385a5c3d7244
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents:
0
diff
changeset
|
5 Guruprasad Ananda |
|
385a5c3d7244
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents:
0
diff
changeset
|
6 """ |
|
385a5c3d7244
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents:
0
diff
changeset
|
7 from __future__ import print_function |
| 0 | 8 |
|
1
385a5c3d7244
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents:
0
diff
changeset
|
9 import sys |
| 0 | 10 |
| 11 import bx.align.maf | |
| 12 | |
| 13 | |
# The script expects two positional arguments: the input MAF path followed
# by the output path.
if not len(sys.argv) >= 3:
    sys.exit("Incorrect number of arguments.")

inp_file, out_file = sys.argv[1], sys.argv[2]
# Shared output handle: opened (and truncated) as soon as the module runs,
# then written to by fetchSubs() and main().
fout = open(out_file, 'w')
| 20 | |
|
1
385a5c3d7244
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/substitutions commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
devteam
parents:
0
diff
changeset
|
21 |
# Characters treated as "not an aligned base": the gap plus masking symbols.
_NON_BASE_CHARS = '-#$^*?'


def _write_sub(out, src, text, start, sub_begin, sub_end):
    """Write one tab-separated ``src, begin, end`` line for a single sequence.

    ``sub_begin`` and ``sub_end`` are alignment column indices. Each is
    converted to a sequence coordinate by adding the component ``start`` and
    subtracting the number of '-' characters that precede the column in the
    aligned ``text``.
    """
    out.write("%s\t%s\t%s\n" % (
        src,
        start + sub_begin - text[0:sub_begin].count('-'),
        start + sub_end - text[0:sub_end].count('-'),
    ))


def fetchSubs(block, out=None):
    """Report runs of consecutive substitutions in an alignment block.

    The first component of ``block`` is compared, column by column, against
    every other component. A run is a maximal stretch of adjacent columns in
    which both sequences carry a real base (not a gap or masked character)
    and the bases differ case-insensitively. For every run two lines are
    written: one with the coordinates in the first component, one with the
    coordinates in the compared component. The reported end corresponds to
    the last mismatching column of the run.

    Parameters:
        block: object exposing ``components``, a sequence whose items have
            ``src``, ``text`` and ``start`` attributes.
        out: writable text handle; defaults to the module-level ``fout``.

    Raises:
        IndexError: if a compared component's text is shorter than the
            first component's text.
    """
    if out is None:
        out = fout  # fall back to the script's shared output handle

    ref = block.components[0]
    ref_text = ref.text

    for other in block.components[1:]:
        other_text = other.text
        sub_begin = None  # first column of the open run, None when no run
        sub_end = None    # last mismatching column seen in the open run

        for nt, ref_char in enumerate(ref_text):
            other_char = other_text[nt]
            both_bases = (ref_char not in _NON_BASE_CHARS
                          and other_char not in _NON_BASE_CHARS)
            if both_bases and ref_char.upper() != other_char.upper():
                if sub_begin is None:
                    sub_begin = nt
                sub_end = nt
            elif sub_begin is not None:
                # A matching column or a gap/masked column closes the run.
                # Both cases use the component start as the coordinate base
                # (the gap case previously used the component end by mistake).
                _write_sub(out, ref.src, ref_text, ref.start,
                           sub_begin, sub_end)
                _write_sub(out, other.src, other_text, other.start,
                           sub_begin, sub_end)
                sub_begin = None

        if sub_begin is not None:
            # The run extends to the final column; without this flush it
            # would never be written.
            _write_sub(out, ref.src, ref_text, ref.start, sub_begin, sub_end)
            _write_sub(out, other.src, other_text, other.start,
                       sub_begin, sub_end)
| 63 | |
| 64 | |
def main():
    """Read the input MAF and write substitution intervals to the output.

    Writes a ``#Chr/Start/End`` header to the module-level ``fout`` and then
    calls ``fetchSubs`` on every block that has exactly two components.
    Blocks with any other number of components are counted as non-pairwise,
    and blocks for which ``fetchSubs`` raises are counted as skipped; both
    counts are printed at the end when non-zero.

    Exits via ``sys.exit`` with an error message if the input cannot be
    opened or the MAF reader cannot be constructed.
    """
    skipped = 0
    not_pairwise = 0
    try:
        maf_handle = open(inp_file, 'r')
    except Exception:
        sys.exit("Your MAF file appears to be malformed.")

    # Release both handles on every exit path (including sys.exit below) so
    # the output is flushed and no descriptor is leaked.
    with maf_handle, fout:
        try:
            maf_reader = bx.align.maf.Reader(maf_handle)
        except Exception:
            sys.exit("Your MAF file appears to be malformed.")
        fout.write("#Chr\tStart\tEnd\n")
        for block in maf_reader:
            if len(block.components) != 2:
                not_pairwise += 1
                continue
            try:
                fetchSubs(block)
            except Exception:
                # Best effort: a block that cannot be processed is counted
                # and reported below rather than aborting the whole run.
                skipped += 1

    if not_pairwise:
        print("Skipped %d non-pairwise blocks" % (not_pairwise))
    if skipped:
        print("Skipped %d blocks" % (skipped))
| 0 | 86 |
| 87 | |
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
