Mercurial > repos > peterjc > blast2go
comparison tools/blast2go/massage_xml_for_blast2go.py @ 25:242cf17c3bf9 draft default tip
"planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go commit 0c82b9ef284c686cbffd30582d2586e4fb52881e"
author | peterjc |
---|---|
date | Wed, 09 Sep 2020 15:01:39 +0000 |
parents | 05eef6b222af |
children |
comparison
equal
deleted
inserted
replaced
24:05eef6b222af | 25:242cf17c3bf9 |
---|---|
12 As part of this reformatting, we check for BLASTP or BLASTX output | 12 As part of this reformatting, we check for BLASTP or BLASTX output |
13 (otherwise raise an error), and print the query count. | 13 (otherwise raise an error), and print the query count. |
14 | 14 |
15 This script is called from my Galaxy wrapper for Blast2GO for pipelines, | 15 This script is called from my Galaxy wrapper for Blast2GO for pipelines, |
16 available from the Galaxy Tool Shed here: | 16 available from the Galaxy Tool Shed here: |
17 http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go | 17 http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go |
18 | 18 |
19 This script is under version control here: | 19 This script is under version control here: |
20 https://github.com/peterjc/galaxy_blast/tree/master/blast2go | 20 https://github.com/peterjc/galaxy_blast/tree/master/blast2go |
21 """ | 21 """ |
22 | |
23 import os | |
22 import sys | 24 import sys |
23 import os | |
24 | 25 |
25 def stop_err(msg, error_level=1): | |
26 """Print error message to stdout and quit with given error level.""" | |
27 sys.stderr.write("%s\n" % msg) | |
28 sys.exit(error_level) | |
29 | 26 |
30 def prepare_xml(original_xml, mangled_xml): | 27 def prepare_xml(original_xml, mangled_xml): |
31 """Reformat BLAST XML to suit Blast2GO. | 28 """Reformat BLAST XML to suit Blast2GO. |
32 | 29 |
33 Blast2GO can't cope with 1000s of <Iteration> tags within a | 30 Blast2GO can't cope with 1000s of <Iteration> tags within a |
42 footer = " </BlastOutput_iterations>\n</BlastOutput>\n" | 39 footer = " </BlastOutput_iterations>\n</BlastOutput>\n" |
43 header = "" | 40 header = "" |
44 while True: | 41 while True: |
45 line = in_handle.readline() | 42 line = in_handle.readline() |
46 if not line: | 43 if not line: |
47 #No hits? | 44 # No hits? |
48 stop_err("Problem with XML file?") | 45 sys.exit("Problem with XML file?") |
49 if line.strip() == "<Iteration>": | 46 if line.strip() == "<Iteration>": |
50 break | 47 break |
51 header += line | 48 header += line |
52 | 49 |
53 if "<BlastOutput_program>blastx</BlastOutput_program>" in header: | 50 if "<BlastOutput_program>blastx</BlastOutput_program>" in header: |
54 print("BLASTX output identified") | 51 print("BLASTX output identified") |
55 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header: | 52 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header: |
56 print("BLASTP output identified") | 53 print("BLASTP output identified") |
57 else: | 54 else: |
58 in_handle.close() | 55 in_handle.close() |
59 stop_err("Expect BLASTP or BLASTX output") | 56 sys.exit("Expect BLASTP or BLASTX output") |
60 | 57 |
61 out_handle = open(mangled_xml, "w") | 58 out_handle = open(mangled_xml, "w") |
62 out_handle.write(header) | 59 out_handle.write(header) |
63 out_handle.write(line) | 60 out_handle.write(line) |
64 count = 1 | 61 count = 1 |
65 while True: | 62 while True: |
66 line = in_handle.readline() | 63 line = in_handle.readline() |
67 if not line: | 64 if not line: |
68 break | 65 break |
69 elif line.strip() == "<Iteration>": | 66 elif line.strip() == "<Iteration>": |
70 #Insert footer/header | 67 # Insert footer/header |
71 out_handle.write(footer) | 68 out_handle.write(footer) |
72 out_handle.write(header) | 69 out_handle.write(header) |
73 count += 1 | 70 count += 1 |
74 out_handle.write(line) | 71 out_handle.write(line) |
75 | 72 |
76 out_handle.close() | 73 out_handle.close() |
77 in_handle.close() | 74 in_handle.close() |
78 print("Input has %i queries" % count) | 75 print("Input has %i queries" % count) |
79 | 76 |
80 | 77 |
81 if __name__ == "__main__": | 78 if __name__ == "__main__": |
82 # Run the conversion... | 79 # Run the conversion... |
83 if len(sys.argv) != 3: | 80 if len(sys.argv) != 3: |
84 stop_err("Require two arguments: XML input filename, XML output filename") | 81 sys.exit("Require two arguments: XML input filename, XML output filename") |
85 | 82 |
86 xml_file, out_xml_file = sys.argv[1:] | 83 xml_file, out_xml_file = sys.argv[1:] |
87 | 84 |
88 if not os.path.isfile(xml_file): | 85 if not os.path.isfile(xml_file): |
89 stop_err("Input BLAST XML file not found: %s" % xml_file) | 86 sys.exit("Input BLAST XML file not found: %s" % xml_file) |
90 | 87 |
91 prepare_xml(xml_file, out_xml_file) | 88 prepare_xml(xml_file, out_xml_file) |