comparison tools/blast2go/massage_xml_for_blast2go.py @ 25:242cf17c3bf9 draft default tip

"planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go commit 0c82b9ef284c686cbffd30582d2586e4fb52881e"
author peterjc
date Wed, 09 Sep 2020 15:01:39 +0000
parents 05eef6b222af
children
comparison
equal deleted inserted replaced
24:05eef6b222af 25:242cf17c3bf9
12 As part of this reformatting, we check for BLASTP or BLASTX output 12 As part of this reformatting, we check for BLASTP or BLASTX output
13 (otherwise raise an error), and print the query count. 13 (otherwise raise an error), and print the query count.
14 14
15 This script is called from my Galaxy wrapper for Blast2GO for pipelines, 15 This script is called from my Galaxy wrapper for Blast2GO for pipelines,
16 available from the Galaxy Tool Shed here: 16 available from the Galaxy Tool Shed here:
17 http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go 17 http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go
18 18
19 This script is under version control here: 19 This script is under version control here:
20 https://github.com/peterjc/galaxy_blast/tree/master/blast2go 20 https://github.com/peterjc/galaxy_blast/tree/master/blast2go
21 """ 21 """
22
23 import os
22 import sys 24 import sys
23 import os
24 25
25 def stop_err(msg, error_level=1):
26 """Print error message to stdout and quit with given error level."""
27 sys.stderr.write("%s\n" % msg)
28 sys.exit(error_level)
29 26
30 def prepare_xml(original_xml, mangled_xml): 27 def prepare_xml(original_xml, mangled_xml):
31 """Reformat BLAST XML to suit Blast2GO. 28 """Reformat BLAST XML to suit Blast2GO.
32 29
33 Blast2GO can't cope with 1000s of <Iteration> tags within a 30 Blast2GO can't cope with 1000s of <Iteration> tags within a
42 footer = " </BlastOutput_iterations>\n</BlastOutput>\n" 39 footer = " </BlastOutput_iterations>\n</BlastOutput>\n"
43 header = "" 40 header = ""
44 while True: 41 while True:
45 line = in_handle.readline() 42 line = in_handle.readline()
46 if not line: 43 if not line:
47 #No hits? 44 # No hits?
48 stop_err("Problem with XML file?") 45 sys.exit("Problem with XML file?")
49 if line.strip() == "<Iteration>": 46 if line.strip() == "<Iteration>":
50 break 47 break
51 header += line 48 header += line
52 49
53 if "<BlastOutput_program>blastx</BlastOutput_program>" in header: 50 if "<BlastOutput_program>blastx</BlastOutput_program>" in header:
54 print("BLASTX output identified") 51 print("BLASTX output identified")
55 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header: 52 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header:
56 print("BLASTP output identified") 53 print("BLASTP output identified")
57 else: 54 else:
58 in_handle.close() 55 in_handle.close()
59 stop_err("Expect BLASTP or BLASTX output") 56 sys.exit("Expect BLASTP or BLASTX output")
60 57
61 out_handle = open(mangled_xml, "w") 58 out_handle = open(mangled_xml, "w")
62 out_handle.write(header) 59 out_handle.write(header)
63 out_handle.write(line) 60 out_handle.write(line)
64 count = 1 61 count = 1
65 while True: 62 while True:
66 line = in_handle.readline() 63 line = in_handle.readline()
67 if not line: 64 if not line:
68 break 65 break
69 elif line.strip() == "<Iteration>": 66 elif line.strip() == "<Iteration>":
70 #Insert footer/header 67 # Insert footer/header
71 out_handle.write(footer) 68 out_handle.write(footer)
72 out_handle.write(header) 69 out_handle.write(header)
73 count += 1 70 count += 1
74 out_handle.write(line) 71 out_handle.write(line)
75 72
76 out_handle.close() 73 out_handle.close()
77 in_handle.close() 74 in_handle.close()
78 print("Input has %i queries" % count) 75 print("Input has %i queries" % count)
79 76
80 77
81 if __name__ == "__main__": 78 if __name__ == "__main__":
82 # Run the conversion... 79 # Run the conversion...
83 if len(sys.argv) != 3: 80 if len(sys.argv) != 3:
84 stop_err("Require two arguments: XML input filename, XML output filename") 81 sys.exit("Require two arguments: XML input filename, XML output filename")
85 82
86 xml_file, out_xml_file = sys.argv[1:] 83 xml_file, out_xml_file = sys.argv[1:]
87 84
88 if not os.path.isfile(xml_file): 85 if not os.path.isfile(xml_file):
89 stop_err("Input BLAST XML file not found: %s" % xml_file) 86 sys.exit("Input BLAST XML file not found: %s" % xml_file)
90 87
91 prepare_xml(xml_file, out_xml_file) 88 prepare_xml(xml_file, out_xml_file)