Mercurial > repos > peterjc > blast2go
comparison tools/ncbi_blast_plus/blast2go.py @ 1:7b53cc52e7ed
Migrated tool version 0.0.2 from old tool shed archive to new tool shed repository
author | peterjc |
---|---|
date | Tue, 07 Jun 2011 15:51:46 -0400 |
parents | 4bfd64cf18ab |
children | 5cb24d07c316 |
comparison
equal
deleted
inserted
replaced
0:4bfd64cf18ab | 1:7b53cc52e7ed |
---|---|
4 This script takes exactly three command line arguments: | 4 This script takes exactly three command line arguments: |
5 * Input BLAST XML filename | 5 * Input BLAST XML filename |
6 * Blast2GO properties filename (settings file) | 6 * Blast2GO properties filename (settings file) |
7 * Output tabular filename | 7 * Output tabular filename |
8 | 8 |
9 Sadly b2g4pipe v2.3.5 cannot cope with current style large BLAST XML | |
10 files (e.g. from BLAST 2.2.25+), so we have to reformat these to | |
11 avoid it crashing with a Java heap space OutOfMemoryError. | |
12 | |
13 As part of this reformatting, we check for BLASTP or BLASTX output | |
14 (otherwise raise an error), and print the query count. | |
15 | |
9 It then calls the Java command line tool, and moves the output file to | 16 It then calls the Java command line tool, and moves the output file to |
10 the location Galaxy is expecting. | 17 the location Galaxy is expecting, and removes the tempory XML file. |
11 """ | 18 """ |
12 import sys | 19 import sys |
13 import os | 20 import os |
14 import subprocess | 21 import subprocess |
15 | 22 |
25 if len(sys.argv) != 4: | 32 if len(sys.argv) != 4: |
26 stop_err("Require three arguments: XML filename, properties filename, output tabular filename") | 33 stop_err("Require three arguments: XML filename, properties filename, output tabular filename") |
27 | 34 |
28 xml_file, prop_file, tabular_file = sys.argv[1:] | 35 xml_file, prop_file, tabular_file = sys.argv[1:] |
29 | 36 |
37 #We should have write access here: | |
38 tmp_xml_file = tabular_file + ".tmp.xml" | |
39 | |
30 if not os.path.isfile(xml_file): | 40 if not os.path.isfile(xml_file): |
31 stop_err("Input BLAST XML file not found: %s" % xml_file) | 41 stop_err("Input BLAST XML file not found: %s" % xml_file) |
32 | 42 |
33 if not os.path.isfile(prop_file): | 43 if not os.path.isfile(prop_file): |
34 stop_err("Blast2GO configuration file not found: %s" % prop_file) | 44 stop_err("Blast2GO configuration file not found: %s" % prop_file) |
45 | |
46 def prepare_xml(original_xml, mangled_xml): | |
47 """Reformat BLAST XML to suit Blast2GO. | |
48 | |
49 Blast2GO can't cope with 1000s of <Iteration> tags within a | |
50 single <BlastResult> tag, so instead split this into one | |
51 full XML record per interation (i.e. per query). This gives | |
52 a concatenated XML file mimicing old versions of BLAST. | |
53 | |
54 This also checks for BLASTP or BLASTX output, and outputs | |
55 the number of queries. Galaxy will show this as "info". | |
56 """ | |
57 in_handle = open(original_xml) | |
58 footer = " </BlastOutput_iterations>\n</BlastOutput>\n" | |
59 header = "" | |
60 while True: | |
61 line = in_handle.readline() | |
62 if not line: | |
63 #No hits? | |
64 stop_err("Problem with XML file?") | |
65 if line.strip() == "<Iteration>": | |
66 break | |
67 header += line | |
68 | |
69 if "<BlastOutput_program>blastx</BlastOutput_program>" in header: | |
70 print "BLASTX output identified" | |
71 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header: | |
72 print "BLASTP output identified" | |
73 else: | |
74 in_handle.close() | |
75 stop_err("Expect BLASTP or BLASTX output") | |
76 | |
77 out_handle = open(mangled_xml, "w") | |
78 out_handle.write(header) | |
79 out_handle.write(line) | |
80 count = 1 | |
81 while True: | |
82 line = in_handle.readline() | |
83 if not line: | |
84 break | |
85 elif line.strip() == "<Iteration>": | |
86 #Insert footer/header | |
87 out_handle.write(footer) | |
88 out_handle.write(header) | |
89 count += 1 | |
90 out_handle.write(line) | |
91 | |
92 out_handle.close() | |
93 in_handle.close() | |
94 print "Input has %i queries" % count | |
95 | |
35 | 96 |
36 def run(cmd): | 97 def run(cmd): |
37 #Avoid using shell=True when we call subprocess to ensure if the Python | 98 #Avoid using shell=True when we call subprocess to ensure if the Python |
38 #script is killed, so too is the child process. | 99 #script is killed, so too is the child process. |
39 try: | 100 try: |
42 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 103 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
43 #Use .communicate as can get deadlocks with .wait(), | 104 #Use .communicate as can get deadlocks with .wait(), |
44 stdout, stderr = child.communicate() | 105 stdout, stderr = child.communicate() |
45 return_code = child.returncode | 106 return_code = child.returncode |
46 if return_code: | 107 if return_code: |
108 cmd_str = " ".join(cmd) | |
47 if stderr and stdout: | 109 if stderr and stdout: |
48 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr)) | 110 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) |
49 else: | 111 else: |
50 stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr)) | 112 stop_err("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) |
51 #For early diagnostics, | 113 #For early diagnostics, |
52 else: | 114 else: |
53 print stdout | 115 print stdout |
54 print stderr | 116 print stderr |
55 | 117 |
56 if not os.path.isfile(blast2go_jar): | 118 if not os.path.isfile(blast2go_jar): |
57 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar) | 119 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar) |
58 | 120 |
59 #We will have write access whereever the output should be, | 121 prepare_xml(xml_file, tmp_xml_file) |
122 #print "XML file prepared for Blast2GO" | |
123 | |
124 #We will have write access wherever the output should be, | |
60 #so we'll ask Blast2GO to use that as the stem for its output | 125 #so we'll ask Blast2GO to use that as the stem for its output |
61 #(it will append .annot to the filename) | 126 #(it will append .annot to the filename) |
62 cmd = ["java", "-jar", blast2go_jar, | 127 cmd = ["java", "-jar", blast2go_jar, |
63 "-in", xml_file, | 128 "-in", tmp_xml_file, |
64 "-prop", prop_file, | 129 "-prop", prop_file, |
65 "-out", tabular_file, | 130 "-out", tabular_file, #Used as base name for output files |
66 "-a"] | 131 "-a", # Generate *.annot tabular file |
132 #"-img", # Generate images, feature not in v2.3.5 | |
133 ] | |
134 #print " ".join(cmd) | |
67 run(cmd) | 135 run(cmd) |
136 | |
137 #Remove the temp XML file | |
138 os.remove(tmp_xml_file) | |
68 | 139 |
69 out_file = tabular_file + ".annot" | 140 out_file = tabular_file + ".annot" |
70 if not os.path.isfile(out_file): | 141 if not os.path.isfile(out_file): |
71 stop_err("ERROR - No output annotation file from Blast2GO") | 142 stop_err("ERROR - No output annotation file from Blast2GO") |
72 | 143 |