# HG changeset patch
# User peterjc
# Date 1368186506 14400
# Node ID 2b35b5c4b7f49da0b5628ee5fe01b6da624ae19c
# Parent af3174637834b211db599e703bd96191b790bf5c
Uploaded v0.2.5, preview 2, fixed bug in RXLR tools
diff -r af3174637834 -r 2b35b5c4b7f4 test-data/empty_rxlr.Bhattacharjee2006.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Bhattacharjee2006.tabular Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID Bhattacharjee2006
diff -r af3174637834 -r 2b35b5c4b7f4 test-data/empty_rxlr.Whisson2007.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Whisson2007.tabular Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID Whisson2007
diff -r af3174637834 -r 2b35b5c4b7f4 test-data/empty_rxlr.Win2007.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Win2007.tabular Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID Win2007
diff -r af3174637834 -r 2b35b5c4b7f4 tools/protein_analysis/README
--- a/tools/protein_analysis/README Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/README Fri May 10 07:48:26 2013 -0400
@@ -147,6 +147,9 @@
v0.2.3 - Added unit tests for WoLF PSORT.
v0.2.4 - Added unit tests for Promoter 2
v0.2.5 - Link to Tool Shed added to help text and this documentation.
+ - More unit tests.
+ - Fixed bug with RXLR tool and empty FASTA files.
+
Developers
diff -r af3174637834 -r 2b35b5c4b7f4 tools/protein_analysis/rxlr_motifs.py
--- a/tools/protein_analysis/rxlr_motifs.py Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/rxlr_motifs.py Fri May 10 07:48:26 2013 -0400
@@ -111,25 +111,7 @@
stop_err("Missing HMM file for Whisson et al. (2007)")
if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher)
- #I've left the code to handle HMMER 3 in situ, in case
- #we revisit the choice to insist on HMMER 2.
- hmmer3 = (3 == get_hmmer_version(hmmer_search))
- #Using zero (or 5.6?) for bitscore threshold
- if hmmer3:
- #The HMMER3 table output is easy to parse
- #In HMMER3 can't use both -T and -E
- cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
- % (hmmer_search, hmm_output_file, hmm_file, fasta_file)
- else:
- #For HMMER2 we are stuck with parsing stdout
- #Put 1e6 to effectively have no expectation threshold (otherwise
- #HMMER defaults to 10 and the calculated e-value depends on the
- #input FASTA file, and we can loose hits of interest).
- cmd = "%s -T 0 -E 1e6 %s %s > %s" \
- % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
- return_code = os.system(cmd)
- if return_code:
- stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd))
+
hmm_hits = set()
valid_ids = set()
for title, seq in fasta_iterator(fasta_file):
@@ -138,29 +120,53 @@
stop_err("Duplicated identifier %r" % name)
else:
valid_ids.add(name)
- handle = open(hmm_output_file)
- for line in handle:
- if not line.strip():
- #We expect blank lines in the HMMER2 stdout
- continue
- elif line.startswith("#"):
- #Header
- continue
+ if not valid_ids:
+ #Special case, don't need to run HMMER if there are no sequences
+ pass
+ else:
+ #I've left the code to handle HMMER 3 in situ, in case
+ #we revisit the choice to insist on HMMER 2.
+ hmmer3 = (3 == get_hmmer_version(hmmer_search))
+ #Using zero (or 5.6?) for bitscore threshold
+ if hmmer3:
+ #The HMMER3 table output is easy to parse
+ #In HMMER3 can't use both -T and -E
+ cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
+ % (hmmer_search, hmm_output_file, hmm_file, fasta_file)
else:
- name = line.split(None,1)[0]
- #Should be a sequence name in the HMMER3 table output.
- #Could be anything in the HMMER2 stdout.
- if name in valid_ids:
- hmm_hits.add(name)
- elif hmmer3:
- stop_err("Unexpected identifer %r in hmmsearch output" % name)
- handle.close()
- #if hmmer3:
- # print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
- #else:
- # print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
- #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
- os.remove(hmm_output_file)
+ #For HMMER2 we are stuck with parsing stdout
+ #Put 1e6 to effectively have no expectation threshold (otherwise
+ #HMMER defaults to 10 and the calculated e-value depends on the
+ #input FASTA file, and we can loose hits of interest).
+ cmd = "%s -T 0 -E 1e6 %s %s > %s" \
+ % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
+ return_code = os.system(cmd)
+ if return_code:
+ stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
+
+ handle = open(hmm_output_file)
+ for line in handle:
+ if not line.strip():
+ #We expect blank lines in the HMMER2 stdout
+ continue
+ elif line.startswith("#"):
+ #Header
+ continue
+ else:
+ name = line.split(None,1)[0]
+ #Should be a sequence name in the HMMER3 table output.
+ #Could be anything in the HMMER2 stdout.
+ if name in valid_ids:
+ hmm_hits.add(name)
+ elif hmmer3:
+ stop_err("Unexpected identifer %r in hmmsearch output" % name)
+ handle.close()
+ #if hmmer3:
+ # print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+ #else:
+ # print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+ #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
+ os.remove(hmm_output_file)
del valid_ids
diff -r af3174637834 -r 2b35b5c4b7f4 tools/protein_analysis/rxlr_motifs.xml
--- a/tools/protein_analysis/rxlr_motifs.xml Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/rxlr_motifs.xml Fri May 10 07:48:26 2013 -0400
@@ -32,6 +32,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+