changeset 18:2b35b5c4b7f4 draft

Uploaded v0.2.5, preview 2, fixed bug in RXLR tools
author peterjc
date Fri, 10 May 2013 07:48:26 -0400
parents af3174637834
children 4cd848c5590b
files test-data/empty_rxlr.Bhattacharjee2006.tabular test-data/empty_rxlr.Whisson2007.tabular test-data/empty_rxlr.Win2007.tabular tools/protein_analysis/README tools/protein_analysis/rxlr_motifs.py tools/protein_analysis/rxlr_motifs.xml
diffstat 6 files changed, 68 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Bhattacharjee2006.tabular	Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Bhattacharjee2006
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Whisson2007.tabular	Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Whisson2007
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_rxlr.Win2007.tabular	Fri May 10 07:48:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Win2007
--- a/tools/protein_analysis/README	Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/README	Fri May 10 07:48:26 2013 -0400
@@ -147,6 +147,9 @@
 v0.2.3 - Added unit tests for WoLF PSORT.
 v0.2.4 - Added unit tests for Promoter 2
 v0.2.5 - Link to Tool Shed added to help text and this documentation.
+       - More unit tests.
+       - Fixed bug with RXLR tool and empty FASTA files.
+
 
 
 Developers
--- a/tools/protein_analysis/rxlr_motifs.py	Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/rxlr_motifs.py	Fri May 10 07:48:26 2013 -0400
@@ -111,25 +111,7 @@
         stop_err("Missing HMM file for Whisson et al. (2007)")
     if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
         stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher)
-    #I've left the code to handle HMMER 3 in situ, in case
-    #we revisit the choice to insist on HMMER 2.
-    hmmer3 = (3 == get_hmmer_version(hmmer_search))
-    #Using zero (or 5.6?) for bitscore threshold
-    if hmmer3:
-        #The HMMER3 table output is easy to parse
-        #In HMMER3 can't use both -T and -E
-        cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
-              % (hmmer_search, hmm_output_file, hmm_file, fasta_file)
-    else:
-        #For HMMER2 we are stuck with parsing stdout
-        #Put 1e6 to effectively have no expectation threshold (otherwise
-        #HMMER defaults to 10 and the calculated e-value depends on the
-        #input FASTA file, and we can loose hits of interest).
-        cmd = "%s -T 0 -E 1e6 %s %s > %s" \
-              % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
-    return_code = os.system(cmd)
-    if return_code:
-        stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd))
+
     hmm_hits = set()
     valid_ids = set()
     for title, seq in fasta_iterator(fasta_file):
@@ -138,29 +120,53 @@
             stop_err("Duplicated identifier %r" % name)
         else:
             valid_ids.add(name)
-    handle = open(hmm_output_file)
-    for line in handle:
-        if not line.strip():
-            #We expect blank lines in the HMMER2 stdout
-            continue
-        elif line.startswith("#"):
-            #Header
-            continue
+    if not valid_ids:
+        #Special case, don't need to run HMMER if there are no sequences
+        pass
+    else:
+        #I've left the code to handle HMMER 3 in situ, in case
+        #we revisit the choice to insist on HMMER 2.
+        hmmer3 = (3 == get_hmmer_version(hmmer_search))
+        #Using zero (or 5.6?) for bitscore threshold
+        if hmmer3:
+            #The HMMER3 table output is easy to parse
+            #In HMMER3 can't use both -T and -E
+            cmd = "%s -T 0 --tblout %s --noali %s %s > /dev/null" \
+                  % (hmmer_search, hmm_output_file, hmm_file, fasta_file)
         else:
-            name = line.split(None,1)[0]
-            #Should be a sequence name in the HMMER3 table output.
-            #Could be anything in the HMMER2 stdout.
-            if name in valid_ids:
-                hmm_hits.add(name)
-            elif hmmer3:
-                stop_err("Unexpected identifer %r in hmmsearch output" % name)
-    handle.close()
-    #if hmmer3:
-    #    print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
-    #else:
-    #    print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))  
-    #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
-    os.remove(hmm_output_file)
+            #For HMMER2 we are stuck with parsing stdout
+            #Put 1e6 to effectively have no expectation threshold (otherwise
+            #HMMER defaults to 10 and the calculated e-value depends on the
+            #input FASTA file, and we can loose hits of interest).
+            cmd = "%s -T 0 -E 1e6 %s %s > %s" \
+                  % (hmmer_search, hmm_file, fasta_file, hmm_output_file)
+        return_code = os.system(cmd)
+        if return_code:
+            stop_err("Error %i from hmmsearch:\n%s" % (return_code, cmd), return_code)
+
+        handle = open(hmm_output_file)
+        for line in handle:
+            if not line.strip():
+                #We expect blank lines in the HMMER2 stdout
+                continue
+            elif line.startswith("#"):
+                #Header
+                continue
+            else:
+                name = line.split(None,1)[0]
+                #Should be a sequence name in the HMMER3 table output.
+                #Could be anything in the HMMER2 stdout.
+                if name in valid_ids:
+                    hmm_hits.add(name)
+                elif hmmer3:
+                    stop_err("Unexpected identifer %r in hmmsearch output" % name)
+        handle.close()
+        #if hmmer3:
+        #    print "HMMER3 hits for %i/%i" % (len(hmm_hits), len(valid_ids))
+        #else:
+        #    print "HMMER2 hits for %i/%i" % (len(hmm_hits), len(valid_ids))  
+        #print "%i/%i matched HMM" % (len(hmm_hits), len(valid_ids))
+        os.remove(hmm_output_file)
     del valid_ids
 
 
--- a/tools/protein_analysis/rxlr_motifs.xml	Thu May 02 13:32:58 2013 -0400
+++ b/tools/protein_analysis/rxlr_motifs.xml	Fri May 10 07:48:26 2013 -0400
@@ -32,6 +32,21 @@
             <param name="model" value="Win2007" />
             <output name="tabular_file" file="rxlr_win_et_al_2007.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
+            <param name="model" value="Bhattacharjee2006"/>
+            <output name="tabular_file" file="empty_rxlr.Bhattacharjee2006.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
+            <param name="model" value="Win2007"/>
+            <output name="tabular_file" file="empty_rxlr.Win2007.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_file" value="empty.fasta" ftype="fasta"/>
+            <param name="model" value="Whisson2007"/>
+            <output name="tabular_file" file="empty_rxlr.Whisson2007.tabular" ftype="tabular"/>
+        </test>
     </tests>
     <help>