changeset 3:78fe723fde78 draft

Uploaded
author fubar
date Wed, 25 Sep 2013 21:09:29 -0400
parents 0dfe1dc2b274
children 86afd5bfb5a4
files rgweblogo/README rgweblogo/README~ rgweblogo/rgWebLogo3.py rgweblogo/rgWebLogo3.xml rgweblogo/tool_dependencies.xml
diffstat 5 files changed, 316 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/rgweblogo/README	Wed Sep 25 07:23:16 2013 -0400
+++ b/rgweblogo/README	Wed Sep 25 21:09:29 2013 -0400
@@ -4,7 +4,11 @@
 
 Note that the image for the help must be in static/images for it to show up on the tool form - it's the same image as goes in test-data
 
-**Installation**
+**Automated Installation**
+As a Galaxy admin, use the admin menu and select the search ToolShed option. This tool should be on the main toolshed - if not try the test toolshed.
+Select it and choose "preview and install" - the process of downloading and installing weblogo3.3 and this wrapper should take a few minutes at most.
+
+**Manual Installation**
 
 Make sure weblogo3 is installed in your system python and is available on the path for all your nodes
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgweblogo/README~	Wed Sep 25 21:09:29 2013 -0400
@@ -0,0 +1,30 @@
+This is a Galaxy tool wrapper for weblogo3 already available as a web app at the site below but neat as a Galaxy tool
+
+It generates sequence logos from fasta files such as the alignments generated by clustalw
+
+Note that the image for the help must be in static/images for it to show up on the tool form - it's the same image as goes in test-data
+
+**Installation**
+
+Make sure weblogo3 is installed in your system python and is available on the path for all your nodes
+
+Move the test data files to your galaxy root test-data
+Move the xml file to a subdirectory of your tools folder (eg rgenetics/) and then add a line in your tool_conf.xml to point there.
+Run
+sh run_functional_tests.sh -id weblogo3
+to make sure the tests work
+
+then restart Galaxy and you should be good to go.
+
+
+**Attribution**
+
+Source for the weblogo3 python executable is at http://weblogo.berkeley.edu
+
+Written by Ross Lazarus for the Rgenetics project
+
+Copyright Ross Lazarus at gmail com 2011
+
+All rights reserved.
+
+Released under the LGPL - see http://www.gnu.org/copyleft/lesser.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgweblogo/rgWebLogo3.py	Wed Sep 25 21:09:29 2013 -0400
@@ -0,0 +1,157 @@
+"""
+# modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
+# rgWebLogo3.py
+# wrapper to check that all fasta files are same length
+
+"""
+import optparse, os, sys, subprocess, tempfile
+
+WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
+
+class WL3:
+    """
+    simple wrapper class to check fasta sequence lengths are all identical
+    """
+    FASTASTARTSYM = '>'
+    badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
+
+    def __init__(self,opts=None):
+        assert opts<>None,'WL3 class needs opts passed in - got None'
+        self.opts = opts
+        self.fastaf = file(self.opts.input,'r')
+        self.clparams = {}
+
+    def whereis(self,program):
+        for path in os.environ.get('PATH', '').split(':'):
+            if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)):
+                return os.path.join(path, program)
+        return None
+
+    def runCL(self):
+        """ construct and run a command line
+        """
+        wl = self.whereis(WEBLOGO)
+        if not wl:
+             print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO
+             print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo'
+             sys.exit(1)
+        cll = [WEBLOGO,]
+        cll += [' '.join(it) for it in list(self.clparams.items())]
+        cl = ' '.join(cll)
+        assert cl > '', 'runCL needs a command line as clparms'
+        fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt')
+        tlf = open(templog,'w')
+        process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf)
+        rval = process.wait()
+        tlf.close()
+        tlogs = ''.join(open(templog,'r').readlines())
+        if len(tlogs) > 1:
+            s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs)
+        else:
+            s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval)
+        os.unlink(templog) # always
+        if rval <> 0:
+             print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval)
+             print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO
+             print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO
+             sys.exit(1)
+        return s
+
+        
+    def iter_fasta(self):
+        """
+        generator for fasta sequences from a file
+        """
+        aseq = []
+        seqname = None
+        for i,row in enumerate(self.fastaf):
+            if row.startswith(self.FASTASTARTSYM):
+                if seqname <> None: # already in a sequence
+                    s = ''.join(aseq)
+                    l = len(s)
+                    yield (seqname,l)
+                    seqname = row[1:].strip()
+                    aseq = []
+                else:
+                    if i > 0:
+                        print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
+                        sys.exit(1)
+                    else:
+                        seqname = row[1:].strip() 
+            else: # sequence row
+                if seqname == None:
+                    print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
+                    sys.exit(1) 
+                else:
+                    aseq.append(row.strip())
+                
+        if seqname <> None: # last one
+            l = len(''.join(aseq))
+            yield (seqname,l)
+                
+        
+    def fcheck(self):
+        """ are all fasta sequence same length?
+        might be mongo big
+        """
+        flen = None
+        lasti = None
+        f = self.iter_fasta()
+        for i,(seqname,seqlen) in enumerate(f):
+            lasti = i
+            if i == 0:
+                flen = seqlen
+            else:
+                if seqlen <> flen:
+                    print >> sys.stderr,self.badseq % self.opts.input
+                    sys.exit(1)
+        return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input,lasti,flen)
+
+
+    def run(self):
+        check = self.fcheck()
+        self.clparams['-f'] = self.opts.input
+        self.clparams['-o'] = self.opts.output
+        self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string       
+        self.clparams['-F'] = self.opts.outformat       
+        if self.opts.size <> None:
+            self.clparams['-s'] = self.opts.size
+        if self.opts.lower <> None:
+            self.clparams['-l'] = self.opts.lower
+        if self.opts.upper <> None:
+            self.clparams['-u'] = self.opts.upper        
+        if self.opts.colours <> None:
+            self.clparams['-c'] = self.opts.colours
+        if self.opts.units <> None:
+            self.clparams['-U'] = self.opts.units
+        s = self.runCL()
+        return check,s
+
+
+if __name__ == '__main__':
+    '''
+    called as
+<command interpreter="python"> 
+    rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
+#if $range.mode == 'part'
+-l "$range.seqstart" -u "$range.seqend"
+#end if
+    </command>
+
+    '''
+    op = optparse.OptionParser()
+    op.add_option('-i', '--input', default=None)
+    op.add_option('-F', '--outformat', default='png')
+    op.add_option('-s', '--size', default=None) 
+    op.add_option('-o', '--output', default='rgWebLogo3')
+    op.add_option('-t', '--logoname', default='rgWebLogo3')
+    op.add_option('-c', '--colours', default=None)
+    op.add_option('-l', '--lower', default=None)
+    op.add_option('-u', '--upper', default=None)  
+    op.add_option('-U', '--units', default=None)  
+    opts, args = op.parse_args()
+    assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open'
+    assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input
+    w = WL3(opts)
+    checks,s = w.run()
+    print >> sys.stdout, checks # for info
--- a/rgweblogo/rgWebLogo3.xml	Wed Sep 25 07:23:16 2013 -0400
+++ b/rgweblogo/rgWebLogo3.xml	Wed Sep 25 21:09:29 2013 -0400
@@ -1,13 +1,22 @@
-<tool id="weblogo3" name="Sequence Logo" version="0.1">
-   <description>generator for fasta (eg Clustal alignments)</description>
-   <command> 
-    weblogo -F $outformat -s $size -f $input -o $output -t "$logoname"
-   </command>
+<tool id="rgweblogo3" name="Sequence Logo" version="0.5">
+  <stdio>
+   <regex match=".*" source="both" level="info" description="stdout/err chatter from rgWebLogo3.py"/>
+  </stdio>
+  <requirements>
+      <requirement type="package" version="3.3">package_weblogo</requirement>
+   </requirements>
+   <description>Generator from fasta</description>
+   <command interpreter="python"> 
+    rgWebLogo3.py -F $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" -U "$units"
+#if $range.mode == 'part'
+-l "$range.seqstart" -u "$range.seqend"
+#end if
+    </command>
   <inputs>
    <page>
     <param format="fasta" name="input" type="data" label="Fasta File" />
-    <param name="logoname" label="Name for output logo - will appear on graphics" type="text" size="50" value="Galaxy/Rgenetics weblogo" />
-    <param name="outformat" type="select" label="Output weblogo format" >
+    <param name="logoname" label="Title for output Sequence Logo" type="text" size="50" value="Galaxy-Rgenetics Sequence Logo" />
+    <param name="outformat" type="select" label="Output format for image (or text report)" >
       <option value="png" selected="True">PNG screen quality</option>
       <option value="png_print">High quality printable PNG</option>
       <option value="pdf">PDF</option>
@@ -15,6 +24,42 @@
       <option value="eps">EPS</option>
       <option value="txt">Text (shows the detailed calculations for each position - no image)</option>
     </param>
+    <param name="units" type="select" label="Display Units"
+      help="What the height of each logo element depicts - eg bits of entropy (default)">
+      <option value="bits" selected="True">Entropy (bits)</option>
+      <option value="probability">Probability</option>
+      <option value="nats">Nats</option>
+      <option value="kT">kT</option>
+      <option value="kJ/mol">kJ/mol</option>
+      <option value="kcal/mol">kcal/mol</option>
+    </param>
+    <param name="colours" type="select" label="Colour scheme for output Sequence Logo" 
+      help="Note that some of these only make sense for protein sequences!">
+      <option value="auto" selected="True">Default automatic colour selection</option>
+      <option value="base pairing">Base pairing</option>
+      <option value="charge">Charge colours</option>
+      <option value="chemistry">Chemistry colours</option>
+      <option value="classic">Classical colours</option>
+      <option value="hydrophobicity">Hydrophobicity</option>
+      <option value="monochrome">monochrome</option>
+    </param>
+
+    
+    <conditional name="range">
+        <param name="mode" type="select" label="Include entire sequence (default) or specify a subsequence range to use">
+          <option value="complete" selected="true">complete sequence</option>
+          <option value="part">Only use a part of the sequence</option>
+        </param>
+        <when value="complete">
+        </when>
+        <when value="part">    
+           <param name="seqstart" size="5" type="integer" value="1" help="WARNING: Specifying indexes outside the sequence lengths will cause unpredictable but bad consequences!" 
+             label="Index (eg 1=first letter) of the start of the sequence range to include in the logo">
+           </param>
+           <param name="seqend" size="5" type="integer" value="99999" label="Index (eg 75=75th letter) of the end of the sequence range to include in the logo" >
+           </param> 
+        </when>
+    </conditional>
     <param name="size" type="select" label="Output weblogo size" >
       <option value="large" selected="True">Large</option>
       <option value="medium">Medium</option>
@@ -23,7 +68,7 @@
    </page>
   </inputs>
   <outputs>
-    <data format="pdf" name="output"  label="${logoname}_output.${outformat}">
+    <data format="pdf" name="output"  label="${logoname}.${outformat}">
        <change_format>
            <when input="outformat" value="png_print" format="png" />
            <when input="outformat" value="png" format="png" />
@@ -34,16 +79,26 @@
     </data>
   </outputs>
   <tests>
-    <test>
-  
+    <test>  
       <param name="input" value="rgClustal_testout.fasta" />
       <param name = "logoname" value="Galaxy/Rgenetics weblogo" />
       <param name = "outformat" value="jpeg" />
-      <param name = "size" value="medium" />
-    
-      <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" />
+      <param name = "mode" value="complete" />
+      <param name = "size" value="medium" />      
+      <param name = "colours" value="auto" />
+      <param name = "units" value="bits" /> 
+      <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" compare="sim_size" delta="10000" />
     </test>
-
+    <test>  
+      <param name="input" value="rgClustal_testout.fasta" />
+      <param name = "logoname" value="Galaxy/Rgenetics weblogo" />
+      <param name = "outformat" value="png" />
+      <param name = "mode" value="complete" />
+      <param name = "size" value="medium" />      
+      <param name = "colours" value="auto" />
+      <param name = "units" value="probability" /> 
+      <output name="output" file="rgWebLogo3_test2.png" ftype="png" compare="sim_size" delta="10000" />
+    </test>
   </tests>
   <help>
 
@@ -51,35 +106,33 @@
 
 This tool uses Weblogo3_ in Galaxy to generate a sequence logo. The input file must be a fasta file in your current history.
 
+It is recommended for (eg) viewing multiple sequence alignments output from the clustalw tool - set the output to fasta and feed
+it in to this tool.
+
 A typical output looks like this
 
-.. image:: ./static/images/rgWebLogo3_test.jpg
+.. image:: ${static_path}/images/rgWebLogo3_test.jpg
 
 ----
 
-**Why use WebLogo in Galaxy?**
-
-Weblogo3_ is a good example of an easy to use tool and there are plenty of other web accessible weblogo generator sites available. 
+**Warning about input Fasta format files**
 
-However, none of those offer the combination of:
-
-1) persistence of analyses and data in multiple shareable histories, pages and libraries 
+The Weblogo3 program used by this tool will fail if your fasta sequences are not all EXACTLY the same length. The tool will provide a warning
+and refuse to call the weblogo3 executable if irregular length sequences are detected.
 
-2) convenient access to shared data libraries, workflows and user controlled pages,  and to 3rd party data sources like UCSC tables. 
-
-3) analyses integrated with many other applicable generic and specialized tools already available for downstream processing.
+Fasta alignments from the companion ClustalW Galaxy tool will work, but many other fasta files may cause this tool to fail. Please do not file
+a Galaxy bug report when that happens - this is a feature of the tool and a problem with your source data, not a tool error. Please make certain
+all your fasta sequences are exactly the same length!
 
-that you get for free when you use Galaxy. No muss; no fuss.
-
-----
 
 **Attribution**
 
-Weblogo attribution and associated documentation are available at Weblogo3_
+Weblogo attribution and associated documentation are available at Weblogo3_ 
 
-This wrapper was written by Ross Lazarus for the rgenetics project and the source code is licensed under the LGPL_ like other rgenetics artefacts
+This Galaxy wrapper calls the Weblogo3 software, so it depends on that software and on its license - please check the Weblogo3 license for your legal comfort.
+The wrapper was written by Ross Lazarus for the rgenetics project and the source code is licensed under the LGPL_ like other rgenetics artefacts
 
-.. _Weblogo3: http://weblogo.berkeley.edu/
+.. _Weblogo3: http://weblogo.threeplusone.com/manual.html 
 
 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgweblogo/tool_dependencies.xml	Wed Sep 25 21:09:29 2013 -0400
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+
+<tool_dependency>
+    <package name="numpy" version="1.7">
+        <repository name="package_numpy_1_7" owner="blankenberg" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="ghostscript" version="9.07">
+        <repository name="package_ghostscript_9_07" owner="fubar" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="package_weblogo" version="3.3">
+        <install version="1.0">
+            <actions>
+                <!-- populate the environment variables from the dependent repos -->
+                <action type="set_environment_for_install">
+                    <repository name="package_ghostscript_9_07" owner="fubar" toolshed="http://testtoolshed.g2.bx.psu.edu/">
+                        <package name="ghostscript" version="9.07" />
+                    </repository>
+                    <repository name="package_numpy_1_7" owner="blankenberg" toolshed="http://testtoolshed.g2.bx.psu.edu/">
+                        <package name="numpy" version="1.7" />
+                    </repository>
+                </action>
+                <!-- install weblogo -->
+                <action type="make_directory">$INSTALL_DIR/lib/python</action>
+                <action type="download_by_url">http://weblogo.googlecode.com/files/weblogo-3.3.tar.gz</action>
+                <action type="shell_command">export PYTHONPATH=$INSTALL_DIR/lib/python:$PYTHONPATH_NUMPY:$PYTHONPATH &amp;&amp; 
+                    python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action>
+                <action type="set_environment">
+                    <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable>
+                    <environment_variable action="append_to" name="PYTHONPATH">$ENV[PYTHONPATH_NUMPY]</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$ENV[PATH_NUMPY]</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                    <environment_variable action="set_to" name="WEBLOGO_PATH">$INSTALL_DIR/bin/weblogo</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            weblogo3 is a python version of the older weblogo (2.8 or so). It requires numpy and ghostscript, so these are installed first if they are not already on your system - if that happens, please be patient
+            while numpy compiles. It would take longer still if the ATLAS libraries were being installed, but at present they are not.
+        </readme>
+     </package>
+</tool_dependency>