Mercurial > repos > fubar > weblogo3
changeset 3:78fe723fde78 draft
Uploaded
author | fubar |
---|---|
date | Wed, 25 Sep 2013 21:09:29 -0400 (2013-09-26) |
parents | 0dfe1dc2b274 |
children | 86afd5bfb5a4 |
files | rgweblogo/README rgweblogo/README~ rgweblogo/rgWebLogo3.py rgweblogo/rgWebLogo3.xml rgweblogo/tool_dependencies.xml |
diffstat | 5 files changed, 316 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/rgweblogo/README Wed Sep 25 07:23:16 2013 -0400 +++ b/rgweblogo/README Wed Sep 25 21:09:29 2013 -0400 @@ -4,7 +4,11 @@ Note that the image for the help must be in static/images for it to show up on the tool form - it's the same image as goes in test-data -**Installation** +**Automated Installation** +As a Galaxy admin, use the admin menu and select the search ToolShed option. This tool should be on the main toolshed - if not try the test toolshed. +Select it and choose "preview and install" - the process of downloading and installing weblogo3.3 and this wrapper should take a few minutes at most. + +**Manual Installation** Make sure weblogo3 is installed in your system python and is available on the path for all your nodes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgweblogo/README~ Wed Sep 25 21:09:29 2013 -0400 @@ -0,0 +1,30 @@ +This is a Galaxy tool wrapper for weblogo3 already available as a web app at the site below but neat as a Galaxy tool + +It generates sequence logos from fasta files such as the alignments generated by clustalw + +Note that the image for the help must be in static/images for it to show up on the tool form - it's the same image as goes in test-data + +**Installation** + +Make sure weblogo3 is installed in your system python and is available on the path for all your nodes + +Move the test data files to your galaxy root test-data +Move the xml file to a subdirectory of your tools folder (eg rgenetics/) and then add a line in your tool_conf.xml to point there. +Run +sh run_functional_tests.sh -id weblogo3 +to make sure the tests work + +then restart Galaxy and you should be good to go. + + +**Attribution** + +Source for the weblogo3 python executable is at http://weblogo.berkeley.edu + +Written by Ross Lazarus for the Rgenetics project + +Copyright Ross Lazarus at gmail com 2011 + +All rights reserved. + +Released under the LGPL - see http://www.gnu.org/copyleft/lesser.html
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgweblogo/rgWebLogo3.py Wed Sep 25 21:09:29 2013 -0400 @@ -0,0 +1,157 @@
"""
# modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
# rgWebLogo3.py
# wrapper to check that all fasta files are same length

Galaxy wrapper around the weblogo3 command line program.

The wrapper first verifies that every sequence in the input fasta file has
the same length, then builds the weblogo command line from the parsed
options and runs it, capturing stdout/stderr.  Any problem is reported on
stderr and the script exits with status 1.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import optparse
import os
import subprocess
import sys
import tempfile

WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?


def _die(*lines):
    """Write each message line to stderr, then exit with status 1."""
    for line in lines:
        sys.stderr.write(line + '\n')
    sys.exit(1)


class WL3(object):
    """
    simple wrapper class to check fasta sequence lengths are all identical
    and then run weblogo on the file.

    opts is the optparse result; the attributes read here are input, output,
    logoname, outformat, size, lower, upper, colours and units.
    """
    FASTASTARTSYM = '>'
    badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
    badfasta = 'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully'

    def __init__(self, opts=None):
        """Store the options; raises ValueError when opts is None."""
        # explicit check rather than assert, which is stripped under python -O
        if opts is None:
            raise ValueError('WL3 class needs opts passed in - got None')
        self.opts = opts
        self.clparams = {}  # weblogo flag -> value, filled in by run()

    def whereis(self, program):
        """Return the full path of the first executable file called program
        found on PATH, or None when there is none.
        """
        for folder in os.environ.get('PATH', '').split(os.pathsep):
            candidate = os.path.join(folder, program)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
        return None

    def _build_args(self, executable):
        """Return the argument vector: executable followed by every
        flag/value pair in clparams, in sorted flag order.

        Each value is its own list element, so values containing spaces or
        shell metacharacters (titles, 'base pairing') reach weblogo intact.
        """
        args = [executable]
        for flag, value in sorted(self.clparams.items()):
            args.extend([flag, str(value)])
        return args

    def runCL(self):
        """ construct and run a command line

        Returns a one-entry log string describing the command, its exit
        status and whatever it wrote to stdout/stderr.  Exits with status 1
        when weblogo cannot be found, cannot be started, or returns non-zero.
        """
        wl = self.whereis(WEBLOGO)
        if not wl:
            _die('## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO,
                 '## Please ensure it is installed and working from http://code.google.com/p/weblogo')
        args = self._build_args(wl)
        cl = ' '.join(args)  # for log messages only - never passed to a shell
        # TemporaryFile is removed automatically, even when we exit early
        with tempfile.TemporaryFile(mode='w+') as tlf:
            try:
                rval = subprocess.call(args, stdout=tlf, stderr=tlf)
            except OSError as err:
                _die('## rgWebLogo3.py error - could not execute %s: %s' % (cl, err))
            tlf.seek(0)
            tlogs = tlf.read()
        if len(tlogs) > 1:
            s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl, rval, tlogs)
        else:
            s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl, rval)
        if rval != 0:
            # include the captured log so the user can see why weblogo failed
            _die('## rgWebLogo3.py error - executing %s returned error code %d' % (cl, rval),
                 '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO,
                 '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO,
                 s)
        return s

    def iter_fasta(self):
        """
        generator for fasta sequences from a file

        Yields (sequence name, sequence length) for each record in
        opts.input.  Only the running length is kept, not the residues, so
        memory use stays flat for big files.  Exits with status 1 if any
        text precedes the first header line.
        """
        seqname = None
        seqlen = 0
        with open(self.opts.input, 'r') as fastaf:
            for row in fastaf:
                if row.startswith(self.FASTASTARTSYM):
                    if seqname is not None: # finish the previous record
                        yield (seqname, seqlen)
                    seqname = row[1:].strip()
                    seqlen = 0
                elif seqname is None: # sequence text before any header
                    _die(self.badfasta % (self.opts.input, self.FASTASTARTSYM))
                else: # sequence row
                    seqlen += len(row.strip())
        if seqname is not None: # last one
            yield (seqname, seqlen)

    def fcheck(self):
        """ are all fasta sequence same length?
        might be mongo big

        Returns a summary line with the number of sequences and their common
        length.  Exits with status 1 when lengths differ or when the file
        holds no sequences at all.
        """
        flen = None
        nseq = 0
        for seqname, seqlen in self.iter_fasta():
            if flen is None:
                flen = seqlen
            elif seqlen != flen:
                _die(self.badseq % self.opts.input)
            nseq += 1
        if nseq == 0:
            _die('Invalid fasta file %s - no sequences found - please read the tool documentation carefully' % self.opts.input)
        return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, nseq, flen)

    def _collect_params(self):
        """Translate the parsed options into weblogo flags in clparams."""
        opts = self.opts
        self.clparams['-f'] = opts.input
        self.clparams['-o'] = opts.output
        self.clparams['-t'] = opts.logoname # no shell involved so no quoting needed
        self.clparams['-F'] = opts.outformat
        optional = (('-s', opts.size), ('-l', opts.lower), ('-u', opts.upper),
                    ('-c', opts.colours), ('-U', opts.units))
        for flag, value in optional:
            if value is not None:
                self.clparams[flag] = value

    def run(self):
        """Check the input, run weblogo and return (check summary, run log)."""
        check = self.fcheck()
        self._collect_params()
        s = self.runCL()
        return check, s


def main():
    """
    called as
<command interpreter="python">
    rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
#if $range.mode == 'part'
-l "$range.seqstart" -u "$range.seqend"
#end if
    </command>

    """
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-F', '--outformat', default='png')
    op.add_option('-s', '--size', default=None)
    op.add_option('-o', '--output', default='rgWebLogo3')
    op.add_option('-t', '--logoname', default='rgWebLogo3')
    op.add_option('-c', '--colours', default=None)
    op.add_option('-l', '--lower', default=None)
    op.add_option('-u', '--upper', default=None)
    op.add_option('-U', '--units', default=None)
    opts, args = op.parse_args()
    # op.error prints usage plus the message and exits; unlike assert it
    # still works under python -O
    if opts.input is None:
        op.error('weblogo3 needs a -i parameter with a fasta input file - cannot open')
    if not os.path.isfile(opts.input):
        op.error('weblogo3 needs a valid fasta input file - cannot open %s' % opts.input)
    w = WL3(opts)
    checks, s = w.run()
    sys.stdout.write(checks + '\n') # for info


if __name__ == '__main__':
    main()
--- a/rgweblogo/rgWebLogo3.xml Wed Sep 25 07:23:16 2013 -0400 +++ b/rgweblogo/rgWebLogo3.xml Wed Sep 25 21:09:29 2013 -0400 @@ -1,13 +1,22 @@ -<tool id="weblogo3" name="Sequence Logo" version="0.1"> - <description>generator for fasta (eg Clustal alignments)</description> - <command> - weblogo -F $outformat -s $size -f $input -o $output -t "$logoname" - </command> +<tool id="rgweblogo3" name="Sequence Logo" version="0.5"> + <stdio> + <regex match=".*" source="both" level="info" description="stdout/err chatter from rgWebLogo3.py"/> + </stdio> + <requirements> + <requirement type="package" version="3.3">package_weblogo</requirement> + </requirements> + <description>Generator from fasta</description> + <command interpreter="python"> + rgWebLogo3.py -F $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" -U "$units" +#if $range.mode == 'part' +-l "$range.seqstart" -u "$range.seqend" +#end if + </command> <inputs> <page> <param format="fasta" name="input" type="data" label="Fasta File" /> - <param name="logoname" label="Name for output logo - will appear on graphics" type="text" size="50" value="Galaxy/Rgenetics weblogo" /> - <param name="outformat" type="select" label="Output weblogo format" > + <param name="logoname" label="Title for output Sequence Logo" type="text" size="50" value="Galaxy-Rgenetics Sequence Logo" /> + <param name="outformat" type="select" label="Output format for image (or text report)" > <option value="png" selected="True">PNG screen quality</option> <option value="png_print">High quality printable PNG</option> <option value="pdf">PDF</option> @@ -15,6 +24,42 @@ <option value="eps">EPS</option> <option value="txt">Text (shows the detailed calculations for each position - no image)</option> </param> + <param name="units" type="select" label="Display Units" + help="What the height of each logo element depicts - eg bits of entropy (default)"> + <option value="bits" selected="True">Entropy (bits)</option> + <option 
value="probability">Probability</option> + <option value="nats">Nats</option> + <option value="kT">kT</option> + <option value="kJ/mol">kJ/mol</option> + <option value="kcal/mol">kcal/mol</option> + </param> + <param name="colours" type="select" label="Colour scheme for output Sequence Logo" + help="Note that some of these only make sense for protein sequences!"> + <option value="auto" selected="True">Default automatic colour selection</option> + <option value="base pairing">Base pairing</option> + <option value="charge">Charge colours</option> + <option value="chemistry">Chemistry colours</option> + <option value="classic">Classical colours</option> + <option value="hydrophobicity">Hydrophobicity</option> + <option value="monochrome">monochrome</option> + </param> + + + <conditional name="range"> + <param name="mode" type="select" label="Include entire sequence (default) or specify a subsequence range to use"> + <option value="complete" selected="true">complete sequence</option> + <option value="part">Only use a part of the sequence</option> + </param> + <when value="complete"> + </when> + <when value="part"> + <param name="seqstart" size="5" type="integer" value="1" help="WARNING: Specifying indexes outside the sequence lengths will cause unpredictable but bad consequences!" 
+ label="Index (eg 1=first letter) of the start of the sequence range to include in the logo"> + </param> + <param name="seqend" size="5" type="integer" value="99999" label="Index (eg 75=75th letter) of the end of the sequence range to include in the logo" > + </param> + </when> + </conditional> <param name="size" type="select" label="Output weblogo size" > <option value="large" selected="True">Large</option> <option value="medium">Medium</option> @@ -23,7 +68,7 @@ </page> </inputs> <outputs> - <data format="pdf" name="output" label="${logoname}_output.${outformat}"> + <data format="pdf" name="output" label="${logoname}.${outformat}"> <change_format> <when input="outformat" value="png_print" format="png" /> <when input="outformat" value="png" format="png" /> @@ -34,16 +79,26 @@ </data> </outputs> <tests> - <test> - + <test> <param name="input" value="rgClustal_testout.fasta" /> <param name = "logoname" value="Galaxy/Rgenetics weblogo" /> <param name = "outformat" value="jpeg" /> - <param name = "size" value="medium" /> - - <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" /> + <param name = "mode" value="complete" /> + <param name = "size" value="medium" /> + <param name = "colours" value="auto" /> + <param name = "units" value="bits" /> + <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" compare="sim_size" delta="10000" /> </test> - + <test> + <param name="input" value="rgClustal_testout.fasta" /> + <param name = "logoname" value="Galaxy/Rgenetics weblogo" /> + <param name = "outformat" value="png" /> + <param name = "mode" value="complete" /> + <param name = "size" value="medium" /> + <param name = "colours" value="auto" /> + <param name = "units" value="probability" /> + <output name="output" file="rgWebLogo3_test2.png" ftype="png" compare="sim_size" delta="10000" /> + </test> </tests> <help> @@ -51,35 +106,33 @@ This tool uses Weblogo3_ in Galaxy to generate a sequence logo. The input file must be a fasta file in your current history. 
+It is recommended for (eg) viewing multiple sequence alignments output from the clustalw tool - set the output to fasta and feed +it in to this tool. + A typical output looks like this -.. image:: ./static/images/rgWebLogo3_test.jpg +.. image:: ${static_path}/images/rgWebLogo3_test.jpg ---- -**Why use WebLogo in Galaxy?** - -Weblogo3_ is a good example of an easy to use tool and there are plenty of other web accessible weblogo generator sites available. +**Warning about input Fasta format files** -However, none of those offer the combination of: - -1) persistence of analyses and data in multiple shareable histories, pages and libraries +The Weblogo3 program used by this tool will fail if your fasta sequences are not all EXACTLY the same length. The tool will provide a warning +and refuse to call the weblogo3 executable if irregular length sequences are detected. -2) convenient access to shared data libraries, workflows and user controlled pages, and to 3rd party data sources like UCSC tables. - -3) analyses integrated with many other applicable generic and specialized tools already available for downstream processing. +Fasta alignments from the companion ClustalW Galaxy tool will work but many other fasta files may cause this tool to fail - please do not file +a Galaxy bug report - this is a feature of the tool and a problem with your source data - not a tool error - please make certain all your fasta +sequences are exactly the same length! -that you get for free when you use Galaxy. No muss; no fuss. - ----- **Attribution** -Weblogo attribution and associated documentation are available at Weblogo3_ +Weblogo attribution and associated documentation are available at Weblogo3_ -This wrapper was written by Ross Lazarus for the rgenetics project and the source code is licensed under the LGPL_ like other rgenetics artefacts +This Galaxy wrapper calls their software so depends on it and their license for your legal comfort. 
+The wrapper was written by Ross Lazarus for the rgenetics project and the source code is licensed under the LGPL_ like other rgenetics artefacts -.. _Weblogo3: http://weblogo.berkeley.edu/ +.. _Weblogo3: http://weblogo.threeplusone.com/manual.html .. _LGPL: http://www.gnu.org/copyleft/lesser.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgweblogo/tool_dependencies.xml Wed Sep 25 21:09:29 2013 -0400 @@ -0,0 +1,41 @@ + +<?xml version="1.0"?> +<tool_dependency> + <package name="numpy" version="1.7"> + <repository name="package_numpy_1_7" owner="blankenberg" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="ghostscript" version="9.07"> + <repository name="package_ghostscript_9_07" owner="fubar" prior_installation_required="True" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="package_weblogo" version="3.3"> + <install version="1.0"> + <actions> + <!-- populate the environment variables from the dependend repos --> + <action type="set_environment_for_install"> + <repository name="package_ghostscript_9_07" owner="fubar" toolshed="http://testtoolshed.g2.bx.psu.edu/"> + <package name="ghostscript" version="9.07" /> + </repository> + <repository name="package_numpy_1_7" owner="blankenberg" toolshed="http://testtoolshed.g2.bx.psu.edu/"> + <package name="numpy" version="1.7" /> + </repository> + </action> + <!-- install weblogo --> + <action type="make_directory">$INSTALL_DIR/lib/python</action> + <action type="download_by_url">http://weblogo.googlecode.com/files/weblogo-3.3.tar.gz</action> + <action type="shell_command">export PYTHONPATH=$INSTALL_DIR/lib/python:$PYTHONPATH_NUMPY:$PYTHONPATH && + python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin</action> + <action type="set_environment"> + <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> + <environment_variable action="append_to" name="PYTHONPATH">$ENV[PYTHONPATH_NUMPY]</environment_variable> + <environment_variable action="prepend_to" name="PATH">$ENV[PATH_NUMPY]</environment_variable> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + <environment_variable action="set_to" 
name="WEBLOGO_PATH">$INSTALL_DIR/bin/weblogo</environment_variable> + </action> + </actions> + </install> + <readme> + weblogo3 is a python version of the old weblogo2.8 or so. Requires numpy and ghostscript so these are installed if not already on your system - if that happens, please be patient + while numpy compiles - especially if the ATLAS libraries are being installed - which is not at present. + </readme> + </package> +</tool_dependency>