Mercurial > repos > devteam > weblogo3
changeset 3:13045f6015cb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/weblogo3 commit d0f167e74e705788adcd9c846db011aee490074b
author | iuc |
---|---|
date | Fri, 17 Nov 2017 09:38:19 -0500 |
parents | ced02f5334a0 |
children | |
files | rgWebLogo3.py rgWebLogo3.xml tool_dependencies.xml |
diffstat | 3 files changed, 165 insertions(+), 187 deletions(-) [+] |
line wrap: on
line diff
--- a/rgWebLogo3.py Fri Dec 18 19:20:26 2015 -0500 +++ b/rgWebLogo3.py Fri Nov 17 09:38:19 2017 -0500 @@ -1,12 +1,17 @@ -""" # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion # rgWebLogo3.py # wrapper to check that all fasta files are same length -""" -import optparse, os, sys, subprocess, tempfile +from __future__ import print_function -WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it? +import optparse +import os +import subprocess +import sys +import tempfile + +WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it? + class WL3: """ @@ -15,13 +20,13 @@ FASTASTARTSYM = '>' badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully' - def __init__(self,opts=None): - assert opts<>None,'WL3 class needs opts passed in - got None' + def __init__(self, opts=None): + assert opts is not None, 'WL3 class needs opts passed in - got None' self.opts = opts - self.fastaf = file(self.opts.input,'r') + self.fastaf = open(self.opts.input, 'r') self.clparams = {} - def whereis(self,program): + def whereis(self, program): for path in os.environ.get('PATH', '').split(':'): if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)): return os.path.join(path, program) @@ -32,64 +37,62 @@ """ wl = self.whereis(WEBLOGO) if not wl: - print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO - print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo' + print('## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % (WEBLOGO), file=sys.stderr) + print('## Please ensure it is installed and working from https://github.com/weblogo/weblogo', file=sys.stderr) sys.exit(1) - cll = [WEBLOGO,] + cll = [WEBLOGO, ] cll += [' '.join(it) for it in list(self.clparams.items())] cl = ' '.join(cll) assert cl > '', 'runCL needs a command line as clparms' - fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt') - tlf = open(templog,'w') + fd, templog = tempfile.mkstemp(suffix='rgtempRun.txt') + tlf = open(templog, 'w') process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf) rval = process.wait() tlf.close() - tlogs = ''.join(open(templog,'r').readlines()) + tlogs = ''.join(open(templog, 'r').readlines()) if len(tlogs) > 1: - s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs) + s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl, rval, tlogs) else: - s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval) - os.unlink(templog) # always - if rval <> 0: - print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval) - print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO - print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO + s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl, rval) + os.unlink(templog) # always + if rval != 0: + print('## rgWebLogo3.py error - executing %s returned error code %d' % (cl, rval), file=sys.stderr) + print('## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO, file=sys.stderr) + print('## Please ensure %s is correctly installed and working on the command line -see https://github.com/weblogo/weblogo' % WEBLOGO, file=sys.stderr) sys.exit(1) return s - def iter_fasta(self): """ generator for fasta sequences from a file """ aseq = [] seqname = None - for i,row in enumerate(self.fastaf): + for i, row in enumerate(self.fastaf): if row.startswith(self.FASTASTARTSYM): - if seqname <> None: # already in a sequence + if seqname is not None: # already in a sequence s = ''.join(aseq) - l = len(s) - yield (seqname,l) + ls = len(s) + yield (seqname, ls) seqname = row[1:].strip() aseq = [] else: if i > 0: - print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) + print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr) sys.exit(1) else: - seqname = row[1:].strip() - else: # sequence row - if seqname == None: - print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM) - sys.exit(1) + seqname = row[1:].strip() + else: # sequence row + if seqname is None: + print('Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input, self.FASTASTARTSYM), file=sys.stderr) + sys.exit(1) else: aseq.append(row.strip()) - - if seqname <> None: # last one - l = len(''.join(aseq)) - yield (seqname,l) - - + + if seqname is not None: # last one + las = len(''.join(aseq)) + yield (seqname, las) + def fcheck(self): """ are all fasta sequence same length? might be mongo big @@ -97,61 +100,50 @@ flen = None lasti = None f = self.iter_fasta() - for i,(seqname,seqlen) in enumerate(f): + for i, (seqname, seqlen) in enumerate(f): lasti = i if i == 0: flen = seqlen else: - if seqlen <> flen: - print >> sys.stderr,self.badseq % self.opts.input + if seqlen != flen: + print(self.badseq % self.opts.input, file=sys.stderr) sys.exit(1) - return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti+1, flen) - + return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, lasti + 1, flen) def run(self): check = self.fcheck() self.clparams['-f'] = self.opts.input self.clparams['-o'] = self.opts.output - self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string - self.clparams['-F'] = self.opts.outformat - if self.opts.size <> None: + self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string + self.clparams['-F'] = self.opts.outformat + if self.opts.size is not None: self.clparams['-s'] = self.opts.size - if self.opts.lower <> None: + if self.opts.lower is not None: self.clparams['-l'] = self.opts.lower - if self.opts.upper <> None: - self.clparams['-u'] = self.opts.upper - if self.opts.colours <> None: + if self.opts.upper is not None: + self.clparams['-u'] = self.opts.upper + if self.opts.colours is not None: self.clparams['-c'] = self.opts.colours - if self.opts.units <> None: + if self.opts.units is not None: self.clparams['-U'] = self.opts.units s = self.runCL() - return check,s + return check, s if __name__ == '__main__': - ''' - called as -<command interpreter="python"> - rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" -#if $range.mode == 'part' --l "$range.seqstart" -u "$range.seqend" -#end if - </command> - - ''' op = optparse.OptionParser() op.add_option('-i', '--input', default=None) op.add_option('-F', '--outformat', default='png') - op.add_option('-s', '--size', default=None) + op.add_option('-s', '--size', default=None) op.add_option('-o', '--output', default='rgWebLogo3') op.add_option('-t', '--logoname', default='rgWebLogo3') op.add_option('-c', '--colours', default=None) op.add_option('-l', '--lower', default=None) - op.add_option('-u', '--upper', default=None) - op.add_option('-U', '--units', default=None) + op.add_option('-u', '--upper', default=None) + op.add_option('-U', '--units', default=None) opts, args = op.parse_args() - assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open' - assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input + assert opts.input is not None, 'weblogo3 needs a -i parameter with a fasta input file - cannot open' + assert os.path.isfile(opts.input), 'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input w = WL3(opts) - checks,s = w.run() - print >> sys.stdout, checks # for info + checks, s = w.run() + print(checks, file=sys.stdout) # for info
--- a/rgWebLogo3.xml Fri Dec 18 19:20:26 2015 -0500 +++ b/rgWebLogo3.xml Fri Nov 17 09:38:19 2017 -0500 @@ -1,106 +1,104 @@ -<tool id="rgweblogo3" name="Sequence Logo" version="0.4"> - <description>generator for fasta (eg Clustal alignments)</description> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="3.3">weblogo</requirement> - <requirement type="package" version="9.10">ghostscript</requirement> - </requirements> - <command interpreter="python"> - rgWebLogo3.py -F $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours" -U "$units" -#if $range.mode == 'part' --l "$range.seqstart" -u "$range.seqend" -#end if - </command> - <inputs> - <page> - <param format="fasta" name="input" type="data" label="Fasta File" /> - <param name="logoname" label="Title for output Sequence Logo" type="text" value="Galaxy-Rgenetics Sequence Logo" /> - <param name="outformat" type="select" label="Output format for image (or text report)" > - <option value="png" selected="True">PNG screen quality</option> - <option value="png_print">High quality printable PNG</option> - <option value="pdf">PDF</option> - <option value="jpeg">JPG</option> - <option value="eps">EPS</option> - <option value="logodata">Text (shows the detailed calculations for each position - no image)</option> - </param> - <param name="units" type="select" label="Display Units" - help="What the height of each logo element depicts - eg bits of entropy (default)"> - <option value="bits" selected="True">Entropy (bits)</option> - <option value="probability">Probability</option> - <option value="nats">Nats</option> - <option value="kT">kT</option> - <option value="kJ/mol">kJ/mol</option> - <option value="kcal/mol">kcal/mol</option> - </param> - <param name="colours" type="select" label="Colour scheme for output Sequence Logo" - help="Note that some of these only make sense for protein sequences!"> - <option value="auto" selected="True">Default automatic colour selection</option> - <option value="base pairing">Base pairing</option> - <option value="charge">Charge colours</option> - <option value="chemistry">Chemistry colours</option> - <option value="classic">Classical colours</option> - <option value="hydrophobicity">Hydrophobicity</option> - <option value="monochrome">monochrome</option> - </param> +<tool id="rgweblogo3" name="Sequence Logo" version="3.5.0"> + <description>generator for fasta (eg Clustal alignments)</description> + <requirements> + <requirement type="package" version="3.5.0">weblogo</requirement> + </requirements> + <command><![CDATA[ + python '$__tool_directory__/rgWebLogo3.py' + -F $outformat + -s $size + -i '$input' + -o '$output' + -t '$logoname' + -c '$colours' + -U '$units' - - <conditional name="range"> - <param name="mode" type="select" label="Include entire sequence (default) or specify a subsequence range to use"> - <option value="complete" selected="true">complete sequence</option> - <option value="part">Only use a part of the sequence</option> + #if $range.mode == 'part' + -l '$range.seqstart' -u '$range.seqend' + #end if + ]]></command> + <inputs> + <param format="fasta" name="input" type="data" label="Fasta File" /> + <param name="logoname" label="Title for output Sequence Logo" type="text" value="Galaxy-Rgenetics Sequence Logo" /> + <param name="outformat" type="select" label="Output format for image (or text report)" > + <option value="png" selected="True">PNG screen quality</option> + <option value="png_print">High quality printable PNG</option> + <option value="pdf">PDF</option> + <option value="jpeg">JPG</option> + <option value="eps">EPS</option> + <option value="logodata">Text (shows the detailed calculations for each position - no image)</option> + </param> + <param name="units" type="select" label="Display Units" help="What the height of each logo element depicts - eg bits of entropy (default)"> + <option value="bits" selected="True">Entropy (bits)</option> + <option value="probability">Probability</option> + <option value="nats">Nats</option> + <option value="kT">kT</option> + <option value="kJ/mol">kJ/mol</option> + <option value="kcal/mol">kcal/mol</option> + </param> + <param name="colours" type="select" label="Colour scheme for output Sequence Logo" + help="Note that some of these only make sense for protein sequences!"> + <option value="auto" selected="True">Default automatic colour selection</option> + <option value="base pairing">Base pairing</option> + <option value="charge">Charge colours</option> + <option value="chemistry">Chemistry colours</option> + <option value="classic">Classical colours</option> + <option value="hydrophobicity">Hydrophobicity</option> + <option value="monochrome">monochrome</option> </param> - <when value="complete"> - </when> - <when value="part"> - <param name="seqstart" type="integer" value="1" help="WARNING: Specifying indexes outside the sequence lengths will cause unpredictable but bad consequences!" - label="Index (eg 1=first letter) of the start of the sequence range to include in the logo"> - </param> - <param name="seqend" type="integer" value="99999" label="Index (eg 75=75th letter) of the end of the sequence range to include in the logo" > - </param> - </when> - </conditional> - <param name="size" type="select" label="Output weblogo size" > - <option value="large" selected="True">Large</option> - <option value="medium">Medium</option> - <option value="small">Small</option> - </param> - </page> - </inputs> - <outputs> - <data format="pdf" name="output" label="${logoname}_output.${outformat}"> - <change_format> - <when input="outformat" value="png_print" format="png" /> - <when input="outformat" value="png" format="png" /> - <when input="outformat" value="jpeg" format="jpg" /> - <when input="outformat" value="eps" format="eps" /> - <when input="outformat" value="logodata" format="txt" /> - </change_format> - </data> - </outputs> - <tests> - <test> - <param name="input" value="rgClustal_testout.fasta" /> - <param name="logoname" value="Galaxy/Rgenetics weblogo" /> - <param name="outformat" value="jpeg" /> - <param name="mode" value="complete" /> - <param name="size" value="medium" /> - <param name="colours" value="auto" /> - <param name="units" value="bits" /> - <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" compare="sim_size" delta="10000" /> - </test> - <test> - <param name="input" value="rgClustal_testout.fasta" /> - <param name="logoname" value="Galaxy/Rgenetics weblogo" /> - <param name="outformat" value="png" /> - <param name="mode" value="complete" /> - <param name="size" value="medium" /> - <param name="colours" value="auto" /> - <param name="units" value="probability" /> - <output name="output" file="rgWebLogo3_test2.png" ftype="png" compare="sim_size" delta="10000" /> - </test> - </tests> - <help> + <conditional name="range"> + <param name="mode" type="select" label="Include entire sequence (default) or specify a subsequence range to use"> + <option value="complete" selected="true">complete sequence</option> + <option value="part">Only use a part of the sequence</option> + </param> + <when value="complete" /> + <when value="part"> + <param name="seqstart" type="integer" value="1" help="WARNING: Specifying indexes outside the sequence lengths will cause unpredictable but bad consequences!" + label="Index (eg 1=first letter) of the start of the sequence range to include in the logo" /> + <param name="seqend" type="integer" value="99999" label="Index (eg 75=75th letter) of the end of the sequence range to include in the logo" /> + </when> + </conditional> + <param name="size" type="select" label="Output weblogo size" > + <option value="large" selected="True">Large</option> + <option value="medium">Medium</option> + <option value="small">Small</option> + </param> + </inputs> + <outputs> + <data format="pdf" name="output" label="${logoname}_output.${outformat}"> + <change_format> + <when input="outformat" value="png_print" format="png" /> + <when input="outformat" value="png" format="png" /> + <when input="outformat" value="jpeg" format="jpg" /> + <when input="outformat" value="eps" format="eps" /> + <when input="outformat" value="logodata" format="txt" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="input" value="rgClustal_testout.fasta" /> + <param name="logoname" value="Galaxy/Rgenetics weblogo" /> + <param name="outformat" value="jpeg" /> + <param name="mode" value="complete" /> + <param name="size" value="medium" /> + <param name="colours" value="auto" /> + <param name="units" value="bits" /> + <output name="output" file="rgWebLogo3_test.jpg" ftype="jpg" compare="sim_size" delta="10000" /> + </test> + <test> + <param name="input" value="rgClustal_testout.fasta" /> + <param name="logoname" value="Galaxy/Rgenetics weblogo" /> + <param name="outformat" value="png" /> + <param name="mode" value="complete" /> + <param name="size" value="medium" /> + <param name="colours" value="auto" /> + <param name="units" value="probability" /> + <output name="output" file="rgWebLogo3_test2.png" ftype="png" compare="sim_size" delta="10000" /> + </test> + </tests> + <help><![CDATA[ **Note** This tool uses Weblogo3_ in Galaxy to generate a sequence logo. The input file must be a fasta file in your current history. @@ -119,8 +117,8 @@ The Weblogo3 program used by this tool will fail if your fasta sequences are not all EXACTLY the same length. The tool will provide a warning and refuse to call the weblogo3 executable if irregular length sequences are detected. -Fasta alignments from the companion ClustalW Galaxy tool will work but many other fasta files may cause this tool to fail - please do not file -a Galaxy bug report - this is a feature of the tool and a problem with your source data - not a tool error - please make certain all your fasta +Fasta alignments from the companion ClustalW Galaxy tool will work but many other fasta files may cause this tool to fail - please do not file +a Galaxy bug report - this is a feature of the tool and a problem with your source data - not a tool error - please make certain all your fasta sequences are the same length! ---- @@ -135,8 +133,8 @@ .. _LGPL: http://www.gnu.org/copyleft/lesser.html - </help> - + ]]></help> + <citations> + <citation type="doi">10.1101/gr.849004</citation> + </citations> </tool> - -
--- a/tool_dependencies.xml Fri Dec 18 19:20:26 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="ghostscript" version="9.10"> - <repository changeset_revision="9345d2740f0c" name="package_ghostscript_9_10" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="numpy" version="1.7.1"> - <repository changeset_revision="5c489d2d630b" name="package_numpy_1_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="weblogo" version="3.3"> - <repository changeset_revision="18cb94906560" name="package_weblogo_3_3" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>