Mercurial > repos > fubar > brokenandnotdeletablebyowneroradmin
changeset 0:fda8032fe989
Initial checkin of dynamic script runner. Goal is to add code to generate a new toolshed entry once the script works correctly
author | ross lazarus ross.lazarus@gmail.com |
---|---|
date | Wed, 30 May 2012 22:36:34 +1000 |
parents | |
children | 0133b97e477e |
files | images/dynamicScriptTool.png rgDynamicScriptWrapper.py rgDynamicScriptWrapper.xml |
diffstat | 3 files changed, 347 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgDynamicScriptWrapper.py Wed May 30 22:36:34 2012 +1000 @@ -0,0 +1,253 @@ +# rgDynamicScriptWrapper.py +# derived from +# rgBaseScriptWrapper.py +# to run some user supplied code +# extremely dangerous +# trusted users only - private site only +# a list in the xml is searched - only users in the list can run this tool. +# +# copyright ross lazarus (ross.lazarus@gmail.com) May 2012 +# +# all rights reserved +# Licensed under the LGPL for your pleasure +# Derived from rgDGE.py in May 2012 +# generalized to run required interpreter +# to make your own tools based on a given script and interpreter such as perl or python +# clone this and the corresponding xml wrapper +# replace the parameters/inputs/outputs and the configfile contents with your script +# Use the $foo syntax to place your parameter values inside the script to assign them - at run time, the script will be used as a template +# and returned as part of the output to the user - with the right values for all the parameters. +# Note that this assumes you want all the outputs arranged as a single Html file output +# after this generic script runner runs your script with the specified interpreter, +# it will collect all output files into the specified output_html, making thumbnails for all the pdfs it finds and making links for all the other files. + +import sys +import shutil +import subprocess +import os +import time +import tempfile +import optparse + +progname = os.path.split(sys.argv[0])[1] +myversion = 'V000.1 May 2012' +verbose = False +debug = False + +# characters that are allowed but need to be escaped +# also a test sandboxing of any R system commands +# ultimately futile - we need to generate a new tool +# which will have no new security problems! +mapped_chars = { '>' :'__gt__', + '<' :'__lt__', + "'" :'__sq__', + '"' :'__dq__', + '{' :'__oc__', + '}' :'__cc__', + '@' : '__at__', + '\n' : '__cn__', + '\r' : '__cr__', + '\t' : '__tc__', + '#' : '__pd__', + '[' :'__ob__', + ']' :'__cb__', + '\t' : 'Xt', + 'systemCallsAreNotAllowed' : 'system' + } + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" +galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics Base Script Wrapper based </a> tool output %s run at %s</b><br/>""" +galhtmlpostfix = """</div></body></html>\n""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +def restore_text(text): + """Restores sanitized text""" + if not text: + return text + for key, value in mapped_chars.items(): + text = text.replace(value, key) + return text + +class ScriptRunner: + """class is a wrapper for an arbitrary script + """ + + def __init__(self,opts=None): + """ + run the script + cheetah/galaxy will provide an escaped string so + __pd__ your script goes here + __cr____cn__ourargs __lt__- commandArgs(TRUE) + __cr____cn__inf = ourargs[1] + __cr____cn__outf = ourargs[2] + __cr____cn__inp = read.table(inf,head=T,rownames=F,sep=__sq__Xt__sq__) + __cr____cn__ write.table(inp,outf, quote=FALSE, sep=__dq__Xt__dq__,row.names=F) + __cr____cn__sessionInfo() + __cr____cn__ + """ + self.thumbformat = 'jpg' + self.opts = opts + self.toolname = opts.tool_name.replace(' ','_') + s = open(self.opts.script_path,'r').read() + self.script = restore_text(s) + if opts.output_dir: # may not want these complexities if a simple script + self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname) + artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname) + artifact = open(artifactpath,'w') + artifact.write(self.script) + artifact.write('\n') + artifact.close() + self.cl = [] + a = self.cl.append + a(opts.interpreter) + a('-') # use stdin + a(opts.input_tab) + a(opts.output_tab) + + def compressPDF(self,inpdf=None,thumbformat='png'): + """need absolute path to pdf + """ + assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName) + hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname) + sto = open(hlog,'w') + outpdf = '%s_compressed' % inpdf + cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf] + x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval1 = x.wait() + if retval1 == 0: + os.unlink(inpdf) + shutil.move(outpdf,inpdf) + outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat) + cl2 = ['convert', inpdf, outpng] + x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval2 = x.wait() + sto.close() + retval = retval1 or retval2 + return retval + + + def getfSize(self,fpath,outpath): + """ + format a nice file size string + """ + size = '' + fp = os.path.join(outpath,fpath) + if os.path.isfile(fp): + n = float(os.path.getsize(fp)) + if n > 2**20: + size = ' (%1.1f MB)' % (n/2**20) + elif n > 2**10: + size = ' (%1.1f KB)' % (n/2**10) + elif n > 0: + size = ' (%d B)' % (int(n)) + return size + + + def run(self): + """ + """ + if self.opts.output_dir: + sto = open(self.tlog,'w') + p = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir) + else: + p = subprocess.Popen(' '.join(self.cl),shell=True,stdin=subprocess.PIPE) + p.stdin.write(self.script) + p.stdin.close() + retval = p.wait() + if self.opts.output_dir: + sto.close() + flist = os.listdir(self.opts.output_dir) + flist = [x for x in flist if x <> 'Rplots.pdf'] + flist.sort() + html = [galhtmlprefix % progname,] + html.append('<h2>Galaxy %s outputs run at %s</h2></br>Click on a thumbnail below to download the original PDF</br>\n' % (self.toolname,timenow())) + fhtml = [] + if len(flist) > 0: + html.append('<table cellpadding="3" cellspacing="3">\n') + for fname in flist: + dname,e = os.path.splitext(fname) + sfsize = self.getfSize(fname,self.opts.output_dir) + if e.lower() == '.pdf' : # compress and make a thumbnail + thumb = '%s.%s' % (dname,self.thumbformat) + pdff = os.path.join(self.opts.output_dir,fname) + retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat) + if retval == 0: + s= '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname,thumb,fname) + html.append(s) + fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize)) + else: + fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize)) + html.append('</table>\n') + if len(fhtml) > 0: + fhtml.insert(0,'<ul>') + fhtml.append('</ul>') + html += fhtml # add all non-pdf files to the end of the display + else: + html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h1>' % self.opts.interpreter) + html.append('<h3>%s log follows below</h3><hr><pre>\n' % self.opts.interpreter) + rlog = open(self.tlog,'r').readlines() + html += rlog + html.append('%s CL = %s</br>\n' % (self.toolname,' '.join(sys.argv))) + html.append('CL = %s</br>\n' % (' '.join(self.cl))) + html.append('</pre>\n') + html.append(galhtmlattr % (progname,timenow())) + html.append(galhtmlpostfix) + htmlf = file(self.opts.output_html,'w') + htmlf.write('\n'.join(html)) + htmlf.write('\n') + htmlf.close() + return retval + + +def main(): + u = """ + This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as: + <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript" + </command> + """ + permitted_users = ['rlazarus@bakeridi.edu.au','akaspi@bakeridi.edu.au','mziemann@bakeridi.edu.edu'] + op = optparse.OptionParser() + a = op.add_option + a('--script_path',default=None) + a('--tool_name',default=None) + a('--interpreter',default=None) + a('--output_dir',default=None) + a('--output_html',default=None) + a('--input_tab',default='NONE') + a('--output_tab',default='NONE') + a('--user_email',default=None) + a('--bad_user',default=None) + opts, args = op.parse_args() + assert not opts.bad_user,'%s is NOT authorized to use this tool. Please ask your friendly admin' % opts.bad_user + assert opts.tool_name,'## Dynamic script wrapper expects a tool name - eg --tool_name=DESeq' + assert opts.interpreter,'## Dynamic script wrapper expects an interpreter - eg --interpreter=Rscript' + assert os.path.isfile(opts.script_path),'## Dynamic script wrapper expects a script path - eg --script_path=foo.R' + if opts.output_dir: + try: + os.makedirs(opts.output_dir) + except: + pass + r = ScriptRunner(opts) + retcode = r.run() + if retcode: + sys.exit(retcode) # indicate failure to job runner + + +if __name__ == "__main__": + main() + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgDynamicScriptWrapper.xml Wed May 30 22:36:34 2012 +1000 @@ -0,0 +1,94 @@ +<tool id="rgDynamic1" name="Dynamic Script Runner" version="0.03"> + <description>DIY scripting</description> + <command interpreter="python"> +#if ( $__user_email__ not in ['rlazarus@bakeridi.edu.au','mziemann@bakeridi.edu.au','akaspi@bakeridi.edu.au'] ): + rgDynamicScriptWrapper.py --bad_user $__user_email__ + #else: + rgDynamicScriptWrapper.py --script_path "$runme" --interpreter "$interpreter" + --tool_name "$tool_name" --input_tab "$input1" --user_email "${__user_email__}" + #if $makeHTML.value=="yes": + --output_dir "$html_file.files_path" --output_html "$html_file" + #end if + #if $makeTAB.value=="yes": + --output_tab "$tab_file" + #end if +#end if + </command> + <inputs> + <param name="input1" type="data" format="tabular" label="Select an optional input tabular file from your history" optional="true" + help="Your script probably needs an input - but if not, this can be left unassigned"/> + <param name="tool_name" type="text" value="My dynamic script" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain"/> + <param name="makeHTML" type="select" label="Create an HTML output with all script outputs collected together, with thumbnails of new PDF images, the script and a run log file" + help="This is useful for presenting complex outputs and is not needed if your script doesn't create anything other than a single tabular output"> + <option value="yes">Yes</option> + <option value="" selected="true">No</option> + </param> + <param name="makeTAB" type="select" label="Create a new tabular history output" + help="This is useful if your script creates a single new tabular file you want to appear in the history after the tool executes"> + <option value="yes" selected="true">Yes</option> + <option value="">No</option> + </param> + <param name="interpreter" type="select" label="Select the interpreter for your code. This must be available on the path of the execution host"> + <option value="Rscript" selected="true">Rscript</option> + <option value="python">python</option> + <option value="perl (ugh)">perl</option> + </param> + <param name="dynScript" label="Your Script Goes Here" type="text" value="" area="True" size="8x80" width="80" + help="Expect FIRST CL parameter = the optional input tabular file path (or NONE if not specified). Ensure your script writes tabular output to the path in the SECOND command line parameter it gets."/> + </inputs> + <outputs> + <data format="tabular" name="tab_file" label="${tool_name}.xls"> + <filter> makeTAB=="yes" </filter> + </data> + <data format="html" name="html_file" label="${tool_name}.html"> + <filter> makeHTML=="yes" </filter> + </data> + </outputs> +<configfiles> +<configfile name="runme"> +${dynScript} +</configfile> +</configfiles> +<help> +**What it does** +This tool enables a user to paste and submit an arbitrary R/python/perl script to run in Galaxy. +This is (extremely) insecure. + +**Restrictions** +This tool will ONLY work if your user id has been added to the local copy's list of permitted users. +Ask your friendly Galaxy administrator to edit this tool's source for you if you need this. + +**Note to system administrators** +Under no circumstances should you allow any user to use this tool unless you really, really trust them to do +no harm. + +**Use on public servers** +is STRONGLY discouraged for obvious reasons + +**Scripting conventions** +The pasted script will be executed. +It will get the path to the (optional) input tabular data file path or NONE if you do not select one +as the first command line parameter + +The script must write it's output as tab delimited text to the path found as the second command line parameter +Note that if an optional HTML output is selected, all the output files spewed by your script will be nicely presented as links to the user. +Any pdf images will automagically be converted to show thumbnails in that output. +This can be handy for complex scripts creating lots of output. + +**Simple Rscript example** + +A simple "filter" that takes an input file, does something and writes the results to a new tabular file might look like this:: + + ourargs = commandArgs(TRUE) + inf = ourargs[1] + outf = ourargs[2] + inp = read.table(inf,head=F,row.names=NULL,sep='\t') + inp[,5] = runif ( nrow(inp) ) + write.table(inp,outf, quote=FALSE, sep="\t",row.names=F,col.names=F) + + +</help> + +</tool> + +