changeset 0:fda8032fe989

Initial checkin of dynamic script runner. Goal is to add code to generate a new toolshed entry once the script works correctly
author ross lazarus ross.lazarus@gmail.com
date Wed, 30 May 2012 22:36:34 +1000
parents
children 0133b97e477e
files images/dynamicScriptTool.png rgDynamicScriptWrapper.py rgDynamicScriptWrapper.xml
diffstat 3 files changed, 347 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file images/dynamicScriptTool.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgDynamicScriptWrapper.py	Wed May 30 22:36:34 2012 +1000
@@ -0,0 +1,253 @@
+# rgDynamicScriptWrapper.py
+# derived from
+# rgBaseScriptWrapper.py
+# to run some user supplied code 
+# extremely dangerous
+# trusted users only - private site only
+# a list in the xml is searched - only users in the list can run this tool.
+# 
+# copyright ross lazarus (ross.lazarus@gmail.com) May 2012
+# 
+# all rights reserved
+# Licensed under the LGPL for your pleasure
+# Derived from rgDGE.py in May 2012
+# generalized to run required interpreter
+# to make your own tools based on a given script and interpreter such as perl or python
+# clone this and the corresponding xml wrapper
+# replace the parameters/inputs/outputs and the configfile contents with your script
+# Use the $foo syntax to place your parameter values inside the script to assign them - at run time, the script will be used as a template
+# and returned as part of the output to the user - with the right values for all the parameters.
+# Note that this assumes you want all the outputs arranged as a single Html file output 
+# after this generic script runner runs your script with the specified interpreter,
+# it will collect all output files into the specified output_html, making thumbnails for all the pdfs it finds and making links for all the other files.
+
+import sys 
+import shutil 
+import subprocess 
+import os 
+import time 
+import tempfile 
+import optparse
+
+progname = os.path.basename(sys.argv[0]) # file name of this script, shown in the html output
+myversion = 'V000.1 May 2012' # version label; nothing else in the code shown reads it
+verbose = False # unused in the code shown
+debug = False # unused in the code shown
+
+# token that each sanitized character arrives as - restore_text swaps them back
+# 'Xt' is how a typed backslash-t arrives (eg sep=__sq__Xt__sq__), so it maps to '\\t'
+# the 'system' entry is a test sandboxing of any R system commands - ultimately futile,
+# we need to generate a new tool which will have no new security problems!
+mapped_chars = { '>' :'__gt__',
+                 '<' :'__lt__',
+                 "'" :'__sq__',
+                 '"' :'__dq__',
+                 '{' :'__oc__',
+                 '}' :'__cc__',
+                 '@' : '__at__',
+                 '\n' : '__cn__',
+                 '\r' : '__cr__',
+                 '\t' : '__tc__',
+                 '#' : '__pd__',
+                 '[' :'__ob__',
+                 ']' :'__cb__',
+                 '\\t' : 'Xt', # own key, so the real tab entry above is no longer overwritten
+                 'systemCallsAreNotAllowed' : 'system'
+                 }
+
+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> 
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
+<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
+<title></title> 
+<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
+</head> 
+<body> 
+<div class="document"> 
+""" # page header template - the single %s is filled with progname
+galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics Base Script Wrapper based </a> tool output %s run at %s</b><br/>""" # footer credit - filled with (progname, timenow())
+galhtmlpostfix = """</div></body></html>\n""" # closes the div, body and html opened by galhtmlprefix
+
+def timenow():
+    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
+    stamp_format = '%d/%m/%Y %H:%M:%S'
+    return time.strftime(stamp_format, time.localtime())
+
+def restore_text(text):
+    """Undo the sanitizing: swap every mapped_chars token back to its character.
+    Falsy input (None or '') is handed back unchanged."""
+    if text:
+        for original, token in mapped_chars.items():
+            text = text.replace(token, original)
+    return text
+    
+class ScriptRunner:
+    """Runs a user supplied script through the requested interpreter and can
+    collect whatever lands in opts.output_dir into a single html page."""
+
+    def __init__(self,opts=None):
+        """
+        Restore the escaped script text and build the interpreter command line.
+        opts is the optparse result from main(); cheetah/galaxy escapes the script, eg
+        __pd__ your script goes here
+        __cr____cn__ourargs __lt__- commandArgs(TRUE)
+        __cr____cn__inf = ourargs[1]
+        __cr____cn__outf = ourargs[2]
+        __cr____cn__inp = read.table(inf,head=T,rownames=F,sep=__sq__Xt__sq__)
+        __cr____cn__ write.table(inp,outf, quote=FALSE, sep=__dq__Xt__dq__,row.names=F)
+        __cr____cn__sessionInfo()
+        __cr____cn__
+        """
+        self.thumbformat = 'jpg'
+        self.opts = opts
+        self.toolname = opts.tool_name.replace(' ','_')
+        scriptfile = open(self.opts.script_path,'r')
+        try:
+            self.script = restore_text(scriptfile.read())
+        finally:
+            scriptfile.close()
+        if opts.output_dir: # may not want these complexities if a simple script
+            self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
+            # keep a copy of the restored script next to the outputs
+            artifact = open(os.path.join(opts.output_dir,'%s_run.script' % self.toolname),'w')
+            try:
+                artifact.write('%s\n' % self.script)
+            finally:
+                artifact.close()
+        # '-' makes the interpreter read the script from stdin; the two paths become its arguments
+        self.cl = [opts.interpreter,'-',opts.input_tab,opts.output_tab]
+
+    def compressPDF(self,inpdf=None,thumbformat='png'):
+        """Rewrite inpdf in place with gs pdfwrite, then make a thumbformat image of it with convert.
+        Needs the absolute path to the pdf; returns 0 or the first non-zero exit status."""
+        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.toolname)
+        hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
+        sto = os.fdopen(hf,'w') # wrap mkstemp's own descriptor rather than leak it
+        outpdf = '%s_compressed' % inpdf
+        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
+        try:
+            retval1 = subprocess.call(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+            if retval1 == 0: # only swap in the rewritten copy when gs succeeded
+                os.unlink(inpdf)
+                shutil.move(outpdf,inpdf)
+            # the thumbnail goes next to the pdf, same name with the thumbformat extension
+            outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+            cl2 = ['convert', inpdf, outpng]
+            retval2 = subprocess.call(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+        finally:
+            sto.close()
+        return retval1 or retval2
+
+
+    def getfSize(self,fpath,outpath):
+        """Return a display string like ' (1.5 MB)' for file fpath in directory outpath,
+        or '' when it is not a regular file or is empty."""
+        size = ''
+        fp = os.path.join(outpath,fpath)
+        if os.path.isfile(fp):
+            n = float(os.path.getsize(fp)) # float so the divisions keep their fraction
+            # use the largest unit the size exceeds
+            if n > 2**20:
+                size = ' (%1.1f MB)' % (n/2**20)
+            elif n > 2**10:
+                size = ' (%1.1f KB)' % (n/2**10)
+            elif n > 0:
+                size = ' (%d B)' % (int(n))
+        return size
+
+
+    def run(self):
+        """Pipe the script to the interpreter's stdin and return the interpreter's exit status.
+        If opts.output_dir is set, also write opts.output_html linking the files found there."""
+        if self.opts.output_dir:
+            sto = open(self.tlog,'w')
+            p = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+        else:
+            p = subprocess.Popen(' '.join(self.cl),shell=True,stdin=subprocess.PIPE)
+        p.stdin.write(self.script)
+        p.stdin.close()
+        retval = p.wait()
+        if self.opts.output_dir:
+            sto.close()
+            flist = sorted([x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf'])
+            html = [galhtmlprefix % progname,]
+            html.append('<h2>Galaxy %s outputs run at %s</h2></br>Click on a thumbnail below to download the original PDF</br>\n' % (self.toolname,timenow()))
+            fhtml = []
+            if len(flist) > 0:
+                html.append('<table cellpadding="3" cellspacing="3">\n')
+                for fname in flist:
+                    dname,e = os.path.splitext(fname)
+                    sfsize = self.getfSize(fname,self.opts.output_dir)
+                    if e.lower() == '.pdf' : # compress and make a thumbnail
+                        thumb = '%s.%s' % (dname,self.thumbformat)
+                        pdff = os.path.join(self.opts.output_dir,fname)
+                        # own variable: retval must keep the script's exit status for the caller
+                        thumbret = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+                        if thumbret == 0:
+                            html.append('<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname,thumb,fname))
+                    # every file, pdf or not, also gets a plain link in the list
+                    fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
+                html.append('</table>\n')
+                if len(fhtml) > 0:
+                    html += ['<ul>'] + fhtml + ['</ul>'] # the link list follows the thumbnails
+            else:
+                html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h2>' % self.opts.interpreter)
+            html.append('<h3>%s log follows below</h3><hr><pre>\n' % self.opts.interpreter)
+            rlog = open(self.tlog,'r')
+            try:
+                # one string: a list of lines would be double spaced by the join below
+                html.append(rlog.read())
+            finally:
+                rlog.close()
+            html.append('%s CL = %s</br>\n' % (self.toolname,' '.join(sys.argv)))
+            html.append('CL = %s</br>\n' % (' '.join(self.cl)))
+            html.append('</pre>\n')
+            html.append(galhtmlattr % (progname,timenow()))
+            html.append(galhtmlpostfix)
+            htmlf = open(self.opts.output_html,'w')
+            try:
+                htmlf.write('%s\n' % '\n'.join(html))
+            finally:
+                htmlf.close()
+        return retval
+  
+
+def main():
+    """Parse the command line, reject unauthorised or incomplete calls, then run the script."""
+    u = """
+    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
+    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
+    </command>
+    """
+    # reference only: nothing here reads this list, the xml wrapper flags bad users via --bad_user
+    permitted_users = ['rlazarus@bakeridi.edu.au','akaspi@bakeridi.edu.au','mziemann@bakeridi.edu.au']
+    op = optparse.OptionParser(usage=u)
+    for optname in ('script_path','tool_name','interpreter','output_dir','output_html','user_email','bad_user'):
+        op.add_option('--%s' % optname,default=None)
+    op.add_option('--input_tab',default='NONE')
+    op.add_option('--output_tab',default='NONE')
+    opts, args = op.parse_args()
+    if opts.bad_user: # explicit checks: asserts vanish under python -O
+        op.error('%s is NOT authorized to use this tool. Please ask your friendly admin' % opts.bad_user)
+    if not opts.tool_name:
+        op.error('## Dynamic script wrapper expects a tool name - eg --tool_name=DESeq')
+    if not opts.interpreter:
+        op.error('## Dynamic script wrapper expects an interpreter - eg --interpreter=Rscript')
+    if not (opts.script_path and os.path.isfile(opts.script_path)): # isfile(None) would raise
+        op.error('## Dynamic script wrapper expects a script path - eg --script_path=foo.R')
+    if opts.output_dir:
+        try:
+            os.makedirs(opts.output_dir)
+        except OSError:
+            if not os.path.isdir(opts.output_dir): # an existing directory is fine
+                raise
+    retcode = ScriptRunner(opts).run()
+    if retcode:
+        sys.exit(retcode) # indicate failure to job runner
+
+
+if __name__ == "__main__": # only run when executed as a script, not on import
+    main()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgDynamicScriptWrapper.xml	Wed May 30 22:36:34 2012 +1000
@@ -0,0 +1,94 @@
+<tool id="rgDynamic1" name="Dynamic Script Runner" version="0.03">
+  <description>DIY scripting</description>
+  <command interpreter="python">
+#if ( $__user_email__ not in ['rlazarus@bakeridi.edu.au','mziemann@bakeridi.edu.au','akaspi@bakeridi.edu.au'] ):
+     rgDynamicScriptWrapper.py --bad_user $__user_email__
+  #else:
+    rgDynamicScriptWrapper.py --script_path "$runme" --interpreter "$interpreter" 
+     --tool_name "$tool_name"  --input_tab "$input1" --user_email "${__user_email__}"
+    #if $makeHTML.value=="yes":
+      --output_dir "$html_file.files_path" --output_html "$html_file"
+    #end if
+    #if $makeTAB.value=="yes":
+      --output_tab "$tab_file"
+    #end if
+#end if 
+  </command>
+  <inputs>
+    <param name="input1"  type="data" format="tabular" label="Select an optional input tabular file from your history" optional="true"
+       help="Your script probably needs an input - but if not, this can be left unassigned"/>
+    <param name="tool_name" type="text" value="My dynamic script" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain"/>
+    <param name="makeHTML" type="select" label="Create an HTML output with all script outputs collected together, with thumbnails of new PDF images, the script and a run log file" 
+         help="This is useful for presenting complex outputs and is not needed if your script doesn't create anything other than a single tabular output">
+        <option value="yes">Yes</option>
+        <option value="" selected="true">No</option>
+    </param>   
+    <param name="makeTAB" type="select" label="Create a new tabular history output" 
+         help="This is useful if your script creates a single new tabular file you want to appear in the history after the tool executes">
+        <option value="yes" selected="true">Yes</option>
+        <option value="">No</option>
+    </param>   
+    <param name="interpreter" type="select" label="Select the interpreter for your code. This must be available on the path of the execution host">
+        <option value="Rscript" selected="true">Rscript</option>
+        <option value="python">python</option>
+        <option value="perl">perl (ugh)</option> <!-- the value is passed as-is to -interpreter, so it must be the bare executable name -->
+    </param>
+    <param name="dynScript" label="Your Script Goes Here" type="text" value="" area="True" size="8x80" width="80" 
+      help="Expect FIRST CL parameter = the optional input tabular file path (or NONE if not specified). Ensure your script writes tabular output to the path in the SECOND command line parameter it gets."/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="tab_file" label="${tool_name}.xls">
+        <filter> makeTAB=="yes" </filter>
+    </data>
+    <data format="html" name="html_file" label="${tool_name}.html">
+        <filter> makeHTML=="yes" </filter>
+    </data>
+  </outputs>
+<configfiles>
+<configfile name="runme">
+${dynScript}
+</configfile>
+</configfiles>
+<help>
+**What it does**
+This tool enables a user to paste and submit an arbitrary R/python/perl script to run in Galaxy.
+This is (extremely) insecure.
+
+**Restrictions**
+This tool will ONLY work if your user id has been added to the local copy's list of permitted users.
+Ask your friendly Galaxy administrator to edit this tool's source for you if you need this.
+
+**Note to system administrators**
+Under no circumstances should you allow any user to use this tool unless you really, really trust them to do
+no harm.
+
+**Use on public servers** 
+is STRONGLY discouraged for obvious reasons
+
+**Scripting conventions**
+The pasted script will be executed. 
+It will get the path to the (optional) input tabular data file path or NONE if you do not select one
+as the first command line parameter
+
+The script must write its output as tab delimited text to the path found as the second command line parameter
+Note that if an optional HTML output is selected, all the output files spewed by your script will be nicely presented as links to the user. 
+Any pdf images will automagically be converted to show thumbnails in that output.
+This can be handy for complex scripts creating lots of output.
+
+**Simple Rscript example**
+
+A simple "filter" that takes an input file, does something and writes the results to a new tabular file might look like this::
+
+  ourargs = commandArgs(TRUE)
+  inf = ourargs[1]
+  outf = ourargs[2]
+  inp = read.table(inf,head=F,row.names=NULL,sep='\t')
+  inp[,5] = runif ( nrow(inp) )
+  write.table(inp,outf, quote=FALSE, sep="\t",row.names=F,col.names=F)
+
+
+</help>
+
+</tool>
+
+