diff rgToolFactory.py @ 7:7221619caefa

Updated name and added crude gzip generator for toolshed TODO: add tests and new XML tool descriptor as soon as Greg has it nailed down.
author ross lazarus ross.lazarus@gmail.com
date Sat, 02 Jun 2012 10:43:08 +1000
parents
children 220885b2d7ee
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgToolFactory.py	Sat Jun 02 10:43:08 2012 +1000
@@ -0,0 +1,286 @@
+# rgToolFactory.py (previously rgDynamicScriptWrapper.py)
+# derived from
+# rgBaseScriptWrapper.py
+# to run some user supplied code 
+# extremely dangerous
+# trusted users only - private site only
+# a list in the xml is searched - only users in the list can run this tool.
+# 
+# copyright ross lazarus (ross.lazarus@gmail.com) May 2012
+# 
+# all rights reserved
+# Licensed under the LGPL for your pleasure
+# Derived from rgDGE.py in May 2012
+# generalized to run required interpreter
+# to make your own tools based on a given script and interpreter such as perl or python
+# clone this and the corresponding xml wrapper
+# replace the parameters/inputs/outputs and the configfile contents with your script
+# Use the $foo syntax to place your parameter values inside the script to assign them - at run time, the script will be used as a template
+# and returned as part of the output to the user - with the right values for all the parameters.
+# Note that this assumes you want all the outputs arranged as a single Html file output 
+# after this generic script runner runs your script with the specified interpreter,
+# it will collect all output files into the specified output_html, making thumbnails for all the pdfs it finds and making links for all the other files.
+
+import sys 
+import shutil 
+import subprocess 
+import os 
+import time 
+import tempfile 
+import optparse
+import tarfile
+import re
+# Name of this script without its directory, the wrapper version string,
+# and two diagnostic switches (both off).
+progname = os.path.basename(sys.argv[0])
+myversion = 'V000.1 May 2012'
+verbose = debug = False
+
+
+# Opening boilerplate of the generated HTML report. Contains a single %s slot
+# which makeHtml fills with progname (it lands in the "generator" meta tag).
+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> 
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
+<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
+<title></title> 
+<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
+</head> 
+<body> 
+<div class="document"> 
+""" 
+# Attribution line placed near the end of the report; takes two %s values,
+# filled by makeHtml with (progname, timenow()).
+galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics Base Script Wrapper based </a> tool output %s run at %s</b><br/>""" 
+# Closes the div/body/html elements opened by galhtmlprefix.
+galhtmlpostfix = """</div></body></html>\n"""
+
+def timenow():
+    """Format the current local time as 'dd/mm/YYYY HH:MM:SS' and return it."""
+    right_now = time.localtime()
+    return time.strftime('%d/%m/%Y %H:%M:%S', right_now)
+# characters that are allowed but need to be escaped
+# Ordered (escaped token, original text) pairs used to undo the escaping.
+# Both '__tc__' and 'Xt' stand for a tab; a dict keyed on the character can
+# hold only one of them (the old literal silently dropped '__tc__'), so the
+# pairs are kept in a tuple and applied in this fixed order.
+escaped_tokens = (
+    ('__gt__', '>'),
+    ('__lt__', '<'),
+    ('__sq__', "'"),
+    ('__dq__', '"'),
+    ('__oc__', '{'),
+    ('__cc__', '}'),
+    ('__at__', '@'),
+    ('__cn__', '\n'),
+    ('__cr__', '\r'),
+    ('__tc__', '\t'),
+    ('__pd__', '#'),
+    ('__ob__', '['),
+    ('__cb__', ']'),
+    ('Xt', '\t'),
+    # every 'system' in the script becomes 'systemCallsAreNotAllowed'
+    # NOTE(review): presumably meant to disable system() calls - confirm
+    ('system', 'systemCallsAreNotAllowed'),
+)
+
+# original text -> escaped token view, same contents as before
+# ('\t' maps to 'Xt' because the later pair wins)
+mapped_chars = dict((original, token) for token, original in escaped_tokens)
+
+def restore_text(text):
+    """Restores sanitized text
+
+    Replaces every escaped token listed in escaped_tokens with the text it
+    stands for, in the order the pairs are listed.
+
+    text - the escaped string; None or '' is returned unchanged.
+    Returns the string with all tokens replaced.
+    """
+    if not text:
+        return text
+    for token, original in escaped_tokens:
+        text = text.replace(token, original)
+    return text
+    
+class ScriptRunner:
+    """class is a wrapper for an arbitrary script
+
+    Takes the parsed command line options, restores the escaped script text,
+    feeds it to the requested interpreter on stdin and, when an output
+    directory is in use, writes an HTML page linking everything found there.
+    """
+
+    def __init__(self,opts=None):
+        """
+        prepare to run the script
+        cheetah/galaxy will provide an escaped string so
+        __pd__ your script goes here
+        __cr____cn__ourargs __lt__- commandArgs(TRUE)
+        __cr____cn__inf = ourargs[1]
+        __cr____cn__outf = ourargs[2]
+        __cr____cn__inp = read.table(inf,head=T,rownames=F,sep=__sq__Xt__sq__)
+        __cr____cn__ write.table(inp,outf, quote=FALSE, sep=__dq__Xt__dq__,row.names=F)
+        __cr____cn__sessionInfo()
+        __cr____cn__
+
+        opts - parsed options; this reads tool_name, script_path, interpreter,
+        output_dir, makeTool, input_tab and output_tab.
+
+        Side effects: writes the restored script to <toolname>.<interpreter> in
+        the current directory and, when output_dir or makeTool is set, a copy
+        to <output_dir>/<toolname>_run.script.
+        Raises ValueError if makeTool is set without output_dir.
+        """
+        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
+        self.thumbformat = 'jpg'
+        self.opts = opts
+        # only letters, digits and underscore survive in the tool name
+        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)
+        scriptf = open(self.opts.script_path,'r')
+        try:
+            s = scriptf.read()
+        finally:
+            scriptf.close()
+        self.script = restore_text(s)
+        self.pyfile = self.myname
+        self.xmlfile = '%s.xml' % os.path.splitext(self.pyfile)[0] # punt
+        self.sfile = '%s.%s' % (self.toolname,opts.interpreter)
+        localscript = open(self.sfile,'w')
+        try:
+            localscript.write(self.script)
+        finally:
+            localscript.close()
+        self.tlog = None # only set when outputs are collected
+        if opts.output_dir or self.opts.makeTool: # may not want these complexities if a simple script
+            if not opts.output_dir:
+                # previously this died inside os.path.join(None,...) with an obscure error
+                raise ValueError('## makeTool needs an output directory - please supply --output_dir')
+            self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
+            artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname)
+            artifact = open(artifactpath,'w')
+            try:
+                artifact.write(self.script)
+                artifact.write('\n')
+            finally:
+                artifact.close()
+        # command line: interpreter reads the script from stdin, then gets the two table paths
+        self.cl = []
+        self.html = []
+        a = self.cl.append
+        a(opts.interpreter)
+        a('-') # use stdin
+        a(opts.input_tab)
+        a(opts.output_tab)
+
+    def makeTooltar(self):
+        """
+        a tool is a gz tarball with eg
+        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
+
+        Runs the script first; on a non zero return code a message is written
+        to stderr and the process exits with status 1. Otherwise the xml
+        wrapper, this python file and the script are added to
+        <output_dir>/<toolname>.gz and the HTML page is regenerated.
+        Returns the (zero) return code of the run.
+        """
+        retval = self.run()
+        if retval:
+            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
+            sys.exit(1)
+        tarpath = os.path.join(self.opts.output_dir,"%s.gz" % self.toolname)
+        tar = tarfile.open(tarpath, "w:gz")
+        try:
+            tar.add(self.xmlfile,arcname='%s.xml' % self.toolname)
+            tar.add(self.pyfile,arcname=os.path.basename(self.pyfile))
+            tar.add(self.sfile,arcname=self.sfile)
+        finally:
+            tar.close()
+        self.makeHtml() # again, after the tarball has been written
+        return retval
+
+    def compressPDF(self,inpdf=None,thumbformat='png'):
+        """need absolute path to pdf
+
+        Rewrites inpdf with ghostscript (gs) and, if that worked, replaces the
+        original with the rewritten file; then runs convert to make an image
+        named like the pdf but with the thumbformat extension.
+
+        Returns 0 when both commands returned 0, otherwise the first non zero
+        return code. Raises AssertionError if inpdf is not an existing file.
+        """
+        if not (inpdf and os.path.isfile(inpdf)):
+            raise AssertionError("## Input %s supplied to %s compressPDF not found" % (inpdf,self.myname))
+        hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
+        sto = os.fdopen(hf,'w') # reuse the descriptor mkstemp opened so it is not leaked
+        try:
+            outpdf = '%s_compressed' % inpdf
+            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
+            x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+            retval1 = x.wait()
+            if retval1 == 0:
+                os.unlink(inpdf)
+                shutil.move(outpdf,inpdf)
+            outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+            cl2 = ['convert', inpdf, outpng]
+            x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+            retval2 = x.wait()
+        finally:
+            sto.close()
+        retval = retval1 or retval2
+        if retval == 0:
+            os.unlink(hlog) # nothing went wrong so the temporary log is of no further use
+        return retval
+
+
+    def getfSize(self,fpath,outpath):
+        """
+        format a nice file size string
+
+        fpath is joined onto outpath. Returns ' (x.x MB)', ' (x.x KB)' or
+        ' (n B)' depending on size, or '' for a missing or empty file.
+        """
+        size = ''
+        fp = os.path.join(outpath,fpath)
+        if os.path.isfile(fp):
+            n = float(os.path.getsize(fp))
+            if n > 2**20:
+                size = ' (%1.1f MB)' % (n/2**20)
+            elif n > 2**10:
+                size = ' (%1.1f KB)' % (n/2**10)
+            elif n > 0:
+                size = ' (%d B)' % (int(n))
+        return size
+
+    def _htmlEscape(self,text):
+        """return text with &, <, > and double quotes replaced by HTML entities
+        """
+        for raw,entity in (('&','&amp;'),('<','&lt;'),('>','&gt;'),('"','&quot;')):
+            text = text.replace(raw,entity)
+        return text
+
+    def makeHtml(self):
+        """
+        write opts.output_html - a page listing every file in opts.output_dir
+        (except Rplots.pdf). Each pdf is compressed and, if that worked, shown
+        as a thumbnail linking to the pdf; every file gets a link with its size.
+        The runner log and the command lines follow inside a <pre> block.
+        The page, as a list of strings, is also kept in self.html.
+        """
+        outdir = self.opts.output_dir
+        flist = [x for x in os.listdir(outdir) if x != 'Rplots.pdf']
+        flist.sort()
+        html = [galhtmlprefix % progname,]
+        html.append('<h2>Galaxy %s outputs run at %s</h2><br/>\n' % (self.toolname,timenow()))
+        fhtml = []
+        if len(flist) > 0:
+            html.append('<table cellpadding="3" cellspacing="3">\n')
+            for fname in flist:
+                dname,e = os.path.splitext(fname)
+                hname = self._htmlEscape(fname)
+                if e.lower() == '.pdf' : # compress and make a thumbnail
+                    thumb = self._htmlEscape('%s.%s' % (dname,self.thumbformat))
+                    pdff = os.path.join(outdir,fname)
+                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+                    if retval == 0:
+                        s= '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (hname,thumb,hname)
+                        html.append(s)
+                # measured after any compression so the size shown is the size on disk
+                sfsize = self.getfSize(fname,outdir)
+                fhtml.append('<li><a href="%s">%s %s</a></li>' % (hname,hname,sfsize))
+            html.append('</table>\n')
+            if len(fhtml) > 0:
+                fhtml.insert(0,'<ul>')
+                fhtml.append('</ul>')
+                html += fhtml # add the links for all files to the end of the display
+        else:
+            html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h2>' % self.opts.interpreter)
+        # the <pre> is opened on every path so the closing tag below always has a partner
+        html.append('<h3>%s log follows below</h3><hr><pre>\n' % self.opts.interpreter)
+        logf = open(self.tlog,'r')
+        try:
+            # the lines are joined with newlines below, so drop the ones read from the file
+            rlog = [self._htmlEscape(x.rstrip('\r\n')) for x in logf]
+        finally:
+            logf.close()
+        html += rlog
+        html.append('%s CL = %s<br/>\n' % (self.toolname,self._htmlEscape(' '.join(sys.argv))))
+        html.append('CL = %s<br/>\n' % self._htmlEscape(' '.join(self.cl)))
+        html.append('</pre>\n')
+        html.append(galhtmlattr % (progname,timenow()))
+        html.append(galhtmlpostfix)
+        htmlf = open(self.opts.output_html,'w')
+        try:
+            htmlf.write('\n'.join(html))
+            htmlf.write('\n')
+        finally:
+            htmlf.close()
+        self.html = html
+
+
+    def run(self):
+        """
+        run the interpreter with the script on its stdin and return its
+        return code. When output_dir or makeTool is set, stdout and stderr go
+        to the runner log, the working directory is output_dir and the HTML
+        page is written afterwards.
+        """
+        # NOTE: the command is handed to the shell as one space joined string,
+        # so none of the arguments are quoted
+        cmd = ' '.join(self.cl)
+        if self.opts.output_dir or self.opts.makeTool:
+            sto = open(self.tlog,'w')
+            try:
+                p = subprocess.Popen(cmd,shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+                p.stdin.write(self.script)
+                p.stdin.close()
+                retval = p.wait()
+            finally:
+                sto.close() # the log must be closed before makeHtml reads it
+            self.makeHtml()
+        else:
+            p = subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE)
+            p.stdin.write(self.script)
+            p.stdin.close()
+            retval = p.wait()
+        return retval
+  
+
+def main():
+    """
+    parse the command line, check it, make the output directory if one was
+    asked for and run the script (or build the tool tarball if --makeTool).
+
+    A failed check writes its message to stderr and exits with status 1.
+    A non zero return code from the script becomes the exit status.
+    """
+    u = """
+    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
+    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
+    </command>
+    """
+    op = optparse.OptionParser(usage=u)
+    a = op.add_option
+    a('--script_path',default=None)
+    a('--tool_name',default=None)
+    a('--interpreter',default=None)
+    a('--output_dir',default=None)
+    a('--output_html',default=None)
+    a('--input_tab',default='NONE')
+    a('--output_tab',default='NONE')
+    a('--user_email',default=None)
+    a('--bad_user',default=None)
+    a('--makeTool',default=None)
+    opts, args = op.parse_args()
+
+    def giveUp(msg):
+        # explicit checks rather than assert, which python -O removes -
+        # and one of these is the authorisation check
+        sys.stderr.write('%s\n' % msg)
+        sys.exit(1)
+
+    if opts.bad_user:
+        giveUp('%s is NOT authorized to use this tool. Please ask your friendly admin' % opts.bad_user)
+    if not opts.tool_name:
+        giveUp('## Tool Factory expects a tool name - eg --tool_name=DESeq')
+    if not opts.interpreter:
+        giveUp('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
+    if not (opts.script_path and os.path.isfile(opts.script_path)):
+        giveUp('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
+    if opts.makeTool and not opts.output_dir:
+        giveUp('## Tool Factory needs --output_dir when --makeTool is used')
+    if opts.output_dir:
+        try:
+            os.makedirs(opts.output_dir)
+        except OSError:
+            # an existing directory is fine - anything else is a real problem
+            if not os.path.isdir(opts.output_dir):
+                raise
+    r = ScriptRunner(opts)
+    if opts.makeTool:
+        retcode = r.makeTooltar()
+    else:
+        retcode = r.run()
+    if retcode:
+        sys.exit(retcode) # indicate failure to job runner
+
+
+if __name__ == "__main__":
+    main()
+
+