view rgToolFactory.py @ 7:7221619caefa

Updated name and added crude gzip generator for toolshed. TODO: add tests and a new XML tool descriptor as soon as Greg has it nailed down.
author ross lazarus ross.lazarus@gmail.com
date Sat, 02 Jun 2012 10:43:08 +1000
parents
children 220885b2d7ee
line wrap: on
line source

# rgDynamicScriptWrapper.py
# derived from
# rgBaseScriptWrapper.py
# to run some user supplied code 
# extremely dangerous
# trusted users only - private site only
# a list in the xml is searched - only users in the list can run this tool.
# 
# copyright ross lazarus (ross.lazarus@gmail.com) May 2012
# 
# all rights reserved
# Licensed under the LGPL for your pleasure
# Derived from rgDGE.py in May 2012
# generalized to run required interpreter
# to make your own tools based on a given script and interpreter such as perl or python
# clone this and the corresponding xml wrapper
# replace the parameters/inputs/outputs and the configfile contents with your script
# Use the $foo syntax to place your parameter values inside the script to assign them - at run time, the script will be used as a template
# and returned as part of the output to the user - with the right values for all the parameters.
# Note that this assumes you want all the outputs arranged as a single HTML file.
# After this generic script runner runs your script with the specified interpreter,
# it collects all output files into the specified output_html, making thumbnails for all the PDFs it finds and links for all the other files.

import sys 
import shutil 
import subprocess 
import os 
import time 
import tempfile 
import optparse
import tarfile
import re
progname = os.path.split(sys.argv[0])[1] 
myversion = 'V000.1 May 2012' 
verbose = False 
debug = False


galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> 
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
<title></title> 
<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
</head> 
<body> 
<div class="document"> 
""" 
galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics Base Script Wrapper based </a> tool output %s run at %s</b><br/>""" 
galhtmlpostfix = """</div></body></html>\n"""

def timenow():
    """return current time as a string
    """
    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
# characters that are allowed but need to be escaped
mapped_chars = { '>' :'__gt__',
                 '<' :'__lt__',
                 "'" :'__sq__',
                 '"' :'__dq__',
                 '{' :'__oc__',
                 '}' :'__cc__',
                 '@' : '__at__', 
                 '\n' : '__cn__',
                 '\r' : '__cr__',
                 '\t' : '__tc__',
                 '#' : '__pd__',
                 '[' :'__ob__',
                 ']' :'__cb__',
                 '\t' : 'Xt', 
                 'systemCallsAreNotAllowed' : 'system'
                 }

def restore_text(text):
    """Restores sanitized text"""  
    if not text:
        return text
    for key, value in mapped_chars.items():
        text = text.replace(value, key)
    return text
    
class ScriptRunner:
    """class is a wrapper for an arbitrary script
    """

    def __init__(self,opts=None):
        """
        run the script
        cheetah/galaxy will provide an escaped string so
        __pd__ your script goes here
        __cr____cn__ourargs __lt__- commandArgs(TRUE)
        __cr____cn__inf = ourargs[1]
        __cr____cn__outf = ourargs[2]
        __cr____cn__inp = read.table(inf,head=T,rownames=F,sep=__sq__Xt__sq__)
        __cr____cn__ write.table(inp,outf, quote=FALSE, sep=__dq__Xt__dq__,row.names=F)
        __cr____cn__sessionInfo()
        __cr____cn__
        """
        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
        self.thumbformat = 'jpg'
        self.opts = opts
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)
        s = open(self.opts.script_path,'r').read()
        self.script = restore_text(s)
        self.pyfile = self.myname
        self.xmlfile = '%s.xml' % os.path.splitext(self.pyfile)[0] # punt
        self.sfile = '%s.%s' % (self.toolname,opts.interpreter)
        localscript = open(self.sfile,'w')
        localscript.write(self.script)
        localscript.close()
        if opts.output_dir or self.opts.makeTool: # may not want these complexities if a simple script
            self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
            artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname) 
            artifact = open(artifactpath,'w')
            artifact.write(self.script)
            artifact.write('\n')
            artifact.close()
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        a('-') # use stdin
        a(opts.input_tab)
        a(opts.output_tab)

    def makeTooltar(self):
        """
        a tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
        """
        retval = self.run()
        if retval:
            print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
            sys.exit(1)
        tarpath = os.path.join(self.opts.output_dir,"%s.gz" % self.toolname)
        tar = tarfile.open(tarpath, "w:gz")
        tar.add(self.xmlfile,arcname='%s.xml' % self.toolname)
        tar.add(self.pyfile,arcname=os.path.basename(self.pyfile))
        tar.add(self.sfile,arcname=self.sfile)
        tar.close()
        self.makeHtml()
        return retval

    def compressPDF(self,inpdf=None,thumbformat='png'):
        """need absolute path to pdf
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
        hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
        sto = open(hlog,'w')
        outpdf = '%s_compressed' % inpdf
        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
        x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
        retval1 = x.wait()
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf,inpdf)
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
        cl2 = ['convert', inpdf, outpng]
        x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
        retval2 = x.wait()
        sto.close()
        retval = retval1 or retval2
        return retval


    def getfSize(self,fpath,outpath):
        """
        format a nice file size string
        """
        size = ''
        fp = os.path.join(outpath,fpath)
        if os.path.isfile(fp):
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = ' (%1.1f MB)' % (n/2**20)
            elif n > 2**10:
                size = ' (%1.1f KB)' % (n/2**10)
            elif n > 0:
                size = ' (%d B)' % (int(n))
        return size

    def makeHtml(self):
       """
       """
       flist = os.listdir(self.opts.output_dir)
       flist = [x for x in flist if x <> 'Rplots.pdf']
       flist.sort()
       html = [galhtmlprefix % progname,]
       html.append('<h2>Galaxy %s outputs run at %s</h2><br/>\n' % (self.toolname,timenow()))
       fhtml = []
       if len(flist) > 0:
           html.append('<table cellpadding="3" cellspacing="3">\n')
           for fname in flist:
                dname,e = os.path.splitext(fname)
                sfsize = self.getfSize(fname,self.opts.output_dir)
                if e.lower() == '.pdf' : # compress and make a thumbnail
                    thumb = '%s.%s' % (dname,self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir,fname)
                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
                    if retval == 0:
                        s= '<tr><td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="10" width="600"></a></td></tr>\n' % (fname,thumb,fname)
                        html.append(s)
                    fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
                else:
                    fhtml.append('<li><a href="%s">%s %s</a></li>' % (fname,fname,sfsize))
           html.append('</table>\n')
           if len(fhtml) > 0:
               fhtml.insert(0,'<ul>')
               fhtml.append('</ul>')
               html += fhtml # add all non-pdf files to the end of the display
       else:
           html.append('<h2>### Error - %s returned no files - please confirm that parameters are sane</h1>' % self.opts.interpreter)
           html.append('<h3>%s log follows below</h3><hr><pre>\n' % self.opts.interpreter)
       rlog = open(self.tlog,'r').readlines()
       html += rlog
       html.append('%s CL = %s</br>\n' % (self.toolname,' '.join(sys.argv)))
       html.append('CL = %s</br>\n' % (' '.join(self.cl)))
       html.append('</pre>\n')
       html.append(galhtmlattr % (progname,timenow()))
       html.append(galhtmlpostfix)
       htmlf = file(self.opts.output_html,'w')
       htmlf.write('\n'.join(html))
       htmlf.write('\n')
       htmlf.close()
       self.html = html


    def run(self):
        """
        """
        if self.opts.output_dir or self.opts.makeTool:
            sto = open(self.tlog,'w')
            p = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
        else:
            p = subprocess.Popen(' '.join(self.cl),shell=True,stdin=subprocess.PIPE)            
        p.stdin.write(self.script)
        p.stdin.close()
        retval = p.wait()
        if self.opts.output_dir or self.opts.makeTool:
            sto.close()
            self.makeHtml()
        return retval
  

def main():
    u = """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>
    """
    op = optparse.OptionParser()
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default=None)
    a('--output_html',default=None)
    a('--input_tab',default='NONE')
    a('--output_tab',default='NONE')
    a('--user_email',default=None)
    a('--bad_user',default=None)
    a('--makeTool',default=None)
    opts, args = op.parse_args()
    assert not opts.bad_user,'%s is NOT authorized to use this tool. Please ask your friendly admin' % opts.bad_user
    assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq'
    assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript'
    assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except:
            pass
    r = ScriptRunner(opts)
    if opts.makeTool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode) # indicate failure to job runner


if __name__ == "__main__":
    main()