Mercurial > repos > mvdbeek > docker_scriptrunner
comparison scriptrunner.py @ 0:b6211faea403 draft
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit ae672027942a606c1a5e302348279a5493151c11-dirty
| author | mvdbeek |
|---|---|
| date | Fri, 08 Jul 2016 15:09:10 -0400 |
| parents | |
| children | 813b55d27809 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b6211faea403 |
|---|---|
| 1 # DockerToolFactory.py | |
| 2 # see https://github.com/mvdbeek/scriptrunner | |
| 3 | |
| 4 import sys | |
| 5 import shutil | |
| 6 import subprocess | |
| 7 import os | |
| 8 import time | |
| 9 import tempfile | |
| 10 import argparse | |
| 11 import getpass | |
| 12 import tarfile | |
| 13 import re | |
| 14 import shutil | |
| 15 import math | |
| 16 import fileinput | |
| 17 from os.path import abspath | |
| 18 | |
| 19 | |
# File name of the running script with any directory part removed.
progname = os.path.basename(sys.argv[0])
# Module-wide switches; both start switched off.
verbose, debug = False, False
| 23 | |
def timenow():
    """Format the current local time as 'dd/mm/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() with no argument converts the current time.
    return time.strftime(stamp_format, time.localtime())
| 28 | |
# Per-character replacements applied by html_escape().
# "&", ">" and "<" become HTML entities; "$" is backslash-escaped instead
# (NOTE(review): presumably to keep it from being treated as a template
# variable downstream - confirm against the tool generator).
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Produce entities within text.

    Every character of *text* that has an entry in html_escape_table is
    replaced by that entry; all other characters are copied unchanged.
    Returns the escaped string (an empty string for empty input).
    """
    return "".join(html_escape_table.get(c, c) for c in text)
| 39 | |
def cmd_exists(cmd):
    """Return True when the shell's ``type`` builtin can resolve *cmd*.

    *cmd* is concatenated into a shell command line, so it must be a
    trusted command name (the callers in this file pass fixed literals).
    """
    # Send the output to the null device rather than subprocess.PIPE:
    # call() never reads the pipes, which the subprocess documentation
    # warns can block the child once the pipe buffer fills.
    with open(os.devnull, 'w') as devnull:
        status = subprocess.call("type " + cmd, shell=True,
                                 stdout=devnull, stderr=devnull)
    return status == 0
| 43 | |
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the binds dictionary used to mount files in docker.

    host_path is either a single path or a list of paths. Each path
    becomes a key whose value is {'bind': <container path>, 'ro': ro}.
    When container_path is not given the host path is reused inside the
    container. For a list, an explicit container_path is applied to the
    first entry only; the remaining entries map onto themselves.

    A non-empty binds dictionary is updated in place and returned; an
    empty or missing one is replaced by a new dictionary.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    mounts = binds if binds else {}
    if not isinstance(host_path, list):
        mounts[host_path] = {'bind': container_path or host_path, 'ro': ro}
        return mounts
    target = container_path
    for source in host_path:
        mounts[source] = {'bind': target or source, 'ro': ro}
        target = False  # only the first entry may use the explicit target
    return mounts
| 62 | |
def switch_to_docker(opts):
    """Re-run this script inside a docker container and print its logs.

    Every path the script needs (the user script, the output directory,
    input and output files, and this wrapper itself) is bind-mounted at
    the same location inside the container. The original command line is
    then replayed there with ``--dockerized 1`` appended.

    Side effect: sys.argv is rewritten so that the value following
    ``--output_dir`` is an absolute path.

    Returns whatever docker_client.wait() reports for the container
    (NOTE(review): with the docker-py ``Client`` API used here this is
    expected to be the integer exit status - confirm for the installed
    docker-py version). The container is removed even when starting it or
    collecting its logs fails.
    """
    import docker  # need local import, as container does not have docker-py
    current_user = getpass.getuser()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    # When --input_tab is absent the parser may leave the placeholder string
    # 'None' here; that must not be mounted as if it were a file.
    input_tabs = opts.input_tab
    if not isinstance(input_tabs, (list, tuple)):
        input_tabs = [] if input_tabs in (None, 'None') else [input_tabs]
    if input_tabs:
        binds = construct_bind(binds=binds, host_path=list(input_tabs), ro=True)
    if opts.output_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working dir. The i > 0 guard stops
    # index -1 (the last argument) from being consulted for the first one.
    original_argv = sys.argv
    sys.argv = [
        output_dir if i > 0 and original_argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(original_argv)
    ]
    cmd = ['python', '-u'] + sys.argv + ['--dockerized', '1']
    container = docker_client.create_container(
        image=opts.docker_image,  # Make this configurable through job_conf
        user=current_user,  # TODO: make this configurable on the current user
        volumes=volumes,
        command=cmd
    )
    container_id = container['Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if not isinstance(logs, str):  # bytes on Python 3
            logs = logs.decode('utf-8', 'replace')
        print(logs)
    finally:
        docker_client.remove_container(container_id)
    return status
| 91 | |
class ScriptRunner:
    """Wrapper that runs an arbitrary script and reports on its outputs.

    The script named by opts.script_path is executed with opts.interpreter,
    either from a temporary file (bash/sh) or through stdin (everything
    else). Logs are written into opts.output_dir and, when opts.make_HTML
    is set, an HTML index of the output directory is generated.
    """

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, set up some outputs and build the command line.

        opts is the parsed command line; the attributes read here are
        script_path, interpreter, output_dir, input_tab, output_tab,
        additional_parameters, output_format and input_formats.

        Side effects: changes the working directory to opts.output_dir,
        rewrites a non-empty opts.output_dir to its absolute form, writes
        a temporary copy of the script (self.sfile, which the caller is
        expected to delete) and, when output_dir is set, a copy named
        'script.<interpreter>' inside output_dir.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useGM = cmd_exists('gm')
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        # Resolve output_dir before changing directory: every later
        # os.path.join(opts.output_dir, ...) would otherwise apply a
        # relative output_dir a second time from inside that directory.
        if opts.output_dir:
            opts.output_dir = abspath(opts.output_dir)
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as script_file:
            # rstrip removes pesky dos line endings if needed
            s = [x.rstrip() for x in script_file]
        self.script = '\n'.join(s)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # Wrap the descriptor returned by mkstemp so that it gets closed.
        with os.fdopen(fhandle, 'w') as tscript:  # self.sfile is the script source for Popen
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        # input_tab may be the placeholder string 'None' when no inputs were
        # given; iterating that string would add one argument per letter.
        input_tabs = opts.input_tab
        if not isinstance(input_tabs, (list, tuple)):
            input_tabs = [] if input_tabs in (None, 'None') else [input_tabs]
        for input_path in input_tabs:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        # NOTE(review): output_tab is appended even when it is 'None', so the
        # script then receives both names - kept as is, confirm intent.
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # Split on the first comma only so values may contain commas.
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with gs and make a thumbnail with convert.

        inpdf must be the absolute path of an existing PDF. The thumbnail
        is written next to it with the extension thumbformat. Note that GS
        gets confused if neither $TMP nor $TEMP is usable, so $TEMP is set
        to '<output_dir>/tmp' for the child processes in that case.

        Returns 0 when both commands succeeded, otherwise the first
        non-zero exit status. Raises AssertionError when inpdf is missing.
        """
        if not (inpdf and os.path.isfile(inpdf)):
            raise AssertionError(
                "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname))
        out_dir = self.opts.output_dir
        pdfname = os.path.basename(inpdf)
        hlog = os.path.join(out_dir, "compress_%s.txt" % pdfname)
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP') or our_env.get('TEMP')
        outpdf = '%s_compressed' % inpdf
        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
              "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
        with open(hlog, 'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(out_dir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            # Flush our own warnings before the child writes to the same file.
            sto.flush()
            retval1 = subprocess.call(cl, stdout=sto, stderr=sto, cwd=out_dir, env=our_env)
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)  # the log is only kept when compression failed
        hlog = os.path.join(out_dir, "thumbnail_%s.txt" % pdfname)
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        # ImageMagick's convert is always used here, whatever gm detection found.
        cl2 = ['convert', inpdf, outpng]
        with open(hlog, 'w') as sto:
            retval2 = subprocess.call(cl2, stdout=sto, stderr=sto, cwd=out_dir, env=our_env)
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Format a nice file size string for outpath/fpath.

        Returns '' when the path is not a regular file, '0 B' for an empty
        file, and otherwise the size in B, KB or MB (1 KB = 1024 B).
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Create an HTML file listing all the artifacts found in output_dir.

        Each '.log' file defines a section named after the part of its file
        name before the first '_'; PDFs sharing that prefix are shown as
        thumbnails in that section and the remaining PDFs go with the runner
        log (self.tlog), which always comes last. A table of every output
        file with its size follows. The page is written to
        opts.output_html and its lines are kept in self.html.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""

        out_dir = self.opts.output_dir
        flist = sorted(x for x in os.listdir(out_dir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            tlog_path = abspath(self.tlog)
            # log file names determine sections; the runner log goes last
            logfiles = [x for x in flist if x.lower().endswith('.log')]
            logfiles = [x for x in logfiles if abspath(os.path.join(out_dir, x)) != tlog_path]
            logfiles.append(tlog_path)
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, out_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(out_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        # no thumbnail available: show the PDF itself
                        pdflist.append((fname, fname))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                logpath = os.path.join(out_dir, logfname)
                if abspath(logpath) == tlog_path:  # gets every PDF not claimed so far
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                nacross = 1
                npdf = len(ourpdfs)

                if npdf > 0:
                    # lay the thumbnails out on a roughly square grid
                    nacross = math.sqrt(npdf)
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400, 1200 // nacross)
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross  # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i, paths in enumerate(ourpdfs):
                        fname, thumb = paths
                        s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                             ' alt="Image called %s"/></a></td>\n') % (fname, thumb, fname, width, fname)
                        if ((i + 1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1):  # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0:  # pad
                            html.append('<td>&nbsp;</td>' * ntogo)
                        html.append('</tr></table></div>\n')
                with open(logpath, 'r') as logfile:
                    logtext = [x for x in logfile if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add the table of all files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Run the script and return the interpreter's exit status.

        bash/sh scripts are delegated to runBash(). Any other interpreter
        receives the script on stdin (scripts must be small enough not to
        fill the pipe!). With an output_dir, stdout goes to self.tlog and
        stderr to self.elog; on failure the captured stderr text is
        repeated on this process's stderr. The HTML report is generated
        afterwards when opts.make_HTML is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        # The child's stdin is a byte pipe; encode text before writing.
        payload = self.script if isinstance(self.script, bytes) else self.script.encode('utf-8')
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                print("commandline is %s" % (self.cl))
                print("environment is %s" % (os.environ))
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir)
                p.communicate(payload)
                retval = p.returncode
            with open(self.elog, 'r') as errfile:
                err = errfile.read()
            if retval != 0 and err:  # problem
                sys.stderr.write(err)  # same problem, need to capture docker stdin/stdout
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE)
            p.communicate(payload)
            retval = p.returncode
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run a bash/sh script from self.sfile and return its exit status.

        '-' (stdin) cannot be used for bash, so the command line built in
        __init__ names the temporary script file instead. With an
        output_dir both stdout and stderr go to self.tlog. The HTML report
        is generated afterwards when opts.make_HTML is set.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                retval = subprocess.call(self.cl, shell=False, stdout=sto, stderr=sto,
                                         cwd=self.opts.output_dir)
        else:
            retval = subprocess.call(self.cl, shell=False)
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
| 377 | |
| 378 | |
def main():
    """Parse the command line and run the user script.

    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Unless --dockerized is non-zero the work is handed to
    switch_to_docker(), which replays the command line inside a container.
    Otherwise the script is run directly through ScriptRunner. The process
    exits with a non-zero status when the user is not authorised, the
    script is missing, or the script (or container) reports failure.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # An empty list (not the string 'None') so "no inputs" iterates as nothing.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    # Parsed as int so the comparison with 0 below means what it says.
    a('--dockerized', default=0, type=int)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    # Explicit checks rather than assert: asserts are stripped under -O,
    # which would silently disable the authorisation test.
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.script_path or not os.path.isfile(opts.script_path):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir and not os.path.isdir(opts.output_dir):
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # Tolerate a directory that appeared in the meantime; anything
            # else is a real failure and is reported with its own error.
            if not os.path.isdir(opts.output_dir):
                raise
    if opts.dockerized == 0:
        # A None or zero result is treated as success.
        retcode = switch_to_docker(opts)
        if retcode:
            sys.exit(retcode)  # indicate failure to job runner
        return
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # temporary copy of the script
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
