2
+ − 1 """
+ − 2 April 2013
+ − 3 eeesh GSEA does NOT respect the mode flag!
+ − 4
+ − 5 Now realise that the creation of the input rank file for gsea needs to take the lowest p value for duplicate
+ − 6 feature names. To make Ish's life easier, remove duplicate gene ids from any gene set to stop GSEA from
+ − 7 barfing.
+ − 8
+ − 9 October 14 2012
+ − 10 Amazingly long time to figure out that GSEA fails with useless error message if any filename contains a dash "-"
+ − 11 eesh.
+ − 12
+ − 13 Added history .gmt source - requires passing a faked name to gsea
+ − 14 Wrapper for GSEA http://www.broadinstitute.org/gsea/index.jsp
+ − 15 Started Feb 22
+ − 16 Copyright 2012 Ross Lazarus
+ − 17 All rights reserved
+ − 18 Licensed under the LGPL
+ − 19
+ − 20 called eg as
+ − 21
+ − 22 #!/bin/sh
+ − 23 GALAXY_LIB="/data/extended/galaxy/lib"
+ − 24 if [ "$GALAXY_LIB" != "None" ]; then
+ − 25 if [ -n "$PYTHONPATH" ]; then
+ − 26 PYTHONPATH="$GALAXY_LIB:$PYTHONPATH"
+ − 27 else
+ − 28 PYTHONPATH="$GALAXY_LIB"
+ − 29 fi
+ − 30 export PYTHONPATH
+ − 31 fi
+ − 32
+ − 33 cd /data/extended/galaxy/database/job_working_directory/027/27311
+ − 34 python /data/extended/galaxy/tools/rgenetics/rgGSEA.py --input_tab "/data/extended/galaxy/database/files/033/dataset_33806.dat" --adjpvalcol "5" --signcol "2"
+ − 35 --idcol "1" --outhtml "/data/extended/galaxy/database/files/034/dataset_34455.dat" --input_name "actaearly-Controlearly-actalate-Controllate_topTable.xls"
+ − 36 --setMax "500" --setMin "15" --nPerm "1000" --plotTop "20"
+ − 37 --gsea_jar "/data/extended/galaxy/tool-data/shared/jars/gsea2-2.0.12.jar"
+ − 38 --output_dir "/data/extended/galaxy/database/job_working_directory/027/27311/dataset_34455_files" --mode "Max_probe"
+ − 39 --title " actaearly-Controlearly-actalate-Controllate_interpro_GSEA" --builtin_gmt "/data/genomes/gsea/3.1/IPR_DOMAIN.gmt"
+ − 40
+ − 41
+ − 42 """
+ − 43 import optparse
+ − 44 import tempfile
+ − 45 import os
+ − 46 import sys
+ − 47 import subprocess
+ − 48 import time
+ − 49 import shutil
+ − 50 import glob
+ − 51 import math
+ − 52 import re
+ − 53
+ − 54 KEEPSELECTION = False # detailed records for selection of multiple probes
+ − 55
+ − 56 def timenow():
+ − 57 """return current time as a string
+ − 58 """
+ − 59 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+ − 60
+ − 61
+ − 62
+ − 63 def fix_subdir(adir,destdir):
+ − 64 """ Galaxy wants everything in the same files_dir
+ − 65 if os.path.exists(adir):
+ − 66 for (d,dirs,files) in os.path.walk(adir):
+ − 67 for f in files:
+ − 68 sauce = os.path.join(d,f)
+ − 69 shutil.copy(sauce,destdir)
+ − 70 """
+ − 71
+ − 72 def fixAffycrap(apath=''):
+ − 73 """class='richTable'>RUNNING ES</th><th class='richTable'>CORE ENRICHMENT</th><tr><td class='lessen'>1</td>
+ − 74 <td><a href='https://www.affymetrix.com/LinkServlet?probeset=LBR'>LBR</a></td><td></td><td></td><td>1113</td>
+ − 75 <td>0.194</td><td>-0.1065</td><td>No</td></tr><tr><td class='lessen'>2</td><td>
+ − 76 <a href='https://www.affymetrix.com/LinkServlet?probeset=GGPS1'>GGPS1</a></td><td></td><td></td><td>4309</td><td>0.014</td><td>-0.4328</td>
+ − 77 <td>No</td></tr>
+ − 78 """
+ − 79 html = []
+ − 80 try:
+ − 81 html = open(apath,'r').readlines()
+ − 82 except:
+ − 83 return html
+ − 84 for i,row in enumerate(html):
+ − 85 row = re.sub('https\:\/\/www.affymetrix.com\/LinkServlet\?probeset=',"http://www.genecards.org/index.php?path=/Search/keyword/",row)
+ − 86 html[i] = row
+ − 87 return html
+ − 88
+ − 89 cleanup = False
+ − 90 if os.path.exists(adir):
+ − 91 flist = os.listdir(adir) # get all files created
+ − 92 for f in flist:
+ − 93 apath = os.path.join(adir,f)
+ − 94 dest = os.path.join(destdir,f)
+ − 95 if not os.path.isdir(apath):
+ − 96 if os.path.splitext(f)[1].lower() == '.html':
+ − 97 html = fixAffycrap(apath)
+ − 98 fixed = open(apath,'w')
+ − 99 fixed.write('\n'.join(html))
+ − 100 fixed.write('\n')
+ − 101 fixed.close()
+ − 102 if not os.path.isfile(dest):
+ − 103 shutil.copy(apath,dest)
+ − 104 else:
+ − 105 fix_subdir(apath,destdir)
+ − 106 if cleanup:
+ − 107 try:
+ − 108 shutil.rmtree(path=adir,ignore_errors=True)
+ − 109 except:
+ − 110 pass
+ − 111
+ − 112
+ − 113
+ − 114 def getFileString(fpath, outpath):
+ − 115 """
+ − 116 format a nice file size string
+ − 117 """
+ − 118 size = ''
+ − 119 fp = os.path.join(outpath, fpath)
+ − 120 s = fpath
+ − 121 if os.path.isfile(fp):
+ − 122 n = float(os.path.getsize(fp))
+ − 123 if n > 2**20:
+ − 124 size = ' (%1.1f MB)' % (n/2**20)
+ − 125 elif n > 2**10:
+ − 126 size = ' (%1.1f KB)' % (n/2**10)
+ − 127 elif n > 0:
+ − 128 size = ' (%d B)' % (int(n))
+ − 129 s = '%s %s' % (fpath, size)
+ − 130 return s
+ − 131
+ − 132 class gsea_wrapper:
+ − 133 """
+ − 134 GSEA java desktop client has a CL interface. CL can be gleaned by clicking the 'command line' button after setting up an analysis
+ − 135 We don't want gsea to do the analysis but it can read .rnk files containing rows of identifiers and an evidence weight such as the signed t statistic from limma for differential expression
+ − 136 (vgalaxy)rlazarus@iaas1:~/public_html/idle_illumina_analysis$ cat gseaHumanREFSEQ.sh
+ − 137 #!/bin/bash
+ − 138 for RNK in `ls *.rnk`
+ − 139 do
+ − 140 DIRNAME=${RNK%.*}
+ − 141 echo $DIRNAME
+ − 142 qsub -cwd -b y java -Xmx4096m -cp /data/app/bin/gsea2-2.07.jar xtools.gsea.GseaPreranked -gmx ../msigdb.v3.0.symbols.gmt -collapse true -mode Max_probe -norm meandiv
+ − 143 -nperm 1000 -rnk $RNK -scoring_scheme weighted -rpt_label $RNK -chip ../RefSeq_human.chip -include_only_symbols true -make_sets true -plot_top_x 20 -rnd_seed timestamp
+ − 144 -set_max 500 -set_min 15 -zip_report false -out gseaout/${DIRNAME} -gui false
+ − 145 done
+ − 146 """
+ − 147
+ − 148 def __init__(self,myName=None,opts=None):
+ − 149 """ setup cl for gsea
+ − 150 """
+ − 151 self.idcol = 0
+ − 152 self.signcol = 0
+ − 153 self.adjpvalcol = 0
+ − 154 self.progname=myName
+ − 155 self.opts = opts
+ − 156 remove_duplicates=True
+ − 157 if not os.path.isdir(opts.output_dir):
+ − 158 try:
+ − 159 os.makedirs(opts.output_dir)
+ − 160 except:
+ − 161 print >> sys.stderr,'##Error: GSEA wrapper unable to create or find output directory %s. Stopping' % (opts.output_dir)
+ − 162 sys.exit(1)
+ − 163 fakeGMT = re.sub('[^a-zA-Z0-9_]+', '', opts.input_name) # gives a more useful title for the GSEA report
+ − 164 fakeGMT = os.path.join(opts.output_dir,fakeGMT)
+ − 165 fakeRanks = '%s.rnk' % fakeGMT
+ − 166 if not fakeGMT.endswith('.gmt'):
+ − 167 fakeGMT = '%s.gmt' % fakeGMT
+ − 168 if opts.builtin_gmt and opts.history_gmt:
+ − 169 newfile = open(fakeGMT,'w')
+ − 170 subprocess.call(['cat',opts.builtin_gmt,opts.history_gmt],stdout=newfile)
+ − 171 newfile.close()
+ − 172 elif opts.history_gmt:
+ − 173 subprocess.call(['cp',opts.history_gmt,fakeGMT])
+ − 174 else:
+ − 175 subprocess.call(['cp',opts.builtin_gmt,fakeGMT])
+ − 176 # remove dupes from each gene set
+ − 177 gmt = open(fakeGMT,'r').readlines()
+ − 178 gmt = [x for x in gmt if len(x.split('\t')) > 3]
+ − 179 ugmt = []
+ − 180 for i,row in enumerate(gmt):
+ − 181 rows = row.rstrip().split('\t')
+ − 182 gmtname = rows[0]
+ − 183 gmtcomment = rows[1]
+ − 184 glist = list(set(rows[2:]))
+ − 185 newgmt = [gmtname,gmtcomment]
+ − 186 newgmt += glist
+ − 187 ugmt.append('\t'.join(newgmt))
+ − 188 gmt = open(fakeGMT,'w')
+ − 189 gmt.write('\n'.join(ugmt))
+ − 190 gmt.write('\n')
+ − 191 gmt.close()
+ − 192 if opts.input_ranks:
+ − 193 infname = opts.input_ranks
+ − 194 rdat = open(opts.input_ranks,'r').readlines() # suck in and remove blank ids that cause gsea to barf rml april 10 2012
+ − 195 rdat = [x.rstrip().split('\t') for x in rdat[1:]] # ignore head
+ − 196 dat = [[x[0],x[1],x[1]] for x in rdat]
+ − 197 # fake same structure as input tabular file
+ − 198 try:
+ − 199 pvals = [float(x[1]) for x in dat]
+ − 200 signs = [float(x[1]) for x in dat]
+ − 201 except:
+ − 202 print >> sys.stderr, '## error converting floating point - cannot process this input'
+ − 203 sys.exit(99)
+ − 204 else: # read tabular
+ − 205 self.idcol = int(opts.idcol) - 1
+ − 206 self.signcol = int(opts.signcol) - 1
+ − 207 self.adjpvalcol = int(opts.adjpvalcol) - 1
+ − 208 maxcol = max(self.idcol,self.signcol,self.adjpvalcol)
+ − 209 infname = opts.input_tab
+ − 210 indat = open(opts.input_tab,'r').readlines()
+ − 211 dat = [x.rstrip().split('\t') for x in indat[1:]]
+ − 212 dat = [x for x in dat if len(x) > maxcol]
+ − 213 dat = [[x[self.idcol],x[self.adjpvalcol],x[self.signcol]] for x in dat] # reduce to rank form
+ − 214 pvals = [float(x[1]) for x in dat]
+ − 215 outofrange = [x for x in pvals if ((x < 0.0) or (x > 1.0))]
+ − 216 assert len(outofrange) == 0, '## p values outside 0-1 encountered - was that the right column for adjusted p value?'
+ − 217 signs = [float(x[2]) for x in dat]
+ − 218 outofrange = [i for i,x in enumerate(signs) if (not x) and (dat[i][self.signcol] <> '0')]
+ − 219 bad = [dat[x][2] for x in outofrange]
+ − 220 assert len(outofrange) == 0, '## null numeric values encountered for sign - was that the right column? %s' % bad
+ − 221 ids = [x[0] for x in dat]
+ − 222 res = []
+ − 223 self.comments = []
+ − 224 useme = []
+ − 225 for i,row in enumerate(dat):
+ − 226 if row[1].upper() != 'NA' and row[2].upper() != 'NA' and row[0] != '' :
+ − 227 useme.append(i)
+ − 228 lost = len(dat) - len(useme)
+ − 229 if lost <> 0:
+ − 230 newdat = [dat[x] for x in useme]
+ − 231 del dat
+ − 232 dat = newdat
+ − 233 print >> sys.stdout, '## %d lost - NA values or null id' % lost
+ − 234 if remove_duplicates:
+ − 235 uids = list(set(ids)) # complex procedure to get min pval for each unique id
+ − 236 if len(uids) <> len(ids): # dupes - deal with mode
+ − 237 print >> sys.stdout,'## Dealing with %d uids in %d ids' % (len(uids),len(ids))
+ − 238 ures = {}
+ − 239 for i,id in enumerate(ids):
+ − 240 p = pvals[i]
+ − 241 ures.setdefault(id,[])
+ − 242 ures[id].append((p,signs[i]))
+ − 243 for id in uids:
+ − 244 tlist = ures[id]
+ − 245 tp = [x[0] for x in tlist]
+ − 246 ts = [x[1] for x in tlist]
+ − 247 if len(tp) == 1:
+ − 248 p = tp[0]
+ − 249 sign = ts[0]
+ − 250 else:
+ − 251 if opts.mode == "Max_probe":
+ − 252 p = min(tp)
+ − 253 sign = ts[tp.index(p)]
+ − 254 else: # guess median - too bad if even count
+ − 255 tp.sort()
+ − 256 ltp = len(tp)
+ − 257 ind = ltp/2 # yes, this is wrong for evens but what if sign wobbles?
+ − 258 if ltp % 2 == 1: # odd
+ − 259 ind += 1 # take the median
+ − 260 p = tp[ind]
+ − 261 sign = ts[ind]
+ − 262 if KEEPSELECTION:
+ − 263 self.comments.append('## for id=%s, got tp=%s, ts=%s, chose p=%f, sign =%f'\
+ − 264 % (id,str(tp),str(ts),p,sign))
+ − 265 if opts.input_ranks: # must be a rank file
+ − 266 res.append((id,'%f' % p))
+ − 267 else:
+ − 268 if p == 0.0:
+ − 269 p = 1e-99
+ − 270 try:
+ − 271 lp = -math.log10(p) # large positive if low p value
+ − 272 except ValueError:
+ − 273 lp = 0.0
+ − 274 if sign < 0:
+ − 275 lp = -lp # if negative, swap p to negative
+ − 276 res.append((id,'%f' % lp))
+ − 277 else: # no duplicates
+ − 278 for i,row in enumerate(dat):
+ − 279 (id,p,sign) = (row[0],float(row[1]),float(row[2]))
+ − 280 if opts.input_ranks: # must be a rank file
+ − 281 res.append((id,'%f' % p))
+ − 282 else:
+ − 283 if p == 0.0:
+ − 284 p = 1e-99
+ − 285 try:
+ − 286 lp = -math.log10(p) # large positive if low p value
+ − 287 except ValueError:
+ − 288 lp = 0.0
+ − 289 if sign < 0:
+ − 290 lp = -lp # if negative, swap p to negative
+ − 291 res.append((id,'%f' % lp))
+ − 292 else:
+ − 293 for i,row in enumerate(dat):
+ − 294 (id,p,sign) = (row[0],float(row[1]),float(row[2]))
+ − 295 if opts.input_ranks: # must be a rank file
+ − 296 res.append((id,'%f' % p))
+ − 297 else:
+ − 298 if p == 0.0:
+ − 299 p = 1e-99
+ − 300 try:
+ − 301 lp = -math.log10(p) # large positive if low p value
+ − 302 except ValueError:
+ − 303 lp = 0.0
+ − 304 if sign < 0:
+ − 305 lp = -lp # if negative, swap p to negative
+ − 306 res.append((id,'%f' % lp))
+ − 307 len1 = len(ids)
+ − 308 ranks = res # [x for x in res if len(x[0]) > 0 and len(x[1].strip()) > 0 and x[0].upper() <> 'NA']
+ − 309 # rd = dict(zip([x[0] for x in res],res)) # this does something mysterious to dupes - probably keeps last one
+ − 310 # ranks = rd.values()
+ − 311 len2 = len(ranks)
+ − 312 delta = len1 - len2
+ − 313 if delta <> 0:
+ − 314 print >> sys.stdout,'NOTE: %d of %d rank input file %s rows deleted - dup, null or NA IDs, pvals or logFCs' % (delta,len1,infname)
+ − 315 ranks = [(float(x[1]),x) for x in ranks] # decorate
+ − 316 ranks.sort()
+ − 317 ranks.reverse()
+ − 318 ranks = [x[1] for x in ranks] # undecorate
+ − 319 if opts.chip == '': # if mouse - need HUGO
+ − 320 ranks = [[x[0].upper(),x[1]] for x in ranks]
+ − 321 print >> sys.stdout, '## Fixed any lower case - now have',','.join([x[0] for x in ranks[:5]])
+ − 322 ranks = ['\t'.join(x) for x in ranks]
+ − 323 if len(ranks) < 2:
+ − 324 print >> sys.stderr,'Input %s has 1 or less rows with two tab delimited fields - please check the tool documentation' % infname
+ − 325 sys.exit(1)
+ − 326 rclean = open(fakeRanks,'w')
+ − 327 rclean.write('contig\tscore\n')
+ − 328 rclean.write('\n'.join(ranks))
+ − 329 rclean.write('\n')
+ − 330 rclean.close()
+ − 331 cl = []
+ − 332 a = cl.append
+ − 333 a('java -Xmx6G -cp')
+ − 334 a(opts.gsea_jar)
+ − 335 a('xtools.gsea.GseaPreranked')
+ − 336 a('-gmx %s' % fakeGMT) # ensure .gmt extension as required by GSEA - gene sets to use
+ − 337 a('-gui false') # use preranked file mode and no gui
+ − 338 a('-make_sets true -rnd_seed timestamp') # more things from the GUI command line display
+ − 339 a('-norm meandiv -zip_report false -scoring_scheme weighted') # ? need to set these?
+ − 340 a('-rnk %s' % fakeRanks) # input ranks file symbol (the chip file is the crosswalk for ids in first column)
+ − 341 a('-out %s' % opts.output_dir)
+ − 342 a('-set_max %s' % opts.setMax)
+ − 343 a('-set_min %s' % opts.setMin)
+ − 344 a('-mode %s' % opts.mode)
+ − 345 if opts.chip > '':
+ − 346 #a('-chip %s -collapse true -include_only_symbols true' % opts.chip)
+ − 347 a('-chip %s -collapse true' % opts.chip)
+ − 348 else:
+ − 349 a("-collapse false") # needed if no chip
+ − 350 a('-nperm %s' % opts.nPerm)
+ − 351 a('-rpt_label %s' % opts.title)
+ − 352 a('-plot_top_x %s' % opts.plotTop)
+ − 353 self.cl = cl
+ − 354 self.comments.append('## GSEA command line:')
+ − 355 self.comments.append(' '.join(self.cl))
+ − 356 self.fakeRanks = fakeRanks
+ − 357 self.fakeGMT = fakeGMT
+ − 358
+ − 359 def grepIds(self):
+ − 360 """
+ − 361 """
+ − 362 found = []
+ − 363 allids = open(self.opts.input_ranks,'r').readlines()
+ − 364 allids = [x.strip().split() for x in allids]
+ − 365 allids = [x[0] for x in allids] # list of ids
+ − 366 gmtpath = os.path.split(self.opts.fakeGMT)[0] # get path to all chip
+ − 367
+ − 368 def run(self):
+ − 369 """
+ − 370
+ − 371 """
+ − 372 tlog = os.path.join(self.opts.output_dir,"gsea_runner.log")
+ − 373 sto = open(tlog,'w')
+ − 374 x = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+ − 375 retval = x.wait()
+ − 376 sto.close()
+ − 377 d = glob.glob(os.path.join(self.opts.output_dir,'%s*' % self.opts.title))
+ − 378 if len(d) > 0:
+ − 379 fix_subdir(d[0],self.opts.output_dir)
+ − 380 htmlfname = os.path.join(self.opts.output_dir,'index.html')
+ − 381 try:
+ − 382 html = open(htmlfname,'r').readlines()
+ − 383 html = [x.strip() for x in html if len(x.strip()) > 0]
+ − 384 if len(self.comments) > 0:
+ − 385 s = ['<pre>']
+ − 386 s += self.comments
+ − 387 s.append('</pre>')
+ − 388 try:
+ − 389 i = html.index('<div id="footer">')
+ − 390 except:
+ − 391 i = len(html) - 7 # fudge
+ − 392 html = html[:i] + s + html[i:]
+ − 393 except:
+ − 394 html = []
+ − 395 htmlhead = '<html><head></head><body>'
+ − 396 html.append('## Galaxy GSEA wrapper failure')
+ − 397 html.append('## Unable to find index.html in %s - listdir=%s' % (d,' '.join(os.listdir(self.opts.output_dir))))
+ − 398 html.append('## Command line was %s' % (' '.join(self.cl)))
+ − 399 html.append('## commonly caused by mismatched ID/chip selection')
+ − 400 glog = open(os.path.join(self.opts.output_dir,'gsea_runner.log'),'r').readlines()
+ − 401 html.append('## gsea_runner.log=%s' % '\n'.join(glog))
+ − 402 #tryme = self.grepIds()
+ − 403 retval = 1
+ − 404 print >> sys.stderr,'\n'.join(html)
+ − 405 html = ['%s<br/>' % x for x in html]
+ − 406 html.insert(0,htmlhead)
+ − 407 html.append('</body></html>')
+ − 408 htmlf = file(self.opts.outhtml,'w')
+ − 409 htmlf.write('\n'.join(html))
+ − 410 htmlf.write('\n')
+ − 411 htmlf.close()
+ − 412 os.unlink(self.fakeRanks)
+ − 413 os.unlink(self.fakeGMT)
+ − 414 if opts.outtab_neg:
+ − 415 tabs = glob.glob(os.path.join(opts.output_dir,"gsea_report_for_*.xls"))
+ − 416 if len(tabs) > 0:
+ − 417 for tabi,t in enumerate(tabs):
+ − 418 tkind = os.path.basename(t).split('_')[4].lower()
+ − 419 if tkind == 'neg':
+ − 420 outtab = opts.outtab_neg
+ − 421 elif tkind == 'pos':
+ − 422 outtab = opts.outtab_pos
+ − 423 else:
+ − 424 print >> sys.stderr, '## tab file matched %s which is not "neg" or "pos" in 4th segment %s' % (t,tkind)
+ − 425 sys.exit()
+ − 426 content = open(t).readlines()
+ − 427 tabf = open(outtab,'w')
+ − 428 tabf.write(''.join(content))
+ − 429 tabf.close()
+ − 430 else:
+ − 431 print >> sys.stdout, 'Odd, maketab = %s but no matches - tabs = %s' % (makeTab,tabs)
+ − 432 return retval
+ − 433
+ − 434
+ − 435 if __name__ == "__main__":
+ − 436 """
+ − 437 called as:
+ − 438 <command interpreter="python">rgGSEA.py --input_ranks "$input1" --outhtml "$html_file"
+ − 439 --setMax "$setMax" --setMin "$setMin" --nPerm "$nPerm" --plotTop "$plotTop" --gsea_jar "$GALAXY_DATA_INDEX_DIR/shared/jars/gsea2-2.07.jar"
+ − 440 --output_dir "$html_file.files_path" --use_gmt ""${use_gmt.fields.path}"" --chip "${use_chip.fields.path}"
+ − 441 </command>
+ − 442 """
+ − 443 op = optparse.OptionParser()
+ − 444 a = op.add_option
+ − 445 a('--input_ranks',default=None)
+ − 446 a('--input_tab',default=None)
+ − 447 a('--input_name',default=None)
+ − 448 a('--use_gmt',default=None)
+ − 449 a('--history_gmt',default=None)
+ − 450 a('--builtin_gmt',default=None)
+ − 451 a('--history_gmt_name',default=None)
+ − 452 a('--setMax',default="500")
+ − 453 a('--setMin',default="15")
+ − 454 a('--nPerm',default="1000")
+ − 455 a('--title',default="GSEA report")
+ − 456 a('--chip',default='')
+ − 457 a('--plotTop',default='20')
+ − 458 a('--outhtml',default=None)
+ − 459 a('--makeTab',default=None)
+ − 460 a('--output_dir',default=None)
+ − 461 a('--outtab_neg',default=None)
+ − 462 a('--outtab_pos',default=None)
+ − 463 a('--adjpvalcol',default=None)
+ − 464 a('--signcol',default=None)
+ − 465 a('--idcol',default=None)
+ − 466 a('--mode',default='Max_probe')
+ − 467 a('-j','--gsea_jar',default='/usr/local/bin/gsea2-2.07.jar')
+ − 468 opts, args = op.parse_args()
+ − 469 assert os.path.isfile(opts.gsea_jar),'## GSEA runner unable to find supplied gsea java desktop executable file %s' % opts.gsea_jar
+ − 470 if opts.input_ranks:
+ − 471 inpf = opts.input_ranks
+ − 472 else:
+ − 473 inpf = opts.input_tab
+ − 474 assert opts.idcol <> None, '## GSEA runner needs an id column if a tabular file provided'
+ − 475 assert opts.signcol <> None, '## GSEA runner needs a sign column if a tabular file provided'
+ − 476 assert opts.adjpvalcol <> None, '## GSEA runner needs an adjusted p value column if a tabular file provided'
+ − 477 assert os.path.isfile(inpf),'## GSEA runner unable to open supplied input file %s' % inpf
+ − 478 if opts.chip > '':
+ − 479 assert os.path.isfile(opts.chip),'## GSEA runner unable to open supplied chip file %s' % opts.chip
+ − 480 some = None
+ − 481 if opts.history_gmt <> None:
+ − 482 some = 1
+ − 483 assert os.path.isfile(opts.history_gmt),'## GSEA runner unable to open supplied history gene set matrix (.gmt) file %s' % opts.history_gmt
+ − 484 if opts.builtin_gmt <> None:
+ − 485 some = 1
+ − 486 assert os.path.isfile(opts.builtin_gmt),'## GSEA runner unable to open supplied history gene set matrix (.gmt) file %s' % opts.builtin_gmt
+ − 487 assert some, '## GSEA runner needs a gene set matrix file - none chosen?'
+ − 488 opts.title = re.sub('[^a-zA-Z0-9_]+', '', opts.title)
+ − 489 myName=os.path.split(sys.argv[0])[-1]
+ − 490 gse = gsea_wrapper(myName, opts=opts)
+ − 491 retcode = gse.run()
+ − 492 if retcode <> 0:
+ − 493 sys.exit(retcode) # indicate failure to job runner
+ − 494
+ − 495