annotate rgGSEA/rgGSEA.py @ 2:71fa159646c9 draft

Uploaded
author fubar
date Sun, 09 Jun 2013 02:01:13 -0400 (2013-06-09)
parents
children 89e89b70a867
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
71fa159646c9 Uploaded
fubar
parents:
diff changeset
1 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
2 April 2013
71fa159646c9 Uploaded
fubar
parents:
diff changeset
3 eeesh GSEA does NOT respect the mode flag!
71fa159646c9 Uploaded
fubar
parents:
diff changeset
4
71fa159646c9 Uploaded
fubar
parents:
diff changeset
5 Now realise that the creation of the input rank file for gsea needs to take the lowest p value for duplicate
71fa159646c9 Uploaded
fubar
parents:
diff changeset
6 feature names. To make Ish's life easier, remove duplicate gene ids from any gene set to stop GSEA from
71fa159646c9 Uploaded
fubar
parents:
diff changeset
7 barfing.
71fa159646c9 Uploaded
fubar
parents:
diff changeset
8
71fa159646c9 Uploaded
fubar
parents:
diff changeset
9 October 14 2012
71fa159646c9 Uploaded
fubar
parents:
diff changeset
10 Amazingly long time to figure out that GSEA fails with useless error message if any filename contains a dash "-"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
11 eesh.
71fa159646c9 Uploaded
fubar
parents:
diff changeset
12
71fa159646c9 Uploaded
fubar
parents:
diff changeset
13 Added history .gmt source - requires passing a faked name to gsea
71fa159646c9 Uploaded
fubar
parents:
diff changeset
14 Wrapper for GSEA http://www.broadinstitute.org/gsea/index.jsp
71fa159646c9 Uploaded
fubar
parents:
diff changeset
15 Started Feb 22
71fa159646c9 Uploaded
fubar
parents:
diff changeset
16 Copyright 2012 Ross Lazarus
71fa159646c9 Uploaded
fubar
parents:
diff changeset
17 All rights reserved
71fa159646c9 Uploaded
fubar
parents:
diff changeset
18 Licensed under the LGPL
71fa159646c9 Uploaded
fubar
parents:
diff changeset
19
71fa159646c9 Uploaded
fubar
parents:
diff changeset
20 called eg as
71fa159646c9 Uploaded
fubar
parents:
diff changeset
21
71fa159646c9 Uploaded
fubar
parents:
diff changeset
22 #!/bin/sh
71fa159646c9 Uploaded
fubar
parents:
diff changeset
23 GALAXY_LIB="/data/extended/galaxy/lib"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
24 if [ "$GALAXY_LIB" != "None" ]; then
71fa159646c9 Uploaded
fubar
parents:
diff changeset
25 if [ -n "$PYTHONPATH" ]; then
71fa159646c9 Uploaded
fubar
parents:
diff changeset
26 PYTHONPATH="$GALAXY_LIB:$PYTHONPATH"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
27 else
71fa159646c9 Uploaded
fubar
parents:
diff changeset
28 PYTHONPATH="$GALAXY_LIB"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
29 fi
71fa159646c9 Uploaded
fubar
parents:
diff changeset
30 export PYTHONPATH
71fa159646c9 Uploaded
fubar
parents:
diff changeset
31 fi
71fa159646c9 Uploaded
fubar
parents:
diff changeset
32
71fa159646c9 Uploaded
fubar
parents:
diff changeset
33 cd /data/extended/galaxy/database/job_working_directory/027/27311
71fa159646c9 Uploaded
fubar
parents:
diff changeset
34 python /data/extended/galaxy/tools/rgenetics/rgGSEA.py --input_tab "/data/extended/galaxy/database/files/033/dataset_33806.dat" --adjpvalcol "5" --signcol "2"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
35 --idcol "1" --outhtml "/data/extended/galaxy/database/files/034/dataset_34455.dat" --input_name "actaearly-Controlearly-actalate-Controllate_topTable.xls"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
36 --setMax "500" --setMin "15" --nPerm "1000" --plotTop "20"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
37 --gsea_jar "/data/extended/galaxy/tool-data/shared/jars/gsea2-2.0.12.jar"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
38 --output_dir "/data/extended/galaxy/database/job_working_directory/027/27311/dataset_34455_files" --mode "Max_probe"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
39 --title " actaearly-Controlearly-actalate-Controllate_interpro_GSEA" --builtin_gmt "/data/genomes/gsea/3.1/IPR_DOMAIN.gmt"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
40
71fa159646c9 Uploaded
fubar
parents:
diff changeset
41
71fa159646c9 Uploaded
fubar
parents:
diff changeset
42 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
43 import optparse
71fa159646c9 Uploaded
fubar
parents:
diff changeset
44 import tempfile
71fa159646c9 Uploaded
fubar
parents:
diff changeset
45 import os
71fa159646c9 Uploaded
fubar
parents:
diff changeset
46 import sys
71fa159646c9 Uploaded
fubar
parents:
diff changeset
47 import subprocess
71fa159646c9 Uploaded
fubar
parents:
diff changeset
48 import time
71fa159646c9 Uploaded
fubar
parents:
diff changeset
49 import shutil
71fa159646c9 Uploaded
fubar
parents:
diff changeset
50 import glob
71fa159646c9 Uploaded
fubar
parents:
diff changeset
51 import math
71fa159646c9 Uploaded
fubar
parents:
diff changeset
52 import re
71fa159646c9 Uploaded
fubar
parents:
diff changeset
53
71fa159646c9 Uploaded
fubar
parents:
diff changeset
54 KEEPSELECTION = False # detailed records for selection of multiple probes
71fa159646c9 Uploaded
fubar
parents:
diff changeset
55
71fa159646c9 Uploaded
fubar
parents:
diff changeset
56 def timenow():
71fa159646c9 Uploaded
fubar
parents:
diff changeset
57 """return current time as a string
71fa159646c9 Uploaded
fubar
parents:
diff changeset
58 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
59 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
60
71fa159646c9 Uploaded
fubar
parents:
diff changeset
61
71fa159646c9 Uploaded
fubar
parents:
diff changeset
62
71fa159646c9 Uploaded
fubar
parents:
diff changeset
63 def fix_subdir(adir,destdir):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
64 """ Galaxy wants everything in the same files_dir
71fa159646c9 Uploaded
fubar
parents:
diff changeset
65 if os.path.exists(adir):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
66 for (d,dirs,files) in os.path.walk(adir):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
67 for f in files:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
68 sauce = os.path.join(d,f)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
69 shutil.copy(sauce,destdir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
70 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
71
71fa159646c9 Uploaded
fubar
parents:
diff changeset
72 def fixAffycrap(apath=''):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
73 """class='richTable'>RUNNING ES</th><th class='richTable'>CORE ENRICHMENT</th><tr><td class='lessen'>1</td>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
74 <td><a href='https://www.affymetrix.com/LinkServlet?probeset=LBR'>LBR</a></td><td></td><td></td><td>1113</td>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
75 <td>0.194</td><td>-0.1065</td><td>No</td></tr><tr><td class='lessen'>2</td><td>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
76 <a href='https://www.affymetrix.com/LinkServlet?probeset=GGPS1'>GGPS1</a></td><td></td><td></td><td>4309</td><td>0.014</td><td>-0.4328</td>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
77 <td>No</td></tr>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
78 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
79 html = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
80 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
81 html = open(apath,'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
82 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
83 return html
71fa159646c9 Uploaded
fubar
parents:
diff changeset
84 for i,row in enumerate(html):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
85 row = re.sub('https\:\/\/www.affymetrix.com\/LinkServlet\?probeset=',"http://www.genecards.org/index.php?path=/Search/keyword/",row)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
86 html[i] = row
71fa159646c9 Uploaded
fubar
parents:
diff changeset
87 return html
71fa159646c9 Uploaded
fubar
parents:
diff changeset
88
71fa159646c9 Uploaded
fubar
parents:
diff changeset
89 cleanup = False
71fa159646c9 Uploaded
fubar
parents:
diff changeset
90 if os.path.exists(adir):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
91 flist = os.listdir(adir) # get all files created
71fa159646c9 Uploaded
fubar
parents:
diff changeset
92 for f in flist:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
93 apath = os.path.join(adir,f)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
94 dest = os.path.join(destdir,f)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
95 if not os.path.isdir(apath):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
96 if os.path.splitext(f)[1].lower() == '.html':
71fa159646c9 Uploaded
fubar
parents:
diff changeset
97 html = fixAffycrap(apath)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
98 fixed = open(apath,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
99 fixed.write('\n'.join(html))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
100 fixed.write('\n')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
101 fixed.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
102 if not os.path.isfile(dest):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
103 shutil.copy(apath,dest)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
104 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
105 fix_subdir(apath,destdir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
106 if cleanup:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
107 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
108 shutil.rmtree(path=adir,ignore_errors=True)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
109 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
110 pass
71fa159646c9 Uploaded
fubar
parents:
diff changeset
111
71fa159646c9 Uploaded
fubar
parents:
diff changeset
112
71fa159646c9 Uploaded
fubar
parents:
diff changeset
113
71fa159646c9 Uploaded
fubar
parents:
diff changeset
114 def getFileString(fpath, outpath):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
115 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
116 format a nice file size string
71fa159646c9 Uploaded
fubar
parents:
diff changeset
117 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
118 size = ''
71fa159646c9 Uploaded
fubar
parents:
diff changeset
119 fp = os.path.join(outpath, fpath)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
120 s = fpath
71fa159646c9 Uploaded
fubar
parents:
diff changeset
121 if os.path.isfile(fp):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
122 n = float(os.path.getsize(fp))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
123 if n > 2**20:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
124 size = ' (%1.1f MB)' % (n/2**20)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
125 elif n > 2**10:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
126 size = ' (%1.1f KB)' % (n/2**10)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
127 elif n > 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
128 size = ' (%d B)' % (int(n))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
129 s = '%s %s' % (fpath, size)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
130 return s
71fa159646c9 Uploaded
fubar
parents:
diff changeset
131
71fa159646c9 Uploaded
fubar
parents:
diff changeset
132 class gsea_wrapper:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
133 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
134 GSEA java desktop client has a CL interface. CL can be gleaned by clicking the 'command line' button after setting up an analysis
71fa159646c9 Uploaded
fubar
parents:
diff changeset
135 We don't want gsea to do the analysis but it can read .rnk files containing rows of identifiers and an evidence weight such as the signed t statistic from limma for differential expression
71fa159646c9 Uploaded
fubar
parents:
diff changeset
136 (vgalaxy)rlazarus@iaas1:~/public_html/idle_illumina_analysis$ cat gseaHumanREFSEQ.sh
71fa159646c9 Uploaded
fubar
parents:
diff changeset
137 #!/bin/bash
71fa159646c9 Uploaded
fubar
parents:
diff changeset
138 for RNK in `ls *.rnk`
71fa159646c9 Uploaded
fubar
parents:
diff changeset
139 do
71fa159646c9 Uploaded
fubar
parents:
diff changeset
140 DIRNAME=${RNK%.*}
71fa159646c9 Uploaded
fubar
parents:
diff changeset
141 echo $DIRNAME
71fa159646c9 Uploaded
fubar
parents:
diff changeset
142 qsub -cwd -b y java -Xmx4096m -cp /data/app/bin/gsea2-2.07.jar xtools.gsea.GseaPreranked -gmx ../msigdb.v3.0.symbols.gmt -collapse true -mode Max_probe -norm meandiv
71fa159646c9 Uploaded
fubar
parents:
diff changeset
143 -nperm 1000 -rnk $RNK -scoring_scheme weighted -rpt_label $RNK -chip ../RefSeq_human.chip -include_only_symbols true -make_sets true -plot_top_x 20 -rnd_seed timestamp
71fa159646c9 Uploaded
fubar
parents:
diff changeset
144 -set_max 500 -set_min 15 -zip_report false -out gseaout/${DIRNAME} -gui false
71fa159646c9 Uploaded
fubar
parents:
diff changeset
145 done
71fa159646c9 Uploaded
fubar
parents:
diff changeset
146 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
147
71fa159646c9 Uploaded
fubar
parents:
diff changeset
148 def __init__(self,myName=None,opts=None):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
149 """ setup cl for gsea
71fa159646c9 Uploaded
fubar
parents:
diff changeset
150 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
151 self.idcol = 0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
152 self.signcol = 0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
153 self.adjpvalcol = 0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
154 self.progname=myName
71fa159646c9 Uploaded
fubar
parents:
diff changeset
155 self.opts = opts
71fa159646c9 Uploaded
fubar
parents:
diff changeset
156 remove_duplicates=True
71fa159646c9 Uploaded
fubar
parents:
diff changeset
157 if not os.path.isdir(opts.output_dir):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
158 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
159 os.makedirs(opts.output_dir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
160 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
161 print >> sys.stderr,'##Error: GSEA wrapper unable to create or find output directory %s. Stopping' % (opts.output_dir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
162 sys.exit(1)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
163 fakeGMT = re.sub('[^a-zA-Z0-9_]+', '', opts.input_name) # gives a more useful title for the GSEA report
71fa159646c9 Uploaded
fubar
parents:
diff changeset
164 fakeGMT = os.path.join(opts.output_dir,fakeGMT)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
165 fakeRanks = '%s.rnk' % fakeGMT
71fa159646c9 Uploaded
fubar
parents:
diff changeset
166 if not fakeGMT.endswith('.gmt'):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
167 fakeGMT = '%s.gmt' % fakeGMT
71fa159646c9 Uploaded
fubar
parents:
diff changeset
168 if opts.builtin_gmt and opts.history_gmt:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
169 newfile = open(fakeGMT,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
170 subprocess.call(['cat',opts.builtin_gmt,opts.history_gmt],stdout=newfile)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
171 newfile.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
172 elif opts.history_gmt:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
173 subprocess.call(['cp',opts.history_gmt,fakeGMT])
71fa159646c9 Uploaded
fubar
parents:
diff changeset
174 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
175 subprocess.call(['cp',opts.builtin_gmt,fakeGMT])
71fa159646c9 Uploaded
fubar
parents:
diff changeset
176 # remove dupes from each gene set
71fa159646c9 Uploaded
fubar
parents:
diff changeset
177 gmt = open(fakeGMT,'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
178 gmt = [x for x in gmt if len(x.split('\t')) > 3]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
179 ugmt = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
180 for i,row in enumerate(gmt):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
181 rows = row.rstrip().split('\t')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
182 gmtname = rows[0]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
183 gmtcomment = rows[1]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
184 glist = list(set(rows[2:]))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
185 newgmt = [gmtname,gmtcomment]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
186 newgmt += glist
71fa159646c9 Uploaded
fubar
parents:
diff changeset
187 ugmt.append('\t'.join(newgmt))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
188 gmt = open(fakeGMT,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
189 gmt.write('\n'.join(ugmt))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
190 gmt.write('\n')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
191 gmt.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
192 if opts.input_ranks:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
193 infname = opts.input_ranks
71fa159646c9 Uploaded
fubar
parents:
diff changeset
194 rdat = open(opts.input_ranks,'r').readlines() # suck in and remove blank ids that cause gsea to barf rml april 10 2012
71fa159646c9 Uploaded
fubar
parents:
diff changeset
195 rdat = [x.rstrip().split('\t') for x in rdat[1:]] # ignore head
71fa159646c9 Uploaded
fubar
parents:
diff changeset
196 dat = [[x[0],x[1],x[1]] for x in rdat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
197 # fake same structure as input tabular file
71fa159646c9 Uploaded
fubar
parents:
diff changeset
198 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
199 pvals = [float(x[1]) for x in dat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
200 signs = [float(x[1]) for x in dat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
201 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
202 print >> sys.stderr, '## error converting floating point - cannot process this input'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
203 sys.exit(99)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
204 else: # read tabular
71fa159646c9 Uploaded
fubar
parents:
diff changeset
205 self.idcol = int(opts.idcol) - 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
206 self.signcol = int(opts.signcol) - 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
207 self.adjpvalcol = int(opts.adjpvalcol) - 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
208 maxcol = max(self.idcol,self.signcol,self.adjpvalcol)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
209 infname = opts.input_tab
71fa159646c9 Uploaded
fubar
parents:
diff changeset
210 indat = open(opts.input_tab,'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
211 dat = [x.rstrip().split('\t') for x in indat[1:]]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
212 dat = [x for x in dat if len(x) > maxcol]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
213 dat = [[x[self.idcol],x[self.adjpvalcol],x[self.signcol]] for x in dat] # reduce to rank form
71fa159646c9 Uploaded
fubar
parents:
diff changeset
214 pvals = [float(x[1]) for x in dat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
215 outofrange = [x for x in pvals if ((x < 0.0) or (x > 1.0))]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
216 assert len(outofrange) == 0, '## p values outside 0-1 encountered - was that the right column for adjusted p value?'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
217 signs = [float(x[2]) for x in dat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
218 outofrange = [i for i,x in enumerate(signs) if (not x) and (dat[i][self.signcol] <> '0')]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
219 bad = [dat[x][2] for x in outofrange]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
220 assert len(outofrange) == 0, '## null numeric values encountered for sign - was that the right column? %s' % bad
71fa159646c9 Uploaded
fubar
parents:
diff changeset
221 ids = [x[0] for x in dat]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
222 res = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
223 self.comments = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
224 useme = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
225 for i,row in enumerate(dat):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
226 if row[1].upper() != 'NA' and row[2].upper() != 'NA' and row[0] != '' :
71fa159646c9 Uploaded
fubar
parents:
diff changeset
227 useme.append(i)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
228 lost = len(dat) - len(useme)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
229 if lost <> 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
230 newdat = [dat[x] for x in useme]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
231 del dat
71fa159646c9 Uploaded
fubar
parents:
diff changeset
232 dat = newdat
71fa159646c9 Uploaded
fubar
parents:
diff changeset
233 print >> sys.stdout, '## %d lost - NA values or null id' % lost
71fa159646c9 Uploaded
fubar
parents:
diff changeset
234 if remove_duplicates:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
235 uids = list(set(ids)) # complex procedure to get min pval for each unique id
71fa159646c9 Uploaded
fubar
parents:
diff changeset
236 if len(uids) <> len(ids): # dupes - deal with mode
71fa159646c9 Uploaded
fubar
parents:
diff changeset
237 print >> sys.stdout,'## Dealing with %d uids in %d ids' % (len(uids),len(ids))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
238 ures = {}
71fa159646c9 Uploaded
fubar
parents:
diff changeset
239 for i,id in enumerate(ids):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
240 p = pvals[i]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
241 ures.setdefault(id,[])
71fa159646c9 Uploaded
fubar
parents:
diff changeset
242 ures[id].append((p,signs[i]))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
243 for id in uids:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
244 tlist = ures[id]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
245 tp = [x[0] for x in tlist]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
246 ts = [x[1] for x in tlist]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
247 if len(tp) == 1:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
248 p = tp[0]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
249 sign = ts[0]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
250 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
251 if opts.mode == "Max_probe":
71fa159646c9 Uploaded
fubar
parents:
diff changeset
252 p = min(tp)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
253 sign = ts[tp.index(p)]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
254 else: # guess median - too bad if even count
71fa159646c9 Uploaded
fubar
parents:
diff changeset
255 tp.sort()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
256 ltp = len(tp)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
257 ind = ltp/2 # yes, this is wrong for evens but what if sign wobbles?
71fa159646c9 Uploaded
fubar
parents:
diff changeset
258 if ltp % 2 == 1: # odd
71fa159646c9 Uploaded
fubar
parents:
diff changeset
259 ind += 1 # take the median
71fa159646c9 Uploaded
fubar
parents:
diff changeset
260 p = tp[ind]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
261 sign = ts[ind]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
262 if KEEPSELECTION:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
263 self.comments.append('## for id=%s, got tp=%s, ts=%s, chose p=%f, sign =%f'\
71fa159646c9 Uploaded
fubar
parents:
diff changeset
264 % (id,str(tp),str(ts),p,sign))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
265 if opts.input_ranks: # must be a rank file
71fa159646c9 Uploaded
fubar
parents:
diff changeset
266 res.append((id,'%f' % p))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
267 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
268 if p == 0.0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
269 p = 1e-99
71fa159646c9 Uploaded
fubar
parents:
diff changeset
270 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
271 lp = -math.log10(p) # large positive if low p value
71fa159646c9 Uploaded
fubar
parents:
diff changeset
272 except ValueError:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
273 lp = 0.0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
274 if sign < 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
275 lp = -lp # if negative, swap p to negative
71fa159646c9 Uploaded
fubar
parents:
diff changeset
276 res.append((id,'%f' % lp))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
277 else: # no duplicates
71fa159646c9 Uploaded
fubar
parents:
diff changeset
278 for i,row in enumerate(dat):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
279 (id,p,sign) = (row[0],float(row[1]),float(row[2]))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
280 if opts.input_ranks: # must be a rank file
71fa159646c9 Uploaded
fubar
parents:
diff changeset
281 res.append((id,'%f' % p))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
282 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
283 if p == 0.0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
284 p = 1e-99
71fa159646c9 Uploaded
fubar
parents:
diff changeset
285 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
286 lp = -math.log10(p) # large positive if low p value
71fa159646c9 Uploaded
fubar
parents:
diff changeset
287 except ValueError:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
288 lp = 0.0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
289 if sign < 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
290 lp = -lp # if negative, swap p to negative
71fa159646c9 Uploaded
fubar
parents:
diff changeset
291 res.append((id,'%f' % lp))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
292 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
293 for i,row in enumerate(dat):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
294 (id,p,sign) = (row[0],float(row[1]),float(row[2]))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
295 if opts.input_ranks: # must be a rank file
71fa159646c9 Uploaded
fubar
parents:
diff changeset
296 res.append((id,'%f' % p))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
297 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
298 if p == 0.0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
299 p = 1e-99
71fa159646c9 Uploaded
fubar
parents:
diff changeset
300 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
301 lp = -math.log10(p) # large positive if low p value
71fa159646c9 Uploaded
fubar
parents:
diff changeset
302 except ValueError:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
303 lp = 0.0
71fa159646c9 Uploaded
fubar
parents:
diff changeset
304 if sign < 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
305 lp = -lp # if negative, swap p to negative
71fa159646c9 Uploaded
fubar
parents:
diff changeset
306 res.append((id,'%f' % lp))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
307 len1 = len(ids)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
308 ranks = res # [x for x in res if len(x[0]) > 0 and len(x[1].strip()) > 0 and x[0].upper() <> 'NA']
71fa159646c9 Uploaded
fubar
parents:
diff changeset
309 # rd = dict(zip([x[0] for x in res],res)) # this does something mysterious to dupes - probably keeps last one
71fa159646c9 Uploaded
fubar
parents:
diff changeset
310 # ranks = rd.values()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
311 len2 = len(ranks)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
312 delta = len1 - len2
71fa159646c9 Uploaded
fubar
parents:
diff changeset
313 if delta <> 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
314 print >> sys.stdout,'NOTE: %d of %d rank input file %s rows deleted - dup, null or NA IDs, pvals or logFCs' % (delta,len1,infname)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
315 ranks = [(float(x[1]),x) for x in ranks] # decorate
71fa159646c9 Uploaded
fubar
parents:
diff changeset
316 ranks.sort()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
317 ranks.reverse()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
318 ranks = [x[1] for x in ranks] # undecorate
71fa159646c9 Uploaded
fubar
parents:
diff changeset
319 if opts.chip == '': # if mouse - need HUGO
71fa159646c9 Uploaded
fubar
parents:
diff changeset
320 ranks = [[x[0].upper(),x[1]] for x in ranks]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
321 print >> sys.stdout, '## Fixed any lower case - now have',','.join([x[0] for x in ranks[:5]])
71fa159646c9 Uploaded
fubar
parents:
diff changeset
322 ranks = ['\t'.join(x) for x in ranks]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
323 if len(ranks) < 2:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
324 print >> sys.stderr,'Input %s has 1 or less rows with two tab delimited fields - please check the tool documentation' % infname
71fa159646c9 Uploaded
fubar
parents:
diff changeset
325 sys.exit(1)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
326 rclean = open(fakeRanks,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
327 rclean.write('contig\tscore\n')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
328 rclean.write('\n'.join(ranks))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
329 rclean.write('\n')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
330 rclean.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
331 cl = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
332 a = cl.append
71fa159646c9 Uploaded
fubar
parents:
diff changeset
333 a('java -Xmx6G -cp')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
334 a(opts.gsea_jar)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
335 a('xtools.gsea.GseaPreranked')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
336 a('-gmx %s' % fakeGMT) # ensure .gmt extension as required by GSEA - gene sets to use
71fa159646c9 Uploaded
fubar
parents:
diff changeset
337 a('-gui false') # use preranked file mode and no gui
71fa159646c9 Uploaded
fubar
parents:
diff changeset
338 a('-make_sets true -rnd_seed timestamp') # more things from the GUI command line display
71fa159646c9 Uploaded
fubar
parents:
diff changeset
339 a('-norm meandiv -zip_report false -scoring_scheme weighted') # ? need to set these?
71fa159646c9 Uploaded
fubar
parents:
diff changeset
340 a('-rnk %s' % fakeRanks) # input ranks file symbol (the chip file is the crosswalk for ids in first column)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
341 a('-out %s' % opts.output_dir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
342 a('-set_max %s' % opts.setMax)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
343 a('-set_min %s' % opts.setMin)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
344 a('-mode %s' % opts.mode)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
345 if opts.chip > '':
71fa159646c9 Uploaded
fubar
parents:
diff changeset
346 #a('-chip %s -collapse true -include_only_symbols true' % opts.chip)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
347 a('-chip %s -collapse true' % opts.chip)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
348 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
349 a("-collapse false") # needed if no chip
71fa159646c9 Uploaded
fubar
parents:
diff changeset
350 a('-nperm %s' % opts.nPerm)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
351 a('-rpt_label %s' % opts.title)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
352 a('-plot_top_x %s' % opts.plotTop)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
353 self.cl = cl
71fa159646c9 Uploaded
fubar
parents:
diff changeset
354 self.comments.append('## GSEA command line:')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
355 self.comments.append(' '.join(self.cl))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
356 self.fakeRanks = fakeRanks
71fa159646c9 Uploaded
fubar
parents:
diff changeset
357 self.fakeGMT = fakeGMT
71fa159646c9 Uploaded
fubar
parents:
diff changeset
358
71fa159646c9 Uploaded
fubar
parents:
diff changeset
359 def grepIds(self):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
360 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
361 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
362 found = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
363 allids = open(self.opts.input_ranks,'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
364 allids = [x.strip().split() for x in allids]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
365 allids = [x[0] for x in allids] # list of ids
71fa159646c9 Uploaded
fubar
parents:
diff changeset
366 gmtpath = os.path.split(self.opts.fakeGMT)[0] # get path to all chip
71fa159646c9 Uploaded
fubar
parents:
diff changeset
367
71fa159646c9 Uploaded
fubar
parents:
diff changeset
368 def run(self):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
369 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
370
71fa159646c9 Uploaded
fubar
parents:
diff changeset
371 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
372 tlog = os.path.join(self.opts.output_dir,"gsea_runner.log")
71fa159646c9 Uploaded
fubar
parents:
diff changeset
373 sto = open(tlog,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
374 x = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
375 retval = x.wait()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
376 sto.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
377 d = glob.glob(os.path.join(self.opts.output_dir,'%s*' % self.opts.title))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
378 if len(d) > 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
379 fix_subdir(d[0],self.opts.output_dir)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
380 htmlfname = os.path.join(self.opts.output_dir,'index.html')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
381 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
382 html = open(htmlfname,'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
383 html = [x.strip() for x in html if len(x.strip()) > 0]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
384 if len(self.comments) > 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
385 s = ['<pre>']
71fa159646c9 Uploaded
fubar
parents:
diff changeset
386 s += self.comments
71fa159646c9 Uploaded
fubar
parents:
diff changeset
387 s.append('</pre>')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
388 try:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
389 i = html.index('<div id="footer">')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
390 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
391 i = len(html) - 7 # fudge
71fa159646c9 Uploaded
fubar
parents:
diff changeset
392 html = html[:i] + s + html[i:]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
393 except:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
394 html = []
71fa159646c9 Uploaded
fubar
parents:
diff changeset
395 htmlhead = '<html><head></head><body>'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
396 html.append('## Galaxy GSEA wrapper failure')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
397 html.append('## Unable to find index.html in %s - listdir=%s' % (d,' '.join(os.listdir(self.opts.output_dir))))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
398 html.append('## Command line was %s' % (' '.join(self.cl)))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
399 html.append('## commonly caused by mismatched ID/chip selection')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
400 glog = open(os.path.join(self.opts.output_dir,'gsea_runner.log'),'r').readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
401 html.append('## gsea_runner.log=%s' % '\n'.join(glog))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
402 #tryme = self.grepIds()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
403 retval = 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
404 print >> sys.stderr,'\n'.join(html)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
405 html = ['%s<br/>' % x for x in html]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
406 html.insert(0,htmlhead)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
407 html.append('</body></html>')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
408 htmlf = file(self.opts.outhtml,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
409 htmlf.write('\n'.join(html))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
410 htmlf.write('\n')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
411 htmlf.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
412 os.unlink(self.fakeRanks)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
413 os.unlink(self.fakeGMT)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
414 if opts.outtab_neg:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
415 tabs = glob.glob(os.path.join(opts.output_dir,"gsea_report_for_*.xls"))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
416 if len(tabs) > 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
417 for tabi,t in enumerate(tabs):
71fa159646c9 Uploaded
fubar
parents:
diff changeset
418 tkind = os.path.basename(t).split('_')[4].lower()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
419 if tkind == 'neg':
71fa159646c9 Uploaded
fubar
parents:
diff changeset
420 outtab = opts.outtab_neg
71fa159646c9 Uploaded
fubar
parents:
diff changeset
421 elif tkind == 'pos':
71fa159646c9 Uploaded
fubar
parents:
diff changeset
422 outtab = opts.outtab_pos
71fa159646c9 Uploaded
fubar
parents:
diff changeset
423 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
424 print >> sys.stderr, '## tab file matched %s which is not "neg" or "pos" in 4th segment %s' % (t,tkind)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
425 sys.exit()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
426 content = open(t).readlines()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
427 tabf = open(outtab,'w')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
428 tabf.write(''.join(content))
71fa159646c9 Uploaded
fubar
parents:
diff changeset
429 tabf.close()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
430 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
431 print >> sys.stdout, 'Odd, maketab = %s but no matches - tabs = %s' % (makeTab,tabs)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
432 return retval
71fa159646c9 Uploaded
fubar
parents:
diff changeset
433
71fa159646c9 Uploaded
fubar
parents:
diff changeset
434
71fa159646c9 Uploaded
fubar
parents:
diff changeset
435 if __name__ == "__main__":
71fa159646c9 Uploaded
fubar
parents:
diff changeset
436 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
437 called as:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
438 <command interpreter="python">rgGSEA.py --input_ranks "$input1" --outhtml "$html_file"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
439 --setMax "$setMax" --setMin "$setMin" --nPerm "$nPerm" --plotTop "$plotTop" --gsea_jar "$GALAXY_DATA_INDEX_DIR/shared/jars/gsea2-2.07.jar"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
440 --output_dir "$html_file.files_path" --use_gmt ""${use_gmt.fields.path}"" --chip "${use_chip.fields.path}"
71fa159646c9 Uploaded
fubar
parents:
diff changeset
441 </command>
71fa159646c9 Uploaded
fubar
parents:
diff changeset
442 """
71fa159646c9 Uploaded
fubar
parents:
diff changeset
443 op = optparse.OptionParser()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
444 a = op.add_option
71fa159646c9 Uploaded
fubar
parents:
diff changeset
445 a('--input_ranks',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
446 a('--input_tab',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
447 a('--input_name',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
448 a('--use_gmt',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
449 a('--history_gmt',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
450 a('--builtin_gmt',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
451 a('--history_gmt_name',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
452 a('--setMax',default="500")
71fa159646c9 Uploaded
fubar
parents:
diff changeset
453 a('--setMin',default="15")
71fa159646c9 Uploaded
fubar
parents:
diff changeset
454 a('--nPerm',default="1000")
71fa159646c9 Uploaded
fubar
parents:
diff changeset
455 a('--title',default="GSEA report")
71fa159646c9 Uploaded
fubar
parents:
diff changeset
456 a('--chip',default='')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
457 a('--plotTop',default='20')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
458 a('--outhtml',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
459 a('--makeTab',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
460 a('--output_dir',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
461 a('--outtab_neg',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
462 a('--outtab_pos',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
463 a('--adjpvalcol',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
464 a('--signcol',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
465 a('--idcol',default=None)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
466 a('--mode',default='Max_probe')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
467 a('-j','--gsea_jar',default='/usr/local/bin/gsea2-2.07.jar')
71fa159646c9 Uploaded
fubar
parents:
diff changeset
468 opts, args = op.parse_args()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
469 assert os.path.isfile(opts.gsea_jar),'## GSEA runner unable to find supplied gsea java desktop executable file %s' % opts.gsea_jar
71fa159646c9 Uploaded
fubar
parents:
diff changeset
470 if opts.input_ranks:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
471 inpf = opts.input_ranks
71fa159646c9 Uploaded
fubar
parents:
diff changeset
472 else:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
473 inpf = opts.input_tab
71fa159646c9 Uploaded
fubar
parents:
diff changeset
474 assert opts.idcol <> None, '## GSEA runner needs an id column if a tabular file provided'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
475 assert opts.signcol <> None, '## GSEA runner needs a sign column if a tabular file provided'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
476 assert opts.adjpvalcol <> None, '## GSEA runner needs an adjusted p value column if a tabular file provided'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
477 assert os.path.isfile(inpf),'## GSEA runner unable to open supplied input file %s' % inpf
71fa159646c9 Uploaded
fubar
parents:
diff changeset
478 if opts.chip > '':
71fa159646c9 Uploaded
fubar
parents:
diff changeset
479 assert os.path.isfile(opts.chip),'## GSEA runner unable to open supplied chip file %s' % opts.chip
71fa159646c9 Uploaded
fubar
parents:
diff changeset
480 some = None
71fa159646c9 Uploaded
fubar
parents:
diff changeset
481 if opts.history_gmt <> None:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
482 some = 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
483 assert os.path.isfile(opts.history_gmt),'## GSEA runner unable to open supplied history gene set matrix (.gmt) file %s' % opts.history_gmt
71fa159646c9 Uploaded
fubar
parents:
diff changeset
484 if opts.builtin_gmt <> None:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
485 some = 1
71fa159646c9 Uploaded
fubar
parents:
diff changeset
486 assert os.path.isfile(opts.builtin_gmt),'## GSEA runner unable to open supplied history gene set matrix (.gmt) file %s' % opts.builtin_gmt
71fa159646c9 Uploaded
fubar
parents:
diff changeset
487 assert some, '## GSEA runner needs a gene set matrix file - none chosen?'
71fa159646c9 Uploaded
fubar
parents:
diff changeset
488 opts.title = re.sub('[^a-zA-Z0-9_]+', '', opts.title)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
489 myName=os.path.split(sys.argv[0])[-1]
71fa159646c9 Uploaded
fubar
parents:
diff changeset
490 gse = gsea_wrapper(myName, opts=opts)
71fa159646c9 Uploaded
fubar
parents:
diff changeset
491 retcode = gse.run()
71fa159646c9 Uploaded
fubar
parents:
diff changeset
492 if retcode <> 0:
71fa159646c9 Uploaded
fubar
parents:
diff changeset
493 sys.exit(retcode) # indicate failure to job runner
71fa159646c9 Uploaded
fubar
parents:
diff changeset
494
71fa159646c9 Uploaded
fubar
parents:
diff changeset
495