Mercurial > repos > nanettec > go_enrichment
comparison GO_enrichment/GO_enrichment.py @ 0:bde64415f03b draft
Uploaded
author | nanettec |
---|---|
date | Wed, 16 Mar 2016 10:15:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bde64415f03b |
---|---|
1 """ | |
2 @summary: GO enrichment analysis (hotspots) | |
3 @author: nanette.coetzer@gmail.com | |
4 @version 5 | |
5 | |
6 """ | |
7 | |
8 import optparse, sys | |
9 import subprocess | |
10 import tempfile | |
11 import os, re | |
12 | |
13 def stop_err( msg ): | |
14 sys.stderr.write( "%s\n" % msg ) | |
15 sys.exit() | |
16 | |
17 def __main__(): | |
18 #Parse Command Line | |
19 parser = optparse.OptionParser() | |
20 parser.add_option("-n", "--rscript", default=None, dest="rscript", | |
21 help="R script") | |
22 parser.add_option("-i", "--input1", default=None, dest="input1", | |
23 help="Gene universe (Gene positions file)") | |
24 parser.add_option("-j", "--input2", default=None, dest="input2", | |
25 help="Hotspots genelists all eQTLs") | |
26 parser.add_option("-k", "--input3", default=None, dest="input3", | |
27 help="Hotspots genelists cis-eQTLs") | |
28 parser.add_option("-l", "--input4", default=None, dest="input4", | |
29 help="Hotspots genelists trans-eQTLs") | |
30 parser.add_option("-m", "--input5", default=None, dest="input5", | |
31 help="Gene2GO mapping") | |
32 parser.add_option("-o", "--output1", default=None, dest="output1", | |
33 help="GO enrichment output; all eQTLs") | |
34 parser.add_option("-p", "--output2", default=None, dest="output2", | |
35 help="GO enrichment output; cis eQTLs") | |
36 parser.add_option("-q", "--output3", default=None, dest="output3", | |
37 help="GO enrichment output; trans eQTLs") | |
38 parser.add_option("-r", "--output4", default=None, dest="output4", | |
39 help="Hotspot summary file") | |
40 | |
41 (options, args) = parser.parse_args() | |
42 | |
43 try: | |
44 open(options.input1, "r").close() | |
45 except TypeError, e: | |
46 stop_err("You need to supply the Gene Universe file:\n" + str(e)) | |
47 except IOError, e: | |
48 stop_err("Can not open the Gene Universe file:\n" + str(e)) | |
49 | |
50 try: | |
51 open(options.input2, "r").close() | |
52 except TypeError, e: | |
53 stop_err("You need to supply the Genes of Interest file (all eQTLs):\n" + str(e)) | |
54 except IOError, e: | |
55 stop_err("Can not open the Hotspots genelists file (all eQTLs):\n" + str(e)) | |
56 | |
57 try: | |
58 open(options.input3, "r").close() | |
59 except TypeError, e: | |
60 stop_err("You need to supply the Genes of Interest file (cis-eQTLs):\n" + str(e)) | |
61 except IOError, e: | |
62 stop_err("Can not open the Hotspots genelists file (cis-eQTLs):\n" + str(e)) | |
63 | |
64 try: | |
65 open(options.input4, "r").close() | |
66 except TypeError, e: | |
67 stop_err("You need to supply the Genes of Interest file (trnas-eQTLs):\n" + str(e)) | |
68 except IOError, e: | |
69 stop_err("Can not open the Hotspots genelists file (trans-eQTLs):\n" + str(e)) | |
70 | |
71 try: | |
72 open(options.input5, "r").close() | |
73 except TypeError, e: | |
74 stop_err("You need to supply the Gene2GO mapping file:\n" + str(e)) | |
75 except IOError, e: | |
76 stop_err("Can not open the Gene2GO mapping file:\n" + str(e)) | |
77 | |
78 ########################################################## | |
79 | |
80 def uniq(input): | |
81 output = [] | |
82 for x in input: | |
83 if x not in output: | |
84 output.append(x) | |
85 return output | |
86 | |
87 def manage_output(hotspot_nr, tempdir, type): | |
88 sign_list = []; p_BP = 0; adjp_BP = 0; p_MF = 0; adjp_MF = 0; p_CC = 0; adjp_CC = 0 | |
89 | |
90 bpfile = open(tempdir+"/GO_table_BP.txt","r") | |
91 new_file = open(tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_GO_table.txt","w") | |
92 i = 0 | |
93 stars = 0 | |
94 for line in bpfile: | |
95 i += 1 | |
96 if i > 1: | |
97 l = line.strip().split("\t") | |
98 new_file.write("\t".join(l[1:])+"\n") | |
99 if l[9] == "\"*\"": | |
100 p_BP += 1 | |
101 if l[10] == "\"*\"": | |
102 adjp_BP += 1 | |
103 else: | |
104 l = line.strip().split("\t") | |
105 l[0] = "BP_GO.ID" | |
106 newl = "\t".join(l) | |
107 new_file.write(newl+"\n") | |
108 bpfile.close() | |
109 stars = 5 | |
110 name = "tGO_BP_classic_"+str(stars)+"_all.pdf" | |
111 full_name = tempdir+"/"+name | |
112 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name | |
113 s = "mv %s %s" %(full_name, new_name) | |
114 subprocess.call(s, shell=True) | |
115 sign_list.append(p_BP) | |
116 sign_list.append(adjp_BP) | |
117 | |
118 mffile = open(tempdir+"/GO_table_MF.txt","r") | |
119 i = 0 | |
120 stars = 0 | |
121 for line in mffile: | |
122 i += 1 | |
123 if i > 1: | |
124 l = line.strip().split("\t") | |
125 new_file.write("\t".join(l[1:])+"\n") | |
126 if l[9] == "\"*\"": | |
127 p_MF += 1 | |
128 if l[10] == "\"*\"": | |
129 adjp_MF += 1 | |
130 else: | |
131 l = line.strip().split("\t") | |
132 l[0] = "MF_GO.ID" | |
133 newl = "\t".join(l) | |
134 new_file.write("\n"+newl+"\n") | |
135 mffile.close() | |
136 stars = 5 | |
137 name = "tGO_MF_classic_"+str(stars)+"_all.pdf" | |
138 full_name = tempdir+"/"+name | |
139 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name | |
140 s = "mv %s %s" %(full_name, new_name) | |
141 subprocess.call(s, shell=True) | |
142 sign_list.append(p_MF) | |
143 sign_list.append(adjp_MF) | |
144 | |
145 ccfile = open(tempdir+"/GO_table_CC.txt","r") | |
146 i = 0 | |
147 stars = 0 | |
148 for line in ccfile: | |
149 i += 1 | |
150 if i > 1: | |
151 l = line.strip().split("\t") | |
152 new_file.write("\t".join(l[1:])+"\n") | |
153 if l[9] == "\"*\"": | |
154 p_CC += 1 | |
155 if l[10] == "\"*\"": | |
156 adjp_CC += 1 | |
157 else: | |
158 l = line.strip().split("\t") | |
159 l[0] = "CC_GO.ID" | |
160 newl = "\t".join(l) | |
161 new_file.write("\n"+newl+"\n") | |
162 ccfile.close() | |
163 stars = 5 | |
164 name = "tGO_CC_classic_"+str(stars)+"_all.pdf" | |
165 full_name = tempdir+"/"+name | |
166 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name | |
167 s = "mv %s %s" %(full_name, new_name) | |
168 subprocess.call(s, shell=True) | |
169 new_file.close() | |
170 sign_list.append(p_CC) | |
171 sign_list.append(adjp_CC) | |
172 return sign_list | |
173 | |
174 # Create temp direcotry | |
175 tempdir = tempfile.mkdtemp() | |
176 #print tempdir | |
177 #fixdir = "/lustre/galchew/MeQTL/eQTL_pipeline" | |
178 #fixdir = "/Users/nanettecoetzer/Documents/Bioinformatics/MAIZE_project/Hotspots_eQTLs/enrichment_analysis/TopGO/add_to_hotspots_pipeline" | |
179 | |
180 # copy INPUT file to the temp directory | |
181 s = "cp %s %s/gene_universe.txt" %(options.input1, tempdir) | |
182 subprocess.call(s, shell=True) | |
183 s = "cp %s %s/gene2GO_mapping.map" %(options.input5, tempdir) | |
184 subprocess.call(s, shell=True) | |
185 | |
186 # create R script => save in temp directory | |
187 # generate new header | |
188 new_script = open(tempdir+"/new_script.r","w") | |
189 header = "setwd(\"%s\")" %tempdir | |
190 new_script.write(header+"\n") | |
191 # add script body | |
192 script = open(options.rscript,"r") | |
193 #script = open(fixdir+"/TopGO_pipeline_new.txt","r") | |
194 for line in script: | |
195 new_script.write(line.strip()+"\n") | |
196 new_script.close() | |
197 | |
198 # get 2 parents | |
199 #infile = open(options.input2,'r') | |
200 #parents = [] | |
201 #for line in infile: | |
202 # l = line.strip().split("\t") | |
203 # if not line.startswith("=") and len(l) > 4: | |
204 # parents.append(l[12]) | |
205 #parent_list = uniq(parents) | |
206 #infile.close() | |
207 | |
208 summary_file = open(options.output4,'w') | |
209 summary_file.write("Type\tNumber\tChr\tSliding_IDs\tInterval_IDs\tNum_eQTLs\tParent_neg_additive\tParent_pos_additive\tBP_pVal<0.01\tBP_adj_pVal<0.05\tMF_pVal<0.01\tMF_adj_pVal<0.05\tCC_pVal<0.01\tCC_adj_pVal<0.05\n") | |
210 | |
211 ############################## all eQTLs ############################## | |
212 hotspot_nr = 0 | |
213 i = 0 | |
214 parent1 = 0 # Parent_neg_additive | |
215 parent2 = 0 # Parent_pos_additive | |
216 count_eqtl = 0 | |
217 infile2 = open(options.input2,'r') | |
218 for line in infile2: | |
219 l = line.strip().split("\t") | |
220 if line.startswith("="): | |
221 hotspot = l[0].split("Hotspot")[1].split(" ")[1] | |
222 chr = l[0].split("Hotspot")[1].split(" ")[4] | |
223 sliding_IDs = l[1].split(": ")[1] | |
224 int_IDs = l[2].split(": ")[1] | |
225 nr_eqtl = l[3].split(" ")[1] | |
226 if hotspot_nr == 0: | |
227 summary_file.write("All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
228 #print "All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
229 if hotspot_nr != 0: | |
230 interesting_genes.close() | |
231 # run R script from temp directory --> for the previous hotspot | |
232 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
233 subprocess.call(s, shell=True) | |
234 # remove old interesting genes file | |
235 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
236 subprocess.call(s, shell=True) | |
237 sign_GO = manage_output(hotspot_nr, tempdir, "all") | |
238 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
239 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
240 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
241 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
242 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
243 summary_file.write("All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
244 #print "All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
245 parent1 = 0 | |
246 parent2 = 0 | |
247 count_eqtl = 0 | |
248 # write a new interesting genes file | |
249 hotspot_nr += 1 | |
250 interesting_genes = open(tempdir+"/interesting_genes.txt",'w') | |
251 else: | |
252 if len(l) > 4: | |
253 # populate the new interesting genes file | |
254 gene_id = l[0] | |
255 parent = l[12] | |
256 # populate the new interesting genes file | |
257 if gene_id != "": | |
258 interesting_genes.write(gene_id+"\n") | |
259 count_eqtl += 1 | |
260 #if parent == parent_list[0]: | |
261 # parent1 += 1 | |
262 #if parent == parent_list[1]: | |
263 # parent2 += 1 | |
264 if float(parent) < 0: | |
265 parent1 += 1 | |
266 else: | |
267 parent2 += 1 | |
268 interesting_genes.close() | |
269 # run R script from temp directory --> for the previous hotspot | |
270 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
271 subprocess.call(s, shell=True) | |
272 # remove old interesting genes file | |
273 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
274 subprocess.call(s, shell=True) | |
275 sign_GO = manage_output(hotspot_nr, tempdir, "all") | |
276 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
277 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
278 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
279 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
280 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
281 parent1 = 0 | |
282 parent2 = 0 | |
283 count_eqtl = 0 | |
284 infile2.close() | |
285 | |
286 ############ | |
287 filenames = [] | |
288 os.chdir(tempdir+"/") | |
289 for files in os.listdir("."): | |
290 if (files.endswith(".txt") and files.startswith("hotspot")): | |
291 filenames.append(files) | |
292 filenames = sorted(filenames) | |
293 | |
294 with open(tempdir+"/"+"hotspots_all_GO_tables.txt","w") as outfile: | |
295 for fname in filenames: | |
296 outfile.write("\n"+fname+"\n") | |
297 with open(tempdir+"/"+fname) as infile: | |
298 for line in infile: | |
299 outfile.write(line) | |
300 | |
301 #for f in filenames: | |
302 # os.remove(f) | |
303 | |
304 ############ | |
305 | |
306 tempdir2 = tempfile.mkdtemp() | |
307 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir) | |
308 subprocess.call(s, shell=True) | |
309 s = "rm %s/hotspot*" %(tempdir) | |
310 subprocess.call(s, shell=True) | |
311 | |
312 # move the zipped temp directory to options.output | |
313 os.system("mv %s/output.zip %s" %(tempdir2,options.output1)) | |
314 | |
315 | |
316 ############################## cis eQTLs ############################## | |
317 hotspot_nr = 0 | |
318 i = 0 | |
319 parent1 = 0 | |
320 parent2 = 0 | |
321 count_eqtl = 0 | |
322 infile3 = open(options.input3,'r') | |
323 for line in infile3: | |
324 l = line.strip().split("\t") | |
325 if line.startswith("="): | |
326 hotspot = l[0].split("Hotspot")[1].split(" ")[1] | |
327 chr = l[0].split("Hotspot")[1].split(" ")[4] | |
328 sliding_IDs = l[1].split(": ")[1] | |
329 int_IDs = l[2].split(": ")[1] | |
330 nr_eqtl = l[3].split(" ")[1] | |
331 if hotspot_nr == 0: | |
332 summary_file.write("Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
333 #print "Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
334 if hotspot_nr != 0: | |
335 interesting_genes.close() | |
336 # run R script from temp directory --> for the previous hotspot | |
337 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
338 subprocess.call(s, shell=True) | |
339 # remove old interesting genes file | |
340 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
341 subprocess.call(s, shell=True) | |
342 sign_GO = manage_output(hotspot_nr, tempdir, "cis") | |
343 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
344 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
345 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
346 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
347 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
348 summary_file.write("Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
349 #print "Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
350 parent1 = 0 | |
351 parent2 = 0 | |
352 count_eqtl = 0 | |
353 # write a new interesting genes file | |
354 hotspot_nr += 1 | |
355 interesting_genes = open(tempdir+"/interesting_genes.txt",'w') | |
356 else: | |
357 if len(l) > 4: | |
358 # populate the new interesting genes file | |
359 gene_id = l[0] | |
360 parent = l[12] | |
361 # populate the new interesting genes file | |
362 if gene_id != "": | |
363 interesting_genes.write(gene_id+"\n") | |
364 count_eqtl += 1 | |
365 #if parent == parent_list[0]: | |
366 # parent1 += 1 | |
367 #if parent == parent_list[1]: | |
368 # parent2 += 1 | |
369 if float(parent) < 0: | |
370 parent1 += 1 | |
371 else: | |
372 parent2 += 1 | |
373 interesting_genes.close() | |
374 # run R script from temp directory --> for the previous hotspot | |
375 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
376 subprocess.call(s, shell=True) | |
377 # remove old interesting genes file | |
378 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
379 subprocess.call(s, shell=True) | |
380 sign_GO = manage_output(hotspot_nr, tempdir, "cis") | |
381 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
382 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
383 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
384 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
385 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
386 parent1 = 0 | |
387 parent2 = 0 | |
388 count_eqtl = 0 | |
389 infile3.close() | |
390 | |
391 ############ | |
392 filenames = [] | |
393 os.chdir(tempdir+"/") | |
394 for files in os.listdir("."): | |
395 if (files.endswith(".txt") and files.startswith("hotspot")): | |
396 filenames.append(files) | |
397 filenames = sorted(filenames) | |
398 | |
399 with open(tempdir+"/"+"hotspots_cis_GO_tables.txt","w") as outfile: | |
400 for fname in filenames: | |
401 outfile.write("\n"+fname+"\n") | |
402 with open(tempdir+"/"+fname) as infile: | |
403 for line in infile: | |
404 outfile.write(line) | |
405 | |
406 #for f in filenames: | |
407 # os.remove(f) | |
408 | |
409 ############ | |
410 | |
411 tempdir2 = tempfile.mkdtemp() | |
412 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir) | |
413 subprocess.call(s, shell=True) | |
414 s = "rm %s/hotspot*" %(tempdir) | |
415 subprocess.call(s, shell=True) | |
416 | |
417 # move the zipped temp directory to options.output | |
418 os.system("mv %s/output.zip %s" %(tempdir2,options.output2)) | |
419 | |
420 | |
421 ############################## trans eQTLs ############################## | |
422 hotspot_nr = 0 | |
423 i = 0 | |
424 parent1 = 0 | |
425 parent2 = 0 | |
426 count_eqtl = 0 | |
427 infile4 = open(options.input4,'r') | |
428 for line in infile4: | |
429 l = line.strip().split("\t") | |
430 if line.startswith("="): | |
431 hotspot = l[0].split("Hotspot")[1].split(" ")[1] | |
432 chr = l[0].split("Hotspot")[1].split(" ")[4] | |
433 sliding_IDs = l[1].split(": ")[1] | |
434 int_IDs = l[2].split(": ")[1] | |
435 nr_eqtl = l[3].split(" ")[1] | |
436 if hotspot_nr == 0: | |
437 summary_file.write("Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
438 #print "Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
439 if hotspot_nr != 0: | |
440 interesting_genes.close() | |
441 # run R script from temp directory --> for the previous hotspot | |
442 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
443 subprocess.call(s, shell=True) | |
444 # remove old interesting genes file | |
445 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
446 subprocess.call(s, shell=True) | |
447 sign_GO = manage_output(hotspot_nr, tempdir, "trans") | |
448 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
449 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
450 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
451 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
452 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
453 summary_file.write("Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t") | |
454 #print "Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t" | |
455 parent1 = 0 | |
456 parent2 = 0 | |
457 count_eqtl = 0 | |
458 # write a new interesting genes file | |
459 hotspot_nr += 1 | |
460 interesting_genes = open(tempdir+"/interesting_genes.txt",'w') | |
461 else: | |
462 if len(l) > 4: | |
463 # populate the new interesting genes file | |
464 gene_id = l[0] | |
465 parent = l[12] | |
466 # populate the new interesting genes file | |
467 if gene_id != "": | |
468 interesting_genes.write(gene_id+"\n") | |
469 count_eqtl += 1 | |
470 #if parent == parent_list[0]: | |
471 # parent1 += 1 | |
472 #if parent == parent_list[1]: | |
473 # parent2 += 1 | |
474 if float(parent) < 0: | |
475 parent1 += 1 | |
476 else: | |
477 parent2 += 1 | |
478 interesting_genes.close() | |
479 # run R script from temp directory --> for the previous hotspot | |
480 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir | |
481 subprocess.call(s, shell=True) | |
482 # remove old interesting genes file | |
483 #s = "rm %s/interesting_genes.txt" %(tempdir) | |
484 subprocess.call(s, shell=True) | |
485 sign_GO = manage_output(hotspot_nr, tempdir, "trans") | |
486 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1) | |
487 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1) | |
488 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5]) | |
489 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n") | |
490 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n" | |
491 parent1 = 0 | |
492 parent2 = 0 | |
493 count_eqtl = 0 | |
494 infile4.close() | |
495 | |
496 ############ | |
497 filenames = [] | |
498 os.chdir(tempdir+"/") | |
499 for files in os.listdir("."): | |
500 if (files.endswith(".txt") and files.startswith("hotspot")): | |
501 filenames.append(files) | |
502 filenames = sorted(filenames) | |
503 | |
504 with open(tempdir+"/"+"hotspots_trans_GO_tables.txt","w") as outfile: | |
505 for fname in filenames: | |
506 outfile.write("\n"+fname+"\n") | |
507 with open(tempdir+"/"+fname) as infile: | |
508 for line in infile: | |
509 outfile.write(line) | |
510 | |
511 #for f in filenames: | |
512 # os.remove(f) | |
513 | |
514 ############ | |
515 | |
516 tempdir2 = tempfile.mkdtemp() | |
517 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir) | |
518 subprocess.call(s, shell=True) | |
519 s = "rm %s/hotspot*" %(tempdir) | |
520 subprocess.call(s, shell=True) | |
521 | |
522 # move the zipped temp directory to options.output | |
523 os.system("mv %s/output.zip %s" %(tempdir2,options.output3)) | |
524 summary_file.close() | |
525 | |
526 ############################################## | |
527 | |
528 if __name__=="__main__": | |
529 __main__() |