0
|
1 """
|
|
2 @summary: GO enrichment analysis (hotspots)
|
|
3 @author: nanette.coetzer@gmail.com
|
|
4 @version 5
|
|
5
|
|
6 """
|
|
7
|
|
8 import optparse, sys
|
|
9 import subprocess
|
|
10 import tempfile
|
|
11 import os, re
|
|
12
|
|
13 def stop_err( msg ):
|
|
14 sys.stderr.write( "%s\n" % msg )
|
|
15 sys.exit()
|
|
16
|
|
17 def __main__():
|
|
18 #Parse Command Line
|
|
19 parser = optparse.OptionParser()
|
|
20 parser.add_option("-n", "--rscript", default=None, dest="rscript",
|
|
21 help="R script")
|
|
22 parser.add_option("-i", "--input1", default=None, dest="input1",
|
|
23 help="Gene universe (Gene positions file)")
|
|
24 parser.add_option("-j", "--input2", default=None, dest="input2",
|
|
25 help="Hotspots genelists all eQTLs")
|
|
26 parser.add_option("-k", "--input3", default=None, dest="input3",
|
|
27 help="Hotspots genelists cis-eQTLs")
|
|
28 parser.add_option("-l", "--input4", default=None, dest="input4",
|
|
29 help="Hotspots genelists trans-eQTLs")
|
|
30 parser.add_option("-m", "--input5", default=None, dest="input5",
|
|
31 help="Gene2GO mapping")
|
|
32 parser.add_option("-o", "--output1", default=None, dest="output1",
|
|
33 help="GO enrichment output; all eQTLs")
|
|
34 parser.add_option("-p", "--output2", default=None, dest="output2",
|
|
35 help="GO enrichment output; cis eQTLs")
|
|
36 parser.add_option("-q", "--output3", default=None, dest="output3",
|
|
37 help="GO enrichment output; trans eQTLs")
|
|
38 parser.add_option("-r", "--output4", default=None, dest="output4",
|
|
39 help="Hotspot summary file")
|
|
40
|
|
41 (options, args) = parser.parse_args()
|
|
42
|
|
43 try:
|
|
44 open(options.input1, "r").close()
|
|
45 except TypeError, e:
|
|
46 stop_err("You need to supply the Gene Universe file:\n" + str(e))
|
|
47 except IOError, e:
|
|
48 stop_err("Can not open the Gene Universe file:\n" + str(e))
|
|
49
|
|
50 try:
|
|
51 open(options.input2, "r").close()
|
|
52 except TypeError, e:
|
|
53 stop_err("You need to supply the Genes of Interest file (all eQTLs):\n" + str(e))
|
|
54 except IOError, e:
|
|
55 stop_err("Can not open the Hotspots genelists file (all eQTLs):\n" + str(e))
|
|
56
|
|
57 try:
|
|
58 open(options.input3, "r").close()
|
|
59 except TypeError, e:
|
|
60 stop_err("You need to supply the Genes of Interest file (cis-eQTLs):\n" + str(e))
|
|
61 except IOError, e:
|
|
62 stop_err("Can not open the Hotspots genelists file (cis-eQTLs):\n" + str(e))
|
|
63
|
|
64 try:
|
|
65 open(options.input4, "r").close()
|
|
66 except TypeError, e:
|
|
67 stop_err("You need to supply the Genes of Interest file (trnas-eQTLs):\n" + str(e))
|
|
68 except IOError, e:
|
|
69 stop_err("Can not open the Hotspots genelists file (trans-eQTLs):\n" + str(e))
|
|
70
|
|
71 try:
|
|
72 open(options.input5, "r").close()
|
|
73 except TypeError, e:
|
|
74 stop_err("You need to supply the Gene2GO mapping file:\n" + str(e))
|
|
75 except IOError, e:
|
|
76 stop_err("Can not open the Gene2GO mapping file:\n" + str(e))
|
|
77
|
|
78 ##########################################################
|
|
79
|
|
80 def uniq(input):
|
|
81 output = []
|
|
82 for x in input:
|
|
83 if x not in output:
|
|
84 output.append(x)
|
|
85 return output
|
|
86
|
|
87 def manage_output(hotspot_nr, tempdir, type):
|
|
88 sign_list = []; p_BP = 0; adjp_BP = 0; p_MF = 0; adjp_MF = 0; p_CC = 0; adjp_CC = 0
|
|
89
|
|
90 bpfile = open(tempdir+"/GO_table_BP.txt","r")
|
|
91 new_file = open(tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_GO_table.txt","w")
|
|
92 i = 0
|
|
93 stars = 0
|
|
94 for line in bpfile:
|
|
95 i += 1
|
|
96 if i > 1:
|
|
97 l = line.strip().split("\t")
|
|
98 new_file.write("\t".join(l[1:])+"\n")
|
|
99 if l[9] == "\"*\"":
|
|
100 p_BP += 1
|
|
101 if l[10] == "\"*\"":
|
|
102 adjp_BP += 1
|
|
103 else:
|
|
104 l = line.strip().split("\t")
|
|
105 l[0] = "BP_GO.ID"
|
|
106 newl = "\t".join(l)
|
|
107 new_file.write(newl+"\n")
|
|
108 bpfile.close()
|
|
109 stars = 5
|
|
110 name = "tGO_BP_classic_"+str(stars)+"_all.pdf"
|
|
111 full_name = tempdir+"/"+name
|
|
112 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name
|
|
113 s = "mv %s %s" %(full_name, new_name)
|
|
114 subprocess.call(s, shell=True)
|
|
115 sign_list.append(p_BP)
|
|
116 sign_list.append(adjp_BP)
|
|
117
|
|
118 mffile = open(tempdir+"/GO_table_MF.txt","r")
|
|
119 i = 0
|
|
120 stars = 0
|
|
121 for line in mffile:
|
|
122 i += 1
|
|
123 if i > 1:
|
|
124 l = line.strip().split("\t")
|
|
125 new_file.write("\t".join(l[1:])+"\n")
|
|
126 if l[9] == "\"*\"":
|
|
127 p_MF += 1
|
|
128 if l[10] == "\"*\"":
|
|
129 adjp_MF += 1
|
|
130 else:
|
|
131 l = line.strip().split("\t")
|
|
132 l[0] = "MF_GO.ID"
|
|
133 newl = "\t".join(l)
|
|
134 new_file.write("\n"+newl+"\n")
|
|
135 mffile.close()
|
|
136 stars = 5
|
|
137 name = "tGO_MF_classic_"+str(stars)+"_all.pdf"
|
|
138 full_name = tempdir+"/"+name
|
|
139 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name
|
|
140 s = "mv %s %s" %(full_name, new_name)
|
|
141 subprocess.call(s, shell=True)
|
|
142 sign_list.append(p_MF)
|
|
143 sign_list.append(adjp_MF)
|
|
144
|
|
145 ccfile = open(tempdir+"/GO_table_CC.txt","r")
|
|
146 i = 0
|
|
147 stars = 0
|
|
148 for line in ccfile:
|
|
149 i += 1
|
|
150 if i > 1:
|
|
151 l = line.strip().split("\t")
|
|
152 new_file.write("\t".join(l[1:])+"\n")
|
|
153 if l[9] == "\"*\"":
|
|
154 p_CC += 1
|
|
155 if l[10] == "\"*\"":
|
|
156 adjp_CC += 1
|
|
157 else:
|
|
158 l = line.strip().split("\t")
|
|
159 l[0] = "CC_GO.ID"
|
|
160 newl = "\t".join(l)
|
|
161 new_file.write("\n"+newl+"\n")
|
|
162 ccfile.close()
|
|
163 stars = 5
|
|
164 name = "tGO_CC_classic_"+str(stars)+"_all.pdf"
|
|
165 full_name = tempdir+"/"+name
|
|
166 new_name = tempdir+"/hotspot"+str(hotspot_nr)+"_"+str(type)+"_"+name
|
|
167 s = "mv %s %s" %(full_name, new_name)
|
|
168 subprocess.call(s, shell=True)
|
|
169 new_file.close()
|
|
170 sign_list.append(p_CC)
|
|
171 sign_list.append(adjp_CC)
|
|
172 return sign_list
|
|
173
|
|
174 # Create temp direcotry
|
|
175 tempdir = tempfile.mkdtemp()
|
|
176 #print tempdir
|
|
177 #fixdir = "/lustre/galchew/MeQTL/eQTL_pipeline"
|
|
178 #fixdir = "/Users/nanettecoetzer/Documents/Bioinformatics/MAIZE_project/Hotspots_eQTLs/enrichment_analysis/TopGO/add_to_hotspots_pipeline"
|
|
179
|
|
180 # copy INPUT file to the temp directory
|
|
181 s = "cp %s %s/gene_universe.txt" %(options.input1, tempdir)
|
|
182 subprocess.call(s, shell=True)
|
|
183 s = "cp %s %s/gene2GO_mapping.map" %(options.input5, tempdir)
|
|
184 subprocess.call(s, shell=True)
|
|
185
|
|
186 # create R script => save in temp directory
|
|
187 # generate new header
|
|
188 new_script = open(tempdir+"/new_script.r","w")
|
|
189 header = "setwd(\"%s\")" %tempdir
|
|
190 new_script.write(header+"\n")
|
|
191 # add script body
|
|
192 script = open(options.rscript,"r")
|
|
193 #script = open(fixdir+"/TopGO_pipeline_new.txt","r")
|
|
194 for line in script:
|
|
195 new_script.write(line.strip()+"\n")
|
|
196 new_script.close()
|
|
197
|
|
198 # get 2 parents
|
|
199 #infile = open(options.input2,'r')
|
|
200 #parents = []
|
|
201 #for line in infile:
|
|
202 # l = line.strip().split("\t")
|
|
203 # if not line.startswith("=") and len(l) > 4:
|
|
204 # parents.append(l[12])
|
|
205 #parent_list = uniq(parents)
|
|
206 #infile.close()
|
|
207
|
|
208 summary_file = open(options.output4,'w')
|
|
209 summary_file.write("Type\tNumber\tChr\tSliding_IDs\tInterval_IDs\tNum_eQTLs\tParent_neg_additive\tParent_pos_additive\tBP_pVal<0.01\tBP_adj_pVal<0.05\tMF_pVal<0.01\tMF_adj_pVal<0.05\tCC_pVal<0.01\tCC_adj_pVal<0.05\n")
|
|
210
|
|
211 ############################## all eQTLs ##############################
|
|
212 hotspot_nr = 0
|
|
213 i = 0
|
|
214 parent1 = 0 # Parent_neg_additive
|
|
215 parent2 = 0 # Parent_pos_additive
|
|
216 count_eqtl = 0
|
|
217 infile2 = open(options.input2,'r')
|
|
218 for line in infile2:
|
|
219 l = line.strip().split("\t")
|
|
220 if line.startswith("="):
|
|
221 hotspot = l[0].split("Hotspot")[1].split(" ")[1]
|
|
222 chr = l[0].split("Hotspot")[1].split(" ")[4]
|
|
223 sliding_IDs = l[1].split(": ")[1]
|
|
224 int_IDs = l[2].split(": ")[1]
|
|
225 nr_eqtl = l[3].split(" ")[1]
|
|
226 if hotspot_nr == 0:
|
|
227 summary_file.write("All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
228 #print "All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
229 if hotspot_nr != 0:
|
|
230 interesting_genes.close()
|
|
231 # run R script from temp directory --> for the previous hotspot
|
|
232 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
233 subprocess.call(s, shell=True)
|
|
234 # remove old interesting genes file
|
|
235 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
236 subprocess.call(s, shell=True)
|
|
237 sign_GO = manage_output(hotspot_nr, tempdir, "all")
|
|
238 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
239 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
240 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
241 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
242 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
243 summary_file.write("All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
244 #print "All\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
245 parent1 = 0
|
|
246 parent2 = 0
|
|
247 count_eqtl = 0
|
|
248 # write a new interesting genes file
|
|
249 hotspot_nr += 1
|
|
250 interesting_genes = open(tempdir+"/interesting_genes.txt",'w')
|
|
251 else:
|
|
252 if len(l) > 4:
|
|
253 # populate the new interesting genes file
|
|
254 gene_id = l[0]
|
|
255 parent = l[12]
|
|
256 # populate the new interesting genes file
|
|
257 if gene_id != "":
|
|
258 interesting_genes.write(gene_id+"\n")
|
|
259 count_eqtl += 1
|
|
260 #if parent == parent_list[0]:
|
|
261 # parent1 += 1
|
|
262 #if parent == parent_list[1]:
|
|
263 # parent2 += 1
|
|
264 if float(parent) < 0:
|
|
265 parent1 += 1
|
|
266 else:
|
|
267 parent2 += 1
|
|
268 interesting_genes.close()
|
|
269 # run R script from temp directory --> for the previous hotspot
|
|
270 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
271 subprocess.call(s, shell=True)
|
|
272 # remove old interesting genes file
|
|
273 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
274 subprocess.call(s, shell=True)
|
|
275 sign_GO = manage_output(hotspot_nr, tempdir, "all")
|
|
276 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
277 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
278 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
279 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
280 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
281 parent1 = 0
|
|
282 parent2 = 0
|
|
283 count_eqtl = 0
|
|
284 infile2.close()
|
|
285
|
|
286 ############
|
|
287 filenames = []
|
|
288 os.chdir(tempdir+"/")
|
|
289 for files in os.listdir("."):
|
|
290 if (files.endswith(".txt") and files.startswith("hotspot")):
|
|
291 filenames.append(files)
|
|
292 filenames = sorted(filenames)
|
|
293
|
|
294 with open(tempdir+"/"+"hotspots_all_GO_tables.txt","w") as outfile:
|
|
295 for fname in filenames:
|
|
296 outfile.write("\n"+fname+"\n")
|
|
297 with open(tempdir+"/"+fname) as infile:
|
|
298 for line in infile:
|
|
299 outfile.write(line)
|
|
300
|
|
301 #for f in filenames:
|
|
302 # os.remove(f)
|
|
303
|
|
304 ############
|
|
305
|
|
306 tempdir2 = tempfile.mkdtemp()
|
|
307 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir)
|
|
308 subprocess.call(s, shell=True)
|
|
309 s = "rm %s/hotspot*" %(tempdir)
|
|
310 subprocess.call(s, shell=True)
|
|
311
|
|
312 # move the zipped temp directory to options.output
|
|
313 os.system("mv %s/output.zip %s" %(tempdir2,options.output1))
|
|
314
|
|
315
|
|
316 ############################## cis eQTLs ##############################
|
|
317 hotspot_nr = 0
|
|
318 i = 0
|
|
319 parent1 = 0
|
|
320 parent2 = 0
|
|
321 count_eqtl = 0
|
|
322 infile3 = open(options.input3,'r')
|
|
323 for line in infile3:
|
|
324 l = line.strip().split("\t")
|
|
325 if line.startswith("="):
|
|
326 hotspot = l[0].split("Hotspot")[1].split(" ")[1]
|
|
327 chr = l[0].split("Hotspot")[1].split(" ")[4]
|
|
328 sliding_IDs = l[1].split(": ")[1]
|
|
329 int_IDs = l[2].split(": ")[1]
|
|
330 nr_eqtl = l[3].split(" ")[1]
|
|
331 if hotspot_nr == 0:
|
|
332 summary_file.write("Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
333 #print "Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
334 if hotspot_nr != 0:
|
|
335 interesting_genes.close()
|
|
336 # run R script from temp directory --> for the previous hotspot
|
|
337 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
338 subprocess.call(s, shell=True)
|
|
339 # remove old interesting genes file
|
|
340 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
341 subprocess.call(s, shell=True)
|
|
342 sign_GO = manage_output(hotspot_nr, tempdir, "cis")
|
|
343 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
344 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
345 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
346 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
347 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
348 summary_file.write("Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
349 #print "Cis\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
350 parent1 = 0
|
|
351 parent2 = 0
|
|
352 count_eqtl = 0
|
|
353 # write a new interesting genes file
|
|
354 hotspot_nr += 1
|
|
355 interesting_genes = open(tempdir+"/interesting_genes.txt",'w')
|
|
356 else:
|
|
357 if len(l) > 4:
|
|
358 # populate the new interesting genes file
|
|
359 gene_id = l[0]
|
|
360 parent = l[12]
|
|
361 # populate the new interesting genes file
|
|
362 if gene_id != "":
|
|
363 interesting_genes.write(gene_id+"\n")
|
|
364 count_eqtl += 1
|
|
365 #if parent == parent_list[0]:
|
|
366 # parent1 += 1
|
|
367 #if parent == parent_list[1]:
|
|
368 # parent2 += 1
|
|
369 if float(parent) < 0:
|
|
370 parent1 += 1
|
|
371 else:
|
|
372 parent2 += 1
|
|
373 interesting_genes.close()
|
|
374 # run R script from temp directory --> for the previous hotspot
|
|
375 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
376 subprocess.call(s, shell=True)
|
|
377 # remove old interesting genes file
|
|
378 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
379 subprocess.call(s, shell=True)
|
|
380 sign_GO = manage_output(hotspot_nr, tempdir, "cis")
|
|
381 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
382 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
383 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
384 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
385 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
386 parent1 = 0
|
|
387 parent2 = 0
|
|
388 count_eqtl = 0
|
|
389 infile3.close()
|
|
390
|
|
391 ############
|
|
392 filenames = []
|
|
393 os.chdir(tempdir+"/")
|
|
394 for files in os.listdir("."):
|
|
395 if (files.endswith(".txt") and files.startswith("hotspot")):
|
|
396 filenames.append(files)
|
|
397 filenames = sorted(filenames)
|
|
398
|
|
399 with open(tempdir+"/"+"hotspots_cis_GO_tables.txt","w") as outfile:
|
|
400 for fname in filenames:
|
|
401 outfile.write("\n"+fname+"\n")
|
|
402 with open(tempdir+"/"+fname) as infile:
|
|
403 for line in infile:
|
|
404 outfile.write(line)
|
|
405
|
|
406 #for f in filenames:
|
|
407 # os.remove(f)
|
|
408
|
|
409 ############
|
|
410
|
|
411 tempdir2 = tempfile.mkdtemp()
|
|
412 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir)
|
|
413 subprocess.call(s, shell=True)
|
|
414 s = "rm %s/hotspot*" %(tempdir)
|
|
415 subprocess.call(s, shell=True)
|
|
416
|
|
417 # move the zipped temp directory to options.output
|
|
418 os.system("mv %s/output.zip %s" %(tempdir2,options.output2))
|
|
419
|
|
420
|
|
421 ############################## trans eQTLs ##############################
|
|
422 hotspot_nr = 0
|
|
423 i = 0
|
|
424 parent1 = 0
|
|
425 parent2 = 0
|
|
426 count_eqtl = 0
|
|
427 infile4 = open(options.input4,'r')
|
|
428 for line in infile4:
|
|
429 l = line.strip().split("\t")
|
|
430 if line.startswith("="):
|
|
431 hotspot = l[0].split("Hotspot")[1].split(" ")[1]
|
|
432 chr = l[0].split("Hotspot")[1].split(" ")[4]
|
|
433 sliding_IDs = l[1].split(": ")[1]
|
|
434 int_IDs = l[2].split(": ")[1]
|
|
435 nr_eqtl = l[3].split(" ")[1]
|
|
436 if hotspot_nr == 0:
|
|
437 summary_file.write("Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
438 #print "Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
439 if hotspot_nr != 0:
|
|
440 interesting_genes.close()
|
|
441 # run R script from temp directory --> for the previous hotspot
|
|
442 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
443 subprocess.call(s, shell=True)
|
|
444 # remove old interesting genes file
|
|
445 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
446 subprocess.call(s, shell=True)
|
|
447 sign_GO = manage_output(hotspot_nr, tempdir, "trans")
|
|
448 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
449 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
450 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
451 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
452 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
453 summary_file.write("Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t")
|
|
454 #print "Trans\t"+hotspot+"\t"+chr+"\t"+sliding_IDs+"\t"+int_IDs+"\t"+nr_eqtl+"\t"
|
|
455 parent1 = 0
|
|
456 parent2 = 0
|
|
457 count_eqtl = 0
|
|
458 # write a new interesting genes file
|
|
459 hotspot_nr += 1
|
|
460 interesting_genes = open(tempdir+"/interesting_genes.txt",'w')
|
|
461 else:
|
|
462 if len(l) > 4:
|
|
463 # populate the new interesting genes file
|
|
464 gene_id = l[0]
|
|
465 parent = l[12]
|
|
466 # populate the new interesting genes file
|
|
467 if gene_id != "":
|
|
468 interesting_genes.write(gene_id+"\n")
|
|
469 count_eqtl += 1
|
|
470 #if parent == parent_list[0]:
|
|
471 # parent1 += 1
|
|
472 #if parent == parent_list[1]:
|
|
473 # parent2 += 1
|
|
474 if float(parent) < 0:
|
|
475 parent1 += 1
|
|
476 else:
|
|
477 parent2 += 1
|
|
478 interesting_genes.close()
|
|
479 # run R script from temp directory --> for the previous hotspot
|
|
480 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
|
|
481 subprocess.call(s, shell=True)
|
|
482 # remove old interesting genes file
|
|
483 #s = "rm %s/interesting_genes.txt" %(tempdir)
|
|
484 subprocess.call(s, shell=True)
|
|
485 sign_GO = manage_output(hotspot_nr, tempdir, "trans")
|
|
486 percentage_1 = round(int(parent1)/float(count_eqtl)*100,1)
|
|
487 percentage_2 = round(int(parent2)/float(count_eqtl)*100,1)
|
|
488 sGO = str(sign_GO[0])+"\t"+str(sign_GO[1])+"\t"+str(sign_GO[2])+"\t"+str(sign_GO[3])+"\t"+str(sign_GO[4])+"\t"+str(sign_GO[5])
|
|
489 summary_file.write(str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n")
|
|
490 #print str(percentage_1)+"\t"+str(percentage_2)+"\t"+str(sGO)+"\n"
|
|
491 parent1 = 0
|
|
492 parent2 = 0
|
|
493 count_eqtl = 0
|
|
494 infile4.close()
|
|
495
|
|
496 ############
|
|
497 filenames = []
|
|
498 os.chdir(tempdir+"/")
|
|
499 for files in os.listdir("."):
|
|
500 if (files.endswith(".txt") and files.startswith("hotspot")):
|
|
501 filenames.append(files)
|
|
502 filenames = sorted(filenames)
|
|
503
|
|
504 with open(tempdir+"/"+"hotspots_trans_GO_tables.txt","w") as outfile:
|
|
505 for fname in filenames:
|
|
506 outfile.write("\n"+fname+"\n")
|
|
507 with open(tempdir+"/"+fname) as infile:
|
|
508 for line in infile:
|
|
509 outfile.write(line)
|
|
510
|
|
511 #for f in filenames:
|
|
512 # os.remove(f)
|
|
513
|
|
514 ############
|
|
515
|
|
516 tempdir2 = tempfile.mkdtemp()
|
|
517 s = "zip -j %s/output.zip %s/hotspot*" %(tempdir2,tempdir)
|
|
518 subprocess.call(s, shell=True)
|
|
519 s = "rm %s/hotspot*" %(tempdir)
|
|
520 subprocess.call(s, shell=True)
|
|
521
|
|
522 # move the zipped temp directory to options.output
|
|
523 os.system("mv %s/output.zip %s" %(tempdir2,options.output3))
|
|
524 summary_file.close()
|
|
525
|
|
526 ##############################################
|
|
527
|
|
528 if __name__=="__main__":
|
|
529 __main__()
|