18
|
1 from mirbase_functions import *
|
|
2 from mirbase_graphs import *
|
|
3 import itertools
|
|
4 import time
|
|
5 import sys
|
|
6 import os
|
|
7 import urllib.request
|
|
8 import gzip
|
|
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
|
|
10 import subprocess
|
|
11 import argparse
|
|
12 from collections import OrderedDict
|
|
13 from matplotlib.backends.backend_pdf import PdfPages
|
|
14 import pandas as pd
|
|
15 from math import pi
|
|
16 import numpy as np
|
|
17 import matplotlib.pyplot as plt
|
|
18 from matplotlib.ticker import PercentFormatter
|
|
19 import seaborn as sns
|
|
20 import scipy.stats as stats
|
|
21 from plotnine import *
|
|
22 import math
|
|
23 import re
|
|
24 import matplotlib.ticker as mtick
|
|
25 import copy
|
|
26
|
|
27 subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
|
|
28
|
|
29 parser = argparse.ArgumentParser()
|
|
30 parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
|
|
31 parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
|
|
32 parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
|
|
33 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
|
|
34 parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
|
|
35 parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
|
|
36 parser.add_argument("-f", "--flag", help="choose the database", action="store")
|
|
37 parser.add_argument("-umis", "--umi", help="choose the database", action="store")
|
|
38 parser.add_argument("-percentage", "--per", help="choose the database", action="store")
|
|
39 parser.add_argument("-counts", "--count", help="choose the database", action="store")
|
|
40 parser.add_argument("-name1", "--n1", help="choose the database", action="store")
|
|
41 parser.add_argument("-name2", "--n2", help="choose the database", action="store")
|
|
42 args = parser.parse_args()
|
|
43
|
|
44
|
|
45 ###############################################################################################################################################################################################
|
|
46
|
|
47 if __name__ == '__main__':
|
|
48
|
|
49 starttime = time.time()
|
|
50
|
|
51 q1 = Queue()
|
|
52 q2 = Queue()
|
|
53 lock = Lock()
|
|
54 manager = Manager()
|
|
55
|
|
56 mature_mirnas=manager.list()
|
|
57 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
|
|
58 ps_mature.start()
|
|
59
|
|
60 args.control[0]=args.control[0][1:]
|
|
61 args.control[len(args.control)-1][:-1]
|
|
62 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
|
|
63
|
|
64 args.treated[0]=args.treated[0][1:]
|
|
65 args.treated[len(args.treated)-1][:-1]
|
|
66 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
|
|
67
|
|
68
|
|
69 ############## Detection of templated isoforms ################
|
|
70
|
|
71 radar = manager.list([0,0,0,0])
|
|
72 samples = manager.list()
|
|
73 data= manager.list()
|
|
74 names_con=manager.list()
|
|
75 samples_mirna_names=manager.list()
|
|
76 deseq=manager.list()
|
|
77 unmap_seq=manager.Value('i',0)
|
|
78 unmap_counts=manager.Value('i',0)
|
|
79 LH2E_names=manager.list()
|
|
80 ini_c_samples = manager.list()
|
|
81
|
|
82
|
|
83 radar1 = manager.list([0,0,0,0])
|
|
84 samples1 = manager.list()
|
|
85 data1 = manager.list()
|
|
86 names_tre = manager.list()
|
|
87 samples_mirna_names1=manager.list()
|
|
88 deseq1=manager.list()
|
|
89 unmap1_seq = manager.Value('i',0)
|
|
90 unmap1_counts = manager.Value('i',0)
|
|
91 LH8E_names=manager.list()
|
|
92 ini_t_samples = manager.list()
|
|
93 ps_mature.join()
|
|
94
|
|
95
|
|
96 mature_mirnas=list(mature_mirnas)
|
|
97
|
|
98
|
|
99 starttime1 = time.time()
|
|
100 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
|
|
101 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
|
|
102
|
|
103 [p.start() for p in ps_sam]
|
|
104 [p.join() for p in ps_sam]
|
|
105 print('SAM took {} seconds'.format(time.time() - starttime1))
|
|
106
|
|
107 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
|
|
108 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
|
|
109 [x.start() for x in ps_hist]
|
|
110
|
|
111 starttime200=time.time()
|
|
112
|
|
113 sc = list(samples)
|
|
114 st = list(samples1)
|
|
115
|
|
116 names_con=list(names_con)
|
|
117 names_tre=list(names_tre)
|
|
118 samples_mirna_names=list(samples_mirna_names)
|
|
119 samples_mirna_names.sort()
|
|
120 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
|
|
121
|
|
122 samples_mirna_names1=list(samples_mirna_names1)
|
|
123 samples_mirna_names1.sort()
|
|
124 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
|
|
125
|
|
126 deseq=list(deseq)
|
|
127 deseq1=list(deseq1)
|
|
128
|
|
129 new_names_con=manager.list()
|
|
130 new_names_tre=manager.list()
|
|
131 new_deseq=manager.list()
|
|
132 new_deseq1=manager.list()
|
|
133 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
|
|
134 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
|
|
135
|
|
136 [z.start() for z in ps_deseq]
|
|
137 [z.join() for z in ps_deseq]
|
|
138 new_deseq=list(new_deseq)
|
|
139 new_deseq1=list(new_deseq1)
|
|
140
|
|
141 LH2E=[[x[0],x[2]] for x in new_deseq[0]]
|
|
142 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
|
|
143
|
|
144 LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
|
|
145 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
|
|
146
|
|
147 print('Deseq took {} seconds'.format(time.time() - starttime200))
|
|
148
|
|
149 merg_nam_LH2E=manager.list()
|
|
150 merg_nam_LH8E=manager.list()
|
|
151
|
|
152 LH2E_copy=copy.deepcopy(list(LH2E))
|
|
153 LH8E_copy=copy.deepcopy(list(LH8E))
|
|
154
|
|
155 fil_sort_tre=manager.list()
|
|
156 fil_sort_con=manager.list()
|
|
157 raw_sort_tre=manager.list()
|
|
158 raw_sort_con=manager.list()
|
|
159
|
|
160 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con))
|
|
161 ps_main.start()
|
|
162
|
|
163 if args.anal=="2":
|
|
164 all_iso = manager.list()
|
|
165 ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
|
|
166 ps_non_iso.start()
|
|
167
|
|
168 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
|
|
169 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
|
|
170 [x.start() for x in ps_merge]
|
|
171 [x.join() for x in ps_merge]
|
|
172
|
|
173 merg_nam_LH2E=list(merg_nam_LH2E)
|
|
174 merg_nam_LH8E=list(merg_nam_LH8E)
|
|
175
|
|
176 starttime2 = time.time()
|
|
177 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
|
|
178 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
|
|
179 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
|
|
180 if args.anal == "1":
|
|
181 procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
|
|
182
|
|
183 [p.start() for p in procs]
|
|
184
|
|
185
|
|
186 if args.anal=="1":
|
|
187 [x.join() for x in ps_hist]
|
|
188 [p.join() for p in procs]
|
|
189 ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
|
|
190 ps_pdf.start()
|
|
191
|
|
192 print('Graphs took {} seconds'.format(time.time() - starttime2))
|
|
193
|
|
194 ps_main.join()
|
|
195
|
|
196 fil_sort_con=list(fil_sort_con)
|
|
197 fil_sort_tre=list(fil_sort_tre)
|
|
198 if fil_sort_con==[]:
|
|
199 fil_sort_con=raw_sort_con
|
|
200 fil_sort_tre=raw_sort_tre
|
|
201
|
|
202 raw_sort_con=list(raw_sort_con)
|
|
203 raw_sort_tre=list(raw_sort_tre)
|
|
204 names_con=list(new_names_con)
|
|
205 names_tre=list(new_names_tre)
|
|
206
|
|
207 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1))
|
|
208 ps_write.start()
|
|
209
|
|
210 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
|
|
211 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
|
|
212 [p.start() for p in ps1_matrix]
|
|
213
|
|
214 if args.anal=="1":
|
|
215 ps_pdf.join()
|
|
216 if args.anal=="2":
|
|
217 [p.join() for p in procs]
|
|
218 [x.join() for x in ps_hist]
|
|
219
|
|
220 ps_write.join()
|
|
221 [p.join() for p in ps1_matrix]
|
|
222
|
|
223
|
|
224 ############################## Detection of Both #######################################
|
|
225
|
|
226 starttime10 = time.time()
|
|
227
|
|
228 if args.anal == "2":
|
|
229
|
|
230 n_data= manager.list()
|
|
231 n_names_con=manager.list()
|
|
232 n_samples_mirna_names=manager.list()
|
|
233 n_deseq=manager.list()
|
|
234 n_LH2E_names=manager.list()
|
|
235
|
|
236 n_data1 = manager.list()
|
|
237 n_names_tre = manager.list()
|
|
238 n_samples_mirna_names1=manager.list()
|
|
239 n_deseq1=manager.list()
|
|
240 n_LH8E_names=manager.list()
|
|
241
|
|
242
|
|
243 new_mat_mirnas = list(mature_mirnas)
|
|
244 ps_non_iso.join()
|
|
245
|
|
246 all_iso=list(all_iso)
|
|
247 new_mat_mirnas.extend(all_iso)
|
|
248
|
|
249 starttime11=time.time()
|
|
250
|
|
251 ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
|
|
252 ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
|
|
253
|
|
254 [p.start() for p in ps_sam]
|
|
255 [p.join() for p in ps_sam]
|
|
256
|
|
257 print('Non-sam took {} seconds'.format(time.time() - starttime11))
|
|
258
|
|
259 starttime12=time.time()
|
|
260
|
|
261 n_names_con=list(n_names_con)
|
|
262 n_names_tre=list(n_names_tre)
|
|
263 n_samples_mirna_names=list(n_samples_mirna_names)
|
|
264 n_samples_mirna_names.sort()
|
|
265 n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
|
|
266
|
|
267 n_samples_mirna_names1=list(n_samples_mirna_names1)
|
|
268 n_samples_mirna_names1.sort()
|
|
269 n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
|
|
270
|
|
271 n_deseq=list(n_deseq)
|
|
272 n_deseq1=list(n_deseq1)
|
|
273
|
|
274 new_n_names_con=manager.list()
|
|
275 new_n_names_tre=manager.list()
|
|
276 n_new_deseq=manager.list()
|
|
277 n_new_deseq1=manager.list()
|
|
278 ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
|
|
279 ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
|
|
280
|
|
281 [x.start() for x in ps_deseq]
|
|
282 [x.join() for x in ps_deseq]
|
|
283 n_new_deseq=list(n_new_deseq)
|
|
284 n_new_deseq1=list(n_new_deseq1)
|
|
285
|
|
286 n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
|
|
287 [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
|
|
288
|
|
289 n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
|
|
290 [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
|
|
291
|
|
292 print('Non-deseq took {} seconds'.format(time.time() - starttime12))
|
|
293
|
|
294 merg_nam_n_LH2E=manager.list()
|
|
295 merg_nam_n_LH8E=manager.list()
|
|
296
|
|
297 n_LH2E_copy=copy.deepcopy(list(n_LH2E))
|
|
298 n_LH8E_copy=copy.deepcopy(list(n_LH8E))
|
|
299
|
|
300 n_sort_tre=manager.list()
|
|
301 n_sort_con=manager.list()
|
|
302
|
|
303 n_fil_sort_con=manager.list()
|
|
304 n_fil_sort_tre=manager.list()
|
|
305 n_raw_sort_con=manager.list()
|
|
306 n_raw_sort_tre=manager.list()
|
|
307
|
|
308 ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con))
|
|
309 ps_main.start()
|
|
310
|
|
311 ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
|
|
312 ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
|
|
313 [p.start() for p in ps_merge]
|
|
314 [p.join() for p in ps_merge]
|
|
315
|
|
316 merg_nam_n_LH2E=list(merg_nam_n_LH2E)
|
|
317 merg_nam_n_LH8E=list(merg_nam_n_LH8E)
|
|
318
|
|
319 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
|
|
320 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
|
|
321 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
|
|
322 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
|
|
323 procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
|
|
324
|
|
325 starttime13=time.time()
|
|
326 [p.start() for p in procs]
|
|
327 [p.join() for p in procs]
|
|
328
|
|
329 print('Graphs took {} seconds'.format(time.time() - starttime13))
|
|
330
|
|
331 procs1 = Process(target=pdf_before_DE,args=(args.anal))
|
|
332 procs1.start()
|
|
333
|
|
334 starttime14=time.time()
|
|
335 ps_main.join()
|
|
336
|
|
337 n_fil_sort_con=list(n_fil_sort_con)
|
|
338 n_fil_sort_tre=list(n_fil_sort_tre)
|
|
339 if n_fil_sort_con==[]:
|
|
340 n_fil_sort_con=n_raw_sort_con
|
|
341 n_fil_sort_tre=n_raw_sort_tre
|
|
342
|
|
343 n_raw_sort_con=list(n_raw_sort_con)
|
|
344 n_raw_sort_tre=list(n_raw_sort_tre)
|
|
345 n_names_con=list(new_n_names_con)
|
|
346 n_names_tre=list(new_n_names_tre)
|
|
347
|
|
348 ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2))
|
|
349 ps_write.start()
|
|
350
|
|
351 ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
|
|
352 ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
|
|
353 [p.start() for p in ps1_matrix]
|
|
354
|
|
355 ps_write.join()
|
|
356 [p.join() for p in ps1_matrix]
|
|
357 procs1.join()
|
|
358 print('That took {} seconds'.format(time.time() - starttime10))
|
|
359 print('That took {} seconds'.format(time.time() - starttime))
|
|
360
|
|
361
|
|
362
|
|
363
|
|
364
|
|
365
|
|
366
|
|
367
|