comparison mirgene_ultra_v2.py @ 4:88b3ef865431 draft

Uploaded
author glogobyte
date Fri, 16 Oct 2020 18:54:54 +0000
parents
children
comparison
equal deleted inserted replaced
3:106c4aea4650 4:88b3ef865431
1 from mirgene_functions import *
2 from mirgene_graphs import *
3 import itertools
4 import time
5 import sys
6 import os
7 import urllib.request
8 import gzip
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
10 import subprocess
11 import argparse
12 from collections import OrderedDict
13 from matplotlib.backends.backend_pdf import PdfPages
14 import pandas as pd
15 from math import pi
16 import numpy as np
17 import matplotlib.pyplot as plt
18 from matplotlib.ticker import PercentFormatter
19 import seaborn as sns
20 import scipy.stats as stats
21 from plotnine import *
22 import math
23 import re
24 import matplotlib.ticker as mtick
25 import copy
26
27 subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
28
29 parser = argparse.ArgumentParser()
30 parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
31 parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
32 parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
33 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
34 parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
35 parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
36 parser.add_argument("-f", "--flag", help="choose the database", action="store")
37 parser.add_argument("-umis", "--umi", help="choose the database", action="store")
38 parser.add_argument("-percentage", "--per", help="choose the database", action="store")
39 parser.add_argument("-counts", "--count", help="choose the database", action="store")
40 parser.add_argument("-name1", "--n1", help="choose the database", action="store")
41 parser.add_argument("-name2", "--n2", help="choose the database", action="store")
42 args = parser.parse_args()
43
44 #########################################################################3###############################################################################################################################################################################################
45
46 if __name__ == '__main__':
47
48 starttime = time.time()
49
50 q1 = Queue()
51 q2 = Queue()
52 lock = Lock()
53 manager = Manager()
54
55 mature_mirnas=manager.list()
56 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
57 ps_mature.start()
58
59 args.control[0]=args.control[0][1:]
60 args.control[len(args.control)-1][:-1]
61 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
62
63 args.treated[0]=args.treated[0][1:]
64 args.treated[len(args.treated)-1][:-1]
65 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
66
67
68 ############## Detection of templated isoforms ################
69
70 radar = manager.list([0,0,0,0])
71 samples = manager.list()
72 data= manager.list()
73 names_con=manager.list()
74 samples_mirna_names=manager.list()
75 deseq=manager.list()
76 unmap_seq=manager.Value('i',0)
77 unmap_counts=manager.Value('i',0)
78 LH2E_names=manager.list()
79 ini_c_samples = manager.list()
80
81
82 radar1 = manager.list([0,0,0,0])
83 samples1 = manager.list()
84 data1 = manager.list()
85 names_tre = manager.list()
86 samples_mirna_names1=manager.list()
87 deseq1=manager.list()
88 unmap1_seq = manager.Value('i',0)
89 unmap1_counts = manager.Value('i',0)
90 LH8E_names=manager.list()
91 ini_t_samples = manager.list()
92 ps_mature.join()
93
94
95 mature_mirnas=list(mature_mirnas)
96
97 starttime1 = time.time()
98 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
99 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
100
101 [p.start() for p in ps_sam]
102 [p.join() for p in ps_sam]
103 print('SAM took {} seconds'.format(time.time() - starttime1))
104
105 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
106 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
107 [x.start() for x in ps_hist]
108
109 starttime200=time.time()
110 sc = list(samples)
111 st = list(samples1)
112
113 names_con=list(names_con)
114 names_tre=list(names_tre)
115 samples_mirna_names=list(samples_mirna_names)
116 samples_mirna_names.sort()
117 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
118
119 samples_mirna_names1=list(samples_mirna_names1)
120 samples_mirna_names1.sort()
121 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
122
123 deseq=list(deseq)
124 deseq1=list(deseq1)
125
126 new_names_con=manager.list()
127 new_names_tre=manager.list()
128 new_deseq=manager.list()
129 new_deseq1=manager.list()
130 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
131 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
132
133 [z.start() for z in ps_deseq]
134 [z.join() for z in ps_deseq]
135 new_deseq=list(new_deseq)
136 new_deseq1=list(new_deseq1)
137
138 LH2E=[[x[0],x[2]] for x in new_deseq[0]]
139 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
140
141 LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
142 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
143
144 print('Deseq took {} seconds'.format(time.time() - starttime200))
145
146 merg_nam_LH2E=manager.list()
147 merg_nam_LH8E=manager.list()
148
149 LH2E_copy=copy.deepcopy(list(LH2E))
150 LH8E_copy=copy.deepcopy(list(LH8E))
151
152 fil_sort_tre=manager.list()
153 fil_sort_con=manager.list()
154 raw_sort_tre=manager.list()
155 raw_sort_con=manager.list()
156
157 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con,args.per,args.count))
158 ps_main.start()
159
160 if args.anal=="2":
161 all_iso = manager.list()
162 ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
163 ps_non_iso.start()
164
165 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
166 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
167 [x.start() for x in ps_merge]
168 [x.join() for x in ps_merge]
169
170 merg_nam_LH2E=list(merg_nam_LH2E)
171 merg_nam_LH8E=list(merg_nam_LH8E)
172
173 starttime2 = time.time()
174 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
175 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
176 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
177 if args.anal == "1":
178 procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
179
180 [p.start() for p in procs]
181
182 if args.anal=="1":
183 [x.join() for x in ps_hist]
184 [p.join() for p in procs]
185 ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
186 ps_pdf.start()
187
188 print('Graphs took {} seconds'.format(time.time() - starttime2))
189
190 ps_main.join()
191
192 fil_sort_con=list(fil_sort_con)
193 fil_sort_tre=list(fil_sort_tre)
194 if fil_sort_con==[]:
195 fil_sort_con=raw_sort_con
196 fil_sort_tre=raw_sort_tre
197
198 raw_sort_con=list(raw_sort_con)
199 raw_sort_tre=list(raw_sort_tre)
200 names_con=list(new_names_con)
201 names_tre=list(new_names_tre)
202
203 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2))
204 ps_write.start()
205
206 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
207 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
208 [p.start() for p in ps1_matrix]
209
210 if args.anal=="1":
211 ps_pdf.join()
212 if args.anal=="2":
213 [p.join() for p in procs]
214 [x.join() for x in ps_hist]
215
216 ps_write.join()
217 [p.join() for p in ps1_matrix]
218
219 ############################## Detection of Both #######################################
220
221 starttime10 = time.time()
222
223 if args.anal == "2":
224
225 n_data= manager.list()
226 n_names_con=manager.list()
227 n_samples_mirna_names=manager.list()
228 n_deseq=manager.list()
229 n_LH2E_names=manager.list()
230
231 n_data1 = manager.list()
232 n_names_tre = manager.list()
233 n_samples_mirna_names1=manager.list()
234 n_deseq1=manager.list()
235 n_LH8E_names=manager.list()
236
237 new_mat_mirnas = list(mature_mirnas)
238 ps_non_iso.join()
239
240 all_iso=list(all_iso)
241 new_mat_mirnas.extend(all_iso)
242
243 starttime11=time.time()
244
245 ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
246 ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
247
248 [p.start() for p in ps_sam]
249 [p.join() for p in ps_sam]
250
251 print('Non-sam took {} seconds'.format(time.time() - starttime11))
252
253 starttime12=time.time()
254
255 n_names_con=list(n_names_con)
256 n_names_tre=list(n_names_tre)
257 n_samples_mirna_names=list(n_samples_mirna_names)
258 n_samples_mirna_names.sort()
259 n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
260
261
262 n_samples_mirna_names1=list(n_samples_mirna_names1)
263 n_samples_mirna_names1.sort()
264 n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
265
266 n_deseq=list(n_deseq)
267 n_deseq1=list(n_deseq1)
268
269 new_n_names_con=manager.list()
270 new_n_names_tre=manager.list()
271 n_new_deseq=manager.list()
272 n_new_deseq1=manager.list()
273 ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
274 ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
275
276 [x.start() for x in ps_deseq]
277 [x.join() for x in ps_deseq]
278 n_new_deseq=list(n_new_deseq)
279 n_new_deseq1=list(n_new_deseq1)
280
281 n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
282 [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
283
284 n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
285 [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
286
287 print('Non-deseq took {} seconds'.format(time.time() - starttime12))
288
289 merg_nam_n_LH2E=manager.list()
290 merg_nam_n_LH8E=manager.list()
291
292 n_LH2E_copy=copy.deepcopy(list(n_LH2E))
293 n_LH8E_copy=copy.deepcopy(list(n_LH8E))
294
295 n_fil_sort_con=manager.list()
296 n_fil_sort_tre=manager.list()
297 n_raw_sort_con=manager.list()
298 n_raw_sort_tre=manager.list()
299
300 ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con,args.per,args.count))
301 ps_main.start()
302
303 starttime14=time.time()
304 ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
305 ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
306 [p.start() for p in ps_merge]
307 [p.join() for p in ps_merge]
308
309 merg_nam_n_LH2E=list(merg_nam_n_LH2E)
310 merg_nam_n_LH8E=list(merg_nam_n_LH8E)
311
312 print('Merging took {} seconds'.format(time.time() - starttime14))
313
314 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
315 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
316 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
317 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
318 procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
319
320
321 starttime13=time.time()
322 [p.start() for p in procs]
323 [p.join() for p in procs]
324
325 print('Graphs took {} seconds'.format(time.time() - starttime13))
326
327 procs1 = Process(target=pdf_before_DE,args=(args.anal))
328 procs1.start()
329 starttime16=time.time()
330 ps_main.join()
331 print('Main took {} seconds'.format(time.time() - starttime16))
332
333 starttime15=time.time()
334 n_fil_sort_con=list(n_fil_sort_con)
335 n_fil_sort_tre=list(n_fil_sort_tre)
336 if n_fil_sort_con==[]:
337 n_fil_sort_con=n_raw_sort_con
338 n_fil_sort_tre=n_raw_sort_tre
339
340 n_raw_sort_con=list(n_raw_sort_con)
341 n_raw_sort_tre=list(n_raw_sort_tre)
342 n_names_con=list(new_n_names_con)
343 n_names_tre=list(new_n_names_tre)
344
345 ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2))
346 ps_write.start()
347
348 ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
349 ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
350 [p.start() for p in ps1_matrix]
351
352 ps_write.join()
353 [p.join() for p in ps1_matrix]
354 procs1.join()
355 print('Files took {} seconds'.format(time.time() - starttime15))
356 print('That took {} seconds'.format(time.time() - starttime10))
357 print('That took {} seconds'.format(time.time() - starttime))
358
359
360
361
362
363
364
365