Mercurial > repos > glogobyte > isoread
comparison mirgene_ultra_v2.py @ 4:88b3ef865431 draft
Uploaded
author | glogobyte |
---|---|
date | Fri, 16 Oct 2020 18:54:54 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:106c4aea4650 | 4:88b3ef865431 |
---|---|
1 from mirgene_functions import * | |
2 from mirgene_graphs import * | |
3 import itertools | |
4 import time | |
5 import sys | |
6 import os | |
7 import urllib.request | |
8 import gzip | |
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
10 import subprocess | |
11 import argparse | |
12 from collections import OrderedDict | |
13 from matplotlib.backends.backend_pdf import PdfPages | |
14 import pandas as pd | |
15 from math import pi | |
16 import numpy as np | |
17 import matplotlib.pyplot as plt | |
18 from matplotlib.ticker import PercentFormatter | |
19 import seaborn as sns | |
20 import scipy.stats as stats | |
21 from plotnine import * | |
22 import math | |
23 import re | |
24 import matplotlib.ticker as mtick | |
25 import copy | |
26 | |
27 subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre']) | |
28 | |
29 parser = argparse.ArgumentParser() | |
30 parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store") | |
31 parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[]) | |
32 parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] ) | |
33 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") | |
34 parser.add_argument("-gen", "--org_name", help="tool directory path", action="store") | |
35 parser.add_argument("-program", "--pro", help="choose type of analysis", action="store") | |
36 parser.add_argument("-f", "--flag", help="choose the database", action="store") | |
37 parser.add_argument("-umis", "--umi", help="choose the database", action="store") | |
38 parser.add_argument("-percentage", "--per", help="choose the database", action="store") | |
39 parser.add_argument("-counts", "--count", help="choose the database", action="store") | |
40 parser.add_argument("-name1", "--n1", help="choose the database", action="store") | |
41 parser.add_argument("-name2", "--n2", help="choose the database", action="store") | |
42 args = parser.parse_args() | |
43 | |
44 #########################################################################3############################################################################################################################################################################################### | |
45 | |
46 if __name__ == '__main__': | |
47 | |
48 starttime = time.time() | |
49 | |
50 q1 = Queue() | |
51 q2 = Queue() | |
52 lock = Lock() | |
53 manager = Manager() | |
54 | |
55 mature_mirnas=manager.list() | |
56 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name)) | |
57 ps_mature.start() | |
58 | |
59 args.control[0]=args.control[0][1:] | |
60 args.control[len(args.control)-1][:-1] | |
61 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)] | |
62 | |
63 args.treated[0]=args.treated[0][1:] | |
64 args.treated[len(args.treated)-1][:-1] | |
65 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)] | |
66 | |
67 | |
68 ############## Detection of templated isoforms ################ | |
69 | |
70 radar = manager.list([0,0,0,0]) | |
71 samples = manager.list() | |
72 data= manager.list() | |
73 names_con=manager.list() | |
74 samples_mirna_names=manager.list() | |
75 deseq=manager.list() | |
76 unmap_seq=manager.Value('i',0) | |
77 unmap_counts=manager.Value('i',0) | |
78 LH2E_names=manager.list() | |
79 ini_c_samples = manager.list() | |
80 | |
81 | |
82 radar1 = manager.list([0,0,0,0]) | |
83 samples1 = manager.list() | |
84 data1 = manager.list() | |
85 names_tre = manager.list() | |
86 samples_mirna_names1=manager.list() | |
87 deseq1=manager.list() | |
88 unmap1_seq = manager.Value('i',0) | |
89 unmap1_counts = manager.Value('i',0) | |
90 LH8E_names=manager.list() | |
91 ini_t_samples = manager.list() | |
92 ps_mature.join() | |
93 | |
94 | |
95 mature_mirnas=list(mature_mirnas) | |
96 | |
97 starttime1 = time.time() | |
98 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control] | |
99 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated]) | |
100 | |
101 [p.start() for p in ps_sam] | |
102 [p.join() for p in ps_sam] | |
103 print('SAM took {} seconds'.format(time.time() - starttime1)) | |
104 | |
105 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))] | |
106 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))]) | |
107 [x.start() for x in ps_hist] | |
108 | |
109 starttime200=time.time() | |
110 sc = list(samples) | |
111 st = list(samples1) | |
112 | |
113 names_con=list(names_con) | |
114 names_tre=list(names_tre) | |
115 samples_mirna_names=list(samples_mirna_names) | |
116 samples_mirna_names.sort() | |
117 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names)) | |
118 | |
119 samples_mirna_names1=list(samples_mirna_names1) | |
120 samples_mirna_names1.sort() | |
121 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1)) | |
122 | |
123 deseq=list(deseq) | |
124 deseq1=list(deseq1) | |
125 | |
126 new_names_con=manager.list() | |
127 new_names_tre=manager.list() | |
128 new_deseq=manager.list() | |
129 new_deseq1=manager.list() | |
130 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)] | |
131 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)]) | |
132 | |
133 [z.start() for z in ps_deseq] | |
134 [z.join() for z in ps_deseq] | |
135 new_deseq=list(new_deseq) | |
136 new_deseq1=list(new_deseq1) | |
137 | |
138 LH2E=[[x[0],x[2]] for x in new_deseq[0]] | |
139 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq] | |
140 | |
141 LH8E=[[x[0],x[2]] for x in new_deseq1[0]] | |
142 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1] | |
143 | |
144 print('Deseq took {} seconds'.format(time.time() - starttime200)) | |
145 | |
146 merg_nam_LH2E=manager.list() | |
147 merg_nam_LH8E=manager.list() | |
148 | |
149 LH2E_copy=copy.deepcopy(list(LH2E)) | |
150 LH8E_copy=copy.deepcopy(list(LH8E)) | |
151 | |
152 fil_sort_tre=manager.list() | |
153 fil_sort_con=manager.list() | |
154 raw_sort_tre=manager.list() | |
155 raw_sort_con=manager.list() | |
156 | |
157 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con,args.per,args.count)) | |
158 ps_main.start() | |
159 | |
160 if args.anal=="2": | |
161 all_iso = manager.list() | |
162 ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso)) | |
163 ps_non_iso.start() | |
164 | |
165 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))] | |
166 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))]) | |
167 [x.start() for x in ps_merge] | |
168 [x.join() for x in ps_merge] | |
169 | |
170 merg_nam_LH2E=list(merg_nam_LH2E) | |
171 merg_nam_LH8E=list(merg_nam_LH8E) | |
172 | |
173 starttime2 = time.time() | |
174 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data] | |
175 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1]) | |
176 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))]) | |
177 if args.anal == "1": | |
178 procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))]) | |
179 | |
180 [p.start() for p in procs] | |
181 | |
182 if args.anal=="1": | |
183 [x.join() for x in ps_hist] | |
184 [p.join() for p in procs] | |
185 ps_pdf = Process(target=pdf_before_DE,args=(args.anal)) | |
186 ps_pdf.start() | |
187 | |
188 print('Graphs took {} seconds'.format(time.time() - starttime2)) | |
189 | |
190 ps_main.join() | |
191 | |
192 fil_sort_con=list(fil_sort_con) | |
193 fil_sort_tre=list(fil_sort_tre) | |
194 if fil_sort_con==[]: | |
195 fil_sort_con=raw_sort_con | |
196 fil_sort_tre=raw_sort_tre | |
197 | |
198 raw_sort_con=list(raw_sort_con) | |
199 raw_sort_tre=list(raw_sort_tre) | |
200 names_con=list(new_names_con) | |
201 names_tre=list(new_names_tre) | |
202 | |
203 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2)) | |
204 ps_write.start() | |
205 | |
206 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))] | |
207 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))]) | |
208 [p.start() for p in ps1_matrix] | |
209 | |
210 if args.anal=="1": | |
211 ps_pdf.join() | |
212 if args.anal=="2": | |
213 [p.join() for p in procs] | |
214 [x.join() for x in ps_hist] | |
215 | |
216 ps_write.join() | |
217 [p.join() for p in ps1_matrix] | |
218 | |
219 ############################## Detection of Both ####################################### | |
220 | |
221 starttime10 = time.time() | |
222 | |
223 if args.anal == "2": | |
224 | |
225 n_data= manager.list() | |
226 n_names_con=manager.list() | |
227 n_samples_mirna_names=manager.list() | |
228 n_deseq=manager.list() | |
229 n_LH2E_names=manager.list() | |
230 | |
231 n_data1 = manager.list() | |
232 n_names_tre = manager.list() | |
233 n_samples_mirna_names1=manager.list() | |
234 n_deseq1=manager.list() | |
235 n_LH8E_names=manager.list() | |
236 | |
237 new_mat_mirnas = list(mature_mirnas) | |
238 ps_non_iso.join() | |
239 | |
240 all_iso=list(all_iso) | |
241 new_mat_mirnas.extend(all_iso) | |
242 | |
243 starttime11=time.time() | |
244 | |
245 ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control] | |
246 ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated]) | |
247 | |
248 [p.start() for p in ps_sam] | |
249 [p.join() for p in ps_sam] | |
250 | |
251 print('Non-sam took {} seconds'.format(time.time() - starttime11)) | |
252 | |
253 starttime12=time.time() | |
254 | |
255 n_names_con=list(n_names_con) | |
256 n_names_tre=list(n_names_tre) | |
257 n_samples_mirna_names=list(n_samples_mirna_names) | |
258 n_samples_mirna_names.sort() | |
259 n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names)) | |
260 | |
261 | |
262 n_samples_mirna_names1=list(n_samples_mirna_names1) | |
263 n_samples_mirna_names1.sort() | |
264 n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1)) | |
265 | |
266 n_deseq=list(n_deseq) | |
267 n_deseq1=list(n_deseq1) | |
268 | |
269 new_n_names_con=manager.list() | |
270 new_n_names_tre=manager.list() | |
271 n_new_deseq=manager.list() | |
272 n_new_deseq1=manager.list() | |
273 ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)] | |
274 ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)]) | |
275 | |
276 [x.start() for x in ps_deseq] | |
277 [x.join() for x in ps_deseq] | |
278 n_new_deseq=list(n_new_deseq) | |
279 n_new_deseq1=list(n_new_deseq1) | |
280 | |
281 n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]] | |
282 [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq] | |
283 | |
284 n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]] | |
285 [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1] | |
286 | |
287 print('Non-deseq took {} seconds'.format(time.time() - starttime12)) | |
288 | |
289 merg_nam_n_LH2E=manager.list() | |
290 merg_nam_n_LH8E=manager.list() | |
291 | |
292 n_LH2E_copy=copy.deepcopy(list(n_LH2E)) | |
293 n_LH8E_copy=copy.deepcopy(list(n_LH8E)) | |
294 | |
295 n_fil_sort_con=manager.list() | |
296 n_fil_sort_tre=manager.list() | |
297 n_raw_sort_con=manager.list() | |
298 n_raw_sort_tre=manager.list() | |
299 | |
300 ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con,args.per,args.count)) | |
301 ps_main.start() | |
302 | |
303 starttime14=time.time() | |
304 ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))] | |
305 ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))]) | |
306 [p.start() for p in ps_merge] | |
307 [p.join() for p in ps_merge] | |
308 | |
309 merg_nam_n_LH2E=list(merg_nam_n_LH2E) | |
310 merg_nam_n_LH8E=list(merg_nam_n_LH8E) | |
311 | |
312 print('Merging took {} seconds'.format(time.time() - starttime14)) | |
313 | |
314 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data] | |
315 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1]) | |
316 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))]) | |
317 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))]) | |
318 procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))]) | |
319 | |
320 | |
321 starttime13=time.time() | |
322 [p.start() for p in procs] | |
323 [p.join() for p in procs] | |
324 | |
325 print('Graphs took {} seconds'.format(time.time() - starttime13)) | |
326 | |
327 procs1 = Process(target=pdf_before_DE,args=(args.anal)) | |
328 procs1.start() | |
329 starttime16=time.time() | |
330 ps_main.join() | |
331 print('Main took {} seconds'.format(time.time() - starttime16)) | |
332 | |
333 starttime15=time.time() | |
334 n_fil_sort_con=list(n_fil_sort_con) | |
335 n_fil_sort_tre=list(n_fil_sort_tre) | |
336 if n_fil_sort_con==[]: | |
337 n_fil_sort_con=n_raw_sort_con | |
338 n_fil_sort_tre=n_raw_sort_tre | |
339 | |
340 n_raw_sort_con=list(n_raw_sort_con) | |
341 n_raw_sort_tre=list(n_raw_sort_tre) | |
342 n_names_con=list(new_n_names_con) | |
343 n_names_tre=list(new_n_names_tre) | |
344 | |
345 ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2)) | |
346 ps_write.start() | |
347 | |
348 ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))] | |
349 ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))]) | |
350 [p.start() for p in ps1_matrix] | |
351 | |
352 ps_write.join() | |
353 [p.join() for p in ps1_matrix] | |
354 procs1.join() | |
355 print('Files took {} seconds'.format(time.time() - starttime15)) | |
356 print('That took {} seconds'.format(time.time() - starttime10)) | |
357 print('That took {} seconds'.format(time.time() - starttime)) | |
358 | |
359 | |
360 | |
361 | |
362 | |
363 | |
364 | |
365 |