Mercurial > repos > glogobyte > isoread
comparison mirbase_ultra_v2.py @ 0:99e8a03f8802 draft
Uploaded
author | glogobyte |
---|---|
date | Fri, 16 Oct 2020 18:53:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:99e8a03f8802 |
---|---|
1 from mirbase_functions import * | |
2 from mirbase_graphs import * | |
3 import itertools | |
4 import time | |
5 import sys | |
6 import os | |
7 import urllib.request | |
8 import gzip | |
9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
10 import subprocess | |
11 import argparse | |
12 from collections import OrderedDict | |
13 from matplotlib.backends.backend_pdf import PdfPages | |
14 import pandas as pd | |
15 from math import pi | |
16 import numpy as np | |
17 import matplotlib.pyplot as plt | |
18 from matplotlib.ticker import PercentFormatter | |
19 import seaborn as sns | |
20 import scipy.stats as stats | |
21 from plotnine import * | |
22 import math | |
23 import re | |
24 import matplotlib.ticker as mtick | |
25 import copy | |
26 | |
27 subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre']) | |
28 | |
29 parser = argparse.ArgumentParser() | |
30 parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store") | |
31 parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[]) | |
32 parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] ) | |
33 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") | |
34 parser.add_argument("-gen", "--org_name", help="tool directory path", action="store") | |
35 parser.add_argument("-program", "--pro", help="choose type of analysis", action="store") | |
36 parser.add_argument("-f", "--flag", help="choose the database", action="store") | |
37 parser.add_argument("-umis", "--umi", help="choose the database", action="store") | |
38 parser.add_argument("-percentage", "--per", help="choose the database", action="store") | |
39 parser.add_argument("-counts", "--count", help="choose the database", action="store") | |
40 parser.add_argument("-name1", "--n1", help="choose the database", action="store") | |
41 parser.add_argument("-name2", "--n2", help="choose the database", action="store") | |
42 args = parser.parse_args() | |
43 | |
44 ######################################################################################################################################### | |
45 | |
46 def collapse_sam(path): | |
47 | |
48 ini_sam=read(path,0) | |
49 main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]] | |
50 intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in x.split("\t")[0]] | |
51 | |
52 uni_seq = [] | |
53 for x in main_sam: | |
54 | |
55 if [x[2], x[9]] not in uni_seq: | |
56 uni_seq.append([x[2], x[9]]) | |
57 | |
58 new_main_sam=[] | |
59 incr_num=0 | |
60 for i in range(len(uni_seq)): | |
61 count=0 | |
62 incr_num+=1 | |
63 for y in main_sam: | |
64 if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]: | |
65 count+=1 | |
66 temp=y | |
67 temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" | |
68 temp[0]=str(incr_num)+"-"+str(count) | |
69 new_main_sam.append(temp) | |
70 | |
71 new_sam=intro_sam+new_main_sam | |
72 | |
73 return new_sam | |
74 | |
75 ################################################################################################################################################################################################################# | |
76 | |
77 def duplicate_chroms_isoforms(List): | |
78 | |
79 dupes=[] | |
80 | |
81 for num in range(len(List)): | |
82 | |
83 if [List[num][9],List[num][0],List[num][2]] not in dupes : | |
84 dupes.append([List[num][9],List[num][0],List[num][2]]) | |
85 | |
86 for x in List: | |
87 for y in dupes: | |
88 if x[9]==y[0] and x[0]==y[1] and x[2].split("_")[0]==y[2].split("_")[0] and x[2]!=y[2]: | |
89 y.append(x[2]) | |
90 | |
91 | |
92 double_List = [x[:] for x in List] | |
93 | |
94 chr_order=[] | |
95 for x in dupes: | |
96 temp = [] | |
97 for i in range(2,len(x)): | |
98 if x[i].split("chr")[1].split("(")[0].isdigit(): | |
99 temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0])) | |
100 else: | |
101 temp.append(x[i].split("chr")[1][0:4]) | |
102 | |
103 for z in temp: | |
104 if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z: | |
105 temp = [str(j) for j in temp] | |
106 temp=list(set(temp)) | |
107 temp.sort() | |
108 chr_order.append(temp) | |
109 | |
110 final_dupes=[] | |
111 for i in range(len(dupes)): | |
112 final_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]]) | |
113 for x in chr_order[i]: | |
114 result = re.match("[-+]?\d+$", str(x)) | |
115 if len(chr_order[i]) == len(set(chr_order[i])): | |
116 if result is not None: | |
117 | |
118 if int(x)<0: | |
119 final_dupes[i][1]=final_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)" | |
120 else: | |
121 final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)" | |
122 else: | |
123 final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(x) | |
124 else: | |
125 if result is not None: | |
126 if int(x) < 0: | |
127 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(-)" | |
128 else: | |
129 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(+)" | |
130 else: | |
131 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(x) | |
132 | |
133 final_dupes.sort() | |
134 final_dupes=list(final_dupes for final_dupes,_ in itertools.groupby(final_dupes)) | |
135 | |
136 for i in range(len(double_List)): | |
137 for x in final_dupes: | |
138 | |
139 if double_List[i][9] == x[0] and double_List[i][0] == x[2] and len(double_List[i][2].split("_")) >3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]: | |
140 gg=str("_"+double_List[i][2].split("_")[-2]+"_"+double_List[i][2].split("_")[-1]) | |
141 double_List[i][2] = x[1]+gg | |
142 | |
143 if double_List[i][9]==x[0] and double_List[i][0]== x[2] and len(double_List[i][2].split("_"))==3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]: | |
144 double_List[i][2]=x[1] | |
145 List[i][2] = x[1] | |
146 | |
147 List.sort() | |
148 new_list=list(List for List,_ in itertools.groupby(List)) | |
149 | |
150 double_List.sort() | |
151 new_double_List = list(double_List for double_List, _ in itertools.groupby(double_List)) | |
152 | |
153 return new_list, new_double_List | |
154 | |
155 | |
156 ############################################################################################################################################################################################################# | |
157 | |
158 def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts): | |
159 | |
160 # read the sam file | |
161 ini_sam=read(path,0) | |
162 new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]] | |
163 unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26] | |
164 | |
165 sorted_uni_arms = [] | |
166 | |
167 for i in range(len(mature_mirnas)): | |
168 tmp_count_reads = 0 # calculate the total number of reads | |
169 tmp_count_seq = 0 # calculate the total number of sequences | |
170 for j in range(len(unique_seq)): | |
171 | |
172 if "{" in unique_seq[j][2].split("_")[0]: | |
173 official=unique_seq[j][2].split("_")[0][:-4] | |
174 else: | |
175 official=unique_seq[j][2].split("_")[0] | |
176 | |
177 if mature_mirnas[i].split(" ")[0][1:] == official: | |
178 | |
179 temp_mature = mature_mirnas[i+1].strip().replace("U", "T") | |
180 off_part = longestSubstring(temp_mature, unique_seq[j][9]) | |
181 | |
182 mat_diff = temp_mature.split(off_part) | |
183 mat_diff = [len(mat_diff[0]), len(mat_diff[1])] | |
184 | |
185 unique_diff = unique_seq[j][9].split(off_part) | |
186 unique_diff = [len(unique_diff[0]), len(unique_diff[1])] | |
187 | |
188 # Problem with hsa-miR-8485 | |
189 if mat_diff[1]!=0 and unique_diff[1]!=0: | |
190 unique_seq[j]=1 | |
191 pre_pos = 0 | |
192 post_pos = 0 | |
193 | |
194 elif mat_diff[0]!=0 and unique_diff[0]!=0: | |
195 unique_seq[j]=1 | |
196 pre_pos = 0 | |
197 post_pos = 0 | |
198 | |
199 else: | |
200 pre_pos = mat_diff[0]-unique_diff[0] | |
201 post_pos = unique_diff[1]-mat_diff[1] | |
202 tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1]) | |
203 tmp_count_seq = tmp_count_seq+1 | |
204 | |
205 if pre_pos != 0 or post_pos != 0: | |
206 if pre_pos == 0: | |
207 unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos) | |
208 elif post_pos == 0: | |
209 unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos) | |
210 else: | |
211 unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos) | |
212 | |
213 for x in range(unique_seq.count(1)): | |
214 unique_seq.remove(1) | |
215 if tmp_count_reads != 0 and tmp_count_seq != 0: | |
216 sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads]) | |
217 sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True) | |
218 dedup_unique_seq,double_fil_uni_seq=duplicate_chroms_isoforms(unique_seq) | |
219 | |
220 for y in sorted_uni_arms: | |
221 counts=0 | |
222 seqs=0 | |
223 for x in double_fil_uni_seq: | |
224 if y[0]==x[2].split("_")[0]: | |
225 counts+=int(x[0].split("-")[1]) | |
226 seqs+=1 | |
227 | |
228 y[1]=seqs | |
229 y[2]=counts | |
230 | |
231 LHE=[] | |
232 l.acquire() | |
233 if con=="c": | |
234 LHE.extend(z[2] for z in double_fil_uni_seq) | |
235 for y in double_fil_uni_seq: | |
236 samples_mirna_names.append([y[2],y[9]]) | |
237 deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq]) | |
238 LHE_names.extend(LHE) | |
239 unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4']) | |
240 unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4']) | |
241 names.append(name) | |
242 samples.append(dedup_unique_seq) | |
243 data.append([con,name,double_fil_uni_seq,sorted_uni_arms]) | |
244 ini_sample.append(new_main_sam) | |
245 | |
246 if con=="t": | |
247 LHE.extend(z[2] for z in double_fil_uni_seq) | |
248 for y in double_fil_uni_seq: | |
249 samples_mirna_names.append([y[2],y[9]]) | |
250 deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq]) | |
251 LHE_names.extend(LHE) | |
252 unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4']) | |
253 unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4']) | |
254 names.append(name) | |
255 samples.append(dedup_unique_seq) | |
256 data.append([con,name,double_fil_uni_seq,sorted_uni_arms]) | |
257 ini_sample.append(new_main_sam) | |
258 l.release() | |
259 | |
260 | |
261 ###################################################################################################################################### | |
262 """ | |
263 | |
264 Read a sam file from Bowtie and do the followings: | |
265 | |
266 1) Remove reverse stranded mapped reads | |
267 2) Remove unmapped reads | |
268 3) Remove all sequences with reads less than 11 reads | |
269 4) Sort the arms with the most sequences in decreading rate | |
270 5) Sort the sequences of every arm with the most reads in decreasing rate | |
271 6) Calculate total number of sequences of every arm | |
272 7) Calculate total number of reads of sequences of every arm. | |
273 8) Store all the informations in a txt file | |
274 | |
275 """ | |
276 | |
277 def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names): | |
278 | |
279 ini_sam=read(path,0) | |
280 new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]] | |
281 unique_seq=[] | |
282 unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26] | |
283 | |
284 uni_seq=[] | |
285 # Calculate the shifted positions for every isomir and add them to the name of it | |
286 sorted_uni_arms = [] | |
287 for i in range(1,len(mature_mirnas),2): | |
288 tmp_count_reads = 0 # calculate the total number of reads | |
289 tmp_count_seq = 0 # calculate the total number of sequences | |
290 | |
291 for j in range(len(unique_seq)): | |
292 | |
293 temp_mature = mature_mirnas[i].strip().replace("U", "T") | |
294 | |
295 if temp_mature in unique_seq[j][9]: | |
296 | |
297 off_part = longestSubstring(temp_mature, unique_seq[j][9]) | |
298 | |
299 mat_diff = temp_mature.split(off_part) | |
300 mat_diff = [len(mat_diff[0]), len(mat_diff[1])] | |
301 | |
302 unique_diff = unique_seq[j][9].split(off_part) | |
303 if len(unique_diff)<=2: | |
304 unique_diff = [len(unique_diff[0]), len(unique_diff[1])] | |
305 | |
306 pre_pos = mat_diff[0]-unique_diff[0] | |
307 post_pos = unique_diff[1]-mat_diff[1] | |
308 | |
309 lengthofmir = len(off_part) + post_pos | |
310 if pre_pos == 0: | |
311 tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1]) | |
312 tmp_count_seq = tmp_count_seq + 1 | |
313 | |
314 if pre_pos == 0: | |
315 | |
316 t_name=unique_seq[j].copy() | |
317 t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):]) | |
318 uni_seq.append(t_name) | |
319 | |
320 | |
321 if tmp_count_reads != 0 and tmp_count_seq != 0: | |
322 sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads]) | |
323 | |
324 | |
325 sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True) | |
326 unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq)))) | |
327 | |
328 LHE=[] | |
329 | |
330 l.acquire() | |
331 if con=="c": | |
332 LHE.extend(x[2] for x in unique_seq if x[2]!="*") | |
333 for x in unique_seq: | |
334 if x[2]!="*": | |
335 n_samples_mirna_names.append([x[2],x[9]]) | |
336 n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"]) | |
337 n_LHE_names.extend(LHE) | |
338 names.append(name) | |
339 data.append([con,name,unique_seq,sorted_uni_arms]) | |
340 | |
341 | |
342 if con=="t": | |
343 LHE.extend(x[2] for x in unique_seq if x[2]!="*") | |
344 for x in unique_seq: | |
345 if x[2]!="*": | |
346 n_samples_mirna_names.append([x[2],x[9]]) | |
347 n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"]) | |
348 n_LHE_names.extend(LHE) | |
349 names.append(name) | |
350 data.append([con,name,unique_seq,sorted_uni_arms]) | |
351 l.release() | |
352 | |
353 ##################################################################################################################################################################################################################### | |
354 def deseq2_temp(samples_mirna_names,deseq,con,l): | |
355 | |
356 samples_mirna_names.sort(key=lambda x:[0]) | |
357 for i in range(len(deseq)): | |
358 for y in samples_mirna_names: | |
359 flag = 0 | |
360 for x in deseq[i]: | |
361 if y[0] == x[0]: | |
362 flag = 1 | |
363 break | |
364 | |
365 if flag == 0: | |
366 deseq[i].append([y[0], "0", y[1]]) | |
367 | |
368 [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)] | |
369 deseq_final = [[x[0],x[2]] for x in deseq[0]] | |
370 [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]] | |
371 | |
372 l.acquire() | |
373 if con=="c": | |
374 q1.put(deseq_final) | |
375 | |
376 if con=="t": | |
377 q2.put(deseq_final) | |
378 l.release() | |
379 | |
380 | |
381 #################################################################################################################################################################################################################### | |
382 | |
383 def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E): | |
384 | |
385 LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names] | |
386 LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names] | |
387 | |
388 LH8E_add_names.sort() | |
389 LH2E_add_names.sort() | |
390 LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names)) | |
391 LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names)) | |
392 | |
393 LH2E.sort() | |
394 LH8E.sort() | |
395 LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E)) | |
396 LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E)) | |
397 | |
398 print("LHE_names") | |
399 print([len(LH8E_add_names),len(LH2E_add_names)]) | |
400 print([len(LH8E),len(LH2E)]) | |
401 | |
402 zeros=["0"]*(len(LH8E[0])-2) | |
403 [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)] | |
404 LH8E=LH8E+LH8E_add_names | |
405 | |
406 zeros=["0"]*(len(LH2E[0])-2) | |
407 [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)] | |
408 LH2E=LH2E+LH2E_add_names | |
409 | |
410 dupes=[] | |
411 final_LH2E =[] | |
412 | |
413 for num,_ in enumerate(LH2E): | |
414 | |
415 if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E: | |
416 final_LH2E.append(LH2E[num][1]) | |
417 final_LH2E.append(LH2E[num][0]) | |
418 else: | |
419 dupes.append(LH2E[num][1]) | |
420 | |
421 | |
422 dupes=list(set(dupes)) | |
423 | |
424 dupes=[[x] for x in dupes] | |
425 | |
426 for x in LH2E: | |
427 for y in dupes: | |
428 if x[1]==y[0]: | |
429 fl=0 | |
430 if len(y)==1: | |
431 y.append(x[0]) | |
432 else: | |
433 for i in range(1,len(y)): | |
434 if y[i].split("_")[0]==x[0].split("_")[0]: | |
435 fl=1 | |
436 if len(x[0])<len(y[i]): | |
437 del y[i] | |
438 y.append(x[0]) | |
439 break | |
440 | |
441 if fl==0: | |
442 y.append((x[0])) | |
443 | |
444 for y in dupes: | |
445 if len(y)>2: | |
446 for i in range(len(y)-1,1,-1): | |
447 y[1]=y[1]+"/"+y[i] | |
448 del y[i] | |
449 | |
450 for x in LH2E: | |
451 for y in dupes: | |
452 if x[1]==y[0]: | |
453 x[0]=y[1] | |
454 | |
455 for x in LH8E: | |
456 for y in dupes: | |
457 if x[1]==y[0]: | |
458 x[0]=y[1] | |
459 | |
460 | |
461 LH2E.sort() | |
462 LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E)) | |
463 | |
464 LH8E.sort() | |
465 LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E)) | |
466 | |
467 LH8E_new=[] | |
468 LH2E_new=[] | |
469 | |
470 if int(args.per)!=-1: | |
471 percent=int(args.per)/100 | |
472 print(percent) | |
473 print(args.count) | |
474 | |
475 c_col_filter=round(percent*(len(LH2E[1])-2)) | |
476 t_col_filter=round(percent*(len(LH8E[1])-2)) | |
477 | |
478 for i, _ in enumerate(LH2E): | |
479 c_cols=0 | |
480 t_cols=0 | |
481 | |
482 c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(args.count)]) | |
483 t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(args.count)]) | |
484 | |
485 if c_cols>=c_col_filter or t_cols>=t_col_filter: | |
486 LH8E_new.append(LH8E[i]) | |
487 LH2E_new.append(LH2E[i]) | |
488 | |
489 filter_LH2E.extend(LH2E_new) | |
490 filter_LH8E.extend(LH8E_new) | |
491 raw_LH2E.extend(LH2E) | |
492 raw_LH8E.extend(LH8E) | |
493 | |
494 ################################################################################################################################################################################################################## | |
495 | |
496 def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag): | |
497 | |
498 if flag == 1 and int(args.per)!=-1: | |
499 fp = open('Counts/Filtered '+args.n2 +' Templated Counts', 'w') | |
500 fp.write("Name\t") | |
501 fp.write("Sequence") | |
502 for y in names_tre: | |
503 fp.write("\t"+y) | |
504 | |
505 for x in fil_LH8E: | |
506 fp.write("\n%s" % "\t".join(x)) | |
507 fp.close() | |
508 | |
509 fp = open('Counts/Filtered '+args.n1+' Templated Counts', 'w') | |
510 fp.write("Name\t") | |
511 fp.write("Sequence") | |
512 for y in names_con: | |
513 fp.write("\t"+y) | |
514 | |
515 for x in fil_LH2E: | |
516 fp.write("\n%s" % "\t".join(x)) | |
517 fp.close() | |
518 | |
519 | |
520 if flag == 2 and int(args.per)!=-1: | |
521 fp = open('Counts/Filtered '+args.n2+' Non-Templated Counts', 'w') | |
522 fp.write("Name\t") | |
523 fp.write("Sequence") | |
524 for y in names_tre: | |
525 fp.write("\t"+y) | |
526 | |
527 | |
528 for x in fil_LH8E: | |
529 fp.write("\n%s" % "\t".join(x)) | |
530 fp.close() | |
531 | |
532 fp = open('Counts/Filtered '+args.n1+' Non-Templated Counts', 'w') | |
533 fp.write("Name\t") | |
534 fp.write("Sequence") | |
535 for y in names_con: | |
536 fp.write("\t"+y) | |
537 | |
538 for x in fil_LH2E: | |
539 fp.write("\n%s" % "\t".join(x)) | |
540 fp.close() | |
541 | |
542 | |
543 if flag == 1: | |
544 fp = open('Counts/Raw '+args.n2+' Templated Counts', 'w') | |
545 fp.write("Name\t") | |
546 fp.write("Sequence") | |
547 for y in names_tre: | |
548 fp.write("\t"+y) | |
549 | |
550 for x in raw_LH8E: | |
551 fp.write("\n%s" % "\t".join(x)) | |
552 fp.close() | |
553 | |
554 fp = open('Counts/Raw '+args.n1+' Templated Counts', 'w') | |
555 fp.write("Name\t") | |
556 fp.write("Sequence") | |
557 for y in names_con: | |
558 fp.write("\t"+y) | |
559 | |
560 for x in raw_LH2E: | |
561 fp.write("\n%s" % "\t".join(x)) | |
562 fp.close() | |
563 | |
564 | |
565 if flag == 2: | |
566 fp = open('Counts/Raw '+args.n2+' Non-Templated Counts', 'w') | |
567 fp.write("Name\t") | |
568 fp.write("Sequence") | |
569 for y in names_tre: | |
570 fp.write("\t"+y) | |
571 | |
572 | |
573 for x in raw_LH8E: | |
574 fp.write("\n%s" % "\t".join(x)) | |
575 fp.close() | |
576 | |
577 fp = open('Counts/Raw '+args.n1+' Non-Templated Counts', 'w') | |
578 fp.write("Name\t") | |
579 fp.write("Sequence") | |
580 for y in names_con: | |
581 fp.write("\t"+y) | |
582 | |
583 for x in raw_LH2E: | |
584 fp.write("\n%s" % "\t".join(x)) | |
585 fp.close() | |
586 | |
587 | |
588 ######################################################################################################################################### | |
589 | |
590 def ssamples(names,samp,folder,pro): | |
591 | |
592 for i in range(2,len(samp[0])): | |
593 | |
594 fp = open(folder+names[i-2]+'.txt','w') | |
595 fp.write("miRNA id"+"\t"+names[i-2]+"\n") | |
596 | |
597 for x in samp: | |
598 fp.write("%s" % "\t".join([x[0],x[i]])+"\n") | |
599 fp.close() | |
600 | |
601 ################################################################################################################## | |
602 | |
603 def DB_write(con,name,unique_seq,sorted_uni_arms,f): | |
604 | |
605 if f==1: | |
606 # Write a txt file with all the information | |
607 if con=="c": | |
608 fp = open('split1/'+name, 'w') | |
609 | |
610 fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence")) | |
611 if con=="t": | |
612 fp = open('split2/'+name, 'w') | |
613 fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence")) | |
614 | |
615 | |
616 for i in range(len(sorted_uni_arms)): | |
617 temp = [] | |
618 for j in range(len(unique_seq)): | |
619 | |
620 if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]: | |
621 | |
622 temp.append(unique_seq[j]) | |
623 | |
624 temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True) | |
625 fp.write("*********************************************************************************************************\n") | |
626 fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|")) | |
627 fp.write("*********************************************************************************************************\n\n") | |
628 [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp] | |
629 fp.write("\n" + "\n") | |
630 fp.close() | |
631 | |
632 if f==2: | |
633 | |
634 if con=="c": | |
635 fp = open('split3/'+name, 'w') | |
636 fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence")) | |
637 if con=="t": | |
638 fp = open('split4/'+name, 'w') | |
639 fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence")) | |
640 | |
641 | |
642 for i in range(len(sorted_uni_arms)): | |
643 temp = [] | |
644 for j in range(len(unique_seq)): | |
645 if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]: | |
646 temp.append(unique_seq[j]) | |
647 if temp!=[]: | |
648 temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True) | |
649 fp.write("*********************************************************************************************************\n") | |
650 fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|")) | |
651 fp.write("*********************************************************************************************************\n\n") | |
652 [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp] | |
653 fp.write("\n" + "\n") | |
654 fp.close() | |
655 | |
656 | |
657 ########################################################################################################################## | |
658 | |
659 def new_mat_seq(pre_unique_seq,mat_mirnas,l): | |
660 | |
661 unique_iso = [] | |
662 for x in pre_unique_seq: | |
663 if len(x[2].split("_"))==3: | |
664 for y in pre_unique_seq: | |
665 if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]): | |
666 if any(y[2] in lst2 for lst2 in unique_iso)==False: | |
667 y[2]=">"+y[2] | |
668 unique_iso.append(y) | |
669 l.acquire() | |
670 for x in unique_iso: | |
671 mat_mirnas.append(x[2]) | |
672 mat_mirnas.append(x[9]) | |
673 l.release() | |
674 | |
675 ######################################################################################################################### | |
676 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts): | |
677 | |
678 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
679 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
680 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E] | |
681 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E] | |
682 | |
683 c_templ = 0 | |
684 c_tem_counts = 0 | |
685 c_mature = 0 | |
686 c_mat_counts = 0 | |
687 t_templ = 0 | |
688 t_tem_counts = 0 | |
689 t_mature = 0 | |
690 t_mat_counts = 0 | |
691 | |
692 c_non = len(c_non_samples) | |
693 c_non_counts = sum(x[2] for x in c_non_samples) | |
694 t_non = len(t_non_samples) | |
695 t_non_counts = sum(x[2] for x in t_non_samples) | |
696 | |
697 c_unmap = c_unmap - c_non | |
698 t_unmap = c_unmap - t_non | |
699 | |
700 c_unmap_counts=c_unmap_counts - c_non_counts | |
701 t_unmap_counts=t_unmap_counts - t_non_counts | |
702 | |
703 | |
704 for x in c_samples: | |
705 | |
706 if "/" not in x[0]: | |
707 if "chr" in x[0].split("_")[-1]: | |
708 c_mature+=1 | |
709 c_mat_counts += x[2] | |
710 else: | |
711 c_templ+=1 | |
712 c_tem_counts += x[2] | |
713 else: | |
714 f=0 | |
715 for y in x[0].split("/"): | |
716 if "chr" in y.split("_")[-1]: | |
717 c_mature+=1 | |
718 c_mat_counts += x[2] | |
719 f=1 | |
720 break | |
721 if f==0: | |
722 c_templ+=1 | |
723 c_tem_counts += x[2] | |
724 | |
725 for x in t_samples: | |
726 | |
727 if "/" not in x[0]: | |
728 if "chr" in x[0].split("_")[-1]: | |
729 t_mature+=1 | |
730 t_mat_counts += x[2] | |
731 else: | |
732 t_templ+=1 | |
733 t_tem_counts += x[2] | |
734 else: | |
735 f=0 | |
736 for y in x[0].split("/"): | |
737 if "chr" in y.split("_")[-1]: | |
738 t_mature+=1 | |
739 t_mat_counts += x[2] | |
740 f=1 | |
741 break | |
742 if f==0: | |
743 t_templ+=1 | |
744 t_tem_counts += x[2] | |
745 | |
746 fig = plt.figure(figsize=(7,5)) | |
747 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template' | |
748 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts] | |
749 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
750 ax1 = plt.subplot2grid((1,2),(0,0)) | |
751 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
752 [x.set_fontsize(8) for x in texts] | |
753 plt.title('Control Group (reads)',fontsize=12) | |
754 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template' | |
755 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts] | |
756 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
757 ax2 = plt.subplot2grid((1,2),(0,1)) | |
758 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
759 [x.set_fontsize(8) for x in texts] | |
760 plt.title('Treated Group (reads)', fontsize=12) | |
761 plt.savefig('pie_non.png',dpi=300) | |
762 | |
763 ###################################################################################################################################################### | |
764 | |
765 def merging_names(LH2E_copy,new): | |
766 | |
767 dupes=[] | |
768 final_LH2E =[] | |
769 | |
770 for num in range(len(LH2E_copy)): | |
771 | |
772 if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E: | |
773 final_LH2E.append(LH2E_copy[num][1]) | |
774 final_LH2E.append(LH2E_copy[num][0]) | |
775 else: | |
776 dupes.append(LH2E_copy[num][1]) | |
777 | |
778 dupes=list(set(dupes)) | |
779 | |
780 for i in range(len(dupes)): | |
781 dupes[i]=[dupes[i]] | |
782 | |
783 for x in LH2E_copy: | |
784 for y in dupes: | |
785 if x[1]==y[0]: | |
786 fl=0 | |
787 if len(y)==1: | |
788 y.append(x[0]) | |
789 else: | |
790 for i in range(1,len(y)): | |
791 if y[i].split("_")[0]==x[0].split("_")[0]: | |
792 fl=1 | |
793 if len(x[0])<len(y[i]): | |
794 del y[i] | |
795 y.append(x[0]) | |
796 break | |
797 | |
798 if fl==0: | |
799 y.append((x[0])) | |
800 | |
801 for y in dupes: | |
802 if len(y)>2: | |
803 for i in range(len(y)-1,1,-1): | |
804 y[1]=y[1]+"/"+y[i] | |
805 del y[i] | |
806 | |
807 | |
808 for x in LH2E_copy: | |
809 for y in dupes: | |
810 if x[1]==y[0]: | |
811 x[0]=y[1] | |
812 | |
813 LH2E_copy.sort() | |
814 LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy)) | |
815 | |
816 new.extend(LH2E_copy) | |
817 | |
818 | |
819 ###################################################################################################################################################### | |
820 def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts): | |
821 | |
822 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
823 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
824 | |
825 c_templ = 0 | |
826 c_tem_counts = 0 | |
827 c_mature = 0 | |
828 c_mat_counts = 0 | |
829 t_templ = 0 | |
830 t_tem_counts = 0 | |
831 t_mature = 0 | |
832 t_mat_counts = 0 | |
833 | |
834 for x in c_samples: | |
835 | |
836 if "/" not in x[0]: | |
837 if "chr" in x[0].split("_")[-1]: | |
838 c_mature+=1 | |
839 c_mat_counts += x[2] | |
840 else: | |
841 c_templ+=1 | |
842 c_tem_counts += x[2] | |
843 else: | |
844 f=0 | |
845 for y in x[0].split("/"): | |
846 if "chr" in y.split("_")[-1]: | |
847 c_mature+=1 | |
848 c_mat_counts += x[2] | |
849 f=1 | |
850 break | |
851 if f==0: | |
852 c_templ+=1 | |
853 c_tem_counts += x[2] | |
854 | |
855 for x in t_samples: | |
856 | |
857 if "/" not in x[0]: | |
858 if "chr" in x[0].split("_")[-1]: | |
859 t_mature+=1 | |
860 t_mat_counts += x[2] | |
861 else: | |
862 t_templ+=1 | |
863 t_tem_counts += x[2] | |
864 else: | |
865 f=0 | |
866 for y in x[0].split("/"): | |
867 if "chr" in y.split("_")[-1]: | |
868 t_mature+=1 | |
869 t_mat_counts += x[2] | |
870 f=1 | |
871 break | |
872 if f==0: | |
873 t_templ+=1 | |
874 t_tem_counts += x[2] | |
875 | |
876 | |
877 fig = plt.figure() | |
878 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
879 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts] | |
880 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
881 explode = (0.2, 0.05, 0.1) | |
882 ax1 = plt.subplot2grid((1,2),(0,0)) | |
883 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
884 [x.set_fontsize(8) for x in texts] | |
885 plt.title('Control group (reads)', fontsize=12) | |
886 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
887 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts] | |
888 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
889 explode = (0.2, 0.05, 0.1) | |
890 ax2 = plt.subplot2grid((1,2),(0,1)) | |
891 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
892 [x.set_fontsize(8) for x in texts] | |
893 plt.title('Treated group (reads)',fontsize = 12) | |
894 plt.savefig('pie_tem.png',dpi=300) | |
895 | |
896 ################################################################################################################################################################################################################### | |
897 | |
898 def make_spider(merge_LH2E,merge_LH8E): | |
899 | |
900 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
901 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
902 | |
903 c_5 = 0 | |
904 c_5_counts = 0 | |
905 c_3 = 0 | |
906 c_3_counts = 0 | |
907 c_both =0 | |
908 c_both_counts=0 | |
909 c_mature = 0 | |
910 c_mat_counts = 0 | |
911 c_exception=0 | |
912 c_exception_counts=0 | |
913 | |
914 | |
915 t_5 = 0 | |
916 t_5_counts = 0 | |
917 t_3 = 0 | |
918 t_3_counts = 0 | |
919 t_both = 0 | |
920 t_both_counts = 0 | |
921 t_mature = 0 | |
922 t_mat_counts = 0 | |
923 t_exception = 0 | |
924 t_exception_counts=0 | |
925 | |
926 for x in c_samples: | |
927 | |
928 if "/" not in x[0]: | |
929 if "chr" in x[0].split("_")[-1]: | |
930 c_mature+=1 | |
931 c_mat_counts += x[2] | |
932 elif 0 == int(x[0].split("_")[-1]): | |
933 c_5+=1 | |
934 c_5_counts += x[2] | |
935 elif 0 == int(x[0].split("_")[-2]): | |
936 c_3+=1 | |
937 c_3_counts += x[2] | |
938 else: | |
939 c_both+=1 | |
940 c_both_counts+=x[2] | |
941 | |
942 else: | |
943 f=0 | |
944 for y in x[0].split("/"): | |
945 if "chr" in y.split("_")[-1]: | |
946 c_mature+=1 | |
947 c_mat_counts += x[2] | |
948 f=1 | |
949 break | |
950 if f==0: | |
951 for y in x[0].split("/"): | |
952 c_exception+=1 | |
953 c_exception_counts += x[2] | |
954 | |
955 | |
956 for x in t_samples: | |
957 | |
958 if "/" not in x[0]: | |
959 if "chr" in x[0].split("_")[-1]: | |
960 t_mature+=1 | |
961 t_mat_counts += x[2] | |
962 elif 0 == int(x[0].split("_")[-1]): | |
963 t_5+=1 | |
964 t_5_counts += x[2] | |
965 elif 0 == int(x[0].split("_")[-2]): | |
966 t_3+=1 | |
967 t_3_counts += x[2] | |
968 else: | |
969 t_both+=1 | |
970 t_both_counts+=x[2] | |
971 | |
972 else: | |
973 f=0 | |
974 for y in x[0].split("/"): | |
975 if "chr" in y.split("_")[-1]: | |
976 t_mature+=1 | |
977 t_mat_counts += x[2] | |
978 f=1 | |
979 break | |
980 if f==0: | |
981 for y in x[0].split("/"): | |
982 t_exception+=1 | |
983 t_exception_counts += x[2] | |
984 | |
985 | |
986 c_all = c_5+c_3+c_both+c_mature+c_exception | |
987 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts | |
988 | |
989 t_all = t_5+t_3+t_both+t_mature + t_exception | |
990 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts | |
991 | |
992 c_5 = round(c_5/c_all*100,2) | |
993 c_3 = round(c_3/c_all*100,2) | |
994 c_both = round(c_both/c_all*100,2) | |
995 c_mature = round(c_mature/c_all*100,2) | |
996 c_exception = round(c_exception/c_all*100,2) | |
997 | |
998 c_5_counts = round(c_5_counts/c_all_counts*100,2) | |
999 c_3_counts = round(c_3_counts/c_all_counts*100,2) | |
1000 c_both_counts = round(c_both_counts/c_all_counts*100,2) | |
1001 c_mat_counts = round(c_mat_counts/c_all_counts*100,2) | |
1002 c_exception_counts = round(c_exception_counts/c_all_counts*100,2) | |
1003 | |
1004 t_5 = round(t_5/t_all*100,2) | |
1005 t_3 = round(t_3/t_all*100,2) | |
1006 t_both = round(t_both/t_all*100,2) | |
1007 t_mature = round(t_mature/t_all*100,2) | |
1008 t_exception = round(t_exception/t_all*100,2) | |
1009 | |
1010 t_5_counts = round(t_5_counts/t_all_counts*100,2) | |
1011 t_3_counts = round(t_3_counts/t_all_counts*100,2) | |
1012 t_both_counts = round(t_both_counts/t_all_counts*100,2) | |
1013 t_mat_counts = round(t_mat_counts/t_all_counts*100,2) | |
1014 t_exception_counts = round(t_exception_counts/t_all_counts*100,2) | |
1015 | |
1016 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception) | |
1017 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts) | |
1018 | |
1019 df=pd.DataFrame({ | |
1020 'group':['Controls','Treated'], | |
1021 """5' and 3' isomiRs""":[c_both,t_both], | |
1022 """3' isomiRs""":[c_3,t_3], | |
1023 'miRNA RefSeq':[c_mature,t_mature], | |
1024 """5' isomiRs""":[c_5,t_5], | |
1025 'Others*':[c_exception,t_exception]}) | |
1026 | |
1027 df1=pd.DataFrame({ | |
1028 'group':['Controls','Treated'], | |
1029 """5' and 3' isomiRs""":[c_both_counts,t_both_counts], | |
1030 """3' isomiRs""":[c_3_counts,t_3_counts], | |
1031 'miRNA RefSeq':[c_mat_counts,t_mat_counts], | |
1032 """5' isomiRs""":[c_5_counts,t_5_counts], | |
1033 'Others*':[c_exception_counts,t_exception_counts]}) | |
1034 | |
1035 spider_last(df,radar_max,1) | |
1036 spider_last(df1,radar_max_counts,2) | |
1037 | |
1038 | |
1039 def spider_last(df,radar_max,flag): | |
1040 # ------- PART 1: Create background | |
1041 fig = plt.figure() | |
1042 # number of variable | |
1043 categories=list(df)[1:] | |
1044 N = len(categories) | |
1045 | |
1046 # What will be the angle of each axis in the plot? (we divide the plot / number of variable) | |
1047 angles = [n / float(N) * 2 * pi for n in range(N)] | |
1048 angles += angles[:1] | |
1049 | |
1050 # Initialise the spider plot | |
1051 ax = plt.subplot(111, polar=True) | |
1052 | |
1053 # If you want the first axis to be on top: | |
1054 ax.set_theta_offset(pi/2) | |
1055 ax.set_theta_direction(-1) | |
1056 | |
1057 # Draw one axe per variable + add labels labels yet | |
1058 plt.xticks(angles[:-1], categories, fontsize=11) | |
1059 | |
1060 # Draw ylabels | |
1061 radar_max=round(radar_max+radar_max*0.1) | |
1062 mul=len(str(radar_max))-1 | |
1063 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul) | |
1064 sep = round(maxi/4) | |
1065 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10) | |
1066 plt.ylim(0, maxi) | |
1067 | |
1068 # ------- PART 2: Add plots | |
1069 | |
1070 # Plot each individual = each line of the data | |
1071 # I don't do a loop, because plotting more than 3 groups makes the chart unreadable | |
1072 | |
1073 # Ind1 | |
1074 values=df.loc[0].drop('group').values.flatten().tolist() | |
1075 values += values[:1] | |
1076 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls") | |
1077 ax.fill(angles, values, 'b', alpha=0.1) | |
1078 | |
1079 # Ind2 | |
1080 values=df.loc[1].drop('group').values.flatten().tolist() | |
1081 values += values[:1] | |
1082 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated") | |
1083 ax.fill(angles, values, 'r', alpha=0.1) | |
1084 | |
1085 # Add legend | |
1086 if flag==1: | |
1087 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
1088 plt.savefig('spider_non_red.png',dpi=300) | |
1089 else: | |
1090 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
1091 plt.savefig('spider_red.png',dpi=300) | |
1092 | |
1093 | |
1094 ############################################################################################################################################################################################################# | |
1095 def hist_red(samples,flag): | |
1096 lengths=[] | |
1097 cat=[] | |
1098 total_reads=0 | |
1099 seq=[] | |
1100 | |
1101 if flag == "c": | |
1102 title = "Length Distribution of Control group (Redudant reads)" | |
1103 if flag == "t": | |
1104 title = "Length Distribution of Treated group (Redudant reads)" | |
1105 | |
1106 for i in samples: | |
1107 for x in i: | |
1108 lengths.append(len(x[9])) | |
1109 if x[1]=="0": | |
1110 seq.append([x[9],x[0].split("-")[1],"Mapped"]) | |
1111 cat.append("Mapped") | |
1112 if x[1] == "4": | |
1113 seq.append([x[9],x[0].split("-")[1],"Unmapped"]) | |
1114 cat.append("Unmapped") | |
1115 | |
1116 uni_len=list(set(lengths)) | |
1117 uni_len=[x for x in uni_len if x<=35] | |
1118 low=min(lengths) | |
1119 up=max(lengths) | |
1120 seq.sort() | |
1121 uni_seq=list(seq for seq,_ in itertools.groupby(seq)) | |
1122 dim=up-low | |
1123 | |
1124 if dim>20: | |
1125 s=5 | |
1126 else: | |
1127 s=8 | |
1128 | |
1129 total_reads+=sum([int(x[1]) for x in uni_seq]) | |
1130 | |
1131 map_reads=[] | |
1132 unmap_reads=[] | |
1133 length=[] | |
1134 for y in uni_len: | |
1135 map_temp=0 | |
1136 unmap_temp=0 | |
1137 for x in uni_seq: | |
1138 if len(x[0])==y and x[2]=="Mapped": | |
1139 map_temp+=int(x[1]) | |
1140 if len(x[0])==y and x[2]=="Unmapped": | |
1141 unmap_temp+=int(x[1]) | |
1142 if y<=35: | |
1143 length.append(y) | |
1144 map_reads.append(round(map_temp/total_reads*100,2)) | |
1145 unmap_reads.append(round(unmap_temp/total_reads*100,2)) | |
1146 | |
1147 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)]) | |
1148 ylim=ylim+ylim*20/100 | |
1149 fig, ax = plt.subplots() | |
1150 width=0.8 | |
1151 ax.bar(length, unmap_reads, width, label='Unmapped') | |
1152 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped') | |
1153 plt.xticks(np.arange(length[0], length[-1]+1, 1)) | |
1154 plt.xlabel('Length (nt)',fontsize=14) | |
1155 plt.ylabel('Percentage',fontsize=14) | |
1156 plt.title(title,fontsize=14) | |
1157 ax.legend() | |
1158 plt.ylim([0, ylim]) | |
1159 ax.grid(axis='y',linewidth=0.2) | |
1160 | |
1161 if flag=='c': | |
1162 plt.savefig('c_hist_red.png',dpi=300) | |
1163 | |
1164 if flag=='t': | |
1165 plt.savefig('t_hist_red.png',dpi=300) | |
1166 | |
1167 ################################################################################################################# | |
1168 | |
1169 | |
1170 def logo_seq_red(merge, flag): | |
1171 | |
1172 if flag=="c": | |
1173 titlos="Control group (Redundant)" | |
1174 file_logo="c_logo.png" | |
1175 file_bar="c_bar.png" | |
1176 if flag=="t": | |
1177 titlos="Treated group (Redundant)" | |
1178 file_logo="t_logo.png" | |
1179 file_bar="t_bar.png" | |
1180 | |
1181 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge] | |
1182 | |
1183 A=[0]*5 | |
1184 C=[0]*5 | |
1185 G=[0]*5 | |
1186 T=[0]*5 | |
1187 total_reads=0 | |
1188 | |
1189 for y in c_samples: | |
1190 if "/" in y[0]: | |
1191 length=[] | |
1192 for x in y[0].split("/"): | |
1193 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
1194 | |
1195 best=min(length) | |
1196 total_reads+=best[2] | |
1197 for i in range(5): | |
1198 if i<len(best[1]): | |
1199 if best[1][i] == "A": | |
1200 A[i]+=best[2] | |
1201 elif best[1][i] == "C": | |
1202 C[i]+=best[2] | |
1203 elif best[1][i] == "G": | |
1204 G[i]+=best[2] | |
1205 else: | |
1206 T[i]+=best[2] | |
1207 else: | |
1208 total_reads+=y[2] | |
1209 for i in range(5): | |
1210 if i<len(y[0].split("_")[-1]): | |
1211 if y[0].split("_")[-1][i] == "A": | |
1212 A[i]+=(y[2]) | |
1213 elif y[0].split("_")[-1][i] == "C": | |
1214 C[i]+=(y[2]) | |
1215 elif y[0].split("_")[-1][i] == "G": | |
1216 G[i]+=(y[2]) | |
1217 else: | |
1218 T[i]+=y[2] | |
1219 | |
1220 A[:] = [round(x*100,1) / total_reads for x in A] | |
1221 C[:] = [round(x*100,1) / total_reads for x in C] | |
1222 G[:] = [round(x*100,1) / total_reads for x in G] | |
1223 T[:] = [round(x*100,1) / total_reads for x in T] | |
1224 | |
1225 | |
1226 | |
1227 data = {'A':A,'C':C,'G':G,'T':T} | |
1228 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
1229 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) ) | |
1230 h.grid(axis='y',linewidth=0.2) | |
1231 plt.xticks(rotation=0, ha="right") | |
1232 plt.ylabel("Counts (%)",fontsize=18) | |
1233 plt.xlabel("Positions (nt)",fontsize=18) | |
1234 plt.title(titlos,fontsize=20) | |
1235 plt.tight_layout() | |
1236 plt.savefig(file_bar, dpi=300) | |
1237 | |
1238 import logomaker as lm | |
1239 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
1240 crp_logo.style_spines(visible=False) | |
1241 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
1242 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
1243 | |
1244 # style using Axes methods | |
1245 crp_logo.ax.set_title(titlos,fontsize=18) | |
1246 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5) | |
1247 crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5) | |
1248 crp_logo.ax.xaxis.set_ticks_position('none') | |
1249 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
1250 figure = plt.gcf() | |
1251 figure.set_size_inches(6, 4) | |
1252 crp_logo.fig.savefig(file_logo,dpi=300) | |
1253 | |
1254 ########################################################################################################################################################################################################## | |
1255 | |
1256 | |
1257 | |
1258 def logo_seq_non_red(merge_LH2E): | |
1259 | |
1260 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
1261 | |
1262 A=[0]*5 | |
1263 C=[0]*5 | |
1264 G=[0]*5 | |
1265 T=[0]*5 | |
1266 | |
1267 for y in c_samples: | |
1268 if "/" in y[0]: | |
1269 length=[] | |
1270 for x in y[0].split("/"): | |
1271 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
1272 | |
1273 best=min(length) | |
1274 for i in range(5): | |
1275 if i<len(best[1]): | |
1276 if best[1][i] == "A": | |
1277 A[i]+=1 | |
1278 elif best[1][i] == "C": | |
1279 C[i]+=1 | |
1280 elif best[1][i] == "G": | |
1281 G[i]+=1 | |
1282 else: | |
1283 T[i]+=1 | |
1284 else: | |
1285 for i in range(5): | |
1286 if i<len(y[0].split("_")[-1]): | |
1287 if y[0].split("_")[-1][i] == "A": | |
1288 A[i]+=1 | |
1289 elif y[0].split("_")[-1][i] == "C": | |
1290 C[i]+=1 | |
1291 elif y[0].split("_")[-1][i] == "G": | |
1292 G[i]+=1 | |
1293 else: | |
1294 T[i]+=1 | |
1295 | |
1296 data = {'A':A,'C':C,'G':G,'T':T} | |
1297 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
1298 h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"])) | |
1299 h.set_xlabel("Positions (nt)") | |
1300 h.set_ylabel("Unique sequences") | |
1301 plt.xticks(rotation=0, ha="right") | |
1302 plt.tight_layout() | |
1303 plt.savefig("bar2.png", dpi=300) | |
1304 | |
1305 | |
1306 import logomaker as lm | |
1307 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
1308 | |
1309 # style using Logo methods | |
1310 crp_logo.style_spines(visible=False) | |
1311 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
1312 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
1313 | |
1314 # style using Axes methods | |
1315 crp_logo.ax.set_ylabel("Unique sequences", labelpad=5) | |
1316 crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5) | |
1317 crp_logo.ax.xaxis.set_ticks_position('none') | |
1318 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
1319 crp_logo.ax.set_title("Non-redundant") | |
1320 figure = plt.gcf() | |
1321 crp_logo.fig.savefig('logo2.png', dpi=300) | |
1322 | |
1323 | |
1324 ################################################################################################################################################################################################################### | |
1325 | |
1326 def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro): | |
1327 | |
1328 for i in range(2,len(tem_samp[0])): | |
1329 | |
1330 fp = open(folder+tem_names[i-2]+'.txt','w') | |
1331 fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n") | |
1332 | |
1333 for x in tem_samp: | |
1334 fp.write("%s" % "\t".join([x[0],x[i]])+"\n") | |
1335 | |
1336 for j in range(len(non_names)): | |
1337 if non_names[j]==tem_names[i-2]: | |
1338 for x in non_samp: | |
1339 fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n") | |
1340 fp.close() | |
1341 | |
1342 ################################################################################################################################################################################################################### | |
1343 | |
1344 def download_matures(matures,org_name): | |
1345 | |
1346 #url = 'ftp://mirbase.org/pub/mirbase/21/mature.fa.gz' | |
1347 url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz' | |
1348 data = urllib.request.urlopen(url).read() | |
1349 file_mirna = gzip.decompress(data).decode('utf-8') | |
1350 file_mirna = file_mirna.split("\n") | |
1351 | |
1352 for i in range(0,len(file_mirna)-1,2): | |
1353 | |
1354 if org_name in file_mirna[i]: | |
1355 matures.append(file_mirna[i]) | |
1356 matures.append(file_mirna[i+1]) | |
1357 | |
1358 ################################################################################################################################################################################################################### | |
1359 def non_template_ref(sc,st,all_isoforms): | |
1360 | |
1361 pre_uni_seq_con = list(sc) | |
1362 pre_uni_seq_tre = list(st) | |
1363 | |
1364 for x in pre_uni_seq_con: | |
1365 for y in x: | |
1366 if ">"+y[2] not in all_isoforms and ")_" in y[2] : | |
1367 all_isoforms.append(">"+y[2]) | |
1368 all_isoforms.append(y[9]) | |
1369 | |
1370 | |
1371 for x in pre_uni_seq_tre: | |
1372 for y in x: | |
1373 if ">"+y[2] not in all_isoforms and ")_" in y[2]: | |
1374 all_isoforms.append(">"+y[2]) | |
1375 all_isoforms.append(y[9]) | |
1376 | |
1377 ################################################################################################################################################################################################ | |
1378 | |
1379 def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order): | |
1380 | |
1381 for y in mir_names: | |
1382 flag=0 | |
1383 for x in sample: | |
1384 if y[0]==x[0]: | |
1385 flag=1 | |
1386 break | |
1387 if flag==0: | |
1388 sample.append([y[0],"0",y[1]]) | |
1389 | |
1390 sample.sort(key=lambda x: x[0]) | |
1391 sample=list(sample for sample,_ in itertools.groupby(sample)) | |
1392 | |
1393 l.acquire() | |
1394 new_d.append(sample) | |
1395 sample_order.append(sample_name) | |
1396 l.release() | |
1397 | |
1398 ############################################################################################################################################################################################### | |
1399 | |
1400 if __name__ == '__main__': | |
1401 | |
1402 starttime = time.time() | |
1403 | |
1404 q1 = Queue() | |
1405 q2 = Queue() | |
1406 lock = Lock() | |
1407 manager = Manager() | |
1408 | |
1409 mature_mirnas=manager.list() | |
1410 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name)) | |
1411 ps_mature.start() | |
1412 | |
1413 args.control[0]=args.control[0][1:] | |
1414 args.control[len(args.control)-1][:-1] | |
1415 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)] | |
1416 | |
1417 args.treated[0]=args.treated[0][1:] | |
1418 args.treated[len(args.treated)-1][:-1] | |
1419 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)] | |
1420 | |
1421 | |
1422 ############## Detection of templated isoforms ################ | |
1423 | |
1424 radar = manager.list([0,0,0,0]) | |
1425 samples = manager.list() | |
1426 data= manager.list() | |
1427 names_con=manager.list() | |
1428 samples_mirna_names=manager.list() | |
1429 deseq=manager.list() | |
1430 unmap_seq=manager.Value('i',0) | |
1431 unmap_counts=manager.Value('i',0) | |
1432 LH2E_names=manager.list() | |
1433 ini_c_samples = manager.list() | |
1434 | |
1435 | |
1436 radar1 = manager.list([0,0,0,0]) | |
1437 samples1 = manager.list() | |
1438 data1 = manager.list() | |
1439 names_tre = manager.list() | |
1440 samples_mirna_names1=manager.list() | |
1441 deseq1=manager.list() | |
1442 unmap1_seq = manager.Value('i',0) | |
1443 unmap1_counts = manager.Value('i',0) | |
1444 LH8E_names=manager.list() | |
1445 ini_t_samples = manager.list() | |
1446 ps_mature.join() | |
1447 | |
1448 | |
1449 mature_mirnas=list(mature_mirnas) | |
1450 | |
1451 | |
1452 starttime1 = time.time() | |
1453 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control] | |
1454 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated]) | |
1455 | |
1456 [p.start() for p in ps_sam] | |
1457 [p.join() for p in ps_sam] | |
1458 print('SAM took {} seconds'.format(time.time() - starttime1)) | |
1459 | |
1460 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))] | |
1461 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))]) | |
1462 [x.start() for x in ps_hist] | |
1463 | |
1464 starttime200=time.time() | |
1465 | |
1466 sc = list(samples) | |
1467 st = list(samples1) | |
1468 | |
1469 names_con=list(names_con) | |
1470 names_tre=list(names_tre) | |
1471 samples_mirna_names=list(samples_mirna_names) | |
1472 samples_mirna_names.sort() | |
1473 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names)) | |
1474 | |
1475 samples_mirna_names1=list(samples_mirna_names1) | |
1476 samples_mirna_names1.sort() | |
1477 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1)) | |
1478 | |
1479 deseq=list(deseq) | |
1480 deseq1=list(deseq1) | |
1481 | |
1482 new_names_con=manager.list() | |
1483 new_names_tre=manager.list() | |
1484 new_deseq=manager.list() | |
1485 new_deseq1=manager.list() | |
1486 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)] | |
1487 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)]) | |
1488 | |
1489 [z.start() for z in ps_deseq] | |
1490 [z.join() for z in ps_deseq] | |
1491 new_deseq=list(new_deseq) | |
1492 new_deseq1=list(new_deseq1) | |
1493 | |
1494 LH2E=[[x[0],x[2]] for x in new_deseq[0]] | |
1495 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq] | |
1496 | |
1497 LH8E=[[x[0],x[2]] for x in new_deseq1[0]] | |
1498 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1] | |
1499 | |
1500 print('Deseq took {} seconds'.format(time.time() - starttime200)) | |
1501 | |
1502 merg_nam_LH2E=manager.list() | |
1503 merg_nam_LH8E=manager.list() | |
1504 | |
1505 LH2E_copy=copy.deepcopy(list(LH2E)) | |
1506 LH8E_copy=copy.deepcopy(list(LH8E)) | |
1507 | |
1508 fil_sort_tre=manager.list() | |
1509 fil_sort_con=manager.list() | |
1510 raw_sort_tre=manager.list() | |
1511 raw_sort_con=manager.list() | |
1512 | |
1513 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con)) | |
1514 ps_main.start() | |
1515 | |
1516 if args.anal=="2": | |
1517 all_iso = manager.list() | |
1518 ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso)) | |
1519 ps_non_iso.start() | |
1520 | |
1521 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))] | |
1522 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))]) | |
1523 [x.start() for x in ps_merge] | |
1524 [x.join() for x in ps_merge] | |
1525 | |
1526 merg_nam_LH2E=list(merg_nam_LH2E) | |
1527 merg_nam_LH8E=list(merg_nam_LH8E) | |
1528 | |
1529 starttime2 = time.time() | |
1530 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data] | |
1531 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1]) | |
1532 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))]) | |
1533 if args.anal == "1": | |
1534 procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))]) | |
1535 | |
1536 [p.start() for p in procs] | |
1537 | |
1538 | |
1539 if args.anal=="1": | |
1540 [x.join() for x in ps_hist] | |
1541 [p.join() for p in procs] | |
1542 ps_pdf = Process(target=pdf_before_DE,args=(args.anal)) | |
1543 ps_pdf.start() | |
1544 | |
1545 print('Graphs took {} seconds'.format(time.time() - starttime2)) | |
1546 | |
1547 ps_main.join() | |
1548 | |
1549 fil_sort_con=list(fil_sort_con) | |
1550 fil_sort_tre=list(fil_sort_tre) | |
1551 if fil_sort_con==[]: | |
1552 fil_sort_con=raw_sort_con | |
1553 fil_sort_tre=raw_sort_tre | |
1554 | |
1555 raw_sort_con=list(raw_sort_con) | |
1556 raw_sort_tre=list(raw_sort_tre) | |
1557 names_con=list(new_names_con) | |
1558 names_tre=list(new_names_tre) | |
1559 | |
1560 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1)) | |
1561 ps_write.start() | |
1562 | |
1563 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))] | |
1564 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))]) | |
1565 [p.start() for p in ps1_matrix] | |
1566 | |
1567 if args.anal=="1": | |
1568 ps_pdf.join() | |
1569 if args.anal=="2": | |
1570 [p.join() for p in procs] | |
1571 [x.join() for x in ps_hist] | |
1572 | |
1573 ps_write.join() | |
1574 [p.join() for p in ps1_matrix] | |
1575 | |
1576 | |
1577 ############################## Detection of Both ####################################### | |
1578 | |
1579 starttime10 = time.time() | |
1580 | |
1581 if args.anal == "2": | |
1582 | |
1583 n_data= manager.list() | |
1584 n_names_con=manager.list() | |
1585 n_samples_mirna_names=manager.list() | |
1586 n_deseq=manager.list() | |
1587 n_LH2E_names=manager.list() | |
1588 | |
1589 n_data1 = manager.list() | |
1590 n_names_tre = manager.list() | |
1591 n_samples_mirna_names1=manager.list() | |
1592 n_deseq1=manager.list() | |
1593 n_LH8E_names=manager.list() | |
1594 | |
1595 | |
1596 new_mat_mirnas = list(mature_mirnas) | |
1597 ps_non_iso.join() | |
1598 | |
1599 all_iso=list(all_iso) | |
1600 new_mat_mirnas.extend(all_iso) | |
1601 | |
1602 starttime11=time.time() | |
1603 | |
1604 ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control] | |
1605 ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated]) | |
1606 | |
1607 [p.start() for p in ps_sam] | |
1608 [p.join() for p in ps_sam] | |
1609 | |
1610 print('Non-sam took {} seconds'.format(time.time() - starttime11)) | |
1611 | |
1612 starttime12=time.time() | |
1613 | |
1614 n_names_con=list(n_names_con) | |
1615 n_names_tre=list(n_names_tre) | |
1616 n_samples_mirna_names=list(n_samples_mirna_names) | |
1617 n_samples_mirna_names.sort() | |
1618 n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names)) | |
1619 | |
1620 n_samples_mirna_names1=list(n_samples_mirna_names1) | |
1621 n_samples_mirna_names1.sort() | |
1622 n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1)) | |
1623 | |
1624 n_deseq=list(n_deseq) | |
1625 n_deseq1=list(n_deseq1) | |
1626 | |
1627 new_n_names_con=manager.list() | |
1628 new_n_names_tre=manager.list() | |
1629 n_new_deseq=manager.list() | |
1630 n_new_deseq1=manager.list() | |
1631 ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)] | |
1632 ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)]) | |
1633 | |
1634 [x.start() for x in ps_deseq] | |
1635 [x.join() for x in ps_deseq] | |
1636 n_new_deseq=list(n_new_deseq) | |
1637 n_new_deseq1=list(n_new_deseq1) | |
1638 | |
1639 print([len(n_new_deseq[0]),len(n_new_deseq[1])]) | |
1640 print([len(n_new_deseq1[0]),len(n_new_deseq1[1])]) | |
1641 | |
1642 n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]] | |
1643 [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq] | |
1644 | |
1645 n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]] | |
1646 [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1] | |
1647 | |
1648 print('Non-deseq took {} seconds'.format(time.time() - starttime12)) | |
1649 | |
1650 merg_nam_n_LH2E=manager.list() | |
1651 merg_nam_n_LH8E=manager.list() | |
1652 | |
1653 n_LH2E_copy=copy.deepcopy(list(n_LH2E)) | |
1654 n_LH8E_copy=copy.deepcopy(list(n_LH8E)) | |
1655 | |
1656 n_sort_tre=manager.list() | |
1657 n_sort_con=manager.list() | |
1658 | |
1659 n_fil_sort_con=manager.list() | |
1660 n_fil_sort_tre=manager.list() | |
1661 n_raw_sort_con=manager.list() | |
1662 n_raw_sort_tre=manager.list() | |
1663 | |
1664 ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con)) | |
1665 ps_main.start() | |
1666 | |
1667 ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))] | |
1668 ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))]) | |
1669 [p.start() for p in ps_merge] | |
1670 [p.join() for p in ps_merge] | |
1671 | |
1672 merg_nam_n_LH2E=list(merg_nam_n_LH2E) | |
1673 merg_nam_n_LH8E=list(merg_nam_n_LH8E) | |
1674 | |
1675 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data] | |
1676 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1]) | |
1677 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))]) | |
1678 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))]) | |
1679 procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))]) | |
1680 | |
1681 starttime13=time.time() | |
1682 [p.start() for p in procs] | |
1683 [p.join() for p in procs] | |
1684 | |
1685 print('Graphs took {} seconds'.format(time.time() - starttime13)) | |
1686 | |
1687 procs1 = Process(target=pdf_before_DE,args=(args.anal)) | |
1688 procs1.start() | |
1689 | |
1690 starttime14=time.time() | |
1691 ps_main.join() | |
1692 | |
1693 n_fil_sort_con=list(n_fil_sort_con) | |
1694 n_fil_sort_tre=list(n_fil_sort_tre) | |
1695 if n_fil_sort_con==[]: | |
1696 n_fil_sort_con=n_raw_sort_con | |
1697 n_fil_sort_tre=n_raw_sort_tre | |
1698 | |
1699 n_raw_sort_con=list(n_raw_sort_con) | |
1700 n_raw_sort_tre=list(n_raw_sort_tre) | |
1701 n_names_con=list(new_n_names_con) | |
1702 n_names_tre=list(new_n_names_tre) | |
1703 | |
1704 ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2)) | |
1705 ps_write.start() | |
1706 | |
1707 ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))] | |
1708 ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))]) | |
1709 [p.start() for p in ps1_matrix] | |
1710 | |
1711 ps_write.join() | |
1712 [p.join() for p in ps1_matrix] | |
1713 procs1.join() | |
1714 print('That took {} seconds'.format(time.time() - starttime10)) | |
1715 print('That took {} seconds'.format(time.time() - starttime)) | |
1716 | |
1717 | |
1718 | |
1719 | |
1720 | |
1721 | |
1722 | |
1723 |