Mercurial > repos > glogobyte > isoread
comparison mirgene_graphs.py @ 5:6633f4447090 draft
Uploaded
author | glogobyte |
---|---|
date | Fri, 16 Oct 2020 18:55:18 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:88b3ef865431 | 5:6633f4447090 |
---|---|
1 import itertools | |
2 import time | |
3 import sys | |
4 import os | |
5 import urllib.request | |
6 import gzip | |
7 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
8 import subprocess | |
9 import argparse | |
10 from collections import OrderedDict | |
11 from matplotlib.backends.backend_pdf import PdfPages | |
12 import pandas as pd | |
13 from math import pi | |
14 import numpy as np | |
15 import matplotlib.pyplot as plt | |
16 from matplotlib.ticker import PercentFormatter | |
17 import seaborn as sns | |
18 import scipy.stats as stats | |
19 from plotnine import * | |
20 import math | |
21 import re | |
22 import matplotlib.ticker as mtick | |
23 import copy | |
24 | |
25 | |
26 ######################################################################################################################### | |
27 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts): | |
28 | |
29 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
30 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
31 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E] | |
32 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E] | |
33 | |
34 c_templ = 0 | |
35 c_tem_counts = 0 | |
36 c_mature = 0 | |
37 c_mat_counts = 0 | |
38 t_templ = 0 | |
39 t_tem_counts = 0 | |
40 t_mature = 0 | |
41 t_mat_counts = 0 | |
42 | |
43 c_non = len(c_non_samples) | |
44 c_non_counts = sum(x[2] for x in c_non_samples) | |
45 t_non = len(t_non_samples) | |
46 t_non_counts = sum(x[2] for x in t_non_samples) | |
47 | |
48 c_unmap = c_unmap - c_non | |
49 t_unmap = c_unmap - t_non | |
50 | |
51 c_unmap_counts=c_unmap_counts - c_non_counts | |
52 t_unmap_counts=t_unmap_counts - t_non_counts | |
53 | |
54 | |
55 for x in c_samples: | |
56 | |
57 if "/" not in x[0]: | |
58 if len(x[0].split("_"))==2: | |
59 c_mature+=1 | |
60 c_mat_counts += x[2] | |
61 else: | |
62 c_templ+=1 | |
63 c_tem_counts += x[2] | |
64 else: | |
65 f=0 | |
66 for y in x[0].split("/"): | |
67 if len(y.split("_"))==2: | |
68 c_mature+=1 | |
69 c_mat_counts += x[2] | |
70 f=1 | |
71 break | |
72 if f==0: | |
73 c_templ+=1 | |
74 c_tem_counts += x[2] | |
75 | |
76 for x in t_samples: | |
77 | |
78 if "/" not in x[0]: | |
79 if len(x[0].split("_"))==2: | |
80 t_mature+=1 | |
81 t_mat_counts += x[2] | |
82 else: | |
83 t_templ+=1 | |
84 t_tem_counts += x[2] | |
85 else: | |
86 f=0 | |
87 for y in x[0].split("/"): | |
88 if len(y.split("_"))==2: | |
89 t_mature+=1 | |
90 t_mat_counts += x[2] | |
91 f=1 | |
92 break | |
93 if f==0: | |
94 t_templ+=1 | |
95 t_tem_counts += x[2] | |
96 | |
97 fig = plt.figure(figsize=(7,5)) | |
98 | |
99 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template' | |
100 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts] | |
101 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
102 # Plot | |
103 ax1 = plt.subplot2grid((1,2),(0,0)) | |
104 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
105 [x.set_fontsize(8) for x in texts] | |
106 plt.title('Control Group (reads)',fontsize=12) | |
107 | |
108 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template' | |
109 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts] | |
110 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
111 # Plot | |
112 ax2 = plt.subplot2grid((1,2),(0,1)) | |
113 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
114 [x.set_fontsize(8) for x in texts] | |
115 plt.title('Treated Group (reads)', fontsize=12) | |
116 plt.savefig('pie_non.png',dpi=300) | |
117 | |
118 #################################################################################################################################################################################################################### | |
119 | |
120 def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts): | |
121 | |
122 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
123 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
124 | |
125 c_templ = 0 | |
126 c_tem_counts = 0 | |
127 c_mature = 0 | |
128 c_mat_counts = 0 | |
129 t_templ = 0 | |
130 t_tem_counts = 0 | |
131 t_mature = 0 | |
132 t_mat_counts = 0 | |
133 | |
134 for x in c_samples: | |
135 | |
136 if "/" not in x[0]: | |
137 if len(x[0].split("_"))==2: | |
138 c_mature+=1 | |
139 c_mat_counts += x[2] | |
140 else: | |
141 c_templ+=1 | |
142 c_tem_counts += x[2] | |
143 else: | |
144 f=0 | |
145 for y in x[0].split("/"): | |
146 if len(y.split("_"))==2: | |
147 c_mature+=1 | |
148 c_mat_counts += x[2] | |
149 f=1 | |
150 break | |
151 if f==0: | |
152 c_templ+=1 | |
153 c_tem_counts += x[2] | |
154 | |
155 for x in t_samples: | |
156 | |
157 if "/" not in x[0]: | |
158 if len(x[0].split("_"))==2: | |
159 t_mature+=1 | |
160 t_mat_counts += x[2] | |
161 else: | |
162 t_templ+=1 | |
163 t_tem_counts += x[2] | |
164 else: | |
165 f=0 | |
166 for y in x[0].split("/"): | |
167 if len(y.split("_"))==2: | |
168 t_mature+=1 | |
169 t_mat_counts += x[2] | |
170 f=1 | |
171 break | |
172 if f==0: | |
173 t_templ+=1 | |
174 t_tem_counts += x[2] | |
175 | |
176 | |
177 fig = plt.figure() | |
178 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
179 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts] | |
180 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
181 explode = (0.2, 0.05, 0.1) | |
182 # Plot | |
183 ax1 = plt.subplot2grid((1,2),(0,0)) | |
184 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
185 [x.set_fontsize(8) for x in texts] | |
186 plt.title('Control group (reads)', fontsize=12) | |
187 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
188 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts] | |
189 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
190 explode = (0.2, 0.05, 0.1) | |
191 # Plot | |
192 ax2 = plt.subplot2grid((1,2),(0,1)) | |
193 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
194 [x.set_fontsize(8) for x in texts] | |
195 plt.title('Treated group (reads)',fontsize = 12) | |
196 plt.savefig('pie_tem.png',dpi=300) | |
197 | |
198 ################################################################################################################################################################################################################### | |
199 | |
200 def make_spider(merge_LH2E,merge_LH8E): | |
201 | |
202 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
203 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
204 | |
205 c_5 = 0 | |
206 c_5_counts = 0 | |
207 c_3 = 0 | |
208 c_3_counts = 0 | |
209 c_both =0 | |
210 c_both_counts=0 | |
211 c_mature = 0 | |
212 c_mat_counts = 0 | |
213 c_exception=0 | |
214 c_exception_counts=0 | |
215 | |
216 | |
217 t_5 = 0 | |
218 t_5_counts = 0 | |
219 t_3 = 0 | |
220 t_3_counts = 0 | |
221 t_both = 0 | |
222 t_both_counts = 0 | |
223 t_mature = 0 | |
224 t_mat_counts = 0 | |
225 t_exception = 0 | |
226 t_exception_counts=0 | |
227 | |
228 for x in c_samples: | |
229 | |
230 if "/" not in x[0]: | |
231 if len(x[0].split("_"))==2: | |
232 c_mature+=1 | |
233 c_mat_counts += x[2] | |
234 elif 0 == int(x[0].split("_")[3]): | |
235 c_5+=1 | |
236 c_5_counts += x[2] | |
237 elif 0 == int(x[0].split("_")[2]): | |
238 c_3+=1 | |
239 c_3_counts += x[2] | |
240 else: | |
241 c_both+=1 | |
242 c_both_counts+=x[2] | |
243 | |
244 else: | |
245 f=0 | |
246 for y in x[0].split("/"): | |
247 if len(y.split("_"))==2: | |
248 c_mature+=1 | |
249 c_mat_counts += x[2] | |
250 f=1 | |
251 break | |
252 if f==0: | |
253 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1] | |
254 num=0 | |
255 for y in x[0].split("/"): | |
256 if y.split("_")[-2]+"_"+y.split("_")[-1]==part: | |
257 num+=1 | |
258 if num==len(x[0].split("/")): | |
259 | |
260 if 0 == int(x[0].split("/")[0].split("_")[3]): | |
261 c_5+=1 | |
262 c_5_counts += x[2] | |
263 elif 0 == int(x[0].split("/")[0].split("_")[2]): | |
264 c_3+=1 | |
265 c_3_counts += x[2] | |
266 else: | |
267 c_both+=1 | |
268 c_both_counts+=x[2] | |
269 else: | |
270 c_exception+=1 | |
271 c_exception_counts += x[2] | |
272 | |
273 | |
274 for x in t_samples: | |
275 | |
276 if "/" not in x[0]: | |
277 if len(x[0].split("_"))==2: | |
278 t_mature+=1 | |
279 t_mat_counts += x[2] | |
280 elif 0 == int(x[0].split("_")[3]): | |
281 t_5+=1 | |
282 t_5_counts += x[2] | |
283 elif 0 == int(x[0].split("_")[2]): | |
284 t_3+=1 | |
285 t_3_counts += x[2] | |
286 else: | |
287 t_both+=1 | |
288 t_both_counts+=x[2] | |
289 | |
290 else: | |
291 f=0 | |
292 for y in x[0].split("/"): | |
293 if len(y.split("_"))==2: | |
294 t_mature+=1 | |
295 t_mat_counts += x[2] | |
296 f=1 | |
297 break | |
298 if f==0: | |
299 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1] | |
300 num=0 | |
301 for y in x[0].split("/"): | |
302 if y.split("_")[-2]+"_"+y.split("_")[-1]==part: | |
303 num+=1 | |
304 if num==len(x[0].split("/")): | |
305 | |
306 if 0 == int(x[0].split("/")[0].split("_")[3]): | |
307 t_5+=1 | |
308 t_5_counts += x[2] | |
309 elif 0 == int(x[0].split("/")[0].split("_")[2]): | |
310 t_3+=1 | |
311 t_3_counts += x[2] | |
312 else: | |
313 t_both+=1 | |
314 t_both_counts+=x[2] | |
315 else: | |
316 t_exception+=1 | |
317 t_exception_counts += x[2] | |
318 | |
319 | |
320 c_all = c_5+c_3+c_both+c_mature+c_exception | |
321 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts | |
322 | |
323 t_all = t_5+t_3+t_both+t_mature + t_exception | |
324 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts | |
325 | |
326 c_5 = round(c_5/c_all*100,2) | |
327 c_3 = round(c_3/c_all*100,2) | |
328 c_both = round(c_both/c_all*100,2) | |
329 c_mature = round(c_mature/c_all*100,2) | |
330 c_exception = round(c_exception/c_all*100,2) | |
331 | |
332 c_5_counts = round(c_5_counts/c_all_counts*100,2) | |
333 c_3_counts = round(c_3_counts/c_all_counts*100,2) | |
334 c_both_counts = round(c_both_counts/c_all_counts*100,2) | |
335 c_mat_counts = round(c_mat_counts/c_all_counts*100,2) | |
336 c_exception_counts = round(c_exception_counts/c_all_counts*100,2) | |
337 | |
338 t_5 = round(t_5/t_all*100,2) | |
339 t_3 = round(t_3/t_all*100,2) | |
340 t_both = round(t_both/t_all*100,2) | |
341 t_mature = round(t_mature/t_all*100,2) | |
342 t_exception = round(t_exception/t_all*100,2) | |
343 | |
344 t_5_counts = round(t_5_counts/t_all_counts*100,2) | |
345 t_3_counts = round(t_3_counts/t_all_counts*100,2) | |
346 t_both_counts = round(t_both_counts/t_all_counts*100,2) | |
347 t_mat_counts = round(t_mat_counts/t_all_counts*100,2) | |
348 t_exception_counts = round(t_exception_counts/t_all_counts*100,2) | |
349 | |
350 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception) | |
351 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts) | |
352 | |
353 df=pd.DataFrame({ | |
354 'group':['Controls','Treated'], | |
355 """5' and 3' isomiRs""":[c_both,t_both], | |
356 """3' isomiRs""":[c_3,t_3], | |
357 'miRNA RefSeq':[c_mature,t_mature], | |
358 """5' isomiRs""":[c_5,t_5], | |
359 'Others*':[c_exception,t_exception]}) | |
360 | |
361 df1=pd.DataFrame({ | |
362 'group':['Controls','Treated'], | |
363 """5' and 3' isomiRs""":[c_both_counts,t_both_counts], | |
364 """3' isomiRs""":[c_3_counts,t_3_counts], | |
365 'miRNA RefSeq':[c_mat_counts,t_mat_counts], | |
366 """5' isomiRs""":[c_5_counts,t_5_counts], | |
367 'Others*':[c_exception_counts,t_exception_counts]}) | |
368 | |
369 spider_last(df,radar_max,1) | |
370 spider_last(df1,radar_max_counts,2) | |
371 | |
372 | |
373 ############################################################################################################################################################################################################## | |
374 | |
375 def spider_last(df,radar_max,flag): | |
376 # ------- PART 1: Create background | |
377 fig = plt.figure() | |
378 # number of variable | |
379 categories=list(df)[1:] | |
380 N = len(categories) | |
381 | |
382 # What will be the angle of each axis in the plot? (we divide the plot / number of variable) | |
383 angles = [n / float(N) * 2 * pi for n in range(N)] | |
384 angles += angles[:1] | |
385 | |
386 # Initialise the spider plot | |
387 ax = plt.subplot(111, polar=True) | |
388 | |
389 # If you want the first axis to be on top: | |
390 ax.set_theta_offset(pi/2) | |
391 ax.set_theta_direction(-1) | |
392 | |
393 # Draw one axe per variable + add labels labels yet | |
394 plt.xticks(angles[:-1], categories, fontsize=11) | |
395 | |
396 # Draw ylabels | |
397 radar_max=round(radar_max+radar_max*0.1) | |
398 mul=len(str(radar_max))-1 | |
399 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul) | |
400 sep = round(maxi/4) | |
401 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10) | |
402 plt.ylim(0, maxi) | |
403 | |
404 # ------- PART 2: Add plots | |
405 | |
406 # Plot each individual = each line of the data | |
407 # I don't do a loop, because plotting more than 3 groups makes the chart unreadable | |
408 | |
409 # Ind1 | |
410 values=df.loc[0].drop('group').values.flatten().tolist() | |
411 values += values[:1] | |
412 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls") | |
413 ax.fill(angles, values, 'b', alpha=0.1) | |
414 | |
415 # Ind2 | |
416 values=df.loc[1].drop('group').values.flatten().tolist() | |
417 values += values[:1] | |
418 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated") | |
419 ax.fill(angles, values, 'r', alpha=0.1) | |
420 | |
421 # Add legend | |
422 if flag==1: | |
423 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
424 plt.savefig('spider_non_red.png',dpi=300) | |
425 else: | |
426 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
427 plt.savefig('spider_red.png',dpi=300) | |
428 | |
429 ############################################################################################################################################################################################################# | |
430 | |
431 def hist_red(samples,flag): | |
432 | |
433 lengths=[] | |
434 cat=[] | |
435 total_reads=0 | |
436 seq=[] | |
437 | |
438 if flag == "c": | |
439 title = "Length Distribution of Control group (Redudant reads)" | |
440 if flag == "t": | |
441 title = "Length Distribution of Treated group (Redudant reads)" | |
442 | |
443 for i in samples: | |
444 for x in i: | |
445 lengths.append(len(x[9])) | |
446 if x[1]=="0": | |
447 seq.append([x[9],x[0].split("-")[1],"Mapped"]) | |
448 cat.append("Mapped") | |
449 if x[1] == "4": | |
450 seq.append([x[9],x[0].split("-")[1],"Unmapped"]) | |
451 cat.append("Unmapped") | |
452 | |
453 uni_len=list(set(lengths)) | |
454 uni_len=[x for x in uni_len if x<=35] | |
455 low=min(lengths) | |
456 up=max(lengths) | |
457 seq.sort() | |
458 uni_seq=list(seq for seq,_ in itertools.groupby(seq)) | |
459 dim=up-low | |
460 | |
461 if dim>20: | |
462 s=5 | |
463 else: | |
464 s=8 | |
465 total_reads+=sum([int(x[1]) for x in uni_seq]) | |
466 | |
467 map_reads=[] | |
468 unmap_reads=[] | |
469 length=[] | |
470 for y in uni_len: | |
471 map_temp=0 | |
472 unmap_temp=0 | |
473 for x in uni_seq: | |
474 if len(x[0])==y and x[2]=="Mapped": | |
475 map_temp+=int(x[1]) | |
476 if len(x[0])==y and x[2]=="Unmapped": | |
477 unmap_temp+=int(x[1]) | |
478 if y<=35: | |
479 length.append(y) | |
480 map_reads.append(round(map_temp/total_reads*100,2)) | |
481 unmap_reads.append(round(unmap_temp/total_reads*100,2)) | |
482 | |
483 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)]) | |
484 ylim=ylim+ylim*20/100 | |
485 fig, ax = plt.subplots() | |
486 width=0.8 | |
487 ax.bar(length, unmap_reads, width, label='Unmapped') | |
488 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped') | |
489 plt.xticks(np.arange(length[0], length[-1]+1, 1)) | |
490 plt.xlabel('Length (nt)',fontsize=14) | |
491 plt.ylabel('Percentage',fontsize=14) | |
492 plt.title(title,fontsize=14) | |
493 ax.legend() | |
494 plt.ylim([0, ylim]) | |
495 ax.grid(axis='y',linewidth=0.2) | |
496 | |
497 if flag=='c': | |
498 plt.savefig('c_hist_red.png',dpi=300) | |
499 | |
500 if flag=='t': | |
501 plt.savefig('t_hist_red.png',dpi=300) | |
502 | |
503 ############################################################################################################################################################################################################################################## | |
504 | |
505 def logo_seq_red(merge, flag): | |
506 | |
507 if flag=="c": | |
508 titlos="Control group (Redundant)" | |
509 file_logo="c_logo.png" | |
510 file_bar="c_bar.png" | |
511 if flag=="t": | |
512 titlos="Treated group (Redundant)" | |
513 file_logo="t_logo.png" | |
514 file_bar="t_bar.png" | |
515 | |
516 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge] | |
517 | |
518 A=[0]*5 | |
519 C=[0]*5 | |
520 G=[0]*5 | |
521 T=[0]*5 | |
522 total_reads=0 | |
523 | |
524 for y in c_samples: | |
525 if "/" in y[0]: | |
526 length=[] | |
527 for x in y[0].split("/"): | |
528 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
529 | |
530 best=min(length) | |
531 total_reads+=best[2] | |
532 for i in range(5): | |
533 if i<len(best[1]): | |
534 if best[1][i] == "A": | |
535 A[i]+=best[2] | |
536 elif best[1][i] == "C": | |
537 C[i]+=best[2] | |
538 elif best[1][i] == "G": | |
539 G[i]+=best[2] | |
540 else: | |
541 T[i]+=best[2] | |
542 else: | |
543 total_reads+=y[2] | |
544 for i in range(5): | |
545 if i<len(y[0].split("_")[-1]): | |
546 if y[0].split("_")[-1][i] == "A": | |
547 A[i]+=(y[2]) | |
548 elif y[0].split("_")[-1][i] == "C": | |
549 C[i]+=(y[2]) | |
550 elif y[0].split("_")[-1][i] == "G": | |
551 G[i]+=(y[2]) | |
552 else: | |
553 T[i]+=y[2] | |
554 | |
555 A[:] = [round(x*100,1) / total_reads for x in A] | |
556 C[:] = [round(x*100,1) / total_reads for x in C] | |
557 G[:] = [round(x*100,1) / total_reads for x in G] | |
558 T[:] = [round(x*100,1) / total_reads for x in T] | |
559 | |
560 data = {'A':A,'C':C,'G':G,'T':T} | |
561 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
562 | |
563 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) ) | |
564 h.grid(axis='y',linewidth=0.2) | |
565 plt.xticks(rotation=0, ha="right") | |
566 plt.ylabel("Counts (%)",fontsize=18) | |
567 plt.xlabel("Positions (nt)",fontsize=18) | |
568 plt.title(titlos,fontsize=20) | |
569 plt.tight_layout() | |
570 plt.savefig(file_bar, dpi=300) | |
571 | |
572 import logomaker as lm | |
573 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
574 | |
575 crp_logo.style_spines(visible=False) | |
576 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
577 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
578 crp_logo.ax.set_title(titlos,fontsize=18) | |
579 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5) | |
580 crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5) | |
581 crp_logo.ax.xaxis.set_ticks_position('none') | |
582 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
583 figure = plt.gcf() | |
584 figure.set_size_inches(6, 4) | |
585 crp_logo.fig.savefig(file_logo,dpi=300) | |
586 | |
587 ########################################################################################################################################################################################################## | |
588 | |
589 def logo_seq_non_red(merge_LH2E): | |
590 | |
591 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
592 | |
593 A=[0]*5 | |
594 C=[0]*5 | |
595 G=[0]*5 | |
596 T=[0]*5 | |
597 | |
598 for y in c_samples: | |
599 if "/" in y[0]: | |
600 length=[] | |
601 for x in y[0].split("/"): | |
602 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
603 | |
604 best=min(length) | |
605 for i in range(5): | |
606 if i<len(best[1]): | |
607 if best[1][i] == "A": | |
608 A[i]+=1 | |
609 elif best[1][i] == "C": | |
610 C[i]+=1 | |
611 elif best[1][i] == "G": | |
612 G[i]+=1 | |
613 else: | |
614 T[i]+=1 | |
615 else: | |
616 for i in range(5): | |
617 if i<len(y[0].split("_")[-1]): | |
618 if y[0].split("_")[-1][i] == "A": | |
619 A[i]+=1 | |
620 elif y[0].split("_")[-1][i] == "C": | |
621 C[i]+=1 | |
622 elif y[0].split("_")[-1][i] == "G": | |
623 G[i]+=1 | |
624 else: | |
625 T[i]+=1 | |
626 | |
627 data = {'A':A,'C':C,'G':G,'T':T} | |
628 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
629 h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"])) | |
630 h.set_xlabel("Positions (nt)") | |
631 h.set_ylabel("Unique sequences") | |
632 plt.xticks(rotation=0, ha="right") | |
633 plt.tight_layout() | |
634 plt.savefig("bar2.png", dpi=300) | |
635 | |
636 | |
637 import logomaker as lm | |
638 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
639 | |
640 crp_logo.style_spines(visible=False) | |
641 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
642 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
643 | |
644 crp_logo.ax.set_ylabel("Unique sequences", labelpad=5) | |
645 crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5) | |
646 crp_logo.ax.xaxis.set_ticks_position('none') | |
647 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
648 crp_logo.ax.set_title("Non-redundant") | |
649 figure = plt.gcf() | |
650 crp_logo.fig.savefig('logo2.png', dpi=300) | |
651 | |
652 | |
653 ################################################################################################################################################################################################################ | |
654 | |
655 | |
656 | |
657 | |
658 | |
659 ################################################################################################################################################################ | |
660 | |
661 def pdf_before_DE(analysis): | |
662 | |
663 # Import FPDF class | |
664 from fpdf import FPDF, fpdf | |
665 | |
666 # Import glob module to find all the files matching a pattern | |
667 import glob | |
668 | |
669 # Image extensions | |
670 if analysis=="2": | |
671 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png") | |
672 else: | |
673 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png") | |
674 # This list will hold the images file names | |
675 images = [] | |
676 | |
677 # Build the image list by merging the glob results (a list of files) | |
678 # for each extension. We are taking images from current folder. | |
679 for extension in image_extensions: | |
680 images.extend(glob.glob(extension)) | |
681 #sys.exit(images) | |
682 # Create instance of FPDF class | |
683 pdf = FPDF('P', 'in', 'A4') | |
684 # Add new page. Without this you cannot create the document. | |
685 pdf.add_page() | |
686 # Set font to Arial, 'B'old, 16 pts | |
687 pdf.set_font('Arial', 'B', 20.0) | |
688 | |
689 # Page header | |
690 pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C') | |
691 pdf.ln(0.7) | |
692 pdf.set_font('Arial','', 16.0) | |
693 pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C') | |
694 | |
695 # Smaller font for image captions | |
696 pdf.set_font('Arial', '', 11.0) | |
697 | |
698 # Image caption | |
699 pdf.ln(0.5) | |
700 | |
701 yh=FPDF.get_y(pdf) | |
702 pdf.image(images[0],x=0.3,w=4, h=3) | |
703 pdf.image(images[1],x=4,y=yh, w=4, h=3) | |
704 pdf.ln(0.3) | |
705 | |
706 # Image caption | |
707 pdf.cell(0.2) | |
708 pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)") | |
709 pdf.ln(0.2) | |
710 pdf.cell(0.2) | |
711 pdf.cell(3.0, 0.0, " and Treated (right) groups") | |
712 | |
713 | |
714 pdf.ln(0.5) | |
715 h1=FPDF.get_y(pdf) | |
716 pdf.image(images[2],x=1, w=6.5, h=5) | |
717 h2=FPDF.get_y(pdf) | |
718 FPDF.set_y(pdf,h1+0.2) | |
719 pdf.set_font('Arial','', 14.0) | |
720 pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C') | |
721 pdf.set_font('Arial', '', 11.0) | |
722 FPDF.set_y(pdf,h2) | |
723 FPDF.set_y(pdf,9.5) | |
724 # Image caption | |
725 pdf.cell(0.2) | |
726 if analysis=="2": | |
727 pdf.cell(3.0, 0.0, " Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads") | |
728 pdf.ln(0.2) | |
729 pdf.cell(0.2) | |
730 pdf.cell(3.0, 0.0, " in Control (left) and treated (right) groups") | |
731 else: | |
732 pdf.cell(3.0, 0.0, " Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in") | |
733 pdf.ln(0.2) | |
734 pdf.cell(0.2) | |
735 pdf.cell(3.0, 0.0, " Control (left) and treated (right) groups") | |
736 | |
737 | |
738 | |
739 pdf.add_page() | |
740 pdf.set_font('Arial', 'B', 16.0) | |
741 pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C') | |
742 pdf.ln(0.7) | |
743 pdf.set_font('Arial', 'B', 12.0) | |
744 pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C') | |
745 pdf.ln(0.5) | |
746 pdf.image(images[3],x=1.5, w=5.5, h=4) | |
747 pdf.ln(0.6) | |
748 pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C') | |
749 pdf.set_font('Arial', '', 12.0) | |
750 pdf.ln(0.2) | |
751 pdf.image(images[4],x=1.5, w=5.5, h=4) | |
752 pdf.ln(0.3) | |
753 pdf.set_font('Arial', '', 11.0) | |
754 pdf.cell(0.2) | |
755 pdf.cell(3.0, 0.0, " * IsomiRs potentialy initiated from multiple loci") | |
756 | |
757 | |
758 if analysis=="2": | |
759 pdf.add_page('L') | |
760 | |
761 pdf.set_font('Arial', 'B', 16.0) | |
762 pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C') | |
763 pdf.ln(0.5) | |
764 pdf.set_font('Arial', 'B', 12.0) | |
765 pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C') | |
766 pdf.ln(0.7) | |
767 | |
768 yh=FPDF.get_y(pdf) | |
769 pdf.image(images[5],x=1.5,w=3.65, h=2.65) | |
770 pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65) | |
771 pdf.ln(0.5) | |
772 yh=FPDF.get_y(pdf) | |
773 pdf.image(images[6],x=1.5,w=3.65, h=2.65) | |
774 pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65) | |
775 | |
776 pdf.close() | |
777 pdf.output('report1.pdf','F') | |
778 | |
779 | |
780 | |
781 | |
782 #############################################################################################################################################################3 | |
783 | |
784 def pdf_after_DE(analysis): | |
785 | |
786 # Import FPDF class | |
787 from fpdf import FPDF | |
788 | |
789 # Import glob module to find all the files matching a pattern | |
790 import glob | |
791 | |
792 # Image extensions | |
793 image_extensions = ("tem.png","non.png","a2.png") | |
794 | |
795 # This list will hold the images file names | |
796 images = [] | |
797 | |
798 # Build the image list by merging the glob results (a list of files) | |
799 # for each extension. We are taking images from current folder. | |
800 for extension in image_extensions: | |
801 images.extend(glob.glob(extension)) | |
802 | |
803 # Create instance of FPDF class | |
804 pdf = FPDF('P', 'in', 'letter') | |
805 # Add new page. Without this you cannot create the document. | |
806 pdf.add_page() | |
807 # Set font to Arial, 'B'old, 16 pts | |
808 pdf.set_font('Arial', 'B', 16.0) | |
809 | |
810 # Page header | |
811 pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C') | |
812 #pdf.ln(0.25) | |
813 | |
814 pdf.ln(0.7) | |
815 pdf.set_font('Arial','B', 12.0) | |
816 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C') | |
817 | |
818 | |
819 # Smaller font for image captions | |
820 pdf.set_font('Arial', '', 10.0) | |
821 | |
822 # Image caption | |
823 pdf.ln(0.4) | |
824 pdf.image(images[0],x=0.8, w=7, h=8) | |
825 pdf.ln(0.3) | |
826 | |
827 if analysis=="2": | |
828 | |
829 pdf.add_page() | |
830 pdf.ln(0.7) | |
831 pdf.set_font('Arial','B', 12.0) | |
832 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C') | |
833 pdf.ln(0.4) | |
834 pdf.image(images[1],x=0.5, w=7.5, h=6.5) | |
835 | |
836 pdf.add_page() | |
837 pdf.ln(0.5) | |
838 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C') | |
839 pdf.ln(0.4) | |
840 pdf.image(images[2],x=0.8, w=7, h=8) | |
841 pdf.ln(0.3) | |
842 | |
843 | |
844 pdf.output('report2.pdf', 'F') | |
845 | |
846 | |
847 |