comparison mirgene_graphs.py @ 5:6633f4447090 draft

Uploaded
author glogobyte
date Fri, 16 Oct 2020 18:55:18 +0000
parents
children
comparison
equal deleted inserted replaced
4:88b3ef865431 5:6633f4447090
1 import itertools
2 import time
3 import sys
4 import os
5 import urllib.request
6 import gzip
7 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
8 import subprocess
9 import argparse
10 from collections import OrderedDict
11 from matplotlib.backends.backend_pdf import PdfPages
12 import pandas as pd
13 from math import pi
14 import numpy as np
15 import matplotlib.pyplot as plt
16 from matplotlib.ticker import PercentFormatter
17 import seaborn as sns
18 import scipy.stats as stats
19 from plotnine import *
20 import math
21 import re
22 import matplotlib.ticker as mtick
23 import copy
24
25
26 #########################################################################################################################
27 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
28
29 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
30 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
31 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
32 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
33
34 c_templ = 0
35 c_tem_counts = 0
36 c_mature = 0
37 c_mat_counts = 0
38 t_templ = 0
39 t_tem_counts = 0
40 t_mature = 0
41 t_mat_counts = 0
42
43 c_non = len(c_non_samples)
44 c_non_counts = sum(x[2] for x in c_non_samples)
45 t_non = len(t_non_samples)
46 t_non_counts = sum(x[2] for x in t_non_samples)
47
48 c_unmap = c_unmap - c_non
49 t_unmap = c_unmap - t_non
50
51 c_unmap_counts=c_unmap_counts - c_non_counts
52 t_unmap_counts=t_unmap_counts - t_non_counts
53
54
55 for x in c_samples:
56
57 if "/" not in x[0]:
58 if len(x[0].split("_"))==2:
59 c_mature+=1
60 c_mat_counts += x[2]
61 else:
62 c_templ+=1
63 c_tem_counts += x[2]
64 else:
65 f=0
66 for y in x[0].split("/"):
67 if len(y.split("_"))==2:
68 c_mature+=1
69 c_mat_counts += x[2]
70 f=1
71 break
72 if f==0:
73 c_templ+=1
74 c_tem_counts += x[2]
75
76 for x in t_samples:
77
78 if "/" not in x[0]:
79 if len(x[0].split("_"))==2:
80 t_mature+=1
81 t_mat_counts += x[2]
82 else:
83 t_templ+=1
84 t_tem_counts += x[2]
85 else:
86 f=0
87 for y in x[0].split("/"):
88 if len(y.split("_"))==2:
89 t_mature+=1
90 t_mat_counts += x[2]
91 f=1
92 break
93 if f==0:
94 t_templ+=1
95 t_tem_counts += x[2]
96
97 fig = plt.figure(figsize=(7,5))
98
99 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
100 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
101 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
102 # Plot
103 ax1 = plt.subplot2grid((1,2),(0,0))
104 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
105 [x.set_fontsize(8) for x in texts]
106 plt.title('Control Group (reads)',fontsize=12)
107
108 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
109 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
110 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
111 # Plot
112 ax2 = plt.subplot2grid((1,2),(0,1))
113 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
114 [x.set_fontsize(8) for x in texts]
115 plt.title('Treated Group (reads)', fontsize=12)
116 plt.savefig('pie_non.png',dpi=300)
117
118 ####################################################################################################################################################################################################################
119
120 def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
121
122 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
123 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
124
125 c_templ = 0
126 c_tem_counts = 0
127 c_mature = 0
128 c_mat_counts = 0
129 t_templ = 0
130 t_tem_counts = 0
131 t_mature = 0
132 t_mat_counts = 0
133
134 for x in c_samples:
135
136 if "/" not in x[0]:
137 if len(x[0].split("_"))==2:
138 c_mature+=1
139 c_mat_counts += x[2]
140 else:
141 c_templ+=1
142 c_tem_counts += x[2]
143 else:
144 f=0
145 for y in x[0].split("/"):
146 if len(y.split("_"))==2:
147 c_mature+=1
148 c_mat_counts += x[2]
149 f=1
150 break
151 if f==0:
152 c_templ+=1
153 c_tem_counts += x[2]
154
155 for x in t_samples:
156
157 if "/" not in x[0]:
158 if len(x[0].split("_"))==2:
159 t_mature+=1
160 t_mat_counts += x[2]
161 else:
162 t_templ+=1
163 t_tem_counts += x[2]
164 else:
165 f=0
166 for y in x[0].split("/"):
167 if len(y.split("_"))==2:
168 t_mature+=1
169 t_mat_counts += x[2]
170 f=1
171 break
172 if f==0:
173 t_templ+=1
174 t_tem_counts += x[2]
175
176
177 fig = plt.figure()
178 labels = 'miRNA RefSeq','Template', 'Unmapped'
179 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
180 colors = ['gold', 'yellowgreen', 'lightskyblue']
181 explode = (0.2, 0.05, 0.1)
182 # Plot
183 ax1 = plt.subplot2grid((1,2),(0,0))
184 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
185 [x.set_fontsize(8) for x in texts]
186 plt.title('Control group (reads)', fontsize=12)
187 labels = 'miRNA RefSeq','Template', 'Unmapped'
188 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
189 colors = ['gold', 'yellowgreen', 'lightskyblue']
190 explode = (0.2, 0.05, 0.1)
191 # Plot
192 ax2 = plt.subplot2grid((1,2),(0,1))
193 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
194 [x.set_fontsize(8) for x in texts]
195 plt.title('Treated group (reads)',fontsize = 12)
196 plt.savefig('pie_tem.png',dpi=300)
197
198 ###################################################################################################################################################################################################################
199
200 def make_spider(merge_LH2E,merge_LH8E):
201
202 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
203 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
204
205 c_5 = 0
206 c_5_counts = 0
207 c_3 = 0
208 c_3_counts = 0
209 c_both =0
210 c_both_counts=0
211 c_mature = 0
212 c_mat_counts = 0
213 c_exception=0
214 c_exception_counts=0
215
216
217 t_5 = 0
218 t_5_counts = 0
219 t_3 = 0
220 t_3_counts = 0
221 t_both = 0
222 t_both_counts = 0
223 t_mature = 0
224 t_mat_counts = 0
225 t_exception = 0
226 t_exception_counts=0
227
228 for x in c_samples:
229
230 if "/" not in x[0]:
231 if len(x[0].split("_"))==2:
232 c_mature+=1
233 c_mat_counts += x[2]
234 elif 0 == int(x[0].split("_")[3]):
235 c_5+=1
236 c_5_counts += x[2]
237 elif 0 == int(x[0].split("_")[2]):
238 c_3+=1
239 c_3_counts += x[2]
240 else:
241 c_both+=1
242 c_both_counts+=x[2]
243
244 else:
245 f=0
246 for y in x[0].split("/"):
247 if len(y.split("_"))==2:
248 c_mature+=1
249 c_mat_counts += x[2]
250 f=1
251 break
252 if f==0:
253 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
254 num=0
255 for y in x[0].split("/"):
256 if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
257 num+=1
258 if num==len(x[0].split("/")):
259
260 if 0 == int(x[0].split("/")[0].split("_")[3]):
261 c_5+=1
262 c_5_counts += x[2]
263 elif 0 == int(x[0].split("/")[0].split("_")[2]):
264 c_3+=1
265 c_3_counts += x[2]
266 else:
267 c_both+=1
268 c_both_counts+=x[2]
269 else:
270 c_exception+=1
271 c_exception_counts += x[2]
272
273
274 for x in t_samples:
275
276 if "/" not in x[0]:
277 if len(x[0].split("_"))==2:
278 t_mature+=1
279 t_mat_counts += x[2]
280 elif 0 == int(x[0].split("_")[3]):
281 t_5+=1
282 t_5_counts += x[2]
283 elif 0 == int(x[0].split("_")[2]):
284 t_3+=1
285 t_3_counts += x[2]
286 else:
287 t_both+=1
288 t_both_counts+=x[2]
289
290 else:
291 f=0
292 for y in x[0].split("/"):
293 if len(y.split("_"))==2:
294 t_mature+=1
295 t_mat_counts += x[2]
296 f=1
297 break
298 if f==0:
299 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
300 num=0
301 for y in x[0].split("/"):
302 if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
303 num+=1
304 if num==len(x[0].split("/")):
305
306 if 0 == int(x[0].split("/")[0].split("_")[3]):
307 t_5+=1
308 t_5_counts += x[2]
309 elif 0 == int(x[0].split("/")[0].split("_")[2]):
310 t_3+=1
311 t_3_counts += x[2]
312 else:
313 t_both+=1
314 t_both_counts+=x[2]
315 else:
316 t_exception+=1
317 t_exception_counts += x[2]
318
319
320 c_all = c_5+c_3+c_both+c_mature+c_exception
321 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
322
323 t_all = t_5+t_3+t_both+t_mature + t_exception
324 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
325
326 c_5 = round(c_5/c_all*100,2)
327 c_3 = round(c_3/c_all*100,2)
328 c_both = round(c_both/c_all*100,2)
329 c_mature = round(c_mature/c_all*100,2)
330 c_exception = round(c_exception/c_all*100,2)
331
332 c_5_counts = round(c_5_counts/c_all_counts*100,2)
333 c_3_counts = round(c_3_counts/c_all_counts*100,2)
334 c_both_counts = round(c_both_counts/c_all_counts*100,2)
335 c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
336 c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
337
338 t_5 = round(t_5/t_all*100,2)
339 t_3 = round(t_3/t_all*100,2)
340 t_both = round(t_both/t_all*100,2)
341 t_mature = round(t_mature/t_all*100,2)
342 t_exception = round(t_exception/t_all*100,2)
343
344 t_5_counts = round(t_5_counts/t_all_counts*100,2)
345 t_3_counts = round(t_3_counts/t_all_counts*100,2)
346 t_both_counts = round(t_both_counts/t_all_counts*100,2)
347 t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
348 t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
349
350 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
351 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
352
353 df=pd.DataFrame({
354 'group':['Controls','Treated'],
355 """5' and 3' isomiRs""":[c_both,t_both],
356 """3' isomiRs""":[c_3,t_3],
357 'miRNA RefSeq':[c_mature,t_mature],
358 """5' isomiRs""":[c_5,t_5],
359 'Others*':[c_exception,t_exception]})
360
361 df1=pd.DataFrame({
362 'group':['Controls','Treated'],
363 """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
364 """3' isomiRs""":[c_3_counts,t_3_counts],
365 'miRNA RefSeq':[c_mat_counts,t_mat_counts],
366 """5' isomiRs""":[c_5_counts,t_5_counts],
367 'Others*':[c_exception_counts,t_exception_counts]})
368
369 spider_last(df,radar_max,1)
370 spider_last(df1,radar_max_counts,2)
371
372
373 ##############################################################################################################################################################################################################
374
375 def spider_last(df,radar_max,flag):
376 # ------- PART 1: Create background
377 fig = plt.figure()
378 # number of variable
379 categories=list(df)[1:]
380 N = len(categories)
381
382 # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
383 angles = [n / float(N) * 2 * pi for n in range(N)]
384 angles += angles[:1]
385
386 # Initialise the spider plot
387 ax = plt.subplot(111, polar=True)
388
389 # If you want the first axis to be on top:
390 ax.set_theta_offset(pi/2)
391 ax.set_theta_direction(-1)
392
393 # Draw one axe per variable + add labels labels yet
394 plt.xticks(angles[:-1], categories, fontsize=11)
395
396 # Draw ylabels
397 radar_max=round(radar_max+radar_max*0.1)
398 mul=len(str(radar_max))-1
399 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
400 sep = round(maxi/4)
401 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
402 plt.ylim(0, maxi)
403
404 # ------- PART 2: Add plots
405
406 # Plot each individual = each line of the data
407 # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
408
409 # Ind1
410 values=df.loc[0].drop('group').values.flatten().tolist()
411 values += values[:1]
412 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
413 ax.fill(angles, values, 'b', alpha=0.1)
414
415 # Ind2
416 values=df.loc[1].drop('group').values.flatten().tolist()
417 values += values[:1]
418 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
419 ax.fill(angles, values, 'r', alpha=0.1)
420
421 # Add legend
422 if flag==1:
423 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
424 plt.savefig('spider_non_red.png',dpi=300)
425 else:
426 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
427 plt.savefig('spider_red.png',dpi=300)
428
429 #############################################################################################################################################################################################################
430
431 def hist_red(samples,flag):
432
433 lengths=[]
434 cat=[]
435 total_reads=0
436 seq=[]
437
438 if flag == "c":
439 title = "Length Distribution of Control group (Redudant reads)"
440 if flag == "t":
441 title = "Length Distribution of Treated group (Redudant reads)"
442
443 for i in samples:
444 for x in i:
445 lengths.append(len(x[9]))
446 if x[1]=="0":
447 seq.append([x[9],x[0].split("-")[1],"Mapped"])
448 cat.append("Mapped")
449 if x[1] == "4":
450 seq.append([x[9],x[0].split("-")[1],"Unmapped"])
451 cat.append("Unmapped")
452
453 uni_len=list(set(lengths))
454 uni_len=[x for x in uni_len if x<=35]
455 low=min(lengths)
456 up=max(lengths)
457 seq.sort()
458 uni_seq=list(seq for seq,_ in itertools.groupby(seq))
459 dim=up-low
460
461 if dim>20:
462 s=5
463 else:
464 s=8
465 total_reads+=sum([int(x[1]) for x in uni_seq])
466
467 map_reads=[]
468 unmap_reads=[]
469 length=[]
470 for y in uni_len:
471 map_temp=0
472 unmap_temp=0
473 for x in uni_seq:
474 if len(x[0])==y and x[2]=="Mapped":
475 map_temp+=int(x[1])
476 if len(x[0])==y and x[2]=="Unmapped":
477 unmap_temp+=int(x[1])
478 if y<=35:
479 length.append(y)
480 map_reads.append(round(map_temp/total_reads*100,2))
481 unmap_reads.append(round(unmap_temp/total_reads*100,2))
482
483 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
484 ylim=ylim+ylim*20/100
485 fig, ax = plt.subplots()
486 width=0.8
487 ax.bar(length, unmap_reads, width, label='Unmapped')
488 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
489 plt.xticks(np.arange(length[0], length[-1]+1, 1))
490 plt.xlabel('Length (nt)',fontsize=14)
491 plt.ylabel('Percentage',fontsize=14)
492 plt.title(title,fontsize=14)
493 ax.legend()
494 plt.ylim([0, ylim])
495 ax.grid(axis='y',linewidth=0.2)
496
497 if flag=='c':
498 plt.savefig('c_hist_red.png',dpi=300)
499
500 if flag=='t':
501 plt.savefig('t_hist_red.png',dpi=300)
502
503 ##############################################################################################################################################################################################################################################
504
505 def logo_seq_red(merge, flag):
506
507 if flag=="c":
508 titlos="Control group (Redundant)"
509 file_logo="c_logo.png"
510 file_bar="c_bar.png"
511 if flag=="t":
512 titlos="Treated group (Redundant)"
513 file_logo="t_logo.png"
514 file_bar="t_bar.png"
515
516 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
517
518 A=[0]*5
519 C=[0]*5
520 G=[0]*5
521 T=[0]*5
522 total_reads=0
523
524 for y in c_samples:
525 if "/" in y[0]:
526 length=[]
527 for x in y[0].split("/"):
528 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
529
530 best=min(length)
531 total_reads+=best[2]
532 for i in range(5):
533 if i<len(best[1]):
534 if best[1][i] == "A":
535 A[i]+=best[2]
536 elif best[1][i] == "C":
537 C[i]+=best[2]
538 elif best[1][i] == "G":
539 G[i]+=best[2]
540 else:
541 T[i]+=best[2]
542 else:
543 total_reads+=y[2]
544 for i in range(5):
545 if i<len(y[0].split("_")[-1]):
546 if y[0].split("_")[-1][i] == "A":
547 A[i]+=(y[2])
548 elif y[0].split("_")[-1][i] == "C":
549 C[i]+=(y[2])
550 elif y[0].split("_")[-1][i] == "G":
551 G[i]+=(y[2])
552 else:
553 T[i]+=y[2]
554
555 A[:] = [round(x*100,1) / total_reads for x in A]
556 C[:] = [round(x*100,1) / total_reads for x in C]
557 G[:] = [round(x*100,1) / total_reads for x in G]
558 T[:] = [round(x*100,1) / total_reads for x in T]
559
560 data = {'A':A,'C':C,'G':G,'T':T}
561 df = pd.DataFrame(data, index=[1,2,3,4,5])
562
563 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
564 h.grid(axis='y',linewidth=0.2)
565 plt.xticks(rotation=0, ha="right")
566 plt.ylabel("Counts (%)",fontsize=18)
567 plt.xlabel("Positions (nt)",fontsize=18)
568 plt.title(titlos,fontsize=20)
569 plt.tight_layout()
570 plt.savefig(file_bar, dpi=300)
571
572 import logomaker as lm
573 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
574
575 crp_logo.style_spines(visible=False)
576 crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
577 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
578 crp_logo.ax.set_title(titlos,fontsize=18)
579 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
580 crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
581 crp_logo.ax.xaxis.set_ticks_position('none')
582 crp_logo.ax.xaxis.set_tick_params(pad=-1)
583 figure = plt.gcf()
584 figure.set_size_inches(6, 4)
585 crp_logo.fig.savefig(file_logo,dpi=300)
586
587 ##########################################################################################################################################################################################################
588
589 def logo_seq_non_red(merge_LH2E):
590
591 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
592
593 A=[0]*5
594 C=[0]*5
595 G=[0]*5
596 T=[0]*5
597
598 for y in c_samples:
599 if "/" in y[0]:
600 length=[]
601 for x in y[0].split("/"):
602 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
603
604 best=min(length)
605 for i in range(5):
606 if i<len(best[1]):
607 if best[1][i] == "A":
608 A[i]+=1
609 elif best[1][i] == "C":
610 C[i]+=1
611 elif best[1][i] == "G":
612 G[i]+=1
613 else:
614 T[i]+=1
615 else:
616 for i in range(5):
617 if i<len(y[0].split("_")[-1]):
618 if y[0].split("_")[-1][i] == "A":
619 A[i]+=1
620 elif y[0].split("_")[-1][i] == "C":
621 C[i]+=1
622 elif y[0].split("_")[-1][i] == "G":
623 G[i]+=1
624 else:
625 T[i]+=1
626
627 data = {'A':A,'C':C,'G':G,'T':T}
628 df = pd.DataFrame(data, index=[1,2,3,4,5])
629 h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
630 h.set_xlabel("Positions (nt)")
631 h.set_ylabel("Unique sequences")
632 plt.xticks(rotation=0, ha="right")
633 plt.tight_layout()
634 plt.savefig("bar2.png", dpi=300)
635
636
637 import logomaker as lm
638 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
639
640 crp_logo.style_spines(visible=False)
641 crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
642 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
643
644 crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
645 crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
646 crp_logo.ax.xaxis.set_ticks_position('none')
647 crp_logo.ax.xaxis.set_tick_params(pad=-1)
648 crp_logo.ax.set_title("Non-redundant")
649 figure = plt.gcf()
650 crp_logo.fig.savefig('logo2.png', dpi=300)
651
652
653 ################################################################################################################################################################################################################
654
655
656
657
658
659 ################################################################################################################################################################
660
661 def pdf_before_DE(analysis):
662
663 # Import FPDF class
664 from fpdf import FPDF, fpdf
665
666 # Import glob module to find all the files matching a pattern
667 import glob
668
669 # Image extensions
670 if analysis=="2":
671 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
672 else:
673 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
674 # This list will hold the images file names
675 images = []
676
677 # Build the image list by merging the glob results (a list of files)
678 # for each extension. We are taking images from current folder.
679 for extension in image_extensions:
680 images.extend(glob.glob(extension))
681 #sys.exit(images)
682 # Create instance of FPDF class
683 pdf = FPDF('P', 'in', 'A4')
684 # Add new page. Without this you cannot create the document.
685 pdf.add_page()
686 # Set font to Arial, 'B'old, 16 pts
687 pdf.set_font('Arial', 'B', 20.0)
688
689 # Page header
690 pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
691 pdf.ln(0.7)
692 pdf.set_font('Arial','', 16.0)
693 pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
694
695 # Smaller font for image captions
696 pdf.set_font('Arial', '', 11.0)
697
698 # Image caption
699 pdf.ln(0.5)
700
701 yh=FPDF.get_y(pdf)
702 pdf.image(images[0],x=0.3,w=4, h=3)
703 pdf.image(images[1],x=4,y=yh, w=4, h=3)
704 pdf.ln(0.3)
705
706 # Image caption
707 pdf.cell(0.2)
708 pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
709 pdf.ln(0.2)
710 pdf.cell(0.2)
711 pdf.cell(3.0, 0.0, " and Treated (right) groups")
712
713
714 pdf.ln(0.5)
715 h1=FPDF.get_y(pdf)
716 pdf.image(images[2],x=1, w=6.5, h=5)
717 h2=FPDF.get_y(pdf)
718 FPDF.set_y(pdf,h1+0.2)
719 pdf.set_font('Arial','', 14.0)
720 pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
721 pdf.set_font('Arial', '', 11.0)
722 FPDF.set_y(pdf,h2)
723 FPDF.set_y(pdf,9.5)
724 # Image caption
725 pdf.cell(0.2)
726 if analysis=="2":
727 pdf.cell(3.0, 0.0, " Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
728 pdf.ln(0.2)
729 pdf.cell(0.2)
730 pdf.cell(3.0, 0.0, " in Control (left) and treated (right) groups")
731 else:
732 pdf.cell(3.0, 0.0, " Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
733 pdf.ln(0.2)
734 pdf.cell(0.2)
735 pdf.cell(3.0, 0.0, " Control (left) and treated (right) groups")
736
737
738
739 pdf.add_page()
740 pdf.set_font('Arial', 'B', 16.0)
741 pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
742 pdf.ln(0.7)
743 pdf.set_font('Arial', 'B', 12.0)
744 pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
745 pdf.ln(0.5)
746 pdf.image(images[3],x=1.5, w=5.5, h=4)
747 pdf.ln(0.6)
748 pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
749 pdf.set_font('Arial', '', 12.0)
750 pdf.ln(0.2)
751 pdf.image(images[4],x=1.5, w=5.5, h=4)
752 pdf.ln(0.3)
753 pdf.set_font('Arial', '', 11.0)
754 pdf.cell(0.2)
755 pdf.cell(3.0, 0.0, " * IsomiRs potentialy initiated from multiple loci")
756
757
758 if analysis=="2":
759 pdf.add_page('L')
760
761 pdf.set_font('Arial', 'B', 16.0)
762 pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
763 pdf.ln(0.5)
764 pdf.set_font('Arial', 'B', 12.0)
765 pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
766 pdf.ln(0.7)
767
768 yh=FPDF.get_y(pdf)
769 pdf.image(images[5],x=1.5,w=3.65, h=2.65)
770 pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
771 pdf.ln(0.5)
772 yh=FPDF.get_y(pdf)
773 pdf.image(images[6],x=1.5,w=3.65, h=2.65)
774 pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
775
776 pdf.close()
777 pdf.output('report1.pdf','F')
778
779
780
781
782 #############################################################################################################################################################3
783
784 def pdf_after_DE(analysis):
785
786 # Import FPDF class
787 from fpdf import FPDF
788
789 # Import glob module to find all the files matching a pattern
790 import glob
791
792 # Image extensions
793 image_extensions = ("tem.png","non.png","a2.png")
794
795 # This list will hold the images file names
796 images = []
797
798 # Build the image list by merging the glob results (a list of files)
799 # for each extension. We are taking images from current folder.
800 for extension in image_extensions:
801 images.extend(glob.glob(extension))
802
803 # Create instance of FPDF class
804 pdf = FPDF('P', 'in', 'letter')
805 # Add new page. Without this you cannot create the document.
806 pdf.add_page()
807 # Set font to Arial, 'B'old, 16 pts
808 pdf.set_font('Arial', 'B', 16.0)
809
810 # Page header
811 pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
812 #pdf.ln(0.25)
813
814 pdf.ln(0.7)
815 pdf.set_font('Arial','B', 12.0)
816 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
817
818
819 # Smaller font for image captions
820 pdf.set_font('Arial', '', 10.0)
821
822 # Image caption
823 pdf.ln(0.4)
824 pdf.image(images[0],x=0.8, w=7, h=8)
825 pdf.ln(0.3)
826
827 if analysis=="2":
828
829 pdf.add_page()
830 pdf.ln(0.7)
831 pdf.set_font('Arial','B', 12.0)
832 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
833 pdf.ln(0.4)
834 pdf.image(images[1],x=0.5, w=7.5, h=6.5)
835
836 pdf.add_page()
837 pdf.ln(0.5)
838 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
839 pdf.ln(0.4)
840 pdf.image(images[2],x=0.8, w=7, h=8)
841 pdf.ln(0.3)
842
843
844 pdf.output('report2.pdf', 'F')
845
846
847