Mercurial > repos > glogobyte > viztool
comparison mirbase_graphs.py @ 1:561b0abcae87 draft
Uploaded
author | glogobyte |
---|---|
date | Fri, 16 Oct 2020 12:15:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:258aaaa465f3 | 1:561b0abcae87 |
---|---|
1 import itertools | |
2 import time | |
3 import sys | |
4 import os | |
5 import urllib.request | |
6 import gzip | |
7 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
8 import subprocess | |
9 import argparse | |
10 from collections import OrderedDict | |
11 from matplotlib.backends.backend_pdf import PdfPages | |
12 import pandas as pd | |
13 from math import pi | |
14 import numpy as np | |
15 import matplotlib.pyplot as plt | |
16 from matplotlib.ticker import PercentFormatter | |
17 import seaborn as sns | |
18 import scipy.stats as stats | |
19 from plotnine import * | |
20 import math | |
21 import re | |
22 import matplotlib.ticker as mtick | |
23 import copy | |
24 | |
25 | |
26 ################################################################################################################################################################# | |
27 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts): | |
28 | |
29 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
30 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
31 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E] | |
32 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E] | |
33 | |
34 c_templ = 0 | |
35 c_tem_counts = 0 | |
36 c_mature = 0 | |
37 c_mat_counts = 0 | |
38 t_templ = 0 | |
39 t_tem_counts = 0 | |
40 t_mature = 0 | |
41 t_mat_counts = 0 | |
42 | |
43 c_non = len(c_non_samples) | |
44 c_non_counts = sum(x[2] for x in c_non_samples) | |
45 t_non = len(t_non_samples) | |
46 t_non_counts = sum(x[2] for x in t_non_samples) | |
47 | |
48 c_unmap = c_unmap - c_non | |
49 t_unmap = c_unmap - t_non | |
50 | |
51 c_unmap_counts=c_unmap_counts - c_non_counts | |
52 t_unmap_counts=t_unmap_counts - t_non_counts | |
53 | |
54 | |
55 for x in c_samples: | |
56 | |
57 if "/" not in x[0]: | |
58 if "chr" in x[0].split("_")[-1]: | |
59 c_mature+=1 | |
60 c_mat_counts += x[2] | |
61 else: | |
62 c_templ+=1 | |
63 c_tem_counts += x[2] | |
64 else: | |
65 f=0 | |
66 for y in x[0].split("/"): | |
67 if "chr" in y.split("_")[-1]: | |
68 c_mature+=1 | |
69 c_mat_counts += x[2] | |
70 f=1 | |
71 break | |
72 if f==0: | |
73 c_templ+=1 | |
74 c_tem_counts += x[2] | |
75 | |
76 for x in t_samples: | |
77 | |
78 if "/" not in x[0]: | |
79 if "chr" in x[0].split("_")[-1]: | |
80 t_mature+=1 | |
81 t_mat_counts += x[2] | |
82 else: | |
83 t_templ+=1 | |
84 t_tem_counts += x[2] | |
85 else: | |
86 f=0 | |
87 for y in x[0].split("/"): | |
88 if "chr" in y.split("_")[-1]: | |
89 t_mature+=1 | |
90 t_mat_counts += x[2] | |
91 f=1 | |
92 break | |
93 if f==0: | |
94 t_templ+=1 | |
95 t_tem_counts += x[2] | |
96 | |
97 fig = plt.figure(figsize=(7,5)) | |
98 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template' | |
99 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts] | |
100 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
101 ax1 = plt.subplot2grid((1,2),(0,0)) | |
102 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
103 [x.set_fontsize(8) for x in texts] | |
104 plt.title('Control Group (reads)',fontsize=12) | |
105 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template' | |
106 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts] | |
107 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | |
108 ax2 = plt.subplot2grid((1,2),(0,1)) | |
109 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
110 [x.set_fontsize(8) for x in texts] | |
111 plt.title('Treated Group (reads)', fontsize=12) | |
112 plt.savefig('pie_non.png',dpi=300) | |
113 | |
114 ###################################################################################################################################################### | |
115 | |
116 | |
117 def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts): | |
118 | |
119 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
120 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
121 | |
122 c_templ = 0 | |
123 c_tem_counts = 0 | |
124 c_mature = 0 | |
125 c_mat_counts = 0 | |
126 t_templ = 0 | |
127 t_tem_counts = 0 | |
128 t_mature = 0 | |
129 t_mat_counts = 0 | |
130 | |
131 for x in c_samples: | |
132 | |
133 if "/" not in x[0]: | |
134 if "chr" in x[0].split("_")[-1]: | |
135 c_mature+=1 | |
136 c_mat_counts += x[2] | |
137 else: | |
138 c_templ+=1 | |
139 c_tem_counts += x[2] | |
140 else: | |
141 f=0 | |
142 for y in x[0].split("/"): | |
143 if "chr" in y.split("_")[-1]: | |
144 c_mature+=1 | |
145 c_mat_counts += x[2] | |
146 f=1 | |
147 break | |
148 if f==0: | |
149 c_templ+=1 | |
150 c_tem_counts += x[2] | |
151 | |
152 for x in t_samples: | |
153 | |
154 if "/" not in x[0]: | |
155 if "chr" in x[0].split("_")[-1]: | |
156 t_mature+=1 | |
157 t_mat_counts += x[2] | |
158 else: | |
159 t_templ+=1 | |
160 t_tem_counts += x[2] | |
161 else: | |
162 f=0 | |
163 for y in x[0].split("/"): | |
164 if "chr" in y.split("_")[-1]: | |
165 t_mature+=1 | |
166 t_mat_counts += x[2] | |
167 f=1 | |
168 break | |
169 if f==0: | |
170 t_templ+=1 | |
171 t_tem_counts += x[2] | |
172 | |
173 | |
174 fig = plt.figure() | |
175 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
176 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts] | |
177 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
178 explode = (0.2, 0.05, 0.1) | |
179 ax1 = plt.subplot2grid((1,2),(0,0)) | |
180 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
181 [x.set_fontsize(8) for x in texts] | |
182 plt.title('Control group (reads)', fontsize=12) | |
183 labels = 'miRNA RefSeq','Template', 'Unmapped' | |
184 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts] | |
185 colors = ['gold', 'yellowgreen', 'lightskyblue'] | |
186 explode = (0.2, 0.05, 0.1) | |
187 ax2 = plt.subplot2grid((1,2),(0,1)) | |
188 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | |
189 [x.set_fontsize(8) for x in texts] | |
190 plt.title('Treated group (reads)',fontsize = 12) | |
191 plt.savefig('pie_tem.png',dpi=300) | |
192 | |
193 ################################################################################################################################################################################################################### | |
194 | |
195 | |
196 def make_spider(merge_LH2E,merge_LH8E): | |
197 | |
198 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
199 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E] | |
200 | |
201 c_5 = 0 | |
202 c_5_counts = 0 | |
203 c_3 = 0 | |
204 c_3_counts = 0 | |
205 c_both =0 | |
206 c_both_counts=0 | |
207 c_mature = 0 | |
208 c_mat_counts = 0 | |
209 c_exception=0 | |
210 c_exception_counts=0 | |
211 | |
212 | |
213 t_5 = 0 | |
214 t_5_counts = 0 | |
215 t_3 = 0 | |
216 t_3_counts = 0 | |
217 t_both = 0 | |
218 t_both_counts = 0 | |
219 t_mature = 0 | |
220 t_mat_counts = 0 | |
221 t_exception = 0 | |
222 t_exception_counts=0 | |
223 | |
224 for x in c_samples: | |
225 | |
226 if "/" not in x[0]: | |
227 if "chr" in x[0].split("_")[-1]: | |
228 c_mature+=1 | |
229 c_mat_counts += x[2] | |
230 elif 0 == int(x[0].split("_")[-1]): | |
231 c_5+=1 | |
232 c_5_counts += x[2] | |
233 elif 0 == int(x[0].split("_")[-2]): | |
234 c_3+=1 | |
235 c_3_counts += x[2] | |
236 else: | |
237 c_both+=1 | |
238 c_both_counts+=x[2] | |
239 | |
240 else: | |
241 f=0 | |
242 for y in x[0].split("/"): | |
243 if "chr" in y.split("_")[-1]: | |
244 c_mature+=1 | |
245 c_mat_counts += x[2] | |
246 f=1 | |
247 break | |
248 if f==0: | |
249 for y in x[0].split("/"): | |
250 c_exception+=1 | |
251 c_exception_counts += x[2] | |
252 | |
253 | |
254 for x in t_samples: | |
255 | |
256 if "/" not in x[0]: | |
257 if "chr" in x[0].split("_")[-1]: | |
258 t_mature+=1 | |
259 t_mat_counts += x[2] | |
260 elif 0 == int(x[0].split("_")[-1]): | |
261 t_5+=1 | |
262 t_5_counts += x[2] | |
263 elif 0 == int(x[0].split("_")[-2]): | |
264 t_3+=1 | |
265 t_3_counts += x[2] | |
266 else: | |
267 t_both+=1 | |
268 t_both_counts+=x[2] | |
269 | |
270 else: | |
271 f=0 | |
272 for y in x[0].split("/"): | |
273 if "chr" in y.split("_")[-1]: | |
274 t_mature+=1 | |
275 t_mat_counts += x[2] | |
276 f=1 | |
277 break | |
278 if f==0: | |
279 for y in x[0].split("/"): | |
280 t_exception+=1 | |
281 t_exception_counts += x[2] | |
282 | |
283 | |
284 c_all = c_5+c_3+c_both+c_mature+c_exception | |
285 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts | |
286 | |
287 t_all = t_5+t_3+t_both+t_mature + t_exception | |
288 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts | |
289 | |
290 c_5 = round(c_5/c_all*100,2) | |
291 c_3 = round(c_3/c_all*100,2) | |
292 c_both = round(c_both/c_all*100,2) | |
293 c_mature = round(c_mature/c_all*100,2) | |
294 c_exception = round(c_exception/c_all*100,2) | |
295 | |
296 c_5_counts = round(c_5_counts/c_all_counts*100,2) | |
297 c_3_counts = round(c_3_counts/c_all_counts*100,2) | |
298 c_both_counts = round(c_both_counts/c_all_counts*100,2) | |
299 c_mat_counts = round(c_mat_counts/c_all_counts*100,2) | |
300 c_exception_counts = round(c_exception_counts/c_all_counts*100,2) | |
301 | |
302 t_5 = round(t_5/t_all*100,2) | |
303 t_3 = round(t_3/t_all*100,2) | |
304 t_both = round(t_both/t_all*100,2) | |
305 t_mature = round(t_mature/t_all*100,2) | |
306 t_exception = round(t_exception/t_all*100,2) | |
307 | |
308 t_5_counts = round(t_5_counts/t_all_counts*100,2) | |
309 t_3_counts = round(t_3_counts/t_all_counts*100,2) | |
310 t_both_counts = round(t_both_counts/t_all_counts*100,2) | |
311 t_mat_counts = round(t_mat_counts/t_all_counts*100,2) | |
312 t_exception_counts = round(t_exception_counts/t_all_counts*100,2) | |
313 | |
314 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception) | |
315 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts) | |
316 | |
317 df=pd.DataFrame({ | |
318 'group':['Controls','Treated'], | |
319 """5' and 3' isomiRs""":[c_both,t_both], | |
320 """3' isomiRs""":[c_3,t_3], | |
321 'miRNA RefSeq':[c_mature,t_mature], | |
322 """5' isomiRs""":[c_5,t_5], | |
323 'Others*':[c_exception,t_exception]}) | |
324 | |
325 df1=pd.DataFrame({ | |
326 'group':['Controls','Treated'], | |
327 """5' and 3' isomiRs""":[c_both_counts,t_both_counts], | |
328 """3' isomiRs""":[c_3_counts,t_3_counts], | |
329 'miRNA RefSeq':[c_mat_counts,t_mat_counts], | |
330 """5' isomiRs""":[c_5_counts,t_5_counts], | |
331 'Others*':[c_exception_counts,t_exception_counts]}) | |
332 | |
333 spider_last(df,radar_max,1) | |
334 spider_last(df1,radar_max_counts,2) | |
335 | |
336 ##################################################################################################################################################### | |
337 | |
338 def spider_last(df,radar_max,flag): | |
339 # ------- PART 1: Create background | |
340 fig = plt.figure() | |
341 # number of variable | |
342 categories=list(df)[1:] | |
343 N = len(categories) | |
344 | |
345 # What will be the angle of each axis in the plot? (we divide the plot / number of variable) | |
346 angles = [n / float(N) * 2 * pi for n in range(N)] | |
347 angles += angles[:1] | |
348 | |
349 # Initialise the spider plot | |
350 ax = plt.subplot(111, polar=True) | |
351 | |
352 # If you want the first axis to be on top: | |
353 ax.set_theta_offset(pi/2) | |
354 ax.set_theta_direction(-1) | |
355 | |
356 # Draw one axe per variable + add labels labels yet | |
357 plt.xticks(angles[:-1], categories, fontsize=11) | |
358 | |
359 # Draw ylabels | |
360 radar_max=round(radar_max+radar_max*0.1) | |
361 mul=len(str(radar_max))-1 | |
362 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul) | |
363 sep = round(maxi/4) | |
364 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10) | |
365 plt.ylim(0, maxi) | |
366 | |
367 # ------- PART 2: Add plots | |
368 | |
369 # Plot each individual = each line of the data | |
370 # I don't do a loop, because plotting more than 3 groups makes the chart unreadable | |
371 | |
372 # Ind1 | |
373 values=df.loc[0].drop('group').values.flatten().tolist() | |
374 values += values[:1] | |
375 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls") | |
376 ax.fill(angles, values, 'b', alpha=0.1) | |
377 | |
378 # Ind2 | |
379 values=df.loc[1].drop('group').values.flatten().tolist() | |
380 values += values[:1] | |
381 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated") | |
382 ax.fill(angles, values, 'r', alpha=0.1) | |
383 | |
384 # Add legend | |
385 if flag==1: | |
386 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
387 plt.savefig('spider_non_red.png',dpi=300) | |
388 else: | |
389 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1)) | |
390 plt.savefig('spider_red.png',dpi=300) | |
391 | |
392 | |
393 ############################################################################################################################################################################################################# | |
394 | |
395 def hist_red(samples,flag): | |
396 lengths=[] | |
397 cat=[] | |
398 total_reads=0 | |
399 seq=[] | |
400 | |
401 if flag == "c": | |
402 title = "Length Distribution of Control group (Redudant reads)" | |
403 if flag == "t": | |
404 title = "Length Distribution of Treated group (Redudant reads)" | |
405 | |
406 for i in samples: | |
407 for x in i: | |
408 lengths.append(len(x[9])) | |
409 if x[1]=="0": | |
410 seq.append([x[9],x[0].split("-")[1],"Mapped"]) | |
411 cat.append("Mapped") | |
412 if x[1] == "4": | |
413 seq.append([x[9],x[0].split("-")[1],"Unmapped"]) | |
414 cat.append("Unmapped") | |
415 | |
416 uni_len=list(set(lengths)) | |
417 uni_len=[x for x in uni_len if x<=35] | |
418 low=min(lengths) | |
419 up=max(lengths) | |
420 seq.sort() | |
421 uni_seq=list(seq for seq,_ in itertools.groupby(seq)) | |
422 dim=up-low | |
423 | |
424 if dim>20: | |
425 s=5 | |
426 else: | |
427 s=8 | |
428 | |
429 total_reads+=sum([int(x[1]) for x in uni_seq]) | |
430 | |
431 map_reads=[] | |
432 unmap_reads=[] | |
433 length=[] | |
434 for y in uni_len: | |
435 map_temp=0 | |
436 unmap_temp=0 | |
437 for x in uni_seq: | |
438 if len(x[0])==y and x[2]=="Mapped": | |
439 map_temp+=int(x[1]) | |
440 if len(x[0])==y and x[2]=="Unmapped": | |
441 unmap_temp+=int(x[1]) | |
442 if y<=35: | |
443 length.append(y) | |
444 map_reads.append(round(map_temp/total_reads*100,2)) | |
445 unmap_reads.append(round(unmap_temp/total_reads*100,2)) | |
446 | |
447 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)]) | |
448 ylim=ylim+ylim*20/100 | |
449 fig, ax = plt.subplots() | |
450 width=0.8 | |
451 ax.bar(length, unmap_reads, width, label='Unmapped') | |
452 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped') | |
453 plt.xticks(np.arange(length[0], length[-1]+1, 1)) | |
454 plt.xlabel('Length (nt)',fontsize=14) | |
455 plt.ylabel('Percentage',fontsize=14) | |
456 plt.title(title,fontsize=14) | |
457 ax.legend() | |
458 plt.ylim([0, ylim]) | |
459 ax.grid(axis='y',linewidth=0.2) | |
460 | |
461 if flag=='c': | |
462 plt.savefig('c_hist_red.png',dpi=300) | |
463 | |
464 if flag=='t': | |
465 plt.savefig('t_hist_red.png',dpi=300) | |
466 | |
467 ################################################################################################################# | |
468 | |
469 def logo_seq_red(merge, flag): | |
470 | |
471 if flag=="c": | |
472 titlos="Control group (Redundant)" | |
473 file_logo="c_logo.png" | |
474 file_bar="c_bar.png" | |
475 if flag=="t": | |
476 titlos="Treated group (Redundant)" | |
477 file_logo="t_logo.png" | |
478 file_bar="t_bar.png" | |
479 | |
480 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge] | |
481 | |
482 A=[0]*5 | |
483 C=[0]*5 | |
484 G=[0]*5 | |
485 T=[0]*5 | |
486 total_reads=0 | |
487 | |
488 for y in c_samples: | |
489 if "/" in y[0]: | |
490 length=[] | |
491 for x in y[0].split("/"): | |
492 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
493 | |
494 best=min(length) | |
495 total_reads+=best[2] | |
496 for i in range(5): | |
497 if i<len(best[1]): | |
498 if best[1][i] == "A": | |
499 A[i]+=best[2] | |
500 elif best[1][i] == "C": | |
501 C[i]+=best[2] | |
502 elif best[1][i] == "G": | |
503 G[i]+=best[2] | |
504 else: | |
505 T[i]+=best[2] | |
506 else: | |
507 total_reads+=y[2] | |
508 for i in range(5): | |
509 if i<len(y[0].split("_")[-1]): | |
510 if y[0].split("_")[-1][i] == "A": | |
511 A[i]+=(y[2]) | |
512 elif y[0].split("_")[-1][i] == "C": | |
513 C[i]+=(y[2]) | |
514 elif y[0].split("_")[-1][i] == "G": | |
515 G[i]+=(y[2]) | |
516 else: | |
517 T[i]+=y[2] | |
518 | |
519 A[:] = [round(x*100,1) / total_reads for x in A] | |
520 C[:] = [round(x*100,1) / total_reads for x in C] | |
521 G[:] = [round(x*100,1) / total_reads for x in G] | |
522 T[:] = [round(x*100,1) / total_reads for x in T] | |
523 | |
524 | |
525 | |
526 data = {'A':A,'C':C,'G':G,'T':T} | |
527 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
528 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) ) | |
529 h.grid(axis='y',linewidth=0.2) | |
530 plt.xticks(rotation=0, ha="right") | |
531 plt.ylabel("Counts (%)",fontsize=18) | |
532 plt.xlabel("Positions (nt)",fontsize=18) | |
533 plt.title(titlos,fontsize=20) | |
534 plt.tight_layout() | |
535 plt.savefig(file_bar, dpi=300) | |
536 | |
537 import logomaker as lm | |
538 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
539 crp_logo.style_spines(visible=False) | |
540 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
541 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
542 | |
543 # style using Axes methods | |
544 crp_logo.ax.set_title(titlos,fontsize=18) | |
545 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5) | |
546 crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5) | |
547 crp_logo.ax.xaxis.set_ticks_position('none') | |
548 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
549 figure = plt.gcf() | |
550 figure.set_size_inches(6, 4) | |
551 crp_logo.fig.savefig(file_logo,dpi=300) | |
552 | |
553 ########################################################################################################################################################################################################## | |
554 | |
555 def logo_seq_non_red(merge_LH2E): | |
556 | |
557 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E] | |
558 | |
559 A=[0]*5 | |
560 C=[0]*5 | |
561 G=[0]*5 | |
562 T=[0]*5 | |
563 | |
564 for y in c_samples: | |
565 if "/" in y[0]: | |
566 length=[] | |
567 for x in y[0].split("/"): | |
568 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | |
569 | |
570 best=min(length) | |
571 for i in range(5): | |
572 if i<len(best[1]): | |
573 if best[1][i] == "A": | |
574 A[i]+=1 | |
575 elif best[1][i] == "C": | |
576 C[i]+=1 | |
577 elif best[1][i] == "G": | |
578 G[i]+=1 | |
579 else: | |
580 T[i]+=1 | |
581 else: | |
582 for i in range(5): | |
583 if i<len(y[0].split("_")[-1]): | |
584 if y[0].split("_")[-1][i] == "A": | |
585 A[i]+=1 | |
586 elif y[0].split("_")[-1][i] == "C": | |
587 C[i]+=1 | |
588 elif y[0].split("_")[-1][i] == "G": | |
589 G[i]+=1 | |
590 else: | |
591 T[i]+=1 | |
592 | |
593 data = {'A':A,'C':C,'G':G,'T':T} | |
594 df = pd.DataFrame(data, index=[1,2,3,4,5]) | |
595 h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"])) | |
596 h.set_xlabel("Positions (nt)") | |
597 h.set_ylabel("Unique sequences") | |
598 plt.xticks(rotation=0, ha="right") | |
599 plt.tight_layout() | |
600 plt.savefig("bar2.png", dpi=300) | |
601 | |
602 | |
603 import logomaker as lm | |
604 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | |
605 | |
606 # style using Logo methods | |
607 crp_logo.style_spines(visible=False) | |
608 crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | |
609 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | |
610 | |
611 # style using Axes methods | |
612 crp_logo.ax.set_ylabel("Unique sequences", labelpad=5) | |
613 crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5) | |
614 crp_logo.ax.xaxis.set_ticks_position('none') | |
615 crp_logo.ax.xaxis.set_tick_params(pad=-1) | |
616 crp_logo.ax.set_title("Non-redundant") | |
617 figure = plt.gcf() | |
618 crp_logo.fig.savefig('logo2.png', dpi=300) | |
619 | |
620 | |
621 ################################################################################################################################################################ | |
622 | |
623 def pdf_before_DE(analysis): | |
624 | |
625 # Import FPDF class | |
626 from fpdf import FPDF, fpdf | |
627 | |
628 # Import glob module to find all the files matching a pattern | |
629 import glob | |
630 | |
631 # Image extensions | |
632 if analysis=="2": | |
633 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png") | |
634 else: | |
635 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png") | |
636 # This list will hold the images file names | |
637 images = [] | |
638 | |
639 # Build the image list by merging the glob results (a list of files) | |
640 # for each extension. We are taking images from current folder. | |
641 for extension in image_extensions: | |
642 images.extend(glob.glob(extension)) | |
643 #sys.exit(images) | |
644 # Create instance of FPDF class | |
645 pdf = FPDF('P', 'in', 'A4') | |
646 # Add new page. Without this you cannot create the document. | |
647 pdf.add_page() | |
648 # Set font to Arial, 'B'old, 16 pts | |
649 pdf.set_font('Arial', 'B', 20.0) | |
650 | |
651 # Page header | |
652 pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C') | |
653 pdf.ln(0.7) | |
654 pdf.set_font('Arial','', 16.0) | |
655 pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C') | |
656 | |
657 # Smaller font for image captions | |
658 pdf.set_font('Arial', '', 11.0) | |
659 | |
660 # Image caption | |
661 pdf.ln(0.5) | |
662 | |
663 yh=FPDF.get_y(pdf) | |
664 pdf.image(images[0],x=0.3,w=4, h=3) | |
665 pdf.image(images[1],x=4,y=yh, w=4, h=3) | |
666 pdf.ln(0.3) | |
667 | |
668 # Image caption | |
669 pdf.cell(0.2) | |
670 pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)") | |
671 pdf.ln(0.2) | |
672 pdf.cell(0.2) | |
673 pdf.cell(3.0, 0.0, " and Treated (right) groups") | |
674 | |
675 | |
676 pdf.ln(0.5) | |
677 h1=FPDF.get_y(pdf) | |
678 pdf.image(images[2],x=1, w=6.5, h=5) | |
679 h2=FPDF.get_y(pdf) | |
680 FPDF.set_y(pdf,h1+0.2) | |
681 pdf.set_font('Arial','', 14.0) | |
682 pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C') | |
683 pdf.set_font('Arial', '', 11.0) | |
684 FPDF.set_y(pdf,h2) | |
685 FPDF.set_y(pdf,9.5) | |
686 # Image caption | |
687 pdf.cell(0.2) | |
688 if analysis=="2": | |
689 pdf.cell(3.0, 0.0, " Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads") | |
690 pdf.ln(0.2) | |
691 pdf.cell(0.2) | |
692 pdf.cell(3.0, 0.0, " in Control (left) and treated (right) groups") | |
693 else: | |
694 pdf.cell(3.0, 0.0, " Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in") | |
695 pdf.ln(0.2) | |
696 pdf.cell(0.2) | |
697 pdf.cell(3.0, 0.0, " Control (left) and treated (right) groups") | |
698 | |
699 | |
700 | |
701 pdf.add_page() | |
702 pdf.set_font('Arial', 'B', 16.0) | |
703 pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C') | |
704 pdf.ln(0.7) | |
705 pdf.set_font('Arial', 'B', 12.0) | |
706 pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C') | |
707 pdf.ln(0.5) | |
708 pdf.image(images[3],x=1.5, w=5.5, h=4) | |
709 pdf.ln(0.6) | |
710 pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C') | |
711 pdf.set_font('Arial', '', 12.0) | |
712 pdf.ln(0.2) | |
713 pdf.image(images[4],x=1.5, w=5.5, h=4) | |
714 pdf.ln(0.3) | |
715 pdf.set_font('Arial', '', 11.0) | |
716 pdf.cell(0.2) | |
717 pdf.cell(3.0, 0.0, " * IsomiRs potentialy initiated from multiple loci") | |
718 | |
719 | |
720 if analysis=="2": | |
721 pdf.add_page('L') | |
722 | |
723 pdf.set_font('Arial', 'B', 16.0) | |
724 pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C') | |
725 pdf.ln(0.5) | |
726 pdf.set_font('Arial', 'B', 12.0) | |
727 pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C') | |
728 pdf.ln(0.7) | |
729 | |
730 yh=FPDF.get_y(pdf) | |
731 pdf.image(images[5],x=1.5,w=3.65, h=2.65) | |
732 pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65) | |
733 pdf.ln(0.5) | |
734 yh=FPDF.get_y(pdf) | |
735 pdf.image(images[6],x=1.5,w=3.65, h=2.65) | |
736 pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65) | |
737 | |
738 pdf.close() | |
739 pdf.output('report1.pdf','F') | |
740 | |
741 | |
742 | |
743 | |
744 #############################################################################################################################################################3 | |
745 | |
746 def pdf_after_DE(analysis): | |
747 | |
748 # Import FPDF class | |
749 from fpdf import FPDF | |
750 | |
751 # Import glob module to find all the files matching a pattern | |
752 import glob | |
753 | |
754 # Image extensions | |
755 image_extensions = ("tem.png","non.png","a2.png") | |
756 | |
757 # This list will hold the images file names | |
758 images = [] | |
759 | |
760 # Build the image list by merging the glob results (a list of files) | |
761 # for each extension. We are taking images from current folder. | |
762 for extension in image_extensions: | |
763 images.extend(glob.glob(extension)) | |
764 | |
765 # Create instance of FPDF class | |
766 pdf = FPDF('P', 'in', 'letter') | |
767 # Add new page. Without this you cannot create the document. | |
768 pdf.add_page() | |
769 # Set font to Arial, 'B'old, 16 pts | |
770 pdf.set_font('Arial', 'B', 16.0) | |
771 | |
772 # Page header | |
773 pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C') | |
774 #pdf.ln(0.25) | |
775 | |
776 pdf.ln(0.7) | |
777 pdf.set_font('Arial','B', 12.0) | |
778 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C') | |
779 | |
780 | |
781 # Smaller font for image captions | |
782 pdf.set_font('Arial', '', 10.0) | |
783 | |
784 # Image caption | |
785 pdf.ln(0.4) | |
786 pdf.image(images[0],x=0.8, w=7, h=8) | |
787 pdf.ln(0.3) | |
788 | |
789 if analysis=="2": | |
790 | |
791 pdf.add_page() | |
792 pdf.ln(0.7) | |
793 pdf.set_font('Arial','B', 12.0) | |
794 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C') | |
795 pdf.ln(0.4) | |
796 pdf.image(images[1],x=0.5, w=7.5, h=6.5) | |
797 | |
798 pdf.add_page() | |
799 pdf.ln(0.5) | |
800 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C') | |
801 pdf.ln(0.4) | |
802 pdf.image(images[2],x=0.8, w=7, h=8) | |
803 pdf.ln(0.3) | |
804 | |
805 | |
806 pdf.output('report2.pdf', 'F') | |
807 | |
808 | |
809 |