5
|
1 import itertools
|
|
2 import time
|
|
3 import sys
|
|
4 import os
|
|
5 import urllib.request
|
|
6 import gzip
|
|
7 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
|
|
8 import subprocess
|
|
9 import argparse
|
|
10 from collections import OrderedDict
|
|
11 from matplotlib.backends.backend_pdf import PdfPages
|
|
12 import pandas as pd
|
|
13 from math import pi
|
|
14 import numpy as np
|
|
15 import matplotlib.pyplot as plt
|
|
16 from matplotlib.ticker import PercentFormatter
|
|
17 import seaborn as sns
|
|
18 import scipy.stats as stats
|
|
19 from plotnine import *
|
|
20 import math
|
|
21 import re
|
|
22 import matplotlib.ticker as mtick
|
|
23 import copy
|
|
24
|
|
25
|
|
26 #########################################################################################################################
|
|
27 def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
|
|
28
|
|
29 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
|
|
30 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
|
|
31 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
|
|
32 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
|
|
33
|
|
34 c_templ = 0
|
|
35 c_tem_counts = 0
|
|
36 c_mature = 0
|
|
37 c_mat_counts = 0
|
|
38 t_templ = 0
|
|
39 t_tem_counts = 0
|
|
40 t_mature = 0
|
|
41 t_mat_counts = 0
|
|
42
|
|
43 c_non = len(c_non_samples)
|
|
44 c_non_counts = sum(x[2] for x in c_non_samples)
|
|
45 t_non = len(t_non_samples)
|
|
46 t_non_counts = sum(x[2] for x in t_non_samples)
|
|
47
|
|
48 c_unmap = c_unmap - c_non
|
|
49 t_unmap = c_unmap - t_non
|
|
50
|
|
51 c_unmap_counts=c_unmap_counts - c_non_counts
|
|
52 t_unmap_counts=t_unmap_counts - t_non_counts
|
|
53
|
|
54
|
|
55 for x in c_samples:
|
|
56
|
|
57 if "/" not in x[0]:
|
|
58 if len(x[0].split("_"))==2:
|
|
59 c_mature+=1
|
|
60 c_mat_counts += x[2]
|
|
61 else:
|
|
62 c_templ+=1
|
|
63 c_tem_counts += x[2]
|
|
64 else:
|
|
65 f=0
|
|
66 for y in x[0].split("/"):
|
|
67 if len(y.split("_"))==2:
|
|
68 c_mature+=1
|
|
69 c_mat_counts += x[2]
|
|
70 f=1
|
|
71 break
|
|
72 if f==0:
|
|
73 c_templ+=1
|
|
74 c_tem_counts += x[2]
|
|
75
|
|
76 for x in t_samples:
|
|
77
|
|
78 if "/" not in x[0]:
|
|
79 if len(x[0].split("_"))==2:
|
|
80 t_mature+=1
|
|
81 t_mat_counts += x[2]
|
|
82 else:
|
|
83 t_templ+=1
|
|
84 t_tem_counts += x[2]
|
|
85 else:
|
|
86 f=0
|
|
87 for y in x[0].split("/"):
|
|
88 if len(y.split("_"))==2:
|
|
89 t_mature+=1
|
|
90 t_mat_counts += x[2]
|
|
91 f=1
|
|
92 break
|
|
93 if f==0:
|
|
94 t_templ+=1
|
|
95 t_tem_counts += x[2]
|
|
96
|
|
97 fig = plt.figure(figsize=(7,5))
|
|
98
|
|
99 labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
|
|
100 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
|
|
101 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
|
|
102 # Plot
|
|
103 ax1 = plt.subplot2grid((1,2),(0,0))
|
|
104 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
|
105 [x.set_fontsize(8) for x in texts]
|
|
106 plt.title('Control Group (reads)',fontsize=12)
|
|
107
|
|
108 labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
|
|
109 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
|
|
110 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
|
|
111 # Plot
|
|
112 ax2 = plt.subplot2grid((1,2),(0,1))
|
|
113 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
|
114 [x.set_fontsize(8) for x in texts]
|
|
115 plt.title('Treated Group (reads)', fontsize=12)
|
|
116 plt.savefig('pie_non.png',dpi=300)
|
|
117
|
|
118 ####################################################################################################################################################################################################################
|
|
119
|
|
120 def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
|
|
121
|
|
122 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
|
|
123 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
|
|
124
|
|
125 c_templ = 0
|
|
126 c_tem_counts = 0
|
|
127 c_mature = 0
|
|
128 c_mat_counts = 0
|
|
129 t_templ = 0
|
|
130 t_tem_counts = 0
|
|
131 t_mature = 0
|
|
132 t_mat_counts = 0
|
|
133
|
|
134 for x in c_samples:
|
|
135
|
|
136 if "/" not in x[0]:
|
|
137 if len(x[0].split("_"))==2:
|
|
138 c_mature+=1
|
|
139 c_mat_counts += x[2]
|
|
140 else:
|
|
141 c_templ+=1
|
|
142 c_tem_counts += x[2]
|
|
143 else:
|
|
144 f=0
|
|
145 for y in x[0].split("/"):
|
|
146 if len(y.split("_"))==2:
|
|
147 c_mature+=1
|
|
148 c_mat_counts += x[2]
|
|
149 f=1
|
|
150 break
|
|
151 if f==0:
|
|
152 c_templ+=1
|
|
153 c_tem_counts += x[2]
|
|
154
|
|
155 for x in t_samples:
|
|
156
|
|
157 if "/" not in x[0]:
|
|
158 if len(x[0].split("_"))==2:
|
|
159 t_mature+=1
|
|
160 t_mat_counts += x[2]
|
|
161 else:
|
|
162 t_templ+=1
|
|
163 t_tem_counts += x[2]
|
|
164 else:
|
|
165 f=0
|
|
166 for y in x[0].split("/"):
|
|
167 if len(y.split("_"))==2:
|
|
168 t_mature+=1
|
|
169 t_mat_counts += x[2]
|
|
170 f=1
|
|
171 break
|
|
172 if f==0:
|
|
173 t_templ+=1
|
|
174 t_tem_counts += x[2]
|
|
175
|
|
176
|
|
177 fig = plt.figure()
|
|
178 labels = 'miRNA RefSeq','Template', 'Unmapped'
|
|
179 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
|
|
180 colors = ['gold', 'yellowgreen', 'lightskyblue']
|
|
181 explode = (0.2, 0.05, 0.1)
|
|
182 # Plot
|
|
183 ax1 = plt.subplot2grid((1,2),(0,0))
|
|
184 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
|
185 [x.set_fontsize(8) for x in texts]
|
|
186 plt.title('Control group (reads)', fontsize=12)
|
|
187 labels = 'miRNA RefSeq','Template', 'Unmapped'
|
|
188 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
|
|
189 colors = ['gold', 'yellowgreen', 'lightskyblue']
|
|
190 explode = (0.2, 0.05, 0.1)
|
|
191 # Plot
|
|
192 ax2 = plt.subplot2grid((1,2),(0,1))
|
|
193 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
|
194 [x.set_fontsize(8) for x in texts]
|
|
195 plt.title('Treated group (reads)',fontsize = 12)
|
|
196 plt.savefig('pie_tem.png',dpi=300)
|
|
197
|
|
198 ###################################################################################################################################################################################################################
|
|
199
|
|
200 def make_spider(merge_LH2E,merge_LH8E):
|
|
201
|
|
202 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
|
|
203 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
|
|
204
|
|
205 c_5 = 0
|
|
206 c_5_counts = 0
|
|
207 c_3 = 0
|
|
208 c_3_counts = 0
|
|
209 c_both =0
|
|
210 c_both_counts=0
|
|
211 c_mature = 0
|
|
212 c_mat_counts = 0
|
|
213 c_exception=0
|
|
214 c_exception_counts=0
|
|
215
|
|
216
|
|
217 t_5 = 0
|
|
218 t_5_counts = 0
|
|
219 t_3 = 0
|
|
220 t_3_counts = 0
|
|
221 t_both = 0
|
|
222 t_both_counts = 0
|
|
223 t_mature = 0
|
|
224 t_mat_counts = 0
|
|
225 t_exception = 0
|
|
226 t_exception_counts=0
|
|
227
|
|
228 for x in c_samples:
|
|
229
|
|
230 if "/" not in x[0]:
|
|
231 if len(x[0].split("_"))==2:
|
|
232 c_mature+=1
|
|
233 c_mat_counts += x[2]
|
|
234 elif 0 == int(x[0].split("_")[3]):
|
|
235 c_5+=1
|
|
236 c_5_counts += x[2]
|
|
237 elif 0 == int(x[0].split("_")[2]):
|
|
238 c_3+=1
|
|
239 c_3_counts += x[2]
|
|
240 else:
|
|
241 c_both+=1
|
|
242 c_both_counts+=x[2]
|
|
243
|
|
244 else:
|
|
245 f=0
|
|
246 for y in x[0].split("/"):
|
|
247 if len(y.split("_"))==2:
|
|
248 c_mature+=1
|
|
249 c_mat_counts += x[2]
|
|
250 f=1
|
|
251 break
|
|
252 if f==0:
|
|
253 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
|
|
254 num=0
|
|
255 for y in x[0].split("/"):
|
|
256 if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
|
|
257 num+=1
|
|
258 if num==len(x[0].split("/")):
|
|
259
|
|
260 if 0 == int(x[0].split("/")[0].split("_")[3]):
|
|
261 c_5+=1
|
|
262 c_5_counts += x[2]
|
|
263 elif 0 == int(x[0].split("/")[0].split("_")[2]):
|
|
264 c_3+=1
|
|
265 c_3_counts += x[2]
|
|
266 else:
|
|
267 c_both+=1
|
|
268 c_both_counts+=x[2]
|
|
269 else:
|
|
270 c_exception+=1
|
|
271 c_exception_counts += x[2]
|
|
272
|
|
273
|
|
274 for x in t_samples:
|
|
275
|
|
276 if "/" not in x[0]:
|
|
277 if len(x[0].split("_"))==2:
|
|
278 t_mature+=1
|
|
279 t_mat_counts += x[2]
|
|
280 elif 0 == int(x[0].split("_")[3]):
|
|
281 t_5+=1
|
|
282 t_5_counts += x[2]
|
|
283 elif 0 == int(x[0].split("_")[2]):
|
|
284 t_3+=1
|
|
285 t_3_counts += x[2]
|
|
286 else:
|
|
287 t_both+=1
|
|
288 t_both_counts+=x[2]
|
|
289
|
|
290 else:
|
|
291 f=0
|
|
292 for y in x[0].split("/"):
|
|
293 if len(y.split("_"))==2:
|
|
294 t_mature+=1
|
|
295 t_mat_counts += x[2]
|
|
296 f=1
|
|
297 break
|
|
298 if f==0:
|
|
299 part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
|
|
300 num=0
|
|
301 for y in x[0].split("/"):
|
|
302 if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
|
|
303 num+=1
|
|
304 if num==len(x[0].split("/")):
|
|
305
|
|
306 if 0 == int(x[0].split("/")[0].split("_")[3]):
|
|
307 t_5+=1
|
|
308 t_5_counts += x[2]
|
|
309 elif 0 == int(x[0].split("/")[0].split("_")[2]):
|
|
310 t_3+=1
|
|
311 t_3_counts += x[2]
|
|
312 else:
|
|
313 t_both+=1
|
|
314 t_both_counts+=x[2]
|
|
315 else:
|
|
316 t_exception+=1
|
|
317 t_exception_counts += x[2]
|
|
318
|
|
319
|
|
320 c_all = c_5+c_3+c_both+c_mature+c_exception
|
|
321 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
|
|
322
|
|
323 t_all = t_5+t_3+t_both+t_mature + t_exception
|
|
324 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
|
|
325
|
|
326 c_5 = round(c_5/c_all*100,2)
|
|
327 c_3 = round(c_3/c_all*100,2)
|
|
328 c_both = round(c_both/c_all*100,2)
|
|
329 c_mature = round(c_mature/c_all*100,2)
|
|
330 c_exception = round(c_exception/c_all*100,2)
|
|
331
|
|
332 c_5_counts = round(c_5_counts/c_all_counts*100,2)
|
|
333 c_3_counts = round(c_3_counts/c_all_counts*100,2)
|
|
334 c_both_counts = round(c_both_counts/c_all_counts*100,2)
|
|
335 c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
|
|
336 c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
|
|
337
|
|
338 t_5 = round(t_5/t_all*100,2)
|
|
339 t_3 = round(t_3/t_all*100,2)
|
|
340 t_both = round(t_both/t_all*100,2)
|
|
341 t_mature = round(t_mature/t_all*100,2)
|
|
342 t_exception = round(t_exception/t_all*100,2)
|
|
343
|
|
344 t_5_counts = round(t_5_counts/t_all_counts*100,2)
|
|
345 t_3_counts = round(t_3_counts/t_all_counts*100,2)
|
|
346 t_both_counts = round(t_both_counts/t_all_counts*100,2)
|
|
347 t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
|
|
348 t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
|
|
349
|
|
350 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
|
|
351 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
|
|
352
|
|
353 df=pd.DataFrame({
|
|
354 'group':['Controls','Treated'],
|
|
355 """5' and 3' isomiRs""":[c_both,t_both],
|
|
356 """3' isomiRs""":[c_3,t_3],
|
|
357 'miRNA RefSeq':[c_mature,t_mature],
|
|
358 """5' isomiRs""":[c_5,t_5],
|
|
359 'Others*':[c_exception,t_exception]})
|
|
360
|
|
361 df1=pd.DataFrame({
|
|
362 'group':['Controls','Treated'],
|
|
363 """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
|
|
364 """3' isomiRs""":[c_3_counts,t_3_counts],
|
|
365 'miRNA RefSeq':[c_mat_counts,t_mat_counts],
|
|
366 """5' isomiRs""":[c_5_counts,t_5_counts],
|
|
367 'Others*':[c_exception_counts,t_exception_counts]})
|
|
368
|
|
369 spider_last(df,radar_max,1)
|
|
370 spider_last(df1,radar_max_counts,2)
|
|
371
|
|
372
|
|
373 ##############################################################################################################################################################################################################
|
|
374
|
|
375 def spider_last(df,radar_max,flag):
|
|
376 # ------- PART 1: Create background
|
|
377 fig = plt.figure()
|
|
378 # number of variable
|
|
379 categories=list(df)[1:]
|
|
380 N = len(categories)
|
|
381
|
|
382 # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
|
|
383 angles = [n / float(N) * 2 * pi for n in range(N)]
|
|
384 angles += angles[:1]
|
|
385
|
|
386 # Initialise the spider plot
|
|
387 ax = plt.subplot(111, polar=True)
|
|
388
|
|
389 # If you want the first axis to be on top:
|
|
390 ax.set_theta_offset(pi/2)
|
|
391 ax.set_theta_direction(-1)
|
|
392
|
|
393 # Draw one axe per variable + add labels labels yet
|
|
394 plt.xticks(angles[:-1], categories, fontsize=11)
|
|
395
|
|
396 # Draw ylabels
|
|
397 radar_max=round(radar_max+radar_max*0.1)
|
|
398 mul=len(str(radar_max))-1
|
|
399 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
|
|
400 sep = round(maxi/4)
|
|
401 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
|
|
402 plt.ylim(0, maxi)
|
|
403
|
|
404 # ------- PART 2: Add plots
|
|
405
|
|
406 # Plot each individual = each line of the data
|
|
407 # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
|
|
408
|
|
409 # Ind1
|
|
410 values=df.loc[0].drop('group').values.flatten().tolist()
|
|
411 values += values[:1]
|
|
412 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
|
|
413 ax.fill(angles, values, 'b', alpha=0.1)
|
|
414
|
|
415 # Ind2
|
|
416 values=df.loc[1].drop('group').values.flatten().tolist()
|
|
417 values += values[:1]
|
|
418 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
|
|
419 ax.fill(angles, values, 'r', alpha=0.1)
|
|
420
|
|
421 # Add legend
|
|
422 if flag==1:
|
|
423 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
|
|
424 plt.savefig('spider_non_red.png',dpi=300)
|
|
425 else:
|
|
426 plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
|
|
427 plt.savefig('spider_red.png',dpi=300)
|
|
428
|
|
429 #############################################################################################################################################################################################################
|
|
430
|
|
431 def hist_red(samples,flag):
|
|
432
|
|
433 lengths=[]
|
|
434 cat=[]
|
|
435 total_reads=0
|
|
436 seq=[]
|
|
437
|
|
438 if flag == "c":
|
|
439 title = "Length Distribution of Control group (Redudant reads)"
|
|
440 if flag == "t":
|
|
441 title = "Length Distribution of Treated group (Redudant reads)"
|
|
442
|
|
443 for i in samples:
|
|
444 for x in i:
|
|
445 lengths.append(len(x[9]))
|
|
446 if x[1]=="0":
|
|
447 seq.append([x[9],x[0].split("-")[1],"Mapped"])
|
|
448 cat.append("Mapped")
|
|
449 if x[1] == "4":
|
|
450 seq.append([x[9],x[0].split("-")[1],"Unmapped"])
|
|
451 cat.append("Unmapped")
|
|
452
|
|
453 uni_len=list(set(lengths))
|
|
454 uni_len=[x for x in uni_len if x<=35]
|
|
455 low=min(lengths)
|
|
456 up=max(lengths)
|
|
457 seq.sort()
|
|
458 uni_seq=list(seq for seq,_ in itertools.groupby(seq))
|
|
459 dim=up-low
|
|
460
|
|
461 if dim>20:
|
|
462 s=5
|
|
463 else:
|
|
464 s=8
|
|
465 total_reads+=sum([int(x[1]) for x in uni_seq])
|
|
466
|
|
467 map_reads=[]
|
|
468 unmap_reads=[]
|
|
469 length=[]
|
|
470 for y in uni_len:
|
|
471 map_temp=0
|
|
472 unmap_temp=0
|
|
473 for x in uni_seq:
|
|
474 if len(x[0])==y and x[2]=="Mapped":
|
|
475 map_temp+=int(x[1])
|
|
476 if len(x[0])==y and x[2]=="Unmapped":
|
|
477 unmap_temp+=int(x[1])
|
|
478 if y<=35:
|
|
479 length.append(y)
|
|
480 map_reads.append(round(map_temp/total_reads*100,2))
|
|
481 unmap_reads.append(round(unmap_temp/total_reads*100,2))
|
|
482
|
|
483 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
|
|
484 ylim=ylim+ylim*20/100
|
|
485 fig, ax = plt.subplots()
|
|
486 width=0.8
|
|
487 ax.bar(length, unmap_reads, width, label='Unmapped')
|
|
488 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
|
|
489 plt.xticks(np.arange(length[0], length[-1]+1, 1))
|
|
490 plt.xlabel('Length (nt)',fontsize=14)
|
|
491 plt.ylabel('Percentage',fontsize=14)
|
|
492 plt.title(title,fontsize=14)
|
|
493 ax.legend()
|
|
494 plt.ylim([0, ylim])
|
|
495 ax.grid(axis='y',linewidth=0.2)
|
|
496
|
|
497 if flag=='c':
|
|
498 plt.savefig('c_hist_red.png',dpi=300)
|
|
499
|
|
500 if flag=='t':
|
|
501 plt.savefig('t_hist_red.png',dpi=300)
|
|
502
|
|
503 ##############################################################################################################################################################################################################################################
|
|
504
|
|
505 def logo_seq_red(merge, flag):
|
|
506
|
|
507 if flag=="c":
|
|
508 titlos="Control group (Redundant)"
|
|
509 file_logo="c_logo.png"
|
|
510 file_bar="c_bar.png"
|
|
511 if flag=="t":
|
|
512 titlos="Treated group (Redundant)"
|
|
513 file_logo="t_logo.png"
|
|
514 file_bar="t_bar.png"
|
|
515
|
|
516 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
|
|
517
|
|
518 A=[0]*5
|
|
519 C=[0]*5
|
|
520 G=[0]*5
|
|
521 T=[0]*5
|
|
522 total_reads=0
|
|
523
|
|
524 for y in c_samples:
|
|
525 if "/" in y[0]:
|
|
526 length=[]
|
|
527 for x in y[0].split("/"):
|
|
528 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
|
|
529
|
|
530 best=min(length)
|
|
531 total_reads+=best[2]
|
|
532 for i in range(5):
|
|
533 if i<len(best[1]):
|
|
534 if best[1][i] == "A":
|
|
535 A[i]+=best[2]
|
|
536 elif best[1][i] == "C":
|
|
537 C[i]+=best[2]
|
|
538 elif best[1][i] == "G":
|
|
539 G[i]+=best[2]
|
|
540 else:
|
|
541 T[i]+=best[2]
|
|
542 else:
|
|
543 total_reads+=y[2]
|
|
544 for i in range(5):
|
|
545 if i<len(y[0].split("_")[-1]):
|
|
546 if y[0].split("_")[-1][i] == "A":
|
|
547 A[i]+=(y[2])
|
|
548 elif y[0].split("_")[-1][i] == "C":
|
|
549 C[i]+=(y[2])
|
|
550 elif y[0].split("_")[-1][i] == "G":
|
|
551 G[i]+=(y[2])
|
|
552 else:
|
|
553 T[i]+=y[2]
|
|
554
|
|
555 A[:] = [round(x*100,1) / total_reads for x in A]
|
|
556 C[:] = [round(x*100,1) / total_reads for x in C]
|
|
557 G[:] = [round(x*100,1) / total_reads for x in G]
|
|
558 T[:] = [round(x*100,1) / total_reads for x in T]
|
|
559
|
|
560 data = {'A':A,'C':C,'G':G,'T':T}
|
|
561 df = pd.DataFrame(data, index=[1,2,3,4,5])
|
|
562
|
|
563 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
|
|
564 h.grid(axis='y',linewidth=0.2)
|
|
565 plt.xticks(rotation=0, ha="right")
|
|
566 plt.ylabel("Counts (%)",fontsize=18)
|
|
567 plt.xlabel("Positions (nt)",fontsize=18)
|
|
568 plt.title(titlos,fontsize=20)
|
|
569 plt.tight_layout()
|
|
570 plt.savefig(file_bar, dpi=300)
|
|
571
|
|
572 import logomaker as lm
|
|
573 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
|
|
574
|
|
575 crp_logo.style_spines(visible=False)
|
|
576 crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
|
|
577 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
|
|
578 crp_logo.ax.set_title(titlos,fontsize=18)
|
|
579 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
|
|
580 crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
|
|
581 crp_logo.ax.xaxis.set_ticks_position('none')
|
|
582 crp_logo.ax.xaxis.set_tick_params(pad=-1)
|
|
583 figure = plt.gcf()
|
|
584 figure.set_size_inches(6, 4)
|
|
585 crp_logo.fig.savefig(file_logo,dpi=300)
|
|
586
|
|
587 ##########################################################################################################################################################################################################
|
|
588
|
|
589 def logo_seq_non_red(merge_LH2E):
|
|
590
|
|
591 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
|
|
592
|
|
593 A=[0]*5
|
|
594 C=[0]*5
|
|
595 G=[0]*5
|
|
596 T=[0]*5
|
|
597
|
|
598 for y in c_samples:
|
|
599 if "/" in y[0]:
|
|
600 length=[]
|
|
601 for x in y[0].split("/"):
|
|
602 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
|
|
603
|
|
604 best=min(length)
|
|
605 for i in range(5):
|
|
606 if i<len(best[1]):
|
|
607 if best[1][i] == "A":
|
|
608 A[i]+=1
|
|
609 elif best[1][i] == "C":
|
|
610 C[i]+=1
|
|
611 elif best[1][i] == "G":
|
|
612 G[i]+=1
|
|
613 else:
|
|
614 T[i]+=1
|
|
615 else:
|
|
616 for i in range(5):
|
|
617 if i<len(y[0].split("_")[-1]):
|
|
618 if y[0].split("_")[-1][i] == "A":
|
|
619 A[i]+=1
|
|
620 elif y[0].split("_")[-1][i] == "C":
|
|
621 C[i]+=1
|
|
622 elif y[0].split("_")[-1][i] == "G":
|
|
623 G[i]+=1
|
|
624 else:
|
|
625 T[i]+=1
|
|
626
|
|
627 data = {'A':A,'C':C,'G':G,'T':T}
|
|
628 df = pd.DataFrame(data, index=[1,2,3,4,5])
|
|
629 h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
|
|
630 h.set_xlabel("Positions (nt)")
|
|
631 h.set_ylabel("Unique sequences")
|
|
632 plt.xticks(rotation=0, ha="right")
|
|
633 plt.tight_layout()
|
|
634 plt.savefig("bar2.png", dpi=300)
|
|
635
|
|
636
|
|
637 import logomaker as lm
|
|
638 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
|
|
639
|
|
640 crp_logo.style_spines(visible=False)
|
|
641 crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
|
|
642 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
|
|
643
|
|
644 crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
|
|
645 crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
|
|
646 crp_logo.ax.xaxis.set_ticks_position('none')
|
|
647 crp_logo.ax.xaxis.set_tick_params(pad=-1)
|
|
648 crp_logo.ax.set_title("Non-redundant")
|
|
649 figure = plt.gcf()
|
|
650 crp_logo.fig.savefig('logo2.png', dpi=300)
|
|
651
|
|
652
|
|
653 ################################################################################################################################################################################################################
|
|
654
|
|
655
|
|
656
|
|
657
|
|
658
|
|
659 ################################################################################################################################################################
|
|
660
|
|
661 def pdf_before_DE(analysis):
|
|
662
|
|
663 # Import FPDF class
|
|
664 from fpdf import FPDF, fpdf
|
|
665
|
|
666 # Import glob module to find all the files matching a pattern
|
|
667 import glob
|
|
668
|
|
669 # Image extensions
|
|
670 if analysis=="2":
|
|
671 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
|
|
672 else:
|
|
673 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
|
|
674 # This list will hold the images file names
|
|
675 images = []
|
|
676
|
|
677 # Build the image list by merging the glob results (a list of files)
|
|
678 # for each extension. We are taking images from current folder.
|
|
679 for extension in image_extensions:
|
|
680 images.extend(glob.glob(extension))
|
|
681 #sys.exit(images)
|
|
682 # Create instance of FPDF class
|
|
683 pdf = FPDF('P', 'in', 'A4')
|
|
684 # Add new page. Without this you cannot create the document.
|
|
685 pdf.add_page()
|
|
686 # Set font to Arial, 'B'old, 16 pts
|
|
687 pdf.set_font('Arial', 'B', 20.0)
|
|
688
|
|
689 # Page header
|
|
690 pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
|
|
691 pdf.ln(0.7)
|
|
692 pdf.set_font('Arial','', 16.0)
|
|
693 pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
|
|
694
|
|
695 # Smaller font for image captions
|
|
696 pdf.set_font('Arial', '', 11.0)
|
|
697
|
|
698 # Image caption
|
|
699 pdf.ln(0.5)
|
|
700
|
|
701 yh=FPDF.get_y(pdf)
|
|
702 pdf.image(images[0],x=0.3,w=4, h=3)
|
|
703 pdf.image(images[1],x=4,y=yh, w=4, h=3)
|
|
704 pdf.ln(0.3)
|
|
705
|
|
706 # Image caption
|
|
707 pdf.cell(0.2)
|
|
708 pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
|
|
709 pdf.ln(0.2)
|
|
710 pdf.cell(0.2)
|
|
711 pdf.cell(3.0, 0.0, " and Treated (right) groups")
|
|
712
|
|
713
|
|
714 pdf.ln(0.5)
|
|
715 h1=FPDF.get_y(pdf)
|
|
716 pdf.image(images[2],x=1, w=6.5, h=5)
|
|
717 h2=FPDF.get_y(pdf)
|
|
718 FPDF.set_y(pdf,h1+0.2)
|
|
719 pdf.set_font('Arial','', 14.0)
|
|
720 pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
|
|
721 pdf.set_font('Arial', '', 11.0)
|
|
722 FPDF.set_y(pdf,h2)
|
|
723 FPDF.set_y(pdf,9.5)
|
|
724 # Image caption
|
|
725 pdf.cell(0.2)
|
|
726 if analysis=="2":
|
|
727 pdf.cell(3.0, 0.0, " Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
|
|
728 pdf.ln(0.2)
|
|
729 pdf.cell(0.2)
|
|
730 pdf.cell(3.0, 0.0, " in Control (left) and treated (right) groups")
|
|
731 else:
|
|
732 pdf.cell(3.0, 0.0, " Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
|
|
733 pdf.ln(0.2)
|
|
734 pdf.cell(0.2)
|
|
735 pdf.cell(3.0, 0.0, " Control (left) and treated (right) groups")
|
|
736
|
|
737
|
|
738
|
|
739 pdf.add_page()
|
|
740 pdf.set_font('Arial', 'B', 16.0)
|
|
741 pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
|
|
742 pdf.ln(0.7)
|
|
743 pdf.set_font('Arial', 'B', 12.0)
|
|
744 pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
|
|
745 pdf.ln(0.5)
|
|
746 pdf.image(images[3],x=1.5, w=5.5, h=4)
|
|
747 pdf.ln(0.6)
|
|
748 pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
|
|
749 pdf.set_font('Arial', '', 12.0)
|
|
750 pdf.ln(0.2)
|
|
751 pdf.image(images[4],x=1.5, w=5.5, h=4)
|
|
752 pdf.ln(0.3)
|
|
753 pdf.set_font('Arial', '', 11.0)
|
|
754 pdf.cell(0.2)
|
|
755 pdf.cell(3.0, 0.0, " * IsomiRs potentialy initiated from multiple loci")
|
|
756
|
|
757
|
|
758 if analysis=="2":
|
|
759 pdf.add_page('L')
|
|
760
|
|
761 pdf.set_font('Arial', 'B', 16.0)
|
|
762 pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
|
|
763 pdf.ln(0.5)
|
|
764 pdf.set_font('Arial', 'B', 12.0)
|
|
765 pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
|
|
766 pdf.ln(0.7)
|
|
767
|
|
768 yh=FPDF.get_y(pdf)
|
|
769 pdf.image(images[5],x=1.5,w=3.65, h=2.65)
|
|
770 pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
|
|
771 pdf.ln(0.5)
|
|
772 yh=FPDF.get_y(pdf)
|
|
773 pdf.image(images[6],x=1.5,w=3.65, h=2.65)
|
|
774 pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
|
|
775
|
|
776 pdf.close()
|
|
777 pdf.output('report1.pdf','F')
|
|
778
|
|
779
|
|
780
|
|
781
|
|
782 #############################################################################################################################################################3
|
|
783
|
|
784 def pdf_after_DE(analysis):
|
|
785
|
|
786 # Import FPDF class
|
|
787 from fpdf import FPDF
|
|
788
|
|
789 # Import glob module to find all the files matching a pattern
|
|
790 import glob
|
|
791
|
|
792 # Image extensions
|
|
793 image_extensions = ("tem.png","non.png","a2.png")
|
|
794
|
|
795 # This list will hold the images file names
|
|
796 images = []
|
|
797
|
|
798 # Build the image list by merging the glob results (a list of files)
|
|
799 # for each extension. We are taking images from current folder.
|
|
800 for extension in image_extensions:
|
|
801 images.extend(glob.glob(extension))
|
|
802
|
|
803 # Create instance of FPDF class
|
|
804 pdf = FPDF('P', 'in', 'letter')
|
|
805 # Add new page. Without this you cannot create the document.
|
|
806 pdf.add_page()
|
|
807 # Set font to Arial, 'B'old, 16 pts
|
|
808 pdf.set_font('Arial', 'B', 16.0)
|
|
809
|
|
810 # Page header
|
|
811 pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
|
|
812 #pdf.ln(0.25)
|
|
813
|
|
814 pdf.ln(0.7)
|
|
815 pdf.set_font('Arial','B', 12.0)
|
|
816 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
|
|
817
|
|
818
|
|
819 # Smaller font for image captions
|
|
820 pdf.set_font('Arial', '', 10.0)
|
|
821
|
|
822 # Image caption
|
|
823 pdf.ln(0.4)
|
|
824 pdf.image(images[0],x=0.8, w=7, h=8)
|
|
825 pdf.ln(0.3)
|
|
826
|
|
827 if analysis=="2":
|
|
828
|
|
829 pdf.add_page()
|
|
830 pdf.ln(0.7)
|
|
831 pdf.set_font('Arial','B', 12.0)
|
|
832 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
|
|
833 pdf.ln(0.4)
|
|
834 pdf.image(images[1],x=0.5, w=7.5, h=6.5)
|
|
835
|
|
836 pdf.add_page()
|
|
837 pdf.ln(0.5)
|
|
838 pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
|
|
839 pdf.ln(0.4)
|
|
840 pdf.image(images[2],x=0.8, w=7, h=8)
|
|
841 pdf.ln(0.3)
|
|
842
|
|
843
|
|
844 pdf.output('report2.pdf', 'F')
|
|
845
|
|
846
|
|
847
|