| 28 | 1 import itertools | 
|  | 2 import pandas as pd | 
|  | 3 from math import pi | 
|  | 4 import numpy as np | 
|  | 5 import matplotlib.pyplot as plt | 
|  | 6 import math | 
|  | 7 import logomaker as lm | 
|  | 8 from fpdf import FPDF, fpdf | 
|  | 9 import glob | 
|  | 10 | 
|  | 11 ################################################################################################################################################################# | 
|  | 12 | 
|  | 13 def pie_non_temp(merge_con,merge_non_con,merge_tre,merge_non_tre,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts,group_name1,group_name2): | 
|  | 14 | 
|  | 15     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 16     t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 17     c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_con] | 
|  | 18     t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_tre] | 
|  | 19 | 
|  | 20     c_templ = 0 | 
|  | 21     c_tem_counts = 0 | 
|  | 22     c_mature = 0 | 
|  | 23     c_mat_counts = 0 | 
|  | 24     t_templ = 0 | 
|  | 25     t_tem_counts = 0 | 
|  | 26     t_mature = 0 | 
|  | 27     t_mat_counts = 0 | 
|  | 28 | 
|  | 29     c_non = len(c_non_samples) | 
|  | 30     c_non_counts = sum(x[2] for x in c_non_samples) | 
|  | 31     t_non = len(t_non_samples) | 
|  | 32     t_non_counts = sum(x[2] for x in t_non_samples) | 
|  | 33 | 
|  | 34     c_unmap = c_unmap - c_non | 
|  | 35     t_unmap = c_unmap - t_non | 
|  | 36 | 
|  | 37     c_unmap_counts=c_unmap_counts - c_non_counts | 
|  | 38     t_unmap_counts=t_unmap_counts - t_non_counts | 
|  | 39 | 
|  | 40 | 
|  | 41     for x in c_samples: | 
|  | 42 | 
|  | 43         if "/" not in x[0]: | 
|  | 44             if "chr" in  x[0].split("_")[-1]: | 
|  | 45                c_mature+=1 | 
|  | 46                c_mat_counts += x[2] | 
|  | 47             else: | 
|  | 48                c_templ+=1 | 
|  | 49                c_tem_counts += x[2] | 
|  | 50         else: | 
|  | 51            f=0 | 
|  | 52            for y in x[0].split("/"): | 
|  | 53                if "chr" in y.split("_")[-1]: | 
|  | 54                   c_mature+=1 | 
|  | 55                   c_mat_counts += x[2] | 
|  | 56                   f=1 | 
|  | 57                   break | 
|  | 58            if f==0: | 
|  | 59               c_templ+=1 | 
|  | 60               c_tem_counts += x[2] | 
|  | 61 | 
|  | 62     for x in t_samples: | 
|  | 63 | 
|  | 64         if "/" not in x[0]: | 
|  | 65             if "chr" in  x[0].split("_")[-1]: | 
|  | 66                t_mature+=1 | 
|  | 67                t_mat_counts += x[2] | 
|  | 68             else: | 
|  | 69                t_templ+=1 | 
|  | 70                t_tem_counts += x[2] | 
|  | 71         else: | 
|  | 72            f=0 | 
|  | 73            for y in x[0].split("/"): | 
|  | 74                if "chr" in y.split("_")[-1]: | 
|  | 75                   t_mature+=1 | 
|  | 76                   t_mat_counts += x[2] | 
|  | 77                   f=1 | 
|  | 78                   break | 
|  | 79            if f==0: | 
|  | 80               t_templ+=1 | 
|  | 81               t_tem_counts += x[2] | 
|  | 82 | 
|  | 83     fig = plt.figure(figsize=(7,5)) | 
|  | 84     labels = 'miRNA RefSeq','templated', 'unassigned','non-templated' | 
|  | 85     sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts] | 
|  | 86     colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | 
|  | 87     ax1 = plt.subplot2grid((1,2),(0,0)) | 
|  | 88     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 89     [x.set_fontsize(10) for x in texts] | 
|  | 90     plt.title(group_name1.capitalize() + ' group (reads)',fontsize=12) | 
|  | 91     labels = 'miRNA RefSeq','templated', 'unassigned','non-templated' | 
|  | 92     sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts] | 
|  | 93     colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | 
|  | 94     ax2 = plt.subplot2grid((1,2),(0,1)) | 
|  | 95     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 96     [x.set_fontsize(10) for x in texts] | 
|  | 97     plt.title(group_name2.capitalize() + ' group (reads)', fontsize=12) | 
|  | 98     plt.savefig('pie_non.png',dpi=300) | 
|  | 99 | 
|  | 100 ###################################################################################################################################################### | 
|  | 101 | 
|  | 102 | 
|  | 103 def pie_temp(merge_con,c_unmap,c_unmap_counts,merge_tre,t_unmap,t_unmap_counts,group_name1,group_name2): | 
|  | 104 | 
|  | 105     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 106     t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 107 | 
|  | 108     c_templ = 0 | 
|  | 109     c_tem_counts = 0 | 
|  | 110     c_mature = 0 | 
|  | 111     c_mat_counts = 0 | 
|  | 112     t_templ = 0 | 
|  | 113     t_tem_counts = 0 | 
|  | 114     t_mature = 0 | 
|  | 115     t_mat_counts = 0 | 
|  | 116 | 
|  | 117     for x in c_samples: | 
|  | 118 | 
|  | 119         if "/" not in x[0]: | 
|  | 120             if "chr" in  x[0].split("_")[-1]: | 
|  | 121                c_mature+=1 | 
|  | 122                c_mat_counts += x[2] | 
|  | 123             else: | 
|  | 124                c_templ+=1 | 
|  | 125                c_tem_counts += x[2] | 
|  | 126         else: | 
|  | 127            f=0 | 
|  | 128            for y in x[0].split("/"): | 
|  | 129                if "chr" in y.split("_")[-1]: | 
|  | 130                   c_mature+=1 | 
|  | 131                   c_mat_counts += x[2] | 
|  | 132                   f=1 | 
|  | 133                   break | 
|  | 134            if f==0: | 
|  | 135               c_templ+=1 | 
|  | 136               c_tem_counts += x[2] | 
|  | 137 | 
|  | 138     for x in t_samples: | 
|  | 139 | 
|  | 140         if "/" not in x[0]: | 
|  | 141             if "chr" in  x[0].split("_")[-1]: | 
|  | 142                t_mature+=1 | 
|  | 143                t_mat_counts += x[2] | 
|  | 144             else: | 
|  | 145                t_templ+=1 | 
|  | 146                t_tem_counts += x[2] | 
|  | 147         else: | 
|  | 148            f=0 | 
|  | 149            for y in x[0].split("/"): | 
|  | 150                if "chr" in y.split("_")[-1]: | 
|  | 151                   t_mature+=1 | 
|  | 152                   t_mat_counts += x[2] | 
|  | 153                   f=1 | 
|  | 154                   break | 
|  | 155            if f==0: | 
|  | 156               t_templ+=1 | 
|  | 157               t_tem_counts += x[2] | 
|  | 158 | 
|  | 159 | 
|  | 160     fig = plt.figure() | 
|  | 161     labels = 'miRNA RefSeq','templated', 'unassigned' | 
|  | 162     sizes = [c_mat_counts, c_tem_counts, c_unmap_counts] | 
|  | 163     colors = ['gold', 'yellowgreen', 'lightskyblue'] | 
|  | 164     explode = (0.2, 0.05, 0.1) | 
|  | 165     ax1 = plt.subplot2grid((1,2),(0,0)) | 
|  | 166     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 167     [x.set_fontsize(10) for x in texts] | 
|  | 168     plt.title(group_name1.capitalize() + ' group (reads)', fontsize=12) | 
|  | 169     labels = 'miRNA RefSeq','templated', 'unassigned' | 
|  | 170     sizes = [t_mat_counts, t_tem_counts, t_unmap_counts] | 
|  | 171     colors = ['gold', 'yellowgreen', 'lightskyblue'] | 
|  | 172     explode = (0.2, 0.05, 0.1) | 
|  | 173     ax2 = plt.subplot2grid((1,2),(0,1)) | 
|  | 174     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 175     [x.set_fontsize(10) for x in texts] | 
|  | 176     plt.title(group_name2.capitalize() + ' group (reads)',fontsize = 12) | 
|  | 177     plt.savefig('pie_tem.png',dpi=300) | 
|  | 178 | 
|  | 179 ################################################################################################################################################################################################################### | 
|  | 180 | 
|  | 181 | 
|  | 182 def make_spider(merge_con,merge_tre,group_name1,group_name2): | 
|  | 183 | 
|  | 184   c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 185   t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 186 | 
|  | 187   c_5 = 0 | 
|  | 188   c_5_counts = 0 | 
|  | 189   c_3 = 0 | 
|  | 190   c_3_counts = 0 | 
|  | 191   c_both =0 | 
|  | 192   c_both_counts=0 | 
|  | 193   c_mature = 0 | 
|  | 194   c_mat_counts = 0 | 
|  | 195   c_exception=0 | 
|  | 196   c_exception_counts=0 | 
|  | 197 | 
|  | 198 | 
|  | 199   t_5 = 0 | 
|  | 200   t_5_counts = 0 | 
|  | 201   t_3 = 0 | 
|  | 202   t_3_counts = 0 | 
|  | 203   t_both = 0 | 
|  | 204   t_both_counts = 0 | 
|  | 205   t_mature = 0 | 
|  | 206   t_mat_counts = 0 | 
|  | 207   t_exception = 0 | 
|  | 208   t_exception_counts=0 | 
|  | 209 | 
|  | 210   for x in c_samples: | 
|  | 211 | 
|  | 212       if "/" not in x[0]: | 
|  | 213           if "chr" in  x[0].split("_")[-1]: | 
|  | 214              c_mature+=1 | 
|  | 215              c_mat_counts += x[2] | 
|  | 216           elif 0 == int(x[0].split("_")[-1]): | 
|  | 217              c_5+=1 | 
|  | 218              c_5_counts += x[2] | 
|  | 219           elif 0 == int(x[0].split("_")[-2]): | 
|  | 220              c_3+=1 | 
|  | 221              c_3_counts += x[2] | 
|  | 222           else: | 
|  | 223              c_both+=1 | 
|  | 224              c_both_counts+=x[2] | 
|  | 225 | 
|  | 226       else: | 
|  | 227          f=0 | 
|  | 228          for y in x[0].split("/"): | 
|  | 229              if "chr" in y.split("_")[-1]: | 
|  | 230                 c_mature+=1 | 
|  | 231                 c_mat_counts += x[2] | 
|  | 232                 f=1 | 
|  | 233                 break | 
|  | 234          if f==0: | 
|  | 235             for y in x[0].split("/"): | 
|  | 236                 c_exception+=1 | 
|  | 237                 c_exception_counts += x[2] | 
|  | 238 | 
|  | 239 | 
|  | 240   for x in t_samples: | 
|  | 241 | 
|  | 242       if "/" not in x[0]: | 
|  | 243           if "chr" in  x[0].split("_")[-1]: | 
|  | 244              t_mature+=1 | 
|  | 245              t_mat_counts += x[2] | 
|  | 246           elif 0 == int(x[0].split("_")[-1]): | 
|  | 247              t_5+=1 | 
|  | 248              t_5_counts += x[2] | 
|  | 249           elif 0 == int(x[0].split("_")[-2]): | 
|  | 250              t_3+=1 | 
|  | 251              t_3_counts += x[2] | 
|  | 252           else: | 
|  | 253              t_both+=1 | 
|  | 254              t_both_counts+=x[2] | 
|  | 255 | 
|  | 256       else: | 
|  | 257          f=0 | 
|  | 258          for y in x[0].split("/"): | 
|  | 259              if "chr" in y.split("_")[-1]: | 
|  | 260                 t_mature+=1 | 
|  | 261                 t_mat_counts += x[2] | 
|  | 262                 f=1 | 
|  | 263                 break | 
|  | 264          if f==0: | 
|  | 265             for y in x[0].split("/"): | 
|  | 266                 t_exception+=1 | 
|  | 267                 t_exception_counts += x[2] | 
|  | 268 | 
|  | 269 | 
|  | 270   c_all = c_5+c_3+c_both+c_mature+c_exception | 
|  | 271   c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts | 
|  | 272 | 
|  | 273   t_all = t_5+t_3+t_both+t_mature + t_exception | 
|  | 274   t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts | 
|  | 275 | 
|  | 276   c_5 = round(c_5/c_all*100,2) | 
|  | 277   c_3 = round(c_3/c_all*100,2) | 
|  | 278   c_both = round(c_both/c_all*100,2) | 
|  | 279   c_mature = round(c_mature/c_all*100,2) | 
|  | 280   c_exception = round(c_exception/c_all*100,2) | 
|  | 281 | 
|  | 282   c_5_counts = round(c_5_counts/c_all_counts*100,2) | 
|  | 283   c_3_counts = round(c_3_counts/c_all_counts*100,2) | 
|  | 284   c_both_counts = round(c_both_counts/c_all_counts*100,2) | 
|  | 285   c_mat_counts = round(c_mat_counts/c_all_counts*100,2) | 
|  | 286   c_exception_counts = round(c_exception_counts/c_all_counts*100,2) | 
|  | 287 | 
|  | 288   t_5 = round(t_5/t_all*100,2) | 
|  | 289   t_3 = round(t_3/t_all*100,2) | 
|  | 290   t_both = round(t_both/t_all*100,2) | 
|  | 291   t_mature = round(t_mature/t_all*100,2) | 
|  | 292   t_exception = round(t_exception/t_all*100,2) | 
|  | 293 | 
|  | 294   t_5_counts = round(t_5_counts/t_all_counts*100,2) | 
|  | 295   t_3_counts = round(t_3_counts/t_all_counts*100,2) | 
|  | 296   t_both_counts = round(t_both_counts/t_all_counts*100,2) | 
|  | 297   t_mat_counts = round(t_mat_counts/t_all_counts*100,2) | 
|  | 298   t_exception_counts = round(t_exception_counts/t_all_counts*100,2) | 
|  | 299 | 
|  | 300   radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception) | 
|  | 301   radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts) | 
|  | 302 | 
|  | 303   df=pd.DataFrame({ | 
|  | 304   'group':[group_name1.capitalize(),group_name2.capitalize()], | 
|  | 305   """5'3'-isomiRs""":[c_both,t_both], | 
|  | 306   """3'-isomiRs""":[c_3,t_3], | 
|  | 307   'RefSeq miRNA':[c_mature,t_mature], | 
|  | 308   """5'-isomiRs""":[c_5,t_5], | 
|  | 309   'others*':[c_exception,t_exception]}) | 
|  | 310 | 
|  | 311   df1=pd.DataFrame({ | 
|  | 312   'group':[group_name1.capitalize(),group_name2.capitalize()], | 
|  | 313   """5'3'-isomiRs""":[c_both_counts,t_both_counts], | 
|  | 314   """3'-isomiRs""":[c_3_counts,t_3_counts], | 
|  | 315   'RefSeq miRNA':[c_mat_counts,t_mat_counts], | 
|  | 316   """5'-isomiRs""":[c_5_counts,t_5_counts], | 
|  | 317   'others*':[c_exception_counts,t_exception_counts]}) | 
|  | 318 | 
|  | 319   spider_last(df,radar_max,1,group_name1,group_name2) | 
|  | 320   spider_last(df1,radar_max_counts,2,group_name1,group_name2) | 
|  | 321 | 
|  | 322 ##################################################################################################################################################### | 
|  | 323 | 
|  | 324 def spider_last(df,radar_max,flag,group_name1,group_name2): | 
|  | 325   # ------- PART 1: Create background | 
|  | 326   fig = plt.figure() | 
|  | 327   # number of variable | 
|  | 328   categories=list(df)[1:] | 
|  | 329   N = len(categories) | 
|  | 330 | 
|  | 331   # What will be the angle of each axis in the plot? (we divide the plot / number of variable) | 
|  | 332   angles = [n / float(N) * 2 * pi for n in range(N)] | 
|  | 333   angles += angles[:1] | 
|  | 334 | 
|  | 335   # Initialise the spider plot | 
|  | 336   ax = plt.subplot(111, polar=True) | 
|  | 337 | 
|  | 338   # If you want the first axis to be on top: | 
|  | 339   ax.set_theta_offset(pi/2) | 
|  | 340   ax.set_theta_direction(-1) | 
|  | 341 | 
|  | 342   # Draw one axe per variable + add labels labels yet | 
|  | 343   plt.xticks(angles[:-1], categories, fontsize=13) | 
|  | 344 | 
|  | 345   # Draw ylabels | 
|  | 346   radar_max=round(radar_max+radar_max*0.1) | 
|  | 347   mul=len(str(radar_max))-1 | 
|  | 348   maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul) | 
|  | 349   sep = round(maxi/4) | 
|  | 350   plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=12) | 
|  | 351   plt.ylim(0, maxi) | 
|  | 352 | 
|  | 353   # ------- PART 2: Add plots | 
|  | 354 | 
|  | 355   # Plot each individual = each line of the data | 
|  | 356   # I don't do a loop, because plotting more than 2 groups makes the chart unreadable | 
|  | 357 | 
|  | 358   # Ind1 | 
|  | 359   values=df.loc[0].drop('group').values.flatten().tolist() | 
|  | 360   values += values[:1] | 
|  | 361   ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label=group_name1.capitalize()) | 
|  | 362   ax.fill(angles, values, 'b', alpha=0.1) | 
|  | 363 | 
|  | 364   # Ind2 | 
|  | 365   values=df.loc[1].drop('group').values.flatten().tolist() | 
|  | 366   values += values[:1] | 
|  | 367   ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label=group_name2.capitalize()) | 
|  | 368   ax.fill(angles, values, 'r', alpha=0.1) | 
|  | 369 | 
|  | 370   # Add legend | 
|  | 371   if flag==1: | 
|  | 372      plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1)) | 
|  | 373      plt.savefig('spider_non_red.png',dpi=300) | 
|  | 374   else: | 
|  | 375      plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1)) | 
|  | 376      plt.savefig('spider_red.png',dpi=300) | 
|  | 377 | 
|  | 378 | 
|  | 379 ############################################################################################################################################################################################################# | 
|  | 380 | 
|  | 381 def hist_red(samples,flag,group_name): | 
|  | 382 | 
|  | 383     lengths=[] | 
|  | 384     cat=[] | 
|  | 385     total_reads=0 | 
|  | 386     seq=[] | 
|  | 387 | 
|  | 388     if flag == "c": | 
|  | 389         title = "Length distribution of "+ group_name.lower() +" group (redundant reads)" | 
|  | 390     if flag == "t": | 
|  | 391         title = "Length distribution of "+ group_name.lower() +" group (redundant reads)" | 
|  | 392 | 
|  | 393     # classification of the sequences on two categories mapped or unmapped | 
|  | 394     for i in samples: | 
|  | 395         for x in i: | 
|  | 396             lengths.append(x[3]) | 
|  | 397             if x[1]=="0": | 
|  | 398                seq.append([x[3],x[0].split("-")[1],"Mapped"]) | 
|  | 399                cat.append("Mapped") | 
|  | 400             if x[1] == "4": | 
|  | 401                seq.append([x[3],x[0].split("-")[1],"Unassigned"]) | 
|  | 402                cat.append("Unassigned") | 
|  | 403 | 
|  | 404     # Keep lengths below 35nts | 
|  | 405     uni_len=list(set(lengths)) | 
|  | 406     uni_len=[x for x in uni_len if x<=35] | 
|  | 407 | 
|  | 408     # Remove duplicates from sequences | 
|  | 409     seq.sort() | 
|  | 410     uni_seq=list(seq for seq,_ in itertools.groupby(seq)) | 
|  | 411 | 
|  | 412     # Calculation of the reads per group (mapped or unmapped) | 
|  | 413     total_reads+=sum([int(x[1]) for x in uni_seq]) | 
|  | 414     map_reads=[] | 
|  | 415     unmap_reads=[] | 
|  | 416     length=[] | 
|  | 417     for y in uni_len: | 
|  | 418         map_temp=0 | 
|  | 419         unmap_temp=0 | 
|  | 420         for x in uni_seq: | 
|  | 421             if x[0]==y and x[2]=="Mapped": | 
|  | 422                map_temp+=int(x[1]) | 
|  | 423             if x[0]==y and x[2]=="Unassigned": | 
|  | 424                unmap_temp+=int(x[1]) | 
|  | 425         length.append(y) | 
|  | 426         map_reads.append(round(map_temp/total_reads*100,2))      # percentage of mapped reads over total number of sequences | 
|  | 427         unmap_reads.append(round(unmap_temp/total_reads*100,2))  # percentage of unmapped reads over total number of sequences | 
|  | 428 | 
|  | 429     # Generation of the graph | 
|  | 430     ylim=max([sum(x) for x in zip(unmap_reads, map_reads)]) | 
|  | 431     ylim=ylim+ylim*20/100 | 
|  | 432     fig, ax = plt.subplots() | 
|  | 433     width=0.8 | 
|  | 434     ax.bar(length, unmap_reads, width, label='Unassigned') | 
|  | 435     h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped') | 
|  | 436     plt.xticks(np.arange(length[0], length[-1]+1, 1)) | 
|  | 437     plt.yticks(np.arange(0, ylim, 5)) | 
|  | 438     plt.xlabel('Length (nt)',fontsize=14) | 
|  | 439     plt.ylabel('Percentage',fontsize=14) | 
|  | 440     plt.title(title,fontsize=14) | 
|  | 441     ax.legend() | 
|  | 442     plt.ylim([0, ylim]) | 
|  | 443     ax.grid(axis='y',linewidth=0.2) | 
|  | 444 | 
|  | 445     # Save of the graph | 
|  | 446     if flag=='c': | 
|  | 447        plt.savefig('c_hist_red.png',dpi=300) | 
|  | 448 | 
|  | 449     if flag=='t': | 
|  | 450        plt.savefig('t_hist_red.png',dpi=300) | 
|  | 451 | 
|  | 452 ################################################################################################################# | 
|  | 453 | 
|  | 454 def logo_seq_red(merge, flag, group_name): | 
|  | 455 | 
|  | 456     if flag=="c": | 
|  | 457        titlos= group_name.capitalize() + " group (redundant)" | 
|  | 458        file_logo="c_logo.png" | 
|  | 459        file_bar="c_bar.png" | 
|  | 460     if flag=="t": | 
|  | 461        titlos= group_name.capitalize() + " group (redundant)" | 
|  | 462        file_logo="t_logo.png" | 
|  | 463        file_bar="t_bar.png" | 
|  | 464 | 
|  | 465     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge] | 
|  | 466 | 
|  | 467     A=[0]*3 | 
|  | 468     C=[0]*3 | 
|  | 469     G=[0]*3 | 
|  | 470     T=[0]*3 | 
|  | 471     total_reads=0 | 
|  | 472 | 
|  | 473     for y in c_samples: | 
|  | 474         if "/" in y[0]: | 
|  | 475             length=[] | 
|  | 476             for x in y[0].split("/"): | 
|  | 477                 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | 
|  | 478 | 
|  | 479             best=min(length) | 
|  | 480             total_reads+=best[2] | 
|  | 481             for i in range(3): | 
|  | 482                 if i<len(best[1]): | 
|  | 483                     if best[1][i] == "A": | 
|  | 484                         A[i]+=best[2] | 
|  | 485                     elif best[1][i] == "C": | 
|  | 486                         C[i]+=best[2] | 
|  | 487                     elif best[1][i] == "G": | 
|  | 488                         G[i]+=best[2] | 
|  | 489                     else: | 
|  | 490                         T[i]+=best[2] | 
|  | 491         else: | 
|  | 492             total_reads+=y[2] | 
|  | 493             for i in range(3): | 
|  | 494                 if i<len(y[0].split("_")[-1]): | 
|  | 495                     if y[0].split("_")[-1][i] == "A": | 
|  | 496                         A[i]+=(y[2]) | 
|  | 497                     elif y[0].split("_")[-1][i] == "C": | 
|  | 498                         C[i]+=(y[2]) | 
|  | 499                     elif y[0].split("_")[-1][i] == "G": | 
|  | 500                         G[i]+=(y[2]) | 
|  | 501                     else: | 
|  | 502                         T[i]+=y[2] | 
|  | 503 | 
|  | 504     A[:] = [round(x*100,1) / total_reads for x in A] | 
|  | 505     C[:] = [round(x*100,1) / total_reads for x in C] | 
|  | 506     G[:] = [round(x*100,1) / total_reads for x in G] | 
|  | 507     T[:] = [round(x*100,1) / total_reads for x in T] | 
|  | 508 | 
|  | 509 | 
|  | 510 | 
|  | 511     data = {'A':A,'C':C,'G':G,'T':T} | 
|  | 512     df = pd.DataFrame(data, index=[1,2,3]) | 
|  | 513     h=df.plot.bar(color=tuple(["g", "b","gold","r"]) ) | 
|  | 514     h.grid(axis='y',linewidth=0.2) | 
|  | 515     plt.xticks(rotation=0, ha="right") | 
|  | 516     plt.ylabel("Counts (%)",fontsize=18) | 
|  | 517     plt.xlabel("Numbers of additional nucleotides",fontsize=18) | 
|  | 518     plt.title(titlos,fontsize=20) | 
|  | 519     plt.tight_layout() | 
|  | 520     plt.savefig(file_bar, dpi=300) | 
|  | 521 | 
|  | 522     crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | 
|  | 523     crp_logo.style_spines(visible=False) | 
|  | 524     crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | 
|  | 525     crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | 
|  | 526 | 
|  | 527     # style using Axes methods | 
|  | 528     crp_logo.ax.set_title(titlos,fontsize=18) | 
|  | 529     crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5) | 
|  | 530     crp_logo.ax.set_xlabel("Numbers of additional nucleotides",fontsize=16, labelpad=5) | 
|  | 531     crp_logo.ax.xaxis.set_ticks_position('none') | 
|  | 532     crp_logo.ax.xaxis.set_tick_params(pad=-1) | 
|  | 533     figure = plt.gcf() | 
|  | 534     figure.set_size_inches(6, 4) | 
|  | 535     crp_logo.fig.savefig(file_logo,dpi=300) | 
|  | 536 | 
|  | 537 ########################################################################################################################################################################################################## | 
|  | 538 | 
|  | 539 def pdf_before_DE(analysis,group_name1,group_name2): | 
|  | 540 | 
|  | 541   # Image extensions | 
|  | 542   if analysis=="2": | 
|  | 543      image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png") | 
|  | 544   else: | 
|  | 545      image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png") | 
|  | 546   # This list will hold the images file names | 
|  | 547   images = [] | 
|  | 548 | 
|  | 549   # Build the image list by merging the glob results (a list of files) | 
|  | 550   # for each extension. We are taking images from current folder. | 
|  | 551   for extension in image_extensions: | 
|  | 552       images.extend(glob.glob(extension)) | 
|  | 553 | 
|  | 554   # Create instance of FPDF class | 
|  | 555   pdf = FPDF('P', 'in', 'A4') | 
|  | 556   # Add new page. Without this you cannot create the document. | 
|  | 557   pdf.add_page() | 
|  | 558   # Set font to Arial, 'B'old, 16 pts | 
|  | 559   pdf.set_font('Arial', 'B', 20.0) | 
|  | 560 | 
|  | 561   # Page header | 
|  | 562   pdf.cell(pdf.w-0.5, 0.5, 'IsomiR profile report',align='C') | 
|  | 563   pdf.ln(0.7) | 
|  | 564   pdf.set_font('Arial','B', 16.0) | 
|  | 565   pdf.cell(pdf.w-0.5, 0.5, 'sRNA length distribution',align='C') | 
|  | 566 | 
|  | 567   # Smaller font for image captions | 
|  | 568   pdf.set_font('Arial', '', 11.0) | 
|  | 569   pdf.ln(0.5) | 
|  | 570 | 
|  | 571   yh=FPDF.get_y(pdf) | 
|  | 572   pdf.image(images[0],x=0.3,w=4, h=3) | 
|  | 573   pdf.image(images[1],x=4,y=yh, w=4, h=3) | 
|  | 574   pdf.ln(0.3) | 
|  | 575 | 
|  | 576   pdf.cell(0.2) | 
|  | 577   pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precursor arm reference DB (5p and 3p arms) in "+group_name1.lower()) | 
|  | 578   pdf.ln(0.2) | 
|  | 579   pdf.cell(0.2) | 
|  | 580   pdf.cell(3.0, 0.0, "  (left) and "+group_name2.lower()+" (right) groups") | 
|  | 581 | 
|  | 582   pdf.ln(0.5) | 
|  | 583   h1=FPDF.get_y(pdf) | 
|  | 584   pdf.image(images[2],x=1, w=6.5, h=5) | 
|  | 585   h2=FPDF.get_y(pdf) | 
|  | 586   FPDF.set_y(pdf,h1+0.2) | 
|  | 587   pdf.set_font('Arial','B', 16.0) | 
|  | 588   if analysis=="2": | 
|  | 589      pdf.cell(pdf.w-0.5, 0.5, 'Templated and non-templated isomiRs',align='C') | 
|  | 590   else: | 
|  | 591      pdf.cell(pdf.w-0.5, 0.5, 'Templated isomiRs',align='C') | 
|  | 592   pdf.set_font('Arial', '', 11.0) | 
|  | 593   FPDF.set_y(pdf,h2) | 
|  | 594   FPDF.set_y(pdf,9.5) | 
|  | 595   pdf.cell(0.2) | 
|  | 596 | 
|  | 597   if analysis=="2": | 
|  | 598      pdf.cell(3.0, 0.0, "  RefSeq miRNAs, templated isomiRs, non-templated isomiRs and unassigned sequences as percentage") | 
|  | 599      pdf.ln(0.2) | 
|  | 600      pdf.cell(0.2) | 
|  | 601      pdf.cell(3.0, 0.0, "  of total sRNA reads in "+group_name1.lower()+" (left) and "+group_name2.lower()+" (right) groups") | 
|  | 602   else: | 
|  | 603      pdf.cell(3.0, 0.0, "  RefSeq miRNAS, templated isomiRs and unassigned sequences as percentage of total sRNA reads in") | 
|  | 604      pdf.ln(0.2) | 
|  | 605      pdf.cell(0.2) | 
|  | 606      pdf.cell(3.0, 0.0, "  "+group_name1.lower()+" (left) and "+group_name2.lower() + " (right) groups") | 
|  | 607 | 
|  | 608   pdf.add_page() | 
|  | 609   pdf.set_font('Arial', 'B', 18.0) | 
|  | 610   pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR subtypes",align='C') | 
|  | 611   pdf.ln(0.7) | 
|  | 612   pdf.set_font('Arial', 'B', 14.0) | 
|  | 613   pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR profile (redundant reads)",align='C') | 
|  | 614   pdf.ln(0.5) | 
|  | 615   pdf.image(images[3],x=1.5, w=5.5, h=4) | 
|  | 616   pdf.ln(0.6) | 
|  | 617   pdf.cell(pdf.w-0.5, 0.0, "Templated isomiR profile (non-redundant reads)",align='C') | 
|  | 618   pdf.set_font('Arial', '', 12.0) | 
|  | 619   pdf.ln(0.2) | 
|  | 620   pdf.image(images[4],x=1.5, w=5.5, h=4) | 
|  | 621   pdf.ln(0.3) | 
|  | 622   pdf.set_font('Arial', '', 11.0) | 
|  | 623   pdf.cell(0.2) | 
|  | 624   pdf.cell(3.0, 0.0, "  * IsomiRs potentially generated from multiple loci") | 
|  | 625 | 
|  | 626 | 
|  | 627   if analysis=="2": | 
|  | 628      pdf.add_page('L') | 
|  | 629 | 
|  | 630      pdf.set_font('Arial', 'B', 18.0) | 
|  | 631      pdf.cell(pdf.w-0.5, 0.5, "Non-templated isomiRs",align='C') | 
|  | 632      pdf.ln(0.5) | 
|  | 633      pdf.set_font('Arial', 'B', 14.0) | 
|  | 634      pdf.cell(pdf.w-0.5, 0.5, "3'-end additions to RefSeq miRNAs and templated isomiRs",align='C') | 
|  | 635      pdf.ln(0.7) | 
|  | 636 | 
|  | 637      yh=FPDF.get_y(pdf) | 
|  | 638      pdf.image(images[5],x=1.5,w=3.65, h=2.65) | 
|  | 639      pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65) | 
|  | 640      pdf.ln(0.5) | 
|  | 641      yh=FPDF.get_y(pdf) | 
|  | 642      pdf.image(images[6],x=1.5,w=3.65, h=2.65) | 
|  | 643      pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65) | 
|  | 644 | 
|  | 645   pdf.close() | 
|  | 646   pdf.output('report1.pdf','F') | 
|  | 647 | 
|  | 648 #############################################################################################################################################################3 | 
|  | 649 | 
|  | 650 |