| 32 | 1 import itertools | 
|  | 2 import pandas as pd | 
|  | 3 from math import pi | 
|  | 4 import numpy as np | 
|  | 5 import matplotlib.pyplot as plt | 
|  | 6 import math | 
|  | 7 import logomaker as lm | 
|  | 8 from fpdf import FPDF, fpdf | 
|  | 9 import glob | 
|  | 10 | 
|  | 11 ############################################################################################################################################################################### | 
|  | 12 def pie_non_temp(merge_con,merge_non_con,merge_tre,merge_non_tre,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts,group_name1,group_name2): | 
|  | 13 | 
|  | 14     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 15     t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 16     c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_con] | 
|  | 17     t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_tre] | 
|  | 18 | 
|  | 19     c_templ = 0 | 
|  | 20     c_tem_counts = 0 | 
|  | 21     c_mature = 0 | 
|  | 22     c_mat_counts = 0 | 
|  | 23     t_templ = 0 | 
|  | 24     t_tem_counts = 0 | 
|  | 25     t_mature = 0 | 
|  | 26     t_mat_counts = 0 | 
|  | 27 | 
|  | 28     c_non = len(c_non_samples) | 
|  | 29     c_non_counts = sum(x[2] for x in c_non_samples) | 
|  | 30     t_non = len(t_non_samples) | 
|  | 31     t_non_counts = sum(x[2] for x in t_non_samples) | 
|  | 32 | 
|  | 33     c_unmap = c_unmap - c_non | 
|  | 34     t_unmap = c_unmap - t_non | 
|  | 35 | 
|  | 36     c_unmap_counts=c_unmap_counts - c_non_counts | 
|  | 37     t_unmap_counts=t_unmap_counts - t_non_counts | 
|  | 38 | 
|  | 39 | 
|  | 40     for x in c_samples: | 
|  | 41 | 
|  | 42         if "/" not in x[0]: | 
|  | 43             if len(x[0].split("_"))==2: | 
|  | 44                c_mature+=1 | 
|  | 45                c_mat_counts += x[2] | 
|  | 46             else: | 
|  | 47                c_templ+=1 | 
|  | 48                c_tem_counts += x[2] | 
|  | 49         else: | 
|  | 50            f=0 | 
|  | 51            for y in x[0].split("/"): | 
|  | 52                if len(y.split("_"))==2: | 
|  | 53                   c_mature+=1 | 
|  | 54                   c_mat_counts += x[2] | 
|  | 55                   f=1 | 
|  | 56                   break | 
|  | 57            if f==0: | 
|  | 58               c_templ+=1 | 
|  | 59               c_tem_counts += x[2] | 
|  | 60 | 
|  | 61     for x in t_samples: | 
|  | 62 | 
|  | 63         if "/" not in x[0]: | 
|  | 64             if len(x[0].split("_"))==2: | 
|  | 65                t_mature+=1 | 
|  | 66                t_mat_counts += x[2] | 
|  | 67             else: | 
|  | 68                t_templ+=1 | 
|  | 69                t_tem_counts += x[2] | 
|  | 70         else: | 
|  | 71            f=0 | 
|  | 72            for y in x[0].split("/"): | 
|  | 73                if len(y.split("_"))==2: | 
|  | 74                   t_mature+=1 | 
|  | 75                   t_mat_counts += x[2] | 
|  | 76                   f=1 | 
|  | 77                   break | 
|  | 78            if f==0: | 
|  | 79               t_templ+=1 | 
|  | 80               t_tem_counts += x[2] | 
|  | 81 | 
|  | 82     fig = plt.figure(figsize=(7,5)) | 
|  | 83 | 
|  | 84     labels = 'miRNA RefSeq','templated', 'unassigned' ,'non-templated' | 
|  | 85     sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts] | 
|  | 86     colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | 
|  | 87     # Plot | 
|  | 88     ax1 = plt.subplot2grid((1,2),(0,0)) | 
|  | 89     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 90     [x.set_fontsize(10) for x in texts] | 
|  | 91     plt.title(group_name1.capitalize() + ' group (reads)',fontsize=12) | 
|  | 92 | 
|  | 93     labels = 'miRNA RefSeq','templated', 'unassigned','non-templated' | 
|  | 94     sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts] | 
|  | 95     colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue'] | 
|  | 96     # Plot | 
|  | 97     ax2 = plt.subplot2grid((1,2),(0,1)) | 
|  | 98     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 99     [x.set_fontsize(10) for x in texts] | 
|  | 100     plt.title(group_name2.capitalize() + ' group (reads)', fontsize=12) | 
|  | 101     plt.savefig('pie_non.png',dpi=300) | 
|  | 102 | 
|  | 103 #################################################################################################################################################################################################################### | 
|  | 104 | 
|  | 105 def pie_temp(merge_con,c_unmap,c_unmap_counts,merge_tre,t_unmap,t_unmap_counts,group_name1,group_name2): | 
|  | 106 | 
|  | 107     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 108     t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 109 | 
|  | 110     c_templ = 0 | 
|  | 111     c_tem_counts = 0 | 
|  | 112     c_mature = 0 | 
|  | 113     c_mat_counts = 0 | 
|  | 114     t_templ = 0 | 
|  | 115     t_tem_counts = 0 | 
|  | 116     t_mature = 0 | 
|  | 117     t_mat_counts = 0 | 
|  | 118 | 
|  | 119     for x in c_samples: | 
|  | 120 | 
|  | 121         if "/" not in x[0]: | 
|  | 122             if len(x[0].split("_"))==2: | 
|  | 123                c_mature+=1 | 
|  | 124                c_mat_counts += x[2] | 
|  | 125             else: | 
|  | 126                c_templ+=1 | 
|  | 127                c_tem_counts += x[2] | 
|  | 128         else: | 
|  | 129            f=0 | 
|  | 130            for y in x[0].split("/"): | 
|  | 131                if len(y.split("_"))==2: | 
|  | 132                   c_mature+=1 | 
|  | 133                   c_mat_counts += x[2] | 
|  | 134                   f=1 | 
|  | 135                   break | 
|  | 136            if f==0: | 
|  | 137               c_templ+=1 | 
|  | 138               c_tem_counts += x[2] | 
|  | 139 | 
|  | 140     for x in t_samples: | 
|  | 141 | 
|  | 142         if "/" not in x[0]: | 
|  | 143             if len(x[0].split("_"))==2: | 
|  | 144                t_mature+=1 | 
|  | 145                t_mat_counts += x[2] | 
|  | 146             else: | 
|  | 147                t_templ+=1 | 
|  | 148                t_tem_counts += x[2] | 
|  | 149         else: | 
|  | 150            f=0 | 
|  | 151            for y in x[0].split("/"): | 
|  | 152                if len(y.split("_"))==2: | 
|  | 153                   t_mature+=1 | 
|  | 154                   t_mat_counts += x[2] | 
|  | 155                   f=1 | 
|  | 156                   break | 
|  | 157            if f==0: | 
|  | 158               t_templ+=1 | 
|  | 159               t_tem_counts += x[2] | 
|  | 160 | 
|  | 161 | 
|  | 162     fig = plt.figure() | 
|  | 163     labels = 'miRNA RefSeq','templated', 'unassigned' | 
|  | 164     sizes = [c_mat_counts, c_tem_counts, c_unmap_counts] | 
|  | 165     colors = ['gold', 'yellowgreen', 'lightskyblue'] | 
|  | 166     explode = (0.2, 0.05, 0.1) | 
|  | 167     # Plot | 
|  | 168     ax1 = plt.subplot2grid((1,2),(0,0)) | 
|  | 169     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 170     [x.set_fontsize(10) for x in texts] | 
|  | 171     plt.title(group_name1.capitalize() + ' group (reads)', fontsize=12) | 
|  | 172     labels = 'miRNA RefSeq','templated', 'unassigned' | 
|  | 173     sizes = [t_mat_counts, t_tem_counts, t_unmap_counts] | 
|  | 174     colors = ['gold', 'yellowgreen', 'lightskyblue'] | 
|  | 175     explode = (0.2, 0.05, 0.1) | 
|  | 176     # Plot | 
|  | 177     ax2 = plt.subplot2grid((1,2),(0,1)) | 
|  | 178     patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8) | 
|  | 179     [x.set_fontsize(10) for x in texts] | 
|  | 180     plt.title(group_name2.capitalize() + ' group (reads)',fontsize = 12) | 
|  | 181     plt.savefig('pie_tem.png',dpi=300) | 
|  | 182 | 
|  | 183 ################################################################################################################################################################################################################### | 
|  | 184 | 
|  | 185 def make_spider(merge_con,merge_tre,group_name1,group_name2): | 
|  | 186 | 
|  | 187   c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con] | 
|  | 188   t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre] | 
|  | 189 | 
|  | 190   c_5 = 0 | 
|  | 191   c_5_counts = 0 | 
|  | 192   c_3 = 0 | 
|  | 193   c_3_counts = 0 | 
|  | 194   c_both =0 | 
|  | 195   c_both_counts=0 | 
|  | 196   c_mature = 0 | 
|  | 197   c_mat_counts = 0 | 
|  | 198   c_exception=0 | 
|  | 199   c_exception_counts=0 | 
|  | 200 | 
|  | 201 | 
|  | 202   t_5 = 0 | 
|  | 203   t_5_counts = 0 | 
|  | 204   t_3 = 0 | 
|  | 205   t_3_counts = 0 | 
|  | 206   t_both = 0 | 
|  | 207   t_both_counts = 0 | 
|  | 208   t_mature = 0 | 
|  | 209   t_mat_counts = 0 | 
|  | 210   t_exception = 0 | 
|  | 211   t_exception_counts=0 | 
|  | 212 | 
|  | 213   for x in c_samples: | 
|  | 214 | 
|  | 215       if "/" not in x[0]: | 
|  | 216           if len(x[0].split("_"))==2: | 
|  | 217              c_mature+=1 | 
|  | 218              c_mat_counts += x[2] | 
|  | 219           elif 0 == int(x[0].split("_")[3]): | 
|  | 220              c_5+=1 | 
|  | 221              c_5_counts += x[2] | 
|  | 222           elif 0 == int(x[0].split("_")[4]): | 
|  | 223              c_3+=1 | 
|  | 224              c_3_counts += x[2] | 
|  | 225           else: | 
|  | 226              c_both+=1 | 
|  | 227              c_both_counts+=x[2] | 
|  | 228 | 
|  | 229       else: | 
|  | 230          f=0 | 
|  | 231          for y in x[0].split("/"): | 
|  | 232              if len(y.split("_"))==2: | 
|  | 233                 c_mature+=1 | 
|  | 234                 c_mat_counts += x[2] | 
|  | 235                 f=1 | 
|  | 236                 break | 
|  | 237          if f==0: | 
|  | 238             part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1] | 
|  | 239             num=0 | 
|  | 240             for y in x[0].split("/"): | 
|  | 241                 if y.split("_")[-2]+"_"+y.split("_")[-1]==part: | 
|  | 242                    num+=1 | 
|  | 243             if num==len(x[0].split("/")): | 
|  | 244 | 
|  | 245                if  0 == int(x[0].split("/")[0].split("_")[3]): | 
|  | 246                    c_5+=1 | 
|  | 247                    c_5_counts += x[2] | 
|  | 248                elif 0 == int(x[0].split("/")[0].split("_")[4]): | 
|  | 249                     c_3+=1 | 
|  | 250                     c_3_counts += x[2] | 
|  | 251                else: | 
|  | 252                     c_both+=1 | 
|  | 253                     c_both_counts+=x[2] | 
|  | 254             else: | 
|  | 255                c_exception+=1 | 
|  | 256                c_exception_counts += x[2] | 
|  | 257 | 
|  | 258 | 
|  | 259   for x in t_samples: | 
|  | 260 | 
|  | 261       if "/" not in x[0]: | 
|  | 262           if len(x[0].split("_"))==2: | 
|  | 263              t_mature+=1 | 
|  | 264              t_mat_counts += x[2] | 
|  | 265           elif 0 == int(x[0].split("_")[3]): | 
|  | 266              t_5+=1 | 
|  | 267              t_5_counts += x[2] | 
|  | 268           elif 0 == int(x[0].split("_")[4]): | 
|  | 269              t_3+=1 | 
|  | 270              t_3_counts += x[2] | 
|  | 271           else: | 
|  | 272              t_both+=1 | 
|  | 273              t_both_counts+=x[2] | 
|  | 274 | 
|  | 275       else: | 
|  | 276          f=0 | 
|  | 277          for y in x[0].split("/"): | 
|  | 278              if len(y.split("_"))==2: | 
|  | 279                 t_mature+=1 | 
|  | 280                 t_mat_counts += x[2] | 
|  | 281                 f=1 | 
|  | 282                 break | 
|  | 283          if f==0: | 
|  | 284             part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1] | 
|  | 285             num=0 | 
|  | 286             for y in x[0].split("/"): | 
|  | 287                 if y.split("_")[-2]+"_"+y.split("_")[-1]==part: | 
|  | 288                    num+=1 | 
|  | 289             if num==len(x[0].split("/")): | 
|  | 290 | 
|  | 291                if  0 == int(x[0].split("/")[0].split("_")[3]): | 
|  | 292                    t_5+=1 | 
|  | 293                    t_5_counts += x[2] | 
|  | 294                elif 0 == int(x[0].split("/")[0].split("_")[4]): | 
|  | 295                     t_3+=1 | 
|  | 296                     t_3_counts += x[2] | 
|  | 297                else: | 
|  | 298                     t_both+=1 | 
|  | 299                     t_both_counts+=x[2] | 
|  | 300             else: | 
|  | 301                t_exception+=1 | 
|  | 302                t_exception_counts += x[2] | 
|  | 303 | 
|  | 304 | 
|  | 305   c_all = c_5+c_3+c_both+c_mature+c_exception | 
|  | 306   c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts | 
|  | 307 | 
|  | 308   t_all = t_5+t_3+t_both+t_mature + t_exception | 
|  | 309   t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts | 
|  | 310 | 
|  | 311   c_5 = round(c_5/c_all*100,2) | 
|  | 312   c_3 = round(c_3/c_all*100,2) | 
|  | 313   c_both = round(c_both/c_all*100,2) | 
|  | 314   c_mature = round(c_mature/c_all*100,2) | 
|  | 315   c_exception = round(c_exception/c_all*100,2) | 
|  | 316 | 
|  | 317   c_5_counts = round(c_5_counts/c_all_counts*100,2) | 
|  | 318   c_3_counts = round(c_3_counts/c_all_counts*100,2) | 
|  | 319   c_both_counts = round(c_both_counts/c_all_counts*100,2) | 
|  | 320   c_mat_counts = round(c_mat_counts/c_all_counts*100,2) | 
|  | 321   c_exception_counts = round(c_exception_counts/c_all_counts*100,2) | 
|  | 322 | 
|  | 323   t_5 = round(t_5/t_all*100,2) | 
|  | 324   t_3 = round(t_3/t_all*100,2) | 
|  | 325   t_both = round(t_both/t_all*100,2) | 
|  | 326   t_mature = round(t_mature/t_all*100,2) | 
|  | 327   t_exception = round(t_exception/t_all*100,2) | 
|  | 328 | 
|  | 329   t_5_counts = round(t_5_counts/t_all_counts*100,2) | 
|  | 330   t_3_counts = round(t_3_counts/t_all_counts*100,2) | 
|  | 331   t_both_counts = round(t_both_counts/t_all_counts*100,2) | 
|  | 332   t_mat_counts = round(t_mat_counts/t_all_counts*100,2) | 
|  | 333   t_exception_counts = round(t_exception_counts/t_all_counts*100,2) | 
|  | 334 | 
|  | 335   radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception) | 
|  | 336   radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts) | 
|  | 337 | 
|  | 338   df=pd.DataFrame({ | 
|  | 339   'group':[group_name1.capitalize(),group_name2.capitalize()], | 
|  | 340   """5'3'-isomiRs""":[c_both,t_both], | 
|  | 341   """3'-isomiRs""":[c_3,t_3], | 
|  | 342   'RefSeq miRNA':[c_mature,t_mature], | 
|  | 343   """5'-isomiRs""":[c_5,t_5], | 
|  | 344   'others*':[c_exception,t_exception]}) | 
|  | 345 | 
|  | 346   df1=pd.DataFrame({ | 
|  | 347   'group':[group_name1.capitalize(),group_name2.capitalize()], | 
|  | 348   """5'-3'-isomiRs""":[c_both_counts,t_both_counts], | 
|  | 349   """3'-isomiRs""":[c_3_counts,t_3_counts], | 
|  | 350   'RefSeq miRNA':[c_mat_counts,t_mat_counts], | 
|  | 351   """5'-isomiRs""":[c_5_counts,t_5_counts], | 
|  | 352   'others*':[c_exception_counts,t_exception_counts]}) | 
|  | 353 | 
|  | 354   spider_last(df,radar_max,1,group_name1,group_name2) | 
|  | 355   spider_last(df1,radar_max_counts,2,group_name1,group_name2) | 
|  | 356 | 
|  | 357 | 
|  | 358 ############################################################################################################################################################################################################## | 
|  | 359 | 
|  | 360 def spider_last(df,radar_max,flag,group_name1,group_name2): | 
|  | 361   # ------- PART 1: Create background | 
|  | 362   fig = plt.figure() | 
|  | 363   # number of variable | 
|  | 364   categories=list(df)[1:] | 
|  | 365   N = len(categories) | 
|  | 366 | 
|  | 367   # What will be the angle of each axis in the plot? (we divide the plot / number of variable) | 
|  | 368   angles = [n / float(N) * 2 * pi for n in range(N)] | 
|  | 369   angles += angles[:1] | 
|  | 370 | 
|  | 371   # Initialise the spider plot | 
|  | 372   ax = plt.subplot(111, polar=True) | 
|  | 373 | 
|  | 374   # If you want the first axis to be on top: | 
|  | 375   ax.set_theta_offset(pi/2) | 
|  | 376   ax.set_theta_direction(-1) | 
|  | 377 | 
|  | 378   # Draw one axe per variable + add labels labels yet | 
|  | 379   plt.xticks(angles[:-1], categories, fontsize=13) | 
|  | 380 | 
|  | 381   # Draw ylabels | 
|  | 382   radar_max=round(radar_max+radar_max*0.1) | 
|  | 383   mul=len(str(radar_max))-1 | 
|  | 384   maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul) | 
|  | 385   sep = round(maxi/4) | 
|  | 386   plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=12) | 
|  | 387   plt.ylim(0, maxi) | 
|  | 388 | 
|  | 389   # ------- PART 2: Add plots | 
|  | 390 | 
|  | 391   # Plot each individual = each line of the data | 
|  | 392   # I don't do a loop, because plotting more than 3 groups makes the chart unreadable | 
|  | 393 | 
|  | 394   # Ind1 | 
|  | 395   values=df.loc[0].drop('group').values.flatten().tolist() | 
|  | 396   values += values[:1] | 
|  | 397   ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label=group_name1.capitalize()) | 
|  | 398   ax.fill(angles, values, 'b', alpha=0.1) | 
|  | 399 | 
|  | 400   # Ind2 | 
|  | 401   values=df.loc[1].drop('group').values.flatten().tolist() | 
|  | 402   values += values[:1] | 
|  | 403   ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label=group_name2.capitalize()) | 
|  | 404   ax.fill(angles, values, 'r', alpha=0.1) | 
|  | 405 | 
|  | 406   # Add legend | 
|  | 407   if flag==1: | 
|  | 408      plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1)) | 
|  | 409      plt.savefig('spider_non_red.png',dpi=300) | 
|  | 410   else: | 
|  | 411      plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1)) | 
|  | 412      plt.savefig('spider_red.png',dpi=300) | 
|  | 413 | 
|  | 414 ############################################################################################################################################################################################################# | 
|  | 415 | 
|  | 416 def hist_red(samples,flag,group): | 
|  | 417 | 
|  | 418     lengths=[] | 
|  | 419     cat=[] | 
|  | 420     total_reads=0 | 
|  | 421     seq=[] | 
|  | 422 | 
|  | 423     if flag == "c": | 
|  | 424         title = "Length distribution of "+group.lower()+" group (redundant reads)" | 
|  | 425     if flag == "t": | 
|  | 426         title = "Length distribution of "+group.lower()+" group (redundant reads)" | 
|  | 427 | 
|  | 428     for i in samples: | 
|  | 429         for x in i: | 
|  | 430             lengths.append(x[3]) | 
|  | 431             if x[1]=="0": | 
|  | 432                seq.append([x[3],x[0].split("-")[1],"Mapped"]) | 
|  | 433                cat.append("Mapped") | 
|  | 434             if x[1] == "4": | 
|  | 435                seq.append([x[3],x[0].split("-")[1],"Unassigned"]) | 
|  | 436                cat.append("Unassigned") | 
|  | 437 | 
|  | 438     uni_len=list(set(lengths)) | 
|  | 439     uni_len=[x for x in uni_len if x<=35] | 
|  | 440     low=min(lengths) | 
|  | 441     up=max(lengths) | 
|  | 442     seq.sort() | 
|  | 443     uni_seq=list(seq for seq,_ in itertools.groupby(seq)) | 
|  | 444     dim=up-low | 
|  | 445 | 
|  | 446     if dim>20: | 
|  | 447        s=5 | 
|  | 448     else: | 
|  | 449        s=8 | 
|  | 450 | 
|  | 451     total_reads+=sum([int(x[1]) for x in uni_seq]) | 
|  | 452 | 
|  | 453     map_reads=[] | 
|  | 454     unmap_reads=[] | 
|  | 455     length=[] | 
|  | 456     for y in uni_len: | 
|  | 457         map_temp=0 | 
|  | 458         unmap_temp=0 | 
|  | 459         for x in uni_seq: | 
|  | 460             if x[0]==y and x[2]=="Mapped": | 
|  | 461                map_temp+=int(x[1]) | 
|  | 462             if x[0]==y and x[2]=="Unassigned": | 
|  | 463                unmap_temp+=int(x[1]) | 
|  | 464         if y<=35: | 
|  | 465            length.append(y) | 
|  | 466            map_reads.append(round(map_temp/total_reads*100,2)) | 
|  | 467            unmap_reads.append(round(unmap_temp/total_reads*100,2)) | 
|  | 468 | 
|  | 469     ylim=max([sum(x) for x in zip(unmap_reads, map_reads)]) | 
|  | 470     ylim=ylim+ylim*20/100 | 
|  | 471     fig, ax = plt.subplots() | 
|  | 472     width=0.8 | 
|  | 473     ax.bar(length, unmap_reads, width, label='Unassigned') | 
|  | 474     h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped') | 
|  | 475     plt.xticks(np.arange(length[0], length[-1]+1, 1)) | 
|  | 476     plt.yticks(np.arange(0, ylim, 5)) | 
|  | 477     plt.xlabel('Length (nt)',fontsize=14) | 
|  | 478     plt.ylabel('Percentage',fontsize=14) | 
|  | 479     plt.title(title,fontsize=14) | 
|  | 480     ax.legend() | 
|  | 481     plt.ylim([0, ylim]) | 
|  | 482     ax.grid(axis='y',linewidth=0.2) | 
|  | 483 | 
|  | 484     if flag=='c': | 
|  | 485        plt.savefig('c_hist_red.png',dpi=300) | 
|  | 486 | 
|  | 487     if flag=='t': | 
|  | 488        plt.savefig('t_hist_red.png',dpi=300) | 
|  | 489 | 
|  | 490 ############################################################################################################################################################################################################################################## | 
|  | 491 | 
|  | 492 def logo_seq_red(merge, flag, group): | 
|  | 493 | 
|  | 494     if flag=="c": | 
|  | 495        titlos = group.capitalize() + " group (redundant)" | 
|  | 496        file_logo = "c_logo.png" | 
|  | 497        file_bar = "c_bar.png" | 
|  | 498     if flag=="t": | 
|  | 499        titlos = group.capitalize() + " group (redundant)" | 
|  | 500        file_logo = "t_logo.png" | 
|  | 501        file_bar = "t_bar.png" | 
|  | 502 | 
|  | 503     c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge] | 
|  | 504 | 
|  | 505     A=[0]*3 | 
|  | 506     C=[0]*3 | 
|  | 507     G=[0]*3 | 
|  | 508     T=[0]*3 | 
|  | 509     total_reads=0 | 
|  | 510 | 
|  | 511     for y in c_samples: | 
|  | 512         if "/" in y[0]: | 
|  | 513             length=[] | 
|  | 514             for x in y[0].split("/"): | 
|  | 515                 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]]) | 
|  | 516 | 
|  | 517             best=min(length) | 
|  | 518             total_reads+=best[2] | 
|  | 519             for i in range(3): | 
|  | 520                 if i<len(best[1]): | 
|  | 521                     if best[1][i] == "A": | 
|  | 522                         A[i]+=best[2] | 
|  | 523                     elif best[1][i] == "C": | 
|  | 524                         C[i]+=best[2] | 
|  | 525                     elif best[1][i] == "G": | 
|  | 526                         G[i]+=best[2] | 
|  | 527                     else: | 
|  | 528                         T[i]+=best[2] | 
|  | 529         else: | 
|  | 530             total_reads+=y[2] | 
|  | 531             for i in range(3): | 
|  | 532                 if i<len(y[0].split("_")[-1]): | 
|  | 533                     if y[0].split("_")[-1][i] == "A": | 
|  | 534                         A[i]+=(y[2]) | 
|  | 535                     elif y[0].split("_")[-1][i] == "C": | 
|  | 536                         C[i]+=(y[2]) | 
|  | 537                     elif y[0].split("_")[-1][i] == "G": | 
|  | 538                         G[i]+=(y[2]) | 
|  | 539                     else: | 
|  | 540                         T[i]+=y[2] | 
|  | 541 | 
|  | 542     A[:] = [round(x*100,1) / total_reads for x in A] | 
|  | 543     C[:] = [round(x*100,1) / total_reads for x in C] | 
|  | 544     G[:] = [round(x*100,1) / total_reads for x in G] | 
|  | 545     T[:] = [round(x*100,1) / total_reads for x in T] | 
|  | 546 | 
|  | 547     data = {'A':A,'C':C,'G':G,'T':T} | 
|  | 548     df = pd.DataFrame(data, index=[1,2,3]) | 
|  | 549 | 
|  | 550     h=df.plot.bar(color=tuple(["g", "b","gold","r"]) ) | 
|  | 551     h.grid(axis='y',linewidth=0.2) | 
|  | 552     plt.xticks(rotation=0, ha="right") | 
|  | 553     plt.ylabel("Counts (%)",fontsize=18) | 
|  | 554     plt.xlabel("Numbers of additional nucleotides",fontsize=18) | 
|  | 555     plt.title(titlos,fontsize=20) | 
|  | 556     plt.tight_layout() | 
|  | 557     plt.savefig(file_bar, dpi=300) | 
|  | 558 | 
|  | 559     crp_logo = lm.Logo(df, font_name = 'DejaVu Sans') | 
|  | 560 | 
|  | 561     crp_logo.style_spines(visible=False) | 
|  | 562     crp_logo.style_spines(spines=['left', 'bottom'], visible=True) | 
|  | 563     crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0) | 
|  | 564     crp_logo.ax.set_title(titlos,fontsize=18) | 
|  | 565     crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5) | 
|  | 566     crp_logo.ax.set_xlabel("Numbers of additional nucleotides",fontsize=16, labelpad=5) | 
|  | 567     crp_logo.ax.xaxis.set_ticks_position('none') | 
|  | 568     crp_logo.ax.xaxis.set_tick_params(pad=-1) | 
|  | 569     figure = plt.gcf() | 
|  | 570     figure.set_size_inches(6, 4) | 
|  | 571     crp_logo.fig.savefig(file_logo,dpi=300) | 
|  | 572 | 
|  | 573 ########################################################################################################################################################################################################## | 
|  | 574 | 
|  | 575 def pdf_before_DE(analysis,group_name1,group_name2): | 
|  | 576 | 
|  | 577   # Image extensions | 
|  | 578   if analysis=="2": | 
|  | 579      image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png") | 
|  | 580   else: | 
|  | 581      image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png") | 
|  | 582   # This list will hold the images file names | 
|  | 583   images = [] | 
|  | 584 | 
|  | 585   # Build the image list by merging the glob results (a list of files) | 
|  | 586   # for each extension. We are taking images from current folder. | 
|  | 587   for extension in image_extensions: | 
|  | 588       images.extend(glob.glob(extension)) | 
|  | 589   #sys.exit(images) | 
|  | 590   # Create instance of FPDF class | 
|  | 591   pdf = FPDF('P', 'in', 'A4') | 
|  | 592   # Add new page. Without this you cannot create the document. | 
|  | 593   pdf.add_page() | 
|  | 594   # Set font to Arial, 'B'old, 16 pts | 
|  | 595   pdf.set_font('Arial', 'B', 20.0) | 
|  | 596 | 
|  | 597   # Page header | 
|  | 598   pdf.cell(pdf.w-0.5, 0.5, 'IsomiR profile report',align='C') | 
|  | 599   pdf.ln(0.7) | 
|  | 600   pdf.set_font('Arial','B', 16.0) | 
|  | 601   pdf.cell(pdf.w-0.5, 0.5, 'sRNA length distribution',align='C') | 
|  | 602 | 
|  | 603   # Smaller font for image captions | 
|  | 604   pdf.set_font('Arial', '', 11.0) | 
|  | 605 | 
|  | 606   # Image caption | 
|  | 607   pdf.ln(0.5) | 
|  | 608 | 
|  | 609   yh=FPDF.get_y(pdf) | 
|  | 610   pdf.image(images[0],x=0.3,w=4, h=3) | 
|  | 611   pdf.image(images[1],x=4,y=yh, w=4, h=3) | 
|  | 612   pdf.ln(0.3) | 
|  | 613 | 
|  | 614   # Image caption | 
|  | 615   pdf.cell(0.2) | 
|  | 616   pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precursor arm reference DB (5p and 3p arms) in "+group_name1.lower()) | 
|  | 617   pdf.ln(0.2) | 
|  | 618   pdf.cell(0.2) | 
|  | 619   pdf.cell(3.0, 0.0, "  (left) and "+group_name2.lower()+" (right) groups") | 
|  | 620 | 
|  | 621 | 
|  | 622   pdf.ln(0.5) | 
|  | 623   h1=FPDF.get_y(pdf) | 
|  | 624   pdf.image(images[2],x=1, w=6.5, h=5) | 
|  | 625   h2=FPDF.get_y(pdf) | 
|  | 626   FPDF.set_y(pdf,h1+0.2) | 
|  | 627   pdf.set_font('Arial','B', 16.0) | 
|  | 628   if analysis=="2": | 
|  | 629      pdf.cell(pdf.w-0.5, 0.5, 'Templated and non-templated isomiRs',align='C') | 
|  | 630   else: | 
|  | 631      pdf.cell(pdf.w-0.5, 0.5, 'Templated isomiRs',align='C') | 
|  | 632   pdf.set_font('Arial', '', 11.0) | 
|  | 633   FPDF.set_y(pdf,h2) | 
|  | 634   FPDF.set_y(pdf,9.5) | 
|  | 635   # Image caption | 
|  | 636   pdf.cell(0.2) | 
|  | 637   if analysis=="2": | 
|  | 638      pdf.cell(3.0, 0.0, "  RefSeq miRNAs, templated isomiRs, non-templated isomiRs and unassigned sequences as percentage") | 
|  | 639      pdf.ln(0.2) | 
|  | 640      pdf.cell(0.2) | 
|  | 641      pdf.cell(3.0, 0.0, "  of total sRNA reads in "+group_name1.lower()+" (left) and "+group_name2.lower()+" (right) groups") | 
|  | 642 | 
|  | 643   else: | 
|  | 644      pdf.cell(3.0, 0.0, "  RefSeq miRNAS, templated isomiRs and unassigned sequences as percentage of total sRNA reads in") | 
|  | 645      pdf.ln(0.2) | 
|  | 646      pdf.cell(0.2) | 
|  | 647      pdf.cell(3.0, 0.0, "  "+group_name1.lower()+" (left) and "+group_name2.lower()+" (right) groups") | 
|  | 648 | 
|  | 649   pdf.add_page() | 
|  | 650   pdf.set_font('Arial', 'B', 18.0) | 
|  | 651   pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR subtypes",align='C') | 
|  | 652   pdf.ln(0.7) | 
|  | 653   pdf.set_font('Arial', 'B', 14.0) | 
|  | 654   pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR profile (redundant reads)",align='C') | 
|  | 655   pdf.ln(0.5) | 
|  | 656   pdf.image(images[3],x=1.5, w=5.5, h=4) | 
|  | 657   pdf.ln(0.6) | 
|  | 658   pdf.cell(pdf.w-0.5, 0.0, "Templated isomiR profile (non-redundant reads)",align='C') | 
|  | 659   pdf.set_font('Arial', '', 12.0) | 
|  | 660   pdf.ln(0.2) | 
|  | 661   pdf.image(images[4],x=1.5, w=5.5, h=4) | 
|  | 662   pdf.ln(0.3) | 
|  | 663   pdf.set_font('Arial', '', 11.0) | 
|  | 664   pdf.cell(0.2) | 
|  | 665   pdf.cell(3.0, 0.0, "  * IsomiRs potentially generated from multiple loci") | 
|  | 666 | 
|  | 667   if analysis=="2": | 
|  | 668      pdf.add_page('L') | 
|  | 669 | 
|  | 670      pdf.set_font('Arial', 'B', 18.0) | 
|  | 671      pdf.cell(pdf.w-0.5, 0.5, "Non-templated isomiRs",align='C') | 
|  | 672      pdf.ln(0.5) | 
|  | 673      pdf.set_font('Arial', 'B', 14.0) | 
|  | 674      pdf.cell(pdf.w-0.5, 0.5, "3'-end additions to RefSeq miRNAs and templated isomiRs",align='C') | 
|  | 675      pdf.ln(0.7) | 
|  | 676 | 
|  | 677      yh=FPDF.get_y(pdf) | 
|  | 678      pdf.image(images[5],x=1.5,w=3.65, h=2.65) | 
|  | 679      pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65) | 
|  | 680      pdf.ln(0.5) | 
|  | 681      yh=FPDF.get_y(pdf) | 
|  | 682      pdf.image(images[6],x=1.5,w=3.65, h=2.65) | 
|  | 683      pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65) | 
|  | 684 | 
|  | 685   pdf.close() | 
|  | 686   pdf.output('report1.pdf','F') | 
|  | 687 | 
|  | 688 #############################################################################################################################################################3 | 
|  | 689 |