diff mirbase_graphs.py @ 1:561b0abcae87 draft

Uploaded
author glogobyte
date Fri, 16 Oct 2020 12:15:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirbase_graphs.py	Fri Oct 16 12:15:50 2020 +0000
@@ -0,0 +1,809 @@
+import itertools
+import time
+import sys
+import os
+import urllib.request
+import gzip
+from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
+import subprocess
+import argparse
+from collections import OrderedDict
+from matplotlib.backends.backend_pdf import PdfPages
+import pandas as pd
+from math import pi
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.ticker import PercentFormatter
+import seaborn as sns
+import scipy.stats as stats
+from plotnine import *
+import math
+import re
+import matplotlib.ticker as mtick
+import copy
+
+
+#################################################################################################################################################################
+def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
+
+    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
+    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
+    c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
+    t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
+
+    c_templ = 0
+    c_tem_counts = 0
+    c_mature = 0
+    c_mat_counts = 0
+    t_templ = 0
+    t_tem_counts = 0
+    t_mature = 0
+    t_mat_counts = 0
+
+    c_non = len(c_non_samples)
+    c_non_counts = sum(x[2] for x in c_non_samples)
+    t_non = len(t_non_samples)
+    t_non_counts = sum(x[2] for x in t_non_samples)
+
+    c_unmap = c_unmap - c_non
+    t_unmap = c_unmap - t_non
+
+    c_unmap_counts=c_unmap_counts - c_non_counts
+    t_unmap_counts=t_unmap_counts - t_non_counts
+
+
+    for x in c_samples:
+
+        if "/" not in x[0]:
+            if "chr" in  x[0].split("_")[-1]:
+               c_mature+=1
+               c_mat_counts += x[2]
+            else:
+               c_templ+=1
+               c_tem_counts += x[2]
+        else:
+           f=0
+           for y in x[0].split("/"):
+               if "chr" in y.split("_")[-1]:
+                  c_mature+=1
+                  c_mat_counts += x[2]
+                  f=1
+                  break
+           if f==0:
+              c_templ+=1
+              c_tem_counts += x[2]
+
+    for x in t_samples:
+
+        if "/" not in x[0]:
+            if "chr" in  x[0].split("_")[-1]:
+               t_mature+=1
+               t_mat_counts += x[2]
+            else:
+               t_templ+=1
+               t_tem_counts += x[2]
+        else:
+           f=0
+           for y in x[0].split("/"):
+               if "chr" in y.split("_")[-1]:
+                  t_mature+=1
+                  t_mat_counts += x[2]
+                  f=1
+                  break
+           if f==0:
+              t_templ+=1
+              t_tem_counts += x[2]
+
+    fig = plt.figure(figsize=(7,5))
+    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
+    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
+    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
+    ax1 = plt.subplot2grid((1,2),(0,0))
+    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
+    [x.set_fontsize(8) for x in texts]
+    plt.title('Control Group (reads)',fontsize=12)
+    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
+    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
+    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
+    ax2 = plt.subplot2grid((1,2),(0,1))
+    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
+    [x.set_fontsize(8) for x in texts]
+    plt.title('Treated Group (reads)', fontsize=12)
+    plt.savefig('pie_non.png',dpi=300)
+
+######################################################################################################################################################
+
+
+def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
+
+    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
+    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
+
+    c_templ = 0
+    c_tem_counts = 0
+    c_mature = 0
+    c_mat_counts = 0
+    t_templ = 0
+    t_tem_counts = 0
+    t_mature = 0
+    t_mat_counts = 0
+
+    for x in c_samples:
+
+        if "/" not in x[0]:
+            if "chr" in  x[0].split("_")[-1]:
+               c_mature+=1
+               c_mat_counts += x[2]
+            else:
+               c_templ+=1
+               c_tem_counts += x[2]
+        else:
+           f=0
+           for y in x[0].split("/"):
+               if "chr" in y.split("_")[-1]:
+                  c_mature+=1
+                  c_mat_counts += x[2]
+                  f=1
+                  break
+           if f==0:
+              c_templ+=1
+              c_tem_counts += x[2]
+
+    for x in t_samples:
+
+        if "/" not in x[0]:
+            if "chr" in  x[0].split("_")[-1]:
+               t_mature+=1
+               t_mat_counts += x[2]
+            else:
+               t_templ+=1
+               t_tem_counts += x[2]
+        else:
+           f=0
+           for y in x[0].split("/"):
+               if "chr" in y.split("_")[-1]:
+                  t_mature+=1
+                  t_mat_counts += x[2]
+                  f=1
+                  break
+           if f==0:
+              t_templ+=1
+              t_tem_counts += x[2]
+
+
+    fig = plt.figure()
+    labels = 'miRNA RefSeq','Template', 'Unmapped'
+    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
+    colors = ['gold', 'yellowgreen', 'lightskyblue']
+    explode = (0.2, 0.05, 0.1)
+    ax1 = plt.subplot2grid((1,2),(0,0))
+    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
+    [x.set_fontsize(8) for x in texts]
+    plt.title('Control group (reads)', fontsize=12)
+    labels = 'miRNA RefSeq','Template', 'Unmapped'
+    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
+    colors = ['gold', 'yellowgreen', 'lightskyblue']
+    explode = (0.2, 0.05, 0.1)
+    ax2 = plt.subplot2grid((1,2),(0,1))
+    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
+    [x.set_fontsize(8) for x in texts]
+    plt.title('Treated group (reads)',fontsize = 12)
+    plt.savefig('pie_tem.png',dpi=300)
+
+###################################################################################################################################################################################################################
+
+
+def make_spider(merge_LH2E,merge_LH8E):
+
+  c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
+  t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
+
+  c_5 = 0
+  c_5_counts = 0
+  c_3 = 0
+  c_3_counts = 0
+  c_both =0
+  c_both_counts=0
+  c_mature = 0
+  c_mat_counts = 0
+  c_exception=0
+  c_exception_counts=0
+
+
+  t_5 = 0
+  t_5_counts = 0
+  t_3 = 0
+  t_3_counts = 0
+  t_both = 0
+  t_both_counts = 0
+  t_mature = 0
+  t_mat_counts = 0
+  t_exception = 0
+  t_exception_counts=0
+
+  for x in c_samples:
+
+      if "/" not in x[0]:
+          if "chr" in  x[0].split("_")[-1]:
+             c_mature+=1
+             c_mat_counts += x[2]
+          elif 0 == int(x[0].split("_")[-1]):
+             c_5+=1
+             c_5_counts += x[2]
+          elif 0 == int(x[0].split("_")[-2]):
+             c_3+=1
+             c_3_counts += x[2]
+          else:
+             c_both+=1
+             c_both_counts+=x[2]
+
+      else:
+         f=0
+         for y in x[0].split("/"):
+             if "chr" in y.split("_")[-1]:
+                c_mature+=1
+                c_mat_counts += x[2]
+                f=1
+                break
+         if f==0:
+            for y in x[0].split("/"):
+                c_exception+=1
+                c_exception_counts += x[2]
+
+
+  for x in t_samples:
+
+      if "/" not in x[0]:
+          if "chr" in  x[0].split("_")[-1]:
+             t_mature+=1
+             t_mat_counts += x[2]
+          elif 0 == int(x[0].split("_")[-1]):
+             t_5+=1
+             t_5_counts += x[2]
+          elif 0 == int(x[0].split("_")[-2]):
+             t_3+=1
+             t_3_counts += x[2]
+          else:
+             t_both+=1
+             t_both_counts+=x[2]
+
+      else:
+         f=0
+         for y in x[0].split("/"):
+             if "chr" in y.split("_")[-1]:
+                t_mature+=1
+                t_mat_counts += x[2]
+                f=1
+                break
+         if f==0:
+            for y in x[0].split("/"):
+                t_exception+=1
+                t_exception_counts += x[2]
+
+
+  c_all = c_5+c_3+c_both+c_mature+c_exception
+  c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
+
+  t_all = t_5+t_3+t_both+t_mature + t_exception
+  t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
+
+  c_5 = round(c_5/c_all*100,2)
+  c_3 = round(c_3/c_all*100,2)
+  c_both = round(c_both/c_all*100,2)
+  c_mature = round(c_mature/c_all*100,2)
+  c_exception = round(c_exception/c_all*100,2)
+
+  c_5_counts = round(c_5_counts/c_all_counts*100,2)
+  c_3_counts = round(c_3_counts/c_all_counts*100,2)
+  c_both_counts = round(c_both_counts/c_all_counts*100,2)
+  c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
+  c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
+
+  t_5 = round(t_5/t_all*100,2)
+  t_3 = round(t_3/t_all*100,2)
+  t_both = round(t_both/t_all*100,2)
+  t_mature = round(t_mature/t_all*100,2)
+  t_exception = round(t_exception/t_all*100,2)
+
+  t_5_counts = round(t_5_counts/t_all_counts*100,2)
+  t_3_counts = round(t_3_counts/t_all_counts*100,2)
+  t_both_counts = round(t_both_counts/t_all_counts*100,2)
+  t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
+  t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
+
+  radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
+  radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
+
+  df=pd.DataFrame({
+  'group':['Controls','Treated'],
+  """5' and 3' isomiRs""":[c_both,t_both],
+  """3' isomiRs""":[c_3,t_3],
+  'miRNA RefSeq':[c_mature,t_mature],
+  """5' isomiRs""":[c_5,t_5],
+  'Others*':[c_exception,t_exception]})
+
+  df1=pd.DataFrame({
+  'group':['Controls','Treated'],
+  """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
+  """3' isomiRs""":[c_3_counts,t_3_counts],
+  'miRNA RefSeq':[c_mat_counts,t_mat_counts],
+  """5' isomiRs""":[c_5_counts,t_5_counts],
+  'Others*':[c_exception_counts,t_exception_counts]})
+
+  spider_last(df,radar_max,1)
+  spider_last(df1,radar_max_counts,2)
+
+#####################################################################################################################################################
+
+def spider_last(df,radar_max,flag):
+  # ------- PART 1: Create background
+  fig = plt.figure()
+  # number of variable
+  categories=list(df)[1:]
+  N = len(categories)
+
+  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
+  angles = [n / float(N) * 2 * pi for n in range(N)]
+  angles += angles[:1]
+
+  # Initialise the spider plot
+  ax = plt.subplot(111, polar=True)
+
+  # If you want the first axis to be on top:
+  ax.set_theta_offset(pi/2)
+  ax.set_theta_direction(-1)
+
+  # Draw one axe per variable + add labels labels yet
+  plt.xticks(angles[:-1], categories, fontsize=11)
+
+  # Draw ylabels
+  radar_max=round(radar_max+radar_max*0.1)
+  mul=len(str(radar_max))-1
+  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
+  sep = round(maxi/4)
+  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
+  plt.ylim(0, maxi)
+
+  # ------- PART 2: Add plots
+
+  # Plot each individual = each line of the data
+  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
+
+  # Ind1
+  values=df.loc[0].drop('group').values.flatten().tolist()
+  values += values[:1]
+  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
+  ax.fill(angles, values, 'b', alpha=0.1)
+
+  # Ind2
+  values=df.loc[1].drop('group').values.flatten().tolist()
+  values += values[:1]
+  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
+  ax.fill(angles, values, 'r', alpha=0.1)
+
+  # Add legend
+  if flag==1:
+     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
+     plt.savefig('spider_non_red.png',dpi=300)
+  else:
+     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
+     plt.savefig('spider_red.png',dpi=300)
+
+
+#############################################################################################################################################################################################################
+
+def hist_red(samples,flag):
+    lengths=[]
+    cat=[]
+    total_reads=0
+    seq=[]
+
+    if flag == "c":
+        title = "Length Distribution of Control group (Redudant reads)"
+    if flag == "t":
+        title = "Length Distribution of Treated group (Redudant reads)"
+
+    for i in samples:
+        for x in i:
+            lengths.append(len(x[9]))
+            if x[1]=="0":
+               seq.append([x[9],x[0].split("-")[1],"Mapped"])
+               cat.append("Mapped")
+            if x[1] == "4":
+               seq.append([x[9],x[0].split("-")[1],"Unmapped"])
+               cat.append("Unmapped")
+
+    uni_len=list(set(lengths))
+    uni_len=[x for x in uni_len if x<=35]
+    low=min(lengths)
+    up=max(lengths)
+    seq.sort()
+    uni_seq=list(seq for seq,_ in itertools.groupby(seq))
+    dim=up-low
+
+    if dim>20:
+       s=5
+    else:
+       s=8
+
+    total_reads+=sum([int(x[1]) for x in uni_seq])
+
+    map_reads=[]
+    unmap_reads=[]
+    length=[]
+    for y in uni_len:
+        map_temp=0
+        unmap_temp=0
+        for x in uni_seq:
+            if len(x[0])==y and x[2]=="Mapped":
+               map_temp+=int(x[1])
+            if len(x[0])==y and x[2]=="Unmapped":
+               unmap_temp+=int(x[1])
+        if y<=35:
+           length.append(y)
+           map_reads.append(round(map_temp/total_reads*100,2))
+           unmap_reads.append(round(unmap_temp/total_reads*100,2))
+
+    ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
+    ylim=ylim+ylim*20/100
+    fig, ax = plt.subplots()
+    width=0.8
+    ax.bar(length, unmap_reads, width, label='Unmapped')
+    h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
+    plt.xticks(np.arange(length[0], length[-1]+1, 1))
+    plt.xlabel('Length (nt)',fontsize=14)
+    plt.ylabel('Percentage',fontsize=14)
+    plt.title(title,fontsize=14)
+    ax.legend()
+    plt.ylim([0, ylim])
+    ax.grid(axis='y',linewidth=0.2)
+
+    if flag=='c':
+       plt.savefig('c_hist_red.png',dpi=300)
+
+    if flag=='t':
+       plt.savefig('t_hist_red.png',dpi=300)
+
+#################################################################################################################
+
+def logo_seq_red(merge, flag):
+
+    if flag=="c":
+       titlos="Control group (Redundant)"
+       file_logo="c_logo.png"
+       file_bar="c_bar.png"
+    if flag=="t":
+       titlos="Treated group (Redundant)"
+       file_logo="t_logo.png"
+       file_bar="t_bar.png"
+
+    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
+
+    A=[0]*5
+    C=[0]*5
+    G=[0]*5
+    T=[0]*5
+    total_reads=0
+
+    for y in c_samples:
+        if "/" in y[0]:
+            length=[]
+            for x in y[0].split("/"):
+                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
+
+            best=min(length)
+            total_reads+=best[2]
+            for i in range(5):
+                if i<len(best[1]):
+                    if best[1][i] == "A":
+                        A[i]+=best[2]
+                    elif best[1][i] == "C":
+                        C[i]+=best[2]
+                    elif best[1][i] == "G":
+                        G[i]+=best[2]
+                    else:
+                        T[i]+=best[2]
+        else:
+            total_reads+=y[2]
+            for i in range(5):
+                if i<len(y[0].split("_")[-1]):
+                    if y[0].split("_")[-1][i] == "A":
+                        A[i]+=(y[2])
+                    elif y[0].split("_")[-1][i] == "C":
+                        C[i]+=(y[2])
+                    elif y[0].split("_")[-1][i] == "G":
+                        G[i]+=(y[2])
+                    else:
+                        T[i]+=y[2]
+
+    A[:] = [round(x*100,1) / total_reads for x in A]
+    C[:] = [round(x*100,1) / total_reads for x in C]
+    G[:] = [round(x*100,1) / total_reads for x in G]
+    T[:] = [round(x*100,1) / total_reads for x in T]
+
+
+
+    data = {'A':A,'C':C,'G':G,'T':T}
+    df = pd.DataFrame(data, index=[1,2,3,4,5])
+    h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
+    h.grid(axis='y',linewidth=0.2)
+    plt.xticks(rotation=0, ha="right")
+    plt.ylabel("Counts (%)",fontsize=18)
+    plt.xlabel("Positions (nt)",fontsize=18)
+    plt.title(titlos,fontsize=20)
+    plt.tight_layout()
+    plt.savefig(file_bar, dpi=300)
+
+    import logomaker as lm
+    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
+    crp_logo.style_spines(visible=False)
+    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
+    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
+
+    # style using Axes methods
+    crp_logo.ax.set_title(titlos,fontsize=18)
+    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
+    crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
+    crp_logo.ax.xaxis.set_ticks_position('none')
+    crp_logo.ax.xaxis.set_tick_params(pad=-1)
+    figure = plt.gcf()
+    figure.set_size_inches(6, 4)
+    crp_logo.fig.savefig(file_logo,dpi=300)
+
+##########################################################################################################################################################################################################
+
+def logo_seq_non_red(merge_LH2E):
+
+    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
+
+    A=[0]*5
+    C=[0]*5
+    G=[0]*5
+    T=[0]*5
+
+    for y in c_samples:
+        if "/" in y[0]:
+            length=[]
+            for x in y[0].split("/"):
+                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
+
+            best=min(length)
+            for i in range(5):
+                if i<len(best[1]):
+                    if best[1][i] == "A":
+                        A[i]+=1
+                    elif best[1][i] == "C":
+                        C[i]+=1
+                    elif best[1][i] == "G":
+                        G[i]+=1
+                    else:
+                        T[i]+=1
+        else:
+            for i in range(5):
+                if i<len(y[0].split("_")[-1]):
+                    if y[0].split("_")[-1][i] == "A":
+                        A[i]+=1
+                    elif y[0].split("_")[-1][i] == "C":
+                        C[i]+=1
+                    elif y[0].split("_")[-1][i] == "G":
+                        G[i]+=1
+                    else:
+                        T[i]+=1
+
+    data = {'A':A,'C':C,'G':G,'T':T}
+    df = pd.DataFrame(data, index=[1,2,3,4,5])
+    h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
+    h.set_xlabel("Positions (nt)")
+    h.set_ylabel("Unique sequences")
+    plt.xticks(rotation=0, ha="right")
+    plt.tight_layout()
+    plt.savefig("bar2.png", dpi=300)
+
+
+    import logomaker as lm
+    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
+
+    # style using Logo methods
+    crp_logo.style_spines(visible=False)
+    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
+    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
+
+    # style using Axes methods
+    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
+    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
+    crp_logo.ax.xaxis.set_ticks_position('none')
+    crp_logo.ax.xaxis.set_tick_params(pad=-1)
+    crp_logo.ax.set_title("Non-redundant")
+    figure = plt.gcf()
+    crp_logo.fig.savefig('logo2.png', dpi=300)
+
+
+################################################################################################################################################################
+
+def pdf_before_DE(analysis):
+
+  # Import FPDF class
+  from fpdf import FPDF, fpdf
+
+  # Import glob module to find all the files matching a pattern
+  import glob
+
+  # Image extensions
+  if analysis=="2":
+     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
+  else:
+     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
+  # This list will hold the images file names
+  images = []
+
+  # Build the image list by merging the glob results (a list of files)
+  # for each extension. We are taking images from current folder.
+  for extension in image_extensions:
+      images.extend(glob.glob(extension))
+  #sys.exit(images)
+  # Create instance of FPDF class
+  pdf = FPDF('P', 'in', 'A4')
+  # Add new page. Without this you cannot create the document.
+  pdf.add_page()
+  # Set font to Arial, 'B'old, 16 pts
+  pdf.set_font('Arial', 'B', 20.0)
+
+  # Page header
+  pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
+  pdf.ln(0.7)
+  pdf.set_font('Arial','', 16.0)
+  pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
+
+  # Smaller font for image captions
+  pdf.set_font('Arial', '', 11.0)
+
+  # Image caption
+  pdf.ln(0.5)
+
+  yh=FPDF.get_y(pdf)
+  pdf.image(images[0],x=0.3,w=4, h=3)
+  pdf.image(images[1],x=4,y=yh, w=4, h=3)
+  pdf.ln(0.3)
+
+  # Image caption
+  pdf.cell(0.2)
+  pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
+  pdf.ln(0.2)
+  pdf.cell(0.2)
+  pdf.cell(3.0, 0.0, "  and Treated (right) groups")
+
+
+  pdf.ln(0.5)
+  h1=FPDF.get_y(pdf)
+  pdf.image(images[2],x=1, w=6.5, h=5)
+  h2=FPDF.get_y(pdf)
+  FPDF.set_y(pdf,h1+0.2)
+  pdf.set_font('Arial','', 14.0)
+  pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
+  pdf.set_font('Arial', '', 11.0)
+  FPDF.set_y(pdf,h2)
+  FPDF.set_y(pdf,9.5)
+  # Image caption
+  pdf.cell(0.2)
+  if analysis=="2":
+     pdf.cell(3.0, 0.0, "  Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
+     pdf.ln(0.2)
+     pdf.cell(0.2)
+     pdf.cell(3.0, 0.0, "  in Control (left) and treated (right) groups")
+  else:
+     pdf.cell(3.0, 0.0, "  Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
+     pdf.ln(0.2)
+     pdf.cell(0.2)
+     pdf.cell(3.0, 0.0, "  Control (left) and treated (right) groups")
+
+
+
+  pdf.add_page()
+  pdf.set_font('Arial', 'B', 16.0)
+  pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
+  pdf.ln(0.7)
+  pdf.set_font('Arial', 'B', 12.0)
+  pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
+  pdf.ln(0.5)
+  pdf.image(images[3],x=1.5, w=5.5, h=4)
+  pdf.ln(0.6)
+  pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
+  pdf.set_font('Arial', '', 12.0)
+  pdf.ln(0.2)
+  pdf.image(images[4],x=1.5, w=5.5, h=4)
+  pdf.ln(0.3)
+  pdf.set_font('Arial', '', 11.0)
+  pdf.cell(0.2)
+  pdf.cell(3.0, 0.0, "  * IsomiRs potentialy initiated from multiple loci")
+
+
+  if analysis=="2":
+     pdf.add_page('L')
+
+     pdf.set_font('Arial', 'B', 16.0)
+     pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
+     pdf.ln(0.5)
+     pdf.set_font('Arial', 'B', 12.0)
+     pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
+     pdf.ln(0.7)
+
+     yh=FPDF.get_y(pdf)
+     pdf.image(images[5],x=1.5,w=3.65, h=2.65)
+     pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
+     pdf.ln(0.5)
+     yh=FPDF.get_y(pdf)
+     pdf.image(images[6],x=1.5,w=3.65, h=2.65)
+     pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
+
+  pdf.close()
+  pdf.output('report1.pdf','F')
+
+
+
+
+#############################################################################################################################################################3
+
+def pdf_after_DE(analysis):
+
+  # Import FPDF class
+  from fpdf import FPDF
+
+  # Import glob module to find all the files matching a pattern
+  import glob
+
+  # Image extensions
+  image_extensions = ("tem.png","non.png","a2.png")
+
+  # This list will hold the images file names
+  images = []
+
+  # Build the image list by merging the glob results (a list of files)
+  # for each extension. We are taking images from current folder.
+  for extension in image_extensions:
+      images.extend(glob.glob(extension))
+
+  # Create instance of FPDF class
+  pdf = FPDF('P', 'in', 'letter')
+  # Add new page. Without this you cannot create the document.
+  pdf.add_page()
+  # Set font to Arial, 'B'old, 16 pts
+  pdf.set_font('Arial', 'B', 16.0)
+
+  # Page header
+  pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
+  #pdf.ln(0.25)
+
+  pdf.ln(0.7)
+  pdf.set_font('Arial','B', 12.0)
+  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
+
+
+  # Smaller font for image captions
+  pdf.set_font('Arial', '', 10.0)
+
+  # Image caption 
+  pdf.ln(0.4)
+  pdf.image(images[0],x=0.8, w=7, h=8)
+  pdf.ln(0.3)
+
+  if analysis=="2":
+
+     pdf.add_page()
+     pdf.ln(0.7)
+     pdf.set_font('Arial','B', 12.0)
+     pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
+     pdf.ln(0.4)
+     pdf.image(images[1],x=0.5, w=7.5, h=6.5)
+
+  pdf.add_page()
+  pdf.ln(0.5)
+  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
+  pdf.ln(0.4)
+  pdf.image(images[2],x=0.8, w=7, h=8)
+  pdf.ln(0.3)
+
+
+  pdf.output('report2.pdf', 'F')
+
+
+