changeset 2:c607e8513b5b draft

Uploaded
author glogobyte
date Fri, 16 Oct 2020 12:16:37 +0000
parents 561b0abcae87
children 67f6938b3c42
files mirbase_ultra_v2.py
diffstat 1 files changed, 1723 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirbase_ultra_v2.py	Fri Oct 16 12:16:37 2020 +0000
@@ -0,0 +1,1723 @@
+from mirbase_functions import *
+from mirbase_graphs import *
+import itertools
+import time
+import sys
+import os
+import urllib.request
+import gzip
+from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
+import subprocess
+import argparse
+from collections import OrderedDict
+from matplotlib.backends.backend_pdf import PdfPages
+import pandas as pd
+from math import pi
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.ticker import PercentFormatter
+import seaborn as sns
+import scipy.stats as stats
+from plotnine import *
+import math
+import re
+import matplotlib.ticker as mtick
+import copy
+
# Output directories for per-sample reports, count tables and DESeq2 inputs.
# os.makedirs(..., exist_ok=True) is a portable replacement for `mkdir -p`.
for _out_dir in ('split1', 'split2', 'split3', 'split4', 'split11', 'split12',
                 'Counts', 'Diff/temp_con', 'Diff/temp_tre',
                 'Diff/n_temp_con', 'Diff/n_temp_tre'):
    os.makedirs(_out_dir, exist_ok=True)

parser = argparse.ArgumentParser()
parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
parser.add_argument("-con", "--control", help="control group input fastq file(s)", nargs='+', default=[])
parser.add_argument("-tre", "--treated", help="treated group input fastq file(s)", nargs='+', default=[])
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
parser.add_argument("-gen", "--org_name", help="organism/genome name", action="store")
parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
parser.add_argument("-f", "--flag", help="choose the database", action="store")
parser.add_argument("-umis", "--umi", help="choose the database", action="store")
parser.add_argument("-percentage", "--per", help="minimum percentage of samples that must pass the count filter (-1 disables filtering)", action="store")
parser.add_argument("-counts", "--count", help="minimum read count per sample used by the percentage filter", action="store")
parser.add_argument("-name1", "--n1", help="name of the control group (used in output file names)", action="store")
parser.add_argument("-name2", "--n2", help="name of the treated group (used in output file names)", action="store")
args = parser.parse_args()
+
+#########################################################################################################################################
+
def collapse_sam(path):
    """Collapse a SAM file to one record per unique (reference, sequence) pair.

    Reads the file at *path* (via ``read`` from mirbase_functions), keeps the
    header lines ("@"-prefixed) untouched, and for each unique combination of
    reference name (col 2) and read sequence (col 9) emits a single record
    renamed ``<index>-<occurrence count>`` with a placeholder quality string.

    Returns the header records followed by the collapsed alignment records,
    each as a list of tab-separated fields.
    """
    ini_sam = read(path, 0)  # `read` is provided by mirbase_functions
    records = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    header = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in x.split("\t")[0]]

    # Group alignments by (reference, sequence) in first-seen order; keep the
    # occurrence count and the LAST matching record, which is what the
    # original quadratic re-scan ended up keeping.  Single O(n) pass instead
    # of O(n*m).
    groups = OrderedDict()
    for rec in records:
        key = (rec[2], rec[9])
        if key in groups:
            groups[key][0] += 1
            groups[key][1] = rec
        else:
            groups[key] = [1, rec]

    collapsed = []
    for idx, (count, rec) in enumerate(groups.values(), start=1):
        rec[10] = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"  # placeholder base-quality string
        rec[0] = str(idx) + "-" + str(count)       # e.g. "3-17": id 3, 17 reads
        collapsed.append(rec)

    return header + collapsed
+
+#################################################################################################################################################################################################################
+
def duplicate_chroms_isoforms(List):
 """Merge isomiR records that map to multiple chromosomes.

 *List* holds SAM-style records (field 0 = read id, field 2 = isomiR name,
 field 9 = sequence).  Records with the same sequence/id whose names differ
 only by a chromosome suffix are renamed to a single combined name listing
 all chromosomes.  Returns ``(new_list, new_double_List)``: the (partially
 renamed, deduplicated) original list and a fully renamed deduplicated copy.
 """

 # Collect one [sequence, read-id, name] triple per unique combination.
 dupes=[]

 for num in range(len(List)):

    if  [List[num][9],List[num][0],List[num][2]] not in dupes :
        dupes.append([List[num][9],List[num][0],List[num][2]])

 # For every triple, append all alternative names (same sequence/id, same
 # base miRNA name before "_", but a different chromosome-qualified name).
 for x in List:
     for y in dupes:
         if x[9]==y[0] and x[0]==y[1] and x[2].split("_")[0]==y[2].split("_")[0] and x[2]!=y[2]:
            y.append(x[2])


 # Deep copy so the original can be renamed independently below.
 double_List = [x[:] for x in List]

 # Normalize each name's "chr..." suffix for ordering: numeric chromosomes
 # become signed ints (strand sign folded into the value), X/Y stay strings.
 chr_order=[]
 for x in dupes:
     temp = []
     for i in range(2,len(x)):
         if x[i].split("chr")[1].split("(")[0].isdigit():
            temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0]))
         else:
            temp.append(x[i].split("chr")[1][0:4])

     # If any sex chromosome is present, stringify everything so the sort
     # below compares a homogeneous list.
     for z in temp:
         if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z:
             temp = [str(j) for j in temp]
     temp=list(set(temp))
     temp.sort()
     chr_order.append(temp)

 # Rebuild a combined name per dupe: base name followed by every chromosome
 # (e.g. "_chr5(+)_chr7(-)").  When chromosome entries collide, the original
 # name's second "_" token is interleaved to keep names unique.
 final_dupes=[]
 for i in range(len(dupes)):
     final_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]])
     for x in chr_order[i]:
         result = re.match("[-+]?\d+$", str(x))
         if len(chr_order[i]) == len(set(chr_order[i])):
           if result is not None:

             if int(x)<0:
                final_dupes[i][1]=final_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)"
             else:
                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)"
           else:
                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(x)
         else:
             if result is not None:
                 if int(x) < 0:
                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(-)"
                 else:
                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(+)"
             else:
                 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(x)

 final_dupes.sort()
 # Deduplicate consecutive identical entries (list is sorted, so groupby works).
 final_dupes=list(final_dupes for final_dupes,_ in itertools.groupby(final_dupes))

 # Apply the combined names.  Records whose name carries shift annotations
 # (>3 "_"-separated parts) keep their trailing "_pre_post" suffix; plain
 # 3-part names are renamed in both the copy and the original list.
 for i in range(len(double_List)):
     for x in final_dupes:

         if double_List[i][9] == x[0] and double_List[i][0] == x[2] and len(double_List[i][2].split("_")) >3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
            gg=str("_"+double_List[i][2].split("_")[-2]+"_"+double_List[i][2].split("_")[-1])
            double_List[i][2] = x[1]+gg

         if double_List[i][9]==x[0] and double_List[i][0]== x[2] and len(double_List[i][2].split("_"))==3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
            double_List[i][2]=x[1]
            List[i][2] = x[1]

 List.sort()
 new_list=list(List for List,_ in itertools.groupby(List))

 double_List.sort()
 new_double_List = list(double_List for double_List, _ in itertools.groupby(double_List))

 return new_list, new_double_List
+
+
+#############################################################################################################################################################################################################
+
def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
    """Parse a Bowtie SAM file of templated reads and collect the results.

    mature_mirnas -- flat list alternating ">name ..." header and sequence lines
    path, name    -- SAM file path and sample label
    con           -- group label: "c" (control) or "t" (treated)
    l             -- multiprocessing lock guarding the shared containers
    remaining args are shared containers (presumably Manager proxies /
    Values -- confirm at call site) that accumulate per-sample results.

    Side effects only: appends per-sample isomiR tables, DESeq-style rows and
    unmapped-read totals to the shared containers.  `read` and
    `longestSubstring` come from mirbase_functions.
    """
    # Read the SAM file; split off header lines ("@"-prefixed first field).
    ini_sam = read(path, 0)
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    # Keep forward-strand mapped reads (flag '0') of miRNA-typical length 18-26 nt.
    unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9]) >= 18 and len(x[9]) <= 26]

    sorted_uni_arms = []

    for i in range(len(mature_mirnas)):
        tmp_count_reads = 0   # total reads mapped to this arm
        tmp_count_seq = 0     # total distinct sequences mapped to this arm
        for j in range(len(unique_seq)):

            # Reference names may carry a 4-char "{..}" suffix; strip it
            # before comparing against the mature miRNA name.
            if "{" in unique_seq[j][2].split("_")[0]:
                official = unique_seq[j][2].split("_")[0][:-4]
            else:
                official = unique_seq[j][2].split("_")[0]

            if mature_mirnas[i].split(" ")[0][1:] == official:

                temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
                off_part = longestSubstring(temp_mature, unique_seq[j][9])

                mat_diff = temp_mature.split(off_part)
                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

                unique_diff = unique_seq[j][9].split(off_part)
                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

                # Problem with hsa-miR-8485: leftover bases on the SAME side of
                # both mature and read mean a bogus hit -> mark record with 1
                # for removal below.
                if mat_diff[1] != 0 and unique_diff[1] != 0:
                    unique_seq[j] = 1
                    pre_pos = 0
                    post_pos = 0

                elif mat_diff[0] != 0 and unique_diff[0] != 0:
                    unique_seq[j] = 1
                    pre_pos = 0
                    post_pos = 0

                else:
                    pre_pos = mat_diff[0] - unique_diff[0]
                    post_pos = unique_diff[1] - mat_diff[1]
                    tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
                    tmp_count_seq = tmp_count_seq + 1

                # Encode 5'/3' end shifts into the isomiR name, e.g. "_+1_-2".
                if pre_pos != 0 or post_pos != 0:
                    if pre_pos == 0:
                        unique_seq[j][2] = unique_seq[j][2] + "_" + str(pre_pos) + "_" + '{:+d}'.format(post_pos)
                    elif post_pos == 0:
                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
                    else:
                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + '{:+d}'.format(post_pos)

        # Drop records marked as bogus above.
        for x in range(unique_seq.count(1)):
            unique_seq.remove(1)
        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
    dedup_unique_seq, double_fil_uni_seq = duplicate_chroms_isoforms(unique_seq)

    # Recompute per-arm totals on the chromosome-disambiguated list.
    for y in sorted_uni_arms:
        counts = 0
        seqs = 0
        for x in double_fil_uni_seq:
            if y[0] == x[2].split("_")[0]:
                counts += int(x[0].split("-")[1])
                seqs += 1

        y[1] = seqs
        y[2] = counts

    LHE = []
    l.acquire()
    # The original code had two byte-identical branches for con=="c" and
    # con=="t"; they are merged here (the group label travels inside `data`).
    if con == "c" or con == "t":
        LHE.extend(z[2] for z in double_fil_uni_seq)
        for y in double_fil_uni_seq:
            samples_mirna_names.append([y[2], y[9]])
        deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
        LHE_names.extend(LHE)
        # Flag '4' marks unmapped reads; read-id is "<n>-<count>".
        unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
        unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
        names.append(name)
        samples.append(dedup_unique_seq)
        data.append([con, name, double_fil_uni_seq, sorted_uni_arms])
        ini_sample.append(new_main_sam)
    l.release()
+
+######################################################################################################################################
+"""
+
+Read a sam file from Bowtie and do the followings:
+
+1) Remove reverse stranded mapped reads
+2) Remove unmapped reads 
3) Remove all sequences with fewer than 11 reads
4) Sort the arms with the most sequences in decreasing order
5) Sort the sequences of every arm with the most reads in decreasing order
6) Calculate total number of sequences of every arm
7) Calculate total number of reads of sequences of every arm.
8) Store all the information in a txt file
+
+"""
+
def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
    """Collect non-templated isomiRs from the unmapped reads of a SAM file.

    Scans unmapped reads (flag '4', 18-26 nt) for an exact occurrence of any
    mature miRNA sequence; a hit with no 5' shift is renamed
    ``<miRNA>__0_<+post>_<3' tail>`` and collected.  `con` is "c" or "t";
    `l` is a multiprocessing lock guarding the shared containers (`data`,
    `names`, `n_deseq`, ...), which are appended to as side effects.
    """

    ini_sam=read(path,0)  # `read` comes from mirbase_functions
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq=[]
    # Flag '4' = unmapped; keep miRNA-typical lengths 18-26 nt.
    unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]

    uni_seq=[]
    # Calculate the shifted positions for every isomir and add them to the name of it
    sorted_uni_arms = []
    # mature_mirnas alternates header/sequence lines; odd indices are sequences.
    for i in range(1,len(mature_mirnas),2):
        tmp_count_reads = 0   # calculate the total number of reads
        tmp_count_seq = 0     # calculate the total number of sequences

        for j in range(len(unique_seq)):

            temp_mature = mature_mirnas[i].strip().replace("U", "T")

            if temp_mature in unique_seq[j][9]:

                off_part = longestSubstring(temp_mature, unique_seq[j][9])

                mat_diff = temp_mature.split(off_part)
                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

                unique_diff = unique_seq[j][9].split(off_part)
                # A single occurrence of the overlap yields exactly 2 parts;
                # more parts would mean a repeated match, which is skipped.
                if len(unique_diff)<=2:
                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

                   pre_pos = mat_diff[0]-unique_diff[0]
                   post_pos = unique_diff[1]-mat_diff[1]

                   lengthofmir = len(off_part) + post_pos
                   if pre_pos == 0:
                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
                      tmp_count_seq = tmp_count_seq + 1

                      # NOTE(review): this inner check repeats the one above
                      # and is always true here.
                      if pre_pos == 0:

                          t_name=unique_seq[j].copy()
                          # Name format: <miRNA>__<pre>_<+post>_<non-templated 3' tail>
                          t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
                          uni_seq.append(t_name)


        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])


    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
    # Deduplicate while preserving first-seen order.
    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))

    LHE=[]

    l.acquire()
    # NOTE(review): the two branches below are identical; the group label is
    # carried inside `data`.
    if con=="c":
       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
       for x in unique_seq:
           if x[2]!="*":
              n_samples_mirna_names.append([x[2],x[9]])
       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
       n_LHE_names.extend(LHE)
       names.append(name)
       data.append([con,name,unique_seq,sorted_uni_arms])


    if con=="t":
       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
       for x in unique_seq:
           if x[2]!="*":
              n_samples_mirna_names.append([x[2],x[9]])
       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
       n_LHE_names.extend(LHE)
       names.append(name)
       data.append([con,name,unique_seq,sorted_uni_arms])
    l.release()
+
+#####################################################################################################################################################################################################################
def deseq2_temp(samples_mirna_names,deseq,con,l):
    """Build a DESeq2-style count matrix for one group and queue it.

    samples_mirna_names -- [name, sequence] pairs seen in ANY sample
    deseq  -- one list per sample of [name, count, sequence] rows
    con    -- "c" pushes to global queue q1, "t" to q2 (defined elsewhere)
    l      -- lock serializing the queue access

    Missing miRNAs are padded into every sample with a "0" count, then the
    per-sample rows are merged into [name, sequence, count1, count2, ...].
    """
    # BUG FIX: the original used key=lambda x: [0] (a constant list), which
    # never sorted; the intended key is the miRNA name.
    samples_mirna_names.sort(key=lambda x: x[0])
    for i in range(len(deseq)):
        for y in samples_mirna_names:
            flag = 0
            for x in deseq[i]:
                if y[0] == x[0]:
                    flag = 1
                    break

            if flag == 0:
                # miRNA absent from this sample: pad with a zero count.
                deseq[i].append([y[0], "0", y[1]])

    # Align every sample on the same (sorted-by-name) row order.
    [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
    # Seed the matrix with [name, sequence], then append each sample's count
    # wherever the names match.
    deseq_final = [[x[0], x[2]] for x in deseq[0]]
    [deseq_final[z].append(deseq[i][j][1]) for z, _ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j, _ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]

    l.acquire()
    if con == "c":
        q1.put(deseq_final)   # q1/q2 are module-level queues defined elsewhere
    if con == "t":
        q2.put(deseq_final)
    l.release()
+
+
+####################################################################################################################################################################################################################
+
def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E):
 """Align the control (LH2E) and treated (LH8E) count tables on a common
 miRNA set, merge multi-chromosome names, and optionally filter rows.

 LH2E/LH8E          -- rows of [name, sequence, count1, count2, ...]
 LH2E_names/...     -- [name, sequence] pairs present in each group
 filter_*, raw_*    -- output containers extended in place (shared lists)
 flag, names_*      -- passed through by the caller; not used here

 Relies on the global `args` (args.per = filter percentage, args.count =
 minimum count).  NOTE(review): the filtering loop assumes LH2E and LH8E
 stay index-aligned after the sorts above -- verify at call site.
 """

 # Names present in one group but missing from the other.
 LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
 LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]

 LH8E_add_names.sort()
 LH2E_add_names.sort()
 # sort + groupby = deduplicate consecutive identical entries.
 LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
 LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))

 LH2E.sort()
 LH8E.sort()
 LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
 LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))

 print("LHE_names")
 print([len(LH8E_add_names),len(LH2E_add_names)])
 print([len(LH8E),len(LH2E)])

 # Pad the missing names into each table with all-zero count columns.
 zeros=["0"]*(len(LH8E[0])-2)
 [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
 LH8E=LH8E+LH8E_add_names

 zeros=["0"]*(len(LH2E[0])-2)
 [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
 LH2E=LH2E+LH2E_add_names

 # Find sequences that appear under more than one name.
 dupes=[]
 final_LH2E =[]

 for num,_ in enumerate(LH2E):

    if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
        final_LH2E.append(LH2E[num][1])
        final_LH2E.append(LH2E[num][0])
    else:
        dupes.append(LH2E[num][1])


 dupes=list(set(dupes))

 dupes=[[x] for x in dupes]

 # For each duplicated sequence collect its alternative names; names sharing
 # the same "_"-prefix keep only the shortest variant.
 for x in LH2E:
     for y in dupes:
         if x[1]==y[0]:
             fl=0
             if len(y)==1:
                 y.append(x[0])
             else:
                 for i in range(1,len(y)):
                     if y[i].split("_")[0]==x[0].split("_")[0]:
                        fl=1
                        if len(x[0])<len(y[i]):
                           del y[i]
                           y.append(x[0])
                           break

                 if fl==0:
                    y.append((x[0]))

 # Collapse the collected names into one "a/b/c" merged name per sequence.
 for y in dupes:
    if len(y)>2:
        for i in range(len(y)-1,1,-1):
            y[1]=y[1]+"/"+y[i]
            del y[i]

 # Apply the merged names to both groups.
 for x in LH2E:
     for y in dupes:
         if x[1]==y[0]:
            x[0]=y[1]

 for x in LH8E:
    for y in dupes:
        if x[1]==y[0]:
           x[0]=y[1]


 LH2E.sort()
 LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))

 LH8E.sort()
 LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))

 LH8E_new=[]
 LH2E_new=[]

 # Optional expression filter: keep a row if at least `percent` of the
 # samples in either group reach args.count reads.  args.per == -1 disables.
 if int(args.per)!=-1:
    percent=int(args.per)/100
    print(percent)
    print(args.count)

    # NOTE(review): uses row [1] for the column count where the padding code
    # above used row [0]; rows all share the same width, so the value is the
    # same, but [0] would be consistent -- confirm before changing.
    c_col_filter=round(percent*(len(LH2E[1])-2))
    t_col_filter=round(percent*(len(LH8E[1])-2))

    for i, _ in enumerate(LH2E):
        c_cols=0
        t_cols=0

        # Number of samples reaching the count threshold in each group.
        c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(args.count)])
        t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(args.count)])

        if c_cols>=c_col_filter or t_cols>=t_col_filter:
           LH8E_new.append(LH8E[i])
           LH2E_new.append(LH2E[i])

 filter_LH2E.extend(LH2E_new)
 filter_LH8E.extend(LH8E_new)
 raw_LH2E.extend(LH2E)
 raw_LH8E.extend(LH8E)
+
+##################################################################################################################################################################################################################
+
def _write_counts_file(path, sample_names, rows):
    # Write one tab-separated counts table: a "Name<TAB>Sequence<TAB>samples..."
    # header line followed by one row per (merged) miRNA.
    with open(path, 'w') as fp:
        fp.write("Name\t")
        fp.write("Sequence")
        for y in sample_names:
            fp.write("\t" + y)
        for x in rows:
            fp.write("\n%s" % "\t".join(x))


def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag):
    """Write the raw (and, when filtering is enabled, filtered) count tables.

    flag == 1 writes the "Templated" tables, flag == 2 the "Non-Templated"
    ones; any other value is a no-op (matching the original behaviour).
    File names come from the global `args` (args.n1 = control group name,
    args.n2 = treated group name); filtered tables are only produced when
    args.per != -1.  The original four near-identical write blocks are
    factored into _write_counts_file.
    """
    if flag == 1:
        kind = "Templated"
    elif flag == 2:
        kind = "Non-Templated"
    else:
        return

    # Filtered tables only exist when percentage filtering was enabled.
    if int(args.per) != -1:
        _write_counts_file('Counts/Filtered ' + args.n2 + ' ' + kind + ' Counts', names_tre, fil_LH8E)
        _write_counts_file('Counts/Filtered ' + args.n1 + ' ' + kind + ' Counts', names_con, fil_LH2E)

    _write_counts_file('Counts/Raw ' + args.n2 + ' ' + kind + ' Counts', names_tre, raw_LH8E)
    _write_counts_file('Counts/Raw ' + args.n1 + ' ' + kind + ' Counts', names_con, raw_LH2E)
+
+
+#########################################################################################################################################
+
def ssamples(names, samp, folder, pro):
    """Write one two-column count file per sample.

    For every data column (index 2 onward of the rows in *samp*) a file
    ``<folder><sample name>.txt`` is created containing a header line and one
    ``<miRNA id>\\t<count>`` line per row.  *pro* is accepted for interface
    compatibility but is not used here.
    """
    n_columns = len(samp[0])
    for col in range(2, n_columns):
        sample_name = names[col - 2]
        with open(folder + sample_name + '.txt', 'w') as out:
            out.write("miRNA id" + "\t" + sample_name + "\n")
            for row in samp:
                out.write("\t".join([row[0], row[col]]) + "\n")
+
+##################################################################################################################
+
def DB_write(con,name,unique_seq,sorted_uni_arms,f):
 """Write a human-readable per-arm isomiR report for one sample.

 con  -- "c" or "t"; selects the output directory
 f    -- 1 = templated report (split1/split2), 2 = non-templated (split3/split4)
 unique_seq      -- SAM-style records ([0]=id "n-count", [2]=name, [9]=sequence)
 sorted_uni_arms -- [arm name, sequence count, read count] summary rows
 """

 if f==1:
    # Write a txt file with all the information
    if con=="c":
       fp = open('split1/'+name, 'w')

       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
    if con=="t":
       fp = open('split2/'+name, 'w')
       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))


    for i in range(len(sorted_uni_arms)):
        temp = []
        for j in range(len(unique_seq)):

            # Templated names: substring match on the base name before "_".
            if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]:

                temp.append(unique_seq[j])

        # Most-abundant sequences first (read count is after the "-" in the id).
        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
        fp.write("*********************************************************************************************************\n")
        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
        fp.write("*********************************************************************************************************\n\n")
        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
        fp.write("\n" + "\n")
    fp.close()

 if f==2:

    if con=="c":
       fp = open('split3/'+name, 'w')
       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
    if con=="t":
       fp = open('split4/'+name, 'w')
       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))


    for i in range(len(sorted_uni_arms)):
        temp = []
        for j in range(len(unique_seq)):
               # Non-templated names use a "__" separator and an exact match
               # (unlike the substring match in the f==1 branch).
               if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
                  temp.append(unique_seq[j])
        if temp!=[]:
           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
           fp.write("*********************************************************************************************************\n")
           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
           fp.write("*********************************************************************************************************\n\n")
           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
           fp.write("\n" + "\n")
    fp.close()
+
+
+##########################################################################################################################
+
def new_mat_seq(pre_unique_seq,mat_mirnas,l):
    """Promote the most-read isomiR variants to reference entries.

    For every record with a plain 3-part name, the higher-read records whose
    name contains it are renamed with a ">" prefix and their name/sequence
    appended (under lock `l`) to the shared `mat_mirnas` list.

    NOTE(review): y[2] is mutated (">" prefix) while the outer loop is still
    matching with `x[2] in y[2]`, so results depend on iteration order --
    preserve statement order if editing.
    """

    unique_iso = []
    for x in pre_unique_seq:
       if len(x[2].split("_"))==3:
          for y in pre_unique_seq:
              # Same base name with more reads, not yet collected.
              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
                    y[2]=">"+y[2]
                    unique_iso.append(y)
    l.acquire()
    # Append FASTA-style pairs: ">name" line then sequence line.
    for x in unique_iso:
        mat_mirnas.append(x[2])
        mat_mirnas.append(x[9])
    l.release()
+
+#########################################################################################################################
def _split_refseq_template(rows):
    # Classify merged count rows ([merged name, sequence, total reads]) into
    # miRNA RefSeq entries (any merged sub-name ending in a "chr..." token)
    # vs templated isomiRs.  Returns (n_refseq, refseq_reads, n_templ,
    # templ_reads).  Equivalent to the original per-row "/"-splitting loops.
    n_ref = ref_reads = n_tem = tem_reads = 0
    for row in rows:
        if any("chr" in part.split("_")[-1] for part in row[0].split("/")):
            n_ref += 1
            ref_reads += row[2]
        else:
            n_tem += 1
            tem_reads += row[2]
    return n_ref, ref_reads, n_tem, tem_reads


def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
    """Draw the control/treated read-composition pie charts (with the
    non-templated slice) and save them as pie_non.png.

    merge_* rows are [name, sequence, count1, count2, ...]; the *_unmap
    arguments are the sequence/read totals of unmapped reads, from which the
    non-templated hits (counted as unmapped upstream) are subtracted.
    """
    # Sum the per-sample count columns into one total per row.
    c_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_LH2E]
    t_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_LH8E]
    c_non_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
    t_non_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_non_LH8E]

    c_non = len(c_non_samples)
    c_non_counts = sum(x[2] for x in c_non_samples)
    t_non = len(t_non_samples)
    t_non_counts = sum(x[2] for x in t_non_samples)

    # Non-templated reads were included in the unmapped totals upstream.
    c_unmap = c_unmap - c_non
    # BUG FIX: the original computed `t_unmap = c_unmap - t_non`
    # (copy-paste error mixing the control total into the treated one).
    t_unmap = t_unmap - t_non

    c_unmap_counts = c_unmap_counts - c_non_counts
    t_unmap_counts = t_unmap_counts - t_non_counts

    c_mature, c_mat_counts, c_templ, c_tem_counts = _split_refseq_template(c_samples)
    t_mature, t_mat_counts, t_templ, t_tem_counts = _split_refseq_template(t_samples)

    fig = plt.figure(figsize=(7,5))
    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    ax1 = plt.subplot2grid((1,2),(0,0))
    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
    [x.set_fontsize(8) for x in texts]
    plt.title('Control Group (reads)',fontsize=12)
    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    ax2 = plt.subplot2grid((1,2),(0,1))
    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
    [x.set_fontsize(8) for x in texts]
    plt.title('Treated Group (reads)', fontsize=12)
    plt.savefig('pie_non.png',dpi=300)
+
+######################################################################################################################################################
+
def merging_names(LH2E_copy, new):
    """Merge rows that share the same sequence under one combined name.

    LH2E_copy rows are [name, sequence, ...].  When several rows carry the
    same sequence, their names are joined into a single "a/b/..." name (names
    sharing the same "_"-prefix keep only the shortest variant), the rows are
    renamed in place, deduplicated, and appended to *new*.
    """
    seen = []
    dup_seqs = []
    for row in LH2E_copy:
        if row[1] not in seen and row[0] not in seen:
            seen.append(row[1])
            seen.append(row[0])
        else:
            dup_seqs.append(row[1])

    # One entry per duplicated sequence: [sequence, name, name, ...]
    merged = [[s] for s in set(dup_seqs)]

    for row in LH2E_copy:
        for entry in merged:
            if row[1] != entry[0]:
                continue
            if len(entry) == 1:
                entry.append(row[0])
                continue
            matched = False
            for k in range(1, len(entry)):
                if entry[k].split("_")[0] == row[0].split("_")[0]:
                    matched = True
                    # Same base name: keep the shorter spelling.
                    if len(row[0]) < len(entry[k]):
                        del entry[k]
                        entry.append(row[0])
                    break
            if not matched:
                entry.append(row[0])

    # Collapse the collected names into one "a/b/..." string.
    for entry in merged:
        if len(entry) > 2:
            for k in range(len(entry) - 1, 1, -1):
                entry[1] = entry[1] + "/" + entry[k]
                del entry[k]

    # Rename every row whose sequence was duplicated.
    for row in LH2E_copy:
        for entry in merged:
            if row[1] == entry[0]:
                row[0] = entry[1]

    # Sort, drop consecutive duplicates, and publish the result.
    LH2E_copy.sort()
    deduped = [rows for rows, _ in itertools.groupby(LH2E_copy)]
    new.extend(deduped)
+
+
+######################################################################################################################################################
def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
    """Draw the control/treated read-composition pie charts (RefSeq /
    templated / unmapped) and save them as pie_tem.png.

    merge_* rows are [name, sequence, count1, count2, ...]; *_unmap /
    *_unmap_counts are the unmapped sequence/read totals.
    """

    # Sum the per-sample count columns into one total per row.
    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]

    c_templ = 0
    c_tem_counts = 0
    c_mature = 0
    c_mat_counts = 0
    t_templ = 0
    t_tem_counts = 0
    t_mature = 0
    t_mat_counts = 0

    # A merged name ("a/b/...") counts as miRNA RefSeq if ANY of its
    # sub-names ends in a "chr..." token; otherwise it is a templated isomiR.
    for x in c_samples:

        if "/" not in x[0]:
            if "chr" in  x[0].split("_")[-1]:
               c_mature+=1
               c_mat_counts += x[2]
            else:
               c_templ+=1
               c_tem_counts += x[2]
        else:
           f=0
           for y in x[0].split("/"):
               if "chr" in y.split("_")[-1]:
                  c_mature+=1
                  c_mat_counts += x[2]
                  f=1
                  break
           if f==0:
              c_templ+=1
              c_tem_counts += x[2]

    # Same classification for the treated group.
    for x in t_samples:

        if "/" not in x[0]:
            if "chr" in  x[0].split("_")[-1]:
               t_mature+=1
               t_mat_counts += x[2]
            else:
               t_templ+=1
               t_tem_counts += x[2]
        else:
           f=0
           for y in x[0].split("/"):
               if "chr" in y.split("_")[-1]:
                  t_mature+=1
                  t_mat_counts += x[2]
                  f=1
                  break
           if f==0:
              t_templ+=1
              t_tem_counts += x[2]


    # Two side-by-side pies (reads): control on the left, treated on the right.
    fig = plt.figure()
    labels = 'miRNA RefSeq','Template', 'Unmapped'
    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
    colors = ['gold', 'yellowgreen', 'lightskyblue']
    explode = (0.2, 0.05, 0.1)
    ax1 = plt.subplot2grid((1,2),(0,0))
    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
    [x.set_fontsize(8) for x in texts]
    plt.title('Control group (reads)', fontsize=12)
    labels = 'miRNA RefSeq','Template', 'Unmapped'
    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
    colors = ['gold', 'yellowgreen', 'lightskyblue']
    explode = (0.2, 0.05, 0.1)
    ax2 = plt.subplot2grid((1,2),(0,1))
    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
    [x.set_fontsize(8) for x in texts]
    plt.title('Treated group (reads)',fontsize = 12)
    plt.savefig('pie_tem.png',dpi=300)
+
+###################################################################################################################################################################################################################
+
def _spider_tally(samples):
    """Classify rows into isomiR categories, counting sequences and reads.

    The category of a single name is decided by its trailing "_"-separated
    fields: a "chr..." last field marks a reference (mature) miRNA; otherwise
    the last two fields are integer end-offsets where 0 in the last field
    selects the 5'-only bucket, 0 in the second-to-last the 3'-only bucket,
    and anything else the both-ends bucket.  Rows holding several "/"-joined
    names are mature if ANY name is mature; otherwise they fall into the
    'Others' bucket once per candidate name (kept identical to the original
    logic, so reads may be multi-counted there).

    Returns two tuples, (sequence counts, read counts), each ordered as
    (five_prime, three_prime, both, mature, other).
    """
    n5 = n3 = nboth = nmat = nexc = 0
    r5 = r3 = rboth = rmat = rexc = 0

    for name, _seq, reads in samples:
        if "/" not in name:
            parts = name.split("_")
            if "chr" in parts[-1]:
                nmat += 1
                rmat += reads
            elif int(parts[-1]) == 0:
                n5 += 1
                r5 += reads
            elif int(parts[-2]) == 0:
                n3 += 1
                r3 += reads
            else:
                nboth += 1
                rboth += reads
        else:
            alternatives = name.split("/")
            if any("chr" in alt.split("_")[-1] for alt in alternatives):
                nmat += 1
                rmat += reads
            else:
                nexc += len(alternatives)
                rexc += len(alternatives) * reads

    return (n5, n3, nboth, nmat, nexc), (r5, r3, rboth, rmat, rexc)


def _spider_pct(values, total):
    """Express *values* as percentages of *total*, rounded to 2 decimals."""
    return [round(v / total * 100, 2) for v in values]


def make_spider(merge_LH2E, merge_LH8E):
    """Build the two radar ('spider') charts comparing control vs treated:
    one over unique-sequence percentages, one over read-count percentages.

    merge_LH2E / merge_LH8E : merged count matrices (control / treated),
        rows of [name, sequence, count_sample1, count_sample2, ...]

    Drawing is delegated to spider_last() (flag 1 -> non-redundant plot,
    flag 2 -> redundant plot).
    """
    c_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_LH2E]
    t_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_LH8E]

    c_seq, c_reads = _spider_tally(c_samples)
    t_seq, t_reads = _spider_tally(t_samples)

    # normalise every bucket to a percentage of its own group total
    c_seq = _spider_pct(c_seq, sum(c_seq))
    c_reads = _spider_pct(c_reads, sum(c_reads))
    t_seq = _spider_pct(t_seq, sum(t_seq))
    t_reads = _spider_pct(t_reads, sum(t_reads))

    c_5, c_3, c_both, c_mature, c_exception = c_seq
    c_5_counts, c_3_counts, c_both_counts, c_mat_counts, c_exception_counts = c_reads
    t_5, t_3, t_both, t_mature, t_exception = t_seq
    t_5_counts, t_3_counts, t_both_counts, t_mat_counts, t_exception_counts = t_reads

    # radial axis limits for each chart
    radar_max = max(c_seq + t_seq)
    radar_max_counts = max(c_reads + t_reads)

    df = pd.DataFrame({
        'group': ['Controls', 'Treated'],
        "5' and 3' isomiRs": [c_both, t_both],
        "3' isomiRs": [c_3, t_3],
        'miRNA RefSeq': [c_mature, t_mature],
        "5' isomiRs": [c_5, t_5],
        'Others*': [c_exception, t_exception]})

    df1 = pd.DataFrame({
        'group': ['Controls', 'Treated'],
        "5' and 3' isomiRs": [c_both_counts, t_both_counts],
        "3' isomiRs": [c_3_counts, t_3_counts],
        'miRNA RefSeq': [c_mat_counts, t_mat_counts],
        "5' isomiRs": [c_5_counts, t_5_counts],
        'Others*': [c_exception_counts, t_exception_counts]})

    spider_last(df, radar_max, 1)
    spider_last(df1, radar_max_counts, 2)
+
+
def spider_last(df,radar_max,flag):
  """Draw one radar (spider) chart from *df* and save it as a PNG.

  df        : DataFrame whose first column is 'group' (rows: Controls,
              Treated) and whose remaining columns are isomiR categories
              holding percentage values.
  radar_max : largest percentage in *df*; used to scale the radial axis.
  flag      : 1 -> save as 'spider_non_red.png', otherwise 'spider_red.png'.
  """
  # ------- PART 1: Create background
  fig = plt.figure()
  # number of variable
  categories=list(df)[1:]
  N = len(categories)

  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
  angles = [n / float(N) * 2 * pi for n in range(N)]
  angles += angles[:1]

  # Initialise the spider plot
  ax = plt.subplot(111, polar=True)

  # If you want the first axis to be on top:
  ax.set_theta_offset(pi/2)
  ax.set_theta_direction(-1)

  # Draw one axe per variable + add labels labels yet
  plt.xticks(angles[:-1], categories, fontsize=11)

  # Draw ylabels
  # Scale the radial axis to a "nice" limit: add 10% headroom, then round up
  # at the leading decimal digit; ticks every quarter of that limit.
  radar_max=round(radar_max+radar_max*0.1)
  mul=len(str(radar_max))-1
  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
  sep = round(maxi/4)
  # NOTE(review): the 5th tick (5*sep) lies above ylim=maxi (= 4*sep) and is
  # therefore never displayed — confirm whether only 4 ticks are intended.
  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
  plt.ylim(0, maxi)

  # ------- PART 2: Add plots

  # Plot each individual = each line of the data
  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable

  # Ind1
  # row 0 = Controls; repeat the first value to close the polygon
  values=df.loc[0].drop('group').values.flatten().tolist()
  values += values[:1]
  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
  ax.fill(angles, values, 'b', alpha=0.1)

  # Ind2
  # row 1 = Treated
  values=df.loc[1].drop('group').values.flatten().tolist()
  values += values[:1]
  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
  ax.fill(angles, values, 'r', alpha=0.1)

  # Add legend
  # flag selects the output file; both branches use the same legend placement
  if flag==1:
     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
     plt.savefig('spider_non_red.png',dpi=300)
  else:
     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
     plt.savefig('spider_red.png',dpi=300)
+
+
+#############################################################################################################################################################################################################
def hist_red(samples, flag):
    """Plot a stacked (mapped/unmapped) read-length histogram, in percent of
    total reads, and save it as '{c,t}_hist_red.png'.

    samples : iterable of SAM-like record lists; field 1 is the SAM flag
              ("0" = mapped, "4" = unmapped), field 9 the read sequence, and
              field 0 a "name-count" identifier whose count follows the '-'.
    flag    : "c" for the control group, "t" for the treated group.

    Only lengths up to 35 nt are plotted.
    """
    lengths = []
    seq = []

    if flag == "c":
        # typo "Redudant" fixed
        title = "Length Distribution of Control group (Redundant reads)"
    if flag == "t":
        title = "Length Distribution of Treated group (Redundant reads)"

    for smp in samples:
        for rec in smp:
            lengths.append(len(rec[9]))
            if rec[1] == "0":
                seq.append([rec[9], rec[0].split("-")[1], "Mapped"])
            if rec[1] == "4":
                seq.append([rec[9], rec[0].split("-")[1], "Unmapped"])

    # BUG FIX: set iteration order is unspecified; the xticks below assume the
    # first/last entries are the extremes, so sort the unique lengths.
    uni_len = sorted(x for x in set(lengths) if x <= 35)

    # deduplicate identical (sequence, count, status) records
    seq.sort()
    uni_seq = [s for s, _ in itertools.groupby(seq)]

    total_reads = sum(int(x[1]) for x in uni_seq)

    map_reads = []
    unmap_reads = []
    length = []
    for y in uni_len:
        map_temp = sum(int(x[1]) for x in uni_seq if len(x[0]) == y and x[2] == "Mapped")
        unmap_temp = sum(int(x[1]) for x in uni_seq if len(x[0]) == y and x[2] == "Unmapped")
        length.append(y)
        map_reads.append(round(map_temp / total_reads * 100, 2))
        unmap_reads.append(round(unmap_temp / total_reads * 100, 2))

    # 20% headroom above the tallest stacked bar
    ylim = max(m + u for m, u in zip(map_reads, unmap_reads))
    ylim = ylim + ylim * 20 / 100

    fig, ax = plt.subplots()
    width = 0.8
    ax.bar(length, unmap_reads, width, label='Unmapped')
    ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
    plt.xticks(np.arange(length[0], length[-1] + 1, 1))
    plt.xlabel('Length (nt)', fontsize=14)
    plt.ylabel('Percentage', fontsize=14)
    plt.title(title, fontsize=14)
    ax.legend()
    plt.ylim([0, ylim])
    ax.grid(axis='y', linewidth=0.2)

    if flag == 'c':
        plt.savefig('c_hist_red.png', dpi=300)
    if flag == 't':
        plt.savefig('t_hist_red.png', dpi=300)
+
+#################################################################################################################
+
+
def logo_seq_red(merge, flag):
    """Plot nucleotide composition (bar chart + sequence logo) of the first
    5 nt of the non-templated tail, weighted by read counts (redundant view).
    Saves '{c,t}_bar.png' and '{c,t}_logo.png'.

    merge : merged count matrix rows [name, sequence, count1, count2, ...];
            the tail is the last "_"-separated field of the name.
    flag  : "c" for the control group, "t" for the treated group.
    """
    if flag == "c":
        titlos = "Control group (Redundant)"
        file_logo = "c_logo.png"
        file_bar = "c_bar.png"
    if flag == "t":
        titlos = "Treated group (Redundant)"
        file_logo = "t_logo.png"
        file_bar = "t_bar.png"

    c_samples = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge]

    A = [0] * 5
    C = [0] * 5
    G = [0] * 5
    T = [0] * 5
    total_reads = 0

    for y in c_samples:
        reads = y[2]
        if "/" in y[0]:
            # several candidate names: keep the shortest tail (ties broken
            # alphabetically), matching the original min() over [len, tail]
            tail = min([len(p.split("_")[-1]), p.split("_")[-1]] for p in y[0].split("/"))[1]
        else:
            tail = y[0].split("_")[-1]
        total_reads += reads
        for i in range(min(5, len(tail))):
            # any character that is not A/C/G (e.g. N) is binned as T,
            # mirroring the original else-branch
            if tail[i] == "A":
                A[i] += reads
            elif tail[i] == "C":
                C[i] += reads
            elif tail[i] == "G":
                G[i] += reads
            else:
                T[i] += reads

    # BUG FIX: the original computed round(x*100, 1) / total_reads, i.e. it
    # rounded BEFORE normalising, so the plotted "percentages" were not
    # actually rounded to one decimal.  Round after dividing instead.
    A[:] = [round(x * 100 / total_reads, 1) for x in A]
    C[:] = [round(x * 100 / total_reads, 1) for x in C]
    G[:] = [round(x * 100 / total_reads, 1) for x in G]
    T[:] = [round(x * 100 / total_reads, 1) for x in T]

    data = {'A': A, 'C': C, 'G': G, 'T': T}
    df = pd.DataFrame(data, index=[1, 2, 3, 4, 5])
    h = df.plot.bar(color=tuple(["g", "b", "gold", "r"]))
    h.grid(axis='y', linewidth=0.2)
    plt.xticks(rotation=0, ha="right")
    plt.ylabel("Counts (%)", fontsize=18)
    plt.xlabel("Positions (nt)", fontsize=18)
    plt.title(titlos, fontsize=20)
    plt.tight_layout()
    plt.savefig(file_bar, dpi=300)

    import logomaker as lm
    crp_logo = lm.Logo(df, font_name='DejaVu Sans')
    crp_logo.style_spines(visible=False)
    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)

    # style using Axes methods
    crp_logo.ax.set_title(titlos, fontsize=18)
    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16, labelpad=5)
    crp_logo.ax.set_xlabel("Positions (nt)", fontsize=16, labelpad=5)
    crp_logo.ax.xaxis.set_ticks_position('none')
    crp_logo.ax.xaxis.set_tick_params(pad=-1)
    figure = plt.gcf()
    figure.set_size_inches(6, 4)
    crp_logo.fig.savefig(file_logo, dpi=300)
+
+##########################################################################################################################################################################################################
+
+
+
def logo_seq_non_red(merge_LH2E):
    """Plot nucleotide composition of the first 5 nt of the non-templated
    tail, counting each unique sequence once (non-redundant view).
    Saves 'bar2.png' and 'logo2.png'.
    """
    rows = [[x[0], x[1], sum(int(i) for i in x[2:])] for x in merge_LH2E]

    # per-base counters over the first 5 tail positions
    counts = {base: [0] * 5 for base in "ACGT"}

    for entry in rows:
        if "/" in entry[0]:
            # several candidate names: keep the shortest tail, ties broken
            # alphabetically
            candidates = [[len(p.split("_")[-1]), p.split("_")[-1], entry[2]]
                          for p in entry[0].split("/")]
            tail = min(candidates)[1]
        else:
            tail = entry[0].split("_")[-1]
        for pos in range(min(5, len(tail))):
            # anything that is not A/C/G (e.g. N) is binned as T
            base = tail[pos] if tail[pos] in ("A", "C", "G") else "T"
            counts[base][pos] += 1

    data = {'A': counts['A'], 'C': counts['C'], 'G': counts['G'], 'T': counts['T']}
    df = pd.DataFrame(data, index=[1, 2, 3, 4, 5])
    h = df.plot.bar(title="Non-templated nucleotides after templated sequence", color=tuple(["g", "b", "gold", "r"]))
    h.set_xlabel("Positions (nt)")
    h.set_ylabel("Unique sequences")
    plt.xticks(rotation=0, ha="right")
    plt.tight_layout()
    plt.savefig("bar2.png", dpi=300)

    import logomaker as lm
    crp_logo = lm.Logo(df, font_name='DejaVu Sans')

    # style using Logo methods
    crp_logo.style_spines(visible=False)
    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)

    # style using Axes methods
    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
    crp_logo.ax.xaxis.set_ticks_position('none')
    crp_logo.ax.xaxis.set_tick_params(pad=-1)
    crp_logo.ax.set_title("Non-redundant")
    figure = plt.gcf()
    crp_logo.fig.savefig('logo2.png', dpi=300)
+
+
+###################################################################################################################################################################################################################
+
def ssamples1(tem_names, tem_samp, non_names, non_samp, folder, pro):
    """Write one per-sample count file combining templated and
    non-templated rows.

    For every sample column i (columns 2.. of *tem_samp*), creates
    ``folder + tem_names[i-2] + '.txt'`` holding a "miRNA id<TAB>name"
    header, one "<id>\\t<count>" line per row of *tem_samp*, then the rows
    of *non_samp* for every position of *non_names* whose name matches.

    tem_names / non_names : sample names, aligned with columns 2.. of the
                            corresponding matrices
    tem_samp / non_samp   : rows of [mirna_id, sequence, count1, count2, ...]
    folder                : output directory prefix (must end with a separator)
    pro                   : accepted for interface compatibility but unused
    """
    for i in range(2, len(tem_samp[0])):
        name = tem_names[i - 2]
        # context manager guarantees the handle is closed even on error
        # (the original leaked the handle if a write raised)
        with open(folder + name + '.txt', 'w') as fp:
            fp.write("miRNA id" + "\t" + name + "\n")

            for row in tem_samp:
                fp.write("\t".join([row[0], row[i]]) + "\n")

            for j, non_name in enumerate(non_names):
                if non_name == name:
                    for row in non_samp:
                        fp.write("\t".join([row[0], row[j + 2]]) + "\n")
+
+###################################################################################################################################################################################################################
+
def download_matures(matures, org_name):
    """Fetch the current mirBase mature-miRNA FASTA and keep *org_name* hits.

    Appends alternating header/sequence lines onto *matures* (typically a
    multiprocessing manager list shared with the parent process) for every
    FASTA record whose header contains *org_name*.
    """
    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz'
    compressed = urllib.request.urlopen(url).read()
    lines = gzip.decompress(compressed).decode('utf-8').split("\n")

    # FASTA layout here: header on even index, its sequence right after
    for idx in range(0, len(lines) - 1, 2):
        header = lines[idx]
        if org_name in header:
            matures.append(header)
            matures.append(lines[idx + 1])
+
+###################################################################################################################################################################################################################
def non_template_ref(sc, st, all_isoforms):
    """Collect unique isoform reference entries from aligned samples.

    sc / st       : control / treated sample collections; each sample is a
                    list of SAM-like rows where field 2 is the reference
                    name and field 9 the read sequence.
    all_isoforms  : output list (typically a multiprocessing manager list);
                    receives ">name", sequence pairs for every row whose
                    name contains ")_", skipping names already present.
    """
    # Membership tests against a manager list are O(n) proxy round-trips per
    # lookup; track already-added names in a local set instead.  Headers all
    # start with ">" while sequence lines do not, so seeding the set from any
    # pre-existing entries preserves the original dedup semantics.
    seen = {entry for entry in all_isoforms if entry.startswith(">")}

    for group in (sc, st):
        for sample in list(group):
            for row in sample:
                if ")_" not in row[2]:
                    continue
                header = ">" + row[2]
                if header not in seen:
                    seen.add(header)
                    all_isoforms.append(header)
                    all_isoforms.append(row[9])
+
+################################################################################################################################################################################################
+
def deseqe2(sample, mir_names, l, new_d, sample_name, sample_order):
    """Pad *sample* to the full miRNA universe, then publish it under lock.

    sample       : list of [mirna_id, count, sequence] rows for one sample
                   (extended in place with zero-count rows)
    mir_names    : list of [mirna_id, sequence] pairs — every miRNA that
                   must appear in the output
    l            : multiprocessing lock guarding the shared output lists
    new_d        : shared list receiving the padded, sorted, deduplicated
                   sample (same index as the entry added to *sample_order*)
    sample_name  : name recorded in *sample_order*
    sample_order : shared list of sample names
    """
    # Set lookup replaces the original O(len(mir_names) * len(sample)) scan.
    present = {row[0] for row in sample}
    for entry in mir_names:
        if entry[0] not in present:
            sample.append([entry[0], "0", entry[1]])
            present.add(entry[0])

    sample.sort(key=lambda row: row[0])
    # drop fully identical consecutive rows (relies on the sort above)
    sample = [row for row, _ in itertools.groupby(sample)]

    l.acquire()
    try:
        new_d.append(sample)
        sample_order.append(sample_name)
    finally:
        # always release, even if the proxied appends raise
        l.release()
+
+###############################################################################################################################################################################################
+
if __name__ == '__main__':

 starttime = time.time()

 # shared plumbing for all worker processes
 # NOTE(review): q1 and q2 appear unused in this block — confirm before removing
 q1 = Queue()
 q2 = Queue()
 lock = Lock()
 manager = Manager()

 # fetch the mirBase mature FASTA in the background while inputs are prepared
 mature_mirnas=manager.list()
 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
 ps_mature.start()

 # sample arguments arrive as "name,path" tokens; strip the leading '[' of
 # the first token, then pair tokens up as (label, path)
 args.control[0]=args.control[0][1:]
 # NOTE(review): no-op — the sliced value is discarded; presumably meant to
 # strip a trailing ']' (the last char is instead dropped later via
 # path[1][:-1]). Confirm.
 args.control[len(args.control)-1][:-1]
 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]

 args.treated[0]=args.treated[0][1:]
 # NOTE(review): same no-op as above
 args.treated[len(args.treated)-1][:-1]
 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]


##############  Detection of templated isoforms ################

 # shared result containers for the control group
 # NOTE(review): radar appears unused here — confirm
 radar = manager.list([0,0,0,0])
 samples = manager.list()
 data= manager.list()
 names_con=manager.list()
 samples_mirna_names=manager.list()
 deseq=manager.list()
 unmap_seq=manager.Value('i',0)
 unmap_counts=manager.Value('i',0)
 LH2E_names=manager.list()
 ini_c_samples = manager.list()


 # shared result containers for the treated group
 radar1 = manager.list([0,0,0,0])
 samples1 = manager.list()
 data1 = manager.list()
 names_tre = manager.list()
 samples_mirna_names1=manager.list()
 deseq1=manager.list()
 unmap1_seq = manager.Value('i',0)
 unmap1_counts = manager.Value('i',0)
 LH8E_names=manager.list()
 ini_t_samples = manager.list()
 # the reference download must finish before alignment parsing starts
 ps_mature.join()


 mature_mirnas=list(mature_mirnas)


 starttime1 = time.time()
 # one `sam` worker per sample file; "c"/"t" tags the group and
 # path[1][:-1] drops the trailing character from the path token
 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])

 [p.start() for p in ps_sam]
 [p.join() for p in ps_sam]
 print('SAM took {} seconds'.format(time.time() - starttime1))

 # length histograms of the raw (redundant) reads, drawn concurrently
 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
 [x.start() for x in ps_hist]

 starttime200=time.time()

 sc = list(samples)
 st = list(samples1)

 names_con=list(names_con)
 names_tre=list(names_tre)
 # sort + groupby deduplicates the [name, sequence] pairs
 samples_mirna_names=list(samples_mirna_names)
 samples_mirna_names.sort()
 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))

 samples_mirna_names1=list(samples_mirna_names1)
 samples_mirna_names1.sort()
 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))

 deseq=list(deseq)
 deseq1=list(deseq1)

 # pad every sample to the full miRNA universe (zero counts) in parallel
 new_names_con=manager.list()
 new_names_tre=manager.list()
 new_deseq=manager.list()
 new_deseq1=manager.list()
 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])

 [z.start() for z in ps_deseq]
 [z.join() for z in ps_deseq]
 new_deseq=list(new_deseq)
 new_deseq1=list(new_deseq1)

 # build count matrices: [name, sequence, count_sample1, count_sample2, ...]
 # (all padded samples share the same sorted row order)
 LH2E=[[x[0],x[2]] for x in new_deseq[0]]
 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]

 LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]

 print('Deseq took {} seconds'.format(time.time() - starttime200))

 merg_nam_LH2E=manager.list()
 merg_nam_LH8E=manager.list()

 # deep copies, presumably because merging_names mutates its input — confirm
 LH2E_copy=copy.deepcopy(list(LH2E))
 LH8E_copy=copy.deepcopy(list(LH8E))

 fil_sort_tre=manager.list()
 fil_sort_con=manager.list()
 raw_sort_tre=manager.list()
 raw_sort_con=manager.list()

 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con))
 ps_main.start()

 # analysis type "2" additionally collects isoform references for the
 # non-templated pass further below
 if args.anal=="2":
    all_iso = manager.list()
    ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
    ps_non_iso.start()

 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
 [x.start() for x in ps_merge]
 [x.join() for x in ps_merge]

 merg_nam_LH2E=list(merg_nam_LH2E)
 merg_nam_LH8E=list(merg_nam_LH8E)

 starttime2 = time.time()
 # database dumps plus the summary graphs, all as separate processes
 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
 if args.anal == "1":
    procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])

 [p.start() for p in procs]


 if args.anal=="1":
    [x.join() for x in ps_hist]
    [p.join() for p in procs]
    # NOTE(review): args=(args.anal) is NOT a tuple; this only works because a
    # 1-character string is iterable — should be args=(args.anal,)
    ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
    ps_pdf.start()

 print('Graphs took {} seconds'.format(time.time() - starttime2))

 ps_main.join()

 fil_sort_con=list(fil_sort_con)
 fil_sort_tre=list(fil_sort_tre)
 # fall back to the unfiltered matrices when filtering removed everything
 if fil_sort_con==[]:
    fil_sort_con=raw_sort_con
    fil_sort_tre=raw_sort_tre

 raw_sort_con=list(raw_sort_con)
 raw_sort_tre=list(raw_sort_tre)
 names_con=list(new_names_con)
 names_tre=list(new_names_tre)

 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1))
 ps_write.start()

 # per-sample matrices for the differential-expression step
 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
 [p.start() for p in ps1_matrix]

 if args.anal=="1":
    ps_pdf.join()
 if args.anal=="2":
    [p.join() for p in procs]
    [x.join() for x in ps_hist]

 ps_write.join()
 [p.join() for p in ps1_matrix]


############################## Detection of Both  #######################################

 starttime10 = time.time()

 # second pass (analysis type "2"): re-align against the reference extended
 # with the collected isoforms to also detect non-templated isomiRs
 if args.anal == "2":

  # shared containers for the non-templated pass, control group
  n_data= manager.list()
  n_names_con=manager.list()
  n_samples_mirna_names=manager.list()
  n_deseq=manager.list()
  n_LH2E_names=manager.list()

  # shared containers for the non-templated pass, treated group
  n_data1 = manager.list()
  n_names_tre = manager.list()
  n_samples_mirna_names1=manager.list()
  n_deseq1=manager.list()
  n_LH8E_names=manager.list()


  new_mat_mirnas = list(mature_mirnas)
  ps_non_iso.join()

  # extend the mature reference with the isoform entries collected earlier
  all_iso=list(all_iso)
  new_mat_mirnas.extend(all_iso)

  starttime11=time.time()

  ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
  ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])

  [p.start() for p in ps_sam]
  [p.join() for p in ps_sam]

  print('Non-sam took {} seconds'.format(time.time() - starttime11))

  starttime12=time.time()

  # same dedup / padding / matrix-building pipeline as the templated pass
  n_names_con=list(n_names_con)
  n_names_tre=list(n_names_tre)
  n_samples_mirna_names=list(n_samples_mirna_names)
  n_samples_mirna_names.sort()
  n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))

  n_samples_mirna_names1=list(n_samples_mirna_names1)
  n_samples_mirna_names1.sort()
  n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))

  n_deseq=list(n_deseq)
  n_deseq1=list(n_deseq1)

  new_n_names_con=manager.list()
  new_n_names_tre=manager.list()
  n_new_deseq=manager.list()
  n_new_deseq1=manager.list()
  ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
  ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])

  [x.start() for x in ps_deseq]
  [x.join() for x in ps_deseq]
  n_new_deseq=list(n_new_deseq)
  n_new_deseq1=list(n_new_deseq1)

  # NOTE(review): leftover debug prints
  print([len(n_new_deseq[0]),len(n_new_deseq[1])])
  print([len(n_new_deseq1[0]),len(n_new_deseq1[1])])

  n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
  [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]

  n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
  [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]

  print('Non-deseq took {} seconds'.format(time.time() - starttime12))

  merg_nam_n_LH2E=manager.list()
  merg_nam_n_LH8E=manager.list()

  n_LH2E_copy=copy.deepcopy(list(n_LH2E))
  n_LH8E_copy=copy.deepcopy(list(n_LH8E))

  # NOTE(review): n_sort_tre / n_sort_con appear unused — confirm
  n_sort_tre=manager.list()
  n_sort_con=manager.list()

  n_fil_sort_con=manager.list()
  n_fil_sort_tre=manager.list()
  n_raw_sort_con=manager.list()
  n_raw_sort_tre=manager.list()

  ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con))
  ps_main.start()

  ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
  ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
  [p.start() for p in ps_merge]
  [p.join() for p in ps_merge]

  merg_nam_n_LH2E=list(merg_nam_n_LH2E)
  merg_nam_n_LH8E=list(merg_nam_n_LH8E)

  # database dumps and the non-templated graph set
  procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
  procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
  procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])

  starttime13=time.time()
  [p.start() for p in procs]
  [p.join() for p in procs]

  print('Graphs took {} seconds'.format(time.time() - starttime13))

  # NOTE(review): args=(args.anal) is not a tuple — see note above
  procs1 = Process(target=pdf_before_DE,args=(args.anal))
  procs1.start()

  # NOTE(review): starttime14 is never read — confirm
  starttime14=time.time()
  ps_main.join()

  n_fil_sort_con=list(n_fil_sort_con)
  n_fil_sort_tre=list(n_fil_sort_tre)
  # fall back to the unfiltered matrices when filtering removed everything
  if n_fil_sort_con==[]:
     n_fil_sort_con=n_raw_sort_con
     n_fil_sort_tre=n_raw_sort_tre

  n_raw_sort_con=list(n_raw_sort_con)
  n_raw_sort_tre=list(n_raw_sort_tre)
  n_names_con=list(new_n_names_con)
  n_names_tre=list(new_n_names_tre)

  ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2))
  ps_write.start()

  # combined (templated + non-templated) per-sample matrices for DE
  ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
  ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
  [p.start() for p in ps1_matrix]

  ps_write.join()
  [p.join() for p in ps1_matrix]
  procs1.join()
 print('That took {} seconds'.format(time.time() - starttime10))
 print('That took {} seconds'.format(time.time() - starttime))
+
+
+
+
+
+
+
+