Mercurial > repos > glogobyte > viztool

--- a/mirbase_functions.py	Fri Oct 16 12:17:22 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,829 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-"""---------------------- Simple Functions -----------------------"""
-
-# Read a file and return it as a list
-def read(path, flag):
-    if flag == 0:
-        with open(path) as fp:
-            file=fp.readlines()
-        fp.close()
-        return file
-
-    if flag == 1:
-        with open(path) as fp:
-            file = fp.read().splitlines()
-        fp.close()
-        return file
-
-# Write a list to a txt file
-def write(path, list):
-    with open(path,'w') as fp:
-        for x in list:
-            fp.write(str("\t".join(x[1:-1])))
-    fp.close()
-
-"""---------------------- RNA-seq Functions ----------------------"""
-
-# Detect the longest common substring sequence between two mirnas
-def longestSubstring(str1, str2):
-
-    from difflib import SequenceMatcher
-    # initialize SequenceMatcher object with
-    # input string
-    seqMatch = SequenceMatcher(None, str1, str2)
-
-    # find match of longest sub-string
-    # output will be like Match(a=0, b=0, size=5)
-    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
-
-    # print longest substring
-    if (match.size != 0):
-        return str1[match.a: match.a + match.size]
-    else:
-        print('No longest common sub-string found')
-
-
-
-########################################################################################################################################################
-
-def collapse_sam(path):
-
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in  x.split("\t")[0]]
-
-    uni_seq = []
-    for x in main_sam:
-
-        if [x[2], x[9]] not in uni_seq:
-            uni_seq.append([x[2], x[9]])
-
-    new_main_sam=[]
-    incr_num=0
-    for i in range(len(uni_seq)):
-        count=0
-        incr_num+=1
-        for y in main_sam:
-            if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]:
-               count+=1
-               temp=y
-        temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-        temp[0]=str(incr_num)+"-"+str(count)
-        new_main_sam.append(temp)
-
-    new_sam=intro_sam+new_main_sam
-
-    return new_sam
-
-#################################################################################################################################################################################################################
-
-def duplicate_chroms_isoforms(List):
-
- dupes=[]
-
- for num in range(len(List)):
-
-    if  [List[num][9],List[num][0],List[num][2]] not in dupes :
-        dupes.append([List[num][9],List[num][0],List[num][2]])
-
- for x in List:
-     for y in dupes:
-         if x[9]==y[0] and x[0]==y[1] and x[2].split("_")[0]==y[2].split("_")[0] and x[2]!=y[2]:
-            y.append(x[2])
-
-
- double_List = [x[:] for x in List]
-
- chr_order=[]
- for x in dupes:
-     temp = []
-     for i in range(2,len(x)):
-         if x[i].split("chr")[1].split("(")[0].isdigit():
-            temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0]))
-         else:
-            temp.append(x[i].split("chr")[1][0:4])
-
-     for z in temp:
-         if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z:
-             temp = [str(j) for j in temp]
-     temp=list(set(temp))
-     temp.sort()
-     chr_order.append(temp)
-
- final_dupes=[]
- for i in range(len(dupes)):
-     final_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]])
-     for x in chr_order[i]:
-         result = re.match("[-+]?\d+$", str(x))
-         if len(chr_order[i]) == len(set(chr_order[i])):
-           if result is not None:
-
-             if int(x)<0:
-                final_dupes[i][1]=final_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)"
-             else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)"
-           else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(x)
-         else:
-             if result is not None:
-                 if int(x) < 0:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(-)"
-                 else:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(+)"
-             else:
-                 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(x)
-
- final_dupes.sort()
- final_dupes=list(final_dupes for final_dupes,_ in itertools.groupby(final_dupes))
-
- for i in range(len(double_List)):
-     for x in final_dupes:
-
-         if double_List[i][9] == x[0] and double_List[i][0] == x[2] and len(double_List[i][2].split("_")) >3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            gg=str("_"+double_List[i][2].split("_")[-2]+"_"+double_List[i][2].split("_")[-1])
-            double_List[i][2] = x[1]+gg
-
-         if double_List[i][9]==x[0] and double_List[i][0]== x[2] and len(double_List[i][2].split("_"))==3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            double_List[i][2]=x[1]
-            List[i][2] = x[1]
-
- List.sort()
- new_list=list(List for List,_ in itertools.groupby(List))
-
- double_List.sort()
- new_double_List = list(double_List for double_List, _ in itertools.groupby(double_List))
-
- return new_list, new_double_List
-
-
-#############################################################################################################################################################################################################
-
-def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
-
-    # read the sam file
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]
-
-    sorted_uni_arms = []
-
-    for i in range(len(mature_mirnas)):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-        for j in range(len(unique_seq)):
-
-         if "{" in unique_seq[j][2].split("_")[0]:
-             official=unique_seq[j][2].split("_")[0][:-4]
-         else:
-             official=unique_seq[j][2].split("_")[0]
-
-         if mature_mirnas[i].split(" ")[0][1:] == official:
-
-                temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                # Problem with hsa-miR-8485
-                if mat_diff[1]!=0 and unique_diff[1]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                elif mat_diff[0]!=0 and unique_diff[0]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                else:
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-                   tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                   tmp_count_seq = tmp_count_seq+1
-
-                if pre_pos != 0 or post_pos != 0:
-                    if pre_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
-                    elif post_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
-                    else:
-                        unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
-
-        for x in range(unique_seq.count(1)):
-           unique_seq.remove(1)
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-           sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    dedup_unique_seq,double_fil_uni_seq=duplicate_chroms_isoforms(unique_seq)
-
-    for y in sorted_uni_arms:
-       counts=0
-       seqs=0
-       for x in double_fil_uni_seq:
-           if y[0]==x[2].split("_")[0]:
-              counts+=int(x[0].split("-")[1])
-              seqs+=1
-
-       y[1]=seqs
-       y[2]=counts
-
-    LHE=[]
-    l.acquire()
-    if con=="c":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-
-    if con=="t":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-    l.release()
-
-
-######################################################################################################################################
-
-"""
-
-Read a sam file from Bowtie and do the followings:
-
-1) Remove reverse stranded mapped reads
-2) Remove unmapped reads
-3) Remove all sequences with reads less than 11 reads
-4) Sort the arms with the most sequences in decreading rate
-5) Sort the sequences of every arm with the most reads in decreasing rate
-6) Calculate total number of sequences of every arm
-7) Calculate total number of reads of sequences of every arm.
-8) Store all the informations in a txt file
-
-"""
-
-def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
-
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq=[]
-    unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
-
-    uni_seq=[]
-    # Calculate the shifted positions for every isomir and add them to the name of it
-    sorted_uni_arms = []
-    for i in range(1,len(mature_mirnas),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-
-        for j in range(len(unique_seq)):
-
-            temp_mature = mature_mirnas[i].strip().replace("U", "T")
-
-            if temp_mature in unique_seq[j][9]:
-
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                if len(unique_diff)<=2:
-                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-
-                   lengthofmir = len(off_part) + post_pos
-                   if pre_pos == 0:
-                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                      tmp_count_seq = tmp_count_seq + 1
-
-                      if pre_pos == 0:
-
-                          t_name=unique_seq[j].copy()
-                          t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
-                          uni_seq.append(t_name)
-
-
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-            sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-
-
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
-
-    LHE=[]
-
-    l.acquire()
-    if con=="c":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-
-
-    if con=="t":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-    l.release()
-
-#####################################################################################################################################################################################################################
-def deseq2_temp(samples_mirna_names,deseq,con,l):
-
-    samples_mirna_names.sort(key=lambda x:[0])
-    for i in range(len(deseq)):
-        for y in samples_mirna_names:
-            flag = 0
-            for x in deseq[i]:
-                if y[0] == x[0]:
-                    flag = 1
-                    break
-
-            if flag == 0:
-                deseq[i].append([y[0], "0", y[1]])
-
-    [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
-    deseq_final = [[x[0],x[2]] for x in deseq[0]]
-    [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]
-
-    l.acquire()
-    if con=="c":
-       q1.put(deseq_final)
-
-    if con=="t":
-       q2.put(deseq_final)
-    l.release()
-
-
-####################################################################################################################################################################################################################
-
-def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E,per,count):
-
- LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
- LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]
-
- LH8E_add_names.sort()
- LH2E_add_names.sort()
- LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
- LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))
-
- LH2E.sort()
- LH8E.sort()
- LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
- LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- print("LHE_names")
- print([len(LH8E_add_names),len(LH2E_add_names)])
- print([len(LH8E),len(LH2E)])
-
- zeros=["0"]*(len(LH8E[0])-2)
- [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
- LH8E=LH8E+LH8E_add_names
-
- zeros=["0"]*(len(LH2E[0])-2)
- [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
- LH2E=LH2E+LH2E_add_names
-
- dupes=[]
- final_LH2E =[]
-
- for num,_ in enumerate(LH2E):
-
-    if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
-        final_LH2E.append(LH2E[num][1])
-        final_LH2E.append(LH2E[num][0])
-    else:
-        dupes.append(LH2E[num][1])
-
-
- dupes=list(set(dupes))
-
- dupes=[[x] for x in dupes]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-             fl=0
-             if len(y)==1:
-                 y.append(x[0])
-             else:
-                 for i in range(1,len(y)):
-                     if y[i].split("_")[0]==x[0].split("_")[0]:
-                        fl=1
-                        if len(x[0])<len(y[i]):
-                           del y[i]
-                           y.append(x[0])
-                           break
-
-                 if fl==0:
-                    y.append((x[0]))
-
- for y in dupes:
-    if len(y)>2:
-        for i in range(len(y)-1,1,-1):
-            y[1]=y[1]+"/"+y[i]
-            del y[i]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-            x[0]=y[1]
-
- for x in LH8E:
-    for y in dupes:
-        if x[1]==y[0]:
-           x[0]=y[1]
-
-
- LH2E.sort()
- LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))
-
- LH8E.sort()
- LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- if int(per)!=-1:
-    percent=int(per)/100
-    print(percent)
-    print(count)
-
-    c_col_filter=round(percent*(len(LH2E[1])-2))
-    t_col_filter=round(percent*(len(LH8E[1])-2))
-
-    for i, _ in enumerate(LH2E):
-        c_cols=0
-        t_cols=0
-
-        c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(count)])
-        t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(count)])
-
-        if c_cols>=c_col_filter or t_cols>=t_col_filter:
-           filter_LH8E.append(LH8E[i])
-           filter_LH2E.append(LH2E[i])
-
- raw_LH2E.extend(LH2E)
- raw_LH8E.extend(LH8E)
-
-##################################################################################################################################################################################################################
-
-def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag, per, n1, n2):
-
- if flag == 1 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2 +' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 1:
-    fp = open('Counts/Raw '+n2+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2:
-    fp = open('Counts/Raw '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
-#########################################################################################################################################
-
-def ssamples(names,samp,folder,pro):
-
-    for i in range(2,len(samp[0])):
-
-       fp = open(folder+names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+names[i-2]+"\n")
-
-       for x in samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-       fp.close()
-
-##################################################################################################################
-
-def DB_write(con,name,unique_seq,sorted_uni_arms,f):
-
- if f==1:
-    # Write a txt file with all the information
-    if con=="c":
-       fp = open('split1/'+name, 'w')
-
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split2/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-
-            if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]:
-
-                temp.append(unique_seq[j])
-
-        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-        fp.write("*********************************************************************************************************\n")
-        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-        fp.write("*********************************************************************************************************\n\n")
-        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-        fp.write("\n" + "\n")
-    fp.close()
-
- if f==2:
-
-    if con=="c":
-       fp = open('split3/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split4/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-               if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
-                  temp.append(unique_seq[j])
-        if temp!=[]:
-           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-           fp.write("*********************************************************************************************************\n")
-           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-           fp.write("*********************************************************************************************************\n\n")
-           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-           fp.write("\n" + "\n")
-    fp.close()
-
-
-##########################################################################################################################
-
-def new_mat_seq(pre_unique_seq,mat_mirnas,l):
-
-    unique_iso = []
-    for x in pre_unique_seq:
-       if len(x[2].split("_"))==3:
-          for y in pre_unique_seq:
-              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
-                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
-                    y[2]=">"+y[2]
-                    unique_iso.append(y)
-    l.acquire()
-    for x in unique_iso:
-        mat_mirnas.append(x[2])
-        mat_mirnas.append(x[9])
-    l.release()
-
-#########################################################################################################################
-
-def merging_names(LH2E_copy,new):
-
-    dupes=[]
-    final_LH2E =[]
-
-    for num in range(len(LH2E_copy)):
-
-        if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E:
-           final_LH2E.append(LH2E_copy[num][1])
-           final_LH2E.append(LH2E_copy[num][0])
-        else:
-           dupes.append(LH2E_copy[num][1])
-
-    dupes=list(set(dupes))
-
-    for i in range(len(dupes)):
-        dupes[i]=[dupes[i]]
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]==x[0].split("_")[0]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               x[0]=y[1]
-
-    LH2E_copy.sort()
-    LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy))
-
-    new.extend(LH2E_copy)
-
-
-######################################################################################################################################################
-
-def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
-
-    for i in range(2,len(tem_samp[0])):
-
-       fp = open(folder+tem_names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
-
-       for x in tem_samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-
-       for j in range(len(non_names)):
-           if non_names[j]==tem_names[i-2]:
-              for x in non_samp:
-                  fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
-       fp.close()
-
-###################################################################################################################################################################################################################
-
-def download_matures(matures,org_name):
-
-    #url = 'ftp://mirbase.org/pub/mirbase/21/mature.fa.gz'
-    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz'
-    data = urllib.request.urlopen(url).read()
-    file_mirna = gzip.decompress(data).decode('utf-8')
-    file_mirna = file_mirna.split("\n")
-
-    for i in range(0,len(file_mirna)-1,2):
-
-        if org_name in file_mirna[i]:
-           matures.append(file_mirna[i])
-           matures.append(file_mirna[i+1])
-
-###################################################################################################################################################################################################################
-def non_template_ref(sc,st,all_isoforms):
-
-  pre_uni_seq_con = list(sc)
-  pre_uni_seq_tre = list(st)
-
-  for x in pre_uni_seq_con:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2] :
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-
-  for x in pre_uni_seq_tre:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2]:
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-################################################################################################################################################################################################
-
-def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
-
-    for y in mir_names:
-        flag=0
-        for x in sample:
-            if y[0]==x[0]:
-               flag=1
-               break
-        if flag==0:
-           sample.append([y[0],"0",y[1]])
-
-    sample.sort(key=lambda x: x[0])
-    sample=list(sample for sample,_ in itertools.groupby(sample))
-
-    l.acquire()
-    new_d.append(sample)
-    sample_order.append(sample_name)
-    l.release()
-
-###############################################################################################################################################################################################
-
--- a/mirbase_graphs.py	Fri Oct 16 12:17:22 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,809 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-#################################################################################################################################################################
-def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-    c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
-    t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    c_non = len(c_non_samples)
-    c_non_counts = sum(x[2] for x in c_non_samples)
-    t_non = len(t_non_samples)
-    t_non_counts = sum(x[2] for x in t_non_samples)
-
-    c_unmap = c_unmap - c_non
-    t_unmap = c_unmap - t_non
-
-    c_unmap_counts=c_unmap_counts - c_non_counts
-    t_unmap_counts=t_unmap_counts - t_non_counts
-
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-    fig = plt.figure(figsize=(7,5))
-    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control Group (reads)',fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated Group (reads)', fontsize=12)
-    plt.savefig('pie_non.png',dpi=300)
-
-######################################################################################################################################################
-
-
-def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-
-    fig = plt.figure()
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control group (reads)', fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated group (reads)',fontsize = 12)
-    plt.savefig('pie_tem.png',dpi=300)
-
-###################################################################################################################################################################################################################
-
-
-def make_spider(merge_LH2E,merge_LH8E):
-
-  c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-  t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-  c_5 = 0
-  c_5_counts = 0
-  c_3 = 0
-  c_3_counts = 0
-  c_both =0
-  c_both_counts=0
-  c_mature = 0
-  c_mat_counts = 0
-  c_exception=0
-  c_exception_counts=0
-
-
-  t_5 = 0
-  t_5_counts = 0
-  t_3 = 0
-  t_3_counts = 0
-  t_both = 0
-  t_both_counts = 0
-  t_mature = 0
-  t_mat_counts = 0
-  t_exception = 0
-  t_exception_counts=0
-
-  for x in c_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             c_mature+=1
-             c_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             c_5+=1
-             c_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             c_3+=1
-             c_3_counts += x[2]
-          else:
-             c_both+=1
-             c_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                c_mature+=1
-                c_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                c_exception+=1
-                c_exception_counts += x[2]
-
-
-  for x in t_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             t_mature+=1
-             t_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             t_5+=1
-             t_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             t_3+=1
-             t_3_counts += x[2]
-          else:
-             t_both+=1
-             t_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                t_mature+=1
-                t_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                t_exception+=1
-                t_exception_counts += x[2]
-
-
-  c_all = c_5+c_3+c_both+c_mature+c_exception
-  c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
-
-  t_all = t_5+t_3+t_both+t_mature + t_exception
-  t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
-
-  c_5 = round(c_5/c_all*100,2)
-  c_3 = round(c_3/c_all*100,2)
-  c_both = round(c_both/c_all*100,2)
-  c_mature = round(c_mature/c_all*100,2)
-  c_exception = round(c_exception/c_all*100,2)
-
-  c_5_counts = round(c_5_counts/c_all_counts*100,2)
-  c_3_counts = round(c_3_counts/c_all_counts*100,2)
-  c_both_counts = round(c_both_counts/c_all_counts*100,2)
-  c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
-  c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
-
-  t_5 = round(t_5/t_all*100,2)
-  t_3 = round(t_3/t_all*100,2)
-  t_both = round(t_both/t_all*100,2)
-  t_mature = round(t_mature/t_all*100,2)
-  t_exception = round(t_exception/t_all*100,2)
-
-  t_5_counts = round(t_5_counts/t_all_counts*100,2)
-  t_3_counts = round(t_3_counts/t_all_counts*100,2)
-  t_both_counts = round(t_both_counts/t_all_counts*100,2)
-  t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
-  t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
-
-  radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
-  radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
-
-  df=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both,t_both],
-  """3' isomiRs""":[c_3,t_3],
-  'miRNA RefSeq':[c_mature,t_mature],
-  """5' isomiRs""":[c_5,t_5],
-  'Others*':[c_exception,t_exception]})
-
-  df1=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
-  """3' isomiRs""":[c_3_counts,t_3_counts],
-  'miRNA RefSeq':[c_mat_counts,t_mat_counts],
-  """5' isomiRs""":[c_5_counts,t_5_counts],
-  'Others*':[c_exception_counts,t_exception_counts]})
-
-  spider_last(df,radar_max,1)
-  spider_last(df1,radar_max_counts,2)
-
-#####################################################################################################################################################
-
-def spider_last(df,radar_max,flag):
-  # ------- PART 1: Create background
-  fig = plt.figure()
-  # number of variable
-  categories=list(df)[1:]
-  N = len(categories)
-
-  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
-  angles = [n / float(N) * 2 * pi for n in range(N)]
-  angles += angles[:1]
-
-  # Initialise the spider plot
-  ax = plt.subplot(111, polar=True)
-
-  # If you want the first axis to be on top:
-  ax.set_theta_offset(pi/2)
-  ax.set_theta_direction(-1)
-
-  # Draw one axe per variable + add labels labels yet
-  plt.xticks(angles[:-1], categories, fontsize=11)
-
-  # Draw ylabels
-  radar_max=round(radar_max+radar_max*0.1)
-  mul=len(str(radar_max))-1
-  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
-  sep = round(maxi/4)
-  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
-  plt.ylim(0, maxi)
-
-  # ------- PART 2: Add plots
-
-  # Plot each individual = each line of the data
-  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
-
-  # Ind1
-  values=df.loc[0].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
-  ax.fill(angles, values, 'b', alpha=0.1)
-
-  # Ind2
-  values=df.loc[1].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
-  ax.fill(angles, values, 'r', alpha=0.1)
-
-  # Add legend
-  if flag==1:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_non_red.png',dpi=300)
-  else:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_red.png',dpi=300)
-
-
-#############################################################################################################################################################################################################
-
-def hist_red(samples,flag):
-    lengths=[]
-    cat=[]
-    total_reads=0
-    seq=[]
-
-    if flag == "c":
-        title = "Length Distribution of Control group (Redudant reads)"
-    if flag == "t":
-        title = "Length Distribution of Treated group (Redudant reads)"
-
-    for i in samples:
-        for x in i:
-            lengths.append(len(x[9]))
-            if x[1]=="0":
-               seq.append([x[9],x[0].split("-")[1],"Mapped"])
-               cat.append("Mapped")
-            if x[1] == "4":
-               seq.append([x[9],x[0].split("-")[1],"Unmapped"])
-               cat.append("Unmapped")
-
-    uni_len=list(set(lengths))
-    uni_len=[x for x in uni_len if x<=35]
-    low=min(lengths)
-    up=max(lengths)
-    seq.sort()
-    uni_seq=list(seq for seq,_ in itertools.groupby(seq))
-    dim=up-low
-
-    if dim>20:
-       s=5
-    else:
-       s=8
-
-    total_reads+=sum([int(x[1]) for x in uni_seq])
-
-    map_reads=[]
-    unmap_reads=[]
-    length=[]
-    for y in uni_len:
-        map_temp=0
-        unmap_temp=0
-        for x in uni_seq:
-            if len(x[0])==y and x[2]=="Mapped":
-               map_temp+=int(x[1])
-            if len(x[0])==y and x[2]=="Unmapped":
-               unmap_temp+=int(x[1])
-        if y<=35:
-           length.append(y)
-           map_reads.append(round(map_temp/total_reads*100,2))
-           unmap_reads.append(round(unmap_temp/total_reads*100,2))
-
-    ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
-    ylim=ylim+ylim*20/100
-    fig, ax = plt.subplots()
-    width=0.8
-    ax.bar(length, unmap_reads, width, label='Unmapped')
-    h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
-    plt.xticks(np.arange(length[0], length[-1]+1, 1))
-    plt.xlabel('Length (nt)',fontsize=14)
-    plt.ylabel('Percentage',fontsize=14)
-    plt.title(title,fontsize=14)
-    ax.legend()
-    plt.ylim([0, ylim])
-    ax.grid(axis='y',linewidth=0.2)
-
-    if flag=='c':
-       plt.savefig('c_hist_red.png',dpi=300)
-
-    if flag=='t':
-       plt.savefig('t_hist_red.png',dpi=300)
-
-#################################################################################################################
-
-def logo_seq_red(merge, flag):
-
-    if flag=="c":
-       titlos="Control group (Redundant)"
-       file_logo="c_logo.png"
-       file_bar="c_bar.png"
-    if flag=="t":
-       titlos="Treated group (Redundant)"
-       file_logo="t_logo.png"
-       file_bar="t_bar.png"
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-    total_reads=0
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            total_reads+=best[2]
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=best[2]
-                    elif best[1][i] == "C":
-                        C[i]+=best[2]
-                    elif best[1][i] == "G":
-                        G[i]+=best[2]
-                    else:
-                        T[i]+=best[2]
-        else:
-            total_reads+=y[2]
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=(y[2])
-                    else:
-                        T[i]+=y[2]
-
-    A[:] = [round(x*100,1) / total_reads for x in A]
-    C[:] = [round(x*100,1) / total_reads for x in C]
-    G[:] = [round(x*100,1) / total_reads for x in G]
-    T[:] = [round(x*100,1) / total_reads for x in T]
-
-
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
-    h.grid(axis='y',linewidth=0.2)
-    plt.xticks(rotation=0, ha="right")
-    plt.ylabel("Counts (%)",fontsize=18)
-    plt.xlabel("Positions (nt)",fontsize=18)
-    plt.title(titlos,fontsize=20)
-    plt.tight_layout()
-    plt.savefig(file_bar, dpi=300)
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_title(titlos,fontsize=18)
-    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    figure = plt.gcf()
-    figure.set_size_inches(6, 4)
-    crp_logo.fig.savefig(file_logo,dpi=300)
-
-##########################################################################################################################################################################################################
-
-def logo_seq_non_red(merge_LH2E):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=1
-                    elif best[1][i] == "C":
-                        C[i]+=1
-                    elif best[1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-        else:
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=1
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=1
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
-    h.set_xlabel("Positions (nt)")
-    h.set_ylabel("Unique sequences")
-    plt.xticks(rotation=0, ha="right")
-    plt.tight_layout()
-    plt.savefig("bar2.png", dpi=300)
-
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-
-    # style using Logo methods
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    crp_logo.ax.set_title("Non-redundant")
-    figure = plt.gcf()
-    crp_logo.fig.savefig('logo2.png', dpi=300)
-
-
-################################################################################################################################################################
-
-def pdf_before_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF, fpdf
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  if analysis=="2":
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
-  else:
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-  #sys.exit(images)
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'A4')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 20.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial','', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 11.0)
-
-  # Image caption
-  pdf.ln(0.5)
-
-  yh=FPDF.get_y(pdf)
-  pdf.image(images[0],x=0.3,w=4, h=3)
-  pdf.image(images[1],x=4,y=yh, w=4, h=3)
-  pdf.ln(0.3)
-
-  # Image caption
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
-  pdf.ln(0.2)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  and Treated (right) groups")
-
-
-  pdf.ln(0.5)
-  h1=FPDF.get_y(pdf)
-  pdf.image(images[2],x=1, w=6.5, h=5)
-  h2=FPDF.get_y(pdf)
-  FPDF.set_y(pdf,h1+0.2)
-  pdf.set_font('Arial','', 14.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
-  pdf.set_font('Arial', '', 11.0)
-  FPDF.set_y(pdf,h2)
-  FPDF.set_y(pdf,9.5)
-  # Image caption
-  pdf.cell(0.2)
-  if analysis=="2":
-     pdf.cell(3.0, 0.0, "  Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  in Control (left) and treated (right) groups")
-  else:
-     pdf.cell(3.0, 0.0, "  Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  Control (left) and treated (right) groups")
-
-
-
-  pdf.add_page()
-  pdf.set_font('Arial', 'B', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial', 'B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
-  pdf.ln(0.5)
-  pdf.image(images[3],x=1.5, w=5.5, h=4)
-  pdf.ln(0.6)
-  pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
-  pdf.set_font('Arial', '', 12.0)
-  pdf.ln(0.2)
-  pdf.image(images[4],x=1.5, w=5.5, h=4)
-  pdf.ln(0.3)
-  pdf.set_font('Arial', '', 11.0)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  * IsomiRs potentialy initiated from multiple loci")
-
-
-  if analysis=="2":
-     pdf.add_page('L')
-
-     pdf.set_font('Arial', 'B', 16.0)
-     pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
-     pdf.ln(0.5)
-     pdf.set_font('Arial', 'B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
-     pdf.ln(0.7)
-
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[5],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
-     pdf.ln(0.5)
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[6],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
-
-  pdf.close()
-  pdf.output('report1.pdf','F')
-
-
-
-
-#############################################################################################################################################################3
-
-def pdf_after_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  image_extensions = ("tem.png","non.png","a2.png")
-
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'letter')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 16.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
-  #pdf.ln(0.25)
-
-  pdf.ln(0.7)
-  pdf.set_font('Arial','B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
-
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 10.0)
-
-  # Image caption
-  pdf.ln(0.4)
-  pdf.image(images[0],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-  if analysis=="2":
-
-     pdf.add_page()
-     pdf.ln(0.7)
-     pdf.set_font('Arial','B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
-     pdf.ln(0.4)
-     pdf.image(images[1],x=0.5, w=7.5, h=6.5)
-
-  pdf.add_page()
-  pdf.ln(0.5)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
-  pdf.ln(0.4)
-  pdf.image(images[2],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-
-  pdf.output('report2.pdf', 'F')
-
-
-
--- a/mirbase_ultra_v2.py	Fri Oct 16 12:17:22 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1723 +0,0 @@
-from mirbase_functions import *
-from mirbase_graphs import *
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
-parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
-parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
-parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
-parser.add_argument("-f", "--flag", help="choose the database", action="store")
-parser.add_argument("-umis", "--umi", help="choose the database", action="store")
-parser.add_argument("-percentage", "--per", help="choose the database", action="store")
-parser.add_argument("-counts", "--count", help="choose the database", action="store")
-parser.add_argument("-name1", "--n1", help="choose the database", action="store")
-parser.add_argument("-name2", "--n2", help="choose the database", action="store")
-args = parser.parse_args()
-
-#########################################################################################################################################
-
-def collapse_sam(path):
-
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in  x.split("\t")[0]]
-
-    uni_seq = []
-    for x in main_sam:
-
-        if [x[2], x[9]] not in uni_seq:
-            uni_seq.append([x[2], x[9]])
-
-    new_main_sam=[]
-    incr_num=0
-    for i in range(len(uni_seq)):
-        count=0
-        incr_num+=1
-        for y in main_sam:
-            if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]:
-               count+=1
-               temp=y
-        temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-        temp[0]=str(incr_num)+"-"+str(count)
-        new_main_sam.append(temp)
-
-    new_sam=intro_sam+new_main_sam
-
-    return new_sam
-
-#################################################################################################################################################################################################################
-
-def duplicate_chroms_isoforms(List):
-
- dupes=[]
-
- for num in range(len(List)):
-
-    if  [List[num][9],List[num][0],List[num][2]] not in dupes :
-        dupes.append([List[num][9],List[num][0],List[num][2]])
-
- for x in List:
-     for y in dupes:
-         if x[9]==y[0] and x[0]==y[1] and x[2].split("_")[0]==y[2].split("_")[0] and x[2]!=y[2]:
-            y.append(x[2])
-
-
- double_List = [x[:] for x in List]
-
- chr_order=[]
- for x in dupes:
-     temp = []
-     for i in range(2,len(x)):
-         if x[i].split("chr")[1].split("(")[0].isdigit():
-            temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0]))
-         else:
-            temp.append(x[i].split("chr")[1][0:4])
-
-     for z in temp:
-         if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z:
-             temp = [str(j) for j in temp]
-     temp=list(set(temp))
-     temp.sort()
-     chr_order.append(temp)
-
- final_dupes=[]
- for i in range(len(dupes)):
-     final_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]])
-     for x in chr_order[i]:
-         result = re.match("[-+]?\d+$", str(x))
-         if len(chr_order[i]) == len(set(chr_order[i])):
-           if result is not None:
-
-             if int(x)<0:
-                final_dupes[i][1]=final_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)"
-             else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)"
-           else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(x)
-         else:
-             if result is not None:
-                 if int(x) < 0:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(-)"
-                 else:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(+)"
-             else:
-                 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(x)
-
- final_dupes.sort()
- final_dupes=list(final_dupes for final_dupes,_ in itertools.groupby(final_dupes))
-
- for i in range(len(double_List)):
-     for x in final_dupes:
-
-         if double_List[i][9] == x[0] and double_List[i][0] == x[2] and len(double_List[i][2].split("_")) >3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            gg=str("_"+double_List[i][2].split("_")[-2]+"_"+double_List[i][2].split("_")[-1])
-            double_List[i][2] = x[1]+gg
-
-         if double_List[i][9]==x[0] and double_List[i][0]== x[2] and len(double_List[i][2].split("_"))==3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            double_List[i][2]=x[1]
-            List[i][2] = x[1]
-
- List.sort()
- new_list=list(List for List,_ in itertools.groupby(List))
-
- double_List.sort()
- new_double_List = list(double_List for double_List, _ in itertools.groupby(double_List))
-
- return new_list, new_double_List
-
-
-#############################################################################################################################################################################################################
-
-def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
-
-    # read the sam file
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]
-
-    sorted_uni_arms = []
-
-    for i in range(len(mature_mirnas)):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-        for j in range(len(unique_seq)):
-
-         if "{" in unique_seq[j][2].split("_")[0]:
-             official=unique_seq[j][2].split("_")[0][:-4]
-         else:
-             official=unique_seq[j][2].split("_")[0]
-
-         if mature_mirnas[i].split(" ")[0][1:] == official:
-
-                temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                # Problem with hsa-miR-8485
-                if mat_diff[1]!=0 and unique_diff[1]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                elif mat_diff[0]!=0 and unique_diff[0]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                else:
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-                   tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                   tmp_count_seq = tmp_count_seq+1
-
-                if pre_pos != 0 or post_pos != 0:
-                    if pre_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
-                    elif post_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
-                    else:
-                        unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
-
-        for x in range(unique_seq.count(1)):
-           unique_seq.remove(1)
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-           sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    dedup_unique_seq,double_fil_uni_seq=duplicate_chroms_isoforms(unique_seq)
-
-    for y in sorted_uni_arms:
-       counts=0
-       seqs=0
-       for x in double_fil_uni_seq:
-           if y[0]==x[2].split("_")[0]:
-              counts+=int(x[0].split("-")[1])
-              seqs+=1
-
-       y[1]=seqs
-       y[2]=counts
-
-    LHE=[]
-    l.acquire()
-    if con=="c":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-
-    if con=="t":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-    l.release()
-
-
-######################################################################################################################################
-"""
-
-Read a sam file from Bowtie and do the followings:
-
-1) Remove reverse stranded mapped reads
-2) Remove unmapped reads
-3) Remove all sequences with reads less than 11 reads
-4) Sort the arms with the most sequences in decreading rate
-5) Sort the sequences of every arm with the most reads in decreasing rate
-6) Calculate total number of sequences of every arm
-7) Calculate total number of reads of sequences of every arm.
-8) Store all the informations in a txt file
-
-"""
-
-def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
-
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq=[]
-    unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
-
-    uni_seq=[]
-    # Calculate the shifted positions for every isomir and add them to the name of it
-    sorted_uni_arms = []
-    for i in range(1,len(mature_mirnas),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-
-        for j in range(len(unique_seq)):
-
-            temp_mature = mature_mirnas[i].strip().replace("U", "T")
-
-            if temp_mature in unique_seq[j][9]:
-
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                if len(unique_diff)<=2:
-                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-
-                   lengthofmir = len(off_part) + post_pos
-                   if pre_pos == 0:
-                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                      tmp_count_seq = tmp_count_seq + 1
-
-                      if pre_pos == 0:
-
-                          t_name=unique_seq[j].copy()
-                          t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
-                          uni_seq.append(t_name)
-
-
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-            sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-
-
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
-
-    LHE=[]
-
-    l.acquire()
-    if con=="c":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-
-
-    if con=="t":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-    l.release()
-
-#####################################################################################################################################################################################################################
-def deseq2_temp(samples_mirna_names,deseq,con,l):
-
-    samples_mirna_names.sort(key=lambda x:[0])
-    for i in range(len(deseq)):
-        for y in samples_mirna_names:
-            flag = 0
-            for x in deseq[i]:
-                if y[0] == x[0]:
-                    flag = 1
-                    break
-
-            if flag == 0:
-                deseq[i].append([y[0], "0", y[1]])
-
-    [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
-    deseq_final = [[x[0],x[2]] for x in deseq[0]]
-    [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]
-
-    l.acquire()
-    if con=="c":
-       q1.put(deseq_final)
-
-    if con=="t":
-       q2.put(deseq_final)
-    l.release()
-
-
-####################################################################################################################################################################################################################
-
-def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E):
-
- LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
- LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]
-
- LH8E_add_names.sort()
- LH2E_add_names.sort()
- LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
- LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))
-
- LH2E.sort()
- LH8E.sort()
- LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
- LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- print("LHE_names")
- print([len(LH8E_add_names),len(LH2E_add_names)])
- print([len(LH8E),len(LH2E)])
-
- zeros=["0"]*(len(LH8E[0])-2)
- [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
- LH8E=LH8E+LH8E_add_names
-
- zeros=["0"]*(len(LH2E[0])-2)
- [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
- LH2E=LH2E+LH2E_add_names
-
- dupes=[]
- final_LH2E =[]
-
- for num,_ in enumerate(LH2E):
-
-    if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
-        final_LH2E.append(LH2E[num][1])
-        final_LH2E.append(LH2E[num][0])
-    else:
-        dupes.append(LH2E[num][1])
-
-
- dupes=list(set(dupes))
-
- dupes=[[x] for x in dupes]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-             fl=0
-             if len(y)==1:
-                 y.append(x[0])
-             else:
-                 for i in range(1,len(y)):
-                     if y[i].split("_")[0]==x[0].split("_")[0]:
-                        fl=1
-                        if len(x[0])<len(y[i]):
-                           del y[i]
-                           y.append(x[0])
-                           break
-
-                 if fl==0:
-                    y.append((x[0]))
-
- for y in dupes:
-    if len(y)>2:
-        for i in range(len(y)-1,1,-1):
-            y[1]=y[1]+"/"+y[i]
-            del y[i]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-            x[0]=y[1]
-
- for x in LH8E:
-    for y in dupes:
-        if x[1]==y[0]:
-           x[0]=y[1]
-
-
- LH2E.sort()
- LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))
-
- LH8E.sort()
- LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- LH8E_new=[]
- LH2E_new=[]
-
- if int(args.per)!=-1:
-    percent=int(args.per)/100
-    print(percent)
-    print(args.count)
-
-    c_col_filter=round(percent*(len(LH2E[1])-2))
-    t_col_filter=round(percent*(len(LH8E[1])-2))
-
-    for i, _ in enumerate(LH2E):
-        c_cols=0
-        t_cols=0
-
-        c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(args.count)])
-        t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(args.count)])
-
-        if c_cols>=c_col_filter or t_cols>=t_col_filter:
-           LH8E_new.append(LH8E[i])
-           LH2E_new.append(LH2E[i])
-
- filter_LH2E.extend(LH2E_new)
- filter_LH8E.extend(LH8E_new)
- raw_LH2E.extend(LH2E)
- raw_LH8E.extend(LH8E)
-
-##################################################################################################################################################################################################################
-
-def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag):
-
- if flag == 1 and int(args.per)!=-1:
-    fp = open('Counts/Filtered '+args.n2 +' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+args.n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2 and int(args.per)!=-1:
-    fp = open('Counts/Filtered '+args.n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+args.n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 1:
-    fp = open('Counts/Raw '+args.n2+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+args.n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2:
-    fp = open('Counts/Raw '+args.n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+args.n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
-#########################################################################################################################################
-
-def ssamples(names,samp,folder,pro):
-
-    for i in range(2,len(samp[0])):
-
-       fp = open(folder+names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+names[i-2]+"\n")
-
-       for x in samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-       fp.close()
-
-##################################################################################################################
-
-def DB_write(con,name,unique_seq,sorted_uni_arms,f):
-
- if f==1:
-    # Write a txt file with all the information
-    if con=="c":
-       fp = open('split1/'+name, 'w')
-
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split2/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-
-            if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]:
-
-                temp.append(unique_seq[j])
-
-        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-        fp.write("*********************************************************************************************************\n")
-        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-        fp.write("*********************************************************************************************************\n\n")
-        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-        fp.write("\n" + "\n")
-    fp.close()
-
- if f==2:
-
-    if con=="c":
-       fp = open('split3/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split4/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-               if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
-                  temp.append(unique_seq[j])
-        if temp!=[]:
-           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-           fp.write("*********************************************************************************************************\n")
-           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-           fp.write("*********************************************************************************************************\n\n")
-           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-           fp.write("\n" + "\n")
-    fp.close()
-
-
-##########################################################################################################################
-
-def new_mat_seq(pre_unique_seq,mat_mirnas,l):
-
-    unique_iso = []
-    for x in pre_unique_seq:
-       if len(x[2].split("_"))==3:
-          for y in pre_unique_seq:
-              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
-                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
-                    y[2]=">"+y[2]
-                    unique_iso.append(y)
-    l.acquire()
-    for x in unique_iso:
-        mat_mirnas.append(x[2])
-        mat_mirnas.append(x[9])
-    l.release()
-
-#########################################################################################################################
-def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-    c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
-    t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    c_non = len(c_non_samples)
-    c_non_counts = sum(x[2] for x in c_non_samples)
-    t_non = len(t_non_samples)
-    t_non_counts = sum(x[2] for x in t_non_samples)
-
-    c_unmap = c_unmap - c_non
-    t_unmap = c_unmap - t_non
-
-    c_unmap_counts=c_unmap_counts - c_non_counts
-    t_unmap_counts=t_unmap_counts - t_non_counts
-
-
-    for x in c_samples:
-
-       	if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-    fig = plt.figure(figsize=(7,5))
-    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control Group (reads)',fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated Group (reads)', fontsize=12)
-    plt.savefig('pie_non.png',dpi=300)
-
-######################################################################################################################################################
-
-def merging_names(LH2E_copy,new):
-
-    dupes=[]
-    final_LH2E =[]
-
-    for num in range(len(LH2E_copy)):
-
-        if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E:
-           final_LH2E.append(LH2E_copy[num][1])
-           final_LH2E.append(LH2E_copy[num][0])
-        else:
-           dupes.append(LH2E_copy[num][1])
-
-    dupes=list(set(dupes))
-
-    for i in range(len(dupes)):
-        dupes[i]=[dupes[i]]
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]==x[0].split("_")[0]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               x[0]=y[1]
-
-    LH2E_copy.sort()
-    LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy))
-
-    new.extend(LH2E_copy)
-
-
-######################################################################################################################################################
-def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-
-    fig = plt.figure()
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control group (reads)', fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated group (reads)',fontsize = 12)
-    plt.savefig('pie_tem.png',dpi=300)
-
-###################################################################################################################################################################################################################
-
-def make_spider(merge_LH2E,merge_LH8E):
-
-  c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-  t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-  c_5 = 0
-  c_5_counts = 0
-  c_3 = 0
-  c_3_counts = 0
-  c_both =0
-  c_both_counts=0
-  c_mature = 0
-  c_mat_counts = 0
-  c_exception=0
-  c_exception_counts=0
-
-
-  t_5 = 0
-  t_5_counts = 0
-  t_3 = 0
-  t_3_counts = 0
-  t_both = 0
-  t_both_counts = 0
-  t_mature = 0
-  t_mat_counts = 0
-  t_exception = 0
-  t_exception_counts=0
-
-  for x in c_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             c_mature+=1
-             c_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             c_5+=1
-             c_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             c_3+=1
-             c_3_counts += x[2]
-          else:
-             c_both+=1
-             c_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                c_mature+=1
-                c_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                c_exception+=1
-                c_exception_counts += x[2]
-
-
-  for x in t_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             t_mature+=1
-             t_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             t_5+=1
-             t_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             t_3+=1
-             t_3_counts += x[2]
-          else:
-             t_both+=1
-             t_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                t_mature+=1
-                t_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                t_exception+=1
-                t_exception_counts += x[2]
-
-
-  c_all = c_5+c_3+c_both+c_mature+c_exception
-  c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
-
-  t_all = t_5+t_3+t_both+t_mature + t_exception
-  t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
-
-  c_5 = round(c_5/c_all*100,2)
-  c_3 = round(c_3/c_all*100,2)
-  c_both = round(c_both/c_all*100,2)
-  c_mature = round(c_mature/c_all*100,2)
-  c_exception = round(c_exception/c_all*100,2)
-
-  c_5_counts = round(c_5_counts/c_all_counts*100,2)
-  c_3_counts = round(c_3_counts/c_all_counts*100,2)
-  c_both_counts = round(c_both_counts/c_all_counts*100,2)
-  c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
-  c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
-
-  t_5 = round(t_5/t_all*100,2)
-  t_3 = round(t_3/t_all*100,2)
-  t_both = round(t_both/t_all*100,2)
-  t_mature = round(t_mature/t_all*100,2)
-  t_exception = round(t_exception/t_all*100,2)
-
-  t_5_counts = round(t_5_counts/t_all_counts*100,2)
-  t_3_counts = round(t_3_counts/t_all_counts*100,2)
-  t_both_counts = round(t_both_counts/t_all_counts*100,2)
-  t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
-  t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
-
-  radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
-  radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
-
-  df=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both,t_both],
-  """3' isomiRs""":[c_3,t_3],
-  'miRNA RefSeq':[c_mature,t_mature],
-  """5' isomiRs""":[c_5,t_5],
-  'Others*':[c_exception,t_exception]})
-
-  df1=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
-  """3' isomiRs""":[c_3_counts,t_3_counts],
-  'miRNA RefSeq':[c_mat_counts,t_mat_counts],
-  """5' isomiRs""":[c_5_counts,t_5_counts],
-  'Others*':[c_exception_counts,t_exception_counts]})
-
-  spider_last(df,radar_max,1)
-  spider_last(df1,radar_max_counts,2)
-
-
-def spider_last(df,radar_max,flag):
-  # ------- PART 1: Create background
-  fig = plt.figure()
-  # number of variable
-  categories=list(df)[1:]
-  N = len(categories)
-
-  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
-  angles = [n / float(N) * 2 * pi for n in range(N)]
-  angles += angles[:1]
-
-  # Initialise the spider plot
-  ax = plt.subplot(111, polar=True)
-
-  # If you want the first axis to be on top:
-  ax.set_theta_offset(pi/2)
-  ax.set_theta_direction(-1)
-
-  # Draw one axe per variable + add labels labels yet
-  plt.xticks(angles[:-1], categories, fontsize=11)
-
-  # Draw ylabels
-  radar_max=round(radar_max+radar_max*0.1)
-  mul=len(str(radar_max))-1
-  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
-  sep = round(maxi/4)
-  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
-  plt.ylim(0, maxi)
-
-  # ------- PART 2: Add plots
-
-  # Plot each individual = each line of the data
-  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
-
-  # Ind1
-  values=df.loc[0].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
-  ax.fill(angles, values, 'b', alpha=0.1)
-
-  # Ind2
-  values=df.loc[1].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
-  ax.fill(angles, values, 'r', alpha=0.1)
-
-  # Add legend
-  if flag==1:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_non_red.png',dpi=300)
-  else:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_red.png',dpi=300)
-
-
-#############################################################################################################################################################################################################
-def hist_red(samples,flag):
-    lengths=[]
-    cat=[]
-    total_reads=0
-    seq=[]
-
-    if flag == "c":
-        title = "Length Distribution of Control group (Redudant reads)"
-    if flag == "t":
-        title = "Length Distribution of Treated group (Redudant reads)"
-
-    for i in samples:
-        for x in i:
-            lengths.append(len(x[9]))
-            if x[1]=="0":
-               seq.append([x[9],x[0].split("-")[1],"Mapped"])
-               cat.append("Mapped")
-            if x[1] == "4":
-               seq.append([x[9],x[0].split("-")[1],"Unmapped"])
-               cat.append("Unmapped")
-
-    uni_len=list(set(lengths))
-    uni_len=[x for x in uni_len if x<=35]
-    low=min(lengths)
-    up=max(lengths)
-    seq.sort()
-    uni_seq=list(seq for seq,_ in itertools.groupby(seq))
-    dim=up-low
-
-    if dim>20:
-       s=5
-    else:
-       s=8
-
-    total_reads+=sum([int(x[1]) for x in uni_seq])
-
-    map_reads=[]
-    unmap_reads=[]
-    length=[]
-    for y in uni_len:
-        map_temp=0
-        unmap_temp=0
-        for x in uni_seq:
-            if len(x[0])==y and x[2]=="Mapped":
-               map_temp+=int(x[1])
-            if len(x[0])==y and x[2]=="Unmapped":
-               unmap_temp+=int(x[1])
-        if y<=35:
-           length.append(y)
-           map_reads.append(round(map_temp/total_reads*100,2))
-           unmap_reads.append(round(unmap_temp/total_reads*100,2))
-
-    ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
-    ylim=ylim+ylim*20/100
-    fig, ax = plt.subplots()
-    width=0.8
-    ax.bar(length, unmap_reads, width, label='Unmapped')
-    h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
-    plt.xticks(np.arange(length[0], length[-1]+1, 1))
-    plt.xlabel('Length (nt)',fontsize=14)
-    plt.ylabel('Percentage',fontsize=14)
-    plt.title(title,fontsize=14)
-    ax.legend()
-    plt.ylim([0, ylim])
-    ax.grid(axis='y',linewidth=0.2)
-
-    if flag=='c':
-       plt.savefig('c_hist_red.png',dpi=300)
-
-    if flag=='t':
-       plt.savefig('t_hist_red.png',dpi=300)
-
-#################################################################################################################
-
-
-def logo_seq_red(merge, flag):
-
-    if flag=="c":
-       titlos="Control group (Redundant)"
-       file_logo="c_logo.png"
-       file_bar="c_bar.png"
-    if flag=="t":
-       titlos="Treated group (Redundant)"
-       file_logo="t_logo.png"
-       file_bar="t_bar.png"
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-    total_reads=0
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            total_reads+=best[2]
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=best[2]
-                    elif best[1][i] == "C":
-                        C[i]+=best[2]
-                    elif best[1][i] == "G":
-                        G[i]+=best[2]
-                    else:
-                        T[i]+=best[2]
-        else:
-            total_reads+=y[2]
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=(y[2])
-                    else:
-                        T[i]+=y[2]
-
-    A[:] = [round(x*100,1) / total_reads for x in A]
-    C[:] = [round(x*100,1) / total_reads for x in C]
-    G[:] = [round(x*100,1) / total_reads for x in G]
-    T[:] = [round(x*100,1) / total_reads for x in T]
-
-
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
-    h.grid(axis='y',linewidth=0.2)
-    plt.xticks(rotation=0, ha="right")
-    plt.ylabel("Counts (%)",fontsize=18)
-    plt.xlabel("Positions (nt)",fontsize=18)
-    plt.title(titlos,fontsize=20)
-    plt.tight_layout()
-    plt.savefig(file_bar, dpi=300)
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_title(titlos,fontsize=18)
-    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    figure = plt.gcf()
-    figure.set_size_inches(6, 4)
-    crp_logo.fig.savefig(file_logo,dpi=300)
-
-##########################################################################################################################################################################################################
-
-
-
-def logo_seq_non_red(merge_LH2E):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=1
-                    elif best[1][i] == "C":
-                        C[i]+=1
-                    elif best[1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-        else:
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=1
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=1
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
-    h.set_xlabel("Positions (nt)")
-    h.set_ylabel("Unique sequences")
-    plt.xticks(rotation=0, ha="right")
-    plt.tight_layout()
-    plt.savefig("bar2.png", dpi=300)
-
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-
-    # style using Logo methods
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    crp_logo.ax.set_title("Non-redundant")
-    figure = plt.gcf()
-    crp_logo.fig.savefig('logo2.png', dpi=300)
-
-
-###################################################################################################################################################################################################################
-
-def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
-
-    for i in range(2,len(tem_samp[0])):
-
-       fp = open(folder+tem_names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
-
-       for x in tem_samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-
-       for j in range(len(non_names)):
-           if non_names[j]==tem_names[i-2]:
-              for x in non_samp:
-                  fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
-       fp.close()
-
-###################################################################################################################################################################################################################
-
-def download_matures(matures,org_name):
-
-    #url = 'ftp://mirbase.org/pub/mirbase/21/mature.fa.gz'
-    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz'
-    data = urllib.request.urlopen(url).read()
-    file_mirna = gzip.decompress(data).decode('utf-8')
-    file_mirna = file_mirna.split("\n")
-
-    for i in range(0,len(file_mirna)-1,2):
-
-        if org_name in file_mirna[i]:
-           matures.append(file_mirna[i])
-           matures.append(file_mirna[i+1])
-
-###################################################################################################################################################################################################################
-def non_template_ref(sc,st,all_isoforms):
-
-  pre_uni_seq_con = list(sc)
-  pre_uni_seq_tre = list(st)
-
-  for x in pre_uni_seq_con:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2] :
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-
-  for x in pre_uni_seq_tre:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2]:
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-################################################################################################################################################################################################
-
-def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
-
-    for y in mir_names:
-        flag=0
-        for x in sample:
-            if y[0]==x[0]:
-               flag=1
-               break
-        if flag==0:
-           sample.append([y[0],"0",y[1]])
-
-    sample.sort(key=lambda x: x[0])
-    sample=list(sample for sample,_ in itertools.groupby(sample))
-
-    l.acquire()
-    new_d.append(sample)
-    sample_order.append(sample_name)
-    l.release()
-
-###############################################################################################################################################################################################
-
-if __name__ == '__main__':
-
- starttime = time.time()
-
- q1 = Queue()
- q2 = Queue()
- lock = Lock()
- manager = Manager()
-
- mature_mirnas=manager.list()
- ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
- ps_mature.start()
-
- args.control[0]=args.control[0][1:]
- args.control[len(args.control)-1][:-1]
- control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
-
- args.treated[0]=args.treated[0][1:]
- args.treated[len(args.treated)-1][:-1]
- treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
-
-
-##############  Detection of templated isoforms ################
-
- radar = manager.list([0,0,0,0])
- samples = manager.list()
- data= manager.list()
- names_con=manager.list()
- samples_mirna_names=manager.list()
- deseq=manager.list()
- unmap_seq=manager.Value('i',0)
- unmap_counts=manager.Value('i',0)
- LH2E_names=manager.list()
- ini_c_samples = manager.list()
-
-
- radar1 = manager.list([0,0,0,0])
- samples1 = manager.list()
- data1 = manager.list()
- names_tre = manager.list()
- samples_mirna_names1=manager.list()
- deseq1=manager.list()
- unmap1_seq = manager.Value('i',0)
- unmap1_counts = manager.Value('i',0)
- LH8E_names=manager.list()
- ini_t_samples = manager.list()
- ps_mature.join()
-
-
- mature_mirnas=list(mature_mirnas)
-
-
- starttime1 = time.time()
- ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
- ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
-
- [p.start() for p in ps_sam]
- [p.join() for p in ps_sam]
- print('SAM took {} seconds'.format(time.time() - starttime1))
-
- ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
- ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
- [x.start() for x in ps_hist]
-
- starttime200=time.time()
-
- sc = list(samples)
- st = list(samples1)
-
- names_con=list(names_con)
- names_tre=list(names_tre)
- samples_mirna_names=list(samples_mirna_names)
- samples_mirna_names.sort()
- samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
-
- samples_mirna_names1=list(samples_mirna_names1)
- samples_mirna_names1.sort()
- samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
-
- deseq=list(deseq)
- deseq1=list(deseq1)
-
- new_names_con=manager.list()
- new_names_tre=manager.list()
- new_deseq=manager.list()
- new_deseq1=manager.list()
- ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
- ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
-
- [z.start() for z in ps_deseq]
- [z.join() for z in ps_deseq]
- new_deseq=list(new_deseq)
- new_deseq1=list(new_deseq1)
-
- LH2E=[[x[0],x[2]] for x in new_deseq[0]]
- [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
-
- LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
- [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
-
- print('Deseq took {} seconds'.format(time.time() - starttime200))
-
- merg_nam_LH2E=manager.list()
- merg_nam_LH8E=manager.list()
-
- LH2E_copy=copy.deepcopy(list(LH2E))
- LH8E_copy=copy.deepcopy(list(LH8E))
-
- fil_sort_tre=manager.list()
- fil_sort_con=manager.list()
- raw_sort_tre=manager.list()
- raw_sort_con=manager.list()
-
- ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con))
- ps_main.start()
-
- if args.anal=="2":
-    all_iso = manager.list()
-    ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
-    ps_non_iso.start()
-
- ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
- ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
- [x.start() for x in ps_merge]
- [x.join() for x in ps_merge]
-
- merg_nam_LH2E=list(merg_nam_LH2E)
- merg_nam_LH8E=list(merg_nam_LH8E)
-
- starttime2 = time.time()
- procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
- procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
- procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
- if args.anal == "1":
-    procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
-
- [p.start() for p in procs]
-
-
- if args.anal=="1":
-    [x.join() for x in ps_hist]
-    [p.join() for p in procs]
-    ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
-    ps_pdf.start()
-
- print('Graphs took {} seconds'.format(time.time() - starttime2))
-
- ps_main.join()
-
- fil_sort_con=list(fil_sort_con)
- fil_sort_tre=list(fil_sort_tre)
- if fil_sort_con==[]:
-    fil_sort_con=raw_sort_con
-    fil_sort_tre=raw_sort_tre
-
- raw_sort_con=list(raw_sort_con)
- raw_sort_tre=list(raw_sort_tre)
- names_con=list(new_names_con)
- names_tre=list(new_names_tre)
-
- ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1))
- ps_write.start()
-
- ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
- ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
- [p.start() for p in ps1_matrix]
-
- if args.anal=="1":
-    ps_pdf.join()
- if args.anal=="2":
-    [p.join() for p in procs]
-    [x.join() for x in ps_hist]
-
- ps_write.join()
- [p.join() for p in ps1_matrix]
-
-
-############################## Detection of Both  #######################################
-
- starttime10 = time.time()
-
- if args.anal == "2":
-
-  n_data= manager.list()
-  n_names_con=manager.list()
-  n_samples_mirna_names=manager.list()
-  n_deseq=manager.list()
-  n_LH2E_names=manager.list()
-
-  n_data1 = manager.list()
-  n_names_tre = manager.list()
-  n_samples_mirna_names1=manager.list()
-  n_deseq1=manager.list()
-  n_LH8E_names=manager.list()
-
-
-  new_mat_mirnas = list(mature_mirnas)
-  ps_non_iso.join()
-
-  all_iso=list(all_iso)
-  new_mat_mirnas.extend(all_iso)
-
-  starttime11=time.time()
-
-  ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
-  ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
-
-  [p.start() for p in ps_sam]
-  [p.join() for p in ps_sam]
-
-  print('Non-sam took {} seconds'.format(time.time() - starttime11))
-
-  starttime12=time.time()
-
-  n_names_con=list(n_names_con)
-  n_names_tre=list(n_names_tre)
-  n_samples_mirna_names=list(n_samples_mirna_names)
-  n_samples_mirna_names.sort()
-  n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
-
-  n_samples_mirna_names1=list(n_samples_mirna_names1)
-  n_samples_mirna_names1.sort()
-  n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
-
-  n_deseq=list(n_deseq)
-  n_deseq1=list(n_deseq1)
-
-  new_n_names_con=manager.list()
-  new_n_names_tre=manager.list()
-  n_new_deseq=manager.list()
-  n_new_deseq1=manager.list()
-  ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
-  ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
-
-  [x.start() for x in ps_deseq]
-  [x.join() for x in ps_deseq]
-  n_new_deseq=list(n_new_deseq)
-  n_new_deseq1=list(n_new_deseq1)
-
-  print([len(n_new_deseq[0]),len(n_new_deseq[1])])
-  print([len(n_new_deseq1[0]),len(n_new_deseq1[1])])
-
-  n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
-  [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
-
-  n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
-  [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
-
-  print('Non-deseq took {} seconds'.format(time.time() - starttime12))
-
-  merg_nam_n_LH2E=manager.list()
-  merg_nam_n_LH8E=manager.list()
-
-  n_LH2E_copy=copy.deepcopy(list(n_LH2E))
-  n_LH8E_copy=copy.deepcopy(list(n_LH8E))
-
-  n_sort_tre=manager.list()
-  n_sort_con=manager.list()
-
-  n_fil_sort_con=manager.list()
-  n_fil_sort_tre=manager.list()
-  n_raw_sort_con=manager.list()
-  n_raw_sort_tre=manager.list()
-
-  ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con))
-  ps_main.start()
-
-  ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
-  ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
-  [p.start() for p in ps_merge]
-  [p.join() for p in ps_merge]
-
-  merg_nam_n_LH2E=list(merg_nam_n_LH2E)
-  merg_nam_n_LH8E=list(merg_nam_n_LH8E)
-
-  procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
-  procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
-  procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
-
-  starttime13=time.time()
-  [p.start() for p in procs]
-  [p.join() for p in procs]
-
-  print('Graphs took {} seconds'.format(time.time() - starttime13))
-
-  procs1 = Process(target=pdf_before_DE,args=(args.anal))
-  procs1.start()
-
-  starttime14=time.time()
-  ps_main.join()
-
-  n_fil_sort_con=list(n_fil_sort_con)
-  n_fil_sort_tre=list(n_fil_sort_tre)
-  if n_fil_sort_con==[]:
-     n_fil_sort_con=n_raw_sort_con
-     n_fil_sort_tre=n_raw_sort_tre
-
-  n_raw_sort_con=list(n_raw_sort_con)
-  n_raw_sort_tre=list(n_raw_sort_tre)
-  n_names_con=list(new_n_names_con)
-  n_names_tre=list(new_n_names_tre)
-
-  ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2))
-  ps_write.start()
-
-  ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
-  ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
-  [p.start() for p in ps1_matrix]
-
-  ps_write.join()
-  [p.join() for p in ps1_matrix]
-  procs1.join()
- print('That took {} seconds'.format(time.time() - starttime10))
- print('That took {} seconds'.format(time.time() - starttime))
-
-
-
-
-
-
-
-
--- a/toolExample.xml	Fri Oct 16 12:17:22 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-
-<tool id="fa_gc_content_1" name="IsoRead: miR and isomiR identification and classification" version="0.1.0">
-  <description>for each sequence in a file</description>
-  <requirements>
-   <requirement type="package" version="1.7">fpdf</requirement>
-   <requirement type="package" version="0.8">logomaker</requirement>
-   <requirement type="package" version="0.6.0">plotnine</requirement>
-   <requirement type="package" version="3.7.4">python</requirement>
-   <requirement type="package" version="1.17.3">numpy</requirement>
-   <requirement type="package" version="3.1.2">matplotlib</requirement>
-   <requirement type="package" version="0.9.0">seaborn</requirement>
-   <requirement type="package" version="1.0.3">pandas</requirement>
-  </requirements>
-  <command>
-    #set controls=[]
-    #for $input in $control#
-    $controls.extend([str($input.element_identifier),str($input)])
-    #end for#
-    #set treateds=[]
-    #for $input in $treated#
-    $treateds.extend([str($input.element_identifier),str($input)])
-    #end for#
-    #if $mir_input.database == "1":
-      #if $f.fil == "1":
-        #set path=$mir_input.genome1.fields.path
-        python -W ignore $__tool_directory__/mirbase_ultra_v2.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -umis $umis -percentage "-1" -counts "-1" -name1 "$fal1" -name2 "$fal2"
-      #end if
-      #if $f.fil == "2":
-        #set path=$mir_input.genome1.fields.path
-        python -W ignore $__tool_directory__/mirbase_ultra_v2.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -umis $umis -percentage "$f.fil1" -counts "$f.fil2" -name1 "$fal1" -name2 "$fal2"
-      #end if
-    #else:
-      #if $f.fil == "1":
-        #set path=$mir_input.genome2.fields.value
-        python -W ignore $__tool_directory__/mirgene_ultra_v2.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -umis $umis -percentage "-1" -counts "-1" -name1 "$fal1" -name2 "$fal2"
-      #end if
-      #if $f.fil == "2":
-        #set path=$mir_input.genome2.fields.value
-        python -W ignore $__tool_directory__/mirgene_ultra_v2.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -umis $umis -percentage "$f.fil1" -counts "$f.fil2" -name1 "$fal1" -name2 "$fal2"
-      #end if
-    #end if
-
-    <!--
-    #if $mir_input.database == "1" and $f.filter == "2":
-      #set path=$mir_input.genome1.fields.path
-      python -W ignore $__tool_directory__/mirbase_ultra_v1.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -umis $umis -percentage "$filters.fil1" -counts "$filter.fil2 -name1 $fal1 -name2 $fal2"
-    #else:
-      #set path=$mir_input.genome2.fields.value
-      python -W ignore $__tool_directory__/main_galaxy_mirgene.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -program $program -f "$mir_input.database" -percentage "$filters.fil1" -counts "$filters.fil2" ->
-    #end if
-    -->
-  </command>
-  <inputs>
-    <param name="analysis" type="select" label="Discover miR with templated or/and non-templated isomiRs" help="Choose the category of miRNAs for detection">
-      <option value="1" selected="true">Detection of only templated miRNAs</option>
-      <option value="2">Detection of templated and non-templated miRNAs</option>
-    </param>
-    <!--param name="program" type="select" label="Choose differential expression/statistics module" help="Choose the category of miRNAs for detection">
-      <option value="0" selected="true">Deseq2</option>
-      <option value="1">EdgeR</option>
-      <option value="2">Deseq2 and EdgeR</option>
-    </param-->
-
-    <conditional name="mir_input">
-     <param name="database" type="select" label="Choose Database of miRNAs organisms" help="Choose which database prefer to be used.">
-      <option value="1" selected="true">MirBase</option>
-      <option value="2">MirGene</option>
-     </param>
-     <when value="1">
-      <param name="genome1" type="select" label="Reference miRNAs (organism)" help="If your genome coordinates of interest is not listed, contact the Galaxy team">
-        <options from_data_table="n_spiecies" />
-      </param>
-     </when>
-     <when value="2">
-      <param name="genome2" type="select" label="Reference miRNAs (organism)" help="If your genome coordinates of interest is not listed, contact the Galaxy team">
-        <options from_data_table="mirgene" />
-      </param>
-     </when>
-    </conditional>
-
-
-    <param name="fal1" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"/>
-    <param name="control" format="sam" type="data" multiple="True" label="Select BAM files of the factor level samples" />
-    <param name="fal2" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"/>
-    <param name="treated" format="sam" type="data" multiple="True" label="Select BAM files of the factor level samples" />
-
-    <conditional name="f">
-     <param name="fil" type="select" label="Filter low counts" help="Treat genes with very low expression as unexpressed and filter out">
-      <option value="1" selected="true">No</option>
-      <option value="2">Yes</option>
-     </param>
-     <when value="2">
-      <param name="fil1" type="integer" value="0" label="Minimum percentage of the samples" help="Filter out all genes that do not meet the Minimum counts in at least this many samples of every category"/>
-      <param name="fil2" type="integer" value="0" label="Minimum counts" help="Filter out all genes that do not meet this minimum count"/>
-     </when>
-     <when value="1">
-     </when>
-    </conditional>
-
-    <param name="db" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Output Database files" />
-    <param name="cmatrix" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Output Matrix files, one for each factor level"  />
-    <param name="c_files" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Output Count tables, one for each sample" />
-    <param name="umis" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Collapsing sam files if samples include UMIs" />
-  </inputs>
-  <outputs>
-    <collection name="list_output1" type="list" label="Database ${fal1} templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="split1"  />
-        <filter>db == 1 and (analysis == "1" or analysis == "2")</filter>
-    </collection>
-    <collection name="list_output2" type="list" label="Database ${fal2} templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="split2" />
-        <filter>db == 1 and (analysis == "1" or analysis == "2")</filter>
-    </collection>
-    <collection name="list_output3" type="list" label="Database ${fal1} non-templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="split3" />
-        <filter>db == 1 and analysis == "2"</filter>
-    </collection>
-    <collection name="list_output4" type="list" label="Database ${fal2} non-templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="split4" />
-        <filter>db == 1 and analysis == "2"</filter>
-    </collection>
-
-    <!--
-    <data name="Results non templated control" format="tabular" label="${fal1} matrix counts of non templated results" from_work_dir="$__tool_directory__/Deseq_final_non_LH2E">
-      <filter>cmatrix==1 and analysis == "2"</filter>
-    </data>
-    <data name="Results non templated treated" format="tabular" label="${fal2} matrix counts of non templated results" from_work_dir="$__tool_directory__/Deseq_final_non_LH8E">
-      <filter>cmatrix==1 and analysis == "2"</filter>
-    </data>
-    <data name="Results templated control" format="tabular" label="${fal1} matrix counts of templated results" from_work_dir="$__tool_directory__/Deseq_final_temp_LH2E">
-      <filter>cmatrix==1</filter>
-    </data>
-    <data name="Results templated treated" format="tabular" label="${fal2} matrix counts of templated results" from_work_dir="$__tool_directory__/Deseq_final_temp_LH8E">
-       <filter>cmatrix==1</filter>
-    </data>
-    -->
-    <collection name="Counts" type="list" label="Count Matrices" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Counts" />
-        <filter>cmatrix==1</filter>
-    </collection>
-
-
-    <collection name="list_output9" type="list" label="Count files ${fal1} for Differential Expression" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Diff/temp_con"  />
-        <filter>c_files==1 and (analysis == "1")</filter>
-    </collection>
-    <collection name="list_output10" type="list" label="Count files ${fal2} for Differential Expression" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Diff/temp_tre" />
-        <filter>c_files==1 and (analysis == "1")</filter>
-    </collection>
-    <collection name="list_output11" type="list" label="Count files ${fal1} for Differential Expression" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Diff/n_temp_con" />
-        <filter>c_files==1 and analysis == "2"</filter>
-    </collection>
-    <collection name="list_output12" type="list" label="Count files ${fal2} for Differential Expression" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Diff/n_temp_tre" />
-        <filter>c_files==1 and analysis == "2"</filter>
-    </collection>
-
-
-    <!--
-    <collection name="list_output13" type="list" label="EdgeR count files ${fal1} templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Edger/split5"  />
-        <filter>c_files==1 and (program=="1" or program=="2" ) and (analysis == "1")</filter>
-    </collection>
-    <collection name="list_output14" type="list" label="EdgeR count files ${fal2} templated" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Edger/split6" />
-        <filter>c_files==1 and (program=="1" or program=="2") and (analysis == "1")</filter>
-    </collection>
-    <collection name="list_output15" type="list" label="EdgeR count files ${fal1}" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Edger/split7" />
-        <filter>c_files==1 and (program=="1" or program=="2") and analysis == "2"</filter>
-    </collection>
-    <collection name="list_output16" type="list" label="EdgeR count files ${fal2}" >
-        <discover_datasets pattern="__name__" format="tabular" directory="Edger/split8" />
-        <filter>c_files==1 and (program=="1" or program=="2") and analysis == "2"</filter>
-    </collection>
-    -->
-
-    <!--data name="O" format="png" label="c_hist" from_work_dir="$__tool_direcory__/c_hist_red.png"/-->
-    <!--data name="O1" format="png" label="t_hist" from_work_dir="$__tool_direcory__/t_hist_red.png"/-->
-    <!--data name="OOOOOOOOO" format="png" label="c_non-hist" from_work_dir="$__tool_direcory__/c_hist_non_red.png"/-->
-    <!--data name="SSSSSSSSS" format="png" label="t_non-hist" from_work_dir="$__tool_direcory__/t_hist_non_red.png"/-->
-    <!--data name="OOOOOOOOO111" format="png" label="non-pie" from_work_dir="$__tool_direcory__/pie2.png"/-->
-    <!--data name="SSSSSSSSS1" format="png" label="pie" from_work_dir="$__tool_direcory__/pie1.png"/-->
-    <!--data name="OOOOO" format="png" label="spider1" from_work_dir="$__tool_direcory__/spider1.png"/-->
-    <!--data name="OOOOOO" format="png" label="spider2" from_work_dir="$__tool_direcory__/spider2.png"/-->
-    <!--data name="OOo" format="png" label="WWW" from_work_dir="$__tool_direcory__/bar2.png"/-->
-    <!--data name="OOOOOOOOO1" format="png" label="WWWWWWW" from_work_dir="$__tool_direcory__/bar1.png"/-->
-    <!--data name="OO" format="png" label="logo1" from_work_dir="$__tool_direcory__/logo1.png"/-->
-    <!--data name="O5" format="png" label="logo2" from_work_dir="$__tool_direcory__/logo2.png"/-->
-    <data name="Results non templated treated1" format="pdf" label="PDF" from_work_dir="$__tool_directory__/report1.pdf" />
-    <!--data name="Results" format="png" label="results2" from_work_dir="$__tool_directory__/w.png"/-->
-    <data name="Results non templated treated3" format="tabular" label="results3" from_work_dir="$__tool_directory__/LH8E" />
-    <!--data name="Results non templated treated4" format="tabular" label="results4" from_work_dir="$__tool_directory__/Deseq_final_temp_LH8E"/-->
-  </outputs>
-  <help>
-  </help>
-</tool>