view mirgene_functions.py @ 23:d2eea02053a0 draft

Deleted selected files
author glogobyte
date Wed, 28 Oct 2020 08:13:30 +0000
parents dc31f01cf21d
children
line wrap: on
line source

import itertools
import time
import sys
import os
import urllib.request
import gzip
from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
import subprocess
import argparse
from collections import OrderedDict
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd
from math import pi
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import scipy.stats as stats
from plotnine import *
import math
import re
import matplotlib.ticker as mtick
import copy




"""---------------------- Simple Functions -----------------------"""

# Read a file and return it as a list
def read(path, flag):
    """Load a text file into a list of lines.

    Args:
        path: Location of the file, opened in text mode.
        flag: 0 keeps the line terminators (``readlines``); 1 removes
            them (``splitlines``).

    Returns:
        The list of lines, or None when ``flag`` is neither 0 nor 1 (the
        file is not opened in that case).
    """
    if flag not in (0, 1):
        return None

    with open(path) as handle:
        if flag == 0:
            return handle.readlines()
        return handle.read().splitlines()

# Write a list to a txt file
def write(path, list):
    """Dump the inner fields of every row to a text file.

    For each row the first and the last element are dropped and the
    remaining elements are joined with tabs. Nothing is written between
    rows (no newline is added), so any line break has to be part of the
    row data itself.

    Args:
        path: Output file; created or truncated.
        list: Iterable of rows, each a sequence of strings.
    """
    with open(path, 'w') as out:
        out.writelines("\t".join(row[1:-1]) for row in list)

"""---------------------- RNA-seq Functions ----------------------"""

# Detect the longest common substring sequence between two mirnas
def longestSubstring(str1, str2):
    """Return the longest contiguous block shared by ``str1`` and ``str2``.

    The block is located with ``difflib.SequenceMatcher`` and returned as
    a slice of ``str1``. When the two strings share nothing, a message is
    printed and None is returned.
    """
    from difflib import SequenceMatcher

    # The match object looks like Match(a=<start in str1>, b=<start in str2>, size=<length>)
    hit = SequenceMatcher(None, str1, str2).find_longest_match(0, len(str1), 0, len(str2))

    if hit.size == 0:
        print('No longest common sub-string found')
        return None

    start = hit.a
    return str1[start:start + hit.size]



########################################################################################################################################################
def collapse_sam(path):
    """Collapse identical reads of a SAM file into one record each.

    Lines whose first tab-separated field contains "@" are treated as
    header lines and kept unchanged. All other lines are grouped by the
    pair (column 3, column 10) -- reference name and read sequence in SAM
    terms. Each group is reduced to a single record:

    * the record kept is the last line of the group in file order,
    * its first column becomes "<group number>-<group size>", where groups
      are numbered from 1 in order of first appearance,
    * its eleventh column is overwritten with a fixed placeholder string.

    Grouping is done in a single pass with an ordered mapping instead of
    rescanning every read for every unique pair.

    Args:
        path: Path of the SAM file to read.

    Returns:
        A list of field lists: header lines first, then one collapsed
        record per (reference, sequence) pair.
    """
    intro_sam = []
    main_sam = []
    with open(path) as fp:
        for line in fp:
            fields = line.rstrip("\n").split("\t")
            if "@" in fields[0]:
                intro_sam.append(fields)
            else:
                main_sam.append(fields)

    # (reference, sequence) -> [number of reads, last record seen]
    groups = OrderedDict()
    for record in main_sam:
        key = (record[2], record[9])
        entry = groups.get(key)
        if entry is None:
            groups[key] = [1, record]
        else:
            entry[0] += 1
            entry[1] = record

    new_main_sam = []
    for incr_num, (count, record) in enumerate(groups.values(), 1):
        record[10] = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        record[0] = str(incr_num) + "-" + str(count)
        new_main_sam.append(record)

    return intro_sam + new_main_sam

#################################################################################################################################################################

def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
    """Annotate the mapped reads of one collapsed SAM file as isomiRs.

    Reads with flag column '0' and a sequence of 18-26 nt are kept. For
    every read whose reference column equals ``mature_mirnas[i]``, the
    read is compared with ``mature_mirnas[i+1]`` (the list is used as
    alternating name / sequence entries) and the 5' / 3' offsets relative
    to that sequence are appended to the reference name as
    "<name>_<5' shift>_<3' shift>" (a zero shift is written as "0", any
    other with an explicit sign). Reads that differ from the reference on
    the same side on both strings are discarded.

    The results are appended to the shared containers while holding ``l``.
    The lock is taken with ``with`` so it is released even if one of the
    appends fails.

    Args:
        mature_mirnas: Flat sequence alternating reference name and
            reference sequence.
        path: Collapsed SAM file; column 1 must look like "<id>-<reads>".
        name: Sample name, appended to ``names``.
        con: "c" or "t"; any other value stores nothing.
        l: Lock object usable as a context manager.
        samples, data, names, samples_mirna_names, deseq, LHE_names,
        ini_sample: Output lists that are appended to.
        unmap_seq, unmap_counts: Objects with a numeric ``value``
            attribute; incremented by the number of flag-'4' records and
            by their summed read counts respectively.
        umi: Unused.
    """

    def _shift(value):
        # Zero is written bare, everything else with an explicit sign.
        return str(value) if value == 0 else '{:+d}'.format(value)

    def _arm(label):
        parts = label.split("_")
        return parts[0] + "_" + parts[1]

    # read the sam file
    ini_sam=read(path,0)
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in new_main_sam if x[1] == '0' and 18 <= len(x[9]) <= 26]

    # Calculate the shifted positions for every isomir and add them to the name of it
    sorted_uni_arms = []

    for i in range(len(mature_mirnas)):
        tmp_count_reads = 0   # total number of reads of this arm
        tmp_count_seq = 0     # total number of sequences of this arm
        rejected = set()      # positions of reads to drop after this pass

        for j, record in enumerate(unique_seq):
            if mature_mirnas[i] != record[2]:
                continue

            temp_mature = mature_mirnas[i+1]
            off_part = longestSubstring(temp_mature, record[9])
            if not off_part:
                # Nothing in common with the reference: cannot be placed.
                rejected.add(j)
                continue

            mat_diff = temp_mature.split(off_part)
            mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

            unique_diff = record[9].split(off_part)
            unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

            # Problem with hsa-miR-8485: both strings have extra bases on
            # the same side of the shared block, so no clean shift exists.
            if (mat_diff[1] != 0 and unique_diff[1] != 0) or (mat_diff[0] != 0 and unique_diff[0] != 0):
                rejected.add(j)
                continue

            pre_pos = mat_diff[0]-unique_diff[0]
            post_pos = unique_diff[1]-mat_diff[1]
            tmp_count_reads += int(record[0].split("-")[1])
            tmp_count_seq += 1

            if pre_pos != 0 or post_pos != 0:
                record[2] = record[2] + "_" + _shift(pre_pos) + "_" + _shift(post_pos)

        if rejected:
            unique_seq = [rec for k, rec in enumerate(unique_seq) if k not in rejected]
        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i], tmp_count_seq, tmp_count_reads])

    # Store name of arms, number of sequences and number of reads
    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)

    # Recount per arm over the final read list (order from the sort above is kept).
    for arm in sorted_uni_arms:
        counts=0
        seqs=0
        for x in unique_seq:
            if arm[0] == _arm(x[2]):
                counts+=int(x[0].split("-")[1])
                seqs+=1
        arm[1]=seqs
        arm[2]=counts

    unmapped = [x for x in new_main_sam if x[1] == '4']

    with l:
        if con == "c" or con == "t":
            samples_mirna_names.extend([[y[2], y[9]] for y in unique_seq])
            deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
            LHE_names.extend([z[2] for z in unique_seq])
            unmap_seq.value += len(unmapped)
            unmap_counts.value += sum([int(x[0].split("-")[1]) for x in unmapped])
            names.append(name)
            samples.append(unique_seq)
            data.append([con,name,unique_seq,sorted_uni_arms])
            ini_sample.append(new_main_sam)


######################################################################################################################################
"""
Read a SAM file from Bowtie and do the following:

1) Remove reverse-stranded mapped reads
2) Remove unmapped reads
3) Remove all sequences with fewer than 11 reads
4) Sort the arms by number of sequences, in decreasing order
5) Sort the sequences of every arm by number of reads, in decreasing order
6) Calculate the total number of sequences of every arm
7) Calculate the total number of reads of the sequences of every arm
8) Store all the information in a txt file

"""

def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
    """Find reads that extend a reference sequence at the 3' end.

    Records with flag column '4' and a sequence of 18-26 nt are scanned.
    ``mature_mirnas`` is used as alternating name / sequence entries; each
    sequence has "U" replaced by "T". A read is reported when it contains
    the reference sequence and starts exactly with it (5' shift of 0). The
    reported copy of the record gets the name

        "<reference name>__0_<+3' extension length>_<bases after the reference>"

    in column 3. Identical reported records are de-duplicated, keeping
    first-seen order.

    The results are appended to the shared containers while holding ``l``.
    The lock is taken with ``with`` so it is released even if one of the
    appends fails.

    Args:
        mature_mirnas: Flat sequence alternating reference name and
            reference sequence.
        path: Collapsed SAM file; column 1 must look like "<id>-<reads>".
        name: Sample name, appended to ``names``.
        con: "c" or "t"; any other value stores nothing.
        l: Lock object usable as a context manager.
        data, names, n_deseq, n_samples_mirna_names, n_LHE_names: Output
            lists that are appended to.
    """
    ini_sam=read(path,0)
    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in new_main_sam if x[1] == '4' and 18 <= len(x[9]) <= 26]
    uni_seq=[]

    # Calculate the shifted positions for every isomir and add them to the name of it
    sorted_uni_arms = []
    for i in range(1,len(mature_mirnas),2):
        tmp_count_reads = 0   # total number of reads of this arm
        tmp_count_seq = 0     # total number of sequences of this arm

        # Invariant for the whole inner loop, so compute it once.
        temp_mature = mature_mirnas[i].strip().replace("U", "T")

        for record in unique_seq:
            if temp_mature not in record[9]:
                continue

            off_part = longestSubstring(temp_mature, record[9])

            mat_diff = temp_mature.split(off_part)
            mat_diff = [len(mat_diff[0]), len(mat_diff[1])]

            unique_diff = record[9].split(off_part)
            if len(unique_diff) > 2:
                # The shared block occurs more than once in the read.
                continue
            unique_diff = [len(unique_diff[0]), len(unique_diff[1])]

            pre_pos = mat_diff[0]-unique_diff[0]
            post_pos = unique_diff[1]-mat_diff[1]

            if pre_pos == 0:
                tmp_count_reads += int(record[0].split("-")[1])
                tmp_count_seq += 1

                # Work on a copy so the original record keeps its name.
                t_name = list(record)
                t_name[2] = mature_mirnas[i - 1] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(record[9][len(off_part):])
                uni_seq.append(t_name)

        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i-1], tmp_count_seq, tmp_count_reads])

    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
    # Order-preserving removal of identical records.
    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))

    named = [x for x in unique_seq if x[2] != "*"]

    with l:
        if con == "c" or con == "t":
            n_samples_mirna_names.extend([[x[2], x[9]] for x in named])
            n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in named])
            n_LHE_names.extend([x[2] for x in named])
            names.append(name)
            data.append([con,name,unique_seq,sorted_uni_arms])

#################################################################################################################################################################################################################

def deseq2_temp(samples_mirna_names,deseq,con,l):
    """Build one count table (name, sequence, one count per sample).

    Every sample in ``deseq`` is padded in place with a "0" entry for each
    name of ``samples_mirna_names`` it does not contain, then sorted by
    name. The table rows follow the first sample; each row is
    ``[name, sequence]`` followed by the counts found under that name in
    every sample, in sample order.

    ``samples_mirna_names`` is sorted in place by name (the previous key
    function was a constant, which made the sort a no-op).

    The finished table is put on ``q1`` when ``con`` is "c" and on ``q2``
    when ``con`` is "t".
    NOTE(review): ``q1`` / ``q2`` are not parameters and are not defined in
    the visible part of this module -- presumably module-level queues;
    confirm they exist before calling with "c" or "t".

    Args:
        samples_mirna_names: List of ``[name, sequence]`` pairs.
        deseq: List of samples, each a list of ``[name, count, sequence]``.
        con: "c" or "t"; any other value publishes nothing.
        l: Lock object usable as a context manager.
    """
    samples_mirna_names.sort(key=lambda x: x[0])

    # Pad every sample with the names it is missing (first pair per name wins).
    for sample in deseq:
        present = {x[0] for x in sample}
        for y in samples_mirna_names:
            if y[0] not in present:
                sample.append([y[0], "0", y[1]])
                present.add(y[0])

    for sample in deseq:
        sample.sort(key=lambda x: x[0])

    # Per sample: name -> counts in row order, so each table row is filled
    # with dictionary lookups instead of rescanning every sample.
    counts_per_sample = []
    for sample in deseq:
        by_name = {}
        for x in sample:
            by_name.setdefault(x[0], []).append(x[1])
        counts_per_sample.append(by_name)

    deseq_final = [[x[0], x[2]] for x in deseq[0]] if deseq else []
    for row in deseq_final:
        for by_name in counts_per_sample:
            row.extend(by_name.get(row[0], ()))

    with l:
        if con=="c":
            q1.put(deseq_final)
        elif con=="t":
            q2.put(deseq_final)


##################################################################################################################################################################################################################

def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E,per,count):
    """Align two count tables, merge duplicate names and filter rows.

    Both tables hold rows ``[name, sequence, count, count, ...]`` with the
    counts stored as strings. The function

    1. adds to each table an all-"0" row for every ``[name, sequence]``
       pair that only the other table knows,
    2. finds sequences of ``LH2E`` that occur under more than one name and
       relabels those rows, in both tables, with the names joined by "/"
       (for names sharing the same "<name>_<arm>" prefix only the
       shortest one is kept),
    3. sorts both tables and removes identical rows,
    4. unless ``per`` is -1, keeps the row pairs in which at least
       ``per`` percent of the columns of either table reach ``count``.

    The filter width is taken from the first row of each table, so tables
    with a single row are handled. The ``[name, sequence]`` rows passed in
    ``LH2E_names`` / ``LH8E_names`` are not modified.

    Args:
        LH2E, LH8E: The two count tables; sorted in place, and rows may be
            renamed in place.
        LH2E_names, LH8E_names: ``[name, sequence]`` pairs of each table.
        flag: Unused.
        names_con, names_tre: Sample names of ``LH2E`` / ``LH8E``. Only
            consulted for the number of count columns when the matching
            table is empty (presumably one name per count column --
            verify against the caller).
        filter_LH8E, filter_LH2E: Output lists receiving the filtered rows.
        raw_LH8E, raw_LH2E: Output lists receiving all rows.
        per: Percentage of columns required, or -1 to skip filtering.
        count: Minimum count a column needs to be considered expressed.
    """

    def _sorted_unique(rows):
        # Sort in place, then drop rows equal to their predecessor.
        rows.sort()
        return [row for row, _ in itertools.groupby(rows)]

    def _arm(label):
        # "<name>_<arm>_..." -> "<name>_<arm>"
        parts = label.split("_")
        return parts[0] + "_" + parts[1]

    # --- 1. make both tables cover the same (name, sequence) pairs ---------
    control_keys = {tuple(x) for x in LH2E_names}
    treated_keys = {tuple(x) for x in LH8E_names}
    LH8E_add_names = _sorted_unique([x for x in LH2E_names if tuple(x) not in treated_keys])
    LH2E_add_names = _sorted_unique([x for x in LH8E_names if tuple(x) not in control_keys])

    LH2E = _sorted_unique(LH2E)
    LH8E = _sorted_unique(LH8E)

    t_width = len(LH8E[0]) - 2 if LH8E else len(names_tre)
    c_width = len(LH2E[0]) - 2 if LH2E else len(names_con)
    # New lists are built so the caller's name rows stay untouched.
    LH8E = LH8E + [list(x) + ["0"] * t_width for x in LH8E_add_names]
    LH2E = LH2E + [list(x) + ["0"] * c_width for x in LH2E_add_names]

    # --- 2. sequences that appear under several names ----------------------
    seen = set()        # every name and sequence accepted so far
    dup_seqs = set()
    for row in LH2E:
        if row[1] not in seen and row[0] not in seen:
            seen.add(row[1])
            seen.add(row[0])
        else:
            dup_seqs.add(row[1])

    # sequence -> names collected for it, in row order
    merged = {seq: [] for seq in dup_seqs}
    for row in LH2E:
        collected = merged.get(row[1])
        if collected is None:
            continue
        if not collected:
            collected.append(row[0])
            continue

        same_arm = False
        for i, known in enumerate(collected):
            if _arm(known) == _arm(row[0]):
                same_arm = True
                if len(row[0]) < len(known):
                    # Keep only the shorter name of the same arm.
                    del collected[i]
                    collected.append(row[0])
                    break
        if not same_arm:
            collected.append(row[0])

    # First name, then the remaining ones from last to first, joined by "/".
    labels = {seq: "/".join([found[0]] + found[:0:-1]) for seq, found in merged.items()}

    for table in (LH2E, LH8E):
        for row in table:
            if row[1] in labels:
                row[0] = labels[row[1]]

    # --- 3. final ordering -------------------------------------------------
    LH2E = _sorted_unique(LH2E)
    LH8E = _sorted_unique(LH8E)

    # --- 4. optional expression filter -------------------------------------
    if int(per) != -1 and LH2E:
        percent = int(per) / 100
        threshold = int(count)

        c_col_filter = round(percent * (len(LH2E[0]) - 2))
        t_col_filter = round(percent * (len(LH8E[0]) - 2))

        for i, c_row in enumerate(LH2E):
            t_row = LH8E[i]
            c_cols = sum(1 for v in c_row[2:] if int(v) >= threshold)
            t_cols = sum(1 for v in t_row[2:] if int(v) >= threshold)

            if c_cols >= c_col_filter or t_cols >= t_col_filter:
                filter_LH8E.append(t_row)
                filter_LH2E.append(c_row)

    raw_LH2E.extend(LH2E)
    raw_LH8E.extend(LH8E)

##################################################################################################################################################################################################################

def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag, per, n1, n2):
    """Write the raw (and optionally the filtered) count tables to ``Counts/``.

    Each file starts with the header "Name<TAB>Sequence" followed by one
    tab-separated column per sample name; every table row follows on its
    own line, tab-joined. No newline is written after the last row.

    Files written (``<kind>`` is "Templated" for ``flag`` 1 and
    "Non-Templated" for ``flag`` 2; any other ``flag`` writes nothing):

    * ``Counts/Filtered <n2> <kind> Counts`` and
      ``Counts/Filtered <n1> <kind> Counts`` -- only when ``per`` is not -1,
    * ``Counts/Raw <n2> <kind> Counts`` and ``Counts/Raw <n1> <kind> Counts``.

    The ``<n2>`` files use ``names_tre`` with the ``*_LH8E`` tables, the
    ``<n1>`` files use ``names_con`` with the ``*_LH2E`` tables. Files are
    opened with ``with`` so they are closed even when a write fails.

    Args:
        raw_LH2E, raw_LH8E: Unfiltered tables (rows of strings).
        fil_LH2E, fil_LH8E: Filtered tables (rows of strings).
        names_con, names_tre: Sample names used as column headers.
        flag: 1 or 2, selects the kind of table.
        per: Filter percentage; -1 means no filtered files are written.
        n1, n2: Group labels used in the file names.
    """

    def _dump(target, sample_names, rows):
        # One table per file: header line, then one line per row.
        with open(target, 'w') as fp:
            fp.write("Name\t")
            fp.write("Sequence")
            for y in sample_names:
                fp.write("\t"+y)
            for x in rows:
                fp.write("\n%s" % "\t".join(x))

    kinds = {1: 'Templated', 2: 'Non-Templated'}
    kind = kinds.get(flag)
    if kind is None:
        return

    if int(per) != -1:
        _dump('Counts/Filtered '+n2+' '+kind+' Counts', names_tre, fil_LH8E)
        _dump('Counts/Filtered '+n1+' '+kind+' Counts', names_con, fil_LH2E)

    _dump('Counts/Raw '+n2+' '+kind+' Counts', names_tre, raw_LH8E)
    _dump('Counts/Raw '+n1+' '+kind+' Counts', names_con, raw_LH2E)

####################################################################################################################################################################################################################

def ssamples(names,samp,folder,pro):
    """Split a count table into one two-column file per sample.

    Column ``i`` (from index 2 on) of ``samp`` belongs to ``names[i-2]``.
    For each of those columns the file ``<folder><sample name>.txt`` is
    written with the header "miRNA id<TAB><sample name>" and one line
    "<row[0]><TAB><row[i]>" per table row. Each file is opened with
    ``with`` so it is closed even when a write fails.

    Args:
        names: Sample names, in column order.
        samp: Table rows ``[name, sequence, count, ...]`` (strings); the
            first row defines the number of columns.
        folder: Prefix prepended verbatim to the file name (include the
            trailing path separator).
        pro: Unused.
    """
    for i in range(2,len(samp[0])):
        sample = names[i-2]

        with open(folder+sample+'.txt','w') as fp:
            fp.write("miRNA id"+"\t"+sample+"\n")
            for x in samp:
                fp.write("\t".join([x[0],x[i]])+"\n")

####################################################################################################################################################################################################################

def DB_write(con,name,unique_seq,sorted_uni_arms,f):
    """Write the per-arm read report of one sample.

    The output file is ``split1/<name>`` (f=1, con="c"), ``split2/<name>``
    (f=1, con="t"), ``split3/<name>`` (f=2, con="c") or ``split4/<name>``
    (f=2, con="t"). Any other ``f`` writes nothing.

    After a column header, each arm of ``sorted_uni_arms`` gets a banner
    with its name, sequence count and read count, followed by its reads
    sorted by decreasing read number. A read belongs to an arm when

    * f=1: the arm name is contained in the first two "_"-separated parts
      of the read name; the banner is written even without reads,
    * f=2: the arm name equals the part of the read name before "__";
      arms without reads are skipped.

    The file is opened with ``with`` so it is closed even when a write
    fails.

    Args:
        con: "c" or "t".
        name: Output file name inside the split folder.
        unique_seq: Records; column 1 is "<id>-<reads>", column 3 the read
            name, column 10 the sequence.
        sorted_uni_arms: Rows ``[arm name, sequence count, read count]``.
        f: 1 or 2, selects folder pair and matching rule.

    Raises:
        ValueError: If ``f`` is 1 or 2 and ``con`` is neither "c" nor "t".
    """
    if f != 1 and f != 2:
        return

    folders = {(1, "c"): 'split1/', (1, "t"): 'split2/',
               (2, "c"): 'split3/', (2, "t"): 'split4/'}
    folder = folders.get((f, con))
    if folder is None:
        raise ValueError("con must be 'c' or 't', got %r" % (con,))

    rule = "*********************************************************************************************************\n"

    with open(folder+name, 'w') as fp:
        # Write a txt file with all the information
        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))

        for arm in sorted_uni_arms:
            if f == 1:
                temp = [x for x in unique_seq
                        if arm[0] in (x[2].split("_")[0]+"_"+x[2].split("_")[1])]
            else:
                temp = [x for x in unique_seq if arm[0] == x[2].split("__")[0]]
                if not temp:
                    continue

            temp.sort(key=lambda x: int(x[0].split('-')[1]), reverse=True)

            fp.write(rule)
            fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(arm[0]),"Sequence count = "+str(arm[1]),"Total reads = "+str(arm[2]),"|"))
            fp.write(rule + "\n")
            for x in temp:
                fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9]))
            fp.write("\n" + "\n")


##########################################################################################################################

def new_mat_seq(pre_unique_seq,mat_mirnas,l):
    """Collect records that out-number a three-part-named record they contain.

    For every record ``x`` whose name (column 3) has exactly three
    "_"-separated parts, each record ``y`` is selected when ``x``'s name
    is contained in ``y``'s name and ``y`` has strictly more reads
    (column 1 is "<id>-<reads>"). A selected record gets ">" prefixed to
    its name in place and is collected once.

    The name and sequence (column 10) of every collected record are then
    appended to ``mat_mirnas`` while holding ``l``. The lock is taken with
    ``with`` so it is released even if an append fails.

    Args:
        pre_unique_seq: Records to inspect; selected ones are renamed in
            place.
        mat_mirnas: Output list receiving alternating name / sequence.
        l: Lock object usable as a context manager.
    """

    def _reads(record):
        return int(record[0].split("-")[1])

    unique_iso = []
    for x in pre_unique_seq:
        if len(x[2].split("_")) != 3:
            continue
        for y in pre_unique_seq:
            if x[2] in y[2] and _reads(x) < _reads(y):
                # After renaming, y[2] is an element of the stored record,
                # which is how an already collected record is recognised.
                if not any(y[2] in lst2 for lst2 in unique_iso):
                    y[2] = ">" + y[2]
                    unique_iso.append(y)

    with l:
        for x in unique_iso:
            mat_mirnas.append(x[2])
            mat_mirnas.append(x[9])

#########################################################################################################################

def merging_names(LH2E_copy,new):
    """Give rows that share one sequence a single, merged name.

    Rows are ``[name, sequence, ...]``. A sequence counts as duplicated
    when a row is met whose sequence or name was already seen in an
    earlier row. For every duplicated sequence the names of its rows are
    collected in row order; among names sharing the same "<name>_<arm>"
    prefix only the shortest is kept. The collected names are joined with
    "/" (first name, then the remaining ones from last to first) and
    written into column 1 of every row with that sequence.

    Finally the table is sorted in place, identical rows are dropped and
    the result is appended to ``new``.

    Membership tests use a set and a dictionary instead of repeated list
    scans; the produced rows are the same.

    Args:
        LH2E_copy: Table to process; sorted and renamed in place.
        new: Output list extended with the merged, de-duplicated rows.
    """

    def _arm(label):
        # "<name>_<arm>_..." -> "<name>_<arm>"
        parts = label.split("_")
        return parts[0] + "_" + parts[1]

    seen = set()        # every name and sequence accepted so far
    dup_seqs = set()
    for row in LH2E_copy:
        if row[1] not in seen and row[0] not in seen:
            seen.add(row[1])
            seen.add(row[0])
        else:
            dup_seqs.add(row[1])

    # sequence -> names collected for it, in row order
    merged = {seq: [] for seq in dup_seqs}
    for row in LH2E_copy:
        collected = merged.get(row[1])
        if collected is None:
            continue
        if not collected:
            collected.append(row[0])
            continue

        same_arm = False
        for i, known in enumerate(collected):
            if _arm(known) == _arm(row[0]):
                same_arm = True
                if len(row[0]) < len(known):
                    # Keep only the shorter name of the same arm.
                    del collected[i]
                    collected.append(row[0])
                    break
        if not same_arm:
            collected.append(row[0])

    for row in LH2E_copy:
        found = merged.get(row[1])
        if found is not None:
            row[0] = "/".join([found[0]] + found[:0:-1])

    LH2E_copy.sort()
    new.extend([row for row, _ in itertools.groupby(LH2E_copy)])

######################################################################################################################################################

def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
    """Write one two-column count file per sample from two tables.

    Column ``i`` (from index 2 on) of ``tem_samp`` belongs to
    ``tem_names[i-2]``. For each of those columns the file
    ``<folder><sample name>.txt`` receives the header
    "miRNA id<TAB><sample name>" and one line "<row[0]><TAB><row[i]>" per
    row of ``tem_samp``. For every position ``j`` of ``non_names`` holding
    the same sample name, the rows of ``non_samp`` are appended using
    column ``j+2``. Each file is opened with ``with`` so it is closed even
    when a write fails.

    Args:
        tem_names: Sample names of the first table, in column order.
        tem_samp: First table, rows ``[name, sequence, count, ...]``; its
            first row defines the number of columns.
        non_names: Sample names of the second table, in column order.
        non_samp: Second table, same row layout.
        folder: Prefix prepended verbatim to the file name (include the
            trailing path separator).
        pro: Unused.
    """
    for i in range(2,len(tem_samp[0])):
        sample = tem_names[i-2]

        with open(folder+sample+'.txt','w') as fp:
            fp.write("miRNA id"+"\t"+sample+"\n")

            for x in tem_samp:
                fp.write("\t".join([x[0],x[i]])+"\n")

            for j in range(len(non_names)):
                if non_names[j]==sample:
                    for x in non_samp:
                        fp.write("\t".join([x[0],x[j+2]])+"\n")

#################################################################################################################################################################################################################

def download_matures(matures,org_name):
    """Download the mature and star sequences of one organism from MirGeneDB.

    Two FASTA documents are fetched (``?mat=1`` and ``?star=1``). Each is
    split into lines, ">" is removed from every line, and the lines of
    both documents are concatenated (mature first). Every second line,
    starting with the second one, has "U" replaced by "T" -- i.e. the
    list is treated as alternating name / sequence entries. The result is
    appended to ``matures``.

    Each HTTP response is closed as soon as it has been read, and only an
    empty final line (left by a trailing newline) is removed, so a payload
    without a trailing newline does not lose its last sequence.

    Args:
        matures: Output list extended with alternating name / sequence.
        org_name: Organism identifier inserted into the download URL.

    Raises:
        urllib.error.URLError: If a download fails (raised by ``urlopen``).
    """

    def _fasta_lines(url):
        # Fetch one FASTA document and return its lines without ">".
        with urllib.request.urlopen(url) as response:
            text = response.read().decode('utf-8')
        lines = [x.replace(">","") for x in text.split("\n")]
        if lines and lines[-1] == "":
            del lines[-1]
        return lines

    mat_url = 'http://mirgenedb.org/fasta/'+org_name+'?mat=1'
    star_url = 'http://mirgenedb.org/fasta/'+org_name+'?star=1'

    mature_mir = _fasta_lines(mat_url)
    mature_mir.extend(_fasta_lines(star_url))

    # Sequences sit at the odd positions; convert them from RNA to DNA letters.
    for i in range(1,len(mature_mir),2):
        mature_mir[i]=mature_mir[i].replace("U","T")

    matures.extend(mature_mir)

###################################################################################################################

def non_template_ref(sc,st,all_isoforms):
    """Collect the names and sequences of all isoforms seen in the samples.

    Every record of every sample in ``sc`` and then ``st`` is inspected.
    A record is added when its name (column 3) has more than two
    "_"-separated parts and is not yet contained in ``all_isoforms``; its
    name and its sequence (column 10) are then appended, in that order.

    A set mirroring ``all_isoforms`` is used for the membership test so
    the list is not rescanned for every record.

    Args:
        sc: Iterable of samples, each an iterable of records.
        st: Second iterable of samples, processed after ``sc``.
        all_isoforms: Output list of alternating name / sequence; existing
            content is respected.
    """
    # Snapshot both inputs before anything is appended.
    groups = (list(sc), list(st))
    known = set(all_isoforms)

    for group in groups:
        for sample in group:
            for y in sample:
                if y[2] not in known and len(y[2].split("_")) > 2:
                    all_isoforms.append(y[2])
                    all_isoforms.append(y[9])
                    known.add(y[2])
                    known.add(y[9])

################################################################################################################################################################################################

def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
    """Pad one sample with zero counts for the names it is missing.

    ``sample`` holds rows ``[name, count, sequence]``. For every
    ``[name, sequence]`` pair of ``mir_names`` whose name is absent, the
    row ``[name, "0", sequence]`` is appended (first pair per name wins).
    The sample is then sorted by name and consecutive identical rows are
    dropped. The result and ``sample_name`` are appended to ``new_d`` and
    ``sample_order`` while holding ``l``.

    Names already present are tracked in a set instead of rescanning the
    sample for every name, and the lock is taken with ``with`` so it is
    released even if an append fails.

    Args:
        sample: Rows of one sample; padded and sorted in place.
        mir_names: ``[name, sequence]`` pairs every sample must contain.
        l: Lock object usable as a context manager.
        new_d: Output list receiving the processed sample.
        sample_name: Label of this sample.
        sample_order: Output list receiving ``sample_name``.
    """
    present = {x[0] for x in sample}
    for y in mir_names:
        if y[0] not in present:
            sample.append([y[0],"0",y[1]])
            present.add(y[0])

    sample.sort(key=lambda x: x[0])
    sample = [row for row, _ in itertools.groupby(sample)]

    with l:
        new_d.append(sample)
        sample_order.append(sample_name)

###############################################################################################################################################################################################