| 
17
 | 
     1 import itertools
 | 
| 
 | 
     2 import time
 | 
| 
 | 
     3 import sys
 | 
| 
 | 
     4 import os
 | 
| 
 | 
     5 import urllib.request
 | 
| 
 | 
     6 import gzip
 | 
| 
 | 
     7 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
 | 
| 
 | 
     8 import subprocess
 | 
| 
 | 
     9 import argparse
 | 
| 
 | 
    10 from collections import OrderedDict
 | 
| 
 | 
    11 from matplotlib.backends.backend_pdf import PdfPages
 | 
| 
 | 
    12 import pandas as pd
 | 
| 
 | 
    13 from math import pi
 | 
| 
 | 
    14 import numpy as np
 | 
| 
 | 
    15 import matplotlib.pyplot as plt
 | 
| 
 | 
    16 from matplotlib.ticker import PercentFormatter
 | 
| 
 | 
    17 import seaborn as sns
 | 
| 
 | 
    18 import scipy.stats as stats
 | 
| 
 | 
    19 from plotnine import *
 | 
| 
 | 
    20 import math
 | 
| 
 | 
    21 import re
 | 
| 
 | 
    22 import matplotlib.ticker as mtick
 | 
| 
 | 
    23 import copy
 | 
| 
 | 
    24 
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 """---------------------- Simple Functions -----------------------"""
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 # Read a file and return it as a list
 | 
| 
 | 
    31 def read(path, flag):
 | 
| 
 | 
    32     if flag == 0:
 | 
| 
 | 
    33         with open(path) as fp:
 | 
| 
 | 
    34             file=fp.readlines()
 | 
| 
 | 
    35         fp.close()
 | 
| 
 | 
    36         return file
 | 
| 
 | 
    37 
 | 
| 
 | 
    38     if flag == 1:
 | 
| 
 | 
    39         with open(path) as fp:
 | 
| 
 | 
    40             file = fp.read().splitlines()
 | 
| 
 | 
    41         fp.close()
 | 
| 
 | 
    42         return file
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 # Write a list to a txt file
 | 
| 
 | 
    45 def write(path, list):
 | 
| 
 | 
    46     with open(path,'w') as fp:
 | 
| 
 | 
    47         for x in list:
 | 
| 
 | 
    48             fp.write(str("\t".join(x[1:-1])))
 | 
| 
 | 
    49     fp.close()
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 """---------------------- RNA-seq Functions ----------------------"""
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 # Detect the longest common substring sequence between two mirnas
 | 
| 
 | 
    54 def longestSubstring(str1, str2):
 | 
| 
 | 
    55 
 | 
| 
 | 
    56     from difflib import SequenceMatcher
 | 
| 
 | 
    57     # initialize SequenceMatcher object with
 | 
| 
 | 
    58     # input string
 | 
| 
 | 
    59     seqMatch = SequenceMatcher(None, str1, str2)
 | 
| 
 | 
    60 
 | 
| 
 | 
    61     # find match of longest sub-string
 | 
| 
 | 
    62     # output will be like Match(a=0, b=0, size=5)
 | 
| 
 | 
    63     match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
 | 
| 
 | 
    64 
 | 
| 
 | 
    65     # print longest substring
 | 
| 
 | 
    66     if (match.size != 0):
 | 
| 
 | 
    67         return str1[match.a: match.a + match.size]
 | 
| 
 | 
    68     else:
 | 
| 
 | 
    69         print('No longest common sub-string found')
 | 
| 
 | 
    70 
 | 
| 
 | 
    71 
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 ########################################################################################################################################################
 | 
| 
 | 
    74 def collapse_sam(path):
 | 
| 
 | 
    75 
 | 
| 
 | 
    76     ini_sam=read(path,0)
 | 
| 
 | 
    77     main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
 | 
| 
 | 
    78     intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in  x.split("\t")[0]]
 | 
| 
 | 
    79 
 | 
| 
 | 
    80     uni_seq = []
 | 
| 
 | 
    81     for x in main_sam:
 | 
| 
 | 
    82 
 | 
| 
 | 
    83         if [x[2], x[9]] not in uni_seq:
 | 
| 
 | 
    84             uni_seq.append([x[2], x[9]])
 | 
| 
 | 
    85 
 | 
| 
 | 
    86     new_main_sam=[]
 | 
| 
 | 
    87     incr_num=0
 | 
| 
 | 
    88     for i in range(len(uni_seq)):
 | 
| 
 | 
    89         count=0
 | 
| 
 | 
    90         incr_num+=1
 | 
| 
 | 
    91         for y in main_sam:
 | 
| 
 | 
    92             if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]:
 | 
| 
 | 
    93                count+=1
 | 
| 
 | 
    94                temp=y
 | 
| 
 | 
    95         temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
 | 
| 
 | 
    96         temp[0]=str(incr_num)+"-"+str(count)
 | 
| 
 | 
    97         new_main_sam.append(temp)
 | 
| 
 | 
    98 
 | 
| 
 | 
    99     new_sam=intro_sam+new_main_sam
 | 
| 
 | 
   100 
 | 
| 
 | 
   101     return new_sam
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 #################################################################################################################################################################
 | 
| 
 | 
   104 
 | 
| 
 | 
   105 def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
 | 
| 
 | 
   106 
 | 
| 
 | 
   107     # read the sam file
 | 
| 
 | 
   108 
 | 
| 
 | 
   109     ini_sam=read(path,0)
 | 
| 
 | 
   110     new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
 | 
| 
 | 
   111     unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]
 | 
| 
 | 
   112 
 | 
| 
 | 
   113     # Calculate the shifted positions for every isomir and add them to the name of it
 | 
| 
 | 
   114     sorted_uni_arms = []
 | 
| 
 | 
   115 
 | 
| 
 | 
   116     for i in range(len(mature_mirnas)):
 | 
| 
 | 
   117         tmp_count_reads = 0   # calculate the total number of reads
 | 
| 
 | 
   118         tmp_count_seq = 0     # calculate the total number of sequences
 | 
| 
 | 
   119         for j in range(len(unique_seq)):
 | 
| 
 | 
   120 
 | 
| 
 | 
   121          if mature_mirnas[i] == unique_seq[j][2]:
 | 
| 
 | 
   122 
 | 
| 
 | 
   123                 temp_mature = mature_mirnas[i+1]
 | 
| 
 | 
   124                 off_part = longestSubstring(temp_mature, unique_seq[j][9])
 | 
| 
 | 
   125 
 | 
| 
 | 
   126                 mat_diff = temp_mature.split(off_part)
 | 
| 
 | 
   127                 mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
 | 
| 
 | 
   128 
 | 
| 
 | 
   129                 unique_diff = unique_seq[j][9].split(off_part)
 | 
| 
 | 
   130                 unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
 | 
| 
 | 
   131 
 | 
| 
 | 
   132                 # Problem with hsa-miR-8485
 | 
| 
 | 
   133                 if mat_diff[1]!=0 and unique_diff[1]!=0:
 | 
| 
 | 
   134                     unique_seq[j]=1
 | 
| 
 | 
   135                     pre_pos = 0
 | 
| 
 | 
   136                     post_pos = 0
 | 
| 
 | 
   137 
 | 
| 
 | 
   138                 elif mat_diff[0]!=0 and unique_diff[0]!=0:
 | 
| 
 | 
   139                     unique_seq[j]=1
 | 
| 
 | 
   140                     pre_pos = 0
 | 
| 
 | 
   141                     post_pos = 0
 | 
| 
 | 
   142 
 | 
| 
 | 
   143                 else:
 | 
| 
 | 
   144                    pre_pos = mat_diff[0]-unique_diff[0]
 | 
| 
 | 
   145                    post_pos = unique_diff[1]-mat_diff[1]
 | 
| 
 | 
   146                    tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
 | 
| 
 | 
   147                    tmp_count_seq = tmp_count_seq+1
 | 
| 
 | 
   148 
 | 
| 
 | 
   149 
 | 
| 
 | 
   150                 if pre_pos != 0 or post_pos != 0:
 | 
| 
 | 
   151                     if pre_pos == 0:
 | 
| 
 | 
   152                         unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
 | 
| 
 | 
   153                     elif post_pos == 0:
 | 
| 
 | 
   154                         unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
 | 
| 
 | 
   155                     else:
 | 
| 
 | 
   156                         unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
 | 
| 
 | 
   157 
 | 
| 
 | 
   158         for x in range(unique_seq.count(1)):
 | 
| 
 | 
   159            unique_seq.remove(1)
 | 
| 
 | 
   160         if tmp_count_reads != 0 and tmp_count_seq != 0:
 | 
| 
 | 
   161            sorted_uni_arms.append([mature_mirnas[i], tmp_count_seq, tmp_count_reads])
 | 
| 
 | 
   162 
 | 
| 
 | 
   163     # Store name of arms, number of sequences and number of reads
 | 
| 
 | 
   164     sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
 | 
| 
 | 
   165 
 | 
| 
 | 
   166     for y in sorted_uni_arms:
 | 
| 
 | 
   167        counts=0
 | 
| 
 | 
   168        seqs=0
 | 
| 
 | 
   169        for x in unique_seq:
 | 
| 
 | 
   170            if y[0]==x[2].split("_")[0]+"_"+x[2].split("_")[1]:
 | 
| 
 | 
   171               counts+=int(x[0].split("-")[1])
 | 
| 
 | 
   172               seqs+=1
 | 
| 
 | 
   173 
 | 
| 
 | 
   174        y[1]=seqs
 | 
| 
 | 
   175        y[2]=counts
 | 
| 
 | 
   176 
 | 
| 
 | 
   177     LHE=[]
 | 
| 
 | 
   178 
 | 
| 
 | 
   179     l.acquire()
 | 
| 
 | 
   180     if con=="c":
 | 
| 
 | 
   181        LHE.extend(z[2] for z in unique_seq)
 | 
| 
 | 
   182        for y in unique_seq:
 | 
| 
 | 
   183            samples_mirna_names.append([y[2],y[9]])
 | 
| 
 | 
   184        deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
 | 
| 
 | 
   185        LHE_names.extend(LHE)
 | 
| 
 | 
   186        unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
 | 
| 
 | 
   187        unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
 | 
| 
 | 
   188        names.append(name)
 | 
| 
 | 
   189        samples.append(unique_seq)
 | 
| 
 | 
   190        data.append([con,name,unique_seq,sorted_uni_arms])
 | 
| 
 | 
   191        ini_sample.append(new_main_sam)
 | 
| 
 | 
   192 
 | 
| 
 | 
   193     if con=="t":
 | 
| 
 | 
   194        LHE.extend(z[2] for z in unique_seq)
 | 
| 
 | 
   195        for y in unique_seq:
 | 
| 
 | 
   196            samples_mirna_names.append([y[2],y[9]])
 | 
| 
 | 
   197        deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
 | 
| 
 | 
   198        LHE_names.extend(LHE)
 | 
| 
 | 
   199        unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
 | 
| 
 | 
   200        unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
 | 
| 
 | 
   201        names.append(name)
 | 
| 
 | 
   202        samples.append(unique_seq)
 | 
| 
 | 
   203        data.append([con,name,unique_seq,sorted_uni_arms])
 | 
| 
 | 
   204        ini_sample.append(new_main_sam)
 | 
| 
 | 
   205     l.release()
 | 
| 
 | 
   206 
 | 
| 
 | 
   207 
 | 
| 
 | 
   208 ######################################################################################################################################
 | 
| 
 | 
   209 """
 | 
| 
 | 
   210 Read a sam file from Bowtie and do the followings:
 | 
| 
 | 
   211 
 | 
| 
 | 
   212 1) Remove reverse stranded mapped reads
 | 
| 
 | 
   213 2) Remove unmapped reads 
 | 
| 
 | 
   214 3) Remove all sequences with reads less than 11 reads
 | 
| 
 | 
   215 4) Sort the arms with the most sequences in decreading rate
 | 
| 
 | 
   216 5) Sort the sequences of every arm with the most reads in decreasing rate
 | 
| 
 | 
   217 6) Calculate total number of sequences of every arm
 | 
| 
 | 
   218 7) Calculate total number of reads of sequences of every arm.
 | 
| 
 | 
   219 8) Store all the informations in a txt file 
 | 
| 
 | 
   220 
 | 
| 
 | 
   221 """
 | 
| 
 | 
   222 
 | 
| 
 | 
   223 def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
 | 
| 
 | 
   224 
 | 
| 
 | 
   225 
 | 
| 
 | 
   226     ini_sam=read(path,0)
 | 
| 
 | 
   227     new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
 | 
| 
 | 
   228     unique_seq=[]
 | 
| 
 | 
   229     unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
 | 
| 
 | 
   230     uni_seq=[]
 | 
| 
 | 
   231 
 | 
| 
 | 
   232     # Calculate the shifted positions for every isomir and add them to the name of it
 | 
| 
 | 
   233     sorted_uni_arms = []
 | 
| 
 | 
   234     for i in range(1,len(mature_mirnas),2):
 | 
| 
 | 
   235         tmp_count_reads = 0   # calculate the total number of reads
 | 
| 
 | 
   236         tmp_count_seq = 0     # calculate the total number of sequences
 | 
| 
 | 
   237 
 | 
| 
 | 
   238         for j in range(len(unique_seq)):
 | 
| 
 | 
   239 
 | 
| 
 | 
   240             temp_mature = mature_mirnas[i].strip().replace("U", "T")
 | 
| 
 | 
   241 
 | 
| 
 | 
   242             if temp_mature in unique_seq[j][9]:
 | 
| 
 | 
   243 
 | 
| 
 | 
   244                 off_part = longestSubstring(temp_mature, unique_seq[j][9])
 | 
| 
 | 
   245 
 | 
| 
 | 
   246                 mat_diff = temp_mature.split(off_part)
 | 
| 
 | 
   247                 mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
 | 
| 
 | 
   248 
 | 
| 
 | 
   249                 unique_diff = unique_seq[j][9].split(off_part)
 | 
| 
 | 
   250                 if len(unique_diff)<=2:
 | 
| 
 | 
   251                    unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
 | 
| 
 | 
   252 
 | 
| 
 | 
   253                    pre_pos = mat_diff[0]-unique_diff[0]
 | 
| 
 | 
   254                    post_pos = unique_diff[1]-mat_diff[1]
 | 
| 
 | 
   255 
 | 
| 
 | 
   256                    lengthofmir = len(off_part) + post_pos
 | 
| 
 | 
   257                    if pre_pos == 0:
 | 
| 
 | 
   258                       tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
 | 
| 
 | 
   259                       tmp_count_seq = tmp_count_seq + 1
 | 
| 
 | 
   260 
 | 
| 
 | 
   261                       if pre_pos == 0:
 | 
| 
 | 
   262                           t_name=copy.deepcopy(unique_seq[j])
 | 
| 
 | 
   263                           t_name[2]=mature_mirnas[i - 1] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
 | 
| 
 | 
   264                           uni_seq.append(t_name)
 | 
| 
 | 
   265 
 | 
| 
 | 
   266         if tmp_count_reads != 0 and tmp_count_seq != 0:
 | 
| 
 | 
   267             sorted_uni_arms.append([mature_mirnas[i-1], tmp_count_seq, tmp_count_reads])
 | 
| 
 | 
   268 
 | 
| 
 | 
   269 
 | 
| 
 | 
   270     sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
 | 
| 
 | 
   271     unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
 | 
| 
 | 
   272 
 | 
| 
 | 
   273     LHE=[]
 | 
| 
 | 
   274 
 | 
| 
 | 
   275     l.acquire()
 | 
| 
 | 
   276     if con=="c":
 | 
| 
 | 
   277        LHE.extend(x[2] for x in unique_seq if x[2]!="*")
 | 
| 
 | 
   278        for x in unique_seq:
 | 
| 
 | 
   279            if x[2]!="*":
 | 
| 
 | 
   280               n_samples_mirna_names.append([x[2],x[9]])
 | 
| 
 | 
   281        n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
 | 
| 
 | 
   282        n_LHE_names.extend(LHE)
 | 
| 
 | 
   283        names.append(name)
 | 
| 
 | 
   284        data.append([con,name,unique_seq,sorted_uni_arms])
 | 
| 
 | 
   285 
 | 
| 
 | 
   286 
 | 
| 
 | 
   287     if con=="t":
 | 
| 
 | 
   288        LHE.extend(x[2] for x in unique_seq if x[2]!="*")
 | 
| 
 | 
   289        for x in unique_seq:
 | 
| 
 | 
   290            if x[2]!="*":
 | 
| 
 | 
   291               n_samples_mirna_names.append([x[2],x[9]])
 | 
| 
 | 
   292        n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
 | 
| 
 | 
   293        n_LHE_names.extend(LHE)
 | 
| 
 | 
   294        names.append(name)
 | 
| 
 | 
   295        data.append([con,name,unique_seq,sorted_uni_arms])
 | 
| 
 | 
   296     l.release()
 | 
| 
 | 
   297 
 | 
| 
 | 
   298 #################################################################################################################################################################################################################
 | 
| 
 | 
   299 
 | 
| 
 | 
   300 def deseq2_temp(samples_mirna_names,deseq,con,l):
 | 
| 
 | 
   301 
 | 
| 
 | 
   302     samples_mirna_names.sort(key=lambda x:[0])
 | 
| 
 | 
   303 
 | 
| 
 | 
   304     for i in range(len(deseq)):
 | 
| 
 | 
   305         for y in samples_mirna_names:
 | 
| 
 | 
   306             flag = 0
 | 
| 
 | 
   307             for x in deseq[i]:
 | 
| 
 | 
   308                 if y[0] == x[0]:
 | 
| 
 | 
   309                     flag = 1
 | 
| 
 | 
   310                     break
 | 
| 
 | 
   311 
 | 
| 
 | 
   312             if flag == 0:
 | 
| 
 | 
   313                 deseq[i].append([y[0], "0", y[1]])
 | 
| 
 | 
   314 
 | 
| 
 | 
   315     [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
 | 
| 
 | 
   316     deseq_final = [[x[0],x[2]] for x in deseq[0]]
 | 
| 
 | 
   317     [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]
 | 
| 
 | 
   318 
 | 
| 
 | 
   319     l.acquire()
 | 
| 
 | 
   320     if con=="c":
 | 
| 
 | 
   321        q1.put(deseq_final)
 | 
| 
 | 
   322 
 | 
| 
 | 
   323     if con=="t":
 | 
| 
 | 
   324        q2.put(deseq_final)
 | 
| 
 | 
   325     l.release()
 | 
| 
 | 
   326 
 | 
| 
 | 
   327 
 | 
| 
 | 
   328 ##################################################################################################################################################################################################################
 | 
| 
 | 
   329 
 | 
| 
 | 
   330 def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E,per,count):
 | 
| 
 | 
   331 
 | 
| 
 | 
   332  LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
 | 
| 
 | 
   333  LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]
 | 
| 
 | 
   334 
 | 
| 
 | 
   335  LH8E_add_names.sort()
 | 
| 
 | 
   336  LH2E_add_names.sort()
 | 
| 
 | 
   337  LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
 | 
| 
 | 
   338  LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))
 | 
| 
 | 
   339 
 | 
| 
 | 
   340  LH2E.sort()
 | 
| 
 | 
   341  LH8E.sort()
 | 
| 
 | 
   342  LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
 | 
| 
 | 
   343  LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))
 | 
| 
 | 
   344 
 | 
| 
 | 
   345  zeros=["0"]*(len(LH8E[0])-2)
 | 
| 
 | 
   346  [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
 | 
| 
 | 
   347  LH8E=LH8E+LH8E_add_names
 | 
| 
 | 
   348 
 | 
| 
 | 
   349  zeros=["0"]*(len(LH2E[0])-2)
 | 
| 
 | 
   350  [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
 | 
| 
 | 
   351  LH2E=LH2E+LH2E_add_names
 | 
| 
 | 
   352 
 | 
| 
 | 
   353  dupes=[]
 | 
| 
 | 
   354  final_LH2E =[]
 | 
| 
 | 
   355 
 | 
| 
 | 
   356  for num,_ in enumerate(LH2E):
 | 
| 
 | 
   357 
 | 
| 
 | 
   358     if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
 | 
| 
 | 
   359         final_LH2E.append(LH2E[num][1])
 | 
| 
 | 
   360         final_LH2E.append(LH2E[num][0])
 | 
| 
 | 
   361     else:
 | 
| 
 | 
   362         dupes.append(LH2E[num][1])
 | 
| 
 | 
   363 
 | 
| 
 | 
   364  dupes=list(set(dupes))
 | 
| 
 | 
   365 
 | 
| 
 | 
   366  dupes=[[x] for x in dupes]
 | 
| 
 | 
   367 
 | 
| 
 | 
   368  for x in LH2E:
 | 
| 
 | 
   369      for y in dupes:
 | 
| 
 | 
   370          if x[1]==y[0]:
 | 
| 
 | 
   371              fl=0
 | 
| 
 | 
   372              if len(y)==1:
 | 
| 
 | 
   373                  y.append(x[0])
 | 
| 
 | 
   374              else:
 | 
| 
 | 
   375                  for i in range(1,len(y)):
 | 
| 
 | 
   376                      if y[i].split("_")[0]+"_"+y[i].split("_")[1]==x[0].split("_")[0]+"_"+x[0].split("_")[1]:
 | 
| 
 | 
   377                         fl=1
 | 
| 
 | 
   378                         if len(x[0])<len(y[i]):
 | 
| 
 | 
   379                            del y[i]
 | 
| 
 | 
   380                            y.append(x[0])
 | 
| 
 | 
   381                            break
 | 
| 
 | 
   382 
 | 
| 
 | 
   383                  if fl==0:
 | 
| 
 | 
   384                     y.append((x[0]))
 | 
| 
 | 
   385 
 | 
| 
 | 
   386  for y in dupes:
 | 
| 
 | 
   387     if len(y)>2:
 | 
| 
 | 
   388         for i in range(len(y)-1,1,-1):
 | 
| 
 | 
   389             y[1]=y[1]+"/"+y[i]
 | 
| 
 | 
   390             del y[i]
 | 
| 
 | 
   391 
 | 
| 
 | 
   392 
 | 
| 
 | 
   393  for x in LH2E:
 | 
| 
 | 
   394      for y in dupes:
 | 
| 
 | 
   395          if x[1]==y[0]:
 | 
| 
 | 
   396             x[0]=y[1]
 | 
| 
 | 
   397 
 | 
| 
 | 
   398  for x in LH8E:
 | 
| 
 | 
   399     for y in dupes:
 | 
| 
 | 
   400         if x[1]==y[0]:
 | 
| 
 | 
   401            x[0]=y[1]
 | 
| 
 | 
   402 
 | 
| 
 | 
   403 
 | 
| 
 | 
   404 
 | 
| 
 | 
   405  LH2E.sort()
 | 
| 
 | 
   406  LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))
 | 
| 
 | 
   407 
 | 
| 
 | 
   408  LH8E.sort()
 | 
| 
 | 
   409  LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))
 | 
| 
 | 
   410 
 | 
| 
 | 
   411  if int(per)!=-1:
 | 
| 
 | 
   412     percent=int(per)/100
 | 
| 
 | 
   413 
 | 
| 
 | 
   414     c_col_filter=round(percent*(len(LH2E[1])-2))
 | 
| 
 | 
   415     t_col_filter=round(percent*(len(LH8E[1])-2))
 | 
| 
 | 
   416 
 | 
| 
 | 
   417     for i, _ in enumerate(LH2E):
 | 
| 
 | 
   418         c_cols=0
 | 
| 
 | 
   419         t_cols=0
 | 
| 
 | 
   420 
 | 
| 
 | 
   421         c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(count)])
 | 
| 
 | 
   422         t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(count)])
 | 
| 
 | 
   423 
 | 
| 
 | 
   424         if c_cols>=c_col_filter or t_cols>=t_col_filter:
 | 
| 
 | 
   425            filter_LH8E.append(LH8E[i])
 | 
| 
 | 
   426            filter_LH2E.append(LH2E[i])
 | 
| 
 | 
   427 
 | 
| 
 | 
   428  raw_LH2E.extend(LH2E)
 | 
| 
 | 
   429  raw_LH8E.extend(LH8E)
 | 
| 
 | 
   430 
 | 
| 
 | 
   431 ##################################################################################################################################################################################################################
 | 
| 
 | 
   432 
 | 
| 
 | 
   433 def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag, per, n1, n2):
 | 
| 
 | 
   434 
 | 
| 
 | 
   435  if flag == 1 and int(per)!=-1:
 | 
| 
 | 
   436     fp = open('Counts/Filtered '+n2 +' Templated Counts', 'w')
 | 
| 
 | 
   437     fp.write("Name\t")
 | 
| 
 | 
   438     fp.write("Sequence")
 | 
| 
 | 
   439     for y in names_tre:
 | 
| 
 | 
   440        fp.write("\t"+y)
 | 
| 
 | 
   441 
 | 
| 
 | 
   442     for x in fil_LH8E:
 | 
| 
 | 
   443         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   444     fp.close()
 | 
| 
 | 
   445 
 | 
| 
 | 
   446     fp = open('Counts/Filtered '+n1+' Templated Counts', 'w')
 | 
| 
 | 
   447     fp.write("Name\t")
 | 
| 
 | 
   448     fp.write("Sequence")
 | 
| 
 | 
   449     for y in names_con:
 | 
| 
 | 
   450        fp.write("\t"+y)
 | 
| 
 | 
   451 
 | 
| 
 | 
   452     for x in fil_LH2E:
 | 
| 
 | 
   453         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   454     fp.close()
 | 
| 
 | 
   455 
 | 
| 
 | 
   456 
 | 
| 
 | 
   457  if flag == 2 and int(per)!=-1:
 | 
| 
 | 
   458     fp = open('Counts/Filtered '+n2+' Non-Templated Counts', 'w')
 | 
| 
 | 
   459     fp.write("Name\t")
 | 
| 
 | 
   460     fp.write("Sequence")
 | 
| 
 | 
   461     for y in names_tre:
 | 
| 
 | 
   462        fp.write("\t"+y)
 | 
| 
 | 
   463 
 | 
| 
 | 
   464 
 | 
| 
 | 
   465     for x in fil_LH8E:
 | 
| 
 | 
   466         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   467     fp.close()
 | 
| 
 | 
   468 
 | 
| 
 | 
   469     fp = open('Counts/Filtered '+n1+' Non-Templated Counts', 'w')
 | 
| 
 | 
   470     fp.write("Name\t")
 | 
| 
 | 
   471     fp.write("Sequence")
 | 
| 
 | 
   472     for y in names_con:
 | 
| 
 | 
   473        fp.write("\t"+y)
 | 
| 
 | 
   474 
 | 
| 
 | 
   475     for x in fil_LH2E:
 | 
| 
 | 
   476         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   477     fp.close()
 | 
| 
 | 
   478 
 | 
| 
 | 
   479 
 | 
| 
 | 
   480  if flag == 1:
 | 
| 
 | 
   481     fp = open('Counts/Raw '+n2+' Templated Counts', 'w')
 | 
| 
 | 
   482     fp.write("Name\t")
 | 
| 
 | 
   483     fp.write("Sequence")
 | 
| 
 | 
   484     for y in names_tre:
 | 
| 
 | 
   485        fp.write("\t"+y)
 | 
| 
 | 
   486 
 | 
| 
 | 
   487     for x in raw_LH8E:
 | 
| 
 | 
   488         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   489     fp.close()
 | 
| 
 | 
   490 
 | 
| 
 | 
   491     fp = open('Counts/Raw '+n1+' Templated Counts', 'w')
 | 
| 
 | 
   492     fp.write("Name\t")
 | 
| 
 | 
   493     fp.write("Sequence")
 | 
| 
 | 
   494     for y in names_con:
 | 
| 
 | 
   495        fp.write("\t"+y)
 | 
| 
 | 
   496 
 | 
| 
 | 
   497     for x in raw_LH2E:
 | 
| 
 | 
   498         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   499     fp.close()
 | 
| 
 | 
   500 
 | 
| 
 | 
   501  if flag == 2:
 | 
| 
 | 
   502     fp = open('Counts/Raw '+n2+' Non-Templated Counts', 'w')
 | 
| 
 | 
   503     fp.write("Name\t")
 | 
| 
 | 
   504     fp.write("Sequence")
 | 
| 
 | 
   505     for y in names_tre:
 | 
| 
 | 
   506        fp.write("\t"+y)
 | 
| 
 | 
   507 
 | 
| 
 | 
   508 
 | 
| 
 | 
   509     for x in raw_LH8E:
 | 
| 
 | 
   510         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   511     fp.close()
 | 
| 
 | 
   512 
 | 
| 
 | 
   513     fp = open('Counts/Raw '+n1+' Non-Templated Counts', 'w')
 | 
| 
 | 
   514     fp.write("Name\t")
 | 
| 
 | 
   515     fp.write("Sequence")
 | 
| 
 | 
   516     for y in names_con:
 | 
| 
 | 
   517        fp.write("\t"+y)
 | 
| 
 | 
   518 
 | 
| 
 | 
   519     for x in raw_LH2E:
 | 
| 
 | 
   520         fp.write("\n%s" % "\t".join(x))
 | 
| 
 | 
   521     fp.close()
 | 
| 
 | 
   522 
 | 
| 
 | 
   523 ####################################################################################################################################################################################################################
 | 
| 
 | 
   524 
 | 
| 
 | 
   525 def ssamples(names,samp,folder,pro):
 | 
| 
 | 
   526 
 | 
| 
 | 
   527     for i in range(2,len(samp[0])):
 | 
| 
 | 
   528 
 | 
| 
 | 
   529        fp = open(folder+names[i-2]+'.txt','w')
 | 
| 
 | 
   530        fp.write("miRNA id"+"\t"+names[i-2]+"\n")
 | 
| 
 | 
   531 
 | 
| 
 | 
   532        for x in samp:
 | 
| 
 | 
   533            fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
 | 
| 
 | 
   534        fp.close()
 | 
| 
 | 
   535 
 | 
| 
 | 
   536 ####################################################################################################################################################################################################################
 | 
| 
 | 
   537 
 | 
| 
 | 
   538 def DB_write(con,name,unique_seq,sorted_uni_arms,f):
 | 
| 
 | 
   539 
 | 
| 
 | 
   540  if f==1:
 | 
| 
 | 
   541     # Write a txt file with all the information
 | 
| 
 | 
   542     if con=="c":
 | 
| 
 | 
   543        fp = open('split1/'+name, 'w')
 | 
| 
 | 
   544 
 | 
| 
 | 
   545        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
 | 
| 
 | 
   546     if con=="t":
 | 
| 
 | 
   547        fp = open('split2/'+name, 'w') 
 | 
| 
 | 
   548        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
 | 
| 
 | 
   549 
 | 
| 
 | 
   550     for i in range(len(sorted_uni_arms)):
 | 
| 
 | 
   551         temp = []
 | 
| 
 | 
   552         for j in range(len(unique_seq)):
 | 
| 
 | 
   553 
 | 
| 
 | 
   554             if sorted_uni_arms[i][0] in (unique_seq[j][2].split("_")[0]+"_"+unique_seq[j][2].split("_")[1]):
 | 
| 
 | 
   555 
 | 
| 
 | 
   556                 temp.append(unique_seq[j])
 | 
| 
 | 
   557 
 | 
| 
 | 
   558         temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
 | 
| 
 | 
   559         fp.write("*********************************************************************************************************\n")
 | 
| 
 | 
   560         fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
 | 
| 
 | 
   561         fp.write("*********************************************************************************************************\n\n")
 | 
| 
 | 
   562         [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
 | 
| 
 | 
   563         fp.write("\n" + "\n")
 | 
| 
 | 
   564     fp.close()
 | 
| 
 | 
   565 
 | 
| 
 | 
   566  if f==2:
 | 
| 
 | 
   567 
 | 
| 
 | 
   568     if con=="c":
 | 
| 
 | 
   569        fp = open('split3/'+name, 'w')
 | 
| 
 | 
   570        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
 | 
| 
 | 
   571     if con=="t":
 | 
| 
 | 
   572        fp = open('split4/'+name, 'w')
 | 
| 
 | 
   573        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
 | 
| 
 | 
   574 
 | 
| 
 | 
   575     for i in range(len(sorted_uni_arms)):
 | 
| 
 | 
   576         temp = []
 | 
| 
 | 
   577         for j in range(len(unique_seq)):
 | 
| 
 | 
   578                if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
 | 
| 
 | 
   579                   temp.append(unique_seq[j])
 | 
| 
 | 
   580         if temp!=[]:
 | 
| 
 | 
   581            temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
 | 
| 
 | 
   582            fp.write("*********************************************************************************************************\n")
 | 
| 
 | 
   583            fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
 | 
| 
 | 
   584            fp.write("*********************************************************************************************************\n\n")
 | 
| 
 | 
   585            [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
 | 
| 
 | 
   586            fp.write("\n" + "\n")
 | 
| 
 | 
   587     fp.close()
 | 
| 
 | 
   588 
 | 
| 
 | 
   589 
 | 
| 
 | 
   590 ##########################################################################################################################
 | 
| 
 | 
   591 
 | 
| 
 | 
   592 def new_mat_seq(pre_unique_seq,mat_mirnas,l):
 | 
| 
 | 
   593 
 | 
| 
 | 
   594     unique_iso = []
 | 
| 
 | 
   595     for x in pre_unique_seq:
 | 
| 
 | 
   596        if len(x[2].split("_"))==3:
 | 
| 
 | 
   597           for y in pre_unique_seq:
 | 
| 
 | 
   598               if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
 | 
| 
 | 
   599                  if any(y[2] in lst2 for lst2 in unique_iso)==False:
 | 
| 
 | 
   600                     y[2]=">"+y[2]
 | 
| 
 | 
   601                     unique_iso.append(y)
 | 
| 
 | 
   602     l.acquire()
 | 
| 
 | 
   603     for x in unique_iso:
 | 
| 
 | 
   604         mat_mirnas.append(x[2])
 | 
| 
 | 
   605         mat_mirnas.append(x[9])
 | 
| 
 | 
   606     l.release()
 | 
| 
 | 
   607 
 | 
| 
 | 
   608 #########################################################################################################################
 | 
| 
 | 
   609 
 | 
| 
 | 
   610 def merging_names(LH2E_copy,new):
 | 
| 
 | 
   611 
 | 
| 
 | 
   612     dupes=[]
 | 
| 
 | 
   613     final_LH2E =[]
 | 
| 
 | 
   614 
 | 
| 
 | 
   615     for num in range(len(LH2E_copy)):
 | 
| 
 | 
   616 
 | 
| 
 | 
   617         if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E:
 | 
| 
 | 
   618            final_LH2E.append(LH2E_copy[num][1])
 | 
| 
 | 
   619            final_LH2E.append(LH2E_copy[num][0])
 | 
| 
 | 
   620         else:
 | 
| 
 | 
   621            dupes.append(LH2E_copy[num][1])
 | 
| 
 | 
   622 
 | 
| 
 | 
   623     dupes=list(set(dupes))
 | 
| 
 | 
   624 
 | 
| 
 | 
   625     for i in range(len(dupes)):
 | 
| 
 | 
   626         dupes[i]=[dupes[i]]
 | 
| 
 | 
   627 
 | 
| 
 | 
   628     for x in LH2E_copy:
 | 
| 
 | 
   629         for y in dupes:
 | 
| 
 | 
   630             if x[1]==y[0]:
 | 
| 
 | 
   631                fl=0
 | 
| 
 | 
   632                if len(y)==1:
 | 
| 
 | 
   633                   y.append(x[0])
 | 
| 
 | 
   634                else:
 | 
| 
 | 
   635                   for i in range(1,len(y)):
 | 
| 
 | 
   636                       if y[i].split("_")[0]+"_"+y[i].split("_")[1]==x[0].split("_")[0]+"_"+x[0].split("_")[1]:
 | 
| 
 | 
   637                          fl=1
 | 
| 
 | 
   638                          if len(x[0])<len(y[i]):
 | 
| 
 | 
   639                             del y[i]
 | 
| 
 | 
   640                             y.append(x[0])
 | 
| 
 | 
   641                             break
 | 
| 
 | 
   642 
 | 
| 
 | 
   643                   if fl==0:
 | 
| 
 | 
   644                      y.append((x[0]))
 | 
| 
 | 
   645 
 | 
| 
 | 
   646     for y in dupes:
 | 
| 
 | 
   647         if len(y)>2:
 | 
| 
 | 
   648            for i in range(len(y)-1,1,-1):
 | 
| 
 | 
   649                y[1]=y[1]+"/"+y[i]
 | 
| 
 | 
   650                del y[i]
 | 
| 
 | 
   651 
 | 
| 
 | 
   652 
 | 
| 
 | 
   653     for x in LH2E_copy:
 | 
| 
 | 
   654         for y in dupes:
 | 
| 
 | 
   655             if x[1]==y[0]:
 | 
| 
 | 
   656                x[0]=y[1]
 | 
| 
 | 
   657 
 | 
| 
 | 
   658     LH2E_copy.sort()
 | 
| 
 | 
   659     LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy))
 | 
| 
 | 
   660     new.extend(LH2E_copy)
 | 
| 
 | 
   661 
 | 
| 
 | 
   662 ######################################################################################################################################################
 | 
| 
 | 
   663 
 | 
| 
 | 
   664 def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
 | 
| 
 | 
   665 
 | 
| 
 | 
   666     for i in range(2,len(tem_samp[0])):
 | 
| 
 | 
   667 
 | 
| 
 | 
   668        fp = open(folder+tem_names[i-2]+'.txt','w')
 | 
| 
 | 
   669        fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
 | 
| 
 | 
   670 
 | 
| 
 | 
   671        for x in tem_samp:
 | 
| 
 | 
   672            fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
 | 
| 
 | 
   673 
 | 
| 
 | 
   674        for j in range(len(non_names)):
 | 
| 
 | 
   675            if non_names[j]==tem_names[i-2]:
 | 
| 
 | 
   676               for x in non_samp:
 | 
| 
 | 
   677                   fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
 | 
| 
 | 
   678        fp.close()
 | 
| 
 | 
   679 
 | 
| 
 | 
   680 #################################################################################################################################################################################################################
 | 
| 
 | 
   681 
 | 
| 
 | 
   682 def download_matures(matures,org_name):
 | 
| 
 | 
   683 
 | 
| 
 | 
   684     mature_mir=[]
 | 
| 
 | 
   685 
 | 
| 
 | 
   686     mat_url = 'http://mirgenedb.org/fasta/'+org_name+'?mat=1'
 | 
| 
 | 
   687     star_url = 'http://mirgenedb.org/fasta/'+org_name+'?star=1'
 | 
| 
 | 
   688 
 | 
| 
 | 
   689     data = urllib.request.urlopen(mat_url).read()
 | 
| 
 | 
   690     file_mirna = data.decode('utf-8')
 | 
| 
 | 
   691     mature_mir = file_mirna.split("\n")
 | 
| 
 | 
   692     mature_mir = [x.replace(">","") for x in mature_mir]
 | 
| 
 | 
   693     del mature_mir[-1]
 | 
| 
 | 
   694 
 | 
| 
 | 
   695     data = urllib.request.urlopen(star_url).read()
 | 
| 
 | 
   696     file_mirna = data.decode('utf-8')
 | 
| 
 | 
   697     star_mir = file_mirna.split("\n")
 | 
| 
 | 
   698     star_mir = [x.replace(">","") for x in star_mir]
 | 
| 
 | 
   699     del star_mir[-1]
 | 
| 
 | 
   700 
 | 
| 
 | 
   701     mature_mir.extend(star_mir)
 | 
| 
 | 
   702 
 | 
| 
 | 
   703     for i in range(1,len(mature_mir),2):
 | 
| 
 | 
   704         mature_mir[i]=mature_mir[i].replace("U","T")
 | 
| 
 | 
   705 
 | 
| 
 | 
   706     matures.extend(mature_mir)
 | 
| 
 | 
   707 
 | 
| 
 | 
   708 ###################################################################################################################
 | 
| 
 | 
   709 
 | 
| 
 | 
   710 def non_template_ref(sc,st,all_isoforms):
 | 
| 
 | 
   711 
 | 
| 
 | 
   712   pre_uni_seq_con = list(sc)
 | 
| 
 | 
   713   pre_uni_seq_tre = list(st)
 | 
| 
 | 
   714 
 | 
| 
 | 
   715   for x in pre_uni_seq_con:
 | 
| 
 | 
   716       for y in x:
 | 
| 
 | 
   717           if y[2] not in all_isoforms and len(y[2].split("_"))>2:
 | 
| 
 | 
   718              all_isoforms.append(y[2])
 | 
| 
 | 
   719              all_isoforms.append(y[9])
 | 
| 
 | 
   720 
 | 
| 
 | 
   721   for x in pre_uni_seq_tre:
 | 
| 
 | 
   722       for y in x:
 | 
| 
 | 
   723           if y[2] not in all_isoforms and len(y[2].split("_"))>2:
 | 
| 
 | 
   724              all_isoforms.append(y[2])
 | 
| 
 | 
   725              all_isoforms.append(y[9])
 | 
| 
 | 
   726 
 | 
| 
 | 
   727 ################################################################################################################################################################################################
 | 
| 
 | 
   728 
 | 
| 
 | 
   729 def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
 | 
| 
 | 
   730 
 | 
| 
 | 
   731     for y in mir_names:
 | 
| 
 | 
   732         flag=0
 | 
| 
 | 
   733         for x in sample:
 | 
| 
 | 
   734             if y[0]==x[0]:
 | 
| 
 | 
   735                flag=1
 | 
| 
 | 
   736                break
 | 
| 
 | 
   737         if flag==0:
 | 
| 
 | 
   738            sample.append([y[0],"0",y[1]])
 | 
| 
 | 
   739 
 | 
| 
 | 
   740     sample.sort(key=lambda x: x[0])
 | 
| 
 | 
   741     sample=list(sample for sample,_ in itertools.groupby(sample))
 | 
| 
 | 
   742 
 | 
| 
 | 
   743     l.acquire()
 | 
| 
 | 
   744     new_d.append(sample)
 | 
| 
 | 
   745     sample_order.append(sample_name)
 | 
| 
 | 
   746     l.release()
 | 
| 
 | 
   747 
 | 
| 
 | 
   748 ###############################################################################################################################################################################################
 | 
| 
 | 
   749 
 |