# HG changeset patch
# User glogobyte
# Date 1621316910 0
# Node ID d9b2a7df9d4b621dac8f3c9e4e1ff1253cd1dcbb
# Parent  d2eea02053a071f29d2359afa13f958d932887bc
Deleted selected files

diff -r d2eea02053a0 -r d9b2a7df9d4b mirbase_functions.py
--- a/mirbase_functions.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,823 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-"""---------------------- Simple Functions -----------------------"""
-
-# Read a file and return it as a list
-def read(path, flag):
-    if flag == 0:
-        with open(path) as fp:
-            file=fp.readlines()
-        fp.close()
-        return file
-
-    if flag == 1:
-        with open(path) as fp:
-            file = fp.read().splitlines()
-        fp.close()
-        return file
-
-# Write a list to a txt file
-def write(path, list):
-    with open(path,'w') as fp:
-        for x in list:
-            fp.write(str("\t".join(x[1:-1])))
-    fp.close()
-
-"""---------------------- RNA-seq Functions ----------------------"""
-
-# Detect the longest common substring sequence between two mirnas
-def longestSubstring(str1, str2):
-
-    from difflib import SequenceMatcher
-    # initialize SequenceMatcher object with
-    # input string
-    seqMatch = SequenceMatcher(None, str1, str2)
-
-    # find match of longest sub-string
-    # output will be like Match(a=0, b=0, size=5)
-    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
-
-    # print longest substring
-    if (match.size != 0):
-        return str1[match.a: match.a + match.size]
-    else:
-        print('No longest common sub-string found')
-
-
-
-########################################################################################################################################################
-
-def collapse_sam(path):
-
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in  x.split("\t")[0]]
-
-    uni_seq = []
-    for x in main_sam:
-
-        if [x[2], x[9]] not in uni_seq:
-            uni_seq.append([x[2], x[9]])
-
-    new_main_sam=[]
-    incr_num=0
-    for i in range(len(uni_seq)):
-        count=0
-        incr_num+=1
-        for y in main_sam:
-            if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]:
-               count+=1
-               temp=y
-        temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-        temp[0]=str(incr_num)+"-"+str(count)
-        new_main_sam.append(temp)
-
-    new_sam=intro_sam+new_main_sam
-
-    return new_sam
-
-#################################################################################################################################################################################################################
-
-def duplicate_chroms_isoforms(List):
-
- dupes=[]
-
- for num in range(len(List)):
-
-    if  [List[num][9],List[num][0],List[num][2]] not in dupes :
-        dupes.append([List[num][9],List[num][0],List[num][2]])
-
- for x in List:
-     for y in dupes:
-         if x[9]==y[0] and x[0]==y[1] and x[2].split("_")[0]==y[2].split("_")[0] and x[2]!=y[2]:
-            y.append(x[2])
-
-
- double_List = [x[:] for x in List]
-
- chr_order=[]
- for x in dupes:
-     temp = []
-     for i in range(2,len(x)):
-         if x[i].split("chr")[1].split("(")[0].isdigit():
-            temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0]))
-         else:
-            temp.append(x[i].split("chr")[1][0:4])
-
-     for z in temp:
-         if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z:
-             temp = [str(j) for j in temp]
-     temp=list(set(temp))
-     temp.sort()
-     chr_order.append(temp)
-
- final_dupes=[]
- for i in range(len(dupes)):
-     final_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]])
-     for x in chr_order[i]:
-         result = re.match("[-+]?\d+$", str(x))
-         if len(chr_order[i]) == len(set(chr_order[i])):
-           if result is not None:
-
-             if int(x)<0:
-                final_dupes[i][1]=final_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)"
-             else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)"
-           else:
-                final_dupes[i][1] = final_dupes[i][1] + "_chr" + str(x)
-         else:
-             if result is not None:
-                 if int(x) < 0:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(-)"
-                 else:
-                     final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(abs(int(x))) + "(+)"
-             else:
-                 final_dupes[i][1] = final_dupes[i][1] +dupes[i][2].split("_")[1]+ "_chr" + str(x)
-
- final_dupes.sort()
- final_dupes=list(final_dupes for final_dupes,_ in itertools.groupby(final_dupes))
-
- for i in range(len(double_List)):
-     for x in final_dupes:
-
-         if double_List[i][9] == x[0] and double_List[i][0] == x[2] and len(double_List[i][2].split("_")) >3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            gg=str("_"+double_List[i][2].split("_")[-2]+"_"+double_List[i][2].split("_")[-1])
-            double_List[i][2] = x[1]+gg
-
-         if double_List[i][9]==x[0] and double_List[i][0]== x[2] and len(double_List[i][2].split("_"))==3 and double_List[i][2].split("_")[0]==x[1].split("_")[0]:
-            double_List[i][2]=x[1]
-            List[i][2] = x[1]
-
- List.sort()
- new_list=list(List for List,_ in itertools.groupby(List))
-
- double_List.sort()
- new_double_List = list(double_List for double_List, _ in itertools.groupby(double_List))
-
- return new_list, new_double_List
-
-
-#############################################################################################################################################################################################################
-
-def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
-
-    # read the sam file
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]
-
-    sorted_uni_arms = []
-
-    for i in range(len(mature_mirnas)):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-        for j in range(len(unique_seq)):
-
-         if "{" in unique_seq[j][2].split("_")[0]:
-             official=unique_seq[j][2].split("_")[0][:-4]
-         else:
-             official=unique_seq[j][2].split("_")[0]
-
-         if mature_mirnas[i].split(" ")[0][1:] == official:
-
-                temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                # Problem with hsa-miR-8485
-                if mat_diff[1]!=0 and unique_diff[1]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                elif mat_diff[0]!=0 and unique_diff[0]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                else:
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-                   tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                   tmp_count_seq = tmp_count_seq+1
-
-                if pre_pos != 0 or post_pos != 0:
-                    if pre_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
-                    elif post_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
-                    else:
-                        unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
-
-        for x in range(unique_seq.count(1)):
-           unique_seq.remove(1)
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-           sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    dedup_unique_seq,double_fil_uni_seq=duplicate_chroms_isoforms(unique_seq)
-
-    for y in sorted_uni_arms:
-       counts=0
-       seqs=0
-       for x in double_fil_uni_seq:
-           if y[0]==x[2].split("_")[0]:
-              counts+=int(x[0].split("-")[1])
-              seqs+=1
-
-       y[1]=seqs
-       y[2]=counts
-
-    LHE=[]
-    l.acquire()
-    if con=="c":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-
-    if con=="t":
-       LHE.extend(z[2] for z in double_fil_uni_seq)
-       for y in double_fil_uni_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in double_fil_uni_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(dedup_unique_seq)
-       data.append([con,name,double_fil_uni_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-    l.release()
-
-
-######################################################################################################################################
-
-"""
-
-Read a sam file from Bowtie and do the followings:
-
-1) Remove reverse stranded mapped reads
-2) Remove unmapped reads 
-3) Remove all sequences with reads less than 11 reads
-4) Sort the arms with the most sequences in decreading rate
-5) Sort the sequences of every arm with the most reads in decreasing rate
-6) Calculate total number of sequences of every arm
-7) Calculate total number of reads of sequences of every arm.
-8) Store all the informations in a txt file 
-
-"""
-
-def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
-
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq=[]
-    unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
-
-    uni_seq=[]
-    # Calculate the shifted positions for every isomir and add them to the name of it
-    sorted_uni_arms = []
-    for i in range(1,len(mature_mirnas),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-
-        for j in range(len(unique_seq)):
-
-            temp_mature = mature_mirnas[i].strip().replace("U", "T")
-
-            if temp_mature in unique_seq[j][9]:
-
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                if len(unique_diff)<=2:
-                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-
-                   lengthofmir = len(off_part) + post_pos
-                   if pre_pos == 0:
-                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                      tmp_count_seq = tmp_count_seq + 1
-
-                      if pre_pos == 0:
-
-                          t_name=unique_seq[j].copy()
-                          t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
-                          uni_seq.append(t_name)
-
-
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-            sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-
-
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
-
-    LHE=[]
-
-    l.acquire()
-    if con=="c":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-
-
-    if con=="t":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-    l.release()
-
-#####################################################################################################################################################################################################################
-def deseq2_temp(samples_mirna_names,deseq,con,l):
-
-    samples_mirna_names.sort(key=lambda x:[0])
-    for i in range(len(deseq)):
-        for y in samples_mirna_names:
-            flag = 0
-            for x in deseq[i]:
-                if y[0] == x[0]:
-                    flag = 1
-                    break
-
-            if flag == 0:
-                deseq[i].append([y[0], "0", y[1]])
-
-    [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
-    deseq_final = [[x[0],x[2]] for x in deseq[0]]
-    [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]
-
-    l.acquire()
-    if con=="c":
-       q1.put(deseq_final)
-
-    if con=="t":
-       q2.put(deseq_final)
-    l.release()
-
-
-####################################################################################################################################################################################################################
-
-def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E,per,count):
-
- LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
- LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]
-
- LH8E_add_names.sort()
- LH2E_add_names.sort()
- LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
- LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))
-
- LH2E.sort()
- LH8E.sort()
- LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
- LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- zeros=["0"]*(len(LH8E[0])-2)
- [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
- LH8E=LH8E+LH8E_add_names
-
- zeros=["0"]*(len(LH2E[0])-2)
- [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
- LH2E=LH2E+LH2E_add_names
-
- dupes=[]
- final_LH2E =[]
-
- for num,_ in enumerate(LH2E):
-
-    if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
-        final_LH2E.append(LH2E[num][1])
-        final_LH2E.append(LH2E[num][0])
-    else:
-        dupes.append(LH2E[num][1])
-
-
- dupes=list(set(dupes))
-
- dupes=[[x] for x in dupes]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-             fl=0
-             if len(y)==1:
-                 y.append(x[0])
-             else:
-                 for i in range(1,len(y)):
-                     if y[i].split("_")[0]==x[0].split("_")[0]:
-                        fl=1
-                        if len(x[0])<len(y[i]):
-                           del y[i]
-                           y.append(x[0])
-                           break
-
-                 if fl==0:
-                    y.append((x[0]))
-
- for y in dupes:
-    if len(y)>2:
-        for i in range(len(y)-1,1,-1):
-            y[1]=y[1]+"/"+y[i]
-            del y[i]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-            x[0]=y[1]
-
- for x in LH8E:
-    for y in dupes:
-        if x[1]==y[0]:
-           x[0]=y[1]
-
-
- LH2E.sort()
- LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))
-
- LH8E.sort()
- LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- if int(per)!=-1:
-    percent=int(per)/100
-
-    c_col_filter=round(percent*(len(LH2E[1])-2))
-    t_col_filter=round(percent*(len(LH8E[1])-2))
-
-    for i, _ in enumerate(LH2E):
-        c_cols=0
-        t_cols=0
-
-        c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(count)])
-        t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(count)])
-
-        if c_cols>=c_col_filter or t_cols>=t_col_filter:
-           filter_LH8E.append(LH8E[i])
-           filter_LH2E.append(LH2E[i])
-
- raw_LH2E.extend(LH2E)
- raw_LH8E.extend(LH8E)
-
-##################################################################################################################################################################################################################
-
-def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag, per, n1, n2):
-
- if flag == 1 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2 +' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 1:
-    fp = open('Counts/Raw '+n2+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2:
-    fp = open('Counts/Raw '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
-#########################################################################################################################################
-
-def ssamples(names,samp,folder,pro):
-
-    for i in range(2,len(samp[0])):
-
-       fp = open(folder+names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+names[i-2]+"\n")
-
-       for x in samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-       fp.close()
-
-##################################################################################################################
-
-def DB_write(con,name,unique_seq,sorted_uni_arms,f):
-
- if f==1:
-    # Write a txt file with all the information
-    if con=="c":
-       fp = open('split1/'+name, 'w')
-
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split2/'+name, 'w') 
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-
-            if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]:
-
-                temp.append(unique_seq[j])
-
-        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-        fp.write("*********************************************************************************************************\n")
-        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-        fp.write("*********************************************************************************************************\n\n")
-        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-        fp.write("\n" + "\n")
-    fp.close()
-
- if f==2:
-
-    if con=="c":
-       fp = open('split3/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split4/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-               if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
-                  temp.append(unique_seq[j])
-        if temp!=[]:
-           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-           fp.write("*********************************************************************************************************\n")
-           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-           fp.write("*********************************************************************************************************\n\n")
-           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-           fp.write("\n" + "\n")
-    fp.close()
-
-
-##########################################################################################################################
-
-def new_mat_seq(pre_unique_seq,mat_mirnas,l):
-
-    unique_iso = []
-    for x in pre_unique_seq:
-       if len(x[2].split("_"))==3:
-          for y in pre_unique_seq:
-              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
-                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
-                    y[2]=">"+y[2]
-                    unique_iso.append(y)
-    l.acquire()
-    for x in unique_iso:
-        mat_mirnas.append(x[2])
-        mat_mirnas.append(x[9])
-    l.release()
-
-#########################################################################################################################
-
-def merging_names(LH2E_copy,new):
-
-    dupes=[]
-    final_LH2E =[]
-
-    for num in range(len(LH2E_copy)):
-
-        if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E:
-           final_LH2E.append(LH2E_copy[num][1])
-           final_LH2E.append(LH2E_copy[num][0])
-        else:
-           dupes.append(LH2E_copy[num][1])
-
-    dupes=list(set(dupes))
-
-    for i in range(len(dupes)):
-        dupes[i]=[dupes[i]]
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]==x[0].split("_")[0]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               x[0]=y[1]
-
-    LH2E_copy.sort()
-    LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy))
-
-    new.extend(LH2E_copy)
-
-
-######################################################################################################################################################
-
-def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
-
-    for i in range(2,len(tem_samp[0])):
-
-       fp = open(folder+tem_names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
-
-       for x in tem_samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-
-       for j in range(len(non_names)):
-           if non_names[j]==tem_names[i-2]:
-              for x in non_samp:
-                  fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
-       fp.close()
-
-###################################################################################################################################################################################################################
-
-def download_matures(matures,org_name):
-
-    #url = 'ftp://mirbase.org/pub/mirbase/21/mature.fa.gz'
-    url = 'ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz'
-    data = urllib.request.urlopen(url).read()
-    file_mirna = gzip.decompress(data).decode('utf-8')
-    file_mirna = file_mirna.split("\n")
-
-    for i in range(0,len(file_mirna)-1,2):
-
-        if org_name in file_mirna[i]:
-           matures.append(file_mirna[i])
-           matures.append(file_mirna[i+1])
-
-###################################################################################################################################################################################################################
-def non_template_ref(sc,st,all_isoforms):
-
-  pre_uni_seq_con = list(sc)
-  pre_uni_seq_tre = list(st)
-
-  for x in pre_uni_seq_con:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2] :
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-
-  for x in pre_uni_seq_tre:
-      for y in x:
-          if ">"+y[2] not in all_isoforms and ")_" in y[2]:
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-################################################################################################################################################################################################
-
-def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
-
-    for y in mir_names:
-        flag=0
-        for x in sample:
-            if y[0]==x[0]:
-               flag=1
-               break
-        if flag==0:
-           sample.append([y[0],"0",y[1]])
-
-    sample.sort(key=lambda x: x[0])
-    sample=list(sample for sample,_ in itertools.groupby(sample))
-
-    l.acquire()
-    new_d.append(sample)
-    sample_order.append(sample_name)
-    l.release()
-
-###############################################################################################################################################################################################
-
diff -r d2eea02053a0 -r d9b2a7df9d4b mirbase_graphs.py
--- a/mirbase_graphs.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,809 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-#################################################################################################################################################################
-def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-    c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
-    t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    c_non = len(c_non_samples)
-    c_non_counts = sum(x[2] for x in c_non_samples)
-    t_non = len(t_non_samples)
-    t_non_counts = sum(x[2] for x in t_non_samples)
-
-    c_unmap = c_unmap - c_non
-    t_unmap = c_unmap - t_non
-
-    c_unmap_counts=c_unmap_counts - c_non_counts
-    t_unmap_counts=t_unmap_counts - t_non_counts
-
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-    fig = plt.figure(figsize=(7,5))
-    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control Group (reads)',fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated Group (reads)', fontsize=12)
-    plt.savefig('pie_non.png',dpi=300)
-
-######################################################################################################################################################
-
-
-def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if "chr" in  x[0].split("_")[-1]:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if "chr" in y.split("_")[-1]:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-
-    fig = plt.figure()
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control group (reads)', fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated group (reads)',fontsize = 12)
-    plt.savefig('pie_tem.png',dpi=300)
-
-###################################################################################################################################################################################################################
-
-
-def make_spider(merge_LH2E,merge_LH8E):
-
-  c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-  t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-  c_5 = 0
-  c_5_counts = 0
-  c_3 = 0
-  c_3_counts = 0
-  c_both =0
-  c_both_counts=0
-  c_mature = 0
-  c_mat_counts = 0
-  c_exception=0
-  c_exception_counts=0
-
-
-  t_5 = 0
-  t_5_counts = 0
-  t_3 = 0
-  t_3_counts = 0
-  t_both = 0
-  t_both_counts = 0
-  t_mature = 0
-  t_mat_counts = 0
-  t_exception = 0
-  t_exception_counts=0
-
-  for x in c_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             c_mature+=1
-             c_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             c_5+=1
-             c_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             c_3+=1
-             c_3_counts += x[2]
-          else:
-             c_both+=1
-             c_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                c_mature+=1
-                c_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                c_exception+=1
-                c_exception_counts += x[2]
-
-
-  for x in t_samples:
-
-      if "/" not in x[0]:
-          if "chr" in  x[0].split("_")[-1]:
-             t_mature+=1
-             t_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[-1]):
-             t_5+=1
-             t_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[-2]):
-             t_3+=1
-             t_3_counts += x[2]
-          else:
-             t_both+=1
-             t_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if "chr" in y.split("_")[-1]:
-                t_mature+=1
-                t_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            for y in x[0].split("/"):
-                t_exception+=1
-                t_exception_counts += x[2]
-
-
-  c_all = c_5+c_3+c_both+c_mature+c_exception
-  c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
-
-  t_all = t_5+t_3+t_both+t_mature + t_exception
-  t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
-
-  c_5 = round(c_5/c_all*100,2)
-  c_3 = round(c_3/c_all*100,2)
-  c_both = round(c_both/c_all*100,2)
-  c_mature = round(c_mature/c_all*100,2)
-  c_exception = round(c_exception/c_all*100,2)
-
-  c_5_counts = round(c_5_counts/c_all_counts*100,2)
-  c_3_counts = round(c_3_counts/c_all_counts*100,2)
-  c_both_counts = round(c_both_counts/c_all_counts*100,2)
-  c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
-  c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
-
-  t_5 = round(t_5/t_all*100,2)
-  t_3 = round(t_3/t_all*100,2)
-  t_both = round(t_both/t_all*100,2)
-  t_mature = round(t_mature/t_all*100,2)
-  t_exception = round(t_exception/t_all*100,2)
-
-  t_5_counts = round(t_5_counts/t_all_counts*100,2)
-  t_3_counts = round(t_3_counts/t_all_counts*100,2)
-  t_both_counts = round(t_both_counts/t_all_counts*100,2)
-  t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
-  t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
-
-  radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
-  radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
-
-  df=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both,t_both],
-  """3' isomiRs""":[c_3,t_3],
-  'miRNA RefSeq':[c_mature,t_mature],
-  """5' isomiRs""":[c_5,t_5],
-  'Others*':[c_exception,t_exception]})
-
-  df1=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
-  """3' isomiRs""":[c_3_counts,t_3_counts],
-  'miRNA RefSeq':[c_mat_counts,t_mat_counts],
-  """5' isomiRs""":[c_5_counts,t_5_counts],
-  'Others*':[c_exception_counts,t_exception_counts]})
-
-  spider_last(df,radar_max,1)
-  spider_last(df1,radar_max_counts,2)
-
-#####################################################################################################################################################
-
-def spider_last(df,radar_max,flag):
-  # ------- PART 1: Create background
-  fig = plt.figure()
-  # number of variable
-  categories=list(df)[1:]
-  N = len(categories)
-
-  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
-  angles = [n / float(N) * 2 * pi for n in range(N)]
-  angles += angles[:1]
-
-  # Initialise the spider plot
-  ax = plt.subplot(111, polar=True)
-
-  # If you want the first axis to be on top:
-  ax.set_theta_offset(pi/2)
-  ax.set_theta_direction(-1)
-
-  # Draw one axe per variable + add labels labels yet
-  plt.xticks(angles[:-1], categories, fontsize=11)
-
-  # Draw ylabels
-  radar_max=round(radar_max+radar_max*0.1)
-  mul=len(str(radar_max))-1
-  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
-  sep = round(maxi/4)
-  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
-  plt.ylim(0, maxi)
-
-  # ------- PART 2: Add plots
-
-  # Plot each individual = each line of the data
-  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
-
-  # Ind1
-  values=df.loc[0].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
-  ax.fill(angles, values, 'b', alpha=0.1)
-
-  # Ind2
-  values=df.loc[1].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
-  ax.fill(angles, values, 'r', alpha=0.1)
-
-  # Add legend
-  if flag==1:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_non_red.png',dpi=300)
-  else:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_red.png',dpi=300)
-
-
-#############################################################################################################################################################################################################
-
-def hist_red(samples,flag):
-    lengths=[]
-    cat=[]
-    total_reads=0
-    seq=[]
-
-    if flag == "c":
-        title = "Length Distribution of Control group (Redudant reads)"
-    if flag == "t":
-        title = "Length Distribution of Treated group (Redudant reads)"
-
-    for i in samples:
-        for x in i:
-            lengths.append(len(x[9]))
-            if x[1]=="0":
-               seq.append([x[9],x[0].split("-")[1],"Mapped"])
-               cat.append("Mapped")
-            if x[1] == "4":
-               seq.append([x[9],x[0].split("-")[1],"Unmapped"])
-               cat.append("Unmapped")
-
-    uni_len=list(set(lengths))
-    uni_len=[x for x in uni_len if x<=35]
-    low=min(lengths)
-    up=max(lengths)
-    seq.sort()
-    uni_seq=list(seq for seq,_ in itertools.groupby(seq))
-    dim=up-low
-
-    if dim>20:
-       s=5
-    else:
-       s=8
-
-    total_reads+=sum([int(x[1]) for x in uni_seq])
-
-    map_reads=[]
-    unmap_reads=[]
-    length=[]
-    for y in uni_len:
-        map_temp=0
-        unmap_temp=0
-        for x in uni_seq:
-            if len(x[0])==y and x[2]=="Mapped":
-               map_temp+=int(x[1])
-            if len(x[0])==y and x[2]=="Unmapped":
-               unmap_temp+=int(x[1])
-        if y<=35:
-           length.append(y)
-           map_reads.append(round(map_temp/total_reads*100,2))
-           unmap_reads.append(round(unmap_temp/total_reads*100,2))
-
-    ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
-    ylim=ylim+ylim*20/100
-    fig, ax = plt.subplots()
-    width=0.8
-    ax.bar(length, unmap_reads, width, label='Unmapped')
-    h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
-    plt.xticks(np.arange(length[0], length[-1]+1, 1))
-    plt.xlabel('Length (nt)',fontsize=14)
-    plt.ylabel('Percentage',fontsize=14)
-    plt.title(title,fontsize=14)
-    ax.legend()
-    plt.ylim([0, ylim])
-    ax.grid(axis='y',linewidth=0.2)
-
-    if flag=='c':
-       plt.savefig('c_hist_red.png',dpi=300)
-
-    if flag=='t':
-       plt.savefig('t_hist_red.png',dpi=300)
-
-#################################################################################################################
-
-def logo_seq_red(merge, flag):
-
-    if flag=="c":
-       titlos="Control group (Redundant)"
-       file_logo="c_logo.png"
-       file_bar="c_bar.png"
-    if flag=="t":
-       titlos="Treated group (Redundant)"
-       file_logo="t_logo.png"
-       file_bar="t_bar.png"
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-    total_reads=0
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            total_reads+=best[2]
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=best[2]
-                    elif best[1][i] == "C":
-                        C[i]+=best[2]
-                    elif best[1][i] == "G":
-                        G[i]+=best[2]
-                    else:
-                        T[i]+=best[2]
-        else:
-            total_reads+=y[2]
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=(y[2])
-                    else:
-                        T[i]+=y[2]
-
-    A[:] = [round(x*100,1) / total_reads for x in A]
-    C[:] = [round(x*100,1) / total_reads for x in C]
-    G[:] = [round(x*100,1) / total_reads for x in G]
-    T[:] = [round(x*100,1) / total_reads for x in T]
-
-
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
-    h.grid(axis='y',linewidth=0.2)
-    plt.xticks(rotation=0, ha="right")
-    plt.ylabel("Counts (%)",fontsize=18)
-    plt.xlabel("Positions (nt)",fontsize=18)
-    plt.title(titlos,fontsize=20)
-    plt.tight_layout()
-    plt.savefig(file_bar, dpi=300)
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_title(titlos,fontsize=18)
-    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    figure = plt.gcf()
-    figure.set_size_inches(6, 4)
-    crp_logo.fig.savefig(file_logo,dpi=300)
-
-##########################################################################################################################################################################################################
-
-def logo_seq_non_red(merge_LH2E):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=1
-                    elif best[1][i] == "C":
-                        C[i]+=1
-                    elif best[1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-        else:
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=1
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=1
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
-    h.set_xlabel("Positions (nt)")
-    h.set_ylabel("Unique sequences")
-    plt.xticks(rotation=0, ha="right")
-    plt.tight_layout()
-    plt.savefig("bar2.png", dpi=300)
-
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-
-    # style using Logo methods
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    # style using Axes methods
-    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    crp_logo.ax.set_title("Non-redundant")
-    figure = plt.gcf()
-    crp_logo.fig.savefig('logo2.png', dpi=300)
-
-
-################################################################################################################################################################
-
-def pdf_before_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF, fpdf
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  if analysis=="2":
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
-  else:
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-  #sys.exit(images)
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'A4')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 20.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial','', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 11.0)
-
-  # Image caption
-  pdf.ln(0.5)
-
-  yh=FPDF.get_y(pdf)
-  pdf.image(images[0],x=0.3,w=4, h=3)
-  pdf.image(images[1],x=4,y=yh, w=4, h=3)
-  pdf.ln(0.3)
-
-  # Image caption
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
-  pdf.ln(0.2)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  and Treated (right) groups")
-
-
-  pdf.ln(0.5)
-  h1=FPDF.get_y(pdf)
-  pdf.image(images[2],x=1, w=6.5, h=5)
-  h2=FPDF.get_y(pdf)
-  FPDF.set_y(pdf,h1+0.2)
-  pdf.set_font('Arial','', 14.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
-  pdf.set_font('Arial', '', 11.0)
-  FPDF.set_y(pdf,h2)
-  FPDF.set_y(pdf,9.5)
-  # Image caption
-  pdf.cell(0.2)
-  if analysis=="2":
-     pdf.cell(3.0, 0.0, "  Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  in Control (left) and treated (right) groups")
-  else:
-     pdf.cell(3.0, 0.0, "  Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  Control (left) and treated (right) groups")
-
-
-
-  pdf.add_page()
-  pdf.set_font('Arial', 'B', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial', 'B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
-  pdf.ln(0.5)
-  pdf.image(images[3],x=1.5, w=5.5, h=4)
-  pdf.ln(0.6)
-  pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
-  pdf.set_font('Arial', '', 12.0)
-  pdf.ln(0.2)
-  pdf.image(images[4],x=1.5, w=5.5, h=4)
-  pdf.ln(0.3)
-  pdf.set_font('Arial', '', 11.0)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  * IsomiRs potentialy initiated from multiple loci")
-
-
-  if analysis=="2":
-     pdf.add_page('L')
-
-     pdf.set_font('Arial', 'B', 16.0)
-     pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
-     pdf.ln(0.5)
-     pdf.set_font('Arial', 'B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
-     pdf.ln(0.7)
-
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[5],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
-     pdf.ln(0.5)
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[6],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
-
-  pdf.close()
-  pdf.output('report1.pdf','F')
-
-
-
-
-#############################################################################################################################################################3
-
-def pdf_after_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  image_extensions = ("tem.png","non.png","a2.png")
-
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'letter')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 16.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
-  #pdf.ln(0.25)
-
-  pdf.ln(0.7)
-  pdf.set_font('Arial','B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
-
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 10.0)
-
-  # Image caption 
-  pdf.ln(0.4)
-  pdf.image(images[0],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-  if analysis=="2":
-
-     pdf.add_page()
-     pdf.ln(0.7)
-     pdf.set_font('Arial','B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
-     pdf.ln(0.4)
-     pdf.image(images[1],x=0.5, w=7.5, h=6.5)
-
-  pdf.add_page()
-  pdf.ln(0.5)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
-  pdf.ln(0.4)
-  pdf.image(images[2],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-
-  pdf.output('report2.pdf', 'F')
-
-
-
diff -r d2eea02053a0 -r d9b2a7df9d4b mirbase_ultra_v2.py
--- a/mirbase_ultra_v2.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-from mirbase_functions import *
-from mirbase_graphs import *
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
-parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
-parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
-parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
-parser.add_argument("-f", "--flag", help="choose the database", action="store")
-parser.add_argument("-umis", "--umi", help="choose the database", action="store")
-parser.add_argument("-percentage", "--per", help="choose the database", action="store")
-parser.add_argument("-counts", "--count", help="choose the database", action="store")
-parser.add_argument("-name1", "--n1", help="choose the database", action="store")
-parser.add_argument("-name2", "--n2", help="choose the database", action="store")
-args = parser.parse_args()
-
-
-###############################################################################################################################################################################################
-
-if __name__ == '__main__':
-
- starttime = time.time()
-
- q1 = Queue()
- q2 = Queue()
- lock = Lock()
- manager = Manager()
-
- mature_mirnas=manager.list()
- ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
- ps_mature.start()
-
- args.control[0]=args.control[0][1:]
- args.control[len(args.control)-1][:-1]
- control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
-
- args.treated[0]=args.treated[0][1:]
- args.treated[len(args.treated)-1][:-1]
- treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
-
-
-##############  Detection of templated isoforms ################
-
- radar = manager.list([0,0,0,0])
- samples = manager.list()
- data= manager.list()
- names_con=manager.list()
- samples_mirna_names=manager.list()
- deseq=manager.list()
- unmap_seq=manager.Value('i',0)
- unmap_counts=manager.Value('i',0)
- LH2E_names=manager.list()
- ini_c_samples = manager.list()
-
-
- radar1 = manager.list([0,0,0,0])
- samples1 = manager.list()
- data1 = manager.list()
- names_tre = manager.list()
- samples_mirna_names1=manager.list()
- deseq1=manager.list()
- unmap1_seq = manager.Value('i',0)
- unmap1_counts = manager.Value('i',0)
- LH8E_names=manager.list()
- ini_t_samples = manager.list()
- ps_mature.join()
-
-
- mature_mirnas=list(mature_mirnas)
-
-
- starttime1 = time.time()
- ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
- ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
-
- [p.start() for p in ps_sam]
- [p.join() for p in ps_sam]
- print('SAM took {} seconds'.format(time.time() - starttime1))
-
- ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
- ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
- [x.start() for x in ps_hist]
-
- starttime200=time.time()
-
- sc = list(samples)
- st = list(samples1)
-
- names_con=list(names_con)
- names_tre=list(names_tre)
- samples_mirna_names=list(samples_mirna_names)
- samples_mirna_names.sort()
- samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
-
- samples_mirna_names1=list(samples_mirna_names1)
- samples_mirna_names1.sort()
- samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
-
- deseq=list(deseq)
- deseq1=list(deseq1)
-
- new_names_con=manager.list()
- new_names_tre=manager.list()
- new_deseq=manager.list()
- new_deseq1=manager.list()
- ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
- ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
-
- [z.start() for z in ps_deseq]
- [z.join() for z in ps_deseq]
- new_deseq=list(new_deseq)
- new_deseq1=list(new_deseq1)
-
- LH2E=[[x[0],x[2]] for x in new_deseq[0]]
- [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
-
- LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
- [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
-
- print('Deseq took {} seconds'.format(time.time() - starttime200))
-
- merg_nam_LH2E=manager.list()
- merg_nam_LH8E=manager.list()
-
- LH2E_copy=copy.deepcopy(list(LH2E))
- LH8E_copy=copy.deepcopy(list(LH8E))
-
- fil_sort_tre=manager.list()
- fil_sort_con=manager.list()
- raw_sort_tre=manager.list()
- raw_sort_con=manager.list()
-
- ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con,args.per,args.count))
- ps_main.start()
-
- if args.anal=="2":
-    all_iso = manager.list()
-    ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
-    ps_non_iso.start()
-
- ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
- ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
- [x.start() for x in ps_merge]
- [x.join() for x in ps_merge]
-
- merg_nam_LH2E=list(merg_nam_LH2E)
- merg_nam_LH8E=list(merg_nam_LH8E)
-
- starttime2 = time.time()
- procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
- procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
- procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
- if args.anal == "1":
-    procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
-
- [p.start() for p in procs]
-
-
- if args.anal=="1":
-    [x.join() for x in ps_hist]
-    [p.join() for p in procs]
-    ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
-    ps_pdf.start()
-
- print('Graphs took {} seconds'.format(time.time() - starttime2))
-
- ps_main.join()
-
- fil_sort_con=list(fil_sort_con)
- fil_sort_tre=list(fil_sort_tre)
- if fil_sort_con==[]:
-    fil_sort_con=raw_sort_con
-    fil_sort_tre=raw_sort_tre
-
- raw_sort_con=list(raw_sort_con)
- raw_sort_tre=list(raw_sort_tre)
- names_con=list(new_names_con)
- names_tre=list(new_names_tre)
-
- ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2))
- ps_write.start()
-
- ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
- ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
- [p.start() for p in ps1_matrix]
-
- if args.anal=="1":
-    ps_pdf.join()
- if args.anal=="2":
-    [p.join() for p in procs]
-    [x.join() for x in ps_hist]
-
- ps_write.join()
- [p.join() for p in ps1_matrix]
-
-
-############################## Detection of Both  #######################################
-
- starttime10 = time.time()
-
- if args.anal == "2":
-
-  n_data= manager.list()
-  n_names_con=manager.list()
-  n_samples_mirna_names=manager.list()
-  n_deseq=manager.list()
-  n_LH2E_names=manager.list()
-
-  n_data1 = manager.list()
-  n_names_tre = manager.list()
-  n_samples_mirna_names1=manager.list()
-  n_deseq1=manager.list()
-  n_LH8E_names=manager.list()
-
-
-  new_mat_mirnas = list(mature_mirnas)
-  ps_non_iso.join()
-
-  all_iso=list(all_iso)
-  new_mat_mirnas.extend(all_iso)
-
-  starttime11=time.time()
-
-  ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
-  ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
-
-  [p.start() for p in ps_sam]
-  [p.join() for p in ps_sam]
-
-  print('Non-sam took {} seconds'.format(time.time() - starttime11))
-
-  starttime12=time.time()
-
-  n_names_con=list(n_names_con)
-  n_names_tre=list(n_names_tre)
-  n_samples_mirna_names=list(n_samples_mirna_names)
-  n_samples_mirna_names.sort()
-  n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
-
-  n_samples_mirna_names1=list(n_samples_mirna_names1)
-  n_samples_mirna_names1.sort()
-  n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
-
-  n_deseq=list(n_deseq)
-  n_deseq1=list(n_deseq1)
-
-  new_n_names_con=manager.list()
-  new_n_names_tre=manager.list()
-  n_new_deseq=manager.list()
-  n_new_deseq1=manager.list()
-  ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
-  ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
-
-  [x.start() for x in ps_deseq]
-  [x.join() for x in ps_deseq]
-  n_new_deseq=list(n_new_deseq)
-  n_new_deseq1=list(n_new_deseq1)
-
-  n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
-  [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
-
-  n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
-  [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
-
-  print('Non-deseq took {} seconds'.format(time.time() - starttime12))
-
-  merg_nam_n_LH2E=manager.list()
-  merg_nam_n_LH8E=manager.list()
-
-  n_LH2E_copy=copy.deepcopy(list(n_LH2E))
-  n_LH8E_copy=copy.deepcopy(list(n_LH8E))
-
-  n_sort_tre=manager.list()
-  n_sort_con=manager.list()
-
-  n_fil_sort_con=manager.list()
-  n_fil_sort_tre=manager.list()
-  n_raw_sort_con=manager.list()
-  n_raw_sort_tre=manager.list()
-
-  ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con,args.per,args.count))
-  ps_main.start()
-
-  ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
-  ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
-  [p.start() for p in ps_merge]
-  [p.join() for p in ps_merge]
-
-  merg_nam_n_LH2E=list(merg_nam_n_LH2E)
-  merg_nam_n_LH8E=list(merg_nam_n_LH8E)
-
-  procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
-  procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
-  procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
-
-  starttime13=time.time()
-  [p.start() for p in procs]
-  [p.join() for p in procs]
-
-  print('Graphs took {} seconds'.format(time.time() - starttime13))
-
-  procs1 = Process(target=pdf_before_DE,args=(args.anal))
-  procs1.start()
-
-  starttime14=time.time()
-  ps_main.join()
-
-  n_fil_sort_con=list(n_fil_sort_con)
-  n_fil_sort_tre=list(n_fil_sort_tre)
-  if n_fil_sort_con==[]:
-     n_fil_sort_con=n_raw_sort_con
-     n_fil_sort_tre=n_raw_sort_tre
-
-  n_raw_sort_con=list(n_raw_sort_con)
-  n_raw_sort_tre=list(n_raw_sort_tre)
-  n_names_con=list(new_n_names_con)
-  n_names_tre=list(new_n_names_tre)
-
-  ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2))
-  ps_write.start()
-
-  ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
-  ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
-  [p.start() for p in ps1_matrix]
-
-  ps_write.join()
-  [p.join() for p in ps1_matrix]
-  procs1.join()
- print('That took {} seconds'.format(time.time() - starttime10))
- print('That took {} seconds'.format(time.time() - starttime))
-
-
-
-
-
-
-
-
diff -r d2eea02053a0 -r d9b2a7df9d4b mirgene.loc.sample
--- a/mirgene.loc.sample	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-# bowtie2_indices.loc.sample
-# This is a *.loc.sample file distributed with Galaxy that enables tools
-# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
-# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
-# First create these data files and save them in your own data directory structure.
-# Then, create a bowtie_indices.loc file to use those indexes with tools.
-# Copy this file, save it with the same name (minus the .sample),
-# follow the format examples, and store the result in this directory.
-# The file should include an one line entry for each index set.
-# The path points to the "basename" for the set, not a specific file.
-# It has four text columns seperated by TABS.
-#
-# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
-#
-# So, for example, if you had hg18 indexes stored in:
-#
-#    /depot/data2/galaxy/hg19/bowtie2/
-#
-# containing hg19 genome and hg19.*.bt2 files, such as:
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
-#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
-#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
-#
-# then the bowtie2_indices.loc entry could look like this:
-#
-#
-Hsa	Human (Homo sapiens)
-#
-#
-#
-#
-#
diff -r d2eea02053a0 -r d9b2a7df9d4b mirgene_functions.py
--- a/mirgene_functions.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,749 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-
-
-"""---------------------- Simple Functions -----------------------"""
-
-# Read a file and return it as a list
-def read(path, flag):
-    if flag == 0:
-        with open(path) as fp:
-            file=fp.readlines()
-        fp.close()
-        return file
-
-    if flag == 1:
-        with open(path) as fp:
-            file = fp.read().splitlines()
-        fp.close()
-        return file
-
-# Write a list to a txt file
-def write(path, list):
-    with open(path,'w') as fp:
-        for x in list:
-            fp.write(str("\t".join(x[1:-1])))
-    fp.close()
-
-"""---------------------- RNA-seq Functions ----------------------"""
-
-# Detect the longest common substring sequence between two mirnas
-def longestSubstring(str1, str2):
-
-    from difflib import SequenceMatcher
-    # initialize SequenceMatcher object with
-    # input string
-    seqMatch = SequenceMatcher(None, str1, str2)
-
-    # find match of longest sub-string
-    # output will be like Match(a=0, b=0, size=5)
-    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
-
-    # print longest substring
-    if (match.size != 0):
-        return str1[match.a: match.a + match.size]
-    else:
-        print('No longest common sub-string found')
-
-
-
-########################################################################################################################################################
-def collapse_sam(path):
-
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    intro_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" in  x.split("\t")[0]]
-
-    uni_seq = []
-    for x in main_sam:
-
-        if [x[2], x[9]] not in uni_seq:
-            uni_seq.append([x[2], x[9]])
-
-    new_main_sam=[]
-    incr_num=0
-    for i in range(len(uni_seq)):
-        count=0
-        incr_num+=1
-        for y in main_sam:
-            if uni_seq[i][1]==y[9] and uni_seq[i][0]==y[2]:
-               count+=1
-               temp=y
-        temp[10]="~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-        temp[0]=str(incr_num)+"-"+str(count)
-        new_main_sam.append(temp)
-
-    new_sam=intro_sam+new_main_sam
-
-    return new_sam
-
-#################################################################################################################################################################
-
-def sam(mature_mirnas,path,name,con,l,samples,data,names,unmap_seq,samples_mirna_names,deseq,LHE_names,umi,ini_sample,unmap_counts):
-
-    # read the sam file
-
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq = [x for x in new_main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]
-
-    # Calculate the shifted positions for every isomir and add them to the name of it
-    sorted_uni_arms = []
-
-    for i in range(len(mature_mirnas)):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-        for j in range(len(unique_seq)):
-
-         if mature_mirnas[i] == unique_seq[j][2]:
-
-                temp_mature = mature_mirnas[i+1]
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                # Problem with hsa-miR-8485
-                if mat_diff[1]!=0 and unique_diff[1]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                elif mat_diff[0]!=0 and unique_diff[0]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                else:
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-                   tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                   tmp_count_seq = tmp_count_seq+1
-
-
-                if pre_pos != 0 or post_pos != 0:
-                    if pre_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
-                    elif post_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2] + "_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
-                    else:
-                        unique_seq[j][2] = unique_seq[j][2]+"_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
-
-        for x in range(unique_seq.count(1)):
-           unique_seq.remove(1)
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-           sorted_uni_arms.append([mature_mirnas[i], tmp_count_seq, tmp_count_reads])
-
-    # Store name of arms, number of sequences and number of reads
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-
-    for y in sorted_uni_arms:
-       counts=0
-       seqs=0
-       for x in unique_seq:
-           if y[0]==x[2].split("_")[0]+"_"+x[2].split("_")[1]:
-              counts+=int(x[0].split("-")[1])
-              seqs+=1
-
-       y[1]=seqs
-       y[2]=counts
-
-    LHE=[]
-
-    l.acquire()
-    if con=="c":
-       LHE.extend(z[2] for z in unique_seq)
-       for y in unique_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(unique_seq)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-
-    if con=="t":
-       LHE.extend(z[2] for z in unique_seq)
-       for y in unique_seq:
-           samples_mirna_names.append([y[2],y[9]])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
-       LHE_names.extend(LHE)
-       unmap_seq.value += sum([1 for x in new_main_sam if x[1] == '4'])
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in new_main_sam if x[1] == '4'])
-       names.append(name)
-       samples.append(unique_seq)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-       ini_sample.append(new_main_sam)
-    l.release()
-
-
-######################################################################################################################################
-"""
-Read a sam file from Bowtie and do the followings:
-
-1) Remove reverse stranded mapped reads
-2) Remove unmapped reads 
-3) Remove all sequences with reads less than 11 reads
-4) Sort the arms with the most sequences in decreading rate
-5) Sort the sequences of every arm with the most reads in decreasing rate
-6) Calculate total number of sequences of every arm
-7) Calculate total number of reads of sequences of every arm.
-8) Store all the informations in a txt file 
-
-"""
-
-def non_sam(mature_mirnas,path,name,con,l,data,names,n_deseq,n_samples_mirna_names,n_LHE_names):
-
-
-    ini_sam=read(path,0)
-    new_main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq=[]
-    unique_seq = [x for x in new_main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
-    uni_seq=[]
-
-    # Calculate the shifted positions for every isomir and add them to the name of it
-    sorted_uni_arms = []
-    for i in range(1,len(mature_mirnas),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-
-        for j in range(len(unique_seq)):
-
-            temp_mature = mature_mirnas[i].strip().replace("U", "T")
-
-            if temp_mature in unique_seq[j][9]:
-
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                if len(unique_diff)<=2:
-                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-
-                   lengthofmir = len(off_part) + post_pos
-                   if pre_pos == 0:
-                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                      tmp_count_seq = tmp_count_seq + 1
-
-                      if pre_pos == 0:
-                          t_name=copy.deepcopy(unique_seq[j])
-                          t_name[2]=mature_mirnas[i - 1] + "__" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
-                          uni_seq.append(t_name)
-
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-            sorted_uni_arms.append([mature_mirnas[i-1], tmp_count_seq, tmp_count_reads])
-
-
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
-
-    LHE=[]
-
-    l.acquire()
-    if con=="c":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-
-
-    if con=="t":
-       LHE.extend(x[2] for x in unique_seq if x[2]!="*")
-       for x in unique_seq:
-           if x[2]!="*":
-              n_samples_mirna_names.append([x[2],x[9]])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       n_LHE_names.extend(LHE)
-       names.append(name)
-       data.append([con,name,unique_seq,sorted_uni_arms])
-    l.release()
-
-#################################################################################################################################################################################################################
-
-def deseq2_temp(samples_mirna_names,deseq,con,l):
-
-    samples_mirna_names.sort(key=lambda x:[0])
-
-    for i in range(len(deseq)):
-        for y in samples_mirna_names:
-            flag = 0
-            for x in deseq[i]:
-                if y[0] == x[0]:
-                    flag = 1
-                    break
-
-            if flag == 0:
-                deseq[i].append([y[0], "0", y[1]])
-
-    [deseq[i].sort(key=lambda x: x[0]) for i, _ in enumerate(deseq)]
-    deseq_final = [[x[0],x[2]] for x in deseq[0]]
-    [deseq_final[z].append(deseq[i][j][1]) for z,_ in enumerate(deseq_final) for i, _ in enumerate(deseq) for j,_ in enumerate(deseq[i]) if deseq_final[z][0] == deseq[i][j][0]]
-
-    l.acquire()
-    if con=="c":
-       q1.put(deseq_final)
-
-    if con=="t":
-       q2.put(deseq_final)
-    l.release()
-
-
-##################################################################################################################################################################################################################
-
-def main_temp(LH2E, LH2E_names, LH8E, LH8E_names,flag,names_con,names_tre,filter_LH8E,filter_LH2E,raw_LH8E,raw_LH2E,per,count):
-
- LH8E_add_names = [x for x in LH2E_names if x not in LH8E_names]
- LH2E_add_names = [x for x in LH8E_names if x not in LH2E_names]
-
- LH8E_add_names.sort()
- LH2E_add_names.sort()
- LH8E_add_names = list(LH8E_add_names for LH8E_add_names,_ in itertools.groupby(LH8E_add_names))
- LH2E_add_names = list(LH2E_add_names for LH2E_add_names,_ in itertools.groupby(LH2E_add_names))
-
- LH2E.sort()
- LH8E.sort()
- LH2E = list(LH2E for LH2E,_ in itertools.groupby(LH2E))
- LH8E = list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- zeros=["0"]*(len(LH8E[0])-2)
- [LH8E_add_names[i].extend(zeros) for i,_ in enumerate(LH8E_add_names)]
- LH8E=LH8E+LH8E_add_names
-
- zeros=["0"]*(len(LH2E[0])-2)
- [LH2E_add_names[i].extend(zeros) for i,_ in enumerate(LH2E_add_names)]
- LH2E=LH2E+LH2E_add_names
-
- dupes=[]
- final_LH2E =[]
-
- for num,_ in enumerate(LH2E):
-
-    if LH2E[num][1] not in final_LH2E and LH2E[num][0] not in final_LH2E:
-        final_LH2E.append(LH2E[num][1])
-        final_LH2E.append(LH2E[num][0])
-    else:
-        dupes.append(LH2E[num][1])
-
- dupes=list(set(dupes))
-
- dupes=[[x] for x in dupes]
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-             fl=0
-             if len(y)==1:
-                 y.append(x[0])
-             else:
-                 for i in range(1,len(y)):
-                     if y[i].split("_")[0]+"_"+y[i].split("_")[1]==x[0].split("_")[0]+"_"+x[0].split("_")[1]:
-                        fl=1
-                        if len(x[0])<len(y[i]):
-                           del y[i]
-                           y.append(x[0])
-                           break
-
-                 if fl==0:
-                    y.append((x[0]))
-
- for y in dupes:
-    if len(y)>2:
-        for i in range(len(y)-1,1,-1):
-            y[1]=y[1]+"/"+y[i]
-            del y[i]
-
-
- for x in LH2E:
-     for y in dupes:
-         if x[1]==y[0]:
-            x[0]=y[1]
-
- for x in LH8E:
-    for y in dupes:
-        if x[1]==y[0]:
-           x[0]=y[1]
-
-
-
- LH2E.sort()
- LH2E=list(LH2E for LH2E,_ in itertools.groupby(LH2E))
-
- LH8E.sort()
- LH8E=list(LH8E for LH8E,_ in itertools.groupby(LH8E))
-
- if int(per)!=-1:
-    percent=int(per)/100
-
-    c_col_filter=round(percent*(len(LH2E[1])-2))
-    t_col_filter=round(percent*(len(LH8E[1])-2))
-
-    for i, _ in enumerate(LH2E):
-        c_cols=0
-        t_cols=0
-
-        c_cols=sum([1 for j in range(len(LH2E[i])-2) if int(LH2E[i][j+2])>=int(count)])
-        t_cols=sum([1 for j in range(len(LH8E[i])-2) if int(LH8E[i][j+2])>=int(count)])
-
-        if c_cols>=c_col_filter or t_cols>=t_col_filter:
-           filter_LH8E.append(LH8E[i])
-           filter_LH2E.append(LH2E[i])
-
- raw_LH2E.extend(LH2E)
- raw_LH8E.extend(LH8E)
-
-##################################################################################################################################################################################################################
-
-def write_main(raw_LH2E, raw_LH8E, fil_LH2E, fil_LH8E, names_con, names_tre, flag, per, n1, n2):
-
- if flag == 1 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2 +' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2 and int(per)!=-1:
-    fp = open('Counts/Filtered '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in fil_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in fil_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 1:
-    fp = open('Counts/Raw '+n2+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
- if flag == 2:
-    fp = open('Counts/Raw '+n2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_tre:
-       fp.write("\t"+y)
-
-
-    for x in raw_LH8E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+n1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in names_con:
-       fp.write("\t"+y)
-
-    for x in raw_LH2E:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-####################################################################################################################################################################################################################
-
-def ssamples(names,samp,folder,pro):
-
-    for i in range(2,len(samp[0])):
-
-       fp = open(folder+names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+names[i-2]+"\n")
-
-       for x in samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-       fp.close()
-
-####################################################################################################################################################################################################################
-
-def DB_write(con,name,unique_seq,sorted_uni_arms,f):
-
- if f==1:
-    # Write a txt file with all the information
-    if con=="c":
-       fp = open('split1/'+name, 'w')
-
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split2/'+name, 'w') 
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-
-            if sorted_uni_arms[i][0] in (unique_seq[j][2].split("_")[0]+"_"+unique_seq[j][2].split("_")[1]):
-
-                temp.append(unique_seq[j])
-
-        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-        fp.write("*********************************************************************************************************\n")
-        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-        fp.write("*********************************************************************************************************\n\n")
-        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-        fp.write("\n" + "\n")
-    fp.close()
-
- if f==2:
-
-    if con=="c":
-       fp = open('split3/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split4/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-               if sorted_uni_arms[i][0]==unique_seq[j][2].split("__")[0]:
-                  temp.append(unique_seq[j])
-        if temp!=[]:
-           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-           fp.write("*********************************************************************************************************\n")
-           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-           fp.write("*********************************************************************************************************\n\n")
-           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-           fp.write("\n" + "\n")
-    fp.close()
-
-
-##########################################################################################################################
-
-def new_mat_seq(pre_unique_seq,mat_mirnas,l):
-
-    unique_iso = []
-    for x in pre_unique_seq:
-       if len(x[2].split("_"))==3:
-          for y in pre_unique_seq:
-              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
-                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
-                    y[2]=">"+y[2]
-                    unique_iso.append(y)
-    l.acquire()
-    for x in unique_iso:
-        mat_mirnas.append(x[2])
-        mat_mirnas.append(x[9])
-    l.release()
-
-#########################################################################################################################
-
-def merging_names(LH2E_copy,new):
-
-    dupes=[]
-    final_LH2E =[]
-
-    for num in range(len(LH2E_copy)):
-
-        if LH2E_copy[num][1] not in final_LH2E and LH2E_copy[num][0] not in final_LH2E:
-           final_LH2E.append(LH2E_copy[num][1])
-           final_LH2E.append(LH2E_copy[num][0])
-        else:
-           dupes.append(LH2E_copy[num][1])
-
-    dupes=list(set(dupes))
-
-    for i in range(len(dupes)):
-        dupes[i]=[dupes[i]]
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]+"_"+y[i].split("_")[1]==x[0].split("_")[0]+"_"+x[0].split("_")[1]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-
-    for x in LH2E_copy:
-        for y in dupes:
-            if x[1]==y[0]:
-               x[0]=y[1]
-
-    LH2E_copy.sort()
-    LH2E_copy=list(LH2E_copy for LH2E_copy,_ in itertools.groupby(LH2E_copy))
-    new.extend(LH2E_copy)
-
-######################################################################################################################################################
-
-def ssamples1(tem_names,tem_samp,non_names,non_samp,folder,pro):
-
-    for i in range(2,len(tem_samp[0])):
-
-       fp = open(folder+tem_names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
-
-       for x in tem_samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-
-       for j in range(len(non_names)):
-           if non_names[j]==tem_names[i-2]:
-              for x in non_samp:
-                  fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
-       fp.close()
-
-#################################################################################################################################################################################################################
-
-def download_matures(matures,org_name):
-
-    mature_mir=[]
-
-    mat_url = 'http://mirgenedb.org/fasta/'+org_name+'?mat=1'
-    star_url = 'http://mirgenedb.org/fasta/'+org_name+'?star=1'
-
-    data = urllib.request.urlopen(mat_url).read()
-    file_mirna = data.decode('utf-8')
-    mature_mir = file_mirna.split("\n")
-    mature_mir = [x.replace(">","") for x in mature_mir]
-    del mature_mir[-1]
-
-    data = urllib.request.urlopen(star_url).read()
-    file_mirna = data.decode('utf-8')
-    star_mir = file_mirna.split("\n")
-    star_mir = [x.replace(">","") for x in star_mir]
-    del star_mir[-1]
-
-    mature_mir.extend(star_mir)
-
-    for i in range(1,len(mature_mir),2):
-        mature_mir[i]=mature_mir[i].replace("U","T")
-
-    matures.extend(mature_mir)
-
-###################################################################################################################
-
-def non_template_ref(sc,st,all_isoforms):
-
-  pre_uni_seq_con = list(sc)
-  pre_uni_seq_tre = list(st)
-
-  for x in pre_uni_seq_con:
-      for y in x:
-          if y[2] not in all_isoforms and len(y[2].split("_"))>2:
-             all_isoforms.append(y[2])
-             all_isoforms.append(y[9])
-
-  for x in pre_uni_seq_tre:
-      for y in x:
-          if y[2] not in all_isoforms and len(y[2].split("_"))>2:
-             all_isoforms.append(y[2])
-             all_isoforms.append(y[9])
-
-################################################################################################################################################################################################
-
-def deseqe2(sample,mir_names,l,new_d,sample_name,sample_order):
-
-    for y in mir_names:
-        flag=0
-        for x in sample:
-            if y[0]==x[0]:
-               flag=1
-               break
-        if flag==0:
-           sample.append([y[0],"0",y[1]])
-
-    sample.sort(key=lambda x: x[0])
-    sample=list(sample for sample,_ in itertools.groupby(sample))
-
-    l.acquire()
-    new_d.append(sample)
-    sample_order.append(sample_name)
-    l.release()
-
-###############################################################################################################################################################################################
-
diff -r d2eea02053a0 -r d9b2a7df9d4b mirgene_graphs.py
--- a/mirgene_graphs.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,847 +0,0 @@
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-
-#########################################################################################################################
-def pie_non_temp(merge_LH2E,merge_non_LH2E,merge_LH8E,merge_non_LH8E,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-    c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH2E]
-    t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    c_non = len(c_non_samples)
-    c_non_counts = sum(x[2] for x in c_non_samples)
-    t_non = len(t_non_samples)
-    t_non_counts = sum(x[2] for x in t_non_samples)
-
-    c_unmap = c_unmap - c_non
-    t_unmap = c_unmap - t_non
-
-    c_unmap_counts=c_unmap_counts - c_non_counts
-    t_unmap_counts=t_unmap_counts - t_non_counts
-
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if len(x[0].split("_"))==2:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if len(y.split("_"))==2:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if len(x[0].split("_"))==2:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if len(y.split("_"))==2:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-    
-    fig = plt.figure(figsize=(7,5))
-    
-    labels = 'miRNA RefSeq','Template', 'Unmapped','Non-template'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    # Plot
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control Group (reads)',fontsize=12)
-
-    labels = 'miRNA RefSeq','Template', 'Unmapped','non-template'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
-    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
-    # Plot
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated Group (reads)', fontsize=12)
-    plt.savefig('pie_non.png',dpi=300)
-
-####################################################################################################################################################################################################################
-
-def pie_temp(merge_LH2E,c_unmap,c_unmap_counts,merge_LH8E,t_unmap,t_unmap_counts):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-    t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-    c_templ = 0
-    c_tem_counts = 0
-    c_mature = 0
-    c_mat_counts = 0
-    t_templ = 0
-    t_tem_counts = 0
-    t_mature = 0
-    t_mat_counts = 0
-
-    for x in c_samples:
-
-        if "/" not in x[0]:
-            if len(x[0].split("_"))==2:
-               c_mature+=1
-               c_mat_counts += x[2]
-            else:
-               c_templ+=1
-               c_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if len(y.split("_"))==2:
-                  c_mature+=1
-                  c_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              c_templ+=1
-              c_tem_counts += x[2]
-
-    for x in t_samples:
-
-        if "/" not in x[0]:
-            if len(x[0].split("_"))==2:
-               t_mature+=1
-               t_mat_counts += x[2]
-            else:
-               t_templ+=1
-               t_tem_counts += x[2]
-        else:
-           f=0
-           for y in x[0].split("/"):
-               if len(y.split("_"))==2:
-                  t_mature+=1
-                  t_mat_counts += x[2]
-                  f=1
-                  break
-           if f==0:
-              t_templ+=1
-              t_tem_counts += x[2]
-
-
-    fig = plt.figure()
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    # Plot
-    ax1 = plt.subplot2grid((1,2),(0,0))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Control group (reads)', fontsize=12)
-    labels = 'miRNA RefSeq','Template', 'Unmapped'
-    sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
-    colors = ['gold', 'yellowgreen', 'lightskyblue']
-    explode = (0.2, 0.05, 0.1)
-    # Plot
-    ax2 = plt.subplot2grid((1,2),(0,1))
-    patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
-    [x.set_fontsize(8) for x in texts]
-    plt.title('Treated group (reads)',fontsize = 12)
-    plt.savefig('pie_tem.png',dpi=300)
-
-###################################################################################################################################################################################################################
-
-def make_spider(merge_LH2E,merge_LH8E):
-
-  c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-  t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH8E]
-
-  c_5 = 0
-  c_5_counts = 0
-  c_3 = 0
-  c_3_counts = 0
-  c_both =0
-  c_both_counts=0
-  c_mature = 0
-  c_mat_counts = 0
-  c_exception=0
-  c_exception_counts=0
-
-
-  t_5 = 0
-  t_5_counts = 0
-  t_3 = 0
-  t_3_counts = 0
-  t_both = 0
-  t_both_counts = 0
-  t_mature = 0
-  t_mat_counts = 0
-  t_exception = 0
-  t_exception_counts=0
-
-  for x in c_samples:
-
-      if "/" not in x[0]:
-          if len(x[0].split("_"))==2:
-             c_mature+=1
-             c_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[3]):
-             c_5+=1
-             c_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[2]):
-             c_3+=1
-             c_3_counts += x[2]
-          else:
-             c_both+=1
-             c_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if len(y.split("_"))==2:
-                c_mature+=1
-                c_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
-            num=0
-            for y in x[0].split("/"):
-                if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
-                   num+=1
-            if num==len(x[0].split("/")):
-
-               if  0 == int(x[0].split("/")[0].split("_")[3]):
-                   c_5+=1
-                   c_5_counts += x[2]
-               elif 0 == int(x[0].split("/")[0].split("_")[2]):
-                    c_3+=1
-                    c_3_counts += x[2]
-               else:
-                    c_both+=1
-                    c_both_counts+=x[2]
-            else:
-               c_exception+=1
-               c_exception_counts += x[2]
-
-
-  for x in t_samples:
-
-      if "/" not in x[0]:
-          if len(x[0].split("_"))==2:
-             t_mature+=1
-             t_mat_counts += x[2]
-          elif 0 == int(x[0].split("_")[3]):
-             t_5+=1
-             t_5_counts += x[2]
-          elif 0 == int(x[0].split("_")[2]):
-             t_3+=1
-             t_3_counts += x[2]
-          else:
-             t_both+=1
-             t_both_counts+=x[2]
-
-      else:
-         f=0
-         for y in x[0].split("/"):
-             if len(y.split("_"))==2:
-                t_mature+=1
-                t_mat_counts += x[2]
-                f=1
-                break
-         if f==0:
-            part=x[0].split("/")[0].split("_")[-2]+"_"+x[0].split("/")[0].split("_")[-1]
-            num=0
-            for y in x[0].split("/"):
-                if y.split("_")[-2]+"_"+y.split("_")[-1]==part:
-                   num+=1
-            if num==len(x[0].split("/")):
-
-               if  0 == int(x[0].split("/")[0].split("_")[3]):
-                   t_5+=1
-                   t_5_counts += x[2]
-               elif 0 == int(x[0].split("/")[0].split("_")[2]):
-                    t_3+=1
-                    t_3_counts += x[2]
-               else:
-                    t_both+=1
-                    t_both_counts+=x[2]
-            else:
-               t_exception+=1
-               t_exception_counts += x[2]
-
-
-  c_all = c_5+c_3+c_both+c_mature+c_exception
-  c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
-
-  t_all = t_5+t_3+t_both+t_mature + t_exception
-  t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
-
-  c_5 = round(c_5/c_all*100,2)
-  c_3 = round(c_3/c_all*100,2)
-  c_both = round(c_both/c_all*100,2)
-  c_mature = round(c_mature/c_all*100,2)
-  c_exception = round(c_exception/c_all*100,2)
-
-  c_5_counts = round(c_5_counts/c_all_counts*100,2)
-  c_3_counts = round(c_3_counts/c_all_counts*100,2)
-  c_both_counts = round(c_both_counts/c_all_counts*100,2)
-  c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
-  c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
-
-  t_5 = round(t_5/t_all*100,2)
-  t_3 = round(t_3/t_all*100,2)
-  t_both = round(t_both/t_all*100,2)
-  t_mature = round(t_mature/t_all*100,2)
-  t_exception = round(t_exception/t_all*100,2)
-
-  t_5_counts = round(t_5_counts/t_all_counts*100,2)
-  t_3_counts = round(t_3_counts/t_all_counts*100,2)
-  t_both_counts = round(t_both_counts/t_all_counts*100,2)
-  t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
-  t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
-
-  radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
-  radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
-
-  df=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both,t_both],
-  """3' isomiRs""":[c_3,t_3],
-  'miRNA RefSeq':[c_mature,t_mature],
-  """5' isomiRs""":[c_5,t_5],
-  'Others*':[c_exception,t_exception]})
-
-  df1=pd.DataFrame({
-  'group':['Controls','Treated'],
-  """5' and 3' isomiRs""":[c_both_counts,t_both_counts],
-  """3' isomiRs""":[c_3_counts,t_3_counts],
-  'miRNA RefSeq':[c_mat_counts,t_mat_counts],
-  """5' isomiRs""":[c_5_counts,t_5_counts],
-  'Others*':[c_exception_counts,t_exception_counts]})
-  
-  spider_last(df,radar_max,1)
-  spider_last(df1,radar_max_counts,2)
-
-
-##############################################################################################################################################################################################################
-
-def spider_last(df,radar_max,flag):
-  # ------- PART 1: Create background
-  fig = plt.figure()
-  # number of variable
-  categories=list(df)[1:]
-  N = len(categories)
-
-  # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
-  angles = [n / float(N) * 2 * pi for n in range(N)]
-  angles += angles[:1]
-
-  # Initialise the spider plot
-  ax = plt.subplot(111, polar=True)
-
-  # If you want the first axis to be on top:
-  ax.set_theta_offset(pi/2)
-  ax.set_theta_direction(-1)
-
-  # Draw one axe per variable + add labels labels yet
-  plt.xticks(angles[:-1], categories, fontsize=11)
-
-  # Draw ylabels
-  radar_max=round(radar_max+radar_max*0.1)
-  mul=len(str(radar_max))-1
-  maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
-  sep = round(maxi/4)
-  plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=10)
-  plt.ylim(0, maxi)
-
-  # ------- PART 2: Add plots
-
-  # Plot each individual = each line of the data
-  # I don't do a loop, because plotting more than 3 groups makes the chart unreadable
-
-  # Ind1
-  values=df.loc[0].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label="Controls")
-  ax.fill(angles, values, 'b', alpha=0.1)
-
-  # Ind2
-  values=df.loc[1].drop('group').values.flatten().tolist()
-  values += values[:1]
-  ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label="Treated")
-  ax.fill(angles, values, 'r', alpha=0.1)
-
-  # Add legend
-  if flag==1:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_non_red.png',dpi=300)
-  else:
-     plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
-     plt.savefig('spider_red.png',dpi=300)
-
-#############################################################################################################################################################################################################
-
-def hist_red(samples,flag):
-
-    lengths=[]
-    cat=[]
-    total_reads=0
-    seq=[]
-
-    if flag == "c":
-        title = "Length Distribution of Control group (Redudant reads)"
-    if flag == "t":
-        title = "Length Distribution of Treated group (Redudant reads)"
-
-    for i in samples:
-        for x in i:
-            lengths.append(len(x[9]))
-            if x[1]=="0":
-               seq.append([x[9],x[0].split("-")[1],"Mapped"])
-               cat.append("Mapped")
-            if x[1] == "4":
-               seq.append([x[9],x[0].split("-")[1],"Unmapped"])
-               cat.append("Unmapped")
-
-    uni_len=list(set(lengths))
-    uni_len=[x for x in uni_len if x<=35]
-    low=min(lengths)
-    up=max(lengths)
-    seq.sort()
-    uni_seq=list(seq for seq,_ in itertools.groupby(seq))
-    dim=up-low
-
-    if dim>20:
-       s=5
-    else:
-       s=8
-    total_reads+=sum([int(x[1]) for x in uni_seq])
-
-    map_reads=[]
-    unmap_reads=[]
-    length=[]
-    for y in uni_len:
-        map_temp=0
-        unmap_temp=0
-        for x in uni_seq:
-            if len(x[0])==y and x[2]=="Mapped":
-               map_temp+=int(x[1])
-            if len(x[0])==y and x[2]=="Unmapped":
-               unmap_temp+=int(x[1])
-        if y<=35:
-           length.append(y)
-           map_reads.append(round(map_temp/total_reads*100,2))
-           unmap_reads.append(round(unmap_temp/total_reads*100,2))
-
-    ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
-    ylim=ylim+ylim*20/100
-    fig, ax = plt.subplots()
-    width=0.8
-    ax.bar(length, unmap_reads, width, label='Unmapped')
-    h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
-    plt.xticks(np.arange(length[0], length[-1]+1, 1))
-    plt.xlabel('Length (nt)',fontsize=14)
-    plt.ylabel('Percentage',fontsize=14)
-    plt.title(title,fontsize=14)
-    ax.legend()
-    plt.ylim([0, ylim])
-    ax.grid(axis='y',linewidth=0.2)
-
-    if flag=='c':
-       plt.savefig('c_hist_red.png',dpi=300)
-
-    if flag=='t':
-       plt.savefig('t_hist_red.png',dpi=300)
-
-##############################################################################################################################################################################################################################################
-
-def logo_seq_red(merge, flag):
-
-    if flag=="c":
-       titlos="Control group (Redundant)"
-       file_logo="c_logo.png"
-       file_bar="c_bar.png"
-    if flag=="t":
-       titlos="Treated group (Redundant)"
-       file_logo="t_logo.png"
-       file_bar="t_bar.png"
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-    total_reads=0
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            total_reads+=best[2]
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=best[2]
-                    elif best[1][i] == "C":
-                        C[i]+=best[2]
-                    elif best[1][i] == "G":
-                        G[i]+=best[2]
-                    else:
-                        T[i]+=best[2]
-        else:
-            total_reads+=y[2]
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=(y[2])
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=(y[2])
-                    else:
-                        T[i]+=y[2]
-
-    A[:] = [round(x*100,1) / total_reads for x in A]
-    C[:] = [round(x*100,1) / total_reads for x in C]
-    G[:] = [round(x*100,1) / total_reads for x in G]
-    T[:] = [round(x*100,1) / total_reads for x in T]
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-
-    h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
-    h.grid(axis='y',linewidth=0.2)
-    plt.xticks(rotation=0, ha="right")
-    plt.ylabel("Counts (%)",fontsize=18)
-    plt.xlabel("Positions (nt)",fontsize=18)
-    plt.title(titlos,fontsize=20)
-    plt.tight_layout()
-    plt.savefig(file_bar, dpi=300)
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-    crp_logo.ax.set_title(titlos,fontsize=18)
-    crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)",fontsize=16, labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    figure = plt.gcf()
-    figure.set_size_inches(6, 4)
-    crp_logo.fig.savefig(file_logo,dpi=300)
-
-##########################################################################################################################################################################################################
-
-def logo_seq_non_red(merge_LH2E):
-
-    c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_LH2E]
-
-    A=[0]*5
-    C=[0]*5
-    G=[0]*5
-    T=[0]*5
-
-    for y in c_samples:
-        if "/" in y[0]:
-            length=[]
-            for x in y[0].split("/"):
-                length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
-
-            best=min(length)
-            for i in range(5):
-                if i<len(best[1]):
-                    if best[1][i] == "A":
-                        A[i]+=1
-                    elif best[1][i] == "C":
-                        C[i]+=1
-                    elif best[1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-        else:
-            for i in range(5):
-                if i<len(y[0].split("_")[-1]):
-                    if y[0].split("_")[-1][i] == "A":
-                        A[i]+=1
-                    elif y[0].split("_")[-1][i] == "C":
-                        C[i]+=1
-                    elif y[0].split("_")[-1][i] == "G":
-                        G[i]+=1
-                    else:
-                        T[i]+=1
-
-    data = {'A':A,'C':C,'G':G,'T':T}
-    df = pd.DataFrame(data, index=[1,2,3,4,5])
-    h=df.plot.bar(title="Non-templated nucleotides after templated sequence",color=tuple(["g", "b","gold","r"]))
-    h.set_xlabel("Positions (nt)")
-    h.set_ylabel("Unique sequences")
-    plt.xticks(rotation=0, ha="right")
-    plt.tight_layout()
-    plt.savefig("bar2.png", dpi=300)
-
-
-    import logomaker as lm
-    crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
-
-    crp_logo.style_spines(visible=False)
-    crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
-    crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
-
-    crp_logo.ax.set_ylabel("Unique sequences", labelpad=5)
-    crp_logo.ax.set_xlabel("Positions (nt)", labelpad=5)
-    crp_logo.ax.xaxis.set_ticks_position('none')
-    crp_logo.ax.xaxis.set_tick_params(pad=-1)
-    crp_logo.ax.set_title("Non-redundant")
-    figure = plt.gcf()
-    crp_logo.fig.savefig('logo2.png', dpi=300)
-
-
-################################################################################################################################################################################################################
-
-
-
-
-
-################################################################################################################################################################
-
-def pdf_before_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF, fpdf
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  if analysis=="2":
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
-  else:
-     image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-  #sys.exit(images)
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'A4')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 20.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'IsomiR Profile Report',align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial','', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'sRNA Length Distribution',align='C')
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 11.0)
-
-  # Image caption
-  pdf.ln(0.5)
-
-  yh=FPDF.get_y(pdf)
-  pdf.image(images[0],x=0.3,w=4, h=3)
-  pdf.image(images[1],x=4,y=yh, w=4, h=3)
-  pdf.ln(0.3)
-
-  # Image caption
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  Mapped and unmapped reads to custom precussor arm reference DB (5p and 3p arms) in Control (left)")
-  pdf.ln(0.2)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  and Treated (right) groups")
-
-
-  pdf.ln(0.5)
-  h1=FPDF.get_y(pdf)
-  pdf.image(images[2],x=1, w=6.5, h=5)
-  h2=FPDF.get_y(pdf)
-  FPDF.set_y(pdf,h1+0.2)
-  pdf.set_font('Arial','', 14.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Template and non-template IsomiRs',align='C')
-  pdf.set_font('Arial', '', 11.0)
-  FPDF.set_y(pdf,h2)
-  FPDF.set_y(pdf,9.5)
-  # Image caption
-  pdf.cell(0.2)
-  if analysis=="2":
-     pdf.cell(3.0, 0.0, "  Template, non-template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  in Control (left) and treated (right) groups")
-  else:
-     pdf.cell(3.0, 0.0, "  Template, miRNA RefSeq and unmapped sequences as percentage of total sRNA reads in")
-     pdf.ln(0.2)
-     pdf.cell(0.2)
-     pdf.cell(3.0, 0.0, "  Control (left) and treated (right) groups")
-
-
-
-  pdf.add_page()
-  pdf.set_font('Arial', 'B', 16.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Reference form and isomiR among total miRNA reads",align='C')
-  pdf.ln(0.7)
-  pdf.set_font('Arial', 'B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, "Template isomiR profile (redundant)",align='C')
-  pdf.ln(0.5)
-  pdf.image(images[3],x=1.5, w=5.5, h=4)
-  pdf.ln(0.6)
-  pdf.cell(pdf.w-0.5, 0.0, "Template isomiR profile (non-redundant)",align='C')
-  pdf.set_font('Arial', '', 12.0)
-  pdf.ln(0.2)
-  pdf.image(images[4],x=1.5, w=5.5, h=4)
-  pdf.ln(0.3)
-  pdf.set_font('Arial', '', 11.0)
-  pdf.cell(0.2)
-  pdf.cell(3.0, 0.0, "  * IsomiRs potentialy initiated from multiple loci")
-
-
-  if analysis=="2":
-     pdf.add_page('L')
-
-     pdf.set_font('Arial', 'B', 16.0)
-     pdf.cell(pdf.w-0.5, 0.5, "Non-template IsomiRs",align='C')
-     pdf.ln(0.5)
-     pdf.set_font('Arial', 'B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, "3' Additions of reference of isomiR sequence",align='C')
-     pdf.ln(0.7)
-
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[5],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
-     pdf.ln(0.5)
-     yh=FPDF.get_y(pdf)
-     pdf.image(images[6],x=1.5,w=3.65, h=2.65)
-     pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
-
-  pdf.close()
-  pdf.output('report1.pdf','F')
-
-
-
-
-#############################################################################################################################################################3
-
-def pdf_after_DE(analysis):
-
-  # Import FPDF class
-  from fpdf import FPDF
-
-  # Import glob module to find all the files matching a pattern
-  import glob
-
-  # Image extensions
-  image_extensions = ("tem.png","non.png","a2.png")
-
-  # This list will hold the images file names
-  images = []
-
-  # Build the image list by merging the glob results (a list of files)
-  # for each extension. We are taking images from current folder.
-  for extension in image_extensions:
-      images.extend(glob.glob(extension))
-
-  # Create instance of FPDF class
-  pdf = FPDF('P', 'in', 'letter')
-  # Add new page. Without this you cannot create the document.
-  pdf.add_page()
-  # Set font to Arial, 'B'old, 16 pts
-  pdf.set_font('Arial', 'B', 16.0)
-
-  # Page header
-  pdf.cell(pdf.w-0.5, 0.5, 'Differential expression of miRNAs and Isoforms',align='C')
-  #pdf.ln(0.25)
-
-  pdf.ln(0.7)
-  pdf.set_font('Arial','B', 12.0)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNA and template isoforms',align='C')
-
-
-  # Smaller font for image captions
-  pdf.set_font('Arial', '', 10.0)
-
-  # Image caption 
-  pdf.ln(0.4)
-  pdf.image(images[0],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-  if analysis=="2":
-
-     pdf.add_page()
-     pdf.ln(0.7)
-     pdf.set_font('Arial','B', 12.0)
-     pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed non-template isomiRs',align='C')
-     pdf.ln(0.4)
-     pdf.image(images[1],x=0.5, w=7.5, h=6.5)
-
-  pdf.add_page()
-  pdf.ln(0.5)
-  pdf.cell(pdf.w-0.5, 0.5, 'Top 50 most differentially expressed miRNAs and isomiRs grouped by arm',align='C')
-  pdf.ln(0.4)
-  pdf.image(images[2],x=0.8, w=7, h=8)
-  pdf.ln(0.3)
-
-
-  pdf.output('report2.pdf', 'F')
-
-
-
diff -r d2eea02053a0 -r d9b2a7df9d4b mirgene_ultra_v2.py
--- a/mirgene_ultra_v2.py	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,365 +0,0 @@
-from mirgene_functions import *
-from mirgene_graphs import *
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
-parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
-parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
-parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
-parser.add_argument("-f", "--flag", help="choose the database", action="store")
-parser.add_argument("-umis", "--umi", help="choose the database", action="store")
-parser.add_argument("-percentage", "--per", help="choose the database", action="store")
-parser.add_argument("-counts", "--count", help="choose the database", action="store")
-parser.add_argument("-name1", "--n1", help="choose the database", action="store")
-parser.add_argument("-name2", "--n2", help="choose the database", action="store")
-args = parser.parse_args()
-
-#########################################################################3###############################################################################################################################################################################################
-
-if __name__ == '__main__':
-
- starttime = time.time()
-
- q1 = Queue()
- q2 = Queue()
- lock = Lock()
- manager = Manager()
-
- mature_mirnas=manager.list()
- ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
- ps_mature.start()
-
- args.control[0]=args.control[0][1:]
- args.control[len(args.control)-1][:-1]
- control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
-
- args.treated[0]=args.treated[0][1:]
- args.treated[len(args.treated)-1][:-1]
- treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
-
-
-##############  Detection of templated isoforms ################
-
- radar = manager.list([0,0,0,0])
- samples = manager.list()
- data= manager.list()
- names_con=manager.list()
- samples_mirna_names=manager.list()
- deseq=manager.list()
- unmap_seq=manager.Value('i',0)
- unmap_counts=manager.Value('i',0)
- LH2E_names=manager.list()
- ini_c_samples = manager.list()
-
-
- radar1 = manager.list([0,0,0,0])
- samples1 = manager.list()
- data1 = manager.list()
- names_tre = manager.list()
- samples_mirna_names1=manager.list()
- deseq1=manager.list()
- unmap1_seq = manager.Value('i',0)
- unmap1_counts = manager.Value('i',0)
- LH8E_names=manager.list()
- ini_t_samples = manager.list()
- ps_mature.join()
-
-
- mature_mirnas=list(mature_mirnas)
-
- starttime1 = time.time()
- ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
- ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
-
- [p.start() for p in ps_sam]
- [p.join() for p in ps_sam]
- print('SAM took {} seconds'.format(time.time() - starttime1))
-
- ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
- ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
- [x.start() for x in ps_hist]
-
- starttime200=time.time()
- sc = list(samples)
- st = list(samples1)
-
- names_con=list(names_con)
- names_tre=list(names_tre)
- samples_mirna_names=list(samples_mirna_names)
- samples_mirna_names.sort()
- samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
-
- samples_mirna_names1=list(samples_mirna_names1)
- samples_mirna_names1.sort()
- samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
-
- deseq=list(deseq)
- deseq1=list(deseq1)
-
- new_names_con=manager.list()
- new_names_tre=manager.list()
- new_deseq=manager.list()
- new_deseq1=manager.list()
- ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
- ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
-
- [z.start() for z in ps_deseq]
- [z.join() for z in ps_deseq]
- new_deseq=list(new_deseq)
- new_deseq1=list(new_deseq1)
-
- LH2E=[[x[0],x[2]] for x in new_deseq[0]]
- [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
-
- LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
- [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
-
- print('Deseq took {} seconds'.format(time.time() - starttime200))
-
- merg_nam_LH2E=manager.list()
- merg_nam_LH8E=manager.list()
-
- LH2E_copy=copy.deepcopy(list(LH2E))
- LH8E_copy=copy.deepcopy(list(LH8E))
-
- fil_sort_tre=manager.list()
- fil_sort_con=manager.list()
- raw_sort_tre=manager.list()
- raw_sort_con=manager.list()
-
- ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con,args.per,args.count))
- ps_main.start()
-
- if args.anal=="2":
-    all_iso = manager.list()
-    ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
-    ps_non_iso.start()
-
- ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
- ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
- [x.start() for x in ps_merge]
- [x.join() for x in ps_merge]
-
- merg_nam_LH2E=list(merg_nam_LH2E)
- merg_nam_LH8E=list(merg_nam_LH8E)
-
- starttime2 = time.time()
- procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
- procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
- procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
- if args.anal == "1":
-    procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
-
- [p.start() for p in procs]
-
- if args.anal=="1":
-    [x.join() for x in ps_hist]
-    [p.join() for p in procs]
-    ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
-    ps_pdf.start()
-
- print('Graphs took {} seconds'.format(time.time() - starttime2))
-
- ps_main.join()
-
- fil_sort_con=list(fil_sort_con)
- fil_sort_tre=list(fil_sort_tre)
- if fil_sort_con==[]:
-    fil_sort_con=raw_sort_con
-    fil_sort_tre=raw_sort_tre
-
- raw_sort_con=list(raw_sort_con)
- raw_sort_tre=list(raw_sort_tre)
- names_con=list(new_names_con)
- names_tre=list(new_names_tre)
-
- ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2))
- ps_write.start()
-
- ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
- ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
- [p.start() for p in ps1_matrix]
-
- if args.anal=="1":
-    ps_pdf.join()
- if args.anal=="2":
-    [p.join() for p in procs]
-    [x.join() for x in ps_hist]
-
- ps_write.join()
- [p.join() for p in ps1_matrix]
-
-############################## Detection of Both  #######################################
-
- starttime10 = time.time()
-
- if args.anal == "2":
-
-  n_data= manager.list()
-  n_names_con=manager.list()
-  n_samples_mirna_names=manager.list()
-  n_deseq=manager.list()
-  n_LH2E_names=manager.list()
-
-  n_data1 = manager.list()
-  n_names_tre = manager.list()
-  n_samples_mirna_names1=manager.list()
-  n_deseq1=manager.list()
-  n_LH8E_names=manager.list()
-
-  new_mat_mirnas = list(mature_mirnas)
-  ps_non_iso.join()
-
-  all_iso=list(all_iso)
-  new_mat_mirnas.extend(all_iso)
-
-  starttime11=time.time()
-
-  ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
-  ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
-
-  [p.start() for p in ps_sam]
-  [p.join() for p in ps_sam]
-
-  print('Non-sam took {} seconds'.format(time.time() - starttime11))
-
-  starttime12=time.time()
-
-  n_names_con=list(n_names_con)
-  n_names_tre=list(n_names_tre)
-  n_samples_mirna_names=list(n_samples_mirna_names)
-  n_samples_mirna_names.sort()
-  n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
-
-
-  n_samples_mirna_names1=list(n_samples_mirna_names1)
-  n_samples_mirna_names1.sort()
-  n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
-
-  n_deseq=list(n_deseq)
-  n_deseq1=list(n_deseq1)
-
-  new_n_names_con=manager.list()
-  new_n_names_tre=manager.list()
-  n_new_deseq=manager.list()
-  n_new_deseq1=manager.list()
-  ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
-  ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
-
-  [x.start() for x in ps_deseq]
-  [x.join() for x in ps_deseq]
-  n_new_deseq=list(n_new_deseq)
-  n_new_deseq1=list(n_new_deseq1)
-
-  n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
-  [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
-
-  n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
-  [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
-
-  print('Non-deseq took {} seconds'.format(time.time() - starttime12))
-
-  merg_nam_n_LH2E=manager.list()
-  merg_nam_n_LH8E=manager.list()
-
-  n_LH2E_copy=copy.deepcopy(list(n_LH2E))
-  n_LH8E_copy=copy.deepcopy(list(n_LH8E))
-
-  n_fil_sort_con=manager.list()
-  n_fil_sort_tre=manager.list()
-  n_raw_sort_con=manager.list()
-  n_raw_sort_tre=manager.list()
-
-  ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con,args.per,args.count))
-  ps_main.start()
-
-  starttime14=time.time()
-  ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
-  ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
-  [p.start() for p in ps_merge]
-  [p.join() for p in ps_merge]
-
-  merg_nam_n_LH2E=list(merg_nam_n_LH2E)
-  merg_nam_n_LH8E=list(merg_nam_n_LH8E)
-
-  print('Merging took {} seconds'.format(time.time() - starttime14))
-
-  procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
-  procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
-  procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
-
-
-  starttime13=time.time()
-  [p.start() for p in procs]
-  [p.join() for p in procs]
-
-  print('Graphs took {} seconds'.format(time.time() - starttime13))
-
-  procs1 = Process(target=pdf_before_DE,args=(args.anal))
-  procs1.start()
-  starttime16=time.time()
-  ps_main.join()
-  print('Main took {} seconds'.format(time.time() - starttime16))
-
-  starttime15=time.time()
-  n_fil_sort_con=list(n_fil_sort_con)
-  n_fil_sort_tre=list(n_fil_sort_tre)
-  if n_fil_sort_con==[]:
-     n_fil_sort_con=n_raw_sort_con
-     n_fil_sort_tre=n_raw_sort_tre
-
-  n_raw_sort_con=list(n_raw_sort_con)
-  n_raw_sort_tre=list(n_raw_sort_tre)
-  n_names_con=list(new_n_names_con)
-  n_names_tre=list(new_n_names_tre)
-
-  ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2))
-  ps_write.start()
-
-  ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
-  ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
-  [p.start() for p in ps1_matrix]
-
-  ps_write.join()
-  [p.join() for p in ps1_matrix]
-  procs1.join()
-  print('Files took {} seconds'.format(time.time() - starttime15))
- print('That took {} seconds'.format(time.time() - starttime10))
- print('That took {} seconds'.format(time.time() - starttime))
-
-
-
-
-
-
-
-
diff -r d2eea02053a0 -r d9b2a7df9d4b n_spiecies.loc.sample
--- a/n_spiecies.loc.sample	Wed Oct 28 08:13:30 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,300 +0,0 @@
-# bowtie2_indices.loc.sample
-# This is a *.loc.sample file distributed with Galaxy that enables tools
-# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
-# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
-# First create these data files and save them in your own data directory structure.
-# Then, create a bowtie_indices.loc file to use those indexes with tools.
-# Copy this file, save it with the same name (minus the .sample), 
-# follow the format examples, and store the result in this directory.
-# The file should include an one line entry for each index set.
-# The path points to the "basename" for the set, not a specific file.
-# It has four text columns seperated by TABS.
-#
-# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
-#
-# So, for example, if you had hg18 indexes stored in:
-#
-#    /depot/data2/galaxy/hg19/bowtie2/
-#
-# containing hg19 genome and hg19.*.bt2 files, such as:
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
-#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
-#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
-#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
-#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
-#
-# then the bowtie2_indices.loc entry could look like this:
-#
-Acacia auriculiformis	Acacia auriculiformis	Acacia auriculiformis
-Acacia mangium	Acacia mangium	Acacia mangium
-Acyrthosiphon pisum	Acyrthosiphon pisum	Acyrthosiphon pisum
-Aedes aegypti	Aedes aegypti	Aedes aegypti
-Aegilops tauschii	Aegilops tauschii	Aegilops tauschii
-Alligator mississippiensis	Alligator mississippiensis	Alligator mississippiensis
-Amborella trichopoda	Amborella trichopoda	Amborella trichopoda
-Amphimedon queenslandica	Amphimedon queenslandica	Amphimedon queenslandica
-Anas platyrhynchos	Anas platyrhynchos	Anas platyrhynchos
-Anolis carolinensis	Anolis carolinensis	Anolis carolinensis
-Anopheles gambiae	Anopheles gambiae	Anopheles gambiae
-Apis mellifera	Apis mellifera	Apis mellifera
-Aquilegia caerulea	Aquilegia caerulea	Aquilegia caerulea
-Arabidopsis lyrata	Arabidopsis lyrata	Arabidopsis lyrata
-Arabidopsis thaliana	Arabidopsis thaliana	Arabidopsis thaliana
-Arachis hypogaea	Arachis hypogaea	Arachis hypogaea
-Artibeus jamaicensis	Artibeus jamaicensis	Artibeus jamaicensis
-Ascaris suum	Ascaris suum	Ascaris suum
-Asparagus officinalis	Asparagus officinalis	Asparagus officinalis
-Astatotilapia burtoni	Astatotilapia burtoni	Astatotilapia burtoni
-Ateles geoffroyi	Ateles geoffroyi	Ateles geoffroyi
-Avicennia marina	Avicennia marina	Avicennia marina
-BK polyomavirus	BK polyomavirus	BK polyomavirus
-Bactrocera dorsalis	Bactrocera dorsalis	Bactrocera dorsalis
-Bandicoot papillomatosis	Bandicoot papillomatosis	Bandicoot papillomatosis
-Biston betularia	Biston betularia	Biston betularia
-Bombyx mori	Bombyx mori	Bombyx mori
-Bos taurus	Bos taurus	Bos taurus
-Bovine foamy	Bovine foamy	Bovine foamy
-Bovine herpesvirus	Bovine herpesvirus	Bovine herpesvirus
-Bovine leukemia	Bovine leukemia	Bovine leukemia
-Brachypodium distachyon	Brachypodium distachyon	Brachypodium distachyon
-Branchiostoma belcheri	Branchiostoma belcheri	Branchiostoma belcheri
-Branchiostoma floridae	Branchiostoma floridae	Branchiostoma floridae
-Brassica napus	Brassica napus	Brassica napus
-Brassica oleracea	Brassica oleracea	Brassica oleracea
-Brassica rapa	Brassica rapa	Brassica rapa
-Brugia malayi	Brugia malayi	Brugia malayi
-Bruguiera cylindrica	Bruguiera cylindrica	Bruguiera cylindrica
-Bruguiera gymnorhiza	Bruguiera gymnorhiza	Bruguiera gymnorhiza
-Caenorhabditis brenneri	Caenorhabditis brenneri	Caenorhabditis brenneri
-Caenorhabditis briggsae	Caenorhabditis briggsae	Caenorhabditis briggsae
-Caenorhabditis elegans	Caenorhabditis elegans	Caenorhabditis elegans
-Caenorhabditis remanei	Caenorhabditis remanei	Caenorhabditis remanei
-Callithrix jacchus	Callithrix jacchus	Callithrix jacchus
-Camelina sativa	Camelina sativa	Camelina sativa
-Canis familiaris	Canis familiaris	Canis familiaris
-Capitella teleta	Capitella teleta	Capitella teleta
-Capra hircus	Capra hircus	Capra hircus
-Carica papaya	Carica papaya	Carica papaya
-Cavia porcellus	Cavia porcellus	Cavia porcellus
-Cerebratulus lacteus	Cerebratulus lacteus	Cerebratulus lacteus
-Chlamydomonas reinhardtii	Chlamydomonas reinhardtii	Chlamydomonas reinhardtii
-Chrysemys picta	Chrysemys picta	Chrysemys picta
-Ciona intestinalis	Ciona intestinalis	Ciona intestinalis
-Ciona savignyi	Ciona savignyi	Ciona savignyi
-Citrus clementina	Citrus clementina	Citrus clementina
-Citrus reticulata	Citrus reticulata	Citrus reticulata
-Citrus sinensis	Citrus sinensis	Citrus sinensis
-Citrus trifoliata	Citrus trifoliata	Citrus trifoliata
-Columba livia	Columba livia	Columba livia
-Cricetulus griseus	Cricetulus griseus	Cricetulus griseus
-Cucumis melo	Cucumis melo	Cucumis melo
-Cucumis sativus	Cucumis sativus	Cucumis sativus
-Culex quinquefasciatus	Culex quinquefasciatus	Culex quinquefasciatus
-Cunninghamia lanceolata	Cunninghamia lanceolata	Cunninghamia lanceolata
-Cynara cardunculus	Cynara cardunculus	Cynara cardunculus
-Cyprinus carpio	Cyprinus carpio	Cyprinus carpio
-Danio rerio	Danio rerio	Danio rerio
-Daphnia pulex	Daphnia pulex	Daphnia pulex
-Dasypus novemcinctus	Dasypus novemcinctus	Dasypus novemcinctus
-Daubentonia madagascariensis	Daubentonia madagascariensis	Daubentonia madagascariensis
-Dictyostelium discoideum	Dictyostelium discoideum	Dictyostelium discoideum
-Digitalis purpurea	Digitalis purpurea	Digitalis purpurea
-Dinoponera quadriceps	Dinoponera quadriceps	Dinoponera quadriceps
-Drosophila ananassae	Drosophila ananassae	Drosophila ananassae
-Drosophila erecta	Drosophila erecta	Drosophila erecta
-Drosophila grimshawi	Drosophila grimshawi	Drosophila grimshawi
-Drosophila melanogaster	Drosophila melanogaster	Drosophila melanogaster
-Drosophila mojavensis	Drosophila mojavensis	Drosophila mojavensis
-Drosophila persimilis	Drosophila persimilis	Drosophila persimilis
-Drosophila pseudoobscura	Drosophila pseudoobscura	Drosophila pseudoobscura
-Drosophila sechellia	Drosophila sechellia	Drosophila sechellia
-Drosophila simulans	Drosophila simulans	Drosophila simulans
-Drosophila virilis	Drosophila virilis	Drosophila virilis
-Drosophila willistoni	Drosophila willistoni	Drosophila willistoni
-Drosophila yakuba	Drosophila yakuba	Drosophila yakuba
-Duck enteritis	Duck enteritis	Duck enteritis
-Echinococcus granulosus	Echinococcus granulosus	Echinococcus granulosus
-Echinococcus multilocularis	Echinococcus multilocularis	Echinococcus multilocularis
-Ectocarpus siliculosus	Ectocarpus siliculosus	Ectocarpus siliculosus
-Elaeis guineensis	Elaeis guineensis	Elaeis guineensis
-Electrophorus electricus	Electrophorus electricus	Electrophorus electricus
-Epstein Barr	Epstein Barr	Epstein Barr
-Eptesicus fuscus	Eptesicus fuscus	Eptesicus fuscus
-Equus caballus	Equus caballus	Equus caballus
-Eugenia uniflora	Eugenia uniflora	Eugenia uniflora
-Fasciola hepatica	Fasciola hepatica	Fasciola hepatica
-Festuca arundinacea	Festuca arundinacea	Festuca arundinacea
-Fragaria vesca	Fragaria vesca	Fragaria vesca
-Fugu rubripes	Fugu rubripes	Fugu rubripes
-Gadus morhua	Gadus morhua	Gadus morhua
-Gallus gallus	Gallus gallus	Gallus gallus
-Glottidia pyramidata	Glottidia pyramidata	Glottidia pyramidata
-Glycine max	Glycine max	Glycine max
-Glycine soja	Glycine soja	Glycine soja
-Gorilla gorilla	Gorilla gorilla	Gorilla gorilla
-Gossypium arboreum	Gossypium arboreum	Gossypium arboreum
-Gossypium herbaceum	Gossypium herbaceum	Gossypium herbaceum
-Gossypium hirsutum	Gossypium hirsutum	Gossypium hirsutum
-Gossypium raimondii	Gossypium raimondii	Gossypium raimondii
-Gyrodactylus salaris	Gyrodactylus salaris	Gyrodactylus salaris
-Haemonchus contortus	Haemonchus contortus	Haemonchus contortus
-Haliotis rufescens	Haliotis rufescens	Haliotis rufescens
-Helianthus annuus	Helianthus annuus	Helianthus annuus
-Helianthus argophyllus	Helianthus argophyllus	Helianthus argophyllus
-Helianthus ciliaris	Helianthus ciliaris	Helianthus ciliaris
-Helianthus exilis	Helianthus exilis	Helianthus exilis
-Helianthus paradoxus	Helianthus paradoxus	Helianthus paradoxus
-Helianthus petiolaris	Helianthus petiolaris	Helianthus petiolaris
-Helianthus tuberosus	Helianthus tuberosus	Helianthus tuberosus
-Heliconius melpomene	Heliconius melpomene	Heliconius melpomene
-Heligmosomoides polygyrus	Heligmosomoides polygyrus	Heligmosomoides polygyrus
-Herpes B	Herpes B	Herpes B
-Herpes Simplex	Herpes Simplex	Herpes Simplex
-Herpesvirus of	Herpesvirus of	Herpesvirus of
-Herpesvirus saimiri	Herpesvirus saimiri	Herpesvirus saimiri
-Hevea brasiliensis	Hevea brasiliensis	Hevea brasiliensis
-Hippoglossus hippoglossus	Hippoglossus hippoglossus	Hippoglossus hippoglossus
-Homo sapiens	Homo sapiens	Homo sapiens
-Hordeum vulgare	Hordeum vulgare	Hordeum vulgare
-Human cytomegalovirus	Human cytomegalovirus	Human cytomegalovirus
-Human herpesvirus	Human herpesvirus	Human herpesvirus
-Human immunodeficiency	Human immunodeficiency	Human immunodeficiency
-Hydra magnipapillata	Hydra magnipapillata	Hydra magnipapillata
-Ictalurus punctatus	Ictalurus punctatus	Ictalurus punctatus
-Infectious laryngotracheitis	Infectious laryngotracheitis	Infectious laryngotracheitis
-Ixodes scapularis	Ixodes scapularis	Ixodes scapularis
-JC polyomavirus	JC polyomavirus	JC polyomavirus
-Kaposi sarcoma-associated	Kaposi sarcoma-associated	Kaposi sarcoma-associated
-Lagothrix lagotricha	Lagothrix lagotricha	Lagothrix lagotricha
-Lemur catta	Lemur catta	Lemur catta
-Leucosolenia complicata	Leucosolenia complicata	Leucosolenia complicata
-Linum usitatissimum	Linum usitatissimum	Linum usitatissimum
-Locusta migratoria	Locusta migratoria	Locusta migratoria
-Lottia gigantea	Lottia gigantea	Lottia gigantea
-Lotus japonicus	Lotus japonicus	Lotus japonicus
-Lytechinus variegatus	Lytechinus variegatus	Lytechinus variegatus
-Macaca mulatta	Macaca mulatta	Macaca mulatta
-Macaca nemestrina	Macaca nemestrina	Macaca nemestrina
-Macropus eugenii	Macropus eugenii	Macropus eugenii
-Malus domestica	Malus domestica	Malus domestica
-Manduca sexta	Manduca sexta	Manduca sexta
-Manihot esculenta	Manihot esculenta	Manihot esculenta
-Mareks disease	Mareks disease	Mareks disease
-Marsupenaeus japonicus	Marsupenaeus japonicus	Marsupenaeus japonicus
-Medicago truncatula	Medicago truncatula	Medicago truncatula
-Melibe leonina	Melibe leonina	Melibe leonina
-Merkel cell	Merkel cell	Merkel cell
-Mesocestoides corti	Mesocestoides corti	Mesocestoides corti
-Metriaclima zebra	Metriaclima zebra	Metriaclima zebra
-Microcebus murinus	Microcebus murinus	Microcebus murinus
-Monodelphis domestica	Monodelphis domestica	Monodelphis domestica
-Mouse cytomegalovirus	Mouse cytomegalovirus	Mouse cytomegalovirus
-Mouse gammaherpesvirus	Mouse gammaherpesvirus	Mouse gammaherpesvirus
-Mus musculus	Mus musculus	Mus musculus
-Nasonia giraulti	Nasonia giraulti	Nasonia giraulti
-Nasonia longicornis	Nasonia longicornis	Nasonia longicornis
-Nasonia vitripennis	Nasonia vitripennis	Nasonia vitripennis
-Nematostella vectensis	Nematostella vectensis	Nematostella vectensis
-Neolamprologus brichardi	Neolamprologus brichardi	Neolamprologus brichardi
-Nicotiana tabacum	Nicotiana tabacum	Nicotiana tabacum
-Nomascus leucogenys	Nomascus leucogenys	Nomascus leucogenys
-Oikopleura dioica	Oikopleura dioica	Oikopleura dioica
-Ophiophagus hannah	Ophiophagus hannah	Ophiophagus hannah
-Oreochromis niloticus	Oreochromis niloticus	Oreochromis niloticus
-Ornithorhynchus anatinus	Ornithorhynchus anatinus	Ornithorhynchus anatinus
-Oryctolagus cuniculus	Oryctolagus cuniculus	Oryctolagus cuniculus
-Oryza sativa	Oryza sativa	Oryza sativa
-Oryzias latipes	Oryzias latipes	Oryzias latipes
-Otolemur garnettii	Otolemur garnettii	Otolemur garnettii
-Ovis aries	Ovis aries	Ovis aries
-Paeonia lactiflora	Paeonia lactiflora	Paeonia lactiflora
-Pan paniscus	Pan paniscus	Pan paniscus
-Pan troglodytes	Pan troglodytes	Pan troglodytes
-Panagrellus redivivus	Panagrellus redivivus	Panagrellus redivivus
-Panax ginseng	Panax ginseng	Panax ginseng
-Papio hamadryas	Papio hamadryas	Papio hamadryas
-Paralichthys olivaceus	Paralichthys olivaceus	Paralichthys olivaceus
-Parasteatoda tepidariorum	Parasteatoda tepidariorum	Parasteatoda tepidariorum
-Patiria miniata	Patiria miniata	Patiria miniata
-Petromyzon marinus	Petromyzon marinus	Petromyzon marinus
-Phaeodactylum tricornutum	Phaeodactylum tricornutum	Phaeodactylum tricornutum
-Phaseolus vulgaris	Phaseolus vulgaris	Phaseolus vulgaris
-Physcomitrella patens	Physcomitrella patens	Physcomitrella patens
-Phytophthora infestans	Phytophthora infestans	Phytophthora infestans
-Phytophthora ramorum	Phytophthora ramorum	Phytophthora ramorum
-Phytophthora sojae	Phytophthora sojae	Phytophthora sojae
-Picea abies	Picea abies	Picea abies
-Pinus densata	Pinus densata	Pinus densata
-Pinus taeda	Pinus taeda	Pinus taeda
-Plutella xylostella	Plutella xylostella	Plutella xylostella
-Polistes canadensis	Polistes canadensis	Polistes canadensis
-Pongo pygmaeus	Pongo pygmaeus	Pongo pygmaeus
-Populus euphratica	Populus euphratica	Populus euphratica
-Populus trichocarpa	Populus trichocarpa	Populus trichocarpa
-Pristionchus pacificus	Pristionchus pacificus	Pristionchus pacificus
-Prunus persica	Prunus persica	Prunus persica
-Pseudorabies virus	Pseudorabies virus	Pseudorabies virus
-Pteropus alecto	Pteropus alecto	Pteropus alecto
-Pundamilia nyererei	Pundamilia nyererei	Pundamilia nyererei
-Pygathrix bieti	Pygathrix bieti	Pygathrix bieti
-Python bivittatus	Python bivittatus	Python bivittatus
-Raccoon polyomavirus	Raccoon polyomavirus	Raccoon polyomavirus
-Rattus norvegicus	Rattus norvegicus	Rattus norvegicus
-Rehmannia glutinosa	Rehmannia glutinosa	Rehmannia glutinosa
-Rhesus lymphocryptovirus	Rhesus lymphocryptovirus	Rhesus lymphocryptovirus
-Rhesus monkey	Rhesus monkey	Rhesus monkey
-Rhipicephalus microplus	Rhipicephalus microplus	Rhipicephalus microplus
-Ricinus communis	Ricinus communis	Ricinus communis
-Saccharum officinarum	Saccharum officinarum	Saccharum officinarum
-Saccharum sp.	Saccharum sp.	Saccharum sp.
-Saccoglossus kowalevskii	Saccoglossus kowalevskii	Saccoglossus kowalevskii
-Saguinus labiatus	Saguinus labiatus	Saguinus labiatus
-Saimiri boliviensis	Saimiri boliviensis	Saimiri boliviensis
-Salicornia europaea	Salicornia europaea	Salicornia europaea
-Salmo salar	Salmo salar	Salmo salar
-Salvia miltiorrhiza	Salvia miltiorrhiza	Salvia miltiorrhiza
-Salvia sclarea	Salvia sclarea	Salvia sclarea
-Sarcophilus harrisii	Sarcophilus harrisii	Sarcophilus harrisii
-Schistosoma japonicum	Schistosoma japonicum	Schistosoma japonicum
-Schistosoma mansoni	Schistosoma mansoni	Schistosoma mansoni
-Schmidtea mediterranea	Schmidtea mediterranea	Schmidtea mediterranea
-Selaginella moellendorffii	Selaginella moellendorffii	Selaginella moellendorffii
-Simian foamy	Simian foamy	Simian foamy
-Simian virus	Simian virus	Simian virus
-Solanum lycopersicum	Solanum lycopersicum	Solanum lycopersicum
-Solanum tuberosum	Solanum tuberosum	Solanum tuberosum
-Sorghum bicolor	Sorghum bicolor	Sorghum bicolor
-Spodoptera frugiperda	Spodoptera frugiperda	Spodoptera frugiperda
-Strigamia maritima	Strigamia maritima	Strigamia maritima
-Strongylocentrotus purpuratus	Strongylocentrotus purpuratus	Strongylocentrotus purpuratus
-Strongyloides ratti	Strongyloides ratti	Strongyloides ratti
-Sus scrofa	Sus scrofa	Sus scrofa
-Sycon ciliatum	Sycon ciliatum	Sycon ciliatum
-Symbiodinium microadriaticum	Symbiodinium microadriaticum	Symbiodinium microadriaticum
-Symphalangus syndactylus	Symphalangus syndactylus	Symphalangus syndactylus
-Taeniopygia guttata	Taeniopygia guttata	Taeniopygia guttata
-Terebratulina retusa	Terebratulina retusa	Terebratulina retusa
-Tetranychus urticae	Tetranychus urticae	Tetranychus urticae
-Tetraodon nigroviridis	Tetraodon nigroviridis	Tetraodon nigroviridis
-Theobroma cacao	Theobroma cacao	Theobroma cacao
-Torque teno	Torque teno	Torque teno
-Tribolium castaneum	Tribolium castaneum	Tribolium castaneum
-Triops cancriformis	Triops cancriformis	Triops cancriformis
-Triticum aestivum	Triticum aestivum	Triticum aestivum
-Triticum turgidum	Triticum turgidum	Triticum turgidum
-Tupaia chinensis	Tupaia chinensis	Tupaia chinensis
-Vigna unguiculata	Vigna unguiculata	Vigna unguiculata
-Vitis vinifera	Vitis vinifera	Vitis vinifera
-Vriesea carinata	Vriesea carinata	Vriesea carinata
-Xenopus laevis	Xenopus laevis	Xenopus laevis
-Xenopus tropicalis	Xenopus tropicalis	Xenopus tropicalis
-Xenoturbella bocki	Xenoturbella bocki	Xenoturbella bocki
-Zea mays	Zea mays	Zea mays
-#
-#More examples:
-#
-#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/mm10/bowtie2/mm10
-#dm3	dm3		D. melanogaster (dm3)	/depot/data2/galaxy/mm10/bowtie2/dm3
-#