Mercurial > repos > glogobyte > isoread
view mirbase_ultra_v2.py @ 18:836894e48f1d draft
Uploaded
author | glogobyte |
---|---|
date | Thu, 22 Oct 2020 07:49:40 +0000 |
parents | |
children |
line wrap: on
line source
from mirbase_functions import * from mirbase_graphs import * import itertools import time import sys import os import urllib.request import gzip from multiprocessing import Process, Queue, Lock, Pool, Manager, Value import subprocess import argparse from collections import OrderedDict from matplotlib.backends.backend_pdf import PdfPages import pandas as pd from math import pi import numpy as np import matplotlib.pyplot as plt from matplotlib.ticker import PercentFormatter import seaborn as sns import scipy.stats as stats from plotnine import * import math import re import matplotlib.ticker as mtick import copy subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre']) parser = argparse.ArgumentParser() parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store") parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[]) parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] ) parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") parser.add_argument("-gen", "--org_name", help="tool directory path", action="store") parser.add_argument("-program", "--pro", help="choose type of analysis", action="store") parser.add_argument("-f", "--flag", help="choose the database", action="store") parser.add_argument("-umis", "--umi", help="choose the database", action="store") parser.add_argument("-percentage", "--per", help="choose the database", action="store") parser.add_argument("-counts", "--count", help="choose the database", action="store") parser.add_argument("-name1", "--n1", help="choose the database", action="store") parser.add_argument("-name2", "--n2", help="choose the database", action="store") args = parser.parse_args() ############################################################################################################################################################################################### if __name__ == '__main__': starttime = time.time() q1 = Queue() q2 = Queue() lock = Lock() manager = Manager() mature_mirnas=manager.list() ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name)) ps_mature.start() args.control[0]=args.control[0][1:] args.control[len(args.control)-1][:-1] control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)] args.treated[0]=args.treated[0][1:] args.treated[len(args.treated)-1][:-1] treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)] ############## Detection of templated isoforms ################ radar = manager.list([0,0,0,0]) samples = manager.list() data= manager.list() names_con=manager.list() samples_mirna_names=manager.list() deseq=manager.list() unmap_seq=manager.Value('i',0) unmap_counts=manager.Value('i',0) LH2E_names=manager.list() ini_c_samples = manager.list() radar1 = manager.list([0,0,0,0]) samples1 = manager.list() data1 = manager.list() names_tre = manager.list() samples_mirna_names1=manager.list() deseq1=manager.list() unmap1_seq = manager.Value('i',0) unmap1_counts = manager.Value('i',0) LH8E_names=manager.list() ini_t_samples = manager.list() ps_mature.join() mature_mirnas=list(mature_mirnas) starttime1 = time.time() ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control] ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated]) [p.start() for p in ps_sam] [p.join() for p in ps_sam] print('SAM took {} seconds'.format(time.time() - starttime1)) ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))] ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))]) [x.start() for x in ps_hist] starttime200=time.time() sc = list(samples) st = list(samples1) names_con=list(names_con) names_tre=list(names_tre) samples_mirna_names=list(samples_mirna_names) samples_mirna_names.sort() samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names)) samples_mirna_names1=list(samples_mirna_names1) samples_mirna_names1.sort() samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1)) deseq=list(deseq) deseq1=list(deseq1) new_names_con=manager.list() new_names_tre=manager.list() new_deseq=manager.list() new_deseq1=manager.list() ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)] ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)]) [z.start() for z in ps_deseq] [z.join() for z in ps_deseq] new_deseq=list(new_deseq) new_deseq1=list(new_deseq1) LH2E=[[x[0],x[2]] for x in new_deseq[0]] [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq] LH8E=[[x[0],x[2]] for x in new_deseq1[0]] [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1] print('Deseq took {} seconds'.format(time.time() - starttime200)) merg_nam_LH2E=manager.list() merg_nam_LH8E=manager.list() LH2E_copy=copy.deepcopy(list(LH2E)) LH8E_copy=copy.deepcopy(list(LH8E)) fil_sort_tre=manager.list() fil_sort_con=manager.list() raw_sort_tre=manager.list() raw_sort_con=manager.list() ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con)) ps_main.start() if args.anal=="2": all_iso = manager.list() ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso)) ps_non_iso.start() ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))] ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))]) [x.start() for x in ps_merge] [x.join() for x in ps_merge] merg_nam_LH2E=list(merg_nam_LH2E) merg_nam_LH8E=list(merg_nam_LH8E) starttime2 = time.time() procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data] procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1]) procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))]) if args.anal == "1": procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))]) [p.start() for p in procs] if args.anal=="1": [x.join() for x in ps_hist] [p.join() for p in procs] ps_pdf = Process(target=pdf_before_DE,args=(args.anal)) ps_pdf.start() print('Graphs took {} seconds'.format(time.time() - starttime2)) ps_main.join() fil_sort_con=list(fil_sort_con) fil_sort_tre=list(fil_sort_tre) if fil_sort_con==[]: fil_sort_con=raw_sort_con fil_sort_tre=raw_sort_tre raw_sort_con=list(raw_sort_con) raw_sort_tre=list(raw_sort_tre) names_con=list(new_names_con) names_tre=list(new_names_tre) ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1)) ps_write.start() ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))] ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))]) [p.start() for p in ps1_matrix] if args.anal=="1": ps_pdf.join() if args.anal=="2": [p.join() for p in procs] [x.join() for x in ps_hist] ps_write.join() [p.join() for p in ps1_matrix] ############################## Detection of Both ####################################### starttime10 = time.time() if args.anal == "2": n_data= manager.list() n_names_con=manager.list() n_samples_mirna_names=manager.list() n_deseq=manager.list() n_LH2E_names=manager.list() n_data1 = manager.list() n_names_tre = manager.list() n_samples_mirna_names1=manager.list() n_deseq1=manager.list() n_LH8E_names=manager.list() new_mat_mirnas = list(mature_mirnas) ps_non_iso.join() all_iso=list(all_iso) new_mat_mirnas.extend(all_iso) starttime11=time.time() ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control] ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated]) [p.start() for p in ps_sam] [p.join() for p in ps_sam] print('Non-sam took {} seconds'.format(time.time() - starttime11)) starttime12=time.time() n_names_con=list(n_names_con) n_names_tre=list(n_names_tre) n_samples_mirna_names=list(n_samples_mirna_names) n_samples_mirna_names.sort() n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names)) n_samples_mirna_names1=list(n_samples_mirna_names1) n_samples_mirna_names1.sort() n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1)) n_deseq=list(n_deseq) n_deseq1=list(n_deseq1) new_n_names_con=manager.list() new_n_names_tre=manager.list() n_new_deseq=manager.list() n_new_deseq1=manager.list() ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)] ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)]) [x.start() for x in ps_deseq] [x.join() for x in ps_deseq] n_new_deseq=list(n_new_deseq) n_new_deseq1=list(n_new_deseq1) n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]] [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq] n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]] [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1] print('Non-deseq took {} seconds'.format(time.time() - starttime12)) merg_nam_n_LH2E=manager.list() merg_nam_n_LH8E=manager.list() n_LH2E_copy=copy.deepcopy(list(n_LH2E)) n_LH8E_copy=copy.deepcopy(list(n_LH8E)) n_sort_tre=manager.list() n_sort_con=manager.list() n_fil_sort_con=manager.list() n_fil_sort_tre=manager.list() n_raw_sort_con=manager.list() n_raw_sort_tre=manager.list() ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con)) ps_main.start() ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))] ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))]) [p.start() for p in ps_merge] [p.join() for p in ps_merge] merg_nam_n_LH2E=list(merg_nam_n_LH2E) merg_nam_n_LH8E=list(merg_nam_n_LH8E) procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data] procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1]) procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))]) procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))]) procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))]) starttime13=time.time() [p.start() for p in procs] [p.join() for p in procs] print('Graphs took {} seconds'.format(time.time() - starttime13)) procs1 = Process(target=pdf_before_DE,args=(args.anal)) procs1.start() starttime14=time.time() ps_main.join() n_fil_sort_con=list(n_fil_sort_con) n_fil_sort_tre=list(n_fil_sort_tre) if n_fil_sort_con==[]: n_fil_sort_con=n_raw_sort_con n_fil_sort_tre=n_raw_sort_tre n_raw_sort_con=list(n_raw_sort_con) n_raw_sort_tre=list(n_raw_sort_tre) n_names_con=list(new_n_names_con) n_names_tre=list(new_n_names_tre) ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2)) ps_write.start() ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))] ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))]) [p.start() for p in ps1_matrix] ps_write.join() [p.join() for p in ps1_matrix] procs1.join() print('That took {} seconds'.format(time.time() - starttime10)) print('That took {} seconds'.format(time.time() - starttime))