Mercurial > repos > glogobyte > isoread
view mirgene_ultra_v2.py @ 16:e19c832c5368 draft
Uploaded
author | glogobyte |
---|---|
date | Thu, 22 Oct 2020 07:47:58 +0000 |
parents | 88b3ef865431 |
children |
line wrap: on
line source
# Module setup: imports, working directories and command-line parsing.
#
# NOTE: the import order below is kept exactly as in the original file. The
# star imports can shadow names from one another (and be shadowed by the
# explicit imports that follow), so reordering could change which name wins.
from mirgene_functions import *
from mirgene_graphs import *
import itertools
import time
import sys
import os
import urllib.request
import gzip
from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
import subprocess
import argparse
from collections import OrderedDict
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd
from math import pi
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import scipy.stats as stats
from plotnine import *
import math
import re
import matplotlib.ticker as mtick
import copy

# Working directories, created relative to the current directory.
# os.makedirs(exist_ok=True) replaces the former external `mkdir -p` call: it
# needs no `mkdir` binary and raises OSError immediately if a directory cannot
# be created, instead of silently ignoring the exit status.
for _workdir in (
    'split1', 'split2', 'split3', 'split4', 'split11', 'split12',
    'Counts',
    'Diff/temp_con', 'Diff/temp_tre', 'Diff/n_temp_con', 'Diff/n_temp_tre',
):
    os.makedirs(_workdir, exist_ok=True)

# Command-line interface.
# NOTE(review): several help strings below look copy-pasted ("choose the
# database", "tool directory path" for -gen). They are left unchanged here
# because the intended wording cannot be confirmed from this file alone.
parser = argparse.ArgumentParser()
parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
parser.add_argument("-f", "--flag", help="choose the database", action="store")
parser.add_argument("-umis", "--umi", help="choose the database", action="store")
parser.add_argument("-percentage", "--per", help="choose the database", action="store")
parser.add_argument("-counts", "--count", help="choose the database", action="store")
parser.add_argument("-name1", "--n1", help="choose the database", action="store")
parser.add_argument("-name2", "--n2", help="choose the database", action="store")
args = parser.parse_args()
###############################################################################
# Script entry point.
#
# Pipeline driver: every stage is fanned out over multiprocessing.Process
# workers that report back through Manager-backed lists/values.  The worker
# functions themselves (sam, deseqe2, main_temp, ...) come from the star
# imports at the top of the file.
###############################################################################


def _start_all(processes):
    """Start every process in *processes*, in order."""
    for proc in processes:
        proc.start()


def _join_all(processes):
    """Block until every process in *processes* has finished."""
    for proc in processes:
        proc.join()


def _sorted_unique(items):
    """Return the elements of *items* sorted, with equal neighbours collapsed."""
    return [key for key, _ in itertools.groupby(sorted(items))]


def _count_matrix(per_sample):
    """Merge per-sample rows into one table.

    Each output row starts with fields 0 and 2 of the matching row of the
    first sample, followed by field 1 of that row from every sample, in
    sample order.  Raises IndexError if *per_sample* is empty.
    """
    rows = [[entry[0], entry[2]] for entry in per_sample[0]]
    for idx, row in enumerate(rows):
        row.extend(sample[idx][1] for sample in per_sample)
    return rows


if __name__ == '__main__':

    starttime = time.time()

    lock = Lock()
    manager = Manager()

    # Started first so it runs while the shared containers are being created;
    # it is joined just before the SAM workers need `mature_mirnas`.
    mature_mirnas = manager.list()
    ps_mature = Process(target=download_matures, args=(mature_mirnas, args.org_name))
    ps_mature.start()

    # The first token of each list loses its first character, then tokens are
    # grouped in pairs.  The last character of every pair's second element is
    # removed further down via `path[1][:-1]`.
    # (The original also evaluated `args.control[-1][:-1]` / the `treated`
    # equivalent and discarded the result; those no-op statements are gone.)
    args.control[0] = args.control[0][1:]
    control = [args.control[i:i + 2] for i in range(0, len(args.control), 2)]
    args.treated[0] = args.treated[0][1:]
    treated = [args.treated[i:i + 2] for i in range(0, len(args.treated), 2)]

    ############## Detection of templated isoforms ################

    # Shared state for the control group ...
    samples = manager.list()
    data = manager.list()
    names_con = manager.list()
    samples_mirna_names = manager.list()
    deseq = manager.list()
    unmap_seq = manager.Value('i', 0)
    unmap_counts = manager.Value('i', 0)
    LH2E_names = manager.list()
    ini_c_samples = manager.list()

    # ... and the same set for the treated group.
    samples1 = manager.list()
    data1 = manager.list()
    names_tre = manager.list()
    samples_mirna_names1 = manager.list()
    deseq1 = manager.list()
    unmap1_seq = manager.Value('i', 0)
    unmap1_counts = manager.Value('i', 0)
    LH8E_names = manager.list()
    ini_t_samples = manager.list()

    ps_mature.join()
    mature_mirnas = list(mature_mirnas)

    # One `sam` worker per input pair.
    starttime1 = time.time()
    ps_sam = [
        Process(target=sam, args=(
            mature_mirnas, path[1][:-1], path[0].split(",")[0], "c", lock,
            samples, data, names_con, unmap_seq, samples_mirna_names, deseq,
            LH2E_names, "0", ini_c_samples, unmap_counts))
        for path in control
    ]
    ps_sam.extend([
        Process(target=sam, args=(
            mature_mirnas, path[1][:-1], path[0].split(",")[0], "t", lock,
            samples1, data1, names_tre, unmap1_seq, samples_mirna_names1, deseq1,
            LH8E_names, "0", ini_t_samples, unmap1_counts))
        for path in treated
    ])
    _start_all(ps_sam)
    _join_all(ps_sam)
    print('SAM took {} seconds'.format(time.time() - starttime1))

    # Started now, joined later (where depends on the analysis mode).
    ps_hist = [Process(target=hist_red, args=(ini_c_samples, 'c'))]
    ps_hist.extend([Process(target=hist_red, args=(ini_t_samples, 't'))])
    _start_all(ps_hist)

    # Copy worker results out of the manager into plain lists.
    starttime200 = time.time()
    sc = list(samples)
    st = list(samples1)
    names_con = list(names_con)
    names_tre = list(names_tre)
    samples_mirna_names = _sorted_unique(samples_mirna_names)
    samples_mirna_names1 = _sorted_unique(samples_mirna_names1)
    deseq = list(deseq)
    deseq1 = list(deseq1)

    new_names_con = manager.list()
    new_names_tre = manager.list()
    new_deseq = manager.list()
    new_deseq1 = manager.list()
    ps_deseq = [
        Process(target=deseqe2, args=(
            sampp, samples_mirna_names, lock, new_deseq, names_con[i], new_names_con))
        for i, sampp in enumerate(deseq)
    ]
    ps_deseq.extend([
        Process(target=deseqe2, args=(
            sampp, samples_mirna_names1, lock, new_deseq1, names_tre[i], new_names_tre))
        for i, sampp in enumerate(deseq1)
    ])
    _start_all(ps_deseq)
    _join_all(ps_deseq)
    new_deseq = list(new_deseq)
    new_deseq1 = list(new_deseq1)

    LH2E = _count_matrix(new_deseq)
    LH8E = _count_matrix(new_deseq1)
    print('Deseq took {} seconds'.format(time.time() - starttime200))

    merg_nam_LH2E = manager.list()
    merg_nam_LH8E = manager.list()
    LH2E_copy = copy.deepcopy(LH2E)
    LH8E_copy = copy.deepcopy(LH8E)

    fil_sort_tre = manager.list()
    fil_sort_con = manager.list()
    raw_sort_tre = manager.list()
    raw_sort_con = manager.list()

    # Runs in the background; joined after the graph processes are launched.
    ps_main = Process(target=main_temp, args=(
        list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1, 1,
        list(names_con), list(names_tre),
        fil_sort_tre, fil_sort_con, raw_sort_tre, raw_sort_con,
        args.per, args.count))
    ps_main.start()

    if args.anal == "2":
        # Joined at the start of the second half of the script.
        all_iso = manager.list()
        ps_non_iso = Process(target=non_template_ref, args=(sc, st, all_iso))
        ps_non_iso.start()

    ps_merge = [Process(target=merging_names, args=(LH2E_copy, merg_nam_LH2E))]
    ps_merge.extend([Process(target=merging_names, args=(LH8E_copy, merg_nam_LH8E))])
    _start_all(ps_merge)
    _join_all(ps_merge)
    merg_nam_LH2E = list(merg_nam_LH2E)
    merg_nam_LH8E = list(merg_nam_LH8E)

    starttime2 = time.time()
    procs = [Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 1)) for x in data]
    procs.extend([Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 1)) for x in data1])
    procs.extend([Process(target=make_spider, args=(merg_nam_LH2E, merg_nam_LH8E))])
    if args.anal == "1":
        procs.extend([Process(target=pie_temp, args=(
            merg_nam_LH2E, unmap_seq.value, unmap_counts.value,
            merg_nam_LH8E, unmap1_seq.value, unmap1_counts.value))])
    _start_all(procs)

    if args.anal == "1":
        _join_all(ps_hist)
        _join_all(procs)
        # `args` must be a tuple: the original `(args.anal)` was a bare string
        # that Process split into characters, which only happened to work for
        # one-character values.
        ps_pdf = Process(target=pdf_before_DE, args=(args.anal,))
        ps_pdf.start()

    # NOTE(review): the source's indentation was lost; this print is assumed to
    # sit outside the `if` above -- confirm against the original layout.
    print('Graphs took {} seconds'.format(time.time() - starttime2))

    ps_main.join()

    # Materialise the raw lists first so that the fallback below binds plain
    # lists (the original bound the manager proxies in that case).
    raw_sort_con = list(raw_sort_con)
    raw_sort_tre = list(raw_sort_tre)
    fil_sort_con = list(fil_sort_con)
    fil_sort_tre = list(fil_sort_tre)
    if not fil_sort_con:
        # No filtered result for the control group: use the raw tables for both.
        fil_sort_con = raw_sort_con
        fil_sort_tre = raw_sort_tre

    names_con = list(new_names_con)
    names_tre = list(new_names_tre)

    ps_write = Process(target=write_main, args=(
        raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre,
        names_con, names_tre, 1, args.per, args.n1, args.n2))
    ps_write.start()

    ps1_matrix = [Process(target=ssamples, args=(names_con, fil_sort_con, "Diff/temp_con/", 0))]
    ps1_matrix.extend([Process(target=ssamples, args=(names_tre, fil_sort_tre, "Diff/temp_tre/", 0))])
    _start_all(ps1_matrix)

    if args.anal == "1":
        ps_pdf.join()
    if args.anal == "2":
        # In mode "2" the graph and histogram workers were not joined earlier.
        _join_all(procs)
        _join_all(ps_hist)

    ps_write.join()
    _join_all(ps1_matrix)

    ############################## Detection of Both #######################################

    starttime10 = time.time()

    if args.anal == "2":

        n_data = manager.list()
        n_names_con = manager.list()
        n_samples_mirna_names = manager.list()
        n_deseq = manager.list()
        n_LH2E_names = manager.list()

        n_data1 = manager.list()
        n_names_tre = manager.list()
        n_samples_mirna_names1 = manager.list()
        n_deseq1 = manager.list()
        n_LH8E_names = manager.list()

        # Reference list for this pass: the mature miRNAs plus whatever
        # non_template_ref collected into `all_iso`.
        new_mat_mirnas = list(mature_mirnas)
        ps_non_iso.join()
        all_iso = list(all_iso)
        new_mat_mirnas.extend(all_iso)

        starttime11 = time.time()
        ps_sam = [
            Process(target=non_sam, args=(
                new_mat_mirnas, path[1][:-1], path[0].split(",")[0], "c", lock,
                n_data, n_names_con, n_deseq, n_samples_mirna_names, n_LH2E_names))
            for path in control
        ]
        ps_sam.extend([
            Process(target=non_sam, args=(
                new_mat_mirnas, path[1][:-1], path[0].split(",")[0], "t", lock,
                n_data1, n_names_tre, n_deseq1, n_samples_mirna_names1, n_LH8E_names))
            for path in treated
        ])
        _start_all(ps_sam)
        _join_all(ps_sam)
        print('Non-sam took {} seconds'.format(time.time() - starttime11))

        starttime12 = time.time()
        n_names_con = list(n_names_con)
        n_names_tre = list(n_names_tre)
        n_samples_mirna_names = _sorted_unique(n_samples_mirna_names)
        n_samples_mirna_names1 = _sorted_unique(n_samples_mirna_names1)
        n_deseq = list(n_deseq)
        n_deseq1 = list(n_deseq1)

        new_n_names_con = manager.list()
        new_n_names_tre = manager.list()
        n_new_deseq = manager.list()
        n_new_deseq1 = manager.list()
        ps_deseq = [
            Process(target=deseqe2, args=(
                sampp, n_samples_mirna_names, lock, n_new_deseq,
                n_names_con[i], new_n_names_con))
            for i, sampp in enumerate(n_deseq)
        ]
        ps_deseq.extend([
            Process(target=deseqe2, args=(
                sampp, n_samples_mirna_names1, lock, n_new_deseq1,
                n_names_tre[i], new_n_names_tre))
            for i, sampp in enumerate(n_deseq1)
        ])
        _start_all(ps_deseq)
        _join_all(ps_deseq)
        n_new_deseq = list(n_new_deseq)
        n_new_deseq1 = list(n_new_deseq1)

        n_LH2E = _count_matrix(n_new_deseq)
        n_LH8E = _count_matrix(n_new_deseq1)
        print('Non-deseq took {} seconds'.format(time.time() - starttime12))

        merg_nam_n_LH2E = manager.list()
        merg_nam_n_LH8E = manager.list()
        n_LH2E_copy = copy.deepcopy(n_LH2E)
        n_LH8E_copy = copy.deepcopy(n_LH8E)

        n_fil_sort_con = manager.list()
        n_fil_sort_tre = manager.list()
        n_raw_sort_con = manager.list()
        n_raw_sort_tre = manager.list()

        ps_main = Process(target=main_temp, args=(
            list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1, 1,
            list(n_names_con), list(n_names_tre),
            n_fil_sort_tre, n_fil_sort_con, n_raw_sort_tre, n_raw_sort_con,
            args.per, args.count))
        ps_main.start()

        starttime14 = time.time()
        ps_merge = [Process(target=merging_names, args=(n_LH2E_copy, merg_nam_n_LH2E))]
        ps_merge.extend([Process(target=merging_names, args=(n_LH8E_copy, merg_nam_n_LH8E))])
        _start_all(ps_merge)
        _join_all(ps_merge)
        merg_nam_n_LH2E = list(merg_nam_n_LH2E)
        merg_nam_n_LH8E = list(merg_nam_n_LH8E)
        print('Merging took {} seconds'.format(time.time() - starttime14))

        procs = [Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 2)) for x in n_data]
        procs.extend([Process(target=DB_write, args=(x[0], x[1], x[2], x[3], 2)) for x in n_data1])
        procs.extend([Process(target=logo_seq_red, args=(merg_nam_n_LH2E, 'c'))])
        procs.extend([Process(target=logo_seq_red, args=(merg_nam_n_LH8E, 't'))])
        procs.extend([Process(target=pie_non_temp, args=(
            merg_nam_LH2E, merg_nam_n_LH2E, merg_nam_LH8E, merg_nam_n_LH8E,
            unmap_seq.value, unmap1_seq.value, unmap_counts.value, unmap1_counts.value))])

        starttime13 = time.time()
        _start_all(procs)
        _join_all(procs)
        print('Graphs took {} seconds'.format(time.time() - starttime13))

        # Tuple fix as above: `(args.anal,)`, not `(args.anal)`.
        procs1 = Process(target=pdf_before_DE, args=(args.anal,))
        procs1.start()

        starttime16 = time.time()
        ps_main.join()
        print('Main took {} seconds'.format(time.time() - starttime16))

        starttime15 = time.time()

        # Same ordering fix as in the first pass: raw lists first, then the
        # fallback, so the filtered names never end up bound to a proxy.
        n_raw_sort_con = list(n_raw_sort_con)
        n_raw_sort_tre = list(n_raw_sort_tre)
        n_fil_sort_con = list(n_fil_sort_con)
        n_fil_sort_tre = list(n_fil_sort_tre)
        if not n_fil_sort_con:
            n_fil_sort_con = n_raw_sort_con
            n_fil_sort_tre = n_raw_sort_tre

        n_names_con = list(new_n_names_con)
        n_names_tre = list(new_n_names_tre)

        ps_write = Process(target=write_main, args=(
            n_raw_sort_con, n_raw_sort_tre, n_fil_sort_con, n_fil_sort_tre,
            n_names_con, n_names_tre, 2, args.per, args.n1, args.n2))
        ps_write.start()

        ps1_matrix = [Process(target=ssamples1, args=(
            n_names_con, n_fil_sort_con, names_con, fil_sort_con, "Diff/n_temp_con/", 0))]
        ps1_matrix.extend([Process(target=ssamples1, args=(
            n_names_tre, n_fil_sort_tre, names_tre, fil_sort_tre, "Diff/n_temp_tre/", 0))])
        _start_all(ps1_matrix)

        ps_write.join()
        _join_all(ps1_matrix)
        procs1.join()
        print('Files took {} seconds'.format(time.time() - starttime15))

        # NOTE(review): assumed to belong to the mode-"2" branch (it times the
        # section started at `starttime10`) -- confirm against the original
        # layout, which was lost in this copy.
        print('That took {} seconds'.format(time.time() - starttime10))

    print('That took {} seconds'.format(time.time() - starttime))