Mercurial > repos > glogobyte > isoread
changeset 19:4233626308b4 draft
Deleted selected files
author | glogobyte |
---|---|
date | Thu, 22 Oct 2020 08:15:44 +0000 |
parents | 836894e48f1d |
children | 3efb213d763f |
files | mirbase_ultra_v2.py |
diffstat | 1 files changed, 0 insertions(+), 367 deletions(-) [+] |
line wrap: on
line diff
--- a/mirbase_ultra_v2.py Thu Oct 22 07:49:40 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,367 +0,0 @@ -from mirbase_functions import * -from mirbase_graphs import * -import itertools -import time -import sys -import os -import urllib.request -import gzip -from multiprocessing import Process, Queue, Lock, Pool, Manager, Value -import subprocess -import argparse -from collections import OrderedDict -from matplotlib.backends.backend_pdf import PdfPages -import pandas as pd -from math import pi -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.ticker import PercentFormatter -import seaborn as sns -import scipy.stats as stats -from plotnine import * -import math -import re -import matplotlib.ticker as mtick -import copy - -subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre']) - -parser = argparse.ArgumentParser() -parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store") -parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[]) -parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] ) -parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") -parser.add_argument("-gen", "--org_name", help="tool directory path", action="store") -parser.add_argument("-program", "--pro", help="choose type of analysis", action="store") -parser.add_argument("-f", "--flag", help="choose the database", action="store") -parser.add_argument("-umis", "--umi", help="choose the database", action="store") -parser.add_argument("-percentage", "--per", help="choose the database", action="store") -parser.add_argument("-counts", "--count", help="choose the database", action="store") -parser.add_argument("-name1", "--n1", help="choose the database", action="store") -parser.add_argument("-name2", "--n2", help="choose the database", action="store") -args = parser.parse_args() - - -############################################################################################################################################################################################### - -if __name__ == '__main__': - - starttime = time.time() - - q1 = Queue() - q2 = Queue() - lock = Lock() - manager = Manager() - - mature_mirnas=manager.list() - ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name)) - ps_mature.start() - - args.control[0]=args.control[0][1:] - args.control[len(args.control)-1][:-1] - control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)] - - args.treated[0]=args.treated[0][1:] - args.treated[len(args.treated)-1][:-1] - treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)] - - -############## Detection of templated isoforms ################ - - radar = manager.list([0,0,0,0]) - samples = manager.list() - data= manager.list() - names_con=manager.list() - samples_mirna_names=manager.list() - deseq=manager.list() - unmap_seq=manager.Value('i',0) - unmap_counts=manager.Value('i',0) - LH2E_names=manager.list() - ini_c_samples = manager.list() - - - radar1 = manager.list([0,0,0,0]) - samples1 = manager.list() - data1 = manager.list() - names_tre = manager.list() - samples_mirna_names1=manager.list() - deseq1=manager.list() - unmap1_seq = manager.Value('i',0) - unmap1_counts = manager.Value('i',0) - LH8E_names=manager.list() - ini_t_samples = manager.list() - ps_mature.join() - - - mature_mirnas=list(mature_mirnas) - - - starttime1 = time.time() - ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control] - ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated]) - - [p.start() for p in ps_sam] - [p.join() for p in ps_sam] - print('SAM took {} seconds'.format(time.time() - starttime1)) - - ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))] - ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))]) - [x.start() for x in ps_hist] - - starttime200=time.time() - - sc = list(samples) - st = list(samples1) - - names_con=list(names_con) - names_tre=list(names_tre) - samples_mirna_names=list(samples_mirna_names) - samples_mirna_names.sort() - samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names)) - - samples_mirna_names1=list(samples_mirna_names1) - samples_mirna_names1.sort() - samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1)) - - deseq=list(deseq) - deseq1=list(deseq1) - - new_names_con=manager.list() - new_names_tre=manager.list() - new_deseq=manager.list() - new_deseq1=manager.list() - ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)] - ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)]) - - [z.start() for z in ps_deseq] - [z.join() for z in ps_deseq] - new_deseq=list(new_deseq) - new_deseq1=list(new_deseq1) - - LH2E=[[x[0],x[2]] for x in new_deseq[0]] - [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq] - - LH8E=[[x[0],x[2]] for x in new_deseq1[0]] - [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1] - - print('Deseq took {} seconds'.format(time.time() - starttime200)) - - merg_nam_LH2E=manager.list() - merg_nam_LH8E=manager.list() - - LH2E_copy=copy.deepcopy(list(LH2E)) - LH8E_copy=copy.deepcopy(list(LH8E)) - - fil_sort_tre=manager.list() - fil_sort_con=manager.list() - raw_sort_tre=manager.list() - raw_sort_con=manager.list() - - ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con)) - ps_main.start() - - if args.anal=="2": - all_iso = manager.list() - ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso)) - ps_non_iso.start() - - ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))] - ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))]) - [x.start() for x in ps_merge] - [x.join() for x in ps_merge] - - merg_nam_LH2E=list(merg_nam_LH2E) - merg_nam_LH8E=list(merg_nam_LH8E) - - starttime2 = time.time() - procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data] - procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1]) - procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))]) - if args.anal == "1": - procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))]) - - [p.start() for p in procs] - - - if args.anal=="1": - [x.join() for x in ps_hist] - [p.join() for p in procs] - ps_pdf = Process(target=pdf_before_DE,args=(args.anal)) - ps_pdf.start() - - print('Graphs took {} seconds'.format(time.time() - starttime2)) - - ps_main.join() - - fil_sort_con=list(fil_sort_con) - fil_sort_tre=list(fil_sort_tre) - if fil_sort_con==[]: - fil_sort_con=raw_sort_con - fil_sort_tre=raw_sort_tre - - raw_sort_con=list(raw_sort_con) - raw_sort_tre=list(raw_sort_tre) - names_con=list(new_names_con) - names_tre=list(new_names_tre) - - ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1)) - ps_write.start() - - ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))] - ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))]) - [p.start() for p in ps1_matrix] - - if args.anal=="1": - ps_pdf.join() - if args.anal=="2": - [p.join() for p in procs] - [x.join() for x in ps_hist] - - ps_write.join() - [p.join() for p in ps1_matrix] - - -############################## Detection of Both ####################################### - - starttime10 = time.time() - - if args.anal == "2": - - n_data= manager.list() - n_names_con=manager.list() - n_samples_mirna_names=manager.list() - n_deseq=manager.list() - n_LH2E_names=manager.list() - - n_data1 = manager.list() - n_names_tre = manager.list() - n_samples_mirna_names1=manager.list() - n_deseq1=manager.list() - n_LH8E_names=manager.list() - - - new_mat_mirnas = list(mature_mirnas) - ps_non_iso.join() - - all_iso=list(all_iso) - new_mat_mirnas.extend(all_iso) - - starttime11=time.time() - - ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control] - ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated]) - - [p.start() for p in ps_sam] - [p.join() for p in ps_sam] - - print('Non-sam took {} seconds'.format(time.time() - starttime11)) - - starttime12=time.time() - - n_names_con=list(n_names_con) - n_names_tre=list(n_names_tre) - n_samples_mirna_names=list(n_samples_mirna_names) - n_samples_mirna_names.sort() - n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names)) - - n_samples_mirna_names1=list(n_samples_mirna_names1) - n_samples_mirna_names1.sort() - n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1)) - - n_deseq=list(n_deseq) - n_deseq1=list(n_deseq1) - - new_n_names_con=manager.list() - new_n_names_tre=manager.list() - n_new_deseq=manager.list() - n_new_deseq1=manager.list() - ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)] - ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)]) - - [x.start() for x in ps_deseq] - [x.join() for x in ps_deseq] - n_new_deseq=list(n_new_deseq) - n_new_deseq1=list(n_new_deseq1) - - n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]] - [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq] - - n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]] - [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1] - - print('Non-deseq took {} seconds'.format(time.time() - starttime12)) - - merg_nam_n_LH2E=manager.list() - merg_nam_n_LH8E=manager.list() - - n_LH2E_copy=copy.deepcopy(list(n_LH2E)) - n_LH8E_copy=copy.deepcopy(list(n_LH8E)) - - n_sort_tre=manager.list() - n_sort_con=manager.list() - - n_fil_sort_con=manager.list() - n_fil_sort_tre=manager.list() - n_raw_sort_con=manager.list() - n_raw_sort_tre=manager.list() - - ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con)) - ps_main.start() - - ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))] - ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))]) - [p.start() for p in ps_merge] - [p.join() for p in ps_merge] - - merg_nam_n_LH2E=list(merg_nam_n_LH2E) - merg_nam_n_LH8E=list(merg_nam_n_LH8E) - - procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data] - procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1]) - procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))]) - procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))]) - procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))]) - - starttime13=time.time() - [p.start() for p in procs] - [p.join() for p in procs] - - print('Graphs took {} seconds'.format(time.time() - starttime13)) - - procs1 = Process(target=pdf_before_DE,args=(args.anal)) - procs1.start() - - starttime14=time.time() - ps_main.join() - - n_fil_sort_con=list(n_fil_sort_con) - n_fil_sort_tre=list(n_fil_sort_tre) - if n_fil_sort_con==[]: - n_fil_sort_con=n_raw_sort_con - n_fil_sort_tre=n_raw_sort_tre - - n_raw_sort_con=list(n_raw_sort_con) - n_raw_sort_tre=list(n_raw_sort_tre) - n_names_con=list(new_n_names_con) - n_names_tre=list(new_n_names_tre) - - ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2)) - ps_write.start() - - ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))] - ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))]) - [p.start() for p in ps1_matrix] - - ps_write.join() - [p.join() for p in ps1_matrix] - procs1.join() - print('That took {} seconds'.format(time.time() - starttime10)) - print('That took {} seconds'.format(time.time() - starttime)) - - - - - - - -