changeset 21:9c1e59bf8f6e draft

Deleted selected files
author glogobyte
date Thu, 22 Oct 2020 08:25:04 +0000
parents 3efb213d763f
children d766563ab56d
files mirbase_ultra_v2.py
diffstat 1 files changed, 0 insertions(+), 367 deletions(-) [+]
line wrap: on
line diff
--- a/mirbase_ultra_v2.py	Thu Oct 22 08:16:01 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-from mirbase_functions import *
-from mirbase_graphs import *
-import itertools
-import time
-import sys
-import os
-import urllib.request
-import gzip
-from multiprocessing import Process, Queue, Lock, Pool, Manager, Value
-import subprocess
-import argparse
-from collections import OrderedDict
-from matplotlib.backends.backend_pdf import PdfPages
-import pandas as pd
-from math import pi
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.ticker import PercentFormatter
-import seaborn as sns
-import scipy.stats as stats
-from plotnine import *
-import math
-import re
-import matplotlib.ticker as mtick
-import copy
-
-subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre'])
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store")
-parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[])
-parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] )
-parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
-parser.add_argument("-gen", "--org_name", help="tool directory path", action="store")
-parser.add_argument("-program", "--pro", help="choose type of analysis", action="store")
-parser.add_argument("-f", "--flag", help="choose the database", action="store")
-parser.add_argument("-umis", "--umi", help="choose the database", action="store")
-parser.add_argument("-percentage", "--per", help="choose the database", action="store")
-parser.add_argument("-counts", "--count", help="choose the database", action="store")
-parser.add_argument("-name1", "--n1", help="choose the database", action="store")
-parser.add_argument("-name2", "--n2", help="choose the database", action="store")
-args = parser.parse_args()
-
-
-###############################################################################################################################################################################################
-
-if __name__ == '__main__':
-
- starttime = time.time()
-
- q1 = Queue()
- q2 = Queue()
- lock = Lock()
- manager = Manager()
-
- mature_mirnas=manager.list()
- ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name))
- ps_mature.start()
-
- args.control[0]=args.control[0][1:]
- args.control[len(args.control)-1][:-1]
- control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)]
-
- args.treated[0]=args.treated[0][1:]
- args.treated[len(args.treated)-1][:-1]
- treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)]
-
-
-##############  Detection of templated isoforms ################
-
- radar = manager.list([0,0,0,0])
- samples = manager.list()
- data= manager.list()
- names_con=manager.list()
- samples_mirna_names=manager.list()
- deseq=manager.list()
- unmap_seq=manager.Value('i',0)
- unmap_counts=manager.Value('i',0)
- LH2E_names=manager.list()
- ini_c_samples = manager.list()
-
-
- radar1 = manager.list([0,0,0,0])
- samples1 = manager.list()
- data1 = manager.list()
- names_tre = manager.list()
- samples_mirna_names1=manager.list()
- deseq1=manager.list()
- unmap1_seq = manager.Value('i',0)
- unmap1_counts = manager.Value('i',0)
- LH8E_names=manager.list()
- ini_t_samples = manager.list()
- ps_mature.join()
-
-
- mature_mirnas=list(mature_mirnas)
-
-
- starttime1 = time.time()
- ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control]
- ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated])
-
- [p.start() for p in ps_sam]
- [p.join() for p in ps_sam]
- print('SAM took {} seconds'.format(time.time() - starttime1))
-
- ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))]
- ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))])
- [x.start() for x in ps_hist]
-
- starttime200=time.time()
-
- sc = list(samples)
- st = list(samples1)
-
- names_con=list(names_con)
- names_tre=list(names_tre)
- samples_mirna_names=list(samples_mirna_names)
- samples_mirna_names.sort()
- samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names))
-
- samples_mirna_names1=list(samples_mirna_names1)
- samples_mirna_names1.sort()
- samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1))
-
- deseq=list(deseq)
- deseq1=list(deseq1)
-
- new_names_con=manager.list()
- new_names_tre=manager.list()
- new_deseq=manager.list()
- new_deseq1=manager.list()
- ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)]
- ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)])
-
- [z.start() for z in ps_deseq]
- [z.join() for z in ps_deseq]
- new_deseq=list(new_deseq)
- new_deseq1=list(new_deseq1)
-
- LH2E=[[x[0],x[2]] for x in new_deseq[0]]
- [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq]
-
- LH8E=[[x[0],x[2]] for x in new_deseq1[0]]
- [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1]
-
- print('Deseq took {} seconds'.format(time.time() - starttime200))
-
- merg_nam_LH2E=manager.list()
- merg_nam_LH8E=manager.list()
-
- LH2E_copy=copy.deepcopy(list(LH2E))
- LH8E_copy=copy.deepcopy(list(LH8E))
-
- fil_sort_tre=manager.list()
- fil_sort_con=manager.list()
- raw_sort_tre=manager.list()
- raw_sort_con=manager.list()
-
- ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con))
- ps_main.start()
-
- if args.anal=="2":
-    all_iso = manager.list()
-    ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso))
-    ps_non_iso.start()
-
- ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))]
- ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))])
- [x.start() for x in ps_merge]
- [x.join() for x in ps_merge]
-
- merg_nam_LH2E=list(merg_nam_LH2E)
- merg_nam_LH8E=list(merg_nam_LH8E)
-
- starttime2 = time.time()
- procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data]
- procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1])
- procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))])
- if args.anal == "1":
-    procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))])
-
- [p.start() for p in procs]
-
-
- if args.anal=="1":
-    [x.join() for x in ps_hist]
-    [p.join() for p in procs]
-    ps_pdf = Process(target=pdf_before_DE,args=(args.anal))
-    ps_pdf.start()
-
- print('Graphs took {} seconds'.format(time.time() - starttime2))
-
- ps_main.join()
-
- fil_sort_con=list(fil_sort_con)
- fil_sort_tre=list(fil_sort_tre)
- if fil_sort_con==[]:
-    fil_sort_con=raw_sort_con
-    fil_sort_tre=raw_sort_tre
-
- raw_sort_con=list(raw_sort_con)
- raw_sort_tre=list(raw_sort_tre)
- names_con=list(new_names_con)
- names_tre=list(new_names_tre)
-
- ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2))
- ps_write.start()
-
- ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))]
- ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))])
- [p.start() for p in ps1_matrix]
-
- if args.anal=="1":
-    ps_pdf.join()
- if args.anal=="2":
-    [p.join() for p in procs]
-    [x.join() for x in ps_hist]
-
- ps_write.join()
- [p.join() for p in ps1_matrix]
-
-
-############################## Detection of Both  #######################################
-
- starttime10 = time.time()
-
- if args.anal == "2":
-
-  n_data= manager.list()
-  n_names_con=manager.list()
-  n_samples_mirna_names=manager.list()
-  n_deseq=manager.list()
-  n_LH2E_names=manager.list()
-
-  n_data1 = manager.list()
-  n_names_tre = manager.list()
-  n_samples_mirna_names1=manager.list()
-  n_deseq1=manager.list()
-  n_LH8E_names=manager.list()
-
-
-  new_mat_mirnas = list(mature_mirnas)
-  ps_non_iso.join()
-
-  all_iso=list(all_iso)
-  new_mat_mirnas.extend(all_iso)
-
-  starttime11=time.time()
-
-  ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control]
-  ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated])
-
-  [p.start() for p in ps_sam]
-  [p.join() for p in ps_sam]
-
-  print('Non-sam took {} seconds'.format(time.time() - starttime11))
-
-  starttime12=time.time()
-
-  n_names_con=list(n_names_con)
-  n_names_tre=list(n_names_tre)
-  n_samples_mirna_names=list(n_samples_mirna_names)
-  n_samples_mirna_names.sort()
-  n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names))
-
-  n_samples_mirna_names1=list(n_samples_mirna_names1)
-  n_samples_mirna_names1.sort()
-  n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1))
-
-  n_deseq=list(n_deseq)
-  n_deseq1=list(n_deseq1)
-
-  new_n_names_con=manager.list()
-  new_n_names_tre=manager.list()
-  n_new_deseq=manager.list()
-  n_new_deseq1=manager.list()
-  ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)]
-  ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)])
-
-  [x.start() for x in ps_deseq]
-  [x.join() for x in ps_deseq]
-  n_new_deseq=list(n_new_deseq)
-  n_new_deseq1=list(n_new_deseq1)
-
-  n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]]
-  [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq]
-
-  n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]]
-  [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1]
-
-  print('Non-deseq took {} seconds'.format(time.time() - starttime12))
-
-  merg_nam_n_LH2E=manager.list()
-  merg_nam_n_LH8E=manager.list()
-
-  n_LH2E_copy=copy.deepcopy(list(n_LH2E))
-  n_LH8E_copy=copy.deepcopy(list(n_LH8E))
-
-  n_sort_tre=manager.list()
-  n_sort_con=manager.list()
-
-  n_fil_sort_con=manager.list()
-  n_fil_sort_tre=manager.list()
-  n_raw_sort_con=manager.list()
-  n_raw_sort_tre=manager.list()
-
-  ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con))
-  ps_main.start()
-
-  ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))]
-  ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))])
-  [p.start() for p in ps_merge]
-  [p.join() for p in ps_merge]
-
-  merg_nam_n_LH2E=list(merg_nam_n_LH2E)
-  merg_nam_n_LH8E=list(merg_nam_n_LH8E)
-
-  procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data]
-  procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))])
-  procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))])
-  procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))])
-
-  starttime13=time.time()
-  [p.start() for p in procs]
-  [p.join() for p in procs]
-
-  print('Graphs took {} seconds'.format(time.time() - starttime13))
-
-  procs1 = Process(target=pdf_before_DE,args=(args.anal))
-  procs1.start()
-
-  starttime14=time.time()
-  ps_main.join()
-
-  n_fil_sort_con=list(n_fil_sort_con)
-  n_fil_sort_tre=list(n_fil_sort_tre)
-  if n_fil_sort_con==[]:
-     n_fil_sort_con=n_raw_sort_con
-     n_fil_sort_tre=n_raw_sort_tre
-
-  n_raw_sort_con=list(n_raw_sort_con)
-  n_raw_sort_tre=list(n_raw_sort_tre)
-  n_names_con=list(new_n_names_con)
-  n_names_tre=list(new_n_names_tre)
-
-  ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2))
-  ps_write.start()
-
-  ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))]
-  ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))])
-  [p.start() for p in ps1_matrix]
-
-  ps_write.join()
-  [p.join() for p in ps1_matrix]
-  procs1.join()
- print('That took {} seconds'.format(time.time() - starttime10))
- print('That took {} seconds'.format(time.time() - starttime))
-
-
-
-
-
-
-
-