Mercurial > repos > glogobyte > isoread
comparison mirbase_ultra_v2.py @ 20:3efb213d763f draft
Uploaded
| author | glogobyte |
|---|---|
| date | Thu, 22 Oct 2020 08:16:01 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 19:4233626308b4 | 20:3efb213d763f |
|---|---|
| 1 from mirbase_functions import * | |
| 2 from mirbase_graphs import * | |
| 3 import itertools | |
| 4 import time | |
| 5 import sys | |
| 6 import os | |
| 7 import urllib.request | |
| 8 import gzip | |
| 9 from multiprocessing import Process, Queue, Lock, Pool, Manager, Value | |
| 10 import subprocess | |
| 11 import argparse | |
| 12 from collections import OrderedDict | |
| 13 from matplotlib.backends.backend_pdf import PdfPages | |
| 14 import pandas as pd | |
| 15 from math import pi | |
| 16 import numpy as np | |
| 17 import matplotlib.pyplot as plt | |
| 18 from matplotlib.ticker import PercentFormatter | |
| 19 import seaborn as sns | |
| 20 import scipy.stats as stats | |
| 21 from plotnine import * | |
| 22 import math | |
| 23 import re | |
| 24 import matplotlib.ticker as mtick | |
| 25 import copy | |
| 26 | |
| 27 subprocess.call(['mkdir','-p', 'split1','split2','split3','split4','split11','split12','Counts','Diff/temp_con','Diff/temp_tre','Diff/n_temp_con','Diff/n_temp_tre']) | |
| 28 | |
| 29 parser = argparse.ArgumentParser() | |
| 30 parser.add_argument("-analysis", "--anal", help="choose type of analysis", action="store") | |
| 31 parser.add_argument("-con", "--control", help="input fastq file", nargs='+', default=[]) | |
| 32 parser.add_argument("-tre", "--treated", help="input fastq file", nargs='+', default=[] ) | |
| 33 parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store") | |
| 34 parser.add_argument("-gen", "--org_name", help="tool directory path", action="store") | |
| 35 parser.add_argument("-program", "--pro", help="choose type of analysis", action="store") | |
| 36 parser.add_argument("-f", "--flag", help="choose the database", action="store") | |
| 37 parser.add_argument("-umis", "--umi", help="choose the database", action="store") | |
| 38 parser.add_argument("-percentage", "--per", help="choose the database", action="store") | |
| 39 parser.add_argument("-counts", "--count", help="choose the database", action="store") | |
| 40 parser.add_argument("-name1", "--n1", help="choose the database", action="store") | |
| 41 parser.add_argument("-name2", "--n2", help="choose the database", action="store") | |
| 42 args = parser.parse_args() | |
| 43 | |
| 44 | |
| 45 ############################################################################################################################################################################################### | |
| 46 | |
| 47 if __name__ == '__main__': | |
| 48 | |
| 49 starttime = time.time() | |
| 50 | |
| 51 q1 = Queue() | |
| 52 q2 = Queue() | |
| 53 lock = Lock() | |
| 54 manager = Manager() | |
| 55 | |
| 56 mature_mirnas=manager.list() | |
| 57 ps_mature=Process(target=download_matures,args=(mature_mirnas,args.org_name)) | |
| 58 ps_mature.start() | |
| 59 | |
| 60 args.control[0]=args.control[0][1:] | |
| 61 args.control[len(args.control)-1][:-1] | |
| 62 control = [(args.control[i:i+2]) for i in range(0, len(args.control), 2)] | |
| 63 | |
| 64 args.treated[0]=args.treated[0][1:] | |
| 65 args.treated[len(args.treated)-1][:-1] | |
| 66 treated = [(args.treated[i:i+2]) for i in range(0, len(args.treated), 2)] | |
| 67 | |
| 68 | |
| 69 ############## Detection of templated isoforms ################ | |
| 70 | |
| 71 radar = manager.list([0,0,0,0]) | |
| 72 samples = manager.list() | |
| 73 data= manager.list() | |
| 74 names_con=manager.list() | |
| 75 samples_mirna_names=manager.list() | |
| 76 deseq=manager.list() | |
| 77 unmap_seq=manager.Value('i',0) | |
| 78 unmap_counts=manager.Value('i',0) | |
| 79 LH2E_names=manager.list() | |
| 80 ini_c_samples = manager.list() | |
| 81 | |
| 82 | |
| 83 radar1 = manager.list([0,0,0,0]) | |
| 84 samples1 = manager.list() | |
| 85 data1 = manager.list() | |
| 86 names_tre = manager.list() | |
| 87 samples_mirna_names1=manager.list() | |
| 88 deseq1=manager.list() | |
| 89 unmap1_seq = manager.Value('i',0) | |
| 90 unmap1_counts = manager.Value('i',0) | |
| 91 LH8E_names=manager.list() | |
| 92 ini_t_samples = manager.list() | |
| 93 ps_mature.join() | |
| 94 | |
| 95 | |
| 96 mature_mirnas=list(mature_mirnas) | |
| 97 | |
| 98 | |
| 99 starttime1 = time.time() | |
| 100 ps_sam = [Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,samples,data,names_con,unmap_seq,samples_mirna_names,deseq,LH2E_names,"0",ini_c_samples,unmap_counts)) for path in control] | |
| 101 ps_sam.extend([Process(target=sam,args=(mature_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,samples1,data1,names_tre,unmap1_seq,samples_mirna_names1,deseq1,LH8E_names,"0",ini_t_samples,unmap1_counts)) for path in treated]) | |
| 102 | |
| 103 [p.start() for p in ps_sam] | |
| 104 [p.join() for p in ps_sam] | |
| 105 print('SAM took {} seconds'.format(time.time() - starttime1)) | |
| 106 | |
| 107 ps_hist=[Process(target=hist_red,args=(ini_c_samples,'c'))] | |
| 108 ps_hist.extend([Process(target=hist_red,args=(ini_t_samples,'t'))]) | |
| 109 [x.start() for x in ps_hist] | |
| 110 | |
| 111 starttime200=time.time() | |
| 112 | |
| 113 sc = list(samples) | |
| 114 st = list(samples1) | |
| 115 | |
| 116 names_con=list(names_con) | |
| 117 names_tre=list(names_tre) | |
| 118 samples_mirna_names=list(samples_mirna_names) | |
| 119 samples_mirna_names.sort() | |
| 120 samples_mirna_names=list(samples_mirna_names for samples_mirna_names,_ in itertools.groupby(samples_mirna_names)) | |
| 121 | |
| 122 samples_mirna_names1=list(samples_mirna_names1) | |
| 123 samples_mirna_names1.sort() | |
| 124 samples_mirna_names1=list(samples_mirna_names1 for samples_mirna_names1,_ in itertools.groupby(samples_mirna_names1)) | |
| 125 | |
| 126 deseq=list(deseq) | |
| 127 deseq1=list(deseq1) | |
| 128 | |
| 129 new_names_con=manager.list() | |
| 130 new_names_tre=manager.list() | |
| 131 new_deseq=manager.list() | |
| 132 new_deseq1=manager.list() | |
| 133 ps_deseq=[Process(target=deseqe2,args=(sampp,samples_mirna_names,lock,new_deseq,names_con[i],new_names_con)) for i,sampp in enumerate(deseq)] | |
| 134 ps_deseq.extend([Process(target=deseqe2,args=(sampp,samples_mirna_names1,lock,new_deseq1,names_tre[i],new_names_tre)) for i,sampp in enumerate(deseq1)]) | |
| 135 | |
| 136 [z.start() for z in ps_deseq] | |
| 137 [z.join() for z in ps_deseq] | |
| 138 new_deseq=list(new_deseq) | |
| 139 new_deseq1=list(new_deseq1) | |
| 140 | |
| 141 LH2E=[[x[0],x[2]] for x in new_deseq[0]] | |
| 142 [LH2E[i].append(y[i][1]) for i,_ in enumerate(LH2E) for y in new_deseq] | |
| 143 | |
| 144 LH8E=[[x[0],x[2]] for x in new_deseq1[0]] | |
| 145 [LH8E[i].append(y[i][1]) for i,_ in enumerate(LH8E) for y in new_deseq1] | |
| 146 | |
| 147 print('Deseq took {} seconds'.format(time.time() - starttime200)) | |
| 148 | |
| 149 merg_nam_LH2E=manager.list() | |
| 150 merg_nam_LH8E=manager.list() | |
| 151 | |
| 152 LH2E_copy=copy.deepcopy(list(LH2E)) | |
| 153 LH8E_copy=copy.deepcopy(list(LH8E)) | |
| 154 | |
| 155 fil_sort_tre=manager.list() | |
| 156 fil_sort_con=manager.list() | |
| 157 raw_sort_tre=manager.list() | |
| 158 raw_sort_con=manager.list() | |
| 159 | |
| 160 ps_main = Process(target=main_temp,args=(list(LH2E), samples_mirna_names, list(LH8E), samples_mirna_names1,1,list(names_con),list(names_tre),fil_sort_tre,fil_sort_con,raw_sort_tre,raw_sort_con)) | |
| 161 ps_main.start() | |
| 162 | |
| 163 if args.anal=="2": | |
| 164 all_iso = manager.list() | |
| 165 ps_non_iso = Process(target=non_template_ref,args=(sc,st,all_iso)) | |
| 166 ps_non_iso.start() | |
| 167 | |
| 168 ps_merge = [Process(target=merging_names,args=(LH2E_copy,merg_nam_LH2E))] | |
| 169 ps_merge.extend([Process(target=merging_names,args=(LH8E_copy,merg_nam_LH8E))]) | |
| 170 [x.start() for x in ps_merge] | |
| 171 [x.join() for x in ps_merge] | |
| 172 | |
| 173 merg_nam_LH2E=list(merg_nam_LH2E) | |
| 174 merg_nam_LH8E=list(merg_nam_LH8E) | |
| 175 | |
| 176 starttime2 = time.time() | |
| 177 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data] | |
| 178 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],1)) for x in data1]) | |
| 179 procs.extend([Process(target=make_spider,args=(merg_nam_LH2E,merg_nam_LH8E))]) | |
| 180 if args.anal == "1": | |
| 181 procs.extend([Process(target=pie_temp,args=(merg_nam_LH2E,unmap_seq.value,unmap_counts.value,merg_nam_LH8E,unmap1_seq.value,unmap1_counts.value))]) | |
| 182 | |
| 183 [p.start() for p in procs] | |
| 184 | |
| 185 | |
| 186 if args.anal=="1": | |
| 187 [x.join() for x in ps_hist] | |
| 188 [p.join() for p in procs] | |
| 189 ps_pdf = Process(target=pdf_before_DE,args=(args.anal)) | |
| 190 ps_pdf.start() | |
| 191 | |
| 192 print('Graphs took {} seconds'.format(time.time() - starttime2)) | |
| 193 | |
| 194 ps_main.join() | |
| 195 | |
| 196 fil_sort_con=list(fil_sort_con) | |
| 197 fil_sort_tre=list(fil_sort_tre) | |
| 198 if fil_sort_con==[]: | |
| 199 fil_sort_con=raw_sort_con | |
| 200 fil_sort_tre=raw_sort_tre | |
| 201 | |
| 202 raw_sort_con=list(raw_sort_con) | |
| 203 raw_sort_tre=list(raw_sort_tre) | |
| 204 names_con=list(new_names_con) | |
| 205 names_tre=list(new_names_tre) | |
| 206 | |
| 207 ps_write = Process(target=write_main,args=(raw_sort_con, raw_sort_tre, fil_sort_con, fil_sort_tre, names_con,names_tre,1,args.per,args.n1,args.n2)) | |
| 208 ps_write.start() | |
| 209 | |
| 210 ps1_matrix = [Process(target=ssamples,args=(names_con,fil_sort_con,"Diff/temp_con/",0))] | |
| 211 ps1_matrix.extend([Process(target=ssamples,args=(names_tre,fil_sort_tre,"Diff/temp_tre/",0))]) | |
| 212 [p.start() for p in ps1_matrix] | |
| 213 | |
| 214 if args.anal=="1": | |
| 215 ps_pdf.join() | |
| 216 if args.anal=="2": | |
| 217 [p.join() for p in procs] | |
| 218 [x.join() for x in ps_hist] | |
| 219 | |
| 220 ps_write.join() | |
| 221 [p.join() for p in ps1_matrix] | |
| 222 | |
| 223 | |
| 224 ############################## Detection of Both ####################################### | |
| 225 | |
| 226 starttime10 = time.time() | |
| 227 | |
| 228 if args.anal == "2": | |
| 229 | |
| 230 n_data= manager.list() | |
| 231 n_names_con=manager.list() | |
| 232 n_samples_mirna_names=manager.list() | |
| 233 n_deseq=manager.list() | |
| 234 n_LH2E_names=manager.list() | |
| 235 | |
| 236 n_data1 = manager.list() | |
| 237 n_names_tre = manager.list() | |
| 238 n_samples_mirna_names1=manager.list() | |
| 239 n_deseq1=manager.list() | |
| 240 n_LH8E_names=manager.list() | |
| 241 | |
| 242 | |
| 243 new_mat_mirnas = list(mature_mirnas) | |
| 244 ps_non_iso.join() | |
| 245 | |
| 246 all_iso=list(all_iso) | |
| 247 new_mat_mirnas.extend(all_iso) | |
| 248 | |
| 249 starttime11=time.time() | |
| 250 | |
| 251 ps_sam = [Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"c",lock,n_data,n_names_con,n_deseq,n_samples_mirna_names,n_LH2E_names)) for path in control] | |
| 252 ps_sam.extend([Process(target=non_sam,args=(new_mat_mirnas,path[1][:-1],path[0].split(",")[0],"t",lock,n_data1,n_names_tre,n_deseq1,n_samples_mirna_names1,n_LH8E_names)) for path in treated]) | |
| 253 | |
| 254 [p.start() for p in ps_sam] | |
| 255 [p.join() for p in ps_sam] | |
| 256 | |
| 257 print('Non-sam took {} seconds'.format(time.time() - starttime11)) | |
| 258 | |
| 259 starttime12=time.time() | |
| 260 | |
| 261 n_names_con=list(n_names_con) | |
| 262 n_names_tre=list(n_names_tre) | |
| 263 n_samples_mirna_names=list(n_samples_mirna_names) | |
| 264 n_samples_mirna_names.sort() | |
| 265 n_samples_mirna_names=list(n_samples_mirna_names for n_samples_mirna_names,_ in itertools.groupby(n_samples_mirna_names)) | |
| 266 | |
| 267 n_samples_mirna_names1=list(n_samples_mirna_names1) | |
| 268 n_samples_mirna_names1.sort() | |
| 269 n_samples_mirna_names1=list(n_samples_mirna_names1 for n_samples_mirna_names1,_ in itertools.groupby(n_samples_mirna_names1)) | |
| 270 | |
| 271 n_deseq=list(n_deseq) | |
| 272 n_deseq1=list(n_deseq1) | |
| 273 | |
| 274 new_n_names_con=manager.list() | |
| 275 new_n_names_tre=manager.list() | |
| 276 n_new_deseq=manager.list() | |
| 277 n_new_deseq1=manager.list() | |
| 278 ps_deseq=[Process(target=deseqe2,args=(sampp,n_samples_mirna_names,lock,n_new_deseq,n_names_con[i],new_n_names_con)) for i,sampp in enumerate(n_deseq)] | |
| 279 ps_deseq.extend([Process(target=deseqe2,args=(sampp,n_samples_mirna_names1,lock,n_new_deseq1,n_names_tre[i],new_n_names_tre)) for i,sampp in enumerate(n_deseq1)]) | |
| 280 | |
| 281 [x.start() for x in ps_deseq] | |
| 282 [x.join() for x in ps_deseq] | |
| 283 n_new_deseq=list(n_new_deseq) | |
| 284 n_new_deseq1=list(n_new_deseq1) | |
| 285 | |
| 286 n_LH2E=[[x[0],x[2]] for x in n_new_deseq[0]] | |
| 287 [n_LH2E[i].append(y[i][1]) for i,_ in enumerate(n_LH2E) for y in n_new_deseq] | |
| 288 | |
| 289 n_LH8E=[[x[0],x[2]] for x in n_new_deseq1[0]] | |
| 290 [n_LH8E[i].append(y[i][1]) for i,_ in enumerate(n_LH8E) for y in n_new_deseq1] | |
| 291 | |
| 292 print('Non-deseq took {} seconds'.format(time.time() - starttime12)) | |
| 293 | |
| 294 merg_nam_n_LH2E=manager.list() | |
| 295 merg_nam_n_LH8E=manager.list() | |
| 296 | |
| 297 n_LH2E_copy=copy.deepcopy(list(n_LH2E)) | |
| 298 n_LH8E_copy=copy.deepcopy(list(n_LH8E)) | |
| 299 | |
| 300 n_sort_tre=manager.list() | |
| 301 n_sort_con=manager.list() | |
| 302 | |
| 303 n_fil_sort_con=manager.list() | |
| 304 n_fil_sort_tre=manager.list() | |
| 305 n_raw_sort_con=manager.list() | |
| 306 n_raw_sort_tre=manager.list() | |
| 307 | |
| 308 ps_main = Process(target=main_temp,args=(list(n_LH2E), n_samples_mirna_names, list(n_LH8E), n_samples_mirna_names1,1,list(n_names_con),list(n_names_tre),n_fil_sort_tre,n_fil_sort_con,n_raw_sort_tre,n_raw_sort_con)) | |
| 309 ps_main.start() | |
| 310 | |
| 311 ps_merge = [Process(target=merging_names,args=(n_LH2E_copy,merg_nam_n_LH2E))] | |
| 312 ps_merge.extend([Process(target=merging_names,args=(n_LH8E_copy,merg_nam_n_LH8E))]) | |
| 313 [p.start() for p in ps_merge] | |
| 314 [p.join() for p in ps_merge] | |
| 315 | |
| 316 merg_nam_n_LH2E=list(merg_nam_n_LH2E) | |
| 317 merg_nam_n_LH8E=list(merg_nam_n_LH8E) | |
| 318 | |
| 319 procs = [Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data] | |
| 320 procs.extend([Process(target=DB_write,args=(x[0],x[1],x[2],x[3],2)) for x in n_data1]) | |
| 321 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH2E,'c'))]) | |
| 322 procs.extend([Process(target=logo_seq_red,args=(merg_nam_n_LH8E,'t'))]) | |
| 323 procs.extend([Process(target=pie_non_temp,args=(merg_nam_LH2E,merg_nam_n_LH2E,merg_nam_LH8E,merg_nam_n_LH8E,unmap_seq.value,unmap1_seq.value,unmap_counts.value,unmap1_counts.value))]) | |
| 324 | |
| 325 starttime13=time.time() | |
| 326 [p.start() for p in procs] | |
| 327 [p.join() for p in procs] | |
| 328 | |
| 329 print('Graphs took {} seconds'.format(time.time() - starttime13)) | |
| 330 | |
| 331 procs1 = Process(target=pdf_before_DE,args=(args.anal)) | |
| 332 procs1.start() | |
| 333 | |
| 334 starttime14=time.time() | |
| 335 ps_main.join() | |
| 336 | |
| 337 n_fil_sort_con=list(n_fil_sort_con) | |
| 338 n_fil_sort_tre=list(n_fil_sort_tre) | |
| 339 if n_fil_sort_con==[]: | |
| 340 n_fil_sort_con=n_raw_sort_con | |
| 341 n_fil_sort_tre=n_raw_sort_tre | |
| 342 | |
| 343 n_raw_sort_con=list(n_raw_sort_con) | |
| 344 n_raw_sort_tre=list(n_raw_sort_tre) | |
| 345 n_names_con=list(new_n_names_con) | |
| 346 n_names_tre=list(new_n_names_tre) | |
| 347 | |
| 348 ps_write = Process(target=write_main,args=(n_raw_sort_con, n_raw_sort_tre,n_fil_sort_con, n_fil_sort_tre, n_names_con, n_names_tre,2,args.per,args.n1,args.n2)) | |
| 349 ps_write.start() | |
| 350 | |
| 351 ps1_matrix = [Process(target=ssamples1,args=(n_names_con,n_fil_sort_con,names_con,fil_sort_con,"Diff/n_temp_con/",0))] | |
| 352 ps1_matrix.extend([Process(target=ssamples1,args=(n_names_tre,n_fil_sort_tre,names_tre,fil_sort_tre,"Diff/n_temp_tre/",0))]) | |
| 353 [p.start() for p in ps1_matrix] | |
| 354 | |
| 355 ps_write.join() | |
| 356 [p.join() for p in ps1_matrix] | |
| 357 procs1.join() | |
| 358 print('That took {} seconds'.format(time.time() - starttime10)) | |
| 359 print('That took {} seconds'.format(time.time() - starttime)) | |
| 360 | |
| 361 | |
| 362 | |
| 363 | |
| 364 | |
| 365 | |
| 366 | |
| 367 |
