Mercurial > repos > greg > ideas_genome_tracks
view ideas_genome_tracks.R @ 1:fe50d06f7052 draft
Uploaded
| author | greg |
|---|---|
| date | Wed, 15 Nov 2017 13:03:16 -0500 |
| parents | a842ab7e3bba |
| children | d1703a15c393 |
line wrap: on
line source
#!/usr/bin/env Rscript suppressPackageStartupMessages(library("optparse")) option_list <- list( make_option(c("-b", "--build"), action="store", dest="build", help="Genome build"), make_option(c("-c", "--chrom_len_file"), action="store", dest="chrom_len_file", help="Chromosome length file"), make_option(c("-d", "--header"), action="store", dest="header", default=NULL, help="Track header"), make_option(c("-e", "--state_name"), action="store", dest="state_name", help="State name"), make_option(c("-f", "--hub_id"), action="store", dest="hub_id", help="Not sure what this is"), make_option(c("-i", "--cell_info"), action="store", dest="cell_info", default=NULL, help="Not sure what this is"), make_option(c("-n", "--hub_name"), action="store", dest="hub_name", default=NULL, help="Not sure what this is"), make_option(c("-p", "--input_dir_para"), action="store", dest="input_dir_para", help="Directory containing .para outputs from IDEAS"), make_option(c("-q", "--input_dir_state"), action="store", dest="input_dir_state", help="Directory containing .state outputs from IDEAS"), make_option(c("-u", "--output_track_db"), action="store", dest="output_track_db", help="Output track db file"), make_option(c("-v", "--output_build"), action="store", dest="output_build", help="Output genome build file"), make_option(c("-w", "--output_hub"), action="store", dest="output_hub", help="Output hub file") ) parser <- OptionParser(usage="%prog [options] file", option_list=option_list) args <- parse_args(parser, positional_arguments=TRUE) opt <- args$options create_color_scheme = function(input_dir_para) { # Create the color scheme. para_files <- list.files(path=input_dir_para, full.names=True); mc = NULL; x = read.table(opt$parameter_files, comment="!", nrows=1); l = as.integer(regexpr("\\*", as.matrix(x))); l = min(which(l>0)) - 2; x = as.matrix(read.table(opt$parameter_files)); if (length(para_files) > 1) { for (i in 2:length(para_files)) { x = x + as.matrix(read.table(paste(para_files[i], ".para", sep=""))); } } p = x[,1] / sum(x[,1]); m = array(as.matrix(x[,1:l+1] / x[,1]), dim=c(dim(x)[1], l)); return get_state_color(m, mc); } get_state_color = function(statemean, markcolor=NULL) { if (length(markcolor) == 0) { markcolor = rep("", dim(statemean)[2]); markcolor[order(apply(statemean, 2, sd), decreasing=T)] = hsv((1:dim(statemean)[2]-1)/dim(statemean)[2], 1, 1); markcolor = t(col2rgb(markcolor)); } rg = apply(statemean, 1, range); mm = NULL; for(i in 1:dim(statemean)[1]) { mm = rbind(mm, (statemean[i,]-rg[1,i])/(rg[2,i]-rg[1,i]+1e-10)); } mm = mm^6; mm = mm / (apply(mm, 1, sum) + 1e-10); mycol = mm%*%markcolor; s = apply(statemean, 1, max); s = (s - min(s)) / (max(s) - min(s) + 1e-10); h = t(apply(mycol, 1, function(x){rgb2hsv(x[1], x[2], x[3])})); h[,2] = h[,2] * s; h = apply(h, 1, function(x){hsv(x[1], x[2], x[3])}); rt = cbind(apply(t(col2rgb(h)), 1, function(x){paste(x, collapse=",")}), h); return(rt); } create_track = function(input_dir_state, chrom_len_file, outpref, state_color, header, state_name) { state_files <- list.files(path=input_dir_state, full.names=True); genome_size = read.table(chrom_len_file); g = NULL; for(i in state_files) { message(paste(i, " ", sep=""), appendLF=F); tg = as.matrix(fread(i)); t = NULL; for(j in 1:dim(genome_size)[1]) { t = c(t, which(tg[,2]==as.character(genome_size[j, 1]) & as.numeric(tg[,4]) > as.numeric(genome_size[j, 2]))); } if (length(t) > 0) { tg = tg[-t,]; } t = which(is.na(match(tg[,2], genome_size[,1]))==T); if (length(t)>0) { tg = tg[-t,]; } print(c(dim(g),dim(tg))); g = rbind(g, tg); } message("Done"); uchr = sort(unique(as.character(g[,2]))); g1 = NULL; for(i in uchr){ t = which(g[,2]==i); g1 = rbind(g1, g[t[order(as.integer(g[t, 3]))],]); } g = NULL; chr = as.character(g1[,2]); posst = as.numeric(g1[,3]); posed = as.numeric(g1[,4]); state = as.matrix(g1[,5:(dim(g1)[2]-1)]); if (length(state_name)==0) { state_name=0:max(state); } L = dim(g1)[1]; n = dim(state)[2]; if (length(header) > 0) { colnames(g1) = header; } cells = as.character(colnames(g1)[5:(dim(g1)[2]-1)]); g1 = NULL; message("Generating tracks"); options(scipen=999); tt = which(chr[2:L]!=chr[2:L-1]); tt = c(tt,which(posst[2:L]!=posed[2:L-1])); tt = sort(unique(tt)); for(i in 1:n) { tstate = state[,i]; t = c(tt,which(tstate[2:L]!=tstate[2:L-1])); t = sort(unique(t)); t0 = c(0, t) + 1; t = c(t, L); np = cbind(chr[t], posst[t0], posed[t], tstate[t]); x = cbind(np[,1:3], state_name[as.integer(np[,4])+1], 1000, ".", np[,2:3], state_color[as.numeric(np[,4])+1]); write.table(as.matrix(x), paste(outpref, i, "bed1", sep="."), quote=F, row.names=F, col.names=F); print(x[1,]); system(paste("bedToBigBed ", outpref, ".", i, ".bed1 ", chrom_len_file, " ", outpref, ".", i, ".bb", sep="")); system(paste("rm ", paste(outpref, i, "bed1", sep="."))); } return(cells); } create_track_db = function(input_dir_state, chrom_len_file, track_file_name, state_color, header, state_name, hub_name, cell_info) { cells = create_track(input_dir_state, chrom_len_file, track_file_name, state_color, header, state_name); if (length(cell_info) == 0) { cell_info = cbind(cells, cells, cells, "#000000"); cell_info = array(cell_info, dim=c(length(cells), 4)); } cell_info = as.matrix(cell_info); track_db = NULL; for(i in 1:length(cells)) { ii = which(cells[i] == cell_info[,1]); if (length(ii) == 0) { next; } ii = ii[1]; track_db = c(track_db, paste("track bigBed", i, sep="")); track_db = c(track_db, paste("priority", ii)); track_db = c(track_db, "type bigBed 9 ."); track_db = c(track_db, "itemRgb on"); track_db = c(track_db, "maxItems 100000"); track_db = c(track_db, paste("shortLabel", cell_info[ii, 2])); track_db = c(track_db, paste("longLabel", paste(hub_name, cell_info[ii, 3]))); track_db = c(track_db, paste("color", paste(c(col2rgb(cell_info[ii, 4])), collapse=","))); track_db = c(track_db, "visibility dense"); track_db = c(track_db, ""); } } if (length(opt$hub_name) == 0) { hub_name = opt$hub_id; } # Create the tracks output directory. tracks_output_dir = paste("tracks_", opt$hub_id, "/", sep=""); dir.create(tracks_output_dir, showWarnings=FALSE); # Create the color scheme. state_color <- create_color_scheme(opt$input_dir_para); # Create the tracks. track_db <- create_track_db(opt$input_dir_state, opt$chrom_len_file, paste(tracks_output_dir, opt$hub_id, sep=""), state_color, opt$header, opt$state_name, opt$hub_name, opt$cell_info); # Write the outputs. write.table(track_db, opt$output_track_db, quote=F, row.names=F, col.names=F); write.table(c(paste("genome", opt$build), opt$output_build), paste(tracks_output_dir, "genomes_", opt$hub_id, ".txt", sep=""), quote=F, row.names=F, col.names=F); write.table(c(paste("hub", opt$hub_id), paste("shortLabel", opt$hub_id), paste("longLabel", opt$hub_name), paste("genomesFile genomes_", opt$hub_id, ".txt", sep=""), "email yzz2 at psu.edu"), opt$output_hub, quote=F, row.names=F, col.names=F);
