Mercurial > repos > greg > ideas_genome_tracks
changeset 83:2f49b6bddccc draft
Uploaded
author | greg |
---|---|
date | Thu, 04 Jan 2018 14:44:38 -0500 |
parents | cf0a62d084dc |
children | daeba7e9dd11 |
files | ideas_genome_tracks.R |
diffstat | 1 files changed, 54 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/ideas_genome_tracks.R Thu Jan 04 14:44:24 2018 -0500 +++ b/ideas_genome_tracks.R Thu Jan 04 14:44:38 2018 -0500 @@ -4,22 +4,22 @@ suppressPackageStartupMessages(library("optparse")) option_list <- list( - make_option(c("--build"), action="store", dest="build", help="Genome build"), - make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", help="Chromosome length file"), - make_option(c("--email"), action="store", dest="email", help="User email address"), - make_option(c("--galaxy_url"), action="store", dest="galaxy_url", help="Galaxy instance base URL"), - make_option(c("--hub_long_label"), action="store", dest="hub_long_label", help="Hub long label"), - make_option(c("--hub_name"), action="store", dest="hub_name", default=NULL, help="Hub name without spaces"), - make_option(c("--hub_short_label"), action="store", dest="hub_short_label", help="Hub short label"), - make_option(c("--input_dir_para"), action="store", dest="input_dir_para", help="Directory containing .para outputs from IDEAS"), - make_option(c("--input_dir_state"), action="store", dest="input_dir_state", help="Directory containing .state outputs from IDEAS"), - make_option(c("--output_trackhub"), action="store", dest="output_trackhub", help="Output hub file"), - make_option(c("--output_trackhub_files_path"), action="store", dest="output_trackhub_files_path", help="Output hub extra files path"), - make_option(c("--output_trackhub_id"), action="store", dest="output_trackhub_id", help="Encoded output_trackhub dataset id"), - make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"), - make_option(c("--state_colors"), action="store", dest="state_colors", default=NULL, help="List of state colors"), - make_option(c("--state_indexes"), action="store", dest="state_indexes", default=NULL, help="List of state indexes"), - make_option(c("--state_names"), action="store", dest="state_names", default=NULL, help="List of state names") + make_option(c("--build"), action="store", dest="build", help="Genome build"), + make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", help="Chromosome length file"), + make_option(c("--email"), action="store", dest="email", help="User email address"), + make_option(c("--galaxy_url"), action="store", dest="galaxy_url", help="Galaxy instance base URL"), + make_option(c("--hub_long_label"), action="store", dest="hub_long_label", help="Hub long label"), + make_option(c("--hub_name"), action="store", dest="hub_name", default=NULL, help="Hub name without spaces"), + make_option(c("--hub_short_label"), action="store", dest="hub_short_label", help="Hub short label"), + make_option(c("--input_dir_para"), action="store", dest="input_dir_para", help="Directory containing .para outputs from IDEAS"), + make_option(c("--input_dir_state"), action="store", dest="input_dir_state", help="Directory containing .state outputs from IDEAS"), + make_option(c("--output_trackhub"), action="store", dest="output_trackhub", help="Output hub file"), + make_option(c("--output_trackhub_files_path"), action="store", dest="output_trackhub_files_path", help="Output hub extra files path"), + make_option(c("--output_trackhub_id"), action="store", dest="output_trackhub_id", help="Encoded output_trackhub dataset id"), + make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"), + make_option(c("--state_colors"), action="store", dest="state_colors", default=NULL, help="List of state colors"), + make_option(c("--state_indexes"), action="store", dest="state_indexes", default=NULL, help="List of state indexes"), + make_option(c("--state_names"), action="store", dest="state_names", default=NULL, help="List of state names") ) parser <- OptionParser(usage="%prog [options] file", option_list=option_list) @@ -41,7 +41,8 @@ create_cells = function(input_dir_state, chrom_len_file, base_track_file_name, state_indexes, state_names, state_colors) { # Create everything needed, including the bigbed file, # to render the tracks within the UCSC track hub. - state_files <- list.files(path=input_dir_state, full.names=TRUE); + state_files = list.files(path=input_dir_state, full.names=TRUE); + cell_type_names = get_cell_type_names(state_files[1]); genome_size = read.table(chrom_len_file); g = NULL; for(i in state_files) { @@ -70,8 +71,9 @@ posst = as.numeric(g1[,3]); posed = as.numeric(g1[,4]); state = as.matrix(g1[,5:(dim(g1)[2]-1)]); - state_name = 0:max(state); L = dim(g1)[1]; + # Here n will be the same length as the + # list of cell_type_names defined above. n = dim(state)[2]; cells = as.character(colnames(g1)[5:(dim(g1)[2]-1)]); options(scipen=999); @@ -79,6 +81,7 @@ tt = c(tt, which(posst[2:L]!=posed[2:L-1])); tt = sort(unique(tt)); for(i in 1:n) { + state_name = cell_type_names[i]; tstate = state[,i]; t = c(tt, which(tstate[2:L]!=tstate[2:L-1])); t = sort(unique(t)); @@ -88,7 +91,7 @@ track_file_name_bed_unsorted <- get_track_file_name(base_track_file_name, i, "bed_unsorted"); track_file_name_bed <- get_track_file_name(base_track_file_name, i, "bed"); track_file_name_bigbed <- get_track_file_name(base_track_file_name, i, "bigbed"); - x = cbind(np[, 1:3], state_name[as.integer(np[,4])+1], 1000, ".", np[,2:3]); + x = cbind(np[, 1:3], state_name, 1000, ".", np[,2:3]); write.table(as.matrix(x), track_file_name_bed_unsorted, quote=F, row.names=F, col.names=F); cmd = paste("LC_COLLATE=C sort -k1,1 -k2,2n < ", track_file_name_bed_unsorted, " > ", track_file_name_bed); system(cmd); @@ -125,7 +128,7 @@ s_indexes = c(s_indexes, as.integer(item)); } # Split state_names into a list of strings. - name_str = as.character(state_colors); + name_str = as.character(state_names); items = strsplit(name_str, ","); item_index = 0; for (item in items) { @@ -145,8 +148,16 @@ } base_track_file_name <- paste(tracks_dir, hub_name, sep=""); cells = create_cells(input_dir_state, chrom_len_file, base_track_file_name, s_indexes, s_names, s_colors); + cell_info = cbind(cells, cells, cells, "#000000"); + cell_info = array(cell_info, dim=c(length(cells), 4)); + cell_info = as.matrix(cell_info); track_db = NULL; for (i in 1:length(cells)) { + ii = which(cells[i] == cell_info[,1]); + if(length(ii) == 0) { + next; + } + ii = ii[1]; # Get the color for the current state. color <- paste(c(col2rgb(s_colors[i])), collapse=","); # Get the bigDataUrl. @@ -155,8 +166,8 @@ track_db = c(track_db, paste("track ", hub_name, "_track_", i, sep="")); track_db = c(track_db, "type bigBed"); track_db = c(track_db, paste("bigDataUrl", big_data_url, sep=" ")); - track_db = c(track_db, paste("shortLabel", hub_short_label, sep=" ")); - track_db = c(track_db, paste("longLabel", hub_long_label, sep=" ")); + track_db = c(track_db, paste("shortLabel", cell_info[ii, 2], sep=" ")); + track_db = c(track_db, paste("longLabel", paste(hub_name, cell_info[ii, 3], sep=" "))); track_db = c(track_db, paste("priority", i)); track_db = c(track_db, "itemRgb on"); track_db = c(track_db, "maxItems 100000"); @@ -173,6 +184,27 @@ return(s) } +get_cell_type_names = function(state_file) { + # The first line of a state file is a comment + # that looks something like this: + # ID CHR POSst POSed E001 E002 PosClass + # The cell type names are the elemets whose + # 1-based indexes start at 5 and end with -1, + # which in the above case are E001 and E002. + fh = file(state_file,"r"); + line = readLines(fh, n=1); + close(fh); + # Split line into a list of strings. + items = strsplit(line, "\\s+")[[1]]; + # Extract the cell type names into a list. + last_cell_type_name_index = length(items) -1; + cell_type_names = c(); + for (i in 5:last_cell_type_name_index) { + cell_type_names = c(cell_type_names, items[i]); + } + return(cell_type_names); +} + get_track_file_name = function(base_track_file_name, index, ext) { track_file_name <- paste(base_track_file_name, index, ext, sep="."); return(track_file_name);