| 
0
 | 
     1 #!/usr/bin/env Rscript
 | 
| 
 | 
     2 
 | 
| 
5
 | 
     3 suppressPackageStartupMessages(library("data.table"))
 | 
| 
0
 | 
     4 suppressPackageStartupMessages(library("optparse"))
 | 
| 
47
 | 
     5 suppressPackageStartupMessages(library("viridisLite"))
 | 
| 
0
 | 
     6 
 | 
| 
 | 
     7 option_list <- list(
 | 
| 
54
 | 
     8         make_option(c("--build"), action="store", dest="build", help="Genome build"),
 | 
| 
 | 
     9         make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", help="Chromosome length file"),
 | 
| 
 | 
    10         make_option(c("--email"),  action="store", dest="email", help="User email address"),
 | 
| 
 | 
    11         make_option(c("--galaxy_url"),  action="store", dest="galaxy_url", help="Galaxy instance base URL"),
 | 
| 
 | 
    12         make_option(c("--hub_name"),  action="store", dest="hub_name", default=NULL, help="Hub name without spaces"),
 | 
| 
 | 
    13         make_option(c("--input_dir_state"), action="store", dest="input_dir_state", help="Directory containing .state outputs from IDEAS"),
 | 
| 
 | 
    14         make_option(c("--long_label"), action="store", dest="long_label", help="Hub long label"),
 | 
| 
 | 
    15         make_option(c("--output_trackhub"),  action="store", dest="output_trackhub", help="Output hub file"),
 | 
| 
 | 
    16         make_option(c("--output_trackhub_files_path"),  action="store", dest="output_trackhub_files_path", help="Output hub extra files path"),
 | 
| 
 | 
    17         make_option(c("--output_trackhub_id"),  action="store", dest="output_trackhub_id", help="Encoded output_trackhub dataset id"),
 | 
| 
 | 
    18         make_option(c("--short_label"), action="store", dest="short_label", help="Hub short label")
 | 
| 
0
 | 
    19 )
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
 | 
| 
 | 
    22 args <- parse_args(parser, positional_arguments=TRUE)
 | 
| 
 | 
    23 opt <- args$options
 | 
| 
 | 
    24 
 | 
| 
39
 | 
    25 create_primary_html = function(output_trackhub, tracks_dir, build) {
 | 
| 
54
 | 
    26     track_files <- list.files(path=tracks_dir);
 | 
| 
 | 
    27     s <- paste('<html><head></head><body>', sep="\n");
 | 
| 
 | 
    28     s <- paste(s, '<h3>Contents of directory ~/myHub/', build, ' required by UCSC TrackHub</h3>\n', sep="");
 | 
| 
 | 
    29     s <- paste(s, '<ul>\n', sep="")
 | 
| 
 | 
    30     for (i in 1:length(track_files)) {
 | 
| 
 | 
    31         s <- paste(s, '<li><a href="', 'myHub/', build, "/", track_files[i], '">', track_files[i], '</a></li>\n', sep="");
 | 
| 
 | 
    32     }
 | 
| 
 | 
    33     s <- paste(s, '</ul>\n</body>\n</html>', sep="");
 | 
| 
 | 
    34     cat(s, file=output_trackhub);
 | 
| 
20
 | 
    35 }
 | 
| 
 | 
    36 
 | 
| 
51
 | 
    37 create_track = function(input_dir_state, chrom_len_file, base_track_file_name) {
 | 
| 
54
 | 
    38     # Create everythin needed, including the bigbed file,
 | 
| 
 | 
    39     # to render the tracks within the UCSC track hub.
 | 
| 
 | 
    40     state_files <- list.files(path=input_dir_state, full.names=TRUE);
 | 
| 
 | 
    41     genome_size = read.table(chrom_len_file);
 | 
| 
 | 
    42     g = NULL;
 | 
| 
 | 
    43     for(i in state_files) {
 | 
| 
 | 
    44         tg = as.matrix(fread(i));
 | 
| 
 | 
    45         t = NULL;
 | 
| 
 | 
    46         for(j in 1:dim(genome_size)[1]) {
 | 
| 
 | 
    47             t = c(t, which(tg[,2]==as.character(genome_size[j, 1]) & as.numeric(tg[,4]) > as.numeric(genome_size[j, 2])));
 | 
| 
 | 
    48         }
 | 
| 
 | 
    49         if (length(t) > 0) {
 | 
| 
 | 
    50             tg = tg[-t,];
 | 
| 
 | 
    51         }
 | 
| 
 | 
    52         t = which(is.na(match(tg[,2], genome_size[,1]))==T);
 | 
| 
 | 
    53         if (length(t)>0) {
 | 
| 
 | 
    54             tg = tg[-t,];
 | 
| 
 | 
    55         }
 | 
| 
 | 
    56         g = rbind(g, tg);
 | 
| 
 | 
    57     }
 | 
| 
 | 
    58     uchr = sort(unique(as.character(g[,2])));
 | 
| 
 | 
    59     g1 = NULL;
 | 
| 
 | 
    60     for(i in uchr) {
 | 
| 
 | 
    61         t = which(g[,2]==i);
 | 
| 
 | 
    62         g1 = rbind(g1, g[t[order(as.integer(g[t, 3]))],]);
 | 
| 
 | 
    63     }
 | 
| 
 | 
    64     g = NULL;
 | 
| 
 | 
    65     chr = as.character(g1[,2]);
 | 
| 
 | 
    66     posst = as.numeric(g1[,3]);
 | 
| 
 | 
    67     posed = as.numeric(g1[,4]);
 | 
| 
 | 
    68     state = as.matrix(g1[,5:(dim(g1)[2]-1)]);
 | 
| 
 | 
    69     state_name = 0:max(state);
 | 
| 
 | 
    70     L = dim(g1)[1];
 | 
| 
 | 
    71     n = dim(state)[2];
 | 
| 
 | 
    72     cells = as.character(colnames(g1)[5:(dim(g1)[2]-1)]);
 | 
| 
 | 
    73     options(scipen=999);
 | 
| 
 | 
    74     tt = which(chr[2:L]!=chr[2:L-1]);
 | 
| 
 | 
    75     tt = c(tt, which(posst[2:L]!=posed[2:L-1]));
 | 
| 
 | 
    76     tt = sort(unique(tt));
 | 
| 
 | 
    77     for(i in 1:n) {
 | 
| 
 | 
    78         tstate = state[,i];
 | 
| 
 | 
    79         t = c(tt, which(tstate[2:L]!=tstate[2:L-1]));
 | 
| 
 | 
    80         t = sort(unique(t));
 | 
| 
 | 
    81         t0 = c(0, t) + 1;
 | 
| 
 | 
    82         t = c(t, L);
 | 
| 
 | 
    83         np = cbind(chr[t], posst[t0], posed[t], tstate[t]);
 | 
| 
57
 | 
    84         track_file_name_bed_unsorted <- get_track_file_name(base_track_file_name, i, "bed_unsorted");
 | 
| 
58
 | 
    85         track_file_name_bed <- get_track_file_name(base_track_file_name, i, "bed");
 | 
| 
56
 | 
    86         track_file_name_bigbed <- get_track_file_name(base_track_file_name, i, "bigbed");
 | 
| 
54
 | 
    87         x = cbind(np[, 1:3], state_name[as.integer(np[,4])+1], 1000, ".", np[,2:3]);
 | 
| 
57
 | 
    88         write.table(as.matrix(x), track_file_name_bed_unsorted, quote=F, row.names=F, col.names=F);
 | 
| 
59
 | 
    89         cmd = paste("LC_COLLATE=C sort -k1,1 -k2,2n < ", track_file_name_bed_unsorted, " > ", track_file_name_bed);
 | 
| 
 | 
    90         system(cmd);
 | 
| 
 | 
    91         cmd = paste("bedToBigBed ", track_file_name_bed, chrom_len_file, " ", track_file_name_bigbed);
 | 
| 
 | 
    92         system(cmd);
 | 
| 
58
 | 
    93         system(paste("rm ", track_file_name_bed_unsorted));
 | 
| 
 | 
    94         system(paste("rm ", track_file_name_bed));
 | 
| 
54
 | 
    95     }
 | 
| 
 | 
    96     return(cells);
 | 
| 
0
 | 
    97 }
 | 
| 
 | 
    98 
 | 
| 
51
 | 
    99 create_track_db = function(galaxy_url, encoded_dataset_id, input_dir_state, build, chrom_len_file, tracks_dir, hub_name, short_label, long_label) {
 | 
| 
54
 | 
   100     # Create a trackDb.txt file that includes each state.
 | 
| 
 | 
   101     base_track_file_name <- paste(tracks_dir, hub_name, sep="");
 | 
| 
 | 
   102     cells = create_track(input_dir_state, chrom_len_file, base_track_file_name);
 | 
| 
 | 
   103     # Create a a character vector of 1024 viridis color hex codes.
 | 
| 
 | 
   104     viridis_vector <- viridis(1024, alpha=1, begin=0, end=1, direction=1, option="D");
 | 
| 
 | 
   105     colors_used <- vector();
 | 
| 
 | 
   106     track_db = NULL;
 | 
| 
 | 
   107     for (i in 1:length(cells)) {
 | 
| 
 | 
   108         # Generate a random number between 1 and 1024 as
 | 
| 
 | 
   109         # the viridis_vector index for the next state color.
 | 
| 
 | 
   110         color_index <- sample(1:1024, 1)
 | 
| 
 | 
   111         # Make sure the color was not previously chosen.
 | 
| 
 | 
   112         while(is.element(color_index, colors_used)) {
 | 
| 
 | 
   113             color_index <- sample(1:1024, 1)
 | 
| 
 | 
   114         }
 | 
| 
 | 
   115         # Append the color to our list of chosen colors.
 | 
| 
 | 
   116         append(colors_used, color_index)
 | 
| 
 | 
   117         # Get the hex code from viridis_vector.
 | 
| 
 | 
   118         viridis_color_hex_code <- viridis_vector[color_index]
 | 
| 
 | 
   119         big_data_url <- get_big_data_url(galaxy_url, encoded_dataset_id, tracks_dir, i, build);
 | 
| 
 | 
   120         # trackDb.txt track entry.
 | 
| 
 | 
   121         track_db = c(track_db, paste("track ", hub_name, "_track_", i, sep=""));
 | 
| 
 | 
   122         track_db = c(track_db, "type bigBed");
 | 
| 
 | 
   123         track_db = c(track_db, paste("bigDataUrl", big_data_url, sep=" "));
 | 
| 
 | 
   124         track_db = c(track_db, paste("shortLabel", short_label, sep=" "));
 | 
| 
 | 
   125         track_db = c(track_db, paste("longLabel", long_label, sep=" "));
 | 
| 
 | 
   126         track_db = c(track_db, paste("priority", i));
 | 
| 
 | 
   127         track_db = c(track_db, "itemRgb on");
 | 
| 
 | 
   128         track_db = c(track_db, "maxItems 100000");
 | 
| 
 | 
   129         track_db = c(track_db, paste("color", paste(c(col2rgb(viridis_color_hex_code)), collapse=","), sep=" "));
 | 
| 
 | 
   130         track_db = c(track_db, "visibility dense");
 | 
| 
 | 
   131         track_db = c(track_db, "");
 | 
| 
 | 
   132     }
 | 
| 
 | 
   133     return(track_db);
 | 
| 
0
 | 
   134 }
 | 
| 
 | 
   135 
 | 
| 
39
 | 
   136 get_big_data_url = function(galaxy_url, encoded_dataset_id, tracks_dir, index, build) {
 | 
| 
56
 | 
   137     track_files <- list.files(path=tracks_dir, pattern="\\.bigbed");
 | 
| 
54
 | 
   138     s <- paste(galaxy_url, 'datasets/', encoded_dataset_id, '/display/myHub/', build, '/', track_files[index], sep="");
 | 
| 
 | 
   139     return(s)
 | 
| 
50
 | 
   140 }
 | 
| 
 | 
   141 
 | 
| 
39
 | 
   142 get_track_file_name = function(base_track_file_name, index, ext) {
 | 
| 
54
 | 
   143     track_file_name <- paste(base_track_file_name, index, ext, sep=".");
 | 
| 
 | 
   144     return(track_file_name);
 | 
| 
39
 | 
   145 }
 | 
| 
 | 
   146 
 | 
| 
20
 | 
   147 # Create the hub.txt output.
 | 
| 
53
 | 
   148 hub_name_line <- paste("hub ", opt$hub_name, sep="");
 | 
| 
39
 | 
   149 short_label_line <- paste("shortLabel ", opt$short_label, sep="");
 | 
| 
 | 
   150 long_label_line <- paste("longLabel ", opt$long_label, sep="");
 | 
| 
35
 | 
   151 genomes_txt_line <- paste("genomesFile genomes.txt", sep="");
 | 
| 
39
 | 
   152 email_line <- paste("email ", opt$email, sep="");
 | 
| 
 | 
   153 contents <- paste(hub_name_line, short_label_line, long_label_line, genomes_txt_line, email_line, sep="\n");
 | 
| 
35
 | 
   154 hub_dir <- paste(opt$output_trackhub_files_path, "/", "myHub", "/", sep="");
 | 
| 
22
 | 
   155 dir.create(hub_dir, showWarnings=FALSE);
 | 
| 
35
 | 
   156 hub_file_path <- paste(hub_dir, "hub.txt", sep="");
 | 
| 
20
 | 
   157 write.table(contents, file=hub_file_path, quote=F, row.names=F, col.names=F);
 | 
| 
 | 
   158 
 | 
| 
 | 
   159 # Create the genomes.txt output.
 | 
| 
39
 | 
   160 genome_line <- paste("genome ", opt$build, sep="");
 | 
| 
35
 | 
   161 track_db_line <- paste("trackDb ", opt$build, "/", "trackDb.txt", sep="");
 | 
| 
39
 | 
   162 contents <- paste(genome_line, track_db_line, sep="\n");
 | 
| 
30
 | 
   163 genomes_file_path <- paste(hub_dir, "genomes.txt", sep="");
 | 
| 
20
 | 
   164 write.table(contents, file=genomes_file_path, quote=F, row.names=F, col.names=F);
 | 
| 
 | 
   165 
 | 
| 
0
 | 
   166 # Create the tracks.
 | 
| 
36
 | 
   167 tracks_dir <- paste(hub_dir, opt$build, "/", sep="");
 | 
| 
11
 | 
   168 dir.create(tracks_dir, showWarnings=FALSE);
 | 
| 
51
 | 
   169 track_db <- create_track_db(opt$galaxy_url, opt$output_trackhub_id, opt$input_dir_state, opt$build, opt$chrom_len_file, tracks_dir, opt$hub_name, opt$short_label, opt$long_label);
 | 
| 
0
 | 
   170 
 | 
| 
20
 | 
   171 # Create the trackDb.txt output.
 | 
| 
35
 | 
   172 track_db_file_path <- paste(tracks_dir, "trackDb.txt", sep="");
 | 
| 
13
 | 
   173 write.table(track_db, file=track_db_file_path, quote=F, row.names=F, col.names=F);
 | 
| 
11
 | 
   174 
 | 
| 
20
 | 
   175 # Create the primary HTML dataset.
 | 
| 
54
 | 
   176 create_primary_html(opt$output_trackhub, tracks_dir, opt$build);
 |