Mercurial > repos > mingchen0919 > aurora_htseq
changeset 6:7c54fd452b8e draft
add kableExtra package
author | mingchen0919 |
---|---|
date | Wed, 18 Apr 2018 15:59:38 -0400 |
parents | aacd5f53ac99 |
children | 63cde8d7f89c |
files | helper.R htseq_count.Rmd htseq_count.xml htseq_count_render.R run_htseq-count.sh |
diffstat | 5 files changed, 90 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
# helper.R (new file in this changeset; original diff header:
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/helper.R Wed Apr 18 15:59:38 2018 -0400 @@ -0,0 +1,72 @@)

#' \code{getopt_specification_matrix} returns a getopt specification matrix.
#'
#' Reads a csv file and turns its first three columns into a character matrix
#' with the columns long flag, short flag, argument mask and data type.
#' Long flags are generated from the short flags by prefixing them with
#' \code{X_}.
#'
#' @param specification_file a csv file (with a header row) within
#'   \code{tool_dir} which stores getopt specification matrix data. The first
#'   column are short flags, the second column are argument masks, the third
#'   column is data types. These three columns are required. Further columns
#'   (e.g. the variable names used in the tool XML) may be present but are not
#'   used by this function.
#' @param gtg_name the name of a running GTG. Currently not used by this
#'   function; kept so existing calls keep working.
#' @param tool_dir the directory that contains \code{specification_file}.
#'   Defaults to the value of the \code{TOOL_DIR} environment variable.
#' @return a character matrix with one row per option and the columns
#'   \code{long_flags}, \code{short_flags}, \code{argument_mask} and
#'   \code{data_type}.
getopt_specification_matrix = function(specification_file, gtg_name = 'gtg', tool_dir = Sys.getenv('TOOL_DIR')) {
  # Read every column as text. With type guessing, a flag column that only
  # holds 'T'/'F' is parsed as logical and yields long flags like 'X_TRUE'.
  spec = read.csv(file.path(tool_dir, specification_file),
                  header = TRUE, stringsAsFactors = FALSE,
                  colClasses = 'character')

  if (ncol(spec) < 3) {
    stop('The specification file must have at least three columns: ',
         'short flag, argument mask, data type.', call. = FALSE)
  }
  if (nrow(spec) == 0) {
    stop('The specification file contains no options.', call. = FALSE)
  }

  # Surrounding blanks (e.g. 'o, 1, character') are not part of the values.
  short_flags = trimws(spec[[1]])
  argument_mask = trimws(spec[[2]])
  data_type = trimws(spec[[3]])

  # check if there are duplicated short flags
  duplicated_flags = unique(short_flags[duplicated(short_flags)])
  if (length(duplicated_flags) > 0) {
    stop('Duplicated short flags are not allowed: ',
         paste(duplicated_flags, collapse = ', '), call. = FALSE)
  }

  # Build the character matrix directly; going through a data.frame and
  # as.matrix() would re-format (and possibly pad) the values.
  cbind(long_flags = paste0('X_', short_flags),
        short_flags = short_flags,
        argument_mask = argument_mask,
        data_type = data_type)
}



#' \code{file_tree} generate file tree of a directory in the format of HTML lists.
#'
#' @param dir the path to the directory for generating the file tree.
#' @details Links are written relative to the REPORT_FILES_PATH folder, which
#'   has the name style: dataset_NUMBER_files. The folder name is taken from
#'   the \code{REPORT_FILES_PATH} environment variable inside the function; it
#'   is not an argument.
# Recursive function that builds the HTML list (<ul>/<li>) of a directory tree.
#
# dir: the path to the directory for generating the file tree.
#
# Returns a tags$ul element. Every file directly inside `dir` becomes an <li>
# holding a link, every sub-directory becomes an <li> holding the nested list
# produced by calling file_tree() on it. Each <li> carries a 'data-jstree'
# attribute that selects the file or folder icon.
#
# Link targets and folder labels are the part of the path that follows the
# last component of the REPORT_FILES_PATH environment variable (the
# dataset_NUMBER_files folder).
# Relies on `tags` being in scope (presumably from htmltools -- confirm in the
# script that sources this file).
file_tree = function(dir = '.'){
  # get the OUTPUT_DIR folder data: dataset_NUMBER_files
  report_files_path = Sys.getenv('REPORT_FILES_PATH')
  output_dir = tail(strsplit(report_files_path, '/')[[1]], 1)

  # Path of `path` below the output folder. The folder name is matched
  # literally (not as a regular expression) and everything after its first
  # occurrence is kept. When REPORT_FILES_PATH is unset, or the path does not
  # contain the folder, the path itself (without a leading './') is used
  # instead of producing an NA link.
  relative_path = function(path) {
    if (length(output_dir) == 1 && nzchar(output_dir)) {
      marker = paste0(output_dir, '/')
      hit = regexpr(marker, path, fixed = TRUE)
      if (hit > 0) {
        return(substring(path, hit + nchar(marker)))
      }
    }
    sub('^\\./', '', path)
  }

  entries = list.files(path = dir, recursive = FALSE, full.names = TRUE)
  # list.files() also returns directories; keep plain files only
  files = entries[!dir.exists(entries)]
  dirs = list.dirs(path = dir, recursive = FALSE, full.names = TRUE)
  # hide vakata-jstree-3.3.5 folder
  #jstree_index = grep(pattern = 'vakata-jstree-3.3.5', x = dirs)
  #dirs = dirs[-jstree_index]

  # NULL (instead of an empty list) when there is nothing to show, so that no
  # empty child is handed to tags$ul
  file_items = if (length(files) > 0) {
    lapply(files, function(x){
      tags$li(tags$a(basename(x), href = relative_path(x)),
              'data-jstree' = '{"icon":"jstree-file"}')
    })
  }
  dir_items = if (length(dirs) > 0) {
    lapply(dirs, function(x){
      tags$li(relative_path(x), file_tree(x),
              'data-jstree' = '{"icon":"jstree-folder"}')
    })
  }

  tags$ul(file_items, dir_items)
}
--- a/htseq_count.Rmd Wed Apr 18 13:17:28 2018 -0400 +++ b/htseq_count.Rmd Wed Apr 18 15:59:38 2018 -0400 @@ -42,29 +42,31 @@ ----------------------------------------- -```{bash, echo=FALSE} -sh ${TOOL_INSTALL_DIR}/run_htseq-count.sh -``` - ## Job script * htseq-count analysis: [htseq-count.sh](htseq-count.sh) +```{bash, echo=FALSE} +sh ${TOOL_INSTALL_DIR}/run_htseq-count.sh +``` + + ## Counts -```{r} +```{r, echo=TRUE} count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) colnames(count_data) = rep(sample_names, length = ncol(count_data)) -DT::datatable(count_data, caption = "htseq counts for each sample") +kable(head(count_data)) ``` -```{r} +```{r, echo=FALSE} # save count data as an R object save(count_data, file = paste0(opt$X_d, '/counts.RData')) ``` -```{bash} +```{bash, echo=FALSE} +# expose counts.RData to galaxy history cp ${X_d}/counts.RData ${X_O} ```
--- a/htseq_count.xml Wed Apr 18 13:17:28 2018 -0400 +++ b/htseq_count.xml Wed Apr 18 15:59:38 2018 -0400 @@ -5,7 +5,7 @@ <requirement type="package" version="1.20.0">r-getopt</requirement> <requirement type="package" version="1.6">r-rmarkdown</requirement> <requirement type="package" version="0.9.1">htseq</requirement> - <requirement type="package" version="0.2">r-dt</requirement> + <requirement type="package" version="0.8.0">r-kableextra</requirement> </requirements> <stdio> <regex match="XXX" source="stderr" level="warning" @@ -27,8 +27,6 @@ ############ create a hidden file to store r markdown rendering log echo '------ Rmd rendering log -------\n' > $report.files_path/.r_rendering.log.txt && - echo echo 'R markdown rendering started.' && - Rscript '${__tool_directory__}/htseq_count_render.R' -o $report @@ -45,9 +43,7 @@ -i $idattr -m $mode -c $count - -O $count_rdata && - - echo 'R markdown rendering finished.' + -O $count_rdata ]]></command> <inputs>
--- a/htseq_count_render.R Wed Apr 18 13:17:28 2018 -0400 +++ b/htseq_count_render.R Wed Apr 18 15:59:38 2018 -0400 @@ -11,6 +11,8 @@ library(getopt) library(rmarkdown) library(htmltools) +library(kableExtra) +library(magrittr) # load helper functions source(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/helper.R')) # import getopt specification matrix from a csv file
--- a/run_htseq-count.sh Wed Apr 18 13:17:28 2018 -0400 +++ b/run_htseq-count.sh Wed Apr 18 15:59:38 2018 -0400 @@ -10,9 +10,11 @@ -a ${X_a} \\ -t ${X_T} \\ -i ${X_i} \\ - -m ${X_m} > counts.txt + -m ${X_m} > htseq-counts-raw.txt - grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c} + grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' htseq-counts-raw.txt > counts.txt + + cp counts.txt ${X_c} EOF sh htseq-count.sh \ No newline at end of file