Mercurial > repos > mingchen0919 > aurora_star
changeset 7:878d41e8fad9 draft
planemo upload commit 5b03cf1a31307c262701d4f267972bd01be24311-dirty
author | mingchen0919 |
---|---|
date | Thu, 26 Apr 2018 14:11:58 -0400 |
parents | 9e058e122d05 |
children | ba410e2de3a0 |
files | expose-outputs.sh helper.R run-star.sh shell-script-template.sh star.Rmd star_render.R |
diffstat | 6 files changed, 171 insertions(+), 109 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expose-outputs.sh Thu Apr 26 14:11:58 2018 -0400 @@ -0,0 +1,6 @@ +# change directory to tool outputs directory +cd ${REPORT_FILES_PATH} + +# copy outputs from tool outputs directory to corresponding galaxy output path +cp star.html ${REPORT} +cp Aligned.out.sorted.bam ${X_S} \ No newline at end of file
--- a/helper.R Wed Apr 18 11:13:37 2018 -0400 +++ b/helper.R Thu Apr 26 14:11:58 2018 -0400 @@ -4,9 +4,14 @@ #' The first column are short flags, the second column are argument masks, the third column #' is data types. The fourth column are variable names used in the tool XML. These three columns are required. #' @param gtg_name the name of a running GTG. -getopt_specification_matrix = function(specification_file, gtg_name = 'gtg', tool_dir = Sys.getenv('TOOL_DIR')) { - df = read.csv(paste0(tool_dir, '/', specification_file), - header = TRUE, stringsAsFactors = FALSE) +getopt_specification_matrix = function(specification_file, + gtg_name = 'gtg', + tool_dir = Sys.getenv('TOOL_DIR')) { + df = read.csv( + paste0(tool_dir, '/', specification_file), + header = TRUE, + stringsAsFactors = FALSE + ) # check if there are duplicated short flags short_flags = df[, 1] if (length(unique(short_flags)) < length(short_flags)) { @@ -19,10 +24,12 @@ long_flags = paste0('X_', df[, 1]) # specification matrix - df2 = data.frame(long_flags = long_flags, - short_flags = df[, 1], - argument_mask = df[, 2], - data_type = df[, 3]) + df2 = data.frame( + long_flags = long_flags, + short_flags = df[, 1], + argument_mask = df[, 2], + data_type = df[, 3] + ) as.matrix(df2) } @@ -30,43 +37,46 @@ #' \code{file_tree} generate file tree of a directory in the format of HTML lists. -#' +#' #' @param dir the path to the directory for generating the file tree. #' @param output_dir the REPORT_FILES_PATH folder name, which has the name style: dataset_NUMBER_files. # define a recursive function to build html string of the file tree -file_tree = function(dir = '.'){ +file_tree = function(dir = '.') { # get the OUTPUT_DIR folder data: dataset_NUMBER_files report_files_path = Sys.getenv('REPORT_FILES_PATH') output_dir = tail(strsplit(report_files_path, '/')[[1]], 1) - files = list.files(path = dir, recursive = FALSE, full.names = TRUE) + files = list.files(path = dir, + recursive = FALSE, + full.names = TRUE) # files also include directorys, need to remove directorys files = files[!dir.exists(files)] - dirs = list.dirs(path = dir, recursive = FALSE, full.names = TRUE) - # hide vakata-jstree-3.3.5 folder - #jstree_index = grep(pattern = 'vakata-jstree-3.3.5', x = dirs) - #dirs = dirs[-jstree_index] - tags$ul( - { - if (length(files) > 0) { - lapply(files, function(x){ - path_end = tail(strsplit(x, '/')[[1]],1) - href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(tags$a(path_end, href=href_path)) - li_item$attribs = list('data-jstree'='{"icon":"jstree-file"}') + dirs = list.dirs(path = dir, + recursive = FALSE, + full.names = TRUE) + tags$ul({ + if (length(files) > 0) { + lapply(files, function(x) { + path_end = tail(strsplit(x, '/')[[1]], 1) + href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] + li_item = tags$li(tags$a(path_end, href = href_path)) + li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}') + li_item + }) + } + }, + { + if (length(dirs) > 0) { + lapply(dirs, function(x) { + path_end = tail(strsplit(x, '/')[[1]], 1) + # hide vakata-jstree-3.3.5 folder + if (path_end != 'vakata-jstree-3.3.5') { + # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] + li_item = tags$li(path_end, file_tree(x)) + li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}') li_item - }) - } - }, - { - if (length(dirs) > 0) { - lapply(dirs, function(x){ - x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(x_path, file_tree(x)) - li_item$attribs = list('data-jstree'='{"icon":"jstree-folder"}') - li_item - }) - } + } + }) } - ) + }) }
--- a/run-star.sh Wed Apr 18 11:13:37 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -cd ${X_d} - -#--------- index genome -------- -# create genome directory for genome indexes -mkdir -p ${X_d}/genomeDir - -cat >temp.sh <<EOF -STAR \\ - --runMode genomeGenerate \\ - --genomeDir ${X_d}/genomeDir \\ - --genomeFastaFiles $( echo ${X_A} | sed 's/,/ /g' ) \\ - --sjdbGTFfile ${X_B} \\ - --sjdbOverhang ${X_C} \\ - > /dev/null 2>&1 -EOF - -grep -v None temp.sh > index-genome.sh - -# run star -sh index-genome.sh - -#---- mapping --------- -cat >temp.sh <<EOF -STAR \\ - --genomeDir ${X_d}/genomeDir \\ - --readFilesIn \\ - ${X_F} \\ - ${X_R} \\ - > /dev/null 2>&1 -EOF - -grep -v None temp.sh > mapping.sh - -# run mapping -sh mapping.sh - -# remove temp.sh -rm temp.sh - -#----- SAM to sorted BAM ------ -echo "samtools sort -o Aligned.out.sorted.bam Aligned.out.sam" > sam2bam.sh -sh sam2bam.sh - -#----- evaluate mapping ------- -echo "samtools flagstat Aligned.out.sorted.bam > flagstat.txt" > flagstat.sh -sh flagstat.sh - - -#====== expose outputs to galaxy history ======= -cp Aligned.out.sorted.bam ${X_S} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shell-script-template.sh Thu Apr 26 14:11:58 2018 -0400 @@ -0,0 +1,47 @@ +# run SHELL_SCRIPT within tool outputs directory +cd ${REPORT_FILES_PATH} + +#--------- index genome -------- +# create genome directory for genome indexes +mkdir -p ${X_d}/genomeDir + +cat >temp.sh <<EOF +STAR \\ + --runMode genomeGenerate \\ + --genomeDir ${X_d}/genomeDir \\ + --genomeFastaFiles $( echo ${X_A} | sed 's/,/ /g' ) \\ + --sjdbGTFfile ${X_B} \\ + --sjdbOverhang ${X_C} \\ + > /dev/null 2>&1 +EOF + +grep -v None temp.sh > index-genome.sh + +# run star +sh index-genome.sh + +#---- mapping --------- +cat >temp.sh <<EOF +STAR \\ + --genomeDir ${X_d}/genomeDir \\ + --readFilesIn \\ + ${X_F} \\ + ${X_R} \\ + > /dev/null 2>&1 +EOF + +grep -v None temp.sh > mapping.sh + +# run mapping +sh mapping.sh + +# remove temp.sh +rm temp.sh + +#----- SAM to sorted BAM ------ +echo "samtools sort -o Aligned.out.sorted.bam Aligned.out.sam" > sam2bam.sh +sh sam2bam.sh + +#----- evaluate mapping ------- +echo "samtools flagstat Aligned.out.sorted.bam > flagstat.txt" > flagstat.sh +sh flagstat.sh
--- a/star.Rmd Wed Apr 18 11:13:37 2018 -0400 +++ b/star.Rmd Thu Apr 26 14:11:58 2018 -0400 @@ -5,6 +5,15 @@ highlight: pygments --- +```{css echo=FALSE} +pre code, pre, code { + white-space: pre !important; + overflow-x: scroll !important; + word-break: keep-all !important; + word-wrap: initial !important; +} +``` + ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_knit$set(progress = FALSE) knitr::opts_chunk$set(error = TRUE, echo = FALSE) @@ -41,21 +50,57 @@ </script> ----------------------------------------- -```{bash, echo=FALSE} -sh ${TOOL_INSTALL_DIR}/run-star.sh -``` ## Job scripts -* Index genome: [index-genome.sh](index-genome.sh) -* Mapping: [mapping.sh](mapping.sh) -* SAM to sorted BAM: [sam2bam.sh](sam2bam.sh) -* Evaluate mapping: [flagstat.sh](flagstat.sh) +```{bash, echo=FALSE} +sh ${TOOL_INSTALL_DIR}/shell-script-template.sh +``` + +### Index genome + +```{r echo=FALSE,results='asis'} +# display content of the job-script.sh file. +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/index-genome.sh')), sep = '\n') +cat('\n```') +``` + +### Mapping + +```{r echo=FALSE,results='asis'} +# display content of the job-script.sh file. +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/mapping.sh')), sep = '\n') +cat('\n```') +``` + +### SAM to sorted BAM -## Mapping evaluation +```{r echo=FALSE,warning=FALSE,results='asis'} +# display content of the job-script.sh file. +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/sam2bam.sh')), sep = '\n') +cat('\n```') +``` + +### Mapping evaluation -```{r, echo=FALSE} -readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/flagstat.txt')) +```{r echo=FALSE,warning=FALSE,results='asis'} +# display content of the job-script.sh file. +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/flagstat.sh')), sep = '\n') +cat('\n```') +``` + + +### Mapping evaluation results + +```{r echo=FALSE,warning=FALSE,results='asis'} +# display content of the job-script.sh file. +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/flagstat.txt')), sep = '\n') +cat('\n```') ```
--- a/star_render.R Wed Apr 18 11:13:37 2018 -0400 +++ b/star_render.R Thu Apr 26 14:11:58 2018 -0400 @@ -5,42 +5,46 @@ sink(zz) sink(zz, type = 'message') -#-------------------preparation ----------------- +#============== preparation ==================================== options(stringsAsFactors = FALSE) # import libraries -library(getopt) -library(rmarkdown) -library(htmltools) +#------------------------------------------------------------------ +# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES +#------------------------------------------------------------------ +library('getopt') +library('rmarkdown') +library('htmltools') + + # load helper functions source(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/helper.R')) # import getopt specification matrix from a csv file opt = getopt(getopt_specification_matrix('getopt_specification.csv', tool_dir=Sys.getenv('TOOL_INSTALL_DIR'))) -# define a unix variable versions for all input values. this is useful when we +# define environment variables for all input values. this is useful when we # want to use input values by other programming language in r markdown do.call(Sys.setenv, opt[-1]) -#------------------------------------------------ +#=============================================================== -#-----------------render Rmd files -------------- +#======================== render Rmd files ========================= # NOTICE: # we should copy all rmarkdown files from tool install directory to REPORT_FILES_PATH directory. # and render rmarkdown files in the REPORT_FILES_PATH directory. file.copy(from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'), to = Sys.getenv('REPORT_FILES_PATH'), recursive = TRUE) system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}') -# render Rmd files in order + +#----------------BELOW IS WHERE YOU NEED TO CUSTOMIZE --------------------- render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/star.Rmd')) # add more lines below if there are more Rmd files to be rendered -#------------------------------------------ +#=============================================================== -#---------------- copy the output html to REPORT ---- -system(command = 'cp ${REPORT_FILES_PATH}/star.html ${REPORT}') -# add more lines below if there are more output html files - -#==============the end============== +#============== expose outputs to galaxy history =============== +system(command = 'sh ${TOOL_INSTALL_DIR}/expose-outputs.sh') +#=============================================================== ##--------end of code rendering .Rmd templates----------------