Mercurial > repos > mingchen0919 > elastic_r_tidyverse
changeset 0:a936b1aff8d3 draft
planemo upload
author | mingchen0919 |
---|---|
date | Tue, 27 Mar 2018 12:29:19 -0400 |
parents | |
children | 7a3cc84232a2 |
files | elastic_r_package.xml elastic_r_package_render.R elastic_r_package_wrappers_macros.xml elastic_tool.Rmd |
diffstat | 4 files changed, 407 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package.xml Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,21 @@
+<tool id="tool_id" name="elastic R package" version="1.0.0">
+    <macros>
+        <import>elastic_r_package_wrappers_macros.xml</import>
+    </macros> <!-- every macro expanded below is defined in the imported macro file -->
+    <description></description>
+    <requirements>
+        <expand macro="rmarkdown_requirements" />
+    </requirements>
+    <expand macro="stdio" />
+    <expand macro="shell_command" /> <!-- provides the <command> block -->
+    <inputs>
+        <expand macro="function_name" />
+        <expand macro="function_arguments" />
+    </inputs>
+    <outputs>
+        <expand macro="output_set" />
+    </outputs>
+    <citations>
+        <expand macro="citations" />
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package_render.R Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,33 @@
+##============ Sink warnings and errors to a file ==============
+## everything between the sink() calls below writes its output and messages to ${TOOL_LOG}.
+##==============================================================
+zz = file(Sys.getenv('TOOL_LOG'), open = 'wt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(rmarkdown)
+#------------------------------------------------
+
+#------------read the option/argument table written by the tool command------------
+options_and_arguments = read.table(paste0(Sys.getenv('REPORT_FILES_PATH'), '/options_and_arguments.txt'),
+                                   sep = '|', header = TRUE)
+
+#-----------------render Rmd--------------
+render(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/elastic_tool.Rmd'),
+       output_file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/report.html'))
+
+# for some unknown reason, directly using REPORT as the input value for the output_file parameter
+# in the render function can cause an empty report file when the tool runs in batch mode.
+# the solution is to render the rmarkdown to an explicitly specified file and then copy the
+# file to ${REPORT}. file.copy() replaces the shell `cp` so the paths need no shell quoting.
+report_copied = file.copy(paste0(Sys.getenv('REPORT_FILES_PATH'), '/report.html'), Sys.getenv('REPORT'), overwrite = TRUE)
+if (!report_copied) warning('failed to copy report.html to the REPORT output')
+#------------------------------------------
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message')
+sink()
+close(zz)
+##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package_wrappers_macros.xml Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,206 @@
+<macros>
+
+    <xml name="rmarkdown_requirements">
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="1.2.1">r-tidyverse</requirement>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <regex match="XXX" source="stderr" level="warning"
+                   description="Check the tool log output file for more details."/>
+        </stdio>
+    </xml>
+
+    <!--Tool name: select list for the function to run (no options are defined yet)-->
+    <xml name="function_name">
+        <param type="select" name="function_name" multiple="false" label="Tool name">
+        </param>
+    </xml>
+
+    <!--input values: plain values, RData outputs and nested function calls-->
+    <xml name="function_arguments">
+        <repeat name="regular_argument_value" title="INPUT VALUE: string/float/integer" min="0" default="0">
+            <param type="text" name="argument" label="argument" />
+            <param type="text" name="argument_value" label="argument value" />
+        </repeat>
+        <repeat name="rdata_argument_value" title="INPUT VALUE: RData output" min="0" default="0">
+            <param type="text" name="argument" label="argument" />
+            <param type="text" name="argument_value" label="RData output" />
+        </repeat>
+        <repeat name="function_argument_value" title="INPUT VALUE: function" min="0" default="0">
+            <param type="text" name="argument" label="argument" />
+            <expand macro="function_name" />
+            <expand macro="nested_function_arguments" />
+        </repeat>
+        <param type="select" name="connection" label="connection function">
+            <option value="%>%" selected="false">pipe (%>%)</option>
+            <option value="+" selected="false">plus (+)</option>
+        </param>
+    </xml>
+
+    <xml name="nested_function_arguments">
+        <repeat name="regular_argument_value" title="INPUT VALUE: string/float/integer" min="0" default="0">
+            <param type="text" name="argument" label="argument" />
+            <param type="text" name="argument_value" label="argument value" />
+        </repeat>
+        <repeat name="rdata_argument_value" title="INPUT VALUE: RData output" min="0" default="0">
+            <param type="text" name="argument" label="argument" />
+            <param type="text" name="argument_value" label="RData output" />
+        </repeat>
+    </xml>
+
+    <!--<!–OPTION/ARGUMENT PAIRS and INPUT types–>-->
+    <!--<!–input files–>-->
+    <!--<xml name="option_argument_single_dataset">-->
+        <!--<repeat name="option_argument_single_dataset_repeat" title="INPUT DATA FROM HISTORY: single data set" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param name="value" type="data" optional="false" multiple="false" label="value"/>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+    <!--<xml name="option_argument_multiple_datasets">-->
+        <!--<repeat name="option_argument_multiple_datasets_repeat" title="INPUT DATA FROM HISTORY: multiple data sets" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param name="value" type="data" optional="false" multiple="true" label="value"/>-->
+            <!--<param type="select" name="delimiter" multiple="false" label="file delimiter">-->
+                <!--<option value=" " selected="false">space</option>-->
+                <!--<option value="," selected="false">comma</option>-->
+            <!--</param>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+    <!--<!–input path–>-->
+    <!--<xml name="option_argument_path_relative_to_a_tool">-->
+        <!--<repeat name="option_argument_path_relative_to_a_tool_repeat" title="INPUT DATA PATH: relative to a tool output directory" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param name="a_tool_output_dir" type="data" optional="false" multiple="false" label="a tool output directory"/>-->
+            <!--<param type="text" name="value" optional="true" label="value" />-->
+        <!--</repeat>-->
+    <!--</xml>-->
+    <!--<xml name="option_argument_path_relative_to_this_tool">-->
+        <!--<repeat name="option_argument_path_relative_to_this_tool_repeat" title="OUTPUT PATH: relative to this tool output directory" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param type="text" name="value" optional="false" label="value" />-->
+            <!--<param type="select" name="path_type" multiple="false" label="path type">-->
+                <!--<option value="file_path" selected="false">file path</option>-->
+                <!--<option value="dir_path" selected="false">directory path</option>-->
+            <!--</param>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+
+
+    <!--output set-->
+    <xml name="output_set">
+        <data format="html" name="report" label="${tool.name} report"/>
+        <data format="txt" name="tool_help_doc" label="${tool.name} help doc"/>
+        <data format="txt" name="tool_output_dir" label="${tool.name} output directory"/>
+        <data format="txt" name="tool_log" label="${tool.name} log" />
+    </xml>
+
+
+    <!--shell command fragment-->
+    <xml name="shell_command">
+        <command><![CDATA[
+
+        ############ first, create a directory to store all files
+        mkdir -p '$report.files_path' &&
+
+
+        ############ save the tool installation directory to an environment variable
+        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
+
+
+        #########################
+        ##
+        ## save user input option/argument pairs into a file
+        ##
+        #########################
+
+        ## first line will be the header
+        echo 'type|flag|value|path_type' > '$report.files_path/options_and_arguments.txt' &&
+
+        ############ function_name
+        ##-t '$function_name'
+        echo "function_name|not available|${function_name}|not available" >> '$report.files_path/options_and_arguments.txt' &&
+
+        ## loop through repeats to get option/argument pairs
+        ## NOTE(review): none of the *_repeat inputs read below are declared by the macros in this file
+        ## (their definitions are commented out above) - confirm the loops still match the tool's inputs.
+
+        ############ option_argument_single_dataset
+        #for $i in $option_argument_single_dataset_repeat:
+            #set $item = 'single_dataset|' + str($i.flag) + "|" + str($i.value) + "|"
+            echo '$item' >> '$report.files_path/options_and_arguments.txt' &&
+        #end for
+
+        ############ option_argument_multiple_datasets
+        #for $i in $option_argument_multiple_datasets_repeat:
+            #set $item = 'multiple_datasets|' + str($i.flag) + "|" + str($i.value).replace(',', str($i.delimiter)) + "|"
+            echo '$item' >> '$report.files_path/options_and_arguments.txt' &&
+        #end for
+
+
+        ############ option_argument_path_relative_to_a_tool
+        #for $i in $option_argument_path_relative_to_a_tool_repeat:
+            #set $item = 'path_relative_to_a_tool|' + str($i.flag) + "|" + str($i.value) + "|" + str($i.a_tool_output_dir)
+            echo '$item' >> '$report.files_path/options_and_arguments.txt' &&
+        #end for
+
+        ############ option_argument_path_relative_to_this_tool
+        #for $i in $option_argument_path_relative_to_this_tool_repeat:
+            #set $item = 'path_relative_to_this_tool|' + str($i.flag) + "|" + str($i.value) + "|" + str($i.path_type)
+            echo '$item' >> '$report.files_path/options_and_arguments.txt' &&
+        #end for
+
+        ############ option_argument_string_float_integer
+        #for $i in $option_argument_string_float_integer_repeat:
+            #set $item = 'value|' + str($i.flag) + "|" + str($i.value) + "|"
+            echo '$item' >> '$report.files_path/options_and_arguments.txt' &&
+        #end for
+
+        ############ output_set
+        ## '$report'
+        ## '$report.files_path'
+        ## '$tool_help_doc'
+        ## '$tool_output_dir'
+        ## '$tool_log'
+        #######################
+        export REPORT='$report' &&
+        export REPORT_FILES_PATH='$report.files_path' &&
+        export TOOL_HELP_DOC='$tool_help_doc' &&
+        export TOOL_OUTPUT_DIR='$tool_output_dir' &&
+        export TOOL_LOG='$tool_log' &&
+
+
+        ############ run the render R script shipped with this tool to render the R markdown
+        Rscript '${__tool_directory__}/elastic_r_package_render.R'
+
+        ]]></command>
+    </xml>
+
+    <!--tool citations-->
+    <xml name="citations">
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015elastic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_tool.Rmd Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,147 @@
+---
+title: 'Tool Report'
+output: html_document
+---
+
+<style>
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+</style>
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE)
+```
+
+## User input
+
+```{r, 'display user input'}
+# get user input and save it into a data frame; fields are read literally (no quote or '#' parsing).
+df = read.table(paste0(Sys.getenv('REPORT_FILES_PATH'), '/options_and_arguments.txt'),
+                sep = '|', header = TRUE, quote = '', comment.char = '', stringsAsFactors = FALSE)
+
+# if the input type is 'path_relative_to_a_tool', the path_type column names a file whose first
+# line is used as the root path; prepend it to the value to make the value a full path.
+rel_rows = which(df$type == 'path_relative_to_a_tool')
+for (i in rel_rows) {
+  # rows are addressed by their position in df, so the new value is written back in place
+  root_path = readLines(df[i, 'path_type'])[1]
+  df[i, 'value'] = paste(root_path,
+                         df[i, 'value'],
+                         sep = '/')
+}
+
+## display user input as a table
+knitr::kable(df)
+```
+
+
+```{r, 'build script', echo=FALSE}
+##-------- build script files -----------
+
+# get tool name: the row typed 'function_name' (as written by the tool command) or 'tool_name'.
+name_row = df$type %in% c('tool_name', 'function_name')
+tool_name = df[name_row, 'value']
+
+# if the number of option/argument pairs is larger than 0, build script file
+df2 = df[!name_row, ]
+if (nrow(df2) > 0) {
+  # write tool name as the first line of the script.sh
+  # before running the job, cd into the ${REPORT_FILES_PATH} directory
+  write(paste0(tool_name, ' \\'),
+        paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'))
+  write(paste(' ', df2$flag, df2$value, '\\', sep = ' '),
+        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'),
+        append = TRUE )
+  # remember that after writing option/argument lines to the script.sh, the last line
+  # ends with a line-continuation character '\' which causes a problem. We can either remove that
+  # extra '\' or add a new line to the end. We choose to add a new line.
+
+  # add an extra line to the end to redirect stdout to stdout.txt and stderr to stderr.txt
+  write(' > ${REPORT_FILES_PATH}/stdout.txt 2>${REPORT_FILES_PATH}/stderr.txt',
+        paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'), append = TRUE)
+} else {
+  # if no option/argument input, simply display the help message
+  write(paste0(tool_name, ' -h'),
+        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'))
+}
+```
+
+
+```{r, 'create paths if they do not exist', echo=FALSE}
+## if the input type is 'path_relative_to_this_tool', that means
+## we need to create a directory or file path.
+
+# create paths before running the job script
+df_paths = df[df$type == 'path_relative_to_this_tool', ]
+
+if (nrow(df_paths) > 0) {
+  for (i in seq_len(nrow(df_paths))) {
+    path = paste0(Sys.getenv('REPORT_FILES_PATH'), '/', df_paths[i, 'value'])
+    path_type = df_paths[i, 'path_type']
+
+    # create file paths (isTRUE() keeps a missing path_type from aborting the chunk)
+    if (isTRUE(path_type == 'file_path') && !file.exists(path)) {
+      dir_path = dirname(path)
+      if (!dir.exists(dir_path)) {
+        dir.create(dir_path, recursive = TRUE)
+      }
+      file.create(path)
+    }
+
+    # create dir paths
+    if (isTRUE(path_type == 'dir_path') && !dir.exists(path)) {
+      dir.create(path, recursive = TRUE)
+    }
+  }
+}
+```
+
+
+```{bash, 'run jobs', echo=FALSE}
+# run job script, always use absolute path.
+# we want to run all jobs within the working path.
+sh "${REPORT_FILES_PATH}/script.sh"
+```
+
+
+```{bash, 'display script', results='asis', echo=FALSE}
+echo '## Job script'
+echo ''
+echo ''
+echo '```bash'
+cat "${REPORT_FILES_PATH}/script.sh"
+echo '```'
+```
+
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## after the job is done, we list all files from the output directory.
+## full relative path to the output directory needs to be displayed.
+
+cat('## All output files')
+cat('\n\n')
+all_files = list.files(path = Sys.getenv('REPORT_FILES_PATH'),
+                       full.names = TRUE,
+                       recursive = TRUE)
+
+for (f in sub(Sys.getenv('REPORT_FILES_PATH'), '.', all_files, fixed = TRUE) ) {
+  cat('* [', f, '](', f, ')\n', sep = '')
+}
+cat('\n')
+```
+
+
+```{r, 'save output directory of this tool', echo=FALSE}
+## each elastic tool has a galaxy history output which contains the REPORT_FILES_PATH of this tool
+## so that other tools can reference the outputs from this tool.
+
+## obtain REPORT_FILES_PATH and save it to a galaxy output.
+database_root = paste(head(strsplit(Sys.getenv('TOOL_LOG'), '/')[[1]], -1), collapse = '/')
+tool_output_dir_id = tail(strsplit(Sys.getenv('REPORT_FILES_PATH'), '/')[[1]], 1)
+tool_output_dir = paste0(database_root, '/', tool_output_dir_id)
+write(tool_output_dir, Sys.getenv('TOOL_OUTPUT_DIR'))
+```