Mercurial > repos > mingchen0919 > elastic_r_tidyverse

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package.xml	Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,21 @@
+<tool id="tool_id" name="elastic R package" version="1.0.0">
+    <macros>
+        <import>elastic_r_package_wrappers_macros.xml</import><!-- the macro file added alongside this tool -->
+    </macros>
+    <description></description>
+    <requirements>
+        <expand macro="rmarkdown_requirements" />
+    </requirements>
+    <expand macro="stdio" />
+    <expand macro="shell_command" />
+    <inputs>
+        <expand macro="function_name" />
+        <expand macro="function_arguments" />
+    </inputs>
+    <outputs>
+        <expand macro="output_set" />
+    </outputs>
+    <citations>
+        <expand macro="citations" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package_render.R	Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,33 @@
+##============ Sink output, warnings and errors to a file ======
+## Code between the opening and closing sink() calls is logged to TOOL_LOG.
+## The connection is opened explicitly: a message sink requires an open one.
+##==============================================================
+zz <- file(Sys.getenv('TOOL_LOG'), open = 'wt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+library(rmarkdown)
+
+#------------read user input---------------------
+# '|'-separated table with a header line, written by the tool's shell command
+report_dir <- Sys.getenv('REPORT_FILES_PATH')
+options_and_arguments <- read.table(file.path(report_dir, 'options_and_arguments.txt'),
+                                    sep = '|', header = TRUE)
+
+#-----------------render Rmd--------------
+# for some unknown reason, directly using REPORT as the input value for output_file
+# in render() can cause an empty report file when the tool runs in batch mode.
+# the solution is to render to an explicitly specified file and then copy it to ${REPORT}.
+report_html <- file.path(report_dir, 'report.html')
+render(file.path(Sys.getenv('TOOL_INSTALL_DIR'), 'elastic_tool.Rmd'),
+       output_file = report_html)
+# copy without a shell so that paths containing spaces are handled
+if (!file.copy(report_html, Sys.getenv('REPORT'), overwrite = TRUE)) {
+  warning('copying report.html to REPORT failed')
+}
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message')  # restore stderr before the log connection is closed
+sink()
+close(zz)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package_wrappers_macros.xml	Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,206 @@
+<macros>
+
+    <xml name="rmarkdown_requirements">
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="1.2.1">r-tidyverse</requirement>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <regex match="XXX" source="stderr" level="warning"
+                   description="Check the tool log output file for more details."/>
+        </stdio>
+    </xml>
+
+    <!--Tool name-->
+    <xml name="function_name">
+        <param type="select" name="function_name" multiple="false" label="Tool name">
+        </param>
+    </xml>
+
+    <!--input values: string, float and integer-->
+    <xml name="function_arguments">
+        <repeat name="regular_argument_value" title="INPUT VALUE: string/float/integer" min="0" default="0">
+            <param type="text" name="argument" label="argument"  />
+            <param type="text" name="argument_value"  label="argument value"  />
+        </repeat>
+        <repeat name="rdata_argument_value" title="INPUT VALUE: RData output" min="0" default="0">
+            <param type="text" name="argument" label="argument"  />
+            <param type="text" name="argument_value"  label="RData output"  />
+        </repeat>
+        <repeat name="function_argument_value" title="INPUT VALUE: function" min="0" default="0">
+            <param type="text" name="argument" label="argument"  />
+            <expand macro="function_name" /><!-- macros are included with expand, not export -->
+            <expand macro="nested_function_arguments" />
+        </repeat>
+        <param type="select" name="connection" label="connection function">
+            <option value="%>%" selected="false">pipe (%>%)</option>
+            <option value="+" selected="false">plus (+)</option>
+        </param>
+    </xml>
+
+    <xml name="nested_function_arguments">
+        <repeat name="regular_argument_value" title="INPUT VALUE: string/float/integer" min="0" default="0">
+            <param type="text" name="argument" label="argument"  />
+            <param type="text" name="argument_value"  label="argument value"  />
+        </repeat>
+        <repeat name="rdata_argument_value" title="INPUT VALUE: RData output" min="0" default="0">
+            <param type="text" name="argument" label="argument"  />
+            <param type="text" name="argument_value"  label="RData output"  />
+        </repeat>
+    </xml>
+
+    <!--&lt;!&ndash;OPTION/ARGUMENT PAIRS and INPUT types&ndash;&gt;-->
+    <!--&lt;!&ndash;input files&ndash;&gt;-->
+    <!--<xml name="option_argument_single_dataset">-->
+        <!--<repeat name="option_argument_single_dataset_repeat" title="INPUT DATA FROM HISTORY: single data set" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param name="value" type="data" optional="false" multiple="false" label="value"/>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+    <!--<xml name="option_argument_multiple_datasets">-->
+        <!--<repeat name="option_argument_multiple_datasets_repeat" title="INPUT DATA FROM HISTORY: multiple data sets" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag" />-->
+            <!--<param name="value" type="data" optional="false" multiple="true" label="value"/>-->
+            <!--<param type="select" name="delimiter" multiple="false" label="file delimiter">-->
+                <!--<option value=" " selected="false">space</option>-->
+                <!--<option value="," selected="false">comma</option>-->
+            <!--</param>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+    <!--&lt;!&ndash;input path&ndash;&gt;-->
+    <!--<xml name="option_argument_path_relative_to_a_tool">-->
+        <!--<repeat name="option_argument_path_relative_to_a_tool_repeat" title="INPUT DATA PATH: relative to a tool output directory" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag"  />-->
+            <!--<param name="a_tool_output_dir" type="data" optional="false" multiple="false" label="a tool output directory"/>-->
+            <!--<param type="text" name="value" optional="true" label="value"  />-->
+        <!--</repeat>-->
+    <!--</xml>-->
+    <!--<xml name="option_argument_path_relative_to_this_tool">-->
+        <!--<repeat name="option_argument_path_relative_to_this_tool_repeat" title="OUTPUT PATH: relative to this tool output directory" min="0" default="0">-->
+            <!--<param type="text" name="flag" optional="true" label="flag"  />-->
+            <!--<param type="text" name="value" optional="false" label="value"  />-->
+            <!--<param type="select" name="path_type" multiple="false" label="path type">-->
+                <!--<option value="file_path" selected="false">file path</option>-->
+                <!--<option value="dir_path" selected="false">directory path</option>-->
+            <!--</param>-->
+        <!--</repeat>-->
+    <!--</xml>-->
+
+
+
+    <!--output set-->
+    <xml name="output_set">
+        <data format="html" name="report" label="${tool.name} report"/>
+        <data format="txt" name="tool_help_doc" label="${tool.name} help doc"/>
+        <data format="txt" name="tool_output_dir" label="${tool.name} output directory"/>
+        <data format="txt" name="tool_log" label="${tool.name} log" />
+    </xml>
+
+
+    <!--shell command fragment-->
+    <xml name="shell_command">
+        <command><![CDATA[
+
+        ############ first, create a directory to store all files
+        mkdir -p $report.files_path &&
+
+
+        ############ save the tool installation directory to an environment variable
+        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
+
+
+        #########################
+        ##
+        ## save user input option/argument pairs into a file
+        ##
+        #########################
+
+        ## first line will be the header
+        echo 'type|flag|value|path_type' > $report.files_path/options_and_arguments.txt &&
+
+        ############ function_name
+        ##-t '$function_name'
+        echo "function_name|not available|${function_name}|not available" >> $report.files_path/options_and_arguments.txt &&
+
+        ####################################################
+        ## loop through repeats to get option/argument pairs
+        ####################################################
+
+        ## Only the repeats declared by the "function_arguments" macro are read.
+        ## Cheetah cannot resolve a repeat that no input declares, so a loop
+        ## over such a name fails before the command is ever run.
+        ## Each row follows the header above: argument goes into "flag",
+        ## argument_value into "value", and "path_type" is left empty.
+
+        ############ regular_argument_value (string/float/integer)
+        #for $i in $regular_argument_value:
+            #set $item = 'value|' + str($i.argument) + "|" + str($i.argument_value) + "|"
+            echo '$item' >> $report.files_path/options_and_arguments.txt &&
+        #end for
+
+        ############ rdata_argument_value (RData output)
+        #for $i in $rdata_argument_value:
+            #set $item = 'rdata|' + str($i.argument) + "|" + str($i.argument_value) + "|"
+            echo '$item' >> $report.files_path/options_and_arguments.txt &&
+        #end for
+
+        ############ function_argument_value (function)
+        ## only the argument name is recorded for these entries
+        #for $i in $function_argument_value:
+            #set $item = 'function|' + str($i.argument) + "||"
+            echo '$item' >> $report.files_path/options_and_arguments.txt &&
+        #end for
+
+        ############ connection
+        ## the selected connection function, written like the function_name row
+        echo "connection|not available|${connection}|not available" >> $report.files_path/options_and_arguments.txt &&
+
+
+
+        ############ output_set
+        ## '$report'
+        ## '$report.files_path'
+        ## '$tool_help_doc'
+        ## '$tool_output_dir'
+        ## '$tool_log'
+        #######################
+        export REPORT='$report' &&
+        export REPORT_FILES_PATH='$report.files_path' &&
+        export TOOL_HELP_DOC='$tool_help_doc' &&
+        export TOOL_OUTPUT_DIR='$tool_output_dir' &&
+        export TOOL_LOG='$tool_log' &&
+
+
+        ############ run render R script to render R markdowns
+        Rscript '${__tool_directory__}/elastic_r_package_render.R'
+
+        ]]></command>
+    </xml>
+
+    <!--tool citations-->
+    <xml name="citations">
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015elastic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_tool.Rmd	Tue Mar 27 12:29:19 2018 -0400
@@ -0,0 +1,147 @@
+---
+title: 'Tool Report'
+output: html_document
+---
+
+<style>
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+</style>
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE)
+```
+
+## User input
+
+```{r, 'display user input'}
+# get user input and save it into a data frame; the file is a
+# '|'-separated table whose first line is the header.
+input_file <- paste0(Sys.getenv('REPORT_FILES_PATH'), '/options_and_arguments.txt')
+df <- read.table(input_file, sep = '|', header = TRUE)
+
+# for rows of type 'path_relative_to_a_tool' the 'path_type' column names a
+# file whose first line is a root path; that root path is prepended to the
+# value to make the value a full path.
+relative_rows <- which(df$type == 'path_relative_to_a_tool')
+for (idx in relative_rows) {
+  # only the first line of the referenced file is used
+  root_path <- readLines(df[idx, 'path_type'])[1]
+  df[idx, 'value'] <- paste(root_path, df[idx, 'value'], sep = '/')
+}
+
+## display user input as a table
+knitr::kable(df)
+```
+
+
+```{r, 'build script', echo=FALSE}
+##-------- build script files -----------
+script_path <- paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh')
+
+# the name to run is the value of the row whose type is 'function_name';
+# that is the type label the tool's shell command writes for it, so looking
+# for any other label would leave the name empty.
+is_name_row <- df$type == 'function_name'
+tool_name <- df[is_name_row, 'value']
+
+# every remaining row is an option/argument pair.
+df2 <- df[!is_name_row, ]
+if (nrow(df2) > 0) {
+  # the first line of script.sh is the name; each option/argument pair
+  # follows on its own line, and every line ends with a '\' continuation.
+  write(paste0(tool_name, ' \\'), script_path)
+  write(paste(' ', df2$flag, df2$value, '\\', sep = ' '),
+        file = script_path,
+        append = TRUE)
+
+  # the last option/argument line still ends with a line-continuation '\'.
+  # rather than removing it, one more line is appended; that line redirects
+  # stdout to stdout.txt and stderr to stderr.txt.
+  write('  > ${REPORT_FILES_PATH}/stdout.txt 2>${REPORT_FILES_PATH}/stderr.txt',
+        script_path, append = TRUE)
+} else {
+  # if no option/argument input, simply display the help message
+  write(paste0(tool_name, ' -h'), file = script_path)
+}
+```
+
+
+```{r, 'create paths if they do not exist', echo=FALSE}
+## rows of type 'path_relative_to_this_tool' name a file or directory,
+## relative to REPORT_FILES_PATH, that is created here if it is missing.
+
+# create paths before running the job script
+df_paths <- df[df$type == 'path_relative_to_this_tool', ]
+
+# seq_len() yields an empty sequence when there are no such rows
+for (i in seq_len(nrow(df_paths))) {
+  path <- paste0(Sys.getenv('REPORT_FILES_PATH'), '/', df_paths[i, 'value'])
+  path_type <- df_paths[i, 'path_type']
+
+  # create file paths; `&&` is the scalar, short-circuiting operator for `if`
+  if (path_type == 'file_path' && !file.exists(path)) {
+    # make sure the parent directory exists first
+    dir_path <- dirname(path)
+    if (!dir.exists(dir_path)) {
+      dir.create(dir_path, recursive = TRUE)
+    }
+    file.create(path)
+  }
+
+  # create dir paths
+  if (path_type == 'dir_path' && !dir.exists(path)) {
+    dir.create(path, recursive = TRUE)
+  }
+}
+```
+
+
+```{bash, 'run jobs', echo=FALSE}
+# run job script, always use absolute path.
+# we want to run all jobs within the working path.
+sh ${REPORT_FILES_PATH}/script.sh
+```
+
+
+```{bash, 'display script', results='asis', echo=FALSE}
+echo '## Job script'
+echo ''
+echo ''
+echo '```bash'
+cat ${REPORT_FILES_PATH}/script.sh
+echo '```'
+```
+
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## after the job is done, list every file in the output directory as a
+## markdown link whose path is relative to that directory.
+
+# pandoc needs a space after the hashes to recognise an ATX heading
+cat('## All output files')
+cat('\n\n')
+report_dir <- Sys.getenv('REPORT_FILES_PATH')
+all_files <- list.files(path = report_dir, full.names = TRUE, recursive = TRUE)
+# fixed = TRUE: the directory is a literal prefix, not a regular expression
+for (f in sub(report_dir, '.', all_files, fixed = TRUE)) {
+  cat('* [', f, '](', f, ')\n')
+}
+cat('\n')
+```
+
+
+```{r, 'save output directory of this tool', echo=FALSE}
+## each elastic tool has a galaxy history output that stores a directory path
+## so that other tools can reference the outputs from this tool.
+
+## directory holding TOOL_LOG + '/' + last component of REPORT_FILES_PATH
+log_parts <- strsplit(Sys.getenv('TOOL_LOG'), '/')[[1]]
+files_parts <- strsplit(Sys.getenv('REPORT_FILES_PATH'), '/')[[1]]
+database_root <- paste(log_parts[-length(log_parts)], collapse = '/')
+write(paste0(database_root, '/', files_parts[length(files_parts)]), Sys.getenv('TOOL_OUTPUT_DIR'))
+```