Mercurial > repos > mingchen0919 > elastic_r_tidyverse

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/elastic_r_package.Rmd	Wed Mar 28 23:34:03 2018 -0400
@@ -0,0 +1,110 @@
+---
+title: 'Tool Report'
+output: html_document
+---
+
+<style>
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+</style>
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE)
+```
+
+## User input
+
+```{r, echo=FALSE}
+knitr::kable(arguments)
+```
+
+
+```{r, echo=FALSE}
+# each tool execution runs one or multiple chained functions but generates only one single object.
+# this object is meant to be saved to an rdata file that is output to the galaxy history so that other
+# tools can use it; the rdata output file's dataset number is used in the variable name of the object.
+job_script_path <- file.path(Sys.getenv('REPORT_FILES_PATH'), 'job-script.R')
+tool_rdata_output <- Sys.getenv('TOOL_RDATA_OUTPUT')
+dataset_id <- basename(tool_rdata_output)
+dataset_num <- gsub("(.+_)([0-9]+)\\.dat", "\\2", dataset_id)
+rdata_id <- paste0('rdata_', dataset_num)
+
+## build script
+# the first line of the job script is 'rdata_NUM = ', where 'NUM' is the dataset number of the output rdata.
+write(paste0(rdata_id, ' = '), file = job_script_path)
+# loop through the argument data frame (all rows but the last) to build up the job script.
+# seq_len() gives an empty loop for tables with fewer than two rows, where 1:(n - 1) would count down.
+for (i in seq_len(max(nrow(arguments) - 1, 0))) {
+  row_type <- arguments[i, 'row_type']
+  # the row after an argument row decides whether that argument line needs a trailing comma.
+  next_is_operator <- isTRUE(arguments[i + 1, 'row_type'] == 'operator')
+  switch (row_type,
+    # if it's a function row, the line has format 'function_name('
+    func = write(paste0(arguments[i, 'function_name'], '('),
+                 file = job_script_path,
+                 append = TRUE ),
+
+
+    argument = {
+      # if it's an argument row and the next row is not an operator row,
+      # the line has format '    argument_name=argument_value,'
+      # if it's an argument row and the next row IS an operator row,
+      # the line has format '    argument_name=argument_value'. note that there is no comma at the end.
+      # NOTE(review): argument_value is written verbatim whatever argument_value_type is, so
+      # values of type 'rdata' end up in the script exactly as stored in arguments.txt.
+      line_end <- if (next_is_operator) '' else ','
+      write(paste0('    ', arguments[i, 'argument_name'], '=', arguments[i, 'argument_value'], line_end),
+            file = job_script_path,
+            append = TRUE )
+    },
+
+    # if it is an operator row, the line has format ')  operator'
+    operator =  write(paste0(')  ', arguments[i, 'operator']),
+                      file = job_script_path,
+                      append = TRUE )
+  )
+}
+
+# the loop stops before the final operator row, so the last call is still missing its ')'
+write(')', file = job_script_path, append = TRUE)
+```
+
+
+```{bash, 'display script', results='asis', echo=FALSE}
+# print the generated job script as a fenced r code block; the path is quoted so spaces cannot split it.
+echo '## Job script'
+printf '\n\n'
+echo '```r'
+cat "${REPORT_FILES_PATH}/job-script.R"
+echo '```'
+```
+
+
+## Result
+
+```{r, 'run job script', echo=FALSE}
+source(job_script_path)
+# display the result; get() looks the object up by name without parsing rdata_id as R code.
+get(rdata_id)
+```
+
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## after the job is done, we list all files from the output directory as markdown links
+## whose paths are relative to that directory.
+
+# pandoc only treats '##' as a header when it is followed by a space.
+cat('## All output files\n\n')
+report_files_path <- Sys.getenv('REPORT_FILES_PATH')
+all_files <- list.files(path = report_files_path, full.names = TRUE, recursive = TRUE)
+
+# fixed = TRUE: the directory is a literal path to strip, not a regular expression.
+for (f in sub(report_files_path, '.', all_files, fixed = TRUE)) {
+  cat('* [', f, '](', f, ')\n', sep = '')
+}
+cat('\n')
+```
\ No newline at end of file
--- a/elastic_r_package_render.R	Tue Mar 27 23:42:53 2018 -0400
+++ b/elastic_r_package_render.R	Wed Mar 28 23:34:03 2018 -0400
@@ -9,15 +9,16 @@
 options(stringsAsFactors = FALSE)

 library(rmarkdown)
+library(tidyverse)
 #------------------------------------------------

 #------------------------------------------------
-options_and_arguments = read.table(paste0(Sys.getenv('REPORT_FILES_PATH'), '/options_and_arguments.txt'),
+arguments = read.table(paste0(Sys.getenv('REPORT_FILES_PATH'), '/arguments.txt'),
                                    sep = '|', header = TRUE)


 #-----------------render Rmd--------------
-render(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/elastic_tool.Rmd'),
+render(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/elastic_r_package.Rmd'),
        output_file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/report.html'))

 # for some unknown reason, directly using REPORT as the input value for output_file parameter
--- a/elastic_r_package_wrappers_macros.xml	Tue Mar 27 23:42:53 2018 -0400
+++ b/elastic_r_package_wrappers_macros.xml	Wed Mar 28 23:34:03 2018 -0400
@@ -14,43 +14,49 @@
     </xml>

     <xml name="function_components">
-        <repeat name="function_component_repeat" title="Call a function" min="1" default="1">
+        <repeat name="function_components" title="Call a function" min="1" default="1">
             <param type="select" name="function_name" multiple="false" label="==== Call function ====">
                 <option value="ggplot" selected="false">ggplot</option>
+                <option value="geom_point" selected="false">geom_point</option>
                 <option value="aes" selected="false">aes</option>
                 <option value="read.table" selected="false">read.table</option>
                 <option value="write.csv" selected="false">write.csv</option>
             </param>
             <expand macro="function_arguments" />
-            <param type="select" name="operator" label="connect to next function">
-                <option value="%>%" selected="false">pipe (%>%)</option>
-                <option value="+" selected="false">plus (+)</option>
-            </param>
         </repeat>
     </xml>

     <xml name="function_arguments">
-        <repeat name="regular_argument_value" title="Argument value: from user input" min="0" default="0">
-            <param type="text" name="argument" label="argument"  />
-            <param type="text" name="argument_value"  label="argument value"  />
-            <param type="select" name="argument_type"  label="argument type">
-                <option value="string" selected="true">string</option>
-                <option value="numeric" selected="false">numeric</option>
-                <option value="boolean" selected="false">boolean</option>
-                <option value="variable" selected="false">variable</option>
-            </param>
+        <repeat name="function_arguments" title="argument name/value pairs" min="0" default="0">
+            <conditional name="argument_type">
+                <param name="argument_type_selector" type="select" label="Argument type">
+                    <option value="value" selected="false">user input value</option>
+                    <option value="rdata" selected="false">value from RData output</option>
+                </param>
+                <when value="value">
+                    <param type="text" name="argument_name" label="argument name"  />
+                    <param type="text" name="argument_value"  label="argument value"  />
+                </when>
+                <when value="rdata">
+                    <param type="text" name="argument_name" label="argument name"  />
+                    <param type="data" format="rdata" name="argument_value" optional="false" label="RData output"  />
+                </when>
+            </conditional>
         </repeat>
-        <repeat name="rdata_argument_value" title="Argument value: from RData" min="0" default="0">
-            <param type="text" name="argument" label="argument"  />
-            <param type="data" format="rdata" name="argument_value" optional="false" label="RData output"  />
-        </repeat>
+        <param type="select" name="operator" label="connect to next function">
+            <sanitizer>
+                <valid initial="string.printable"/>
+            </sanitizer>
+            <option value="%>%" selected="true">pipe (%>%)</option>
+            <option value="+" selected="false">plus (+)</option>
+        </param>
     </xml>


     <!--output set-->
     <xml name="output_set">
         <data format="html" name="report" label="${tool.name} report"/>
-        <data format="txt" name="tool_help_doc" label="${tool.name} help doc"/>
+        <data format="rdata" name="tool_rdata_output" label="${tool.name} RData output ${on_string}"/>
         <data format="txt" name="tool_log" label="${tool.name} log" />
     </xml>

@@ -70,18 +76,20 @@
         ############ 3. save user inputs into arguments.txt

         ############    3.1 table header
-        echo 'function|input_type|argument|argument_value|argument_value_type|operator' > $report.files_path/arguments.txt &&
+        echo 'row_type|function_name|argument_name|argument_value|argument_value_type|operator' > $report.files_path/arguments.txt &&


         ############    3.2 loop through function component repeat
-        #for $fun_component in $function_components:
-            #for reg_arg in $fun_component.regular_argument_value
-                echo '$fun_component.function_name|regular|$reg_arg.argument|$arg_arg.argument_value|$arg_arg.argument_value_type|$fun_component.operator' >> $report.files_path/arguments.txt &&
+        #for fun_component in $function_components:
+            echo 'func|$fun_component.function_name||||' >> $report.files_path/arguments.txt &&
+
+            Rscript -e 'library(tidyverse); help($fun_component.function_name)' >> $report.files_path/$fun_component.function_name-help.txt &&
+
+            #for argument in $fun_component.function_arguments:
+                echo 'argument||$argument.argument_type.argument_name|$argument.argument_type.argument_value|$argument.argument_type.argument_type_selector|' >> $report.files_path/arguments.txt &&
             #end for
-
-            #for rdata_arg in $fun_component.rdata_argument_value
-                echo '$fun_component.function_name|rdata|$rdata_arg.argument|$arg_arg.argument_value|rdata|$fun_component.operator' >> $report.files_path/arguments.txt &&
-            #end for
+            echo 'operator|||||$fun_component.operator' >> $report.files_path/arguments.txt &&
+        #end for

         ############ output_set
         ## '$report'
@@ -91,12 +99,12 @@
         #######################
         export REPORT='$report' &&
         export REPORT_FILES_PATH='$report.files_path' &&
-        export TOOL_HELP_DOC='$tool_help_doc' &&
+        export TOOL_RDATA_OUTPUT='$tool_rdata_output' &&
         export TOOL_LOG='$tool_log' &&


         ############ run render R script to render R markdowns
-        Rscript '${__tool_directory__}/elastic_tool_render.R'
+        Rscript '${__tool_directory__}/elastic_r_package_render.R'

         ]]></command>
     </xml>
--- a/elastic_tool.Rmd	Tue Mar 27 23:42:53 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
----
-title: 'Tool Report'
-output: html_document
----
-
-<style>
-pre code, pre, code {
-  white-space: pre !important;
-  overflow-x: scroll !important;
-  word-break: keep-all !important;
-  word-wrap: initial !important;
-}
-</style>
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(error = TRUE)
-```
-
-## User input
-
-```{r, 'display user input'}
-# get user input and save it into a data frame.
-df = read.table(paste0(Sys.getenv('REPORT_FILES_PATH'), '/options_and_arguments.txt'),
-                sep = '|', header = TRUE)
-
-# if the input type is 'path_relative_to_a_tool', prepend A_TOOL_OUTPUT_PATH to the value to make
-# the value a full path.
-if (nrow(df[df$type == 'path_relative_to_a_tool', ]) > 0) {
-  for (i in 1:nrow(df[df$type == 'path_relative_to_a_tool', ])) {
-    root_path = readLines(df[df$type == 'path_relative_to_a_tool', ][i, 'path_type'])[1]
-    df[df$type == 'path_relative_to_a_tool', ][i, 'value'] = paste(root_path,
-                                                                   df[df$type == 'path_relative_to_a_tool', ][i, 'value'],
-                                                                   sep = '/')
-  }
-}
-
-## display user input as a table
-knitr::kable(df)
-```
-
-
-```{r, 'build script', echo=FALSE}
-##-------- build script files -----------
-
-# get tool name, the first line of the script is always the tool name.
-tool_name = df[df$type == 'tool_name', 'value']
-
-# if the number of option/argument pairs is larger than 0, build script file
-df2 = df[df$type != 'tool_name', ]
-if (nrow(df2) > 0) {
-  # write tool name as the first line of the script.sh
-  # before running the job, cd into the ${REPORT_FILES_PATH} directory
-  write(paste0(tool_name, ' \\'),
-        paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'))
-  df2 = df[df$type != 'tool_name', ]
-  write(paste(' ', df2$flag, df2$value, '\\', sep = ' '),
-        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'),
-        append = TRUE )
-  # remember that after writing option/argument lines to the script.sh, the last line
-  # has an extra newline character '\' which causes a problem. We can either remove that extra
-  # '\' or add a new line to the end. We choose to add a new line.
-
-  # add an extra line to the end to redirect stdout to stdout.txt and stderr to stderr.txt
-  write('  > ${REPORT_FILES_PATH}/stdout.txt 2>${REPORT_FILES_PATH}/stderr.txt',
-        paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'), append = TRUE)
-} else {
-  # if no option/argument input, simply display the help message
-  write(paste0(tool_name, ' -h'),
-        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/script.sh'))
-}
-```
-
-
-```{r, 'create paths if they do not exist', echo=FALSE}
-## if the input type is 'path_relative_to_this_tool', that means
-## we need to create a directory or file path.
-
-# create paths before running the job script
-df_paths = df[df$type == 'path_relative_to_this_tool', ]
-
-if (nrow(df_paths) > 0) {
-  for (i in 1:nrow(df_paths)) {
-    path = paste0(Sys.getenv('REPORT_FILES_PATH'), '/', df_paths[i, 'value'])
-    path_type = df_paths[i, 'path_type']
-
-    # create file paths
-    if ((path_type == 'file_path') & !file.exists(path)) {
-      dir_path = paste(head(strsplit(path, '/')[[1]], -1), collapse = '/' )
-      if (!dir.exists(dir_path)) {
-        dir.create(dir_path, recursive = TRUE)
-      }
-      file.create(path)
-    }
-
-    # create dir paths
-    if ((path_type == 'dir_path') & !dir.exists(path)) {
-      dir.create(path, recursive = TRUE)
-    }
-  }
-}
-```
-
-
-```{bash, 'run jobs', echo=FALSE}
-# run job script, always use absolute path.
-# we want to run all jobs within the working path.
-sh ${REPORT_FILES_PATH}/script.sh
-```
-
-
-```{bash, 'display script', results='asis', echo=FALSE}
-echo '## Job script'
-echo ''
-echo ''
-echo '```bash'
-cat ${REPORT_FILES_PATH}/script.sh
-echo '```'
-```
-
-
-```{r, 'display output directory contents', results='asis', echo=FALSE}
-## after the job is done, we list all files from the output directory.
-## full relative path to the output directory needs to be displayed.
-
-cat('##All output files')
-cat('\n\n')
-all_files = list.files(path = Sys.getenv('REPORT_FILES_PATH'),
-                       full.names = TRUE,
-                       recursive = TRUE)
-
-for (f in sub(Sys.getenv('REPORT_FILES_PATH'), '.', all_files) ) {
-  cat('* [', f, '](', f, ')\n')
-}
-cat('\n')
-```
-
-
-```{r, 'save output directory of this tool', echo=FALSE}
-## each elastic tool has a galaxy history output which contains the REPORT_FILES_PATH of this tool
-## so that other tools can reference the outputs from this tool.
-
-## obtain REPORT_FILES_PAHT and save it to a galaxy output.
-database_root = paste(head(strsplit(Sys.getenv('TOOL_LOG'), '/')[[1]], -1), collapse = '/')
-tool_output_dir_id = tail(strsplit(Sys.getenv('REPORT_FILES_PATH'), '/')[[1]], 1)
-tool_output_dir = paste0(database_root, '/', tool_output_dir_id)
-write(tool_output_dir, Sys.getenv('TOOL_OUTPUT_DIR'))
-```