# HG changeset patch # User mingchen0919 # Date 1521385636 14400 # Node ID d9601e1eb360c4519c9c24225373aef5f07660d1 # Parent e969699f49b6476718127f9a3954f0dbdc6c04f5 v1.1.0 diff -r e969699f49b6 -r d9601e1eb360 getopt_specification.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getopt_specification.csv Sun Mar 18 11:07:16 2018 -0400 @@ -0,0 +1,28 @@ +short flag,argument mask,data type,variable name,galaxy input type,description +o,1,character,report,data, +d,1,character,report.files_path,, +s,1,character,sink_message,data, +A,1,character,first_reads,data, +B,1,character,second_reads,data, +x,1,character,adapter_first_reads,data, +y,1,character,adapter_second_reads,data, +m,1,character,trimming_mode,select, +r,1,character,maximum_allowed_error_rate,float, +D,1,character,maximum_allowed_indel_error_rate,float, +q,1,character,quality_trimming_3_end,integer, +Q,1,character,mean_quality,integer, +l,1,character,minimum_read_length,integer, +j,1,character,advanced_options.junction_adapter,data, +M,1,character,advanced_options.tab_adapter,data, +b,1,character,advanced_options.barcode,boolean, +c,1,character,advanced_options.cut,text, +n,1,character,advanced_options.filter_degenerative_reads,boolean, +u,1,character,advanced_options.filter_undetermined_mate_pair_reads,boolean, +f,1,character,advanced_options.format,select, +z,1,character,advanced_options.compress,boolean, +E,1,character,advanced_options.qiime,boolean, +F,1,character,advanced_options.quiet,boolean, +i,1,character,advanced_options.intelligent,boolean, +1,1,character,trimmed_r1, +2,1,character,trimmed_r2, +3,1,character,trimmed_s, \ No newline at end of file diff -r e969699f49b6 -r d9601e1eb360 helper.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/helper.R Sun Mar 18 11:07:16 2018 -0400 @@ -0,0 +1,28 @@ +#' \code{getopt_specification_matrix} returns a getopt specification matrix. 
+#' +#' @param specification_file a csv file within the \code{galaxy_tool_directory} which stores getopt specification matrix data. +#' The first column holds short flags, the second column holds argument masks, the third column +#' holds data types. The fourth column holds variable names used in the tool XML. The first three columns are required. +#' @param gtg_name the name of a running GTG. +getopt_specification_matrix = function(specification_file, gtg_name = 'gtg', tool_dir = Sys.getenv('TOOL_DIRECTORY')) { + df = read.csv(paste0(tool_dir, specification_file), + header = TRUE, stringsAsFactors = FALSE) + # check if there are duplicated short flags + short_flags = df[, 1] + if (length(unique(short_flags)) < length(short_flags)) { + cat('----Duplicated short flags found ----\n') + cat('short flags: ', df[, 1][duplicated(df[, 1])], '\n') + stop('Duplicated short flags are not allowed.') + } + + # use short flags to generate long flags + long_flags = paste0('X_', df[, 1]) + + # specification matrix + df2 = data.frame(long_flags = long_flags, + short_flags = df[, 1], + argument_mask = df[, 2], + data_type = df[, 3]) + + as.matrix(df2) +} \ No newline at end of file diff -r e969699f49b6 -r d9601e1eb360 skewer.Rmd --- a/skewer.Rmd Fri Mar 02 09:59:25 2018 -0500 +++ b/skewer.Rmd Sun Mar 18 11:07:16 2018 -0400 @@ -1,11 +1,15 @@ --- -title: 'Skewer Trimming' -output: html_document +title: 'Skewer report' +output: + html_document: + number_sections: true + highlight: tango + code_folding: hide --- ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = as.logical(opt$X_e), + echo = TRUE, error = TRUE ) ``` @@ -13,44 +17,90 @@ # Run Skewer -```{bash} +```{bash echo=FALSE} +#--- build skewer job script --- +## change directory to output dir cd ${X_d} cat >temp.sh < /dev/null 2>&1 + ${X_A} \\ + ${X_B} \\ + -x ${X_x} \\ + -y ${X_y} \\ + -m ${X_m} \\ + -r ${X_r} \\ + -d ${X_D} \\ + -q ${X_q} \\ + -Q ${X_Q} \\ + -l ${X_l} \\ + -j ${X_j} \\ + -M ${X_M} \\ + 
-b ${X_b} \\ + -c ${X_c} \\ + -n ${X_n} \\ + -u ${X_u} \\ + -f ${X_f} \\ + -z ${X_z} \\ + -qiime ${X_E} \\ + -quiet ${X_F} \\ + -i ${X_i} \\ + -o trim > /dev/null 2>&1 + EOL -grep -v None temp.sh > skewer-job.sh - -# run skewer -sh skewer-job.sh - -cp trim-trimmed-pair1.fastq ${X_f} -cp trim-trimmed-pair2.fastq ${X_r} -mv trim-trimmed.log trim-trimmed.txt +# remove empty input lines +grep -v '\-M \\' temp.sh |\ + grep -v 'None' |\ + grep -v 'NO_ARGUMENT_NO' |\ + sed 's/NO_ARGUMENT_YES//g' > skewer-job.sh ``` - -```{r} +```{r echo=FALSE} # display skewer job script skewer_sh = paste0(opt$X_d, '/skewer-job.sh') tags$code(tags$pre(readChar(skewer_sh, file.info(skewer_sh)$size ))) ``` +```{bash echo=FALSE} +## change directory to output dir +cd ${X_d} + +sh skewer-job.sh + +if [ -e trim-trimmed-pair1.fastq ]; then + cp trim-trimmed-pair1.fastq ${X_1} +fi + +if [ -e trim-trimmed-pair2.fastq ]; then + cp trim-trimmed-pair2.fastq ${X_2} +fi + +if [ -e trim-trimmed.fastq ]; then + cp trim-trimmed.fastq ${X_3} +fi + +if [ -e trim-trimmed.log ]; then + cp trim-trimmed.log trim-trimmed.txt +fi +``` + + + # Results -```{r} -tags$ul( - tags$li(tags$a(href = 'trim-trimmed.txt', 'trim-trimmed.log')), - tags$li(tags$a(href = 'trim-trimmed-pair1.fastq', 'trim-trimmed-pair1.fastq')), - tags$li(tags$a(href = 'trim-trimmed-pair2.fastq', 'trim-trimmed-pair2.fastq')) -) +```{r echo=FALSE} +if (file.exists(paste0(opt$X_d, '/trim-trimmed.fastq'))) { + tags$ul( + tags$li(tags$a(href = 'trim-trimmed.txt', 'trim-trimmed.log')), + tags$li(tags$a(href = 'trim-trimmed.fastq', 'trim-trimmed.fastq')) + ) +} else { + tags$ul( + tags$li(tags$a(href = 'trim-trimmed.txt', 'trim-trimmed.log')), + tags$li(tags$a(href = 'trim-trimmed-pair1.fastq', 'trim-trimmed-pair1.fastq')), + tags$li(tags$a(href = 'trim-trimmed-pair2.fastq', 'trim-trimmed-pair2.fastq')) + ) +} + ``` - diff -r e969699f49b6 -r d9601e1eb360 skewer.sh --- a/skewer.sh Fri Mar 02 09:59:25 2018 -0500 +++ b/skewer.sh Sun Mar 18 11:07:16 2018 
-0400 @@ -1,17 +1,31 @@ -Rscript '${__tool_directory__}/skewer_render.R' +export TOOL_DIR='${__tool_directory__}' && + +Rscript '${__tool_directory__}/'skewer_render.R - -e $echo - -o $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}' - - -X $first_reads - -Y $second_reads - -x $adapter_x - -y $adapter_y - -A $end_quality - -B $mean_quality - - -f $trimmed_r1 - -r $trimmed_r2 + -o '$report' + -d '$report.files_path' + -s '$sink_message' + -A '$first_reads' + -B '$second_reads' + -x '$adapter_first_reads' + -y '$adapter_second_reads' + -m '$trimming_mode' + -r '$maximum_allowed_error_rate' + -D '$maximum_allowed_indel_error_rate' + -q '$quality_trimming_3_end' + -Q '$mean_quality' + -l '$minimum_read_length' + -j '$advanced_options.junction_adapter' + -M '$advanced_options.tab_adapter' + -b '$advanced_options.barcode' + -c '$advanced_options.cut' + -n '$advanced_options.filter_degenerative_reads' + -u '$advanced_options.filter_undetermined_mate_pair_reads' + -f '$advanced_options.format' + -z '$advanced_options.compress' + -E '$advanced_options.qiime' + -F '$advanced_options.quiet' + -i '$advanced_options.intelligent' + -1 '$trimmed_r1' + -2 '$trimmed_r2' + -3 '$trimmed_s' diff -r e969699f49b6 -r d9601e1eb360 skewer.xml --- a/skewer.xml Fri Mar 02 09:59:25 2018 -0500 +++ b/skewer.xml Sun Mar 18 11:07:16 2018 -0400 @@ -1,4 +1,4 @@ - + A fast and accurate adapter trimmer for next-generation sequencing paired-end reads @@ -11,47 +11,115 @@ - - - - - - + + + + + + + + + + + + + + + - + +
+ + + + + + + + + + + + + + + +
- + + `_ if you are not sure what parameter values to use.]]> % -# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) - - -spec_matrix = as.matrix( - data.frame(stringsAsFactors=FALSE, - long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_X", "X_Y", - "X_x", "X_y", "X_A", "X_B", "X_f", "X_r"), - short_flags = c("e", "o", "d", "s", "t", "X", "Y", "x", "y", "A", - "B", "f", "r"), - argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), - data_type_flags = c("character", "character", "character", "character", - "character", "character", "character", - "character", "character", "character", "character", - "character", "character") - ) -) -opt = getopt(spec_matrix) +# load helper function +source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R')) +# import getopt specification matrix from a csv file +spec_csv = paste0(Sys.getenv('TOOL_DIR'), '/getopt_specification.csv') +opt = getopt(getopt_specification_matrix(spec_csv)) +opt$X_t = Sys.getenv('TOOL_DIR') +print(opt) #---------------------------------------------------- @@ -46,14 +35,13 @@ # TOOL_DIR: path to the tool installation directory OUTPUT_DIR = opt$X_d TOOL_DIR = opt$X_t +OUTPUT_REPORT = opt$X_o RMD_NAME = 'skewer.Rmd' -OUTPUT_REPORT = opt$X_o # create the output associated directory to store all outputs dir.create(OUTPUT_DIR, recursive = TRUE) -opt + #-----------------render Rmd-------------- -paste0(TOOL_DIR, '/', RMD_NAME) render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT) #------------------------------------------