# HG changeset patch # User mingchen0919 # Date 1520430302 18000 # Node ID 0bfeaf18a871a21209b8ee096719b96d67051b39 planemo upload diff -r 000000000000 -r 0bfeaf18a871 htseq_count.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count.Rmd Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,65 @@ +--- +title: 'htseq-count analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + + +## Run htseq-count + +```{bash} +cd ${X_d} + +cat >htseq-count.sh < counts.txt + + grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c} +EOF +``` + + +```{r} +# display htseq-count job script +htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh') +tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size ))) +``` + +```{bash} +cd ${X_d} +sh htseq-count.sh +``` + +## Counts + +```{r} +count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) +sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) +colnames(count_data) = rep(sample_names, length = ncol(count_data)) +DT::datatable(count_data, caption = "htseq counts for each sample") +``` + +```{r} +# save count data as an R object +save(count_data, file = paste0(opt$X_d, '/counts.RData')) +``` + +```{bash} +cp ${X_d}/counts.RData ${X_O} +``` + + diff -r 000000000000 -r 0bfeaf18a871 htseq_count.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count.sh Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,20 @@ +Rscript '${__tool_directory__}/htseq_count_render.R' + + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + + -A '$alignment_files' + -B '$sample_names' + -G $gff + -f $format + -r $order + -S $stranded + -a $minaqual + -T $feature_type + -i $idattr + -m $mode + -c $count + -O $count_rdata \ No newline at end of file diff -r 000000000000 -r 0bfeaf18a871 htseq_count.xml --- /dev/null Thu Jan 
01 00:00:00 1970 +0000 +++ b/htseq_count.xml Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,112 @@ + + Counting reads in features. + + pandoc + r-getopt + r-rmarkdown + htseq + r-dt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 0bfeaf18a871 htseq_count_index.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_index.Rmd Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,16 @@ +--- +title: "htseq-count Report" +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(echo = TRUE, error = TRUE) +``` + +## References + +Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6. + +Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29. + +Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang (2015). HTSeq—a Python framework to work with high-throughput sequencing data. In Bioinformatics, 31 (2), pp. 166--169. \ No newline at end of file diff -r 000000000000 -r 0bfeaf18a871 htseq_count_render.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_render.R Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,83 @@ +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. 
+##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) # divert regular R output to the file +sink(zz, type = 'message') # divert messages, warnings and errors to the same file + +#------------import libraries-------------------- +options(stringsAsFactors = FALSE) + +library(getopt) +library(rmarkdown) +#------------------------------------------------ + + +#------------get arguments into R (getopt spec columns: long flag, short flag, argument mask, data type)---- +# library(dplyr) +# getopt_specification_matrix(extract_short_flags('')) %>% +# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) + + +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B", "X_G", + "X_f", "X_r", "X_S", "X_a", "X_T", "X_i", "X_m", "X_c", "X_O"), + short_flags = c("e", "o", "d", "s", "t", "A", "B", "G", "f", "r", "S", + "a", "T", "i", "m", "c", "O"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, + 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", "character", + "character", "character", "character", "character", + "character", "character", "character", "character", + "character") + ) +) +opt = getopt(spec_matrix) +#---------------------------------------------------- + + +#-----------export the parsed options (every element of opt except the first) +# as environment variables named after the list names (X_e, X_o, X_d, ...)--- +do.call(Sys.setenv, opt[-1]) +#---------------------------------------------------- + +#---------- often used variables ---------------- +# OUTPUT_REPORT: path to galaxy output report (option X_o) +# OUTPUT_DIR: path to the output associated directory, which stores all outputs (option X_d) +# TOOL_DIR: path to the tool installation directory (option X_t) +OUTPUT_DIR = opt$X_d +TOOL_DIR = opt$X_t +OUTPUT_REPORT = opt$X_o + + +# create the output associated directory (and any missing parent directories) to store all outputs +dir.create(OUTPUT_DIR, recursive = TRUE) + +#-----------------render site-------------- +# copy site generating materials into OUTPUT_DIR/site_generator 
+dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE) +system(paste0('cp -r ', TOOL_DIR, '/htseq_count.Rmd ', OUTPUT_DIR, '/site_generator/htseq_count.Rmd')) # destination must keep the .Rmd extension: render_site() only renders .Rmd/.md sources, and the navbar links to htseq_count.html +system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml')) +system(paste0('cp -r ', TOOL_DIR, '/htseq_count_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd')) +# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file (output_dir: "../_site") +render_site(input = paste0(OUTPUT_DIR, '/site_generator')) +# remove site generating materials from output associated directory +unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE) +# move _site/* into output associated directory +move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR) +system(move_cmd) +#------------------------------------------ + +#-----copy the rendered index.html to the galaxy output report path----- +cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT) +system(cp_index) +#----------------------------------- + +#==============the end============== + + +##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file diff -r 000000000000 -r 0bfeaf18a871 htseq_count_site.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_site.yml Wed Mar 07 08:45:02 2018 -0500 @@ -0,0 +1,15 @@ +name: "Analysis Report" +output_dir: "../_site" +navbar: + title: "" + type: inverse + left: + - text: "Home" + icon: fa-home + href: index.html + - text: "TITLE 1" + href: htseq_count.html +output: + html_document: + theme: cosmo + highlight: textmate \ No newline at end of file