changeset 0:5f6352750d82 draft

planemo upload
author mingchen0919
date Sun, 04 Mar 2018 11:42:48 -0500
parents
children bcf9ea03aefe
files aurora_star_site.Rmd aurora_star_site.xml aurora_star_site_index.Rmd aurora_star_site_render.R aurora_star_site_site.yml spec.txt
diffstat 6 files changed, 267 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aurora_star_site.Rmd	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,95 @@
+---
+title: 'STAR: RNA-Seq aligner'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# Chunk defaults for the whole report: `echo` follows the X_e option (a
+# "TRUE"/"FALSE" string converted with as.logical), and error = TRUE is set
+# for every chunk.
+do.call(knitr::opts_chunk$set, list(echo = as.logical(opt$X_e), error = TRUE))
+```
+
+
+## Generating genome indexes
+
+```{bash}
+cd ${X_d}
+# create the genome directory that will hold the STAR genome indexes
+mkdir -p ${X_d}/genomeDir
+# write the STAR command to temp.sh, one option per line; commas in X_A become spaces
+cat >temp.sh <<EOF
+STAR \\
+  --runMode genomeGenerate \\
+  --genomeDir ${X_d}/genomeDir \\
+  --genomeFastaFiles $( echo ${X_A} | sed 's/,/ /g' ) \\
+  --sjdbGTFfile ${X_B} \\
+  --sjdbOverhang ${X_C} \\
+  > /dev/null 2>&1
+EOF
+# keep only the lines that do not contain "None" (presumably an unset optional input; confirm)
+grep -v None temp.sh > index-genome.sh
+# NOTE(review): --sjdbOverhang stays even when the --sjdbGTFfile line is dropped; confirm STAR accepts this
+# run the filtered STAR indexing script
+sh index-genome.sh
+```
+
+
+```{r}
+# display index-genome code; `tags` is namespace-qualified because htmltools is never attached
+index_genome_sh <- paste0(opt$X_d, '/index-genome.sh')
+htmltools::tags$code(htmltools::tags$pre(readChar(index_genome_sh, file.info(index_genome_sh)$size)))
+```
+
+## Running mapping jobs
+
+```{bash}
+cd ${X_d}
+# write the STAR mapping command to temp.sh, one read file per line
+cat >temp.sh <<EOF
+STAR \\
+  --genomeDir ${X_d}/genomeDir \\
+  --readFilesIn \\
+  ${X_F} \\
+  ${X_R} \\
+  > /dev/null 2>&1
+EOF
+# drop lines containing "None" (presumably an unset second-reads file; confirm)
+grep -v None temp.sh > mapping.sh
+
+# run the filtered mapping script
+sh mapping.sh
+```
+
+
+```{r}
+# display mapping code; `tags` is namespace-qualified because htmltools is never attached
+mapping_sh <- paste0(opt$X_d, '/mapping.sh')
+htmltools::tags$code(htmltools::tags$pre(readChar(mapping_sh, file.info(mapping_sh)$size)))
+```
+
+
+## SAM to sorted BAM
+
+```{bash}
+cd ${X_d}
+# store the conversion command in sam2bam.sh
+printf '%s\n' "samtools sort -o Aligned.out.sorted.bam Aligned.out.sam" > sam2bam.sh
+# sort Aligned.out.sam into Aligned.out.sorted.bam
+sh sam2bam.sh
+# copy the sorted BAM to the path held in X_S
+cp Aligned.out.sorted.bam ${X_S}
+```
+
+```{r}
+# display sam to bam code; `tags` is namespace-qualified because htmltools is never attached
+sam2bam_sh <- paste0(opt$X_d, '/sam2bam.sh')
+htmltools::tags$code(htmltools::tags$pre(readChar(sam2bam_sh, file.info(sam2bam_sh)$size)))
+```
+
+## Mapping statistics
+
+```{bash}
+# print samtools flagstat output for the sorted BAM into the report
+samtools flagstat ${X_d}/Aligned.out.sorted.bam
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aurora_star_site.xml	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,54 @@
+<tool id="aurora_star_site" name="Aurora STAR site" version="1.0.0">
+  <description>ultrafast universal RNA-seq aligner&#xD;
+    </description>
+  <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="2.5.4a">star</requirement><requirement type="package" version="1.7">samtools</requirement></requirements>
+  <stdio>
+        <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/></stdio>
+  <!-- Runs the render script that ships with this tool as aurora_star_site_render.R; path arguments are quoted. --><command><![CDATA[Rscript '${__tool_directory__}/aurora_star_site_render.R'
+
+    -e $echo
+    -o '$report'
+    -d '$report.files_path'
+    -s '$sink_message'
+    -t '${__tool_directory__}'
+    
+    -A '$genomeFastaFiles'
+    -B '$sjdbGTFfile'
+    -C '$sjdbOverhang'
+    -F '$first_reads'
+    -R '$second_reads'
+    -S '$sorted_bam']]></command>
+  <inputs>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/><param type="data" name="first_reads" label="First reads" optional="False" format="fastq,fastqsanger"/><param type="data" name="second_reads" label="Second reads" optional="True" format="fastq,fastqsanger"/><param type="data" name="genomeFastaFiles" argument="--genomeFastaFiles" label="Genome fasta files" optional="False" format="fasta,fa"/><param type="data" name="sjdbGTFfile" argument="--sjdbGTFfile" label="Annotated transcripts" help="the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available." optional="True" format="gtf"/><param type="integer" name="sjdbOverhang" argument="--sjdbOverhang" label="sjdbOverhang" help="the length of the genomic sequence around the annotated junction to be used in constructing the splice junctions database. Ideally, this length should be equal to the ReadLength-1, where ReadLength is the length of the reads. For instance, for Illumina 2x100b paired-end reads, the ideal value is 100-1=99. In case of reads of varying length, the ideal value is max(ReadLength)-1. In most cases, a generic value of 100 will work as well as the ideal value." optional="False" value="100" min="1"/></inputs>
+  <outputs>
+        <data name="report" format="html" label="${tool.name} report" hidden="false"/><data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/><data name="sorted_bam" format="bam" hidden="false"/></outputs>
+  <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[@article{dobin2013star,
+  title={STAR: ultrafast universal RNA-seq aligner},
+  author={Dobin, Alexander and Davis, Carrie A and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R},
+  journal={Bioinformatics},
+  volume={29},
+  number={1},
+  pages={15--21},
+  year={2013},
+  publisher={Oxford University Press}
+}]]></citation></citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aurora_star_site_index.Rmd	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,11 @@
+---
+title: "Analysis Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE, echo = TRUE)  # chunk defaults for the index page
+```
+
+## References
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aurora_star_site_render.R	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,80 @@
+##============ Redirect output and messages to a log file ==============
+## Both normal output and the message stream are diverted to
+## warnings_and_errors.txt until the sink is removed at the end of the script.
+zz <- file(description = 'warnings_and_errors.txt')
+sink(file = zz, type = 'output')
+sink(file = zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# library(dplyr)
+# getopt_specification_matrix(extract_short_flags('aurora_star_site.xml')) %>%
+#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
+
+
+# One row per command-line flag, in the column order getopt expects: long
+# name, short name, argument mask (1 = the flag requires a value) and type.
+# Each short flag -x is exposed as opt$X_x and is read as a character string.
+flag_letters <- c("e", "o", "d", "s", "t", "A", "B", "C", "F", "R", "S")
+n_flags <- length(flag_letters)
+spec_matrix <- cbind(
+  long_flags = paste0("X_", flag_letters),
+  short_flags = flag_letters,
+  argument_mask_flags = rep("1", n_flags),
+  data_type_flags = rep("character", n_flags)
+)
+# parse the command line into a named list
+opt <- getopt(spec_matrix)
+#----------------------------------------------------
+
+
+#-----------export the parsed arguments as environment variables,
+#           skipping the first element of `opt` ---
+do.call(what = Sys.setenv, args = opt[-1])
+#----------------------------------------------------
+
+#---------- paths used throughout the script ----------------
+# OUTPUT_REPORT: galaxy output report file (X_o)
+# OUTPUT_DIR: directory associated with the report, stores all outputs (X_d)
+# TOOL_DIR: tool installation directory (X_t)
+OUTPUT_REPORT <- opt[["X_o"]]
+OUTPUT_DIR <- opt[["X_d"]]
+TOOL_DIR <- opt[["X_t"]]
+
+
+# create the output directory (and any missing parents) before it is used
+dir.create(path = OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render site--------------
+# copy site sources into OUTPUT_DIR; the report template keeps the file name
+# aurora_star_site.Rmd so it renders to the aurora_star_site.html navbar target
+dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
+system(paste0('cp -r ', TOOL_DIR, '/aurora_star_site.Rmd ', OUTPUT_DIR, '/site_generator/aurora_star_site.Rmd'))
+system(paste0('cp -r ', TOOL_DIR, '/aurora_star_site_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
+system(paste0('cp -r ', TOOL_DIR, '/aurora_star_site_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
+# render to OUTPUT_DIR/_site (set in "_site.yml"), then drop the copied sources
+render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
+unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
+# move _site/* into output associated directory
+move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR)
+system(move_cmd)
+#------------------------------------------
+
+#-----copy index.html to the report output-----
+# OUTPUT_DIR/index.html is copied to the path held in OUTPUT_REPORT
+system(paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT))
+#-----------------------------------
+
+#==============the end==============
+
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message')  # reset the message stream before the output sink on `zz` is removed
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aurora_star_site_site.yml	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,15 @@
+name: "Analysis Report"
+output_dir: "../_site"  # rendered site goes to a _site directory beside the site source directory
+navbar:
+    title: ""
+    type: inverse
+    left:
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: ""  # NOTE(review): empty label, so this navbar link has no visible text; confirm intended
+          href: aurora_star_site.html  # presumably the page rendered from aurora_star_site.Rmd; confirm
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spec.txt	Sun Mar 04 11:42:48 2018 -0500
@@ -0,0 +1,12 @@
+long_flags,short_flags,argument_mask_flags,data_type_flags
+X_e,e,1,character
+X_o,o,1,character
+X_d,d,1,character
+X_s,s,1,character
+X_t,t,1,character
+X_A,A,1,character
+X_B,B,1,character
+X_C,C,1,character
+X_F,F,1,character
+X_R,R,1,character
+X_S,S,1,character