changeset 0:e0638fbb50f2 draft

planemo upload
author mingchen0919
date Fri, 02 Mar 2018 13:17:45 -0500
parents
children a76288e18d85
files star.Rmd star.sh star.xml star_render.R
diffstat 4 files changed, 160 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star.Rmd	Fri Mar 02 13:17:45 2018 -0500
@@ -0,0 +1,36 @@
+---
+title: 'STAR: RNA-Seq aligner'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# Echo code only when X_e (the tool's -e value, expected as an env var) is TRUE.
+knitr::opts_chunk$set(
+  echo = isTRUE(as.logical(Sys.getenv("X_e", unset = "FALSE"))),
+  error = TRUE
+)
+```
+
+# Generating genome indexes
+
+```{bash}
+cd "${X_d}"
+# create genome directory for genome indexes
+mkdir -p "${X_d}/genomeDir"
+
+# Both sjdb options share one line so a missing GTF ("None") removes the pair.
+cat >temp.sh <<EOF
+STAR \\
+  --runMode genomeGenerate \\
+  --genomeDir "${X_d}/genomeDir" \\
+  --genomeFastaFiles $( echo ${X_A} | sed 's/,/ /g' ) \\
+  --sjdbGTFfile ${X_B} --sjdbOverhang ${X_C} \\
+  > /dev/null 2>&1
+EOF
+
+# Drop option lines whose value is None (input not supplied).
+grep -v None temp.sh > star-job.sh
+# run star
+sh star-job.sh
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star.sh	Fri Mar 02 13:17:45 2018 -0500
@@ -0,0 +1,11 @@
+## Command template: same flags as the <command> block of star.xml.
+Rscript '${__tool_directory__}/star_render.R'
+    -e '$echo'
+    -o '$report'
+    -d '$report.files_path'
+    -s '$sink_message'
+    -t '${__tool_directory__}'
+    
+    -A '$genomeFastaFiles'
+    -B '$sjdbGTFfile'
+    -C '$sjdbOverhang'
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star.xml	Fri Mar 02 13:17:45 2018 -0500
@@ -0,0 +1,61 @@
+<tool id="aurora_star" name="Aurora Star" version="1.0.0">
+    <!-- Galaxy wrapper: runs star_render.R, which is handed the inputs below as command-line flags. -->
+    <description>ultrafast universal RNA-seq aligner</description>
+    <requirements>
+        <!-- NOTE(review): STAR itself is not listed here; confirm it is on PATH or add a pinned "star" package. -->
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <!-- Substitutions are single-quoted so values with shell metacharacters stay intact. -->
+    <command><![CDATA[Rscript '${__tool_directory__}/star_render.R'
+    -e '$echo'
+    -o '$report'
+    -d '$report.files_path'
+    -s '$sink_message'
+    -t '${__tool_directory__}'
+    -A '$genomeFastaFiles'
+    -B '$sjdbGTFfile'
+    -C '$sjdbOverhang']]></command>
+    <inputs>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="genomeFastaFiles" argument="--genomeFastaFiles" label="Genome fasta files"
+               optional="False" format="fasta,fa"/>
+        <param type="data" name="sjdbGTFfile" argument="--sjdbGTFfile" label="Annotated transcripts"
+               help="the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available."
+               optional="True" format="gtf"/>
+        <param type="integer" name="sjdbOverhang" argument="--sjdbOverhang" label="sjdbOverhang"
+               help="the length of the genomic sequence around the annotated junction to be used in constructing the splice junctions database. Ideally, this length should be equal to the ReadLength-1, where ReadLength is the length of the reads. For instance, for Illumina 2x100b paired-end reads, the ideal value is 100-1=99. In case of reads of varying length, the ideal value is max(ReadLength)-1. In most cases, a generic value of 100 will work as well as the ideal value."
+               optional="False" value="100" min="1"/>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="tool report"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star_render.R	Fri Mar 02 13:17:45 2018 -0500
@@ -0,0 +1,52 @@
+##============ Sink warnings and errors to a file ==============
+## Console output and messages both go to warnings_and_errors.txt,
+## which star.xml collects as the "Warnings and Errors" dataset.
+##==============================================================
+zz <- file('warnings_and_errors.txt', open = 'wt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# Every short flag passed on the command line maps to the long name
+# X_<flag>, takes a required value ('1') and is read as a string.
+short_flags <- c('e', 'o', 'd', 's', 't', 'A', 'B', 'C')
+spec_matrix <- cbind(paste0('X_', short_flags), short_flags, '1', 'character')
+opt <- getopt(spec_matrix)
+#----------------------------------------------------
+
+
+#-----------using passed arguments in R
+#           to define system environment variables---
+# opt[-1] skips the first element, which getopt reserves for ARGS.
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+OUTPUT_DIR <- opt$X_d
+TOOL_DIR <- opt$X_t
+RMD_NAME <- 'star.Rmd'
+OUTPUT_REPORT <- opt$X_o
+
+# create the output associated directory to store all outputs
+dir.create(OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render Rmd--------------
+render(file.path(TOOL_DIR, RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+##--------end of code rendering .Rmd templates----------------
+# Undo both diversions before closing the connection they share.
+sink(type = 'message')
+sink()
+close(zz)
+##=========== End of sinking output=============================
\ No newline at end of file