changeset 1:9be3a8754fb3 draft

initial
author mingchen0919
date Wed, 07 Mar 2018 08:51:53 -0500
parents 0bfeaf18a871
children fad3864136c9
files htseq_count.Rmd htseq_count.sh htseq_count.xml htseq_count_index.Rmd htseq_count_render.R htseq_count_site.Rmd htseq_count_site.sh htseq_count_site.xml htseq_count_site_index.Rmd
diffstat 9 files changed, 216 insertions(+), 215 deletions(-) [+]
line wrap: on
line diff
--- a/htseq_count.Rmd	Wed Mar 07 08:45:02 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
----
-title: 'htseq-count analysis'
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e), 
-  error = TRUE
-)
-```
-
-
-## Run htseq-count
-
-```{bash}
-cd ${X_d}
-
-cat >htseq-count.sh <<EOF
-  htseq-count \\
-    $(echo ${X_A} | sed 's/,/ /g') \\
-    ${X_G} \\
-    -f ${X_f} \\
-    -r ${X_r} \\
-    -s ${X_S} \\
-    -a ${X_a} \\
-    -t ${X_T} \\
-    -i ${X_i} \\
-    -m ${X_m} > counts.txt
-    
-  grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c}
-EOF
-```
-
-
-```{r}
-# display htseq-count job script
-htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh')
-tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size )))
-```
-
-```{bash}
-cd ${X_d}
-sh htseq-count.sh
-```
-
-## Counts
-
-```{r}
-count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1)
-sample_names = trimws(strsplit(opt$X_B, ',')[[1]])
-colnames(count_data) = rep(sample_names, length = ncol(count_data))
-DT::datatable(count_data, caption = "htseq counts for each sample")
-```
-
-```{r}
-# save count data as an R object
-save(count_data, file = paste0(opt$X_d, '/counts.RData'))
-```
-
-```{bash}
-cp ${X_d}/counts.RData ${X_O}
-```
-
-
--- a/htseq_count.sh	Wed Mar 07 08:45:02 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-Rscript '${__tool_directory__}/htseq_count_render.R'
-
-			-e $echo
-			-o $report
-			-d $report.files_path
-			-s $sink_message
-			-t '${__tool_directory__}'
-			
-			-A '$alignment_files'
-			-B '$sample_names'
-			-G $gff
-			-f $format
-			-r $order
-			-S $stranded
-			-a $minaqual
-			-T $feature_type
-			-i $idattr
-			-m $mode
-			-c $count
-			-O $count_rdata
\ No newline at end of file
--- a/htseq_count.xml	Wed Mar 07 08:45:02 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tool id="aurora_htseq_size" name="Aurora htseq site" version="1.0.0">
-    <description>Counting reads in features.</description>
-    <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.6">r-rmarkdown</requirement>
-        <requirement type="package" version="0.9.1">htseq</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-    </requirements>
-    <stdio>
-        <regex match="XXX" source="stderr" level="warning"
-               description="Check the warnings_and_errors.txt file for more details."/>
-    </stdio>
-    <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_site_render.R'
-
-			-e $echo
-			-o $report
-			-d $report.files_path
-			-s $sink_message
-			-t '${__tool_directory__}'
-			
-			-A '$alignment_files'
-			-B '$sample_names'
-			-G $gff
-			-f $format
-			-r $order
-			-S $stranded
-			-a $minaqual
-			-T $feature_type
-			-i $idattr
-			-m $mode
-			-c $count
-			-O $count_rdata]]></command>
-    <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
-               label="Display analysis code in report?"/>
-        <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"
-               multiple="True"/>
-        <param type="text" name="sample_names" label="Sample names"
-               help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files."
-               optional="False" size="5x25"/>
-        <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False">
-            <option value="sam" selected="false">SAM</option>
-            <option value="bam" selected="true">BAM</option>
-        </param>
-        <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/>
-        <param type="select" name="order" argument="-r" label="Order"
-               help="Use this option to indicate how the input data has been sorted. The default is name.&#13;&#10;&#13;&#10;"
-               optional="False" multiple="False">
-            <option value="name" selected="false">Name</option>
-            <option value="pos" selected="false">Position</option>
-        </param>
-        <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no" selected="false">No</option>
-            <option value="reverse" selected="false">Reverse</option>
-        </param>
-        <param type="integer" name="minaqual" argument="-a" label="Alignment quality "
-               help="Skip all reads with alignment quality lower than the given minimum value (default: 10)"
-               optional="False" value="10" min="0"/>
-        <param type="text" name="feature_type" argument="-t" label="Feature type"
-               help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)"
-               optional="False" value="exon"/>
-        <param type="text" name="idattr" argument="-i" label="ID attribute"
-               help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id."
-               optional="False" value="gene_id"/>
-        <param type="select" name="mode" argument="-m" label="Mode"
-               help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)"
-               optional="False" multiple="False">
-            <option value="union" selected="true">Union</option>
-            <option value="intersection-strict" selected="false">Intersection (strict)</option>
-            <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="report" format="html" label="Aurora htseq-count site on ${on_string}" hidden="false"/>
-        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
-        <data name="count" format="txt" label="Aurora htseq-count site on ${on_string}" hidden="false"/>
-        <data name="count_rdata" format="rdata" hidden="false"/>
-    </outputs>
-    <citations>
-        <citation type="bibtex"><![CDATA[
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
-            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[@article{anders2015htseq,
-  title={HTSeq—a Python framework to work with high-throughput sequencing data},
-  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
-  journal={Bioinformatics},
-  volume={31},
-  number={2},
-  pages={166--169},
-  year={2015},
-  publisher={Oxford University Press}
-}]]></citation>
-    </citations>
-</tool>
--- a/htseq_count_index.Rmd	Wed Mar 07 08:45:02 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
----
-title: "htseq-count Report"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = TRUE, error = TRUE)
-```
-
-## References
-
-Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
-
-Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
-
-Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang (2015). HTSeq—a Python framework to work with high-throughput sequencing data. In Bioinformatics, 31 (2), pp. 166--169.
\ No newline at end of file
--- a/htseq_count_render.R	Wed Mar 07 08:45:02 2018 -0500
+++ b/htseq_count_render.R	Wed Mar 07 08:51:53 2018 -0500
@@ -10,6 +10,7 @@
 
 library(getopt)
 library(rmarkdown)
+library(htmltools)
 #------------------------------------------------
 
 
@@ -58,9 +59,9 @@
 #-----------------render site--------------
 # copy site generating materials into OUTPUT_DIR
 dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
-system(paste0('cp -r ', TOOL_DIR, '/htseq_count.Rmd ', OUTPUT_DIR, '/site_generator/htseq_count.Rmdd'))
+system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site.Rmd ', OUTPUT_DIR, '/site_generator/htseq_count_site.Rmd'))
 system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
-system(paste0('cp -r ', TOOL_DIR, '/htseq_count_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
+system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
 # render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
 render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
 # remove site generating materials from output associated directory
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count_site.Rmd	Wed Mar 07 08:51:53 2018 -0500
@@ -0,0 +1,65 @@
+---
+title: 'htseq-count analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e), 
+  error = TRUE
+)
+```
+
+
+## Run htseq-count
+
+```{bash}
+cd ${X_d}
+
+cat >htseq-count.sh <<EOF
+  htseq-count \\
+    $(echo ${X_A} | sed 's/,/ /g') \\
+    ${X_G} \\
+    -f ${X_f} \\
+    -r ${X_r} \\
+    -s ${X_S} \\
+    -a ${X_a} \\
+    -t ${X_T} \\
+    -i ${X_i} \\
+    -m ${X_m} > counts.txt
+    
+  grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c}
+EOF
+```
+
+
+```{r}
+# display htseq-count job script
+htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh')
+tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size )))
+```
+
+```{bash}
+cd ${X_d}
+sh htseq-count.sh
+```
+
+## Counts
+
+```{r}
+count_data <- read.table(file.path(opt$X_d, 'counts.txt'), row.names = 1)  # column 1 of counts.txt becomes the row names
+sample_names <- trimws(strsplit(opt$X_B, ',')[[1]])  # comma-separated names passed via -B, whitespace stripped
+colnames(count_data) <- rep(sample_names, length.out = ncol(count_data))  # recycle/truncate names to the column count
+DT::datatable(count_data, caption = "htseq counts for each sample")
+```
+
+```{r}
+# save count data as an R object
+save(count_data, file = paste0(opt$X_d, '/counts.RData'))
+```
+
+```{bash}
+cp ${X_d}/counts.RData ${X_O}
+```
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count_site.sh	Wed Mar 07 08:51:53 2018 -0500
@@ -0,0 +1,20 @@
+Rscript '${__tool_directory__}/htseq_count_render.R'
+
+			-e $echo
+			-o $report
+			-d $report.files_path
+			-s $sink_message
+			-t '${__tool_directory__}'
+			
+			-A '$alignment_files'
+			-B '$sample_names'
+			-G $gff
+			-f $format
+			-r $order
+			-S $stranded
+			-a $minaqual
+			-T $feature_type
+			-i $idattr
+			-m $mode
+			-c $count
+			-O $count_rdata
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count_site.xml	Wed Mar 07 08:51:53 2018 -0500
@@ -0,0 +1,112 @@
+<tool id="aurora_htseq_size" name="Aurora htseq site" version="1.0.0">
+    <description>Counting reads in features.</description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="0.9.1">htseq</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_render.R'
+
+			-e $echo
+			-o $report
+			-d $report.files_path
+			-s $sink_message
+			-t '${__tool_directory__}'
+			
+			-A '$alignment_files'
+			-B '$sample_names'
+			-G $gff
+			-f $format
+			-r $order
+			-S $stranded
+			-a $minaqual
+			-T $feature_type
+			-i $idattr
+			-m $mode
+			-c $count
+			-O $count_rdata]]></command>
+    <inputs>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"
+               multiple="True"/>
+        <param type="text" name="sample_names" label="Sample names"
+               help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files."
+               optional="False" size="5x25"/>
+        <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False">
+            <option value="sam" selected="false">SAM</option>
+            <option value="bam" selected="true">BAM</option>
+        </param>
+        <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/>
+        <param type="select" name="order" argument="-r" label="Order"
+               help="Use this option to indicate how the input data has been sorted. The default is name.&#13;&#10;&#13;&#10;"
+               optional="False" multiple="False">
+            <option value="name" selected="false">Name</option>
+            <option value="pos" selected="false">Position</option>
+        </param>
+        <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no" selected="false">No</option>
+            <option value="reverse" selected="false">Reverse</option>
+        </param>
+        <param type="integer" name="minaqual" argument="-a" label="Alignment quality "
+               help="Skip all reads with alignment quality lower than the given minimum value (default: 10)"
+               optional="False" value="10" min="0"/>
+        <param type="text" name="feature_type" argument="-t" label="Feature type"
+               help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)"
+               optional="False" value="exon"/>
+        <param type="text" name="idattr" argument="-i" label="ID attribute"
+               help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id."
+               optional="False" value="gene_id"/>
+        <param type="select" name="mode" argument="-m" label="Mode"
+               help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)"
+               optional="False" multiple="False">
+            <option value="union" selected="true">Union</option>
+            <option value="intersection-strict" selected="false">Intersection (strict)</option>
+            <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="report" format="html" label="Aurora htseq-count site on ${on_string}" hidden="false"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+        <data name="count" format="txt" label="Aurora htseq-count site on ${on_string}" hidden="false"/>
+        <data name="count_rdata" format="rdata" hidden="false"/>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[@article{anders2015htseq,
+  title={HTSeq—a Python framework to work with high-throughput sequencing data},
+  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
+  journal={Bioinformatics},
+  volume={31},
+  number={2},
+  pages={166--169},
+  year={2015},
+  publisher={Oxford University Press}
+}]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count_site_index.Rmd	Wed Mar 07 08:51:53 2018 -0500
@@ -0,0 +1,16 @@
+---
+title: "htseq-count Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
+```
+
+## References
+
+Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang (2015). HTSeq—a Python framework to work with high-throughput sequencing data. In Bioinformatics, 31 (2), pp. 166--169.
\ No newline at end of file