changeset 4:0fdb0d5f53ce draft

add rdata output
author mingchen0919
date Wed, 07 Mar 2018 01:37:59 -0500
parents 1545e24c9ba7
children aacd5f53ac99
files htseq_count.Rmd htseq_count.sh htseq_count.xml htseq_count_render.R spec.txt
diffstat 5 files changed, 93 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/htseq_count.Rmd	Wed Mar 07 01:10:13 2018 -0500
+++ b/htseq_count.Rmd	Wed Mar 07 01:37:59 2018 -0500
@@ -37,7 +37,6 @@
 # display htseq-count job script
 htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh')
 tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size )))
-opt
 ```
 
 ```{bash}
@@ -54,4 +53,13 @@
 DT::datatable(count_data, caption = "htseq counts for each sample")
 ```
 
+```{r}
+# write the count table to counts.RData under the directory given by opt$X_d
+save(list = 'count_data', file = paste0(opt[['X_d']], '/counts.RData'))
+```
 
+```{bash}
+cp "${X_d}/counts.RData" "${X_O}"  # quoted so paths containing whitespace are not word-split
+```
+
+
--- a/htseq_count.sh	Wed Mar 07 01:10:13 2018 -0500
+++ b/htseq_count.sh	Wed Mar 07 01:37:59 2018 -0500
@@ -16,4 +16,5 @@
 			-T $feature_type
 			-i $idattr
 			-m $mode
-			-c $count
\ No newline at end of file
+			-c $count
+			-O $count_rdata
\ No newline at end of file
--- a/htseq_count.xml	Wed Mar 07 01:10:13 2018 -0500
+++ b/htseq_count.xml	Wed Mar 07 01:37:59 2018 -0500
@@ -1,10 +1,17 @@
 <tool id="aurora_htseq" name="Aurora htseq" version="1.0.0">
-  <description>Counting reads in features.</description>
-  <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="0.9.1">htseq</requirement><requirement type="package" version="0.2">r-dt</requirement></requirements>
-  <stdio>
-        <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/></stdio>
-  <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_render.R'
+    <description>Counting reads in features.</description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="0.9.1">htseq</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_render.R'
 
 			-e $echo
 			-o $report
@@ -22,12 +29,56 @@
 			-T $feature_type
 			-i $idattr
 			-m $mode
-			-c $count]]></command>
-  <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/><param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam" multiple="True"/><param type="text" name="sample_names" label="Sample names" help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files." optional="False" size="5x25"/><param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False"><option value="sam" selected="false">SAM</option><option value="bam" selected="true">BAM</option></param><param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/><param type="select" name="order" argument="-r" label="Order" help="Use this option to indicate how the input data has been sorted. The default is name.&#13;&#10;&#13;&#10;" optional="False" multiple="False"><option value="name" selected="false">Name</option><option value="pos" selected="false">Position</option></param><param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False"><option value="yes" selected="true">Yes</option><option value="no" selected="false">No</option><option value="reverse" selected="false">Reverse</option></param><param type="integer" name="minaqual" argument="-a" label="Alignment quality " help="Skip all reads with alignment quality lower than the given minimum value (default: 10)" optional="False" value="10" min="0"/><param type="text" name="feature_type" argument="-t" label="Feature type" help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)" optional="False" value="exon"/><param type="text" name="idattr" argument="-i" label="ID attribute" 
help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id." optional="False" value="gene_id"/><param type="select" name="mode" argument="-m" label="Mode" help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)" optional="False" multiple="False"><option value="union" selected="true">Union</option><option value="intersection-strict" selected="false">Intersection (strict)</option><option value="intersection-nonempty" selected="false">Intersection (nonempty)</option></param></inputs>
-  <outputs>
-        <data name="report" format="html" label="Aurora htseq on ${on_string}" hidden="false"/><data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/><data name="count" format="txt" label="Aurora htseq-count on ${on_string}" hidden="false"/></outputs>
-  <citations>
+			-c $count
+			-O $count_rdata]]></command>
+    <inputs>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"
+               multiple="True"/>
+        <param type="text" name="sample_names" label="Sample names"
+               help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files."
+               optional="False" size="5x25"/>
+        <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False">
+            <option value="sam" selected="false">SAM</option>
+            <option value="bam" selected="true">BAM</option>
+        </param>
+        <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/>
+        <param type="select" name="order" argument="-r" label="Order"
+               help="Use this option to indicate how the input data has been sorted. The default is name.&#13;&#10;&#13;&#10;"
+               optional="False" multiple="False">
+            <option value="name" selected="false">Name</option>
+            <option value="pos" selected="false">Position</option>
+        </param>
+        <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no" selected="false">No</option>
+            <option value="reverse" selected="false">Reverse</option>
+        </param>
+        <param type="integer" name="minaqual" argument="-a" label="Alignment quality "
+               help="Skip all reads with alignment quality lower than the given minimum value (default: 10)"
+               optional="False" value="10" min="0"/>
+        <param type="text" name="feature_type" argument="-t" label="Feature type"
+               help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)"
+               optional="False" value="exon"/>
+        <param type="text" name="idattr" argument="-i" label="ID attribute"
+               help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id."
+               optional="False" value="gene_id"/>
+        <param type="select" name="mode" argument="-m" label="Mode"
+               help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)"
+               optional="False" multiple="False">
+            <option value="union" selected="true">Union</option>
+            <option value="intersection-strict" selected="false">Intersection (strict)</option>
+            <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="report" format="html" label="Aurora htseq on ${on_string}" hidden="false"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+        <data name="count" format="txt" label="Aurora htseq-count on ${on_string}" hidden="false"/>
+        <data name="count_rdata" format="rdata" label="Aurora htseq-count RData on ${on_string}" hidden="false"/> <!-- counts.RData produced by the report; explicit label keeps it distinguishable from the HTML report in the history -->
+    </outputs>
+    <citations>
         <citation type="bibtex"><![CDATA[
             @article{allaire2016rmarkdown,
             title={rmarkdown: Dynamic Documents for R, 2016},
@@ -37,7 +88,8 @@
             volume={6},
             year={2016}
             }
-        ]]></citation><citation type="bibtex"><![CDATA[
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
             @book{xie2015dynamic,
             title={Dynamic Documents with R and knitr},
             author={Xie, Yihui},
@@ -45,14 +97,16 @@
             year={2015},
             publisher={CRC Press}
             }
-        ]]></citation><citation type="bibtex"><![CDATA[@article{anders2015htseq,
-  title={HTSeq—a Python framework to work with high-throughput sequencing data},
-  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
-  journal={Bioinformatics},
-  volume={31},
-  number={2},
-  pages={166--169},
-  year={2015},
-  publisher={Oxford University Press}
-}]]></citation></citations>
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[@article{anders2015htseq,
+  title={HTSeq—a Python framework to work with high-throughput sequencing data},
+  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
+  journal={Bioinformatics},
+  volume={31},
+  number={2},
+  pages={166--169},
+  year={2015},
+  publisher={Oxford University Press}
+}]]></citation>
+    </citations>
 </tool>
--- a/htseq_count_render.R	Wed Mar 07 01:10:13 2018 -0500
+++ b/htseq_count_render.R	Wed Mar 07 01:37:59 2018 -0500
@@ -23,15 +23,16 @@
 spec_matrix = as.matrix(
   data.frame(stringsAsFactors=FALSE,
               long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B", "X_G",
-                             "X_f", "X_r", "X_S", "X_a", "X_T", "X_i", "X_m", "X_c"),
+                             "X_f", "X_r", "X_S", "X_a", "X_T", "X_i", "X_m", "X_c", "X_O"),
              short_flags = c("e", "o", "d", "s", "t", "A", "B", "G", "f", "r", "S",
-                             "a", "T", "i", "m", "c"),
+                             "a", "T", "i", "m", "c", "O"),
      argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
-                             1L, 1L, 1L),
+                             1L, 1L, 1L, 1L),
          data_type_flags = c("character", "character", "character", "character",
                              "character", "character", "character", "character",
                              "character", "character", "character", "character",
-                             "character", "character", "character", "character")
+                             "character", "character", "character", "character",
+                             "character")
   )
 )
 opt = getopt(spec_matrix)
--- a/spec.txt	Wed Mar 07 01:10:13 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-long_flags,short_flags,argument_mask_flags,data_type_flags
-X_e,e,1,character
-X_o,o,1,character
-X_d,d,1,character
-X_s,s,1,character
-X_t,t,1,character
-X_A,A,1,character
-X_G,G,1,character
-X_f,f,1,character
-X_r,r,1,character
-X_S,S,1,character
-X_a,a,1,character
-X_T,T,1,character
-X_i,i,1,character
-X_m,m,1,character