changeset 1:cc33706a1d4b draft

update
author mingchen0919
date Tue, 06 Mar 2018 17:38:58 -0500
parents 10a460f61ac2
children db9da9716914
files htseq_count.Rmd htseq_count.sh htseq_count.xml htseq_count_render.R spec.txt
diffstat 5 files changed, 135 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/htseq_count.Rmd	Tue Mar 06 15:16:40 2018 -0500
+++ b/htseq_count.Rmd	Tue Mar 06 17:38:58 2018 -0500
@@ -1,16 +1,11 @@
 ---
-title: 'HTML report title'
-output:
-    html_document:
-      number_sections: true
-      toc: true
-      theme: cosmo
-      highlight: tango
+title: 'htseq-count analysis'
+output: html_document
 ---
 
 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
 knitr::opts_chunk$set(
-  echo = as.logical(), 
+  echo = as.logical(opt$X_e), 
   error = TRUE
 )
 ```
@@ -19,8 +14,9 @@
 # Code for computational analysis
 
 ```{r 'step 1'}
+opt
+```
 
-```
 
 ```{r 'ste[ 2'}
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count.sh	Tue Mar 06 17:38:58 2018 -0500
@@ -0,0 +1,16 @@
+Rscript '${__tool_directory__}/aurora_htseq_count.R'
+
+			-e $echo
+			-o $report
+			-d $report.files_path
+			-s $sink_message
+			-t '${__tool_directory__}'
+			
+			-f $format
+			-r $order
+			-S $stranded
+			-a $minaqual
+			-T $feature_type
+			-i $idattr
+			-I $additional_attr
+			-m $mode
\ No newline at end of file
--- a/htseq_count.xml	Tue Mar 06 15:16:40 2018 -0500
+++ b/htseq_count.xml	Tue Mar 06 17:38:58 2018 -0500
@@ -1,28 +1,77 @@
-<tool name="Tool report" id="tool_1" version="1.0.0">
-  <description>Counting reads in features.</description>
-  <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="0.9.1">htseq</requirement></requirements>
-  <stdio>
-        <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/></stdio>
-  <command><![CDATA[
-        
-
-        Rscript '${__tool_directory__}/rmarkdown_report_render.R'
+<tool id="aurora_htseq" name="Aurora htseq" version="1.0.0">
+    <description>Counting reads in features.</description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="0.9.1">htseq</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[Rscript '${__tool_directory__}/aurora_htseq_count.R'
 
 			-e $echo
 			-o $report
 			-d $report.files_path
 			-s $sink_message
 			-t '${__tool_directory__}'
-
-
-        
-    ]]></command>
-  <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/><param type="data" name="alignment_file" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"/><param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/><param type="text" name="format" argument="-f" label="Input data format" help="Possible values are bam and sam." optional="False" value="bam"/><param type="text" name="order" argument="-r" label="Order" help="Possible values are &lt;code&gt;name&lt;/code&gt; and &lt;code&gt;pos&lt;/code&gt;. This option indicates whether the input data has been sorted by &lt;code&gt;name&lt;/code&gt; or &lt;code&gt;pos&lt;/code&gt;. " optional="False" value="name"/><param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False"><option value="yes" selected="true">Yes</option><option value="no" selected="false">No</option><option value="reverse" selected="false">Reverse</option></param><param type="integer" name="minaqual" argument="-a" label="Alignment quality " help="Skip all reads with alignment quality lower than the given minimum value (default: 10)" optional="False" value="10" min="0"/><param type="text" name="feature_type" argument="-t" label="Feature type" help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)" optional="False" value="exon"/><param type="text" name="idattr" argument="-i" label="ID attribute" help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id." 
optional="False" value="gene_id"/><param type="text" name="additional_attr" argument="--additional-attr" label="Additional attributes" help="Additional feature attributes, which will be printed as an additional column after the primary attribute column but before the counts column(s). The default is none, a suitable value to get gene names using an Ensembl GTF file is gene_name." optional="True"/><param type="select" name="mode" argument="-m" label="Mode" help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)" optional="False" multiple="False"><option value="union" selected="true">Union</option><option value="intersection-strict" selected="false">Intersection (strict)</option><option value="intersection-nonempty" selected="false">Intersection (nonempty)</option></param></inputs>
-  <outputs>
-        <data format="html" name="report" label="tool report"/><data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/><data name="count" format="txt" label="Aurora htseq-count on ${on_string}" hidden="false"/></outputs>
-  <citations>
+			
+			-f $format
+			-r $order
+			-S $stranded
+			-a $minaqual
+			-T $feature_type
+			-i $idattr
+			-I $additional_attr
+			-m $mode]]></command>
+    <inputs>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="alignment_file" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"/>
+        <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/>
+        <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False">
+            <option value="sam" selected="false">SAM</option>
+            <option value="bam" selected="false">BAM</option>
+        </param>
+        <param type="select" name="order" argument="-r" label="Order"
+               help="Use this option to indicate how the input data has been sorted. The default is name.&#13;&#10;&#13;&#10;"
+               optional="False" multiple="False">
+            <option value="name" selected="false">Name</option>
+            <option value="pos" selected="false">Position</option>
+        </param>
+        <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no" selected="false">No</option>
+            <option value="reverse" selected="false">Reverse</option>
+        </param>
+        <param type="integer" name="minaqual" argument="-a" label="Alignment quality "
+               help="Skip all reads with alignment quality lower than the given minimum value (default: 10)"
+               optional="False" value="10" min="0"/>
+        <param type="text" name="feature_type" argument="-t" label="Feature type"
+               help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)"
+               optional="False" value="exon"/>
+        <param type="text" name="idattr" argument="-i" label="ID attribute"
+               help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identify the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id."
+               optional="False" value="gene_id"/>
+        <param type="text" name="additional_attr" argument="--additional-attr" label="Additional attributes"
+               help="Additional feature attributes, which will be printed as an additional column after the primary attribute column but before the counts column(s). The default is none, a suitable value to get gene names using an Ensembl GTF file is gene_name."
+               optional="True"/>
+        <param type="select" name="mode" argument="-m" label="Mode"
+               help="Mode to handle reads overlapping more than one feature. Possible values for &lt;mode&gt; are union, intersection-strict and intersection-nonempty (default: union)"
+               optional="False" multiple="False">
+            <option value="union" selected="true">Union</option>
+            <option value="intersection-strict" selected="false">Intersection (strict)</option>
+            <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option>
+        </param>
+    </inputs>
+    <outputs><!-- the html output must be named "report": <command> references $report and $report.files_path -->
+        <data name="report" format="html" label="Aurora htseq on ${on_string}" hidden="false"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+        <data name="count" format="txt" label="Aurora htseq-count on ${on_string}" hidden="false"/>
+    </outputs>
+    <citations>
         <citation type="bibtex"><![CDATA[
             @article{allaire2016rmarkdown,
             title={rmarkdown: Dynamic Documents for R, 2016},
@@ -32,7 +81,8 @@
             volume={6},
             year={2016}
             }
-        ]]></citation><citation type="bibtex"><![CDATA[
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
             @book{xie2015dynamic,
             title={Dynamic Documents with R and knitr},
             author={Xie, Yihui},
@@ -40,14 +90,16 @@
             year={2015},
             publisher={CRC Press}
             }
-        ]]></citation><citation type="bibtex"><![CDATA[@article{anders2015htseq,
-  title={HTSeq—a Python framework to work with high-throughput sequencing data},
-  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
-  journal={Bioinformatics},
-  volume={31},
-  number={2},
-  pages={166--169},
-  year={2015},
-  publisher={Oxford University Press}
-}]]></citation></citations>
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[@article{anders2015htseq,
+  title={HTSeq—a Python framework to work with high-throughput sequencing data},
+  author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
+  journal={Bioinformatics},
+  volume={31},
+  number={2},
+  pages={166--169},
+  year={2015},
+  publisher={Oxford University Press}
+}]]></citation>
+    </citations>
 </tool>
--- a/htseq_count_render.R	Tue Mar 06 15:16:40 2018 -0500
+++ b/htseq_count_render.R	Tue Mar 06 17:38:58 2018 -0500
@@ -15,11 +15,23 @@
 
 #------------get arguments into R--------------------
 # library(dplyr)
-# getopt_specification_matrix(extract_short_flags('')) %>%
+# getopt_specification_matrix(extract_short_flags('htseq_count.xml')) %>%
 #   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
 
 
-spec_matrix = as.matrix()
+spec_matrix = as.matrix(  # getopt spec, one row per command-line flag (same rows as spec.txt)
+  data.frame(stringsAsFactors=FALSE,
+              long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_f", "X_r",  # names used to read values, e.g. opt$X_o
+                             "X_S", "X_a", "X_T", "X_i", "X_I", "X_m"),
+             short_flags = c("e", "o", "d", "s", "t", "f", "r", "S", "a", "T",  # single-letter flags given on the command line
+                             "i", "I", "m"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),  # 1 for every flag: each one takes a value
+         data_type_flags = c("character", "character", "character", "character",  # every value is read as a character string
+                             "character", "character", "character",
+                             "character", "character", "character", "character",
+                             "character", "character")
+  )
+)
 opt = getopt(spec_matrix)
 #----------------------------------------------------
 
@@ -32,16 +44,16 @@
 #---------- often used variables ----------------
 # OUTPUT_DIR: path to the output associated directory, which stores all outputs
 # TOOL_DIR: path to the tool installation directory
-OUTPUT_DIR = ''
-TOOL_DIR =   ''
-RMD_NAME = ''
+OUTPUT_DIR = opt$X_d
+TOOL_DIR =   opt$X_t
+RMD_NAME = 'htseq_count.Rmd'
 OUTPUT_REPORT = opt$X_o
 
 # create the output associated directory to store all outputs
 dir.create(OUTPUT_DIR, recursive = TRUE)
 
 #-----------------render Rmd--------------
-render(paste0(TOOL_DIR, RMD_NAME, sep = '/'), output_file = OUTPUT_REPORT)
+render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
 #------------------------------------------
 
 #==============the end==============
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spec.txt	Tue Mar 06 17:38:58 2018 -0500
@@ -0,0 +1,14 @@
+long_flags,short_flags,argument_mask_flags,data_type_flags
+X_e,e,1,character
+X_o,o,1,character
+X_d,d,1,character
+X_s,s,1,character
+X_t,t,1,character
+X_f,f,1,character
+X_r,r,1,character
+X_S,S,1,character
+X_a,a,1,character
+X_T,T,1,character
+X_i,i,1,character
+X_I,I,1,character
+X_m,m,1,character