changeset 19:b7235a9b1881 draft

Uploaded
author pavanvidem
date Thu, 03 Sep 2015 05:25:49 -0400
parents ce3f79d2feb5
children 169c48f14f31
files dexseq.R dexseq.xml dexseq_count.xml tool_dependencies.xml
diffstat 4 files changed, 108 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/dexseq.R	Tue Sep 01 11:08:29 2015 -0400
+++ b/dexseq.R	Thu Sep 03 05:25:49 2015 -0400
@@ -19,20 +19,12 @@
     'gtf', 'a', 1, "character",
     'outfile', 'o', 1, "character",
     'reportdir', 'r', 1, "character",
-    'htmlfile', 'x', 1, "character",
     'factors', 'f', 1, "character",
     'threads', 'p', 1, "integer",
     'fdr', 'c', 1, "double"
 ), byrow=TRUE, ncol=4);
 opt = getopt(spec);
 
-
-#reportdir <- gsub(".dat", "_files", opt$outfile)
-
-dir.create(file.path(opt$reportdir))
-setwd(opt$reportdir)
-getwd()
-
 # if help was asked for print a friendly message
 # and exit with a non-zero error code
 if ( !is.null(opt$help) ) {
@@ -86,7 +78,6 @@
 primaryFactor
 countFiles
 opt$reportdir
-opt$htmlfile
 opt$threads
 getwd()
 
@@ -112,6 +103,4 @@
     DEXSeqHTML(res, path=opt$reportdir, FDR=opt$fdr, color=c("#C3EEE7","#B7FEA0","#F1E7A1","#CEAEFF","#FF8F43","#EDC3C5","#AAA8AA","#FF0000","#637EE9","#FBFBFB"))
     unlink(file.path(opt$reportdir,"DEXSeq_analysis.RData"))
 }
-file.remove(opt$htmlfile)
-file.symlink(file.path(opt$reportdir,"testForDEU.html"), opt$htmlfile)
 sessionInfo()
--- a/dexseq.xml	Tue Sep 01 11:08:29 2015 -0400
+++ b/dexseq.xml	Thu Sep 03 05:25:49 2015 -0400
@@ -2,14 +2,14 @@
     <description>Determines differential exon usage from count tables</description>
     <requirements>
         <requirement type="package" version="3.2.0">R</requirement>
-        <requirement type="binary">Rscript</requirement>
+        <requirement type="package" version="4.0.3">tiff</requirement>
         <requirement type="package" version="1.14.2">DEXSeq</requirement>
     </requirements>
     <code file="dexseq_helper.py" />
     <command>
+    mkdir ./html_out ;
         #import json
-        Rscript /usr/local/galaxy/shed_tools/testtoolshed.g2.bx.psu.edu/repos/pavanvidem/dexseq/cbd118202fd1/dexseq/dexseq.R
-            #set $reportdir = str($dexseq_out).replace('.dat','_files')
+        Rscript $__tool_directory__/dexseq.R
             -o "$dexseq_out"
             -p 12
             #set $temp_factor_names = list()
@@ -27,12 +27,12 @@
             -f '#echo json.dumps(temp_factor_names)#'
             -a $gtf
             #if $report:
-                -x "$htmlreport"
-                -r "$reportdir"
+                -r ./html_out
             #end if
             -c $fdr_cutoff;
-            
-##            cp #echo os.path.join($reportdir, 'testForDEU.html')# $htmlreport;
+        mkdir $htmlreport.extra_files_path;
+        cp ./html_out/testForDEU.html $htmlreport ;
+        cp -r ./html_out/* $htmlreport.extra_files_path ;
     </command>
     <stdio>
         <regex match="Execution halted" 
@@ -91,52 +91,54 @@
 
 **Inputs**
 
-DESeq2_ takes count tables that generated from the htseq-count as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that effect your experiment. The first factor you input is considered as the primary factor that affects gene expressions. You also input several secondary factors that might influence your experiment. But the final output will be changes in genes due to primary factor in presence of secondary factors. Each factor has two levels/states. You need to select appropriate count table from your history for each factor level.
+DEXSeq_ takes count tables generated by dexseq_count as input. Count tables must be generated for each sample individually. DEXSeq is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. The primary factor should always be named 'condition'. You can also input several secondary factors that might influence your experiment, but the final output will be the changes in genes due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.
 
 The following table gives some examples of factors and their levels:
 
 ========= ============== ===============
 Factor    Factor level 1 Factor level 2 
 --------- -------------- ---------------
-Treatment Treated        Untreated
+condition Knockdown      Wildtype
 --------- -------------- ---------------
-Condition Knockdown      Wildtype
+treatment Treated        Untreated
 --------- -------------- ---------------
-TimePoint Day4           Day1
+timePoint Day4           Day1
 --------- -------------- ---------------
 SeqType   SingleEnd      PairedEnd
 --------- -------------- ---------------
 Gender    Female         Male
 ========= ============== ===============
 
-*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level2. Here the order of factor levels is important. For example, for the factor 'Treatment' given in above table, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up or down regulations of genes in Treated samples.
+*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level2. Here the order of factor levels is important. For example, for the factor 'condition' given in above table, DEXSeq computes fold changes of 'Knockdown' samples against 'Wildtype', i.e. the values correspond to up or down regulations of genes in Knockdown samples.
 
 **Output**
 
-DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF.
+DEXSeq_ generates a tabular file containing the different columns and an optional html report.
 
 ====== ==========================================================
 Column Description
 ------ ----------------------------------------------------------
-     1 Gene Identifiers
-     2 mean normalised counts, averaged over all samples from both conditions
-     3 the logarithm (to basis 2) of the fold change (See the note in inputs section)
-     4 standard error estimate for the log2 fold change estimate
-     5 Wald statistic
-     6 p value for the statistical significance of this change
-     7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
-       which controls false discovery rate (FDR)
+     1 Gene and exon Identifiers
+     2 group/gene identifier
+     3 feature/exon identifier
+     4 mean of the counts across samples in each feature/exon
+     5 exon dispersion estimate
+     6 LRT statistic
+     7 LRT p-value
+     8 BH adjusted p-values
+     9 exon usage coefficient factorLevel 2
+    10 exon usage coefficient factorLevel 1
+    11 relative exon usage fold changes
+    12 GRanges object of the coordinates of the exon/feature
+    13 matrix of integer counts, of each column containing a sample
+    14 list of transcripts overlapping with the exon
 ====== ==========================================================
 
 
-
-
-
-.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
-
+.. _DEXSeq: http://master.bioconductor.org/packages/release/bioc/html/DEXSeq.html
 
     </help>
     <citations>
-        <citation type="doi">10.1101/002832</citation>
+        <citation type="doi">10.1101/gr.133744.111</citation>
     </citations>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dexseq_count.xml	Thu Sep 03 05:25:49 2015 -0400
@@ -0,0 +1,77 @@
+<tool id="dexseq_count" name="DEXSeq-Count" version="1.0">
+    <description>Prepare and count exon abundancies from RNA-seq data</description>
+    <requirements><!-- versions must match the packages declared in tool_dependencies.xml (R 3.2.0, DEXSeq 1.14.2) -->
+        <requirement type="package" version="3.2.0">R</requirement>
+        <requirement type="package" version="1.14.2">DEXSeq</requirement>
+    </requirements>
+    <command>
+        #if $mode.mode_select == "prepare":
+            python $INSTALL_DIR/python_scripts/dexseq_prepare_annotation.py -r $mode.aggregate $mode.gtf $flattened_gtf_out
+        #elif $mode.mode_select == "count":
+            python $INSTALL_DIR/python_scripts/dexseq_count.py -f bam -p $mode.paired -r $mode.order -s $mode.stranded -a $mode.qual $mode.flattened_gtf_in $mode.bamfile $counts_file
+        #end if
+    </command><!-- Parameters nested in the "mode" conditional are addressed as $mode.NAME; -a forwards the "qual" minimum alignment quality to dexseq_count.py, which was previously declared but never passed on. -->
+    <inputs><!-- "mode" selects between annotation preparation and read counting; each branch declares only the parameters its script needs -->
+        <conditional name="mode">
+            <param name="mode_select" type="select" label="Mode of operation">
+                <option value="prepare">Prepare annotation</option>
+                <option value="count">Count reads</option>
+            </param>
+            <when value="prepare">
+                <param name="gtf" type="gtf" label="GTF file"/>
+                <param name="aggregate" type="boolean" checked="True" truevalue="yes" falsevalue="no" label="Aggretare genes with  exons?" help="Indicates whether two or more genes sharing an exon should be merged into an 'aggregate gene'. If 'no', the exons that can not be assiged to a single gene are ignored."/>
+            </when>
+            <when value="count">
+                <param name="bamfile" type="bam" label="Input bam file"/>
+                <param name="flattened_gtf_in" type="gtf" label="DEXSeq compatible GTF file" help="Created by prepare mode"/>
+                <param name="paired" type="boolean" checked="False" truevalue="yes" falsevalue="no" label="Is libray paired end?"/>
+                <param name="stranded" type="select" label="Is library strand specific?">
+                    <option value="no">No</option>
+                    <option value="yes">Yes</option>
+                    <option value="reverse">Yes, but reverse</option>
+                </param>
+                <param name="qual" type="integer" value="10" label="Skip all reads with alignment quality lower than the given minimum value"/>
+                <param name="order" type="select" label="Sorting order of alignments" help="If you generated your alignments using tophat, they are by default position sorted. Ignored for single-end data">
+                    <option value="pos">By position</option>
+                    <option value="name">By name</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs><!-- one dataset per run: the filter keeps only the output that belongs to the selected mode -->
+        <data name="counts_file" format="tabular" label="DEXSeq count reads on ${on_string}">
+            <filter>mode['mode_select'] == 'count'</filter>
+        </data>
+        <data name="flattened_gtf_out" format="gtf" label="DEXSeq prepare annotation ${on_string}">
+            <filter>mode['mode_select'] == 'prepare'</filter>
+        </data>
+    </outputs>
+
+    <help>
+
+.. class:: infomark
+
+**What it does** 
+
+The main goal of this tool is to count the number of reads/fragments per exon of each gene in an RNA-seq sample. In addition, it can prepare your annotation GTF file so that it is compatible with counting.
+
+
+**Inputs**
+
+Mode-prepare: Takes a normal GTF file as input, for example from the Ensembl database.
+Mode-count: Inputs are a flattened GTF file and a BAM file. The flattened GTF file can be generated with the 'prepare' mode of this tool.
+
+**Output**
+
+Mode-prepare: Flattened GTF file that contains only exons with the corresponding gene IDs from the given GTF file. Sometimes two or more genes sharing an exon will be merged into an 'aggregate gene' if the aggregate option was used.
+Mode-count: Two-column tab-delimited file with exon IDs and their read counts.
+
+.. _DEXSeq: http://master.bioconductor.org/packages/release/bioc/html/DEXSeq.html
+
+
+    </help>
+    <citations>
+        <citation type="doi">10.1101/gr.133744.111</citation>
+    </citations>
+</tool>
--- a/tool_dependencies.xml	Tue Sep 01 11:08:29 2015 -0400
+++ b/tool_dependencies.xml	Thu Sep 03 05:25:49 2015 -0400
@@ -4,7 +4,7 @@
         <repository changeset_revision="d973c8e9b29e" name="package_r_3_2_0" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="tiff" version="4.0.3">
-        <repository changeset_revision="84bb5c1e2067" name="package_tiff_4_0_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="3fecd5a9a64c" name="package_tiff_4_0_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="DEXSeq" version="1.14.2">
         <install version="1.0">
@@ -13,7 +13,7 @@
                     <repository changeset_revision="d973c8e9b29e" name="package_r_3_2_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
                         <package name="R" version="3.2.0" />
                     </repository>
-                    <repository changeset_revision="84bb5c1e2067" name="package_tiff_4_0_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
+                    <repository changeset_revision="3fecd5a9a64c" name="package_tiff_4_0_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
                         <package name="tiff" version="4.0.3" />
                     </repository>
                     <package>https://github.com/bgruening/download_store/raw/master/DEXSeq_1.14.2/Cairo_1.5-8.tar.gz</package>