changeset 20:bbea9c694b34 draft

Uploaded
author bgruening
date Wed, 19 Feb 2014 06:11:20 -0500
parents a1aa11e02862
children d32de046ba31
files deseq2.xml deseq_helper.py foldchanges_heatmap.r foldchanges_heatmap.xml matrix_helper.py tool_dependencies.xml
diffstat 6 files changed, 246 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.xml	Mon Sep 30 12:28:02 2013 -0400
+++ b/deseq2.xml	Wed Feb 19 06:11:20 2014 -0500
@@ -8,7 +8,7 @@
         <!--<requirement type="set_environment">DESEQ2_SCRIPT_PATH</requirement>-->
     </requirements>
     <command interpreter="Rscript">
-        #import simplejson
+        #import json
         deseq2.R
             -o "$deseq_out"
             --outfilefiltered "$deseq_out_filtered"
@@ -16,7 +16,7 @@
             #if $pdf:
                 -p "$plots" 
             #end if
-            
+
             -i "$input_matrix"
 
             #set $temp_factor_name = list()
@@ -31,7 +31,7 @@
             #end for
 
                 ##-m "#echo ' '.join( $temp_factor_list )#"
-                -m '#echo simplejson.dumps(temp_factor_name)#'
+                -m '#echo json.dumps(temp_factor_name)#'
             ##--organism "$organism"
             ##-t "$fittype"
             -c $countthreshold
@@ -53,15 +53,15 @@
     <inputs>
         <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool 
             'Count reads in features with htseq-count'"/>
-        
+
         <repeat name="rep_factorName" title="Factor/Condition" min="1">
-            <param name="factorName" type="text" value="FactorName" label="Specify a factor name" help=""/>
+            <param name="factorName" type="text" value="FactorName" label="Specify a factor name" help="" />
             <repeat name="rep_factorLevel" title="Factor level" min="1">
-                <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level" help=""/>
+                <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level" help="" />
 
                 <param name="factorIndex" label="Select columns that are associated with this factor level" type="data_column" data_ref="input_matrix"
                     numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes">
-                    <validator type="no_options" message="Please select at least one column."/>
+                    <validator type="no_options" message="Please select at least one column." />
                 </param>
             </repeat>
         </repeat>
@@ -90,24 +90,25 @@
             <option value="fly">fly</option>
             <option value="other">other</option>
         </param-->
-        <param name="countthreshold" size="10" type="float" value="10" label="Filter out features with mean normalized counts lower than this value"/>
+        <param name="countthreshold" size="10" type="float" value="10.0" label="Filter out features with mean normalized counts lower than this value"/>
         <param name="fittype" type="select" label="Type of fitting of dispersions to the mean intensity">
             <option value="parametric">parametric</option>
             <option value="local">local</option>
             <option value="mean">mean</option>
-        </param> 
-        <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results"
-            help="output an additional PDF files" />            
+        </param>
+        <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" 
+            label="Visualising the analysis results"
+            help="output an additional PDF files" />
     </inputs>
 
     <outputs>
-        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}"/>
-        <data format="tabular" name="deseq_out_filtered" label="Independent filtering result file on ${on_string}"/>
+        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}" />
+        <data format="tabular" name="deseq_out_filtered" label="Independent filtering result file on ${on_string}" />
         <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
             <filter>pdf == True</filter>
         </data>
     </outputs>
-    <code file="matrix_helper.py" />
+    <code file="deseq_helper.py" />
 
     <help>
 
@@ -147,7 +148,7 @@
 
 DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
 
-If _DESeq2_ is used to obtain results for scientific publications it
+If DESeq2_ is used to obtain results for scientific publications it
 should be cited as [1]_. A paper describing DESeq2_ is in preparation.
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq_helper.py	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,98 @@
+
+from galaxy.tools.parameters import DataToolParameter
+
+def get_matrix_header( input_dataset ):
+    """
+        Not used currently, because the reload of the ckeckboxes did not work.
+    """
+    input_handle = open( input_dataset.file_name )
+    first_header = input_handle.readline()
+    second_header = input_handle.readline()
+    return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ]
+
+
+
+def _construct_error_map( error_map, rep_dict, rep_parent, child, error_value ):
+    """
+        Its no so easy to create a propper error_map for repetitions in Galaxy.
+        This is a helper function.
+    """
+
+    error_map[ rep_parent ] = [ dict() for t in rep_dict ]
+    for i in range( len( rep_dict ) ):
+        error_map[ rep_parent ][i][ child ] = error_value
+
+
+
+def validate_input( trans, error_map, param_values, page_param_map ):
+    """
+        Validates the user input, before execution.
+    """
+    factors = param_values['rep_factorName']
+    factor_name_list = []
+    factor_duplication = False
+    level_duplication = False
+    overlapping_selection = False
+
+    first_condition = True
+    factor_indieces = list()
+
+    for factor in factors:
+        # factor names should be unique
+        fn = factor['factorName']
+        if fn in factor_name_list:
+            factor_duplication = True
+            break
+        factor_name_list.append( fn )
+
+        level_name_list = list()
+        factor_index_list = list()
+
+        if first_condition and len( factor['rep_factorLevel'] ) < 2:
+            # first condition needs to have at least 2 levels
+            _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'The first condition should have at least 2 factor'} for t in factor['rep_factorLevel'] ] )
+
+        for level in factor['rep_factorLevel']:
+            # level names under one factor should be unique
+            fl = level['factorLevel']
+            if fl in level_name_list:
+                level_duplication = True
+            level_name_list.append( fl )
+
+            fi = level['factorIndex']
+            if fi:
+                # the checkboxes should not have an overlap
+                for check in fi:
+                    if check in factor_index_list:
+                        overlapping_selection = True
+                    factor_index_list.append( check )
+
+            print set(factor_index_list)
+            print factor_indieces
+            if set(factor_index_list) in factor_indieces:
+                _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'It is not allowed to have two identical factors, that means two factors with the same toggeled checked boxes. '} for t in factor['rep_factorLevel'] ] )
+            else:
+                factor_indieces.append( set(factor_index_list) )
+
+
+
+        if level_duplication:
+            error_map['rep_factorName'] = [ dict() for t in factors ]
+            for i in range( len( factors ) ):
+                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ]
+            break
+        if overlapping_selection:
+            error_map['rep_factorName'] = [ dict() for t in factors ]
+            for i in range( len( factors ) ):
+                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ]
+            break
+
+        first_condition = False
+
+    if factor_duplication:
+        _construct_error_map( error_map, factors, 'rep_factorName', 'factorName', 'Factor names need to be unique' )
+        """
+        error_map['rep_factorName'] = [ dict() for t in factors ]
+        for i in range( len( factors ) ):
+            error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique'
+        """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foldchanges_heatmap.r	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,44 @@
+## Setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+args <- commandArgs(trailingOnly = TRUE)
+
+library("gplots")
+library("RColorBrewer")
+
+tables <- {}
+labels <- {}
+
+labels <- unlist(strsplit(args[1], ","))
+tables <- unlist(strsplit(args[2], ","))
+ids_file=args[3]
+output_pdf_file=args[4]
+
+reds <- colorRampPalette(brewer.pal(9,"Reds"))(100)
+reds <- rev(reds)
+greens <- colorRampPalette(brewer.pal(9,"Greens"))(100)
+cols <- c(reds, greens)
+
+ids <- read.table(ids_file)
+
+mat <- c()
+
+for(i in 1:length(tables)) {
+ # get data frame
+    curr_table <- read.table(tables[[i]], sep="\t", header=FALSE)
+    log2FCvect <- c()
+    for(j in 1:length(ids$V1)) {
+        log2FCvect <- c(log2FCvect, curr_table$V3[which(curr_table$V1 %in% ids$V1[j])])
+    }
+    # build foldChange data frame for heatmap
+    mat <- cbind(mat, log2FCvect)
+}
+pdf(output_pdf_file) 
+hm <- heatmap.2(mat, col = cols, trace="none", labCol=labels, labRow=NULL,scale="none",symm=F,symkey=T,symbreaks=T, cexCol=0.8, cexRow=0.8)
+dev.off()  
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foldchanges_heatmap.xml	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,86 @@
+<tool id="foldchange_heatmap" name="fold change heatmap" version="1.0">
+    <description>Plot heat map of fold changes from multiple DESeq2 outputs</description>
+    <requirements>
+        <requirement type="binary">Rscript</requirement>
+    </requirements>
+    <command interpreter="Rscript">
+            #set $label_list = list()
+            #set $table_list = list()
+            #for $input in $deseqout:
+                $label_list.append('%s' %($input.label))
+            #end for
+            #for $input in $deseqout:
+                $table_list.append('%s' %($input.table))
+            #end for
+            foldchanges_heatmap.r "#echo ','.join( $label_list )#" "#echo ','.join( $table_list )#" $gene_ids_file $plots
+            ## TODO: instead of throwing away stderr, try Bjoern's fix
+            2> /dev/null
+    </command>
+    <stdio>
+        <regex match="Execution halted" 
+           source="both" 
+           level="fatal" 
+           description="Execution halted." />
+    </stdio>
+    <inputs>
+        <repeat name="deseqout" title="DESeq2 output" min="1">
+            <param name="label" type="text" value="Label" label="Specify a label" help="" />
+            <param name="table" label="Select DESeq2 output" type="data" format="tabular"/>
+        </repeat>
+        <param name="gene_ids_file" label="Select file containing gene ids" type="data" format="tabular"/>
+    </inputs>
+
+    <outputs>
+        <data format="pdf" name="plots" label="Heatmap plot on ${on_string}"/>
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**What it does** 
+
+Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
+
+
+**Inputs**
+
+DESeq2_ requires one count matrix as input file. You can use the tool
+
+
+
+**Output**
+
+DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF.
+
+====== ==========================================================
+Column Description
+------ ----------------------------------------------------------
+     1 Gene Identifiers
+     2 mean normalised counts, averaged over all samples from both conditions
+     3 the logarithm (to basis 2) of the fold change
+     4 standard error estimate for the log2 fold change estimate
+     5 p value for the statistical significance of this change
+     6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+====== ==========================================================
+
+
+------
+
+**References** 
+
+DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
+
+If DESeq2_ is used to obtain results for scientific publications it
+should be cited as [1]_. A paper describing DESeq2_ is in preparation.
+
+
+
+.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. 
+
+.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
+.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
+
+
+    </help>
+</tool>
--- a/matrix_helper.py	Mon Sep 30 12:28:02 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-
-from galaxy.tools.parameters import DataToolParameter
-
-def get_matrix_header( input_dataset ):
-    """
-        Not used currently, because the reload of the ckeckboxes did not work.
-    """
-    input_handle = open( input_dataset.file_name )
-    first_header = input_handle.readline()
-    second_header = input_handle.readline()
-    return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ]
-
-
-def validate_input( trans, error_map, param_values, page_param_map ):
-    """
-        Validates the user input, before execution.
-    """
-    factors = param_values['rep_factorName']
-    factor_name_list = []
-    factor_duplication = False
-    level_duplication = False
-    overlapping_selection = False
-
-
-    for factor in factors:
-        # factor names should be unique
-        fn = factor['factorName']
-        if fn in factor_name_list:
-            factor_duplication = True
-            break
-        factor_name_list.append( fn )
-
-        level_name_list = list()
-        factor_index_list = list()
-
-        for level in factor['rep_factorLevel']:
-            # level names under one factor should be unique
-            fl = level['factorLevel']
-            if fl in level_name_list:
-                level_duplication = True
-            level_name_list.append( fl )
-
-            fi = level['factorIndex']
-            if fi:
-                # the checkboxes should not have an overlap
-                for check in fi:
-                    if check in factor_index_list:
-                        overlapping_selection = True
-                    factor_index_list.append( check )
-
-        if level_duplication:
-            error_map['rep_factorName'] = [ dict() for t in factors ]
-            for i in range( len( factors ) ):
-                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ]
-            break
-        if overlapping_selection:
-            error_map['rep_factorName'] = [ dict() for t in factors ]
-            for i in range( len( factors ) ):
-                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ]
-            break
-
-    if factor_duplication:
-        error_map['rep_factorName'] = [ dict() for t in factors ]
-        for i in range( len( factors ) ):
-            error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique'
-
--- a/tool_dependencies.xml	Mon Sep 30 12:28:02 2013 -0400
+++ b/tool_dependencies.xml	Wed Feb 19 06:11:20 2014 -0500
@@ -1,10 +1,10 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="R_3_0_1" version="3.0.1">
-        <repository changeset_revision="e16e08f41ed7" name="package_r_3_0_1" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="4666f68ad4d5" name="package_r_3_0_1" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="deseq2" version="1.0.17">
-        <repository changeset_revision="3e3d1a50a347" name="package_deseq2_1_0_17" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="3b6a8c377895" name="package_deseq2_1_0_17" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <!--<set_environment version="1.0">
         <environment_variable name="DESEQ2_SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR/scripts</environment_variable>