# HG changeset patch # User bgruening # Date 1392808280 18000 # Node ID bbea9c694b344ec40388ef123591694b596ec148 # Parent a1aa11e0286202cdca63aa7412c1d77e88ab8fca Uploaded diff -r a1aa11e02862 -r bbea9c694b34 deseq2.xml --- a/deseq2.xml Mon Sep 30 12:28:02 2013 -0400 +++ b/deseq2.xml Wed Feb 19 06:11:20 2014 -0500 @@ -8,7 +8,7 @@ - #import simplejson + #import json deseq2.R -o "$deseq_out" --outfilefiltered "$deseq_out_filtered" @@ -16,7 +16,7 @@ #if $pdf: -p "$plots" #end if - + -i "$input_matrix" #set $temp_factor_name = list() @@ -31,7 +31,7 @@ #end for ##-m "#echo ' '.join( $temp_factor_list )#" - -m '#echo simplejson.dumps(temp_factor_name)#' + -m '#echo json.dumps(temp_factor_name)#' ##--organism "$organism" ##-t "$fittype" -c $countthreshold @@ -53,15 +53,15 @@ - + - + - + - + @@ -90,24 +90,25 @@ - + - - + + - - + + pdf == True -

+    
 
     
 
@@ -147,7 +148,7 @@
 
 DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
 
-If _DESeq2_ is used to obtain results for scientific publications it
+If DESeq2_ is used to obtain results for scientific publications it
 should be cited as [1]_. A paper describing DESeq2_ is in preparation.
 
 
diff -r a1aa11e02862 -r bbea9c694b34 deseq_helper.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq_helper.py	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,98 @@
+
+from galaxy.tools.parameters import DataToolParameter
+
+def get_matrix_header( input_dataset ):
+    """
+        Not used currently, because the reload of the ckeckboxes did not work.
+    """
+    input_handle = open( input_dataset.file_name )
+    first_header = input_handle.readline()
+    second_header = input_handle.readline()
+    return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ]
+
+
+
+def _construct_error_map( error_map, rep_dict, rep_parent, child, error_value ):
+    """
+        Its no so easy to create a propper error_map for repetitions in Galaxy.
+        This is a helper function.
+    """
+
+    error_map[ rep_parent ] = [ dict() for t in rep_dict ]
+    for i in range( len( rep_dict ) ):
+        error_map[ rep_parent ][i][ child ] = error_value
+
+
+
+def validate_input( trans, error_map, param_values, page_param_map ):
+    """
+        Validates the user input, before execution.
+    """
+    factors = param_values['rep_factorName']
+    factor_name_list = []
+    factor_duplication = False
+    level_duplication = False
+    overlapping_selection = False
+
+    first_condition = True
+    factor_indieces = list()
+
+    for factor in factors:
+        # factor names should be unique
+        fn = factor['factorName']
+        if fn in factor_name_list:
+            factor_duplication = True
+            break
+        factor_name_list.append( fn )
+
+        level_name_list = list()
+        factor_index_list = list()
+
+        if first_condition and len( factor['rep_factorLevel'] ) < 2:
+            # first condition needs to have at least 2 levels
+            _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'The first condition should have at least 2 factor'} for t in factor['rep_factorLevel'] ] )
+
+        for level in factor['rep_factorLevel']:
+            # level names under one factor should be unique
+            fl = level['factorLevel']
+            if fl in level_name_list:
+                level_duplication = True
+            level_name_list.append( fl )
+
+            fi = level['factorIndex']
+            if fi:
+                # the checkboxes should not have an overlap
+                for check in fi:
+                    if check in factor_index_list:
+                        overlapping_selection = True
+                    factor_index_list.append( check )
+
+            print set(factor_index_list)
+            print factor_indieces
+            if set(factor_index_list) in factor_indieces:
+                _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'It is not allowed to have two identical factors, that means two factors with the same toggeled checked boxes. '} for t in factor['rep_factorLevel'] ] )
+            else:
+                factor_indieces.append( set(factor_index_list) )
+
+
+
+        if level_duplication:
+            error_map['rep_factorName'] = [ dict() for t in factors ]
+            for i in range( len( factors ) ):
+                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ]
+            break
+        if overlapping_selection:
+            error_map['rep_factorName'] = [ dict() for t in factors ]
+            for i in range( len( factors ) ):
+                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ]
+            break
+
+        first_condition = False
+
+    if factor_duplication:
+        _construct_error_map( error_map, factors, 'rep_factorName', 'factorName', 'Factor names need to be unique' )
+        """
+        error_map['rep_factorName'] = [ dict() for t in factors ]
+        for i in range( len( factors ) ):
+            error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique'
+        """
diff -r a1aa11e02862 -r bbea9c694b34 foldchanges_heatmap.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foldchanges_heatmap.r	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,44 @@
+## Setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+args <- commandArgs(trailingOnly = TRUE)
+
+library("gplots")
+library("RColorBrewer")
+
+tables <- {}
+labels <- {}
+
+labels <- unlist(strsplit(args[1], ","))
+tables <- unlist(strsplit(args[2], ","))
+ids_file=args[3]
+output_pdf_file=args[4]
+
+reds <- colorRampPalette(brewer.pal(9,"Reds"))(100)
+reds <- rev(reds)
+greens <- colorRampPalette(brewer.pal(9,"Greens"))(100)
+cols <- c(reds, greens)
+
+ids <- read.table(ids_file)
+
+mat <- c()
+
+for(i in 1:length(tables)) {
+ # get data frame
+    curr_table <- read.table(tables[[i]], sep="\t", header=FALSE)
+    log2FCvect <- c()
+    for(j in 1:length(ids$V1)) {
+        log2FCvect <- c(log2FCvect, curr_table$V3[which(curr_table$V1 %in% ids$V1[j])])
+    }
+    # build foldChange data frame for heatmap
+    mat <- cbind(mat, log2FCvect)
+}
+pdf(output_pdf_file) 
+hm <- heatmap.2(mat, col = cols, trace="none", labCol=labels, labRow=NULL,scale="none",symm=F,symkey=T,symbreaks=T, cexCol=0.8, cexRow=0.8)
+dev.off()  
+
+
+
+
diff -r a1aa11e02862 -r bbea9c694b34 foldchanges_heatmap.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foldchanges_heatmap.xml	Wed Feb 19 06:11:20 2014 -0500
@@ -0,0 +1,86 @@
+
+    Plot heat map of fold changes from multiple DESeq2 outputs
+    
+        Rscript
+    
+    
+            #set $label_list = list()
+            #set $table_list = list()
+            #for $input in $deseqout:
+                $label_list.append('%s' %($input.label))
+            #end for
+            #for $input in $deseqout:
+                $table_list.append('%s' %($input.table))
+            #end for
+            foldchanges_heatmap.r "#echo ','.join( $label_list )#" "#echo ','.join( $table_list )#" $gene_ids_file $plots
+            ## TODO: instead of throwing away stderr, try Bjoern's fix
+            2> /dev/null
+    
+    
+        
+    
+    
+        
+            
+            
+        
+        
+    
+
+    
+        
+    
+    
+
+.. class:: infomark
+
+**What it does** 
+
+Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
+
+
+**Inputs**
+
+DESeq2_ requires one count matrix as input file. You can use the tool
+
+
+
+**Output**
+
+DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF.
+
+====== ==========================================================
+Column Description
+------ ----------------------------------------------------------
+     1 Gene Identifiers
+     2 mean normalised counts, averaged over all samples from both conditions
+     3 the logarithm (to basis 2) of the fold change
+     4 standard error estimate for the log2 fold change estimate
+     5 p value for the statistical significance of this change
+     6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+====== ==========================================================
+
+
+------
+
+**References** 
+
+DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg)
+
+If DESeq2_ is used to obtain results for scientific publications it
+should be cited as [1]_. A paper describing DESeq2_ is in preparation.
+
+
+
+.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. 
+
+.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
+.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
+
+
+    
+
diff -r a1aa11e02862 -r bbea9c694b34 matrix_helper.py
--- a/matrix_helper.py	Mon Sep 30 12:28:02 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-
-from galaxy.tools.parameters import DataToolParameter
-
-def get_matrix_header( input_dataset ):
-    """
-        Not used currently, because the reload of the ckeckboxes did not work.
-    """
-    input_handle = open( input_dataset.file_name )
-    first_header = input_handle.readline()
-    second_header = input_handle.readline()
-    return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ]
-
-
-def validate_input( trans, error_map, param_values, page_param_map ):
-    """
-        Validates the user input, before execution.
-    """
-    factors = param_values['rep_factorName']
-    factor_name_list = []
-    factor_duplication = False
-    level_duplication = False
-    overlapping_selection = False
-
-
-    for factor in factors:
-        # factor names should be unique
-        fn = factor['factorName']
-        if fn in factor_name_list:
-            factor_duplication = True
-            break
-        factor_name_list.append( fn )
-
-        level_name_list = list()
-        factor_index_list = list()
-
-        for level in factor['rep_factorLevel']:
-            # level names under one factor should be unique
-            fl = level['factorLevel']
-            if fl in level_name_list:
-                level_duplication = True
-            level_name_list.append( fl )
-
-            fi = level['factorIndex']
-            if fi:
-                # the checkboxes should not have an overlap
-                for check in fi:
-                    if check in factor_index_list:
-                        overlapping_selection = True
-                    factor_index_list.append( check )
-
-        if level_duplication:
-            error_map['rep_factorName'] = [ dict() for t in factors ]
-            for i in range( len( factors ) ):
-                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ]
-            break
-        if overlapping_selection:
-            error_map['rep_factorName'] = [ dict() for t in factors ]
-            for i in range( len( factors ) ):
-                error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ]
-            break
-
-    if factor_duplication:
-        error_map['rep_factorName'] = [ dict() for t in factors ]
-        for i in range( len( factors ) ):
-            error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique'
-
diff -r a1aa11e02862 -r bbea9c694b34 tool_dependencies.xml
--- a/tool_dependencies.xml	Mon Sep 30 12:28:02 2013 -0400
+++ b/tool_dependencies.xml	Wed Feb 19 06:11:20 2014 -0500
@@ -1,10 +1,10 @@
 
 
     
-        
+        
     
     
-        
+