# HG changeset patch # User bgruening # Date 1392808280 18000 # Node ID bbea9c694b344ec40388ef123591694b596ec148 # Parent a1aa11e0286202cdca63aa7412c1d77e88ab8fca Uploaded diff -r a1aa11e02862 -r bbea9c694b34 deseq2.xml --- a/deseq2.xml Mon Sep 30 12:28:02 2013 -0400 +++ b/deseq2.xml Wed Feb 19 06:11:20 2014 -0500 @@ -8,7 +8,7 @@ - #import simplejson + #import json deseq2.R -o "$deseq_out" --outfilefiltered "$deseq_out_filtered" @@ -16,7 +16,7 @@ #if $pdf: -p "$plots" #end if - + -i "$input_matrix" #set $temp_factor_name = list() @@ -31,7 +31,7 @@ #end for ##-m "#echo ' '.join( $temp_factor_list )#" - -m '#echo simplejson.dumps(temp_factor_name)#' + -m '#echo json.dumps(temp_factor_name)#' ##--organism "$organism" ##-t "$fittype" -c $countthreshold @@ -53,15 +53,15 @@ - + - + - + - + @@ -90,24 +90,25 @@ - + - - + + - - + + pdf == True - + @@ -147,7 +148,7 @@ DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg) -If _DESeq2_ is used to obtain results for scientific publications it +If DESeq2_ is used to obtain results for scientific publications it should be cited as [1]_. A paper describing DESeq2_ is in preparation. diff -r a1aa11e02862 -r bbea9c694b34 deseq_helper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq_helper.py Wed Feb 19 06:11:20 2014 -0500 @@ -0,0 +1,98 @@ + +from galaxy.tools.parameters import DataToolParameter + +def get_matrix_header( input_dataset ): + """ + Not used currently, because the reload of the ckeckboxes did not work. + """ + input_handle = open( input_dataset.file_name ) + first_header = input_handle.readline() + second_header = input_handle.readline() + return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ] + + + +def _construct_error_map( error_map, rep_dict, rep_parent, child, error_value ): + """ + Its no so easy to create a propper error_map for repetitions in Galaxy. + This is a helper function. + """ + + error_map[ rep_parent ] = [ dict() for t in rep_dict ] + for i in range( len( rep_dict ) ): + error_map[ rep_parent ][i][ child ] = error_value + + + +def validate_input( trans, error_map, param_values, page_param_map ): + """ + Validates the user input, before execution. + """ + factors = param_values['rep_factorName'] + factor_name_list = [] + factor_duplication = False + level_duplication = False + overlapping_selection = False + + first_condition = True + factor_indieces = list() + + for factor in factors: + # factor names should be unique + fn = factor['factorName'] + if fn in factor_name_list: + factor_duplication = True + break + factor_name_list.append( fn ) + + level_name_list = list() + factor_index_list = list() + + if first_condition and len( factor['rep_factorLevel'] ) < 2: + # first condition needs to have at least 2 levels + _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'The first condition should have at least 2 factor'} for t in factor['rep_factorLevel'] ] ) + + for level in factor['rep_factorLevel']: + # level names under one factor should be unique + fl = level['factorLevel'] + if fl in level_name_list: + level_duplication = True + level_name_list.append( fl ) + + fi = level['factorIndex'] + if fi: + # the checkboxes should not have an overlap + for check in fi: + if check in factor_index_list: + overlapping_selection = True + factor_index_list.append( check ) + + print set(factor_index_list) + print factor_indieces + if set(factor_index_list) in factor_indieces: + _construct_error_map( error_map, factors, 'rep_factorName', 'rep_factorLevel', [ {'factorLevel': 'It is not allowed to have two identical factors, that means two factors with the same toggeled checked boxes. '} for t in factor['rep_factorLevel'] ] ) + else: + factor_indieces.append( set(factor_index_list) ) + + + + if level_duplication: + error_map['rep_factorName'] = [ dict() for t in factors ] + for i in range( len( factors ) ): + error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ] + break + if overlapping_selection: + error_map['rep_factorName'] = [ dict() for t in factors ] + for i in range( len( factors ) ): + error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ] + break + + first_condition = False + + if factor_duplication: + _construct_error_map( error_map, factors, 'rep_factorName', 'factorName', 'Factor names need to be unique' ) + """ + error_map['rep_factorName'] = [ dict() for t in factors ] + for i in range( len( factors ) ): + error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique' + """ diff -r a1aa11e02862 -r bbea9c694b34 foldchanges_heatmap.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/foldchanges_heatmap.r Wed Feb 19 06:11:20 2014 -0500 @@ -0,0 +1,44 @@ +## Setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +# we need that to not crash galaxy with an UTF8 error on German LC settings. +Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +args <- commandArgs(trailingOnly = TRUE) + +library("gplots") +library("RColorBrewer") + +tables <- {} +labels <- {} + +labels <- unlist(strsplit(args[1], ",")) +tables <- unlist(strsplit(args[2], ",")) +ids_file=args[3] +output_pdf_file=args[4] + +reds <- colorRampPalette(brewer.pal(9,"Reds"))(100) +reds <- rev(reds) +greens <- colorRampPalette(brewer.pal(9,"Greens"))(100) +cols <- c(reds, greens) + +ids <- read.table(ids_file) + +mat <- c() + +for(i in 1:length(tables)) { + # get data frame + curr_table <- read.table(tables[[i]], sep="\t", header=FALSE) + log2FCvect <- c() + for(j in 1:length(ids$V1)) { + log2FCvect <- c(log2FCvect, curr_table$V3[which(curr_table$V1 %in% ids$V1[j])]) + } + # build foldChange data frame for heatmap + mat <- cbind(mat, log2FCvect) +} +pdf(output_pdf_file) +hm <- heatmap.2(mat, col = cols, trace="none", labCol=labels, labRow=NULL,scale="none",symm=F,symkey=T,symbreaks=T, cexCol=0.8, cexRow=0.8) +dev.off() + + + + diff -r a1aa11e02862 -r bbea9c694b34 foldchanges_heatmap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/foldchanges_heatmap.xml Wed Feb 19 06:11:20 2014 -0500 @@ -0,0 +1,86 @@ + + Plot heat map of fold changes from multiple DESeq2 outputs + + Rscript + + + #set $label_list = list() + #set $table_list = list() + #for $input in $deseqout: + $label_list.append('%s' %($input.label)) + #end for + #for $input in $deseqout: + $table_list.append('%s' %($input.table)) + #end for + foldchanges_heatmap.r "#echo ','.join( $label_list )#" "#echo ','.join( $table_list )#" $gene_ids_file $plots + ## TODO: instead of throwing away stderr, try Bjoern's fix + 2> /dev/null + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What it does** + +Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution + + +**Inputs** + +DESeq2_ requires one count matrix as input file. You can use the tool + + + +**Output** + +DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF. + +====== ========================================================== +Column Description +------ ---------------------------------------------------------- + 1 Gene Identifiers + 2 mean normalised counts, averaged over all samples from both conditions + 3 the logarithm (to basis 2) of the fold change + 4 standard error estimate for the log2 fold change estimate + 5 p value for the statistical significance of this change + 6 p value adjusted for multiple testing with the Benjamini-Hochberg procedure + which controls false discovery rate (FDR) +====== ========================================================== + + +------ + +**References** + +DESeq2_ Authors: Michael Love (MPIMG Berlin), Simon Anders, Wolfgang Huber (EMBL Heidelberg) + +If DESeq2_ is used to obtain results for scientific publications it +should be cited as [1]_. A paper describing DESeq2_ is in preparation. + + + +.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. + +.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106 +.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html + + + + diff -r a1aa11e02862 -r bbea9c694b34 matrix_helper.py --- a/matrix_helper.py Mon Sep 30 12:28:02 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ - -from galaxy.tools.parameters import DataToolParameter - -def get_matrix_header( input_dataset ): - """ - Not used currently, because the reload of the ckeckboxes did not work. - """ - input_handle = open( input_dataset.file_name ) - first_header = input_handle.readline() - second_header = input_handle.readline() - return [('%s::%s' % (cname2,cname1), str(int(col_num) + 1), False) for col_num, (cname2, cname1) in enumerate(zip(second_header.split()[1:],first_header.split()[1:])) ] - - -def validate_input( trans, error_map, param_values, page_param_map ): - """ - Validates the user input, before execution. - """ - factors = param_values['rep_factorName'] - factor_name_list = [] - factor_duplication = False - level_duplication = False - overlapping_selection = False - - - for factor in factors: - # factor names should be unique - fn = factor['factorName'] - if fn in factor_name_list: - factor_duplication = True - break - factor_name_list.append( fn ) - - level_name_list = list() - factor_index_list = list() - - for level in factor['rep_factorLevel']: - # level names under one factor should be unique - fl = level['factorLevel'] - if fl in level_name_list: - level_duplication = True - level_name_list.append( fl ) - - fi = level['factorIndex'] - if fi: - # the checkboxes should not have an overlap - for check in fi: - if check in factor_index_list: - overlapping_selection = True - factor_index_list.append( check ) - - if level_duplication: - error_map['rep_factorName'] = [ dict() for t in factors ] - for i in range( len( factors ) ): - error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorLevel': 'Factor levels for each factor need to be unique'} for t in factor['rep_factorLevel'] ] - break - if overlapping_selection: - error_map['rep_factorName'] = [ dict() for t in factors ] - for i in range( len( factors ) ): - error_map['rep_factorName'][i]['rep_factorLevel'] = [ {'factorIndex': 'The samples from different factors are not allowed to overlap'} for t in factor['rep_factorLevel'] ] - break - - if factor_duplication: - error_map['rep_factorName'] = [ dict() for t in factors ] - for i in range( len( factors ) ): - error_map['rep_factorName'][i]['factorName'] = 'Factor names need to be unique' - diff -r a1aa11e02862 -r bbea9c694b34 tool_dependencies.xml --- a/tool_dependencies.xml Mon Sep 30 12:28:02 2013 -0400 +++ b/tool_dependencies.xml Wed Feb 19 06:11:20 2014 -0500 @@ -1,10 +1,10 @@ - + - +