Mercurial > repos > bgruening > deseq2
changeset 16:1d2a02bc2208 draft
Uploaded
author | bgruening |
---|---|
date | Mon, 09 Sep 2013 07:09:38 -0400 |
parents | ff74cd9b0414 |
children | a05999fd6e26 |
files | deseq2.R deseq2.xml |
diffstat | 2 files changed, 61 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Thu Sep 05 04:09:22 2013 -0400 +++ b/deseq2.R Mon Sep 09 07:09:38 2013 -0400 @@ -16,7 +16,8 @@ 'plots' , 'p', 2, "character", 'input' , 'i', 1, "character", 'samples', 's', 1, "character", - 'factors', 'f', 2, "character" + 'factors', 'f', 2, "character", + 'fittype', 't', 2, "character" ), byrow=TRUE, ncol=4); opt = getopt(spec); @@ -28,6 +29,9 @@ q(status=1); } +if( is.null(opt$fittype)) + opt$fittype = "parametric" + trim <- function (x) gsub("^\\s+|\\s+$", "", x) opt$samples <- trim(opt$samples) opt$factors <- trim(opt$factors) @@ -68,15 +72,12 @@ colnames(currentPairTable) <- sampleNames pdata = data.frame(condition=factor(conditions[currentColmuns]),row.names=colnames(currentPairTable)) + print(pdata) dds = DESeqDataSetFromMatrix(countData = currentPairTable, colData = pdata, design = ~ condition) - dds <- DESeq(dds) + dds <- DESeq(dds, fitType= opt$fittype) sizeFactors(dds) - if ( !is.null(opt$plots) ) { - plotDispEsts(dds) - plotMA(dds) - } res <- results(dds) resCols <- colnames(res) @@ -86,6 +87,22 @@ resSorted <- res[order(res$padj),] write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE) + + if ( !is.null(opt$plots) ) { + plotDispEsts(dds) + plotMA(dds) + + library("RColorBrewer") + library("gplots") + select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] + hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) + + rld <- rlogTransformation(dds, blind=TRUE) + vsd <- varianceStabilizingTransformation(dds, blind=TRUE) + distsRL <- dist(t(assay(rld))) + mat <- as.matrix(distsRL) + heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances") + } } } } @@ -108,7 +125,7 @@ htseqSubCountTable <- matrix(unlist(htseqSubCountTable), ncol = ntabcols, byrow = FALSE) colnames(htseqSubCountTable) <- names(htseqCountTable)[sampleColumns] - pdata = data.frame(condition=factor(conditions[sampleColumns]), row.names=colnames(htseqSubCountTable)) + pdata = data.frame(row.names=colnames(htseqSubCountTable)) if(!is.null(opt$factors)){ factorSets <- unlist(strsplit(opt$factors, " ")) @@ -134,17 +151,14 @@ } } } + pdata$condition<-factor(conditions[sampleColumns]) + print(pdata) - form <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ ")) - dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = form) + designFormula <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ ")) + dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = designFormula) - dds <- DESeq(dds) + dds <- DESeq(dds, fitType= opt$fittype) sizeFactors(dds) - if ( !is.null(opt$plots) ) { - plotDispEsts(dds) - plotMA(dds) - } - res <- results(dds) resCols <- colnames(res) @@ -154,6 +168,22 @@ resSorted <- res[order(res$padj),] write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, row.names = FALSE, col.names = FALSE) + + if ( !is.null(opt$plots) ) { + plotDispEsts(dds) + plotMA(dds) + + library("RColorBrewer") + library("gplots") + select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] + hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) + + rld <- rlogTransformation(dds, blind=TRUE) + vsd <- varianceStabilizingTransformation(dds, blind=TRUE) + distsRL <- dist(t(assay(rld))) + mat <- as.matrix(distsRL) + heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances") + } } dev.off()
--- a/deseq2.xml Thu Sep 05 04:09:22 2013 -0400 +++ b/deseq2.xml Mon Sep 09 07:09:38 2013 -0400 @@ -33,7 +33,7 @@ -f "#echo ' '.join( $temp_factor_list )#" #end if #end if - + -t $fittype </command> <stdio> <regex match="Execution halted" @@ -50,26 +50,28 @@ description="An undefined error occured, please check your intput carefully and contact your administrator." /> </stdio> <inputs> - <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool ..."/> + <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool + 'Count reads in features with htseq-count'"/> <conditional name="filter_sel"> - <param name="filter_sel_opts" type="select" label="Select properties to filter"> + <param name="filter_sel_opts" type="select" label="Select mode of analysis" help="All against all: analyses all possible pairs of conditions, + <br> Specific: select your samples of interest"> <option value="all_vs_all">All against all</option> <option value="specific">specific</option> </param> <when value="specific"> - <param name="control_cols" label="Select columns containing treatment." type="data_column" data_ref="input_matrix" + <param name="control_cols" label="Select columns containing first condition" type="data_column" data_ref="input_matrix" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes"> <validator type="no_options" message="Please select at least one column."/> </param> - <param name="experiement_cols" label="Select columns containing treatment." type="data_column" data_ref="input_matrix" + <param name="experiement_cols" label="Select columns containing second condition" type="data_column" data_ref="input_matrix" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes"> <validator type="no_options" message="Please select at least one column."/> </param> - <repeat name="factor" title="Include multi-factor analysis"> + <repeat name="factor" title="Include factor"> <param name="factor_name" type="text" value="Factor Name" label="Specify a factor name" help=""/> <param name="factor_index" label="Select columns that are associated with a factor." type="data_column" data_ref="input_matrix" @@ -79,12 +81,16 @@ </repeat> </when> - <when value="all_v_all" /> + <when value="all_vs_all" /> </conditional> - <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results" + <param name="fittype" type="select" label="Type of fitting of dispersions to the mean intensity"> + <option value="parametric">parametric</option> + <option value="local">local</option> + <option value="mean">mean</option> + </param> + <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results" help="output an additional PDF file" /> - </inputs> <outputs> @@ -116,7 +122,7 @@ ====== ========================================================== Column Description ------ ---------------------------------------------------------- - 1 Sample ID (corresponds to the header in your count matrix) + 1 Condition tested (corresponds to the conitions from the first line of your count matrix) 2 Gene Identifiers 3 mean normalised counts, averaged over all samples from both conditions 4 the logarithm (to basis 2) of the fold change