# HG changeset patch # User bgruening # Date 1378724978 14400 # Node ID 1d2a02bc220843dbe04bcdbead96f6008d8e7f58 # Parent ff74cd9b041405fc4925cf2690ef676b13d7723d Uploaded diff -r ff74cd9b0414 -r 1d2a02bc2208 deseq2.R --- a/deseq2.R Thu Sep 05 04:09:22 2013 -0400 +++ b/deseq2.R Mon Sep 09 07:09:38 2013 -0400 @@ -16,7 +16,8 @@ 'plots' , 'p', 2, "character", 'input' , 'i', 1, "character", 'samples', 's', 1, "character", - 'factors', 'f', 2, "character" + 'factors', 'f', 2, "character", + 'fittype', 't', 2, "character" ), byrow=TRUE, ncol=4); opt = getopt(spec); @@ -28,6 +29,9 @@ q(status=1); } +if( is.null(opt$fittype)) + opt$fittype = "parametric" + trim <- function (x) gsub("^\\s+|\\s+$", "", x) opt$samples <- trim(opt$samples) opt$factors <- trim(opt$factors) @@ -68,15 +72,12 @@ colnames(currentPairTable) <- sampleNames pdata = data.frame(condition=factor(conditions[currentColmuns]),row.names=colnames(currentPairTable)) + print(pdata) dds = DESeqDataSetFromMatrix(countData = currentPairTable, colData = pdata, design = ~ condition) - dds <- DESeq(dds) + dds <- DESeq(dds, fitType= opt$fittype) sizeFactors(dds) - if ( !is.null(opt$plots) ) { - plotDispEsts(dds) - plotMA(dds) - } res <- results(dds) resCols <- colnames(res) @@ -86,6 +87,22 @@ resSorted <- res[order(res$padj),] write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE) + + if ( !is.null(opt$plots) ) { + plotDispEsts(dds) + plotMA(dds) + + library("RColorBrewer") + library("gplots") + select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] + hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) + + rld <- rlogTransformation(dds, blind=TRUE) + vsd <- varianceStabilizingTransformation(dds, blind=TRUE) + distsRL <- dist(t(assay(rld))) + mat <- as.matrix(distsRL) + heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances") + } } } } @@ -108,7 +125,7 @@ htseqSubCountTable <- matrix(unlist(htseqSubCountTable), ncol = ntabcols, byrow = FALSE) colnames(htseqSubCountTable) <- names(htseqCountTable)[sampleColumns] - pdata = data.frame(condition=factor(conditions[sampleColumns]), row.names=colnames(htseqSubCountTable)) + pdata = data.frame(row.names=colnames(htseqSubCountTable)) if(!is.null(opt$factors)){ factorSets <- unlist(strsplit(opt$factors, " ")) @@ -134,17 +151,14 @@ } } } + pdata$condition<-factor(conditions[sampleColumns]) + print(pdata) - form <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ ")) - dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = form) + designFormula <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ ")) + dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = designFormula) - dds <- DESeq(dds) + dds <- DESeq(dds, fitType= opt$fittype) sizeFactors(dds) - if ( !is.null(opt$plots) ) { - plotDispEsts(dds) - plotMA(dds) - } - res <- results(dds) resCols <- colnames(res) @@ -154,6 +168,22 @@ resSorted <- res[order(res$padj),] write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, row.names = FALSE, col.names = FALSE) + + if ( !is.null(opt$plots) ) { + plotDispEsts(dds) + plotMA(dds) + + library("RColorBrewer") + library("gplots") + select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] + hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) + + rld <- rlogTransformation(dds, blind=TRUE) + vsd <- varianceStabilizingTransformation(dds, blind=TRUE) + distsRL <- dist(t(assay(rld))) + mat <- as.matrix(distsRL) + heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances") + } } dev.off() diff -r ff74cd9b0414 -r 1d2a02bc2208 deseq2.xml --- a/deseq2.xml Thu Sep 05 04:09:22 2013 -0400 +++ b/deseq2.xml Mon Sep 09 07:09:38 2013 -0400 @@ -33,7 +33,7 @@ -f "#echo ' '.join( $temp_factor_list )#" #end if #end if - + -t $fittype - + - + - - - + - + - + + + + + - @@ -116,7 +122,7 @@ ====== ========================================================== Column Description ------ ---------------------------------------------------------- - 1 Sample ID (corresponds to the header in your count matrix) + 1 Condition tested (corresponds to the conitions from the first line of your count matrix) 2 Gene Identifiers 3 mean normalised counts, averaged over all samples from both conditions 4 the logarithm (to basis 2) of the fold change