# HG changeset patch
# User bgruening
# Date 1378724978 14400
# Node ID 1d2a02bc220843dbe04bcdbead96f6008d8e7f58
# Parent ff74cd9b041405fc4925cf2690ef676b13d7723d
Uploaded
diff -r ff74cd9b0414 -r 1d2a02bc2208 deseq2.R
--- a/deseq2.R Thu Sep 05 04:09:22 2013 -0400
+++ b/deseq2.R Mon Sep 09 07:09:38 2013 -0400
@@ -16,7 +16,8 @@
'plots' , 'p', 2, "character",
'input' , 'i', 1, "character",
'samples', 's', 1, "character",
- 'factors', 'f', 2, "character"
+ 'factors', 'f', 2, "character",
+ 'fittype', 't', 2, "character"
), byrow=TRUE, ncol=4);
opt = getopt(spec);
@@ -28,6 +29,9 @@
q(status=1);
}
+if( is.null(opt$fittype))
+ opt$fittype = "parametric"
+
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
opt$samples <- trim(opt$samples)
opt$factors <- trim(opt$factors)
@@ -68,15 +72,12 @@
colnames(currentPairTable) <- sampleNames
pdata = data.frame(condition=factor(conditions[currentColmuns]),row.names=colnames(currentPairTable))
+ print(pdata)
dds = DESeqDataSetFromMatrix(countData = currentPairTable, colData = pdata, design = ~ condition)
- dds <- DESeq(dds)
+ dds <- DESeq(dds, fitType= opt$fittype)
sizeFactors(dds)
- if ( !is.null(opt$plots) ) {
- plotDispEsts(dds)
- plotMA(dds)
- }
res <- results(dds)
resCols <- colnames(res)
@@ -86,6 +87,22 @@
resSorted <- res[order(res$padj),]
write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE)
+
+ if ( !is.null(opt$plots) ) {
+ plotDispEsts(dds)
+ plotMA(dds)
+
+ library("RColorBrewer")
+ library("gplots")
+ select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
+ hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+
+ rld <- rlogTransformation(dds, blind=TRUE)
+ vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
+ distsRL <- dist(t(assay(rld)))
+ mat <- as.matrix(distsRL)
+ heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")
+ }
}
}
}
@@ -108,7 +125,7 @@
htseqSubCountTable <- matrix(unlist(htseqSubCountTable), ncol = ntabcols, byrow = FALSE)
colnames(htseqSubCountTable) <- names(htseqCountTable)[sampleColumns]
- pdata = data.frame(condition=factor(conditions[sampleColumns]), row.names=colnames(htseqSubCountTable))
+ pdata = data.frame(row.names=colnames(htseqSubCountTable))
if(!is.null(opt$factors)){
factorSets <- unlist(strsplit(opt$factors, " "))
@@ -134,17 +151,14 @@
}
}
}
+ pdata$condition<-factor(conditions[sampleColumns])
+ print(pdata)
- form <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ "))
- dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = form)
+ designFormula <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ "))
+ dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable, colData = pdata, design = designFormula)
- dds <- DESeq(dds)
+ dds <- DESeq(dds, fitType= opt$fittype)
sizeFactors(dds)
- if ( !is.null(opt$plots) ) {
- plotDispEsts(dds)
- plotMA(dds)
- }
-
res <- results(dds)
resCols <- colnames(res)
@@ -154,6 +168,22 @@
resSorted <- res[order(res$padj),]
write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+
+ if ( !is.null(opt$plots) ) {
+ plotDispEsts(dds)
+ plotMA(dds)
+
+ library("RColorBrewer")
+ library("gplots")
+ select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
+ hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+
+ rld <- rlogTransformation(dds, blind=TRUE)
+ vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
+ distsRL <- dist(t(assay(rld)))
+ mat <- as.matrix(distsRL)
+ heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")
+ }
}
dev.off()
diff -r ff74cd9b0414 -r 1d2a02bc2208 deseq2.xml
--- a/deseq2.xml Thu Sep 05 04:09:22 2013 -0400
+++ b/deseq2.xml Mon Sep 09 07:09:38 2013 -0400
@@ -33,7 +33,7 @@
-f "#echo ' '.join( $temp_factor_list )#"
#end if
#end if
-
+ -t $fittype
-
+
-
+
-
-
-
+
-
+
-
+
+
+
+
+
-
@@ -116,7 +122,7 @@
====== ==========================================================
Column Description
------ ----------------------------------------------------------
- 1 Sample ID (corresponds to the header in your count matrix)
+ 1 Condition tested (corresponds to the conitions from the first line of your count matrix)
2 Gene Identifiers
3 mean normalised counts, averaged over all samples from both conditions
4 the logarithm (to basis 2) of the fold change