changeset 16:1d2a02bc2208 draft

Uploaded
author bgruening
date Mon, 09 Sep 2013 07:09:38 -0400
parents ff74cd9b0414
children a05999fd6e26
files deseq2.R deseq2.xml
diffstat 2 files changed, 61 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.R	Thu Sep 05 04:09:22 2013 -0400
+++ b/deseq2.R	Mon Sep 09 07:09:38 2013 -0400
@@ -16,7 +16,8 @@
   'plots' , 'p', 2, "character",
   'input' , 'i', 1, "character",
   'samples', 's', 1, "character",
-  'factors', 'f', 2, "character"
+  'factors', 'f', 2, "character",
+  'fittype', 't', 2, "character"
 ), byrow=TRUE, ncol=4);
 opt = getopt(spec);
 
@@ -28,6 +29,9 @@
   q(status=1);
 }
 
+if( is.null(opt$fittype))
+	opt$fittype = "parametric"
+
 trim <- function (x) gsub("^\\s+|\\s+$", "", x)
 opt$samples <- trim(opt$samples)
 opt$factors <- trim(opt$factors)
@@ -68,15 +72,12 @@
             colnames(currentPairTable) <- sampleNames
 
         pdata = data.frame(condition=factor(conditions[currentColmuns]),row.names=colnames(currentPairTable))
+        print(pdata)
 
         dds = DESeqDataSetFromMatrix(countData = currentPairTable,  colData = pdata, design = ~ condition)
 
-        dds <- DESeq(dds)
+        dds <- DESeq(dds, fitType= opt$fittype)
         sizeFactors(dds)
-        if ( !is.null(opt$plots) ) {
-            plotDispEsts(dds)
-            plotMA(dds)
-        }
         res <- results(dds)
         resCols <- colnames(res)
 
@@ -86,6 +87,22 @@
         
         resSorted <- res[order(res$padj),]
         write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, append=TRUE, row.names = FALSE, col.names = FALSE)
+        
+        if ( !is.null(opt$plots) ) {
+            plotDispEsts(dds)
+            plotMA(dds)
+            
+			library("RColorBrewer")
+			library("gplots")
+			select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
+			hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+			
+			rld <- rlogTransformation(dds, blind=TRUE)
+			vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
+			distsRL <- dist(t(assay(rld)))
+			mat <- as.matrix(distsRL)
+			heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")            
+        }        
       }
     }
   }
@@ -108,7 +125,7 @@
     htseqSubCountTable <- matrix(unlist(htseqSubCountTable), ncol = ntabcols, byrow = FALSE)
     colnames(htseqSubCountTable) <- names(htseqCountTable)[sampleColumns]
 
-    pdata = data.frame(condition=factor(conditions[sampleColumns]), row.names=colnames(htseqSubCountTable))
+    pdata = data.frame(row.names=colnames(htseqSubCountTable))
 
     if(!is.null(opt$factors)){
         factorSets <- unlist(strsplit(opt$factors, " "))
@@ -134,17 +151,14 @@
             }
         }
     }
+    pdata$condition<-factor(conditions[sampleColumns])
+    print(pdata)
 
-    form <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ "))
-    dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable,  colData = pdata, design = form)
+    designFormula <- as.formula(paste("", paste(names(pdata), collapse=" + "), sep=" ~ "))
+    dds = DESeqDataSetFromMatrix(countData = htseqSubCountTable,  colData = pdata, design = designFormula)
 
-    dds <- DESeq(dds)
+    dds <- DESeq(dds, fitType= opt$fittype)
     sizeFactors(dds)
-    if ( !is.null(opt$plots) ) {
-        plotDispEsts(dds)
-        plotMA(dds)
-    }
-
     res <- results(dds)
     resCols <- colnames(res)
 
@@ -154,6 +168,22 @@
 
     resSorted <- res[order(res$padj),]
     write.table(as.data.frame(resSorted[,c("condition", "geneIds", resCols)]), file = opt$outputfile, sep="\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+
+    if ( !is.null(opt$plots) ) {
+        plotDispEsts(dds)
+        plotMA(dds)
+
+		library("RColorBrewer")
+		library("gplots")
+		select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
+		hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+        
+		rld <- rlogTransformation(dds, blind=TRUE)
+		vsd <- varianceStabilizingTransformation(dds, blind=TRUE)
+		distsRL <- dist(t(assay(rld)))
+		mat <- as.matrix(distsRL)
+		heatmap.2(mat, trace="none", col = rev(hmcol), main = "Sample-to-sample distances")
+    }    
 }
 dev.off()
 
--- a/deseq2.xml	Thu Sep 05 04:09:22 2013 -0400
+++ b/deseq2.xml	Mon Sep 09 07:09:38 2013 -0400
@@ -33,7 +33,7 @@
                 -f "#echo ' '.join( $temp_factor_list )#"
             #end if
         #end if
-
+				-t $fittype
     </command>
     <stdio>
         <regex match="Execution halted" 
@@ -50,26 +50,28 @@
            description="An undefined error occured, please check your intput carefully and contact your administrator." />
     </stdio>
     <inputs>
-        <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool ..."/>
+        <param format="tabular" name="input_matrix" type="data" label="Countmatrix" help="You can create a count matrix with the tool 
+			'Count reads in features with htseq-count'"/>
 
          <conditional name="filter_sel">
-            <param name="filter_sel_opts" type="select" label="Select properties to filter">
+            <param name="filter_sel_opts" type="select" label="Select mode of analysis" help="All against all: analyses all possible pairs of conditions,
+				&lt;br&gt; Specific: select your samples of interest">
               <option value="all_vs_all">All against all</option>
               <option value="specific">specific</option>
             </param>
             <when value="specific">
 
-                <param name="control_cols" label="Select columns containing treatment." type="data_column" data_ref="input_matrix"
+                <param name="control_cols" label="Select columns containing first condition" type="data_column" data_ref="input_matrix"
                     numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes">
                     <validator type="no_options" message="Please select at least one column."/>
                 </param>
 
-                <param name="experiement_cols" label="Select columns containing treatment." type="data_column" data_ref="input_matrix"
+                <param name="experiement_cols" label="Select columns containing second condition" type="data_column" data_ref="input_matrix"
                     numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes">
                     <validator type="no_options" message="Please select at least one column."/>
                 </param>
 
-                <repeat name="factor" title="Include multi-factor analysis">
+                <repeat name="factor" title="Include factor">
                     <param name="factor_name" type="text" value="Factor Name" label="Specify a factor name" help=""/>
 
                     <param name="factor_index" label="Select columns that are associated with a factor." type="data_column" data_ref="input_matrix"
@@ -79,12 +81,16 @@
                 </repeat>
 
             </when>
-            <when value="all_v_all" />
+            <when value="all_vs_all" />
         </conditional>
 
-        <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results"
+        <param name="fittype" type="select" label="Type of fitting of dispersions to the mean intensity">
+			<option value="parametric">parametric</option>
+			<option value="local">local</option>
+			<option value="mean">mean</option>
+		</param>	
+		<param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" label="Visualising the analysis results"
             help="output an additional PDF file" />
-
     </inputs>
 
   <outputs>
@@ -116,7 +122,7 @@
 ====== ==========================================================
 Column Description
 ------ ----------------------------------------------------------
-     1 Sample ID (corresponds to the header in your count matrix)
+     1 Condition tested (corresponds to the conitions from the first line of your count matrix)
      2 Gene Identifiers
      3 mean normalised counts, averaged over all samples from both conditions
      4 the logarithm (to basis 2) of the fold change