changeset 44:261b997b241f draft

Uploaded
author yhoogstrate
date Wed, 28 May 2014 05:24:12 -0400
parents c6e787bb605c
children f710e5ed7cea
files edgeR_Differential_Gene_Expression.xml
diffstat 1 files changed, 9 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/edgeR_Differential_Gene_Expression.xml	Wed May 28 05:11:48 2014 -0400
+++ b/edgeR_Differential_Gene_Expression.xml	Wed May 28 05:24:12 2014 -0400
@@ -158,24 +158,28 @@
   design_matrix[,i] <- as.factor(design_matrix[,i])
 }
 
-
+## 1) In the expression matrix, you only want to have the samples described in the design matrix
 columns <- match(rownames(design_matrix),colnames(expression_matrix))
 columns <- columns[!is.na(columns)]
 read_counts <- expression_matrix[,columns]
 
+## 2) In the design matrix, you only want to have samples of which you really have the counts
+columns <- match(colnames(expression_matrix),rownames(design_matrix))
+columns <- columns[!is.na(columns)]
+design_matrix <- design_matrix[columns,,drop=FALSE]
 
 ## Filter for HTSeq predifined counts:
 exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
 exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned")
 
-exclude = match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
-exclude = exclude[is.na(exclude)==0]
+exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
+exclude <- exclude[is.na(exclude)==0]
 if(length(exclude) != 0)  {
-  read_counts = read_counts[-exclude,]
+  read_counts <- read_counts[-exclude,]
 }
 
 
-empty_samples = apply(read_counts,2,function(x) sum(x) == 0)
+empty_samples <- apply(read_counts,2,function(x) sum(x) == 0)
 if(sum(empty_samples) > 0) {
   write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
   write(colnames(read_counts)[empty_samples],stderr())