Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 44:261b997b241f draft
Uploaded
author | yhoogstrate |
---|---|
date | Wed, 28 May 2014 05:24:12 -0400 |
parents | c6e787bb605c |
children | f710e5ed7cea |
files | edgeR_Differential_Gene_Expression.xml |
diffstat | 1 files changed, 9 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/edgeR_Differential_Gene_Expression.xml Wed May 28 05:11:48 2014 -0400 +++ b/edgeR_Differential_Gene_Expression.xml Wed May 28 05:24:12 2014 -0400 @@ -158,24 +158,28 @@ design_matrix[,i] <- as.factor(design_matrix[,i]) } - +## 1) In the expression matrix, you only want to have the samples described in the design matrix columns <- match(rownames(design_matrix),colnames(expression_matrix)) columns <- columns[!is.na(columns)] read_counts <- expression_matrix[,columns] +## 2) In the design matrix, you only want to have samples of which you really have the counts +columns <- match(colnames(expression_matrix),rownames(design_matrix)) +columns <- columns[!is.na(columns)] +design_matrix <- design_matrix[columns,,drop=FALSE] ## Filter for HTSeq predifined counts: exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique") exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned") -exclude = match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts)) -exclude = exclude[is.na(exclude)==0] +exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts)) +exclude <- exclude[is.na(exclude)==0] if(length(exclude) != 0) { - read_counts = read_counts[-exclude,] + read_counts <- read_counts[-exclude,] } -empty_samples = apply(read_counts,2,function(x) sum(x) == 0) +empty_samples <- apply(read_counts,2,function(x) sum(x) == 0) if(sum(empty_samples) > 0) { write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr()) write(colnames(read_counts)[empty_samples],stderr())