annotate edgeR_DGE_test.R @ 3:df239301559a draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 02:44:37 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
1 #!/usr/bin/env Rscript
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
2
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
3 # edgeR citation:
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
4 # Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor package for differential expression
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
5 # analysis of digital gene expression data. Bioinformatics 26, 139-140
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
6 #
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
7 # Robinson MD and Smyth GK (2007). Moderated statistical tests for assessing differences in tag
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
8 # abundance. Bioinformatics 23, 2881-2887
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
9
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
10 # Robinson MD and Smyth GK (2008). Small-sample estimation of negative binomial dispersion, with
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
11 # applications to SAGE data. Biostatistics, 9, 321-332
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
12
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
13 # McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis of multifactor RNA-Seq
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
14 # experiments with respect to biological variation. Nucleic Acids Research 40, 4288-4297
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
15
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
16 # R script Author:
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
17 # - MSc. René Böttcher (Erasmus MC)
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
18
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
19 # Hooked into Galaxy Server:
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
20 # - MSc. Youri Hoogstrate (Erasmus MC)
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
21
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
22
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# NOTE: the script no longer calls setwd() on a hard-coded developer
# directory; that made it abort on every machine where the directory does not
# exist. Relative paths are now resolved against the caller's working
# directory.

library(edgeR)

# Fetch command-line arguments (all positional):
#    1  design matrix file (tab-delimited, with header)
#    2  contrast expression handed to makeContrasts()
#    3  output: table of the likelihood ratio test results
#    4  output: counts per million
#    5  output: FPKM file - to be implemented
#    6  output: raw counts
#    7  QC switch; any non-empty string enables the QC plots
#    8  output: MDS plot (PDF), only written when QC is enabled
#    9  output: BCV plot (PDF), only written when QC is enabled
#   10  output: MA plot (PDF), only written when QC is enabled
#   11  currently unused
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 6) {
  stop(
    "Expected at least 6 arguments (design matrix, contrast and 4 output ",
    "files), got ", length(args),
    call. = FALSE
  )
}

designmatrix <- args[1]
contrast <- args[2]

output_1 <- args[3]
output_2 <- args[4]
output_3 <- args[5] # FPKM file - to be implemented
output_4 <- args[6]

# A missing 7th argument must disable QC instead of yielding NA, which would
# make every later `if (QC == TRUE)` fail with "missing value where
# TRUE/FALSE needed".
QC <- length(args) >= 7 && !is.na(args[7]) && nchar(args[7]) > 0
if (QC && length(args) < 10) {
  stop(
    "QC plots requested but the plot output files (arguments 8-10) are ",
    "missing",
    call. = FALSE
  )
}

output_5 <- args[8]
output_6 <- args[9]
output_7 <- args[10]

output_8 <- args[11]
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Read the design matrix; its first column lists the count file of each sample.
raw_data <- read.delim(designmatrix, header = TRUE, stringsAsFactors = TRUE)
if (nrow(raw_data) < 1) {
  stop("The design matrix does not list any samples", call. = FALSE)
}

# Obtain read-counts: read every count file (first column = row names) and
# bind the tables column-wise, one column per sample.
# seq_along() avoids the 2:length() trap, which counts backwards (2, 1) when
# the design matrix holds a single sample.
# Progress is reported with print() so that it stays on stdout.
count_files <- as.character(raw_data[, 1])
count_tables <- lapply(seq_along(count_files), function(i) {
  print("parsing counts from:")
  print(count_files[i])
  counts <- read.delim(
    count_files[i],
    header = FALSE,
    stringsAsFactors = FALSE,
    row.names = 1
  )
  print(i)
  counts
})
read_counts <- do.call(cbind, count_tables)
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
59
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
60
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
61
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Filter for HTSeq / DEXSeq predefined counts: these rows are summary counters
# emitted by the counting tools, not features, and must not enter the
# analysis.
exclude_HTSeq <- c(
  "no_feature", "ambiguous", "too_low_aQual", "not_aligned",
  "alignment_not_unique"
)
# htseq-count >= 0.6 prefixes its special counters with a double underscore.
exclude_HTSeq <- c(exclude_HTSeq, paste0("__", exclude_HTSeq))
exclude_DEXSeq <- c("_ambiguous", "_empty", "_lowaqual", "_notaligned")

# Logical mask instead of match() + negative indices: no NA clean-up needed,
# and an all-FALSE mask simply keeps every row.
exclude <- rownames(read_counts) %in% c(exclude_HTSeq, exclude_DEXSeq)
# drop = FALSE keeps a data frame (and its row names) even with one column.
read_counts <- read_counts[!exclude, , drop = FALSE]
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
71
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
72
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
73
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
74
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
75
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
76
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
77
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
78
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
79
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# The design matrix needs the sample names in column 2 and at least one
# factor/covariate column after that. Without this check 3:length(raw_data)
# would count backwards (3, 2) and fail with "undefined columns selected".
if (ncol(raw_data) < 3) {
  stop(
    "The design matrix needs at least 3 columns: count file, sample name ",
    "and one or more factors",
    call. = FALSE
  )
}

# Label the count columns with the sample names and wrap them in a DGEList.
colnames(read_counts) <- as.character(raw_data[, 2])
dge <- DGEList(counts = read_counts, genes = rownames(read_counts))

# Build an intercept-free model matrix from all remaining columns.
design_tmp <- raw_data[3:ncol(raw_data)]
rownames(design_tmp) <- colnames(dge)
design_formula <- paste(c("~0", colnames(design_tmp)), collapse = " + ")
design <- model.matrix(as.formula(design_formula), design_tmp)

# model.matrix() names each column "<variable><level>". Strip the variable
# prefix so that the columns carry the bare level names. Columns whose name
# equals the variable name (no level suffix) keep their name, because
# stripping would leave an empty string.
prefixes <- colnames(design_tmp)[attr(design, "assign")]
avoid <- nchar(prefixes) == nchar(colnames(design))
replacements <- substr(
  colnames(design),
  nchar(prefixes) + 1,
  nchar(colnames(design))
)
replacements[avoid] <- colnames(design)[avoid]
colnames(design) <- replacements
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
93
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
94
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
95
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Normalise the library sizes first; the dispersion estimates build on them.
print("Calculating normalization factors...")
dge <- calcNormFactors(dge)

# Run the three GLM dispersion estimators in order (common, trended,
# tagwise). Each step takes the DGEList updated by the previous step plus the
# design matrix, and its progress message is printed right before it runs.
dispersion_steps <- list(
  "Estimating common dispersion..." = estimateGLMCommonDisp,
  "Estimating trended dispersion..." = estimateGLMTrendedDisp,
  "Estimating tagwise dispersion..." = estimateGLMTagwiseDisp
)
for (step_label in names(dispersion_steps)) {
  print(step_label)
  dge <- dispersion_steps[[step_label]](dge, design)
}
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
104
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
105
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
106
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
107
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Optional quality-control plots, each written to its own PDF file.
if (QC) {
  print("Creating QC plots...")

  # Multi-dimensional scaling plot of the samples
  pdf(file = output_5)
  plotMDS(dge, main = "edgeR MDS Plot")
  dev.off()

  # Biological coefficient of variation against abundance
  pdf(file = output_6)
  plotBCV(
    dge,
    main = "edgeR: Biological coefficient of variation (BCV) vs abundance",
    cex = 0.4
  )
  dev.off()
}
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
119
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
120
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
121
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Fit the negative binomial GLM for every feature.
print("Fitting GLM...")
fit <- glmFit(dge, design)

# Turn the contrast expression into a numeric contrast over the design
# columns and run the likelihood ratio test on it.
print(paste0("Performing likelihood ratio test: ", contrast))
cont <- makeContrasts(contrasts = contrast, levels = design)
lrt <- glmLRT(fit, contrast = cont[, 1])

# Export the test results for all features, the counts per million and the
# raw counts as tab-separated tables.
print(paste0("Exporting to file: ", output_1))
de_table <- topTags(lrt, n = nrow(read_counts))$table
write.table(de_table, file = output_1, sep = "\t", row.names = TRUE)
write.table(cpm(dge, normalized.lib.sizes = TRUE), file = output_2, sep = "\t")
# todo EXPORT FPKM
write.table(dge$counts, file = output_4, sep = "\t")
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
135
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
136
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
137
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
# Optional MA plot: log fold change against abundance, with the features
# below 5% FDR highlighted in red and the |logFC| = 1 lines drawn in blue.
if (QC) {
  print("Creating MA plots...")

  etable <- topTags(lrt, n = nrow(dge))$table
  etable <- etable[order(etable$FDR), ]
  # which() skips NA values, just like subset() does.
  significant <- which(etable$FDR < 0.05)

  pdf(file = output_7)
  plot(
    etable$logCPM, etable$logFC,
    xlab = "logCPM", ylab = "logFC",
    pch = 20, main = "edgeR: Fold change vs abundance"
  )
  points(
    etable$logCPM[significant], etable$logFC[significant],
    pch = 20, col = "red"
  )
  abline(h = c(-1, 1), col = "blue")
  dev.off()
}
print("Done!")
df239301559a Uploaded
yhoogstrate
parents:
diff changeset
151