comparison edgeR_DGE.xml @ 15:db9eb4b6f778 draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 08:51:59 -0500
parents 15fd1a6798e0
children fba5577122a8
comparison
equal deleted inserted replaced
14:15fd1a6798e0 15:db9eb4b6f778
6 <!-- 6 <!--
7 The following script is written in the "Cheetah" language: 7 The following script is written in the "Cheetah" language:
8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html 8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
9 --> 9 -->
10 10
11 R CMD BATCH --vanilla --slave '--args 11 R --vanilla --slave -f $R_script '--args
12 $design_matrix 12 $design_matrix
13 $contrast 13 $contrast
14 14
15 $output_count_edgeR 15 $output_count_edgeR
16 $output_cpm 16 $output_cpm
20 $qc 20 $qc
21 $output_MDSplot 21 $output_MDSplot
22 $output_BCVplot 22 $output_BCVplot
23 $output_MAplot 23 $output_MAplot
24 smearPlot ' 24 smearPlot '
25 $R_script $output_R 25 > $output_R
26 </command> 26 </command>
27 27
28 <inputs> 28 <inputs>
29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> 29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" />
30 30
50 designmatrix = args[1] 50 designmatrix = args[1]
51 contrast = args[2] 51 contrast = args[2]
52 52
53 output_1 = args[3] 53 output_1 = args[3]
54 output_2 = args[4] 54 output_2 = args[4]
55 output_3 = args[5] ##FPKM file - to be implemented 55 output_3 = args[5] ##FPKM file - yet to be implemented
56 output_4 = args[6] 56 output_4 = args[6]
57 57
58 QC = nchar(args[7]) > 0 58 QC = nchar(args[7]) > 0
59 59
60 output_5 = args[8] 60 output_5 = args[8]
73 has_header = (class(header[1,1]) == "character") 73 has_header = (class(header[1,1]) == "character")
74 74
75 read_counts = read.delim(as.character(raw_data[1,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] 75 read_counts = read.delim(as.character(raw_data[1,1]),header=has_header,stringsAsFactors=F,row.names=1)[1]
76 76
77 for(i in 2:length(raw_data[,1])) { 77 for(i in 2:length(raw_data[,1])) {
78 print("parsing counts from:") 78 write("parsing counts from:",stdout())
79 print(raw_data[i,1]) 79 write(raw_data[i,1],stdout())
80 80
81 header = read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1,nrows=1) 81 header = read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1,nrows=1)
82 has_header = (class(header[1,1]) == "character") 82 has_header = (class(header[1,1]) == "character")
83 table = read.delim(as.character(raw_data[i,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] 83 table = read.delim(as.character(raw_data[i,1]),header=has_header,stringsAsFactors=F,row.names=1)[1]
84 84
85 read_counts = cbind(read_counts,table) 85 read_counts = cbind(read_counts,table)
86 print(i)
87 } 86 }
88 87
89 colnames(read_counts) = as.character(raw_data[,2]) 88 colnames(read_counts) = as.character(raw_data[,2])
90 89
91 90
120 replacements[avoid] = colnames(design)[avoid] 119 replacements[avoid] = colnames(design)[avoid]
121 colnames(design) = replacements 120 colnames(design) = replacements
122 121
123 122
124 123
125 print("Calculating normalization factors...") 124 write("Calculating normalization factors...",stdout())
126 dge = calcNormFactors(dge) 125 dge = calcNormFactors(dge)
127 print("Estimating common dispersion...") 126 write("Estimating common dispersion...",stdout())
128 dge = estimateGLMCommonDisp(dge,design) 127 dge = estimateGLMCommonDisp(dge,design)
129 print("Estimating trended dispersion...") 128 write("Estimating trended dispersion...",stdout())
130 dge = estimateGLMTrendedDisp(dge,design) 129 dge = estimateGLMTrendedDisp(dge,design)
131 print("Estimating tagwise dispersion...") 130 write("Estimating tagwise dispersion...",stdout())
132 dge = estimateGLMTagwiseDisp(dge,design) 131 dge = estimateGLMTagwiseDisp(dge,design)
133 132
134 133
135 134
136 135
137 if (QC == TRUE) { 136 if(QC == TRUE) {
138 print("Creating QC plots...") 137 write("Creating QC plots...",stdout())
139 #### MDS Plot 138 #### MDS Plot
140 pdf(output_5) 139 pdf(output_5)
141 plotMDS(dge, main="edgeR MDS Plot") 140 plotMDS(dge, main="edgeR MDS Plot")
142 dev.off() 141 dev.off()
143 #### Biological coefficient of variation plot 142 #### Biological coefficient of variation plot
146 dev.off() 145 dev.off()
147 } 146 }
148 147
149 148
150 149
151 print("Fitting GLM...") 150 write("Fitting GLM...",stdout())
152 fit = glmFit(dge,design) 151 fit = glmFit(dge,design)
153 152
154 print(paste("Performing likelihood ratio test: ",contrast,sep="")) 153 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
155 cont &lt;- c(contrast) 154 cont &lt;- c(contrast)
156 cont &lt;- makeContrasts(contrasts=cont, levels=design) 155 cont &lt;- makeContrasts(contrasts=cont, levels=design)
157 156
158 lrt &lt;- glmLRT(fit, contrast=cont[,1]) 157 lrt &lt;- glmLRT(fit, contrast=cont[,1])
159 print(paste("Exporting to file: ",output_1,sep="")) 158 write(paste("Exporting to file: ",output_1,sep=""),stdout())
160 write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T) 159 write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T)
161 write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t") 160 write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t")
162 ## todo EXPORT FPKM 161 ## todo EXPORT FPKM
163 write.table(file=output_4,dge\$counts,sep="\t") 162 write.table(file=output_4,dge\$counts,sep="\t")
164 163
165 164
166 165
167 if (QC == TRUE) { 166 if(QC == TRUE) {
168 print("Creating MA plots...") 167 write("Creating MA plots...",stdout())
169 168
170 etable &lt;- topTags(lrt, n=nrow(dge))\$table 169 etable &lt;- topTags(lrt, n=nrow(dge))\$table
171 etable &lt;- etable[order(etable\$FDR), ] 170 etable &lt;- etable[order(etable\$FDR), ]
172 pdf(output_7) 171 pdf(output_7)
173 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) 172 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
174 with(subset(etable, FDR&lt;0.05), points(logCPM, logFC, pch=20, col="red")) 173 with(subset(etable, FDR&lt;0.05), points(logCPM, logFC, pch=20, col="red"))
175 abline(h=c(-1,1), col="blue") 174 abline(h=c(-1,1), col="blue")
176 dev.off() 175 dev.off()
177 } 176 }
178 print("Done!") 177 write("Done!",stdout())
179 } 178 }
180 </configfile> 179 </configfile>
181 </configfiles> 180 </configfiles>
182 181
183 <outputs> 182 <outputs>