Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_DGE.xml @ 15:db9eb4b6f778 draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 08:51:59 -0500 |
parents | 15fd1a6798e0 |
children | fba5577122a8 |
comparison
equal
deleted
inserted
replaced
14:15fd1a6798e0 | 15:db9eb4b6f778 |
---|---|
6 <!-- | 6 <!-- |
7 The following script is written in the "Cheetah" language: | 7 The following script is written in the "Cheetah" language: |
8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html | 8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html |
9 --> | 9 --> |
10 | 10 |
11 R CMD BATCH --vanilla --slave '--args | 11 R --vanilla --slave -f $R_script '--args |
12 $design_matrix | 12 $design_matrix |
13 $contrast | 13 $contrast |
14 | 14 |
15 $output_count_edgeR | 15 $output_count_edgeR |
16 $output_cpm | 16 $output_cpm |
20 $qc | 20 $qc |
21 $output_MDSplot | 21 $output_MDSplot |
22 $output_BCVplot | 22 $output_BCVplot |
23 $output_MAplot | 23 $output_MAplot |
24 smearPlot ' | 24 smearPlot ' |
25 $R_script $output_R | 25 > $output_R |
26 </command> | 26 </command> |
27 | 27 |
28 <inputs> | 28 <inputs> |
29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> | 29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> |
30 | 30 |
50 designmatrix = args[1] | 50 designmatrix = args[1] |
51 contrast = args[2] | 51 contrast = args[2] |
52 | 52 |
53 output_1 = args[3] | 53 output_1 = args[3] |
54 output_2 = args[4] | 54 output_2 = args[4] |
55 output_3 = args[5] ##FPKM file - to be implemented | 55 output_3 = args[5] ##FPKM file - yet to be implemented |
56 output_4 = args[6] | 56 output_4 = args[6] |
57 | 57 |
58 QC = nchar(args[7]) > 0 | 58 QC = nchar(args[7]) > 0 |
59 | 59 |
60 output_5 = args[8] | 60 output_5 = args[8] |
73 has_header = (class(header[1,1]) == "character") | 73 has_header = (class(header[1,1]) == "character") |
74 | 74 |
75 read_counts = read.delim(as.character(raw_data[1,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] | 75 read_counts = read.delim(as.character(raw_data[1,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] |
76 | 76 |
77 for(i in 2:length(raw_data[,1])) { | 77 for(i in 2:length(raw_data[,1])) { |
78 print("parsing counts from:") | 78 write("parsing counts from:",stdout()) |
79 print(raw_data[i,1]) | 79 write(raw_data[i,1],stdout()) |
80 | 80 |
81 header = read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1,nrows=1) | 81 header = read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1,nrows=1) |
82 has_header = (class(header[1,1]) == "character") | 82 has_header = (class(header[1,1]) == "character") |
83 table = read.delim(as.character(raw_data[i,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] | 83 table = read.delim(as.character(raw_data[i,1]),header=has_header,stringsAsFactors=F,row.names=1)[1] |
84 | 84 |
85 read_counts = cbind(read_counts,table) | 85 read_counts = cbind(read_counts,table) |
86 print(i) | |
87 } | 86 } |
88 | 87 |
89 colnames(read_counts) = as.character(raw_data[,2]) | 88 colnames(read_counts) = as.character(raw_data[,2]) |
90 | 89 |
91 | 90 |
120 replacements[avoid] = colnames(design)[avoid] | 119 replacements[avoid] = colnames(design)[avoid] |
121 colnames(design) = replacements | 120 colnames(design) = replacements |
122 | 121 |
123 | 122 |
124 | 123 |
125 print("Calculating normalization factors...") | 124 write("Calculating normalization factors...",stdout()) |
126 dge = calcNormFactors(dge) | 125 dge = calcNormFactors(dge) |
127 print("Estimating common dispersion...") | 126 write("Estimating common dispersion...",stdout()) |
128 dge = estimateGLMCommonDisp(dge,design) | 127 dge = estimateGLMCommonDisp(dge,design) |
129 print("Estimating trended dispersion...") | 128 write("Estimating trended dispersion...",stdout()) |
130 dge = estimateGLMTrendedDisp(dge,design) | 129 dge = estimateGLMTrendedDisp(dge,design) |
131 print("Estimating tagwise dispersion...") | 130 write("Estimating tagwise dispersion...",stdout()) |
132 dge = estimateGLMTagwiseDisp(dge,design) | 131 dge = estimateGLMTagwiseDisp(dge,design) |
133 | 132 |
134 | 133 |
135 | 134 |
136 | 135 |
137 if (QC == TRUE) { | 136 if(QC == TRUE) { |
138 print("Creating QC plots...") | 137 write("Creating QC plots...",stdout()) |
139 #### MDS Plot | 138 #### MDS Plot |
140 pdf(output_5) | 139 pdf(output_5) |
141 plotMDS(dge, main="edgeR MDS Plot") | 140 plotMDS(dge, main="edgeR MDS Plot") |
142 dev.off() | 141 dev.off() |
143 #### Biological coefficient of variation plot | 142 #### Biological coefficient of variation plot |
146 dev.off() | 145 dev.off() |
147 } | 146 } |
148 | 147 |
149 | 148 |
150 | 149 |
151 print("Fitting GLM...") | 150 write("Fitting GLM...",stdout()) |
152 fit = glmFit(dge,design) | 151 fit = glmFit(dge,design) |
153 | 152 |
154 print(paste("Performing likelihood ratio test: ",contrast,sep="")) | 153 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout()) |
155 cont <- c(contrast) | 154 cont <- c(contrast) |
156 cont <- makeContrasts(contrasts=cont, levels=design) | 155 cont <- makeContrasts(contrasts=cont, levels=design) |
157 | 156 |
158 lrt <- glmLRT(fit, contrast=cont[,1]) | 157 lrt <- glmLRT(fit, contrast=cont[,1]) |
159 print(paste("Exporting to file: ",output_1,sep="")) | 158 write(paste("Exporting to file: ",output_1,sep=""),stdout()) |
160 write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T) | 159 write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T) |
161 write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t") | 160 write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t") |
162 ## todo EXPORT FPKM | 161 ## todo EXPORT FPKM |
163 write.table(file=output_4,dge\$counts,sep="\t") | 162 write.table(file=output_4,dge\$counts,sep="\t") |
164 | 163 |
165 | 164 |
166 | 165 |
167 if (QC == TRUE) { | 166 if(QC == TRUE) { |
168 print("Creating MA plots...") | 167 write("Creating MA plots...",stdout()) |
169 | 168 |
170 etable <- topTags(lrt, n=nrow(dge))\$table | 169 etable <- topTags(lrt, n=nrow(dge))\$table |
171 etable <- etable[order(etable\$FDR), ] | 170 etable <- etable[order(etable\$FDR), ] |
172 pdf(output_7) | 171 pdf(output_7) |
173 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) | 172 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) |
174 with(subset(etable, FDR<0.05), points(logCPM, logFC, pch=20, col="red")) | 173 with(subset(etable, FDR<0.05), points(logCPM, logFC, pch=20, col="red")) |
175 abline(h=c(-1,1), col="blue") | 174 abline(h=c(-1,1), col="blue") |
176 dev.off() | 175 dev.off() |
177 } | 176 } |
178 print("Done!") | 177 write("Done!",stdout()) |
179 } | 178 } |
180 </configfile> | 179 </configfile> |
181 </configfiles> | 180 </configfiles> |
182 | 181 |
183 <outputs> | 182 <outputs> |