Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_DGE.xml @ 4:b1aee4b59049 draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 02:55:02 -0500 |
parents | 521bfa975110 |
children | 149a52c74f39 |
comparison
equal
deleted
inserted
replaced
3:df239301559a | 4:b1aee4b59049 |
---|---|
6 <!-- | 6 <!-- |
7 The following script is written in the "Cheetah" language: | 7 The following script is written in the "Cheetah" language: |
8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html | 8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html |
9 --> | 9 --> |
10 | 10 |
11 R CMD BATCH --vanilla '--args | 11 R CMD BATCH --vanilla --slave '--args |
12 $design_matrix | 12 $design_matrix |
13 $contrast | 13 $contrast |
14 | 14 |
15 $output_count_edgeR | 15 $output_count_edgeR |
16 $output_cpm | 16 $output_cpm |
20 $qc | 20 $qc |
21 $output_MDSplot | 21 $output_MDSplot |
22 $output_BCVplot | 22 $output_BCVplot |
23 $output_MAplot | 23 $output_MAplot |
24 smearPlot ' | 24 smearPlot ' |
25 /home/youri/Desktop/galaxy/tools/TraIT/edgeR/DGE/edgeR_DGE_test.R $output_R | 25 $R_script $output_R |
26 </command> | 26 </command> |
27 | 27 |
28 <inputs> | 28 <inputs> |
29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> | 29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> |
30 | 30 |
39 <option value="true" selected="true"> Yes</option> | 39 <option value="true" selected="true"> Yes</option> |
40 <option value="false">No</option> | 40 <option value="false">No</option> |
41 </param> | 41 </param> |
42 </inputs> | 42 </inputs> |
43 | 43 |
44 <configfiles> | |
45 <configfile name="R_script"> | |
## edgeR differential gene expression (DGE) analysis driven by a design matrix.
##
## NOTE: this script lives inside a Galaxy configfile element, which is
## rendered as a Cheetah template before R sees it. A bare dollar sign would
## be parsed as a template placeholder, so list elements are accessed with
## [["name"]] throughout, and comments use a double hash.
##
## Positional arguments (after --args):
##   1  design matrix file (tab-delimited, with header)
##   2  contrast expression handed to makeContrasts()
##   3  output: table of test results
##   4  output: CPM table
##   5  output: FPKM file - to be implemented (currently unused)
##   6  output: raw counts table
##   7  QC flag
##   8  output: MDS plot (PDF)
##   9  output: BCV plot (PDF)
##   10 output: MA plot (PDF)
##   11 read but currently unused
library(edgeR)

## Fetch commandline arguments
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 10) {
  stop(
    "Expected at least 10 arguments but received ", length(args),
    call. = FALSE
  )
}

design_matrix_file <- args[1]
contrast <- args[2]

output_1 <- args[3]
output_2 <- args[4]
output_3 <- args[5]  ## FPKM file - to be implemented
output_4 <- args[6]

## A non-empty flag enables the QC plots. The literal values "false", "no"
## and "0" count as disabled; a plain non-empty test would treat "false"
## as enabled.
qc_flag <- tolower(args[7])
QC <- nchar(qc_flag) > 0 && !(qc_flag %in% c("false", "no", "0"))

output_5 <- args[8]
output_6 <- args[9]
output_7 <- args[10]

output_8 <- args[11]  ## currently unused

## Draw into a PDF file and always close the device, even when drawing fails.
save_pdf <- function(path, draw) {
  pdf(path)
  on.exit(dev.off(), add = TRUE)
  draw()
  invisible(path)
}

## Column 1 holds the count file of each sample, column 2 the sample name
## and every further column is used as a factor of the design.
raw_data <- read.delim(
  design_matrix_file,
  header = TRUE,
  stringsAsFactors = TRUE
)
if (nrow(raw_data) < 1 || ncol(raw_data) < 3) {
  stop(
    "The design matrix needs at least one sample row and three columns ",
    "(count file, sample name, one or more factors)",
    call. = FALSE
  )
}

## Obtain read-counts
## One table per sample, bound column-wise. Iterating over the paths directly
## avoids the 2:n index sequence, which counts backwards for a single sample.
count_files <- as.character(raw_data[, 1])
count_tables <- lapply(count_files, function(path) {
  print(paste0("parsing counts from: ", path))
  read.delim(path, header = FALSE, stringsAsFactors = FALSE, row.names = 1)
})
read_counts <- do.call(cbind, count_tables)

## Filter for HTSeq predefined counts:
exclude_HTSeq <- c(
  "no_feature", "ambiguous", "too_low_aQual", "not_aligned",
  "alignment_not_unique"
)
exclude_DEXSeq <- c("_ambiguous", "_empty", "_lowaqual", "_notaligned")

## drop = FALSE keeps a one-sample table a data frame.
is_summary_row <- rownames(read_counts) %in% c(exclude_HTSeq, exclude_DEXSeq)
read_counts <- read_counts[!is_summary_row, , drop = FALSE]

colnames(read_counts) <- as.character(raw_data[, 2])
dge <- DGEList(counts = read_counts, genes = rownames(read_counts))

## Build an intercept-free design from all factor columns.
design_tmp <- raw_data[, -(1:2), drop = FALSE]
rownames(design_tmp) <- colnames(dge)
design_formula <- paste(c("~0", colnames(design_tmp)), collapse = " + ")
design <- model.matrix(as.formula(design_formula), design_tmp)

## model.matrix() names columns as factor name followed by level. Strip the
## factor name so the contrast can refer to the levels directly; a column
## whose name equals its factor name is left untouched.
prefixes <- colnames(design_tmp)[attr(design, "assign")]
avoid <- nchar(prefixes) == nchar(colnames(design))
replacements <- substr(
  colnames(design),
  nchar(prefixes) + 1,
  nchar(colnames(design))
)
replacements[avoid] <- colnames(design)[avoid]
colnames(design) <- replacements

print("Calculating normalization factors...")
dge <- calcNormFactors(dge)
print("Estimating common dispersion...")
dge <- estimateGLMCommonDisp(dge, design)
print("Estimating trended dispersion...")
dge <- estimateGLMTrendedDisp(dge, design)
print("Estimating tagwise dispersion...")
dge <- estimateGLMTagwiseDisp(dge, design)

if (QC) {
  print("Creating QC plots...")
  ## MDS Plot
  save_pdf(output_5, function() {
    plotMDS(dge, main = "edgeR MDS Plot")
  })
  ## Biological coefficient of variation plot
  save_pdf(output_6, function() {
    plotBCV(
      dge,
      cex = 0.4,
      main = "edgeR: Biological coefficient of variation (BCV) vs abundance"
    )
  })
}

print("Fitting GLM...")
fit <- glmFit(dge, design)

print(paste0("Performing likelihood ratio test: ", contrast))
cont <- makeContrasts(contrasts = contrast, levels = design)

lrt <- glmLRT(fit, contrast = cont[, 1])
print(paste0("Exporting to file: ", output_1))
result_table <- topTags(lrt, n = nrow(read_counts))[["table"]]
write.table(result_table, file = output_1, sep = "\t", row.names = TRUE)
write.table(cpm(dge, normalized.lib.sizes = TRUE), file = output_2, sep = "\t")
## todo EXPORT FPKM
write.table(dge[["counts"]], file = output_4, sep = "\t")

if (QC) {
  print("Creating MA plots...")

  etable <- result_table[order(result_table[["FDR"]]), ]
  save_pdf(output_7, function() {
    with(
      etable,
      plot(logCPM, logFC, pch = 20, main = "edgeR: Fold change vs abundance")
    )
    with(
      subset(etable, FDR < 0.05),
      points(logCPM, logFC, pch = 20, col = "red")
    )
    abline(h = c(-1, 1), col = "blue")
  })
}
print("Done!")
172 </configfile> | |
173 </configfiles> | |
174 | |
44 <outputs> | 175 <outputs> |
45 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - table" /> | 176 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - table" /> |
46 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" /> | 177 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" /> |
47 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts" /> | 178 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts" /> |
48 | 179 |