annotate edgeR_DGE.xml @ 11:b8b3d63b60ab draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 06:48:17 -0500
parents 61e42740b13a
children c672e76503b2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
1 <?xml version="1.0" encoding="UTF-8"?>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
2 <tool id="edger_dge" name="edgeR Differential GeneExpression Analysis">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
3 <description>RNA-Seq expression analysis using edgeR (R package)</description>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
4
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
5 <command>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
6 <!--
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
7 The following script is written in the "Cheetah" language:
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
8 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
9 -->
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
10
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
11 R CMD BATCH --vanilla --slave '--args
2
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
12 $design_matrix
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
13 $contrast
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
14
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
15 $output_count_edgeR
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
16 $output_cpm
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
17 output_FPXM
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
18 $output_raw_counts
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
19
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
20 $qc
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
21 $output_MDSplot
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
22 $output_BCVplot
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
23 $output_MAplot
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
24 smearPlot '
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
25 $R_script $output_R
2
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
26 </command>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
27
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
28 <inputs>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
29 <param name="design_matrix" type="data" format="tabular" help="Design matrix" />
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
30
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
31 <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info." />
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
32
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
33 <param name="qc" type="select" label="Quality control reports">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
34 <option value="true">Yes</option>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
35 <option value="false" selected="true">No</option>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
36 </param>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
37
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
38 <param name="debug" type="select" label="R Debug output">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
39 <option value="true" selected="true"> Yes</option>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
40 <option value="false">No</option>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
41 </param>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
42 </inputs>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
43
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
44 <configfiles>
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
45 <configfile name="R_script">
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
46 library(edgeR)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
47
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
48 ## Fetch commandline arguments
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
49 args &lt;- commandArgs(trailingOnly = TRUE)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
50 designmatrix = args[1]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
51 contrast = args[2]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
52
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
53 output_1 = args[3]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
54 output_2 = args[4]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
55 output_3 = args[5] ##FPKM file - to be implemented
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
56 output_4 = args[6]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
57
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
58 QC = identical(args[7], "true") ## the qc select passes the literal string true or false, so test its value; a length test is TRUE for both
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
59
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
60 output_5 = args[8]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
61 output_6 = args[9]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
62 output_7 = args[10]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
63
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
64 output_8 = args[11]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
65
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
66
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
67 library(edgeR)
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
68 raw_data &lt;- read.delim(designmatrix,header=T,stringsAsFactors=T)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
69
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
70 ## Obtain read-counts
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
71 read_counts = read.delim(as.character(raw_data[1,1]),header=F,stringsAsFactors=F,row.names=1)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
72 for(i in 2:length(raw_data[,1])) {
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
73 print("parsing counts from:")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
74 print(raw_data[i,1])
11
b8b3d63b60ab Uploaded
yhoogstrate
parents: 7
diff changeset
75
b8b3d63b60ab Uploaded
yhoogstrate
parents: 7
diff changeset
76 header = read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1,nrows=1)
b8b3d63b60ab Uploaded
yhoogstrate
parents: 7
diff changeset
77 has_header = (class(header[1,1]) == "character")
b8b3d63b60ab Uploaded
yhoogstrate
parents: 7
diff changeset
78
b8b3d63b60ab Uploaded
yhoogstrate
parents: 7
diff changeset
79 read_counts = cbind(read_counts,read.delim(as.character(raw_data[i,1]),header=has_header,stringsAsFactors=F,row.names=1))
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
80 print(i)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
81 }
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
82
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
83
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
84
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
85 ## Filter for HTSeq predefined counts:
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
86 exclude_HTSeq = c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
87 exclude_DEXSeq = c("_ambiguous","_empty","_lowaqual","_notaligned")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
88
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
89 exclude = match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
90 exclude = exclude[is.na(exclude)==0]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
91 if(length(exclude) != 0) {
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
92 read_counts = read_counts[-exclude,]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
93 }
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
94
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
95
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
96
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
97
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
98
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
99
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
100
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
101
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
102
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
103 colnames(read_counts) = raw_data[,2]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
104 dge = DGEList(counts=read_counts,genes=rownames(read_counts))
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
105
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
106 design_tmp &lt;- raw_data[3:length(raw_data)]
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
107 rownames(design_tmp) &lt;- colnames(dge)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
108 formula = paste(c("~0",colnames(design_tmp)),collapse = " + ")
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
109 design &lt;- model.matrix(as.formula(formula),design_tmp)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
110
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
111 prefixes = colnames(design_tmp)[attr(design,"assign")]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
112 avoid = nchar(prefixes) == nchar(colnames(design))
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
113 replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design)))
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
114 replacements[avoid] = colnames(design)[avoid]
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
115 colnames(design) = replacements
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
116
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
117
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
118
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
119 print("Calculating normalization factors...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
120 dge = calcNormFactors(dge)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
121 print("Estimating common dispersion...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
122 dge = estimateGLMCommonDisp(dge,design)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
123 print("Estimating trended dispersion...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
124 dge = estimateGLMTrendedDisp(dge,design)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
125 print("Estimating tagwise dispersion...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
126 dge = estimateGLMTagwiseDisp(dge,design)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
127
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
128
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
129
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
130
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
131 if (QC == TRUE) {
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
132 print("Creating QC plots...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
133 #### MDS Plot
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
134 pdf(output_5)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
135 plotMDS(dge, main="edgeR MDS Plot")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
136 dev.off()
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
137 #### Biological coefficient of variation plot
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
138 pdf(output_6)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
139 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
140 dev.off()
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
141 }
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
142
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
143
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
144
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
145 print("Fitting GLM...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
146 fit = glmFit(dge,design)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
147
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
148 print(paste("Performing likelihood ratio test: ",contrast,sep=""))
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
149 cont &lt;- c(contrast)
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
150 cont &lt;- makeContrasts(contrasts=cont, levels=design)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
151
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
152 lrt &lt;- glmLRT(fit, contrast=cont[,1])
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
153 print(paste("Exporting to file: ",output_1,sep=""))
7
61e42740b13a Uploaded
yhoogstrate
parents: 6
diff changeset
154 write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T)
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
155 write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
156 ## todo EXPORT FPKM
7
61e42740b13a Uploaded
yhoogstrate
parents: 6
diff changeset
157 write.table(file=output_4,dge\$counts,sep="\t")
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
158
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
159
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
160
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
161 if (QC == TRUE) {
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
162 print("Creating MA plots...")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
163
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
164
7
61e42740b13a Uploaded
yhoogstrate
parents: 6
diff changeset
165 etable &lt;- topTags(lrt, n=nrow(dge))\$table
61e42740b13a Uploaded
yhoogstrate
parents: 6
diff changeset
166 etable &lt;- etable[order(etable\$FDR), ]
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
167 pdf(output_7)
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
168 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
6
149a52c74f39 Uploaded
yhoogstrate
parents: 4
diff changeset
169 with(subset(etable, FDR&lt;0.05), points(logCPM, logFC, pch=20, col="red"))
4
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
170 abline(h=c(-1,1), col="blue")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
171 dev.off()
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
172 }
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
173 print("Done!")
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
174 </configfile>
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
175 </configfiles>
b1aee4b59049 Uploaded
yhoogstrate
parents: 2
diff changeset
176
2
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
177 <outputs>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
178 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - table" />
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
179 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
180 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts" />
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
181
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
182 <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output" >
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
183 <filter>(debug == "true")</filter>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
184 </data>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
185
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
186 <data format="pdf" name="output_MDSplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
187 <filter>(qc == "true")</filter>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
188 </data>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
189
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
190 <data format="pdf" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
191 <filter>(qc == "true")</filter>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
192 </data>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
193
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
194 <data format="pdf" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
195 <filter>(qc == "true")</filter>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
196 </data>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
197 </outputs>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
198
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
199 <help>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
200 input: Design matrix created with the "create Design matrix" tool
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
201 input: contrast
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
202 </help>
521bfa975110 Uploaded
yhoogstrate
parents:
diff changeset
203 </tool>