25
|
1 <?xml version="1.0" encoding="UTF-8"?>
|
|
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis">
|
|
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
|
|
4
|
|
<requirements>
    <!-- Packages that have to be available before the command runs:
         R itself and the edgeR/limma bundle loaded by the R script. -->
    <requirement type="package" version="3.0.1">package_r3_withx</requirement>
    <requirement type="package" version="latest">package_biocLite_edgeR_limma</requirement>
</requirements>
|
|
9
|
|
<command>
    <!--
        The following script is written in the "Cheetah" language:
        http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html

        Layout of the command line:
          1. R runs the generated R_script; everything between the single
             quotes is handed to the script as positional arguments
             (their order must match the args[n] assignments in R_script).
          2. Optional outputs that were not requested are replaced by /dev/null.
          3. stdout goes to the optional debug dataset, stderr is captured in a
             file so harmless messages can be removed before it is forwarded.
    -->

    R --vanilla --slave -f $R_script '--args
        $expression_matrix
        $design_matrix
        $contrast

        $fdr

        $output_count_edgeR
        $output_cpm

        /dev/null <!-- Calculation of FPKM/RPKM should come here -->

        #if $output_raw_counts:
            $output_raw_counts
        #else:
            /dev/null
        #end if

        #if $output_MDSplot:
            $output_MDSplot
        #else:
            /dev/null
        #end if

        #if $output_BCVplot:
            $output_BCVplot
        #else:
            /dev/null
        #end if

        #if $output_MAplot:
            $output_MAplot
        #else:
            /dev/null
        #end if

        #if $output_PValue_distribution_plot:
            $output_PValue_distribution_plot
        #else:
            /dev/null
        #end if

        #if $output_hierarchical_clustering_plot:
            $output_hierarchical_clustering_plot
        #else:
            /dev/null
        #end if

        #if $output_heatmap_plot:
            $output_heatmap_plot
        #else:
            /dev/null
        #end if

        #if $output_RData_obj:
            $output_RData_obj
        #else:
            /dev/null
        #end if

        $output_format_images
        '
        #if $output_R:
            > $output_R
        #else:
            > /dev/null
        #end if

        2> stderr.txt ;

    ## Drop informational edgeR output and locale warnings from stderr in a
    ## single pass (one -e per message) instead of rewriting the file once per
    ## message. Whatever is left is forwarded to the real stderr; "cat" is kept
    ## as the last command so the exit status does not depend on grep.
    grep -v
        -e 'Calculating library sizes from column'
        -e 'During startup - Warning messages'
        -e 'Setting LC_TIME failed'
        -e 'Setting LC_MONETARY failed'
        -e 'Setting LC_PAPER failed'
        -e 'Setting LC_MEASUREMENT failed'
        -e 'Setting LC_CTYPE failed'
        -e 'Setting LC_COLLATE failed'
        stderr.txt > stderr_filtered.txt ;

    cat stderr_filtered.txt >&amp;2

</command>
|
|
100
|
|
<inputs>
    <!-- Read counts and the design describing the samples; both are read by
         the R script with the first column as row names. -->
    <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />
    <param name="design_matrix" type="data" format="tabular" label="Design matrix"
           help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the design matrix using 'edgeR: Design- from Expression matrix'." />

    <!-- Passed verbatim to limma's makeContrasts() by the R script -->
    <param name="contrast" type="text" label="Contrast (biological question)"
           help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />

    <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />

    <!-- The option values are referenced by the <filter> expressions in <outputs>;
         keep both in sync when adding or renaming an option. -->
    <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
        <option value="make_output_raw_counts">Raw counts table</option>
        <option value="make_output_MDSplot">MDS-plot</option>
        <option value="make_output_BCVplot">BCV-plot</option>
        <option value="make_output_MAplot">MA-plot</option>
        <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
        <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering</option>
        <option value="make_output_heatmap_plot">Heatmap</option>

        <option value="make_output_R_stdout">R stdout</option>
        <option value="make_output_RData_obj">R Data object</option>
    </param>

    <!-- Passed to the R script as the last positional argument -->
    <param name="output_format_images" type="select" label="Output format of images" display="radio">
        <option value="png">Portable network graphics (.png)</option>
        <option value="pdf">Portable document format (.pdf)</option>
        <option value="svg">Scalable vector graphics (.svg)</option>
    </param>
</inputs>
|
|
128
|
|
<configfiles>
    <!--
        R script executed by <command>. The text is a Cheetah template:
        lines (or line tails) starting with a double hash are Cheetah comments
        and never reach R, and every R dollar operator must be written with a
        leading backslash. CDATA is used so R operators such as the assignment
        arrow and "less than" need no XML escaping.
    -->
    <configfile name="R_script"><![CDATA[
library(limma,quietly=TRUE)    ## enable quietly to avoid unnecessary stderr dumping
library(edgeR,quietly=TRUE)    ## enable quietly to avoid unnecessary stderr dumping
library(splines,quietly=TRUE)  ## enable quietly to avoid unnecessary stderr dumping

## Fetch commandline arguments (order is defined by the command section)
args <- commandArgs(trailingOnly = TRUE)

expression_matrix_file = args[1]
design_matrix_file = args[2]
contrast = args[3]

## Command line arguments are character strings. The threshold is compared
## with numeric FDR values below, so it has to be numeric: otherwise R falls
## back to a string comparison in which "1e-05" sorts after "0.05".
fdr = as.numeric(args[4])

output_count_edgeR = args[5]
output_cpm = args[6]

output_xpkm = args[7]  ##FPKM file - yet to be implemented

output_raw_counts = args[8]
output_MDSplot = args[9]
output_BCVplot = args[10]
output_MAplot = args[11]
output_PValue_distribution_plot = args[12]
## NOTE(review): no hierarchical clustering plot is produced by this script yet
output_hierarchical_clustering_plot = args[13]
output_heatmap_plot = args[14]
output_RData_obj = args[15]
output_format_images = args[16]

## Open the graphics device matching the image format chosen by the user.
##   path  - file the plot is written to
##   width - width in inches (7 is the default of pdf and svg); for png it is
##           scaled so that 7 inches equals the png default of 480 pixels
open_plot_device <- function(path, width=7) {
    if(output_format_images == "pdf") {
        pdf(path, width=width)
    } else if(output_format_images == "svg") {
        svg(path, width=width)
    } else {
        png(path, width=round(480 * width / 7))
    }
}

##raw_data <- read.delim(designmatrix,header=T,stringsAsFactors=T)
## Obtain read-counts

expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))

colnames(design_matrix) <- make.names(colnames(design_matrix))

## Turn every factor level into a syntactically valid R name and report what changed
for(i in seq_len(ncol(design_matrix))) {
    old = design_matrix[,i]
    design_matrix[,i] = make.names(design_matrix[,i])
    if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
        print("Renaming of factors:")
        print(old)
        print("To:")
        print(design_matrix[,i])
    }
    ## The following line seems to malfunction the script:
    ##design_matrix[,i] <- as.factor(design_matrix[,i])
}

## 1) In the expression matrix, you only want to have the samples described in the design matrix
columns <- match(rownames(design_matrix),colnames(expression_matrix))
columns <- columns[!is.na(columns)]
read_counts <- expression_matrix[,columns,drop=FALSE]

## 2) In the design matrix, you only want to have samples of which you really have the counts.
##    The rows are taken in the order of the read_counts columns: sample i of the
##    counts must be described by row i of the design, also when both input files
##    list the samples in a different order.
columns <- match(colnames(read_counts),rownames(design_matrix))
design_matrix <- design_matrix[columns,,drop=FALSE]

## Filter for HTSeq predefined counts:
exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned")

exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
exclude <- exclude[is.na(exclude)==0]
if(length(exclude) != 0) {
    read_counts <- read_counts[-exclude,,drop=FALSE]
}


empty_samples <- apply(read_counts,2,function(x) sum(x) == 0)
if(sum(empty_samples) > 0) {
    write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
    write(colnames(read_counts)[empty_samples],stderr())
} else {

    dge <- DGEList(counts=read_counts,genes=rownames(read_counts))

    formula <- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
    design_matrix_tmp <- design_matrix
    colnames(design_matrix_tmp) <- make.names(colnames(design_matrix_tmp))
    design <- model.matrix(as.formula(formula),design_matrix_tmp)
    rm(design_matrix_tmp)

    ## Filter prefixes: model.matrix prepends the column name to every level,
    ## strip it so the contrast can be written with the bare level names
    prefixes = colnames(design_matrix)[attr(design,"assign")]
    avoid = nchar(prefixes) == nchar(colnames(design))
    replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design)))
    replacements[avoid] = colnames(design)[avoid]
    colnames(design) = replacements

    ## Do normalization
    write("Calculating normalization factors...",stdout())
    dge <- calcNormFactors(dge)
    write("Estimating common dispersion...",stdout())
    dge <- estimateGLMCommonDisp(dge,design)
    write("Estimating trended dispersion...",stdout())
    dge <- estimateGLMTrendedDisp(dge,design)
    write("Estimating tagwise dispersion...",stdout())
    dge <- estimateGLMTagwiseDisp(dge,design)


    if(output_MDSplot != "/dev/null") {
        write("Creating MDS plot",stdout())
        ##points <- plotMDS(dge,method="bcv",labels=rep("",nrow(dge\$samples)))
        ## Get coordinates of unflexible plot
        points <- plotMDS.DGEList(dge,labels=rep("",nrow(dge\$samples)))
        ## Kill it
        dev.off()

        open_plot_device(output_MDSplot)

        diff_x <- abs(max(points\$x)-min(points\$x))
        diff_y <-(max(points\$y)-min(points\$y))
        plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR MDS Plot",type="n", xlab="BCV distance 1", ylab="BCV distance 2")
        points(points\$x,points\$y,pch=20)
        text(points\$x, points\$y,rownames(dge\$samples),cex=0.7,col="gray",pos=4)
        rm(diff_x,diff_y)

        dev.off()
    }

    if(output_BCVplot != "/dev/null") {
        write("Creating Biological coefficient of variation plot",stdout())
        open_plot_device(output_BCVplot)
        plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
        dev.off()
    }


    write("Fitting GLM...",stdout())
    fit <- glmFit(dge,design)

    write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
    cont <- c(contrast)
    cont <- makeContrasts(contrasts=cont, levels=design)

    lrt <- glmLRT(fit, contrast=cont[,1])
    write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
    write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
    write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)

    ## todo EXPORT FPKM
    ## Written unconditionally; the path is /dev/null when the table was not requested
    write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)


    if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") {
        etable <- topTags(lrt, n=nrow(dge))\$table
        etable <- etable[order(etable\$FDR), ]

        if(output_MAplot != "/dev/null") {
            write("Creating MA plot...",stdout())
            open_plot_device(output_MAplot)
            with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
            ## Genes below the requested FDR threshold are highlighted in red
            with(subset(etable, FDR < fdr), points(logCPM, logFC, pch=20, col="red"))
            abline(h=c(-1,1), col="blue")
            dev.off()
        }

        if(output_PValue_distribution_plot != "/dev/null") {
            write("Creating P-value distribution plot...",stdout())
            open_plot_device(output_PValue_distribution_plot)
            expressed_genes <- subset(etable, PValue < 0.99)
            h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (< 0.99)")
            ## Dashed line: mean bin height; solid line: smoothed bin heights
            center <- sum(h\$counts) / length(h\$counts)
            lines(c(0,1),c(center,center),lty=2,col="red",lwd=2)
            k <- ksmooth(h\$mid, h\$counts)
            lines(k\$x,k\$y,col="red",lwd=2)
            ## Root of the summed squared deviations of the bins from the mean height
            rmsd <- (h\$counts) - center
            rmsd <- rmsd^2
            rmsd <- sum(rmsd)
            rmsd <- sqrt(rmsd)
            text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue")
            ## change e into epsilon somehow
            dev.off()
        }
    }

    if(output_heatmap_plot != "/dev/null") {
        open_plot_device(output_heatmap_plot,width=10.5)
        etable2 <- topTags(lrt, n=100)\$table
        cpm_all <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)
        ## Select the top genes by name. NOTE(review): the previous code used
        ## as.numeric(rownames(etable2)) as row indices; that lookup is kept as
        ## a fallback for the case that the row names are positions instead of
        ## gene identifiers - confirm against the installed edgeR version.
        top_rows <- match(rownames(etable2),rownames(cpm_all))
        if(any(is.na(top_rows))) {
            top_rows <- as.numeric(rownames(etable2))
        }
        cpm_sub <- cpm_all[top_rows,]
        heatmap(t(cpm_sub))
        dev.off()
    }

    ##output_hierarchical_clustering_plot = args[13]

    if(output_RData_obj != "/dev/null") {
        save.image(output_RData_obj)
    }

    write("Done!",stdout())
}
    ]]></configfile>
</configfiles>
|
|
336
|
|
<outputs>
    <!-- Always created -->
    <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" />
    <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />

    <!-- Optional datasets: each one is only created when the matching option
         of the "outputs" select parameter is ticked. -->
    <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts">
        <filter>outputs and ("make_output_raw_counts" in outputs)</filter>
    </data>

    <data format="${output_format_images}" name="output_MDSplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot">
        <filter>outputs and ("make_output_MDSplot" in outputs)</filter>
    </data>

    <data format="${output_format_images}" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
        <filter>outputs and ("make_output_BCVplot" in outputs)</filter>
    </data>

    <data format="${output_format_images}" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
        <filter>outputs and ("make_output_MAplot" in outputs)</filter>
    </data>

    <data format="${output_format_images}" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution">
        <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter>
    </data>

    <!-- NOTE(review): the R script receives this path but never writes a
         clustering plot to it, so the dataset stays empty for now. -->
    <data format="${output_format_images}" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical clustering">
        <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter>
    </data>

    <data format="${output_format_images}" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap">
        <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter>
    </data>

    <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object">
        <filter>outputs and ("make_output_RData_obj" in outputs)</filter>
    </data>

    <!-- Standard output of the R process, for debugging -->
    <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" >
        <filter>outputs and ("make_output_R_stdout" in outputs)</filter>
    </data>
</outputs>
|
|
377
|
|
<help>
edgeR: Differential Gene(Expression) Analysis
#############################################

Overview
--------
Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication. Uses empirical Bayes estimation and exact tests based on the negative binomial distribution. Also useful for differential signal analysis with other types of genome-scale count data [1].

For every experiment, the algorithm requires a design matrix. This matrix describes which samples belong to which groups.
More details on this are given in the edgeR manual: http://www.bioconductor.org/packages/2.12/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
and the limma manual.

Because the creation of a design matrix can be complex and time consuming, especially if no GUI is used, this package comes with an alternative tool which can help you with it.
This tool is called *edgeR Design Matrix Creator*.
If the appropriate design matrix (with corresponding links to the files) is given,
the correct contrast ( http://en.wikipedia.org/wiki/Contrast_(statistics) ) has to be given.

If you have for example two groups, with an equal weight, you would like to compare either
"g1-g2" or "normal-cancer".

The test function makes use of a MCF7 dataset used in a study that indicates that a higher sequencing depth is not necessarily more important than a higher amount of replicates [2].

Input
-----
Expression matrix
^^^^^^^^^^^^^^^^^
::

    Geneid    "\t" Sample-1 "\t" Sample-2 "\t" Sample-3 "\t" Sample-4 [...] "\n"
    SMURF     "\t"      123 "\t"       21 "\t"    34545 "\t"       98  ...  "\n"
    BRCA1     "\t"      435 "\t"     6655 "\t"       45 "\t"       55  ...  "\n"
    LINK33    "\t"        4 "\t"      645 "\t"      345 "\t"        1  ...  "\n"
    SNORD78   "\t"      498 "\t"       65 "\t"       98 "\t"       27  ...  "\n"
    [...]

*Note: Make sure the number of columns in the header is identical to the number of columns in the body.*

Design matrix
^^^^^^^^^^^^^
::

    Sample    "\t" Condition "\t" Ethnicity "\t" Patient "\t" Batch "\n"
    Sample-1  "\t" Tumor     "\t" European  "\t" 1       "\t" 1     "\n"
    Sample-2  "\t" Normal    "\t" European  "\t" 1       "\t" 1     "\n"
    Sample-3  "\t" Tumor     "\t" European  "\t" 2       "\t" 1     "\n"
    Sample-4  "\t" Normal    "\t" European  "\t" 2       "\t" 1     "\n"
    Sample-5  "\t" Tumor     "\t" African   "\t" 3       "\t" 1     "\n"
    Sample-6  "\t" Normal    "\t" African   "\t" 3       "\t" 1     "\n"
    Sample-7  "\t" Tumor     "\t" African   "\t" 4       "\t" 2     "\n"
    Sample-8  "\t" Normal    "\t" African   "\t" 4       "\t" 2     "\n"
    Sample-9  "\t" Tumor     "\t" Asian     "\t" 5       "\t" 2     "\n"
    Sample-10 "\t" Normal    "\t" Asian     "\t" 5       "\t" 2     "\n"
    Sample-11 "\t" Tumor     "\t" Asian     "\t" 6       "\t" 2     "\n"
    Sample-12 "\t" Normal    "\t" Asian     "\t" 6       "\t" 2     "\n"

*Note: Avoid factor names that are (1) numerical, (2) contain mathematical symbols and preferably only use letters.*

Contrast
^^^^^^^^
The contrast represents the biological question. There can be many questions asked, e.g.:

- Tumor-Normal
- African-European
- 0.5*(Control+Placebo) - Treated

Installation
------------

This tool requires no specific configurations. The following dependencies are installed automatically:

- R
- Bioconductor

  - limma
  - edgeR

License
-------
- R

  - GPL-2 &amp; GPL-3

- limma

  - GPL (>=2)

- edgeR

  - GPL (>=2)

References
----------

EdgeR
^^^^^
**[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**

*Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.

- http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
- http://dx.doi.org/10.1093/bioinformatics/btp616
- http://www.bioconductor.org/packages/release/bioc/html/edgeR.html

Test-data (MCF7)
^^^^^^^^^^^^^^^^
**[2] RNA-seq differential expression studies: more sequence or more replication?**

*Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.

- http://www.ncbi.nlm.nih.gov/pubmed/24319002
- http://dx.doi.org/10.1093/bioinformatics/btt688

Contact
-------
The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch

I would like to thank Hina Riaz - Naz Khan for her helpful contribution.

More tools by the Translational Research IT (TraIT) project can be found in the following repository:
http://testtoolshed.g2.bx.psu.edu/
</help>
|
|
495 </tool>
|