comparison edgeR_Differential_Gene_Expression.xml @ 99:450c1c9a2938 draft

Uploaded
author yhoogstrate
date Wed, 27 May 2015 09:35:28 -0400
parents
children 9e7b3a02b906
comparison
equal deleted inserted replaced
98:943778e4dafa 99:450c1c9a2938
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.0.3-latest.d">
3 <description>RNA-Seq gene expression analysis using edgeR (R package)</description>
4
5 <requirements>
6 <requirement type="package" version="3.0.3">R</requirement>
7 <requirement type="package" version="latest">biocLite_edgeR_limma</requirement>
8 </requirements>
9
10 <stdio> <!-- Each regex below is matched against the stream(s) named in "source"; a match is reported at the given "level" with the optional "description" (see the Galaxy tool XML documentation for the exact semantics of each level). -->
11 <regex match="Error in `contrasts`"
12 source="both"
13 level="fatal"
14 description="Have the design- and expression-matrix been swapped?" />
15 <regex match="Calculating library sizes from column"
16 source="stderr"
17 level="log" />
18 <regex match="During startup - Warning messages"
19 source="stderr"
20 level="log" />
21 <regex match="Setting LC_[^ ]+ failed"
22 source="stderr"
23 level="warning"
24 description="LOCALE has not been set correctly" />
25 </stdio>
26
27 <version_command>echo $(R --version | grep version | grep -v GNU) " , EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>
28
29 <command>
30 <!--
31 The following script is written in the "Cheetah" language:
32 http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
33 The arguments inside the quoted string are positional: the R script in the configfile reads them as args[1] to args[17], so their order must stay in sync with it. An optional output whose Cheetah test is false is passed as /dev/null. -->
34
35 R --vanilla --slave -f $R_script '--args
36 $expression_matrix
37 $design_matrix
38 $contrast
39
40 $fdr
41
42 $output_count_edgeR
43 $output_cpm
44
45 /dev/null <!-- Calculation of FPKM/RPKM should come here -->
46
47 #if $output_raw_counts:
48 $output_raw_counts
49 #else:
50 /dev/null
51 #end if
52
53 #if $output_MDSplot_logFC:
54 $output_MDSplot_logFC
55 #else:
56 /dev/null
57 #end if
58
59 #if $output_MDSplot_bcv:
60 $output_MDSplot_bcv
61 #else:
62 /dev/null
63 #end if
64
65 #if $output_BCVplot:
66 $output_BCVplot
67 #else:
68 /dev/null
69 #end if
70
71 #if $output_MAplot:
72 $output_MAplot
73 #else:
74 /dev/null
75 #end if
76
77 #if $output_PValue_distribution_plot:
78 $output_PValue_distribution_plot
79 #else:
80 /dev/null
81 #end if
82
83 #if $output_hierarchical_clustering_plot:
84 $output_hierarchical_clustering_plot
85 #else:
86 /dev/null
87 #end if
88
89 #if $output_heatmap_plot:
90 $output_heatmap_plot
91 #else:
92 /dev/null
93 #end if
94
95 #if $output_RData_obj:
96 $output_RData_obj
97 #else:
98 /dev/null
99 #end if
100
101 $output_format_images
102 '
103 #if $output_R:
104 > $output_R
105 #else:
106 > /dev/null
107 #end if
108 </command>
109
110 <configfiles>
111 <configfile name="R_script">
112 library(limma,quietly=TRUE) ## load quietly to avoid unnecessary output on stderr
113 library(edgeR,quietly=TRUE) ## load quietly to avoid unnecessary output on stderr
114 library(splines,quietly=TRUE) ## load quietly to avoid unnecessary output on stderr
115
116 ## Fetch commandline arguments; their order mirrors the positional arguments passed by the command section
117 args &lt;- commandArgs(trailingOnly = TRUE)
118
119 expression_matrix_file = args[1]
120 design_matrix_file = args[2]
121 contrast = args[3]
122 ## commandArgs() yields character strings: convert the FDR cut-off so that it is compared numerically (not as text) with the FDR column
123 fdr = as.numeric(args[4])
124
125 output_count_edgeR = args[5]
126 output_cpm = args[6]
127
128 output_xpkm = args[7] ##FPKM file - yet to be implemented
129 ## Optional outputs: the value "/dev/null" means that the output was not requested
130 output_raw_counts = args[8]
131 output_MDSplot_logFC = args[9]
132 output_MDSplot_bcv = args[10]
133 output_BCVplot = args[11]
134 output_MAplot = args[12]
135 output_PValue_distribution_plot = args[13]
136 output_hierarchical_clustering_plot = args[14]
137 output_heatmap_plot = args[15]
138 output_RData_obj = args[16]
139 output_format_images = args[17]
140
141
142 library(edgeR)
143 ##raw_data &lt;- read.delim(designmatrix,header=T,stringsAsFactors=T)
144 ## Obtain read-counts
145 ## Both tables are read with their first column as row names; empty fields are treated as NA
146 expression_matrix &lt;- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
147 design_matrix &lt;- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
148 ## Convert the column names and all values of the design matrix into syntactically valid R names and report every column whose values were changed
149 colnames(design_matrix) &lt;- make.names(colnames(design_matrix))
150
151 for(i in 1:ncol(design_matrix)) {
152 old &lt;- design_matrix[,i]
153 design_matrix[,i] &lt;- make.names(design_matrix[,i])
154 if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
155 print("Renaming of factors:")
156 print(old)
157 print("To:")
158 print(design_matrix[,i])
159 }
160 ## The following line seems to break the script:
161 ##design_matrix[,i] &lt;- as.factor(design_matrix[,i])
162 }
163
164 ## 1) Keep only those columns of the expression matrix whose sample is described in the design matrix (in design matrix order)
165 columns &lt;- match(rownames(design_matrix),colnames(expression_matrix))
166 columns &lt;- columns[!is.na(columns)]
167 read_counts &lt;- expression_matrix[,columns]
168
169 ## 2) Keep only those rows of the design matrix for which read counts are actually available
170 columns &lt;- match(colnames(read_counts),rownames(design_matrix))
171 columns &lt;- columns[!is.na(columns)]
172 design_matrix &lt;- design_matrix[columns,,drop=FALSE]
173
174 ## Remove the rows with the predefined special counter names listed below (presumably the summary rows written by HTSeq-count and DEXSeq-count), as they are not genes:
175 exclude_HTSeq &lt;- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
176 exclude_DEXSeq &lt;- c("_ambiguous","_empty","_lowaqual","_notaligned")
177
178 exclude &lt;- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
179 exclude &lt;- exclude[is.na(exclude)==0]
180 if(length(exclude) != 0) {
181 read_counts &lt;- read_counts[-exclude,]
182 }
183
184
185 ## sorting expression matrix with the order of the read_counts
186 ##order &lt;- match(colnames(read_counts) , rownames(design_matrix))
187 ##read_counts_ordered &lt;- read_counts[,order2]
188
189 empty_samples &lt;- apply(read_counts,2,function(x) sum(x) == 0)
190 if(sum(empty_samples) > 0) {
191 write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
192 write(colnames(read_counts)[empty_samples],stderr())
193 } else {
194
195 dge &lt;- DGEList(counts=read_counts,genes=rownames(read_counts))
196 ## Build a model formula without intercept that contains every column of the design matrix as a term
197 formula &lt;- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
198 design_matrix_tmp &lt;- design_matrix
199 colnames(design_matrix_tmp) &lt;- make.names(colnames(design_matrix_tmp))
200 design &lt;- model.matrix(as.formula(formula),design_matrix_tmp)
201 rm(design_matrix_tmp)
202 ## Strip the leading term (design matrix column) name from every column name of the model matrix; presumably so that the contrast can be written with the bare level names
203 # Filter prefixes
204 prefixes = colnames(design_matrix)[attr(design,"assign")]
205 avoid = nchar(prefixes) == nchar(colnames(design)) ## columns whose name is as long as the term name would end up with an empty name
206 replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design)))
207 replacements[avoid] = colnames(design)[avoid] ## those columns keep their original name
208 colnames(design) = replacements
209
210 # Do normalization
211 write("Calculating normalization factors...",stdout())
212 dge &lt;- calcNormFactors(dge)
213 write("Estimating common dispersion...",stdout())
214 dge &lt;- estimateGLMCommonDisp(dge,design)
215 write("Estimating trended dispersion...",stdout())
216 dge &lt;- estimateGLMTrendedDisp(dge,design)
217 write("Estimating tagwise dispersion...",stdout())
218 dge &lt;- estimateGLMTagwiseDisp(dge,design)
219
220
221 if(output_MDSplot_logFC != "/dev/null") {
222 write("Creating MDS plot (logFC method)",stdout())
223 points &lt;- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
224 dev.off()# Kill it
225
226 if(output_format_images == "pdf") {
227 pdf(output_MDSplot_logFC,height=14,width=14)
228 } else if(output_format_images == "svg") {
229 svg(output_MDSplot_logFC,height=14,width=14)
230 } else {
231 ## png(output_MDSplot_logFC)
232 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
233
234 bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14)
235 }
236
237
238 diff_x &lt;- abs(max(points\$x)-min(points\$x))
239 diff_y &lt;-(max(points\$y)-min(points\$y))
240 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
241 points(points\$x,points\$y,pch=20)
242 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
243 rm(diff_x,diff_y)
244
245 dev.off()
246 }
247
248 if(output_MDSplot_bcv != "/dev/null") {
249 write("Creating MDS plot (bcv method)",stdout())
250
251 ## 1. First create a virtual plot to obtain the desired coordinates
252 pdf("bcvmds.pdf")
253 points &lt;- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
254 dev.off()# Kill it
255
256 ## 2. Re-plot the coordinates in a new figure with the size and settings.
257 if(output_format_images == "pdf") {
258 pdf(output_MDSplot_bcv,height=14,width=14)
259 } else if(output_format_images == "svg") {
260 svg(output_MDSplot_bcv,height=14,width=14)
261 } else {
262 ## png(output_MDSplot_bcv)
263 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
264
265 bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14)
266 }
267
268 diff_x &lt;- abs(max(points\$x)-min(points\$x))
269 diff_y &lt;-(max(points\$y)-min(points\$y))
270 plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
271 points(points\$x,points\$y,pch=20)
272 text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
273 rm(diff_x,diff_y)
274
275 dev.off()
276 }
277
278
279 if(output_BCVplot != "/dev/null") {
280 write("Creating Biological coefficient of variation plot",stdout())
281
282 if(output_format_images == "pdf") {
283 pdf(output_BCVplot)
284 } else if(output_format_images == "svg") {
285 svg(output_BCVplot)
286 } else {
287 ## png(output_BCVplot)
288 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
289
290 bitmap(output_BCVplot,type="png16m")
291 }
292
293 plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance")
294 dev.off()
295 }
296
297
298 write("Fitting GLM...",stdout())
299 fit &lt;- glmFit(dge,design)
300
301 write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
302 cont &lt;- c(contrast)
303 cont &lt;- makeContrasts(contrasts=cont, levels=design)
304
305 lrt &lt;- glmLRT(fit, contrast=cont[,1])
306 write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
307 write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
308 write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
309
310 ## todo EXPORT FPKM
311 write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)
312
313 if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") {
314 etable &lt;- topTags(lrt, n=nrow(dge))\$table
315 etable &lt;- etable[order(etable\$FDR), ]
316
317 if(output_MAplot != "/dev/null") {
318 write("Creating MA plot...",stdout())
319
320 if(output_format_images == "pdf") {
321 pdf(output_MAplot)
322 } else if(output_format_images == "svg") {
323 svg(output_MAplot)
324 } else {
325 ## png(output_MAplot)
326 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
327
328 bitmap(output_MAplot,type="png16m")
329 }
330
331 with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance"))
332 with(subset(etable, FDR &lt; fdr), points(logCPM, logFC, pch=20, col="red"))
333 abline(h=c(-1,1), col="blue")
334 dev.off()
335 }
336
337 if(output_PValue_distribution_plot != "/dev/null") {
338 write("Creating P-value distribution plot...",stdout())
339
340 if(output_format_images == "pdf") {
341 pdf(output_PValue_distribution_plot,width=14,height=14)
342 } else if(output_format_images == "svg") {
343 svg(output_PValue_distribution_plot,width=14,height=14)
344 } else {
345 ## png(output_PValue_distribution_plot)
346 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
347
348 bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14)
349 }
350
351 expressed_genes &lt;- subset(etable, PValue &lt; 0.99)
352 h &lt;- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (&lt; 0.99)")
353 center &lt;- sum(h\$counts) / length(h\$counts)
354 lines(c(0,1),c(center,center),lty=2,col="red",lwd=2)
355 k &lt;- ksmooth(h\$mid, h\$counts)
356 lines(k\$x,k\$y,col="red",lwd=2)
357 rmsd &lt;- (h\$counts) - center
358 rmsd &lt;- rmsd^2
359 rmsd &lt;- sum(rmsd)
360 rmsd &lt;- sqrt(rmsd)
361 text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue")
362 ## change e into epsilon somehow
363 dev.off()
364 }
365 }
366
367 if(output_heatmap_plot != "/dev/null") {
368 ## Heatmap of the normalized log-CPM values of the 100 tags returned by topTags(); the image device depends on the requested format
369 if(output_format_images == "pdf") {
370 pdf(output_heatmap_plot,width=10.5)
371 } else if(output_format_images == "svg") {
372 svg(output_heatmap_plot,width=10.5)
373 } else {
374 ## png(output_heatmap_plot)
375 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/
376
377 bitmap(output_heatmap_plot,type="png16m",width=10.5)
378 }
379
380 etable2 &lt;- topTags(lrt, n=100)\$table
381 order &lt;- rownames(etable2)
382 cpm_sub &lt;- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),] ## NOTE(review): as.numeric() assumes that the row names of the topTags table are numeric row indices; confirm this for the installed edgeR version
383 heatmap(t(cpm_sub))
384 dev.off()
385 }
386
387 ##output_hierarchical_clustering_plot = args[13]
388
389 if(output_RData_obj != "/dev/null") {
390 save.image(output_RData_obj)
391 }
392
393 write("Done!",stdout())
394 }
395 </configfile>
396 </configfiles>
397
398 <inputs>
399 <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />
400 <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the design matrix using 'edgeR: Design- from Expression matrix'." />
401
402 <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />
403
404 <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />
405 <!-- The option values of "outputs" are tested by the filter expressions in the outputs section; keep both in sync. -->
406 <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
407 <option value="make_output_raw_counts">Raw counts table</option>
408 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option>
409 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option>
410 <option value="make_output_BCVplot">BCV-plot</option>
411 <option value="make_output_MAplot">MA-plot</option>
412 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option>
413 <option value="make_output_hierarchical_clustering_plot">Hierarchical clustering (under construction)</option>
414 <option value="make_output_heatmap_plot">Heatmap</option>
415
416 <option value="make_output_R_stdout">R stdout</option>
417 <option value="make_output_RData_obj">R Data object</option>
418 </param>
419
420 <param name="output_format_images" type="select" label="Output format of images" display="radio">
421 <option value="png">Portable network graphics (.png)</option>
422 <option value="pdf">Portable document format (.pdf)</option>
423 <option value="svg">Scalable vector graphics (.svg)</option>
424 </param>
425 </inputs>
426
427 <outputs>
428 <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" />
429 <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" />
430
431 <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts">
432 <filter>outputs and ("make_output_raw_counts" in outputs)</filter>
433 </data>
434
435 <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (logFC method)">
436 <filter>outputs and ("make_output_MDSplot_logFC" in outputs)</filter>
437
438 <change_format>
439 <when input="output_format_images" value="png" format="png" />
440 <when input="output_format_images" value="pdf" format="pdf" />
441 <when input="output_format_images" value="svg" format="svg" />
442 </change_format>
443 </data>
444
445 <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)">
446 <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter>
447
448 <change_format>
449 <when input="output_format_images" value="png" format="png" />
450 <when input="output_format_images" value="pdf" format="pdf" />
451 <when input="output_format_images" value="svg" format="svg" />
452 </change_format>
453 </data>
454
455 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot">
456 <filter>outputs and ("make_output_BCVplot" in outputs)</filter>
457
458 <change_format>
459 <when input="output_format_images" value="png" format="png" />
460 <when input="output_format_images" value="pdf" format="pdf" />
461 <when input="output_format_images" value="svg" format="svg" />
462 </change_format>
463 </data>
464
465 <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot">
466 <filter>outputs and ("make_output_MAplot" in outputs)</filter>
467
468 <change_format>
469 <when input="output_format_images" value="png" format="png" />
470 <when input="output_format_images" value="pdf" format="pdf" />
471 <when input="output_format_images" value="svg" format="svg" />
472 </change_format>
473 </data>
474
475 <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution">
476 <filter>outputs and ("make_output_PValue_distribution_plot" in outputs)</filter>
477
478 <change_format>
479 <when input="output_format_images" value="png" format="png" />
480 <when input="output_format_images" value="pdf" format="pdf" />
481 <when input="output_format_images" value="svg" format="svg" />
482 </change_format>
483 </data>
484
485 <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical clustering">
486 <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter>
487 <!-- NOTE: the R script reads this path as args[14] but does not write to it yet. The format follows the selected image format. -->
488 <change_format>
489 <when input="output_format_images" value="png" format="png" />
490 <when input="output_format_images" value="pdf" format="pdf" />
491 <when input="output_format_images" value="svg" format="svg" />
492 </change_format>
493 </data>
494
495 <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap">
496 <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter>
497
498 <change_format>
499 <when input="output_format_images" value="png" format="png" />
500 <when input="output_format_images" value="pdf" format="pdf" />
501 <when input="output_format_images" value="svg" format="svg" />
502 </change_format>
503 </data>
504
505 <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object">
506 <filter>outputs and ("make_output_RData_obj" in outputs)</filter>
507 </data>
508
509 <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" >
510 <filter>outputs and ("make_output_R_stdout" in outputs)</filter>
511 </data>
512 </outputs>
513
514 <tests>
515 <test>
516 <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
517 <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
518
519 <param name="contrast" value="E-C"/>
520
521 <param name="fdr" value="0.05" />
522
523 <param name="output_format_images" value="png" />
524
525 <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
526 </test>
527 </tests>
528
529 <help>
530 edgeR: Differential Gene(Expression) Analysis
531 #############################################
532
533 Overview
534 --------
535 Differential expression analysis of RNA-seq and digital gene expression profiles with biological replication. Uses empirical Bayes estimation and exact tests based on the negative binomial distribution. Also useful for differential signal analysis with other types of genome-scale count data [1].
536
537 For every experiment, the algorithm requires a design matrix. This matrix describes which samples belong to which groups.
538 More details on this are given in the edgeR manual: http://www.bioconductor.org/packages/2.12/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
539 and the limma manual.
540
541 Because the creation of a design matrix can be complex and time consuming, especially if no GUI is used, this package comes with an alternative tool which can help you with it.
542 This tool is called *edgeR Design Matrix Creator*.
543 If the appropriate design matrix (with corresponding links to the files) is given,
544 the correct contrast ( http://en.wikipedia.org/wiki/Contrast_(statistics) ) has to be given.
545
546 If you have for example two groups, with an equal weight, you would like to compare either
547 "g1-g2" or "normal-cancer".
548
549 The test function makes use of an MCF7 dataset used in a study that indicates that a higher sequencing depth is not necessarily more important than a higher number of replicates [2].
550
551 Input
552 -----
553 Expression matrix
554 ^^^^^^^^^^^^^^^^^
555 ::
556
557 Geneid "\t" Sample-1 "\t" Sample-2 "\t" Sample-3 "\t" Sample-4 [...] "\n"
558 SMURF "\t" 123 "\t" 21 "\t" 34545 "\t" 98 ... "\n"
559 BRCA1 "\t" 435 "\t" 6655 "\t" 45 "\t" 55 ... "\n"
560 LINK33 "\t" 4 "\t" 645 "\t" 345 "\t" 1 ... "\n"
561 SNORD78 "\t" 498 "\t" 65 "\t" 98 "\t" 27 ... "\n"
562 [...]
563
564 *Note: Make sure the number of columns in the header is identical to the number of columns in the body.*
565
566 Design matrix
567 ^^^^^^^^^^^^^
568 ::
569
570 Sample "\t" Condition "\t" Ethnicity "\t" Patient "\t" Batch "\n"
571 Sample-1 "\t" Tumor "\t" European "\t" 1 "\t" 1 "\n"
572 Sample-2 "\t" Normal "\t" European "\t" 1 "\t" 1 "\n"
573 Sample-3 "\t" Tumor "\t" European "\t" 2 "\t" 1 "\n"
574 Sample-4 "\t" Normal "\t" European "\t" 2 "\t" 1 "\n"
575 Sample-5 "\t" Tumor "\t" African "\t" 3 "\t" 1 "\n"
576 Sample-6 "\t" Normal "\t" African "\t" 3 "\t" 1 "\n"
577 Sample-7 "\t" Tumor "\t" African "\t" 4 "\t" 2 "\n"
578 Sample-8 "\t" Normal "\t" African "\t" 4 "\t" 2 "\n"
579 Sample-9 "\t" Tumor "\t" Asian "\t" 5 "\t" 2 "\n"
580 Sample-10 "\t" Normal "\t" Asian "\t" 5 "\t" 2 "\n"
581 Sample-11 "\t" Tumor "\t" Asian "\t" 6 "\t" 2 "\n"
582 Sample-12 "\t" Normal "\t" Asian "\t" 6 "\t" 2 "\n"
583
584 *Note: Avoid factor names that (1) are numerical or (2) contain mathematical symbols; preferably use letters only.*
585
586 Contrast
587 ^^^^^^^^
588 The contrast represents the biological question. There can be many questions asked, e.g.:
589
590 - Tumor-Normal
591 - African-European
592 - 0.5*(Control+Placebo) - Treated
593
594 Installation
595 ------------
596
597 This tool requires no specific configurations. The following dependencies are installed automatically:
598
599 - R
600 - Bioconductor
601 - limma
602 - edgeR
603
604 License
605 -------
606 - R
607 - GPL 2 &amp; GPL 3
608 - limma
609 - GPL (&gt;=2)
610 - edgeR
611 - GPL (&gt;=2)
612
613 References
614 ----------
615
616 EdgeR
617 ^^^^^
618 **[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.**
619
620 *Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140.
621
622 - http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html
623 - http://dx.doi.org/10.1093/bioinformatics/btp616
624 - http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
625
626 Test-data (MCF7)
627 ^^^^^^^^^^^^^^^^
628 **[2] RNA-seq differential expression studies: more sequence or more replication?**
629
630 *Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304.
631
632 - http://www.ncbi.nlm.nih.gov/pubmed/24319002
633 - http://dx.doi.org/10.1093/bioinformatics/btt688
634
635 Contact
636 -------
637
638 The tool wrapper has been written by Youri Hoogstrate from the Erasmus
639 Medical Center (Rotterdam, Netherlands) on behalf of the Translational
640 Research IT (TraIT) project:
641
642 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
643
644 More tools by the Translational Research IT (TraIT) project can be found
645 in the following toolsheds:
646
647 http://toolshed.dtls.nl/
648
649 http://toolshed.g2.bx.psu.edu
650
651 http://testtoolshed.g2.bx.psu.edu/
652
653 I would like to thank Hina Riaz - Naz Khan for her helpful contribution.
654 </help>
655
656 <citations>
657 <citation type="doi">10.1093/bioinformatics/btp616</citation>
658 <citation type="doi">10.1093/bioinformatics/btt688</citation>
659 </citations>
660 </tool>