Mercurial > repos > fubar > differential_count_models
comparison rgedgeRpaired_nocamera.xml @ 149:3107df74056e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/differential_count_models commit 344140b8df53b8b7024618bb04594607a045c03a
author | iuc |
---|---|
date | Mon, 04 May 2015 22:47:36 -0400 |
parents | 474c08e747b6 |
children |
comparison
equal
deleted
inserted
replaced
148:1e20061decdd | 149:3107df74056e |
---|---|
5 <requirement type="package" version="3.1.2">R</requirement> | 5 <requirement type="package" version="3.1.2">R</requirement> |
6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> | 6 <requirement type="package" version="1.3.18">graphicsmagick</requirement> |
7 <requirement type="package" version="9.10">ghostscript</requirement> | 7 <requirement type="package" version="9.10">ghostscript</requirement> |
8 <requirement type="package" version="2.14">biocbasics</requirement> | 8 <requirement type="package" version="2.14">biocbasics</requirement> |
9 </requirements> | 9 </requirements> |
10 <stdio> | |
11 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix"/> | |
12 </stdio> | |
10 <command interpreter="python"> | 13 <command interpreter="python"> |
11 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts" | 14 rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts" |
12 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" | 15 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" |
13 </command> | 16 </command> |
17 <configfiles> | |
18 <configfile name="runme"><![CDATA[ | |
19 # | |
20 # edgeR.Rscript | |
21 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13 | |
22 # updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross | |
23 # Performs DGE on a count table containing n replicates of two conditions | |
24 # | |
25 # Parameters | |
26 # | |
27 # 1 - Output Dir | |
28 | |
29 # Original edgeR code by: S.Lunke and A.Kaspi | |
30 reallybig = log10(.Machine\$double.xmax) | |
31 reallysmall = log10(.Machine\$double.xmin) | |
32 library("gplots") | |
33 library("edgeR") | |
34 library('stringr') | |
35 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') | |
36 { | |
37 # Perform clustering for significant pvalues after controlling FWER | |
38 samples = colnames(cmat) | |
39 gu = unique(group) | |
40 gn = rownames(cmat) | |
41 if (length(gu) == 2) { | |
42 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"} | |
43 pcols = unlist(lapply(group,col.map)) | |
44 } else { | |
45 colours = rainbow(length(gu),start=0,end=4/6) | |
46 pcols = colours[match(group,gu)] } | |
47 dm = cmat[(! is.na(gn)),] | |
48 # remove unlabelled hm rows | |
49 nprobes = nrow(dm) | |
50 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance') | |
51 if (nprobes > nsamp) { | |
52 dm =dm[1:nsamp,] | |
53 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total') | |
54 } | |
55 newcolnames = substr(colnames(dm),1,20) | |
56 colnames(dm) = newcolnames | |
57 pdf(outpdfname) | |
58 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none', | |
59 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5) | |
60 dev.off() | |
61 } | |
62 | |
63 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here") | |
64 { | |
65 # for 2 groups only was | |
66 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"} | |
67 #pcols = unlist(lapply(group,col.map)) | |
68 gu = unique(group) | |
69 colours = rainbow(length(gu),start=0.3,end=0.6) | |
70 pcols = colours[match(group,gu)] | |
71 nrows = nrow(cmat) | |
72 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows) | |
73 if (nrows > nsamp) { | |
74 cmat = cmat[c(1:nsamp),] | |
75 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='') | |
76 } | |
77 newcolnames = substr(colnames(cmat),1,20) | |
78 colnames(cmat) = newcolnames | |
79 pdf(outpdfname) | |
80 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols) | |
81 dev.off() | |
82 } | |
83 | |
84 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...) | |
85 # stolen from https://gist.github.com/703512 | |
86 { | |
87 o = -log10(sort(pvector,decreasing=F)) | |
88 e = -log10( 1:length(o)/length(o) ) | |
89 o[o==-Inf] = reallysmall | |
90 o[o==Inf] = reallybig | |
91 maint = descr | |
92 pdf(outpdf) | |
93 plot(e,o,pch=19,cex=1, main=maint, ..., | |
94 xlab=expression(Expected~~-log[10](italic(p))), | |
95 ylab=expression(Observed~~-log[10](italic(p))), | |
96 xlim=c(0,max(e)), ylim=c(0,max(o))) | |
97 lines(e,e,col="red") | |
98 grid(col = "lightgray", lty = "dotted") | |
99 dev.off() | |
100 } | |
101 | |
102 smearPlot = function(myDGEList,deTags, outSmear, outMain) | |
103 { | |
104 pdf(outSmear) | |
105 plotSmear(myDGEList,de.tags=deTags,main=outMain) | |
106 grid(col="lightgray", lty="dotted") | |
107 dev.off() | |
108 } | |
109 | |
110 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) | |
111 { | |
112 nc = ncol(rawrs) | |
113 ##### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA} | |
114 fullnames = colnames(rawrs) | |
115 newcolnames = substr(colnames(rawrs),1,20) | |
116 colnames(rawrs) = newcolnames | |
117 newcolnames = substr(colnames(cleanrs),1,20) | |
118 colnames(cleanrs) = newcolnames | |
119 defpar = par(no.readonly=T) | |
120 print.noquote('@@@ Raw contig counts by sample:') | |
121 print.noquote(summary(rawrs)) | |
122 print.noquote('@@@ Library size contig counts by sample:') | |
123 print.noquote(summary(cleanrs)) | |
124 pdf(pdfname) | |
125 par(mfrow=c(1,2)) | |
126 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main='log2 raw counts') | |
127 grid(col="lightgray",lty="dotted") | |
128 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('log2 counts after ',maint)) | |
129 grid(col="lightgray",lty="dotted") | |
130 dev.off() | |
131 pdfname = "sample_counts_histogram.pdf" | |
132 nc = ncol(rawrs) | |
133 print.noquote(paste('Using ncol rawrs=',nc)) | |
134 ncroot = round(sqrt(nc)) | |
135 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 } | |
136 m = c() | |
137 for (i in c(1:nc)) { | |
138 rhist = hist(rawrs[,i],breaks=100,plot=F) | |
139 m = append(m,max(rhist\$counts)) | |
140 } | |
141 ymax = max(m) | |
142 ncols = length(fullnames) | |
143 if (ncols > 20) | |
144 { | |
145 scale = 7*ncols/20 | |
146 pdf(pdfname,width=scale,height=scale) | |
147 } else { | |
148 pdf(pdfname) | |
149 } | |
150 par(mfrow=c(ncroot,ncroot)) | |
151 for (i in c(1:nc)) { | |
152 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", | |
153 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax)) | |
154 } | |
155 dev.off() | |
156 par(defpar) | |
157 | |
158 } | |
159 | |
160 cumPlot = function(rawrs,cleanrs,maint,myTitle) | |
161 { # updated to use ecdf | |
162 pdfname = "Differential_rowsum_bar_charts.pdf" | |
163 defpar = par(no.readonly=T) | |
164 lrs = log(rawrs,10) | |
165 lim = max(lrs) | |
166 pdf(pdfname) | |
167 par(mfrow=c(2,1)) | |
168 hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)", | |
169 ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1) | |
170 grid(col="lightgray", lty="dotted") | |
171 lrs = log(cleanrs,10) | |
172 hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)", | |
173 ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1) | |
174 grid(col="lightgray", lty="dotted") | |
175 dev.off() | |
176 par(defpar) | |
177 } | |
178 | |
179 cumPlot1 = function(rawrs,cleanrs,maint,myTitle) | |
180 { # updated to use ecdf | |
181 pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_') | |
182 pdf(pdfname) | |
183 par(mfrow=c(2,1)) | |
184 lastx = max(rawrs) | |
185 rawe = knots(ecdf(rawrs)) | |
186 cleane = knots(ecdf(cleanrs)) | |
187 cy = 1:length(cleane)/length(cleane) | |
188 ry = 1:length(rawe)/length(rawe) | |
189 plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads", | |
190 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) | |
191 grid(col="blue") | |
192 plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads", | |
193 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) | |
194 grid(col="blue") | |
195 dev.off() | |
196 } | |
197 | |
198 | |
199 | |
200 doGSEAold = function(y=NULL,design=NULL,histgmt="", | |
201 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
202 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") | |
203 { | |
204 sink('Camera.log') | |
205 genesets = c() | |
206 if (bigmt > "") | |
207 { | |
208 bigenesets = readLines(bigmt) | |
209 genesets = bigenesets | |
210 } | |
211 if (histgmt > "") | |
212 { | |
213 hgenesets = readLines(histgmt) | |
214 if (bigmt > "") { | |
215 genesets = rbind(genesets,hgenesets) | |
216 } else { | |
217 genesets = hgenesets | |
218 } # use only history if no bi | |
219 } | |
220 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) | |
221 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n | |
222 outf = outfname | |
223 head=paste(myTitle,'edgeR GSEA') | |
224 write(head,file=outfname,append=F) | |
225 ntest=length(genesets) | |
226 urownames = toupper(rownames(y)) | |
227 upcam = c() | |
228 downcam = c() | |
229 for (i in 1:ntest) { | |
230 gs = unlist(genesets[i]) | |
231 g = gs[1] # geneset_id | |
232 u = gs[2] | |
233 if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") } | |
234 glist = gs[3:length(gs)] # member gene symbols | |
235 glist = toupper(glist) | |
236 inglist = urownames %in% glist | |
237 nin = sum(inglist) | |
238 if ((nin > minnin) && (nin < maxnin)) { | |
239 ### print(paste('@@found',sum(inglist),'genes in glist')) | |
240 camres = camera(y=y,index=inglist,design=design) | |
241 if (! is.null(camres)) { | |
242 rownames(camres) = g # gene set name | |
243 camres = cbind(GeneSet=g,URL=u,camres) | |
244 if (camres\$Direction == "Up") | |
245 { | |
246 upcam = rbind(upcam,camres) } else { | |
247 downcam = rbind(downcam,camres) | |
248 } | |
249 } | |
250 } | |
251 } | |
252 uscam = upcam[order(upcam\$PValue),] | |
253 unadjp = uscam\$PValue | |
254 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
255 nup = max(10,sum((uscam\$adjPValue < fdrthresh))) | |
256 dscam = downcam[order(downcam\$PValue),] | |
257 unadjp = dscam\$PValue | |
258 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
259 ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) | |
260 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
261 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
262 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) | |
263 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) | |
264 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) | |
265 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) | |
266 sink() | |
267 } | |
268 | |
269 | |
270 | |
271 | |
272 doGSEA = function(y=NULL,design=NULL,histgmt="", | |
273 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
274 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") | |
275 { | |
276 sink('Camera.log') | |
277 genesets = c() | |
278 if (bigmt > "") | |
279 { | |
280 bigenesets = readLines(bigmt) | |
281 genesets = bigenesets | |
282 } | |
283 if (histgmt > "") | |
284 { | |
285 hgenesets = readLines(histgmt) | |
286 if (bigmt > "") { | |
287 genesets = rbind(genesets,hgenesets) | |
288 } else { | |
289 genesets = hgenesets | |
290 } # use only history if no bi | |
291 } | |
292 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) | |
293 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n | |
294 outf = outfname | |
295 head=paste(myTitle,'edgeR GSEA') | |
296 write(head,file=outfname,append=F) | |
297 ntest=length(genesets) | |
298 urownames = toupper(rownames(y)) | |
299 upcam = c() | |
300 downcam = c() | |
301 incam = c() | |
302 urls = c() | |
303 gsids = c() | |
304 for (i in 1:ntest) { | |
305 gs = unlist(genesets[i]) | |
306 gsid = gs[1] # geneset_id | |
307 url = gs[2] | |
308 if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") } | |
309 glist = gs[3:length(gs)] # member gene symbols | |
310 glist = toupper(glist) | |
311 inglist = urownames %in% glist | |
312 nin = sum(inglist) | |
313 if ((nin > minnin) && (nin < maxnin)) { | |
314 incam = c(incam,inglist) | |
315 gsids = c(gsids,gsid) | |
316 urls = c(urls,url) | |
317 } | |
318 } | |
319 incam = as.list(incam) | |
320 names(incam) = gsids | |
321 allcam = camera(y=y,index=incam,design=design) | |
322 allcamres = cbind(geneset=gsids,allcam,URL=urls) | |
323 for (i in 1:ntest) { | |
324 camres = allcamres[i] | |
325 res = try(test = (camres\$Direction == "Up")) | |
326 if ("try-error" %in% class(res)) { | |
327 cat("test failed, camres = :") | |
328 print.noquote(camres) | |
329 } else { if (camres\$Direction == "Up") | |
330 { upcam = rbind(upcam,camres) | |
331 } else { downcam = rbind(downcam,camres) | |
332 } | |
333 | |
334 } | |
335 } | |
336 uscam = upcam[order(upcam\$PValue),] | |
337 unadjp = uscam\$PValue | |
338 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
339 nup = max(10,sum((uscam\$adjPValue < fdrthresh))) | |
340 dscam = downcam[order(downcam\$PValue),] | |
341 unadjp = dscam\$PValue | |
342 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
343 ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) | |
344 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
345 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
346 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) | |
347 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) | |
348 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) | |
349 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) | |
350 sink() | |
351 } | |
352 | |
353 | |
354 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, | |
355 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, | |
356 filterquantile=0.2, subjects=c(),TreatmentName="Rx",ControlName="Ctrl",mydesign=NULL, | |
357 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', | |
358 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
359 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') | |
360 { | |
361 | |
362 logf = file('Differential.log', open = "a") | |
363 sink(logf,type = c("output", "message")) | |
364 | |
365 | |
366 run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod) | |
367 { | |
368 logf = file('edgeR.log', open = "a") | |
369 sink(logf,type = c("output", "message")) | |
370 #### Setup myDGEList object | |
371 myDGEList = DGEList(counts=workCM, group = group) | |
372 myDGEList = calcNormFactors(myDGEList) | |
373 if (robust_meth == 'ordinary') { | |
374 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
375 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
376 if (priordf > 0) { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) | |
377 } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) } | |
378 comdisp = myDGEList\$common.dispersion | |
379 estpriorn = getPriorN(myDGEList) | |
380 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) | |
381 } else { | |
382 myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth) | |
383 } | |
384 | |
385 | |
386 DGLM = glmFit(myDGEList,design=mydesign) | |
387 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed | |
388 normData = cpm(myDGEList) | |
389 uoutput = cbind( | |
390 Name=as.character(rownames(myDGEList\$counts)), | |
391 DE\$table, | |
392 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
393 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData, | |
394 myDGEList\$counts | |
395 ) | |
396 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable | |
397 goodness = gof(DGLM, pcutoff=fdrthresh) | |
398 if (sum(goodness\$outlier) > 0) { | |
399 print.noquote('GLM outliers:') | |
400 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) | |
401 } else { | |
402 print('No GLM fit outlier genes found\n') | |
403 } | |
404 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) | |
405 pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_')) | |
406 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") | |
407 abline(0,1,lwd=3) | |
408 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") | |
409 dev.off() | |
410 uniqueg = unique(group) | |
411 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) | |
412 tt = cbind( | |
413 Name=as.character(rownames(myDGEList)), | |
414 DE\$table, | |
415 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
416 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums | |
417 ) | |
418 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely | |
419 stt = tt[order(DE\$table\$PValue),] | |
420 print.noquote("@@ edgeR Top tags\n") | |
421 print.noquote(stt[1:50,]) | |
422 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) | |
423 nsig = length(deTags) | |
424 print.noquote(paste('@@',nsig,'tags significant at adj p=',fdrthresh)) | |
425 deColours = ifelse(deTags,'red','black') | |
426 pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_")) | |
427 plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance") | |
428 dev.off() | |
429 dg = myDGEList[order(DE\$table\$PValue),] | |
430 outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_") | |
431 ocpm = normData[order(DE\$table\$PValue),] | |
432 ocpm = ocpm[c(1:100),] | |
433 hmap2(ocpm,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap')) | |
434 outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_") | |
435 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') | |
436 smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) | |
437 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_')) | |
438 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] | |
439 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) | |
440 edgeRcounts = rep(0, length(allgenes)) | |
441 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits | |
442 sink() | |
443 return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts)) | |
444 } ### run_edgeR | |
445 | |
446 | |
447 run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) | |
448 | |
449 { | |
450 logf = file("DESeq2.log", open = "a") | |
451 sink(logf,type = c("output", "message")) | |
452 # DESeq2 | |
453 require('DESeq2') | |
454 library('RColorBrewer') | |
455 if (length(subjects) == 0) | |
456 { | |
457 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) | |
458 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) | |
459 } else { | |
460 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) | |
461 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) | |
462 } | |
463 deSeqDatsizefac = estimateSizeFactors(deSEQds) | |
464 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) | |
465 resDESeq = nbinomWaldTest(deSeqDatdisp) | |
466 rDESeq = as.data.frame(results(resDESeq)) | |
467 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) | |
468 srDESeq = rDESeq[order(rDESeq\$pvalue),] | |
469 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_")) | |
470 cat("# DESeq top 50\n") | |
471 print.noquote(srDESeq[1:50,]) | |
472 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) | |
473 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] | |
474 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) | |
475 DESeqcounts = rep(0, length(allgenes)) | |
476 DESeqcounts[DESeqcountsindex] = 1 | |
477 pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_')) | |
478 plotDispEsts(resDESeq) | |
479 dev.off() | |
480 ysmall = abs(min(rDESeq\$log2FoldChange)) | |
481 ybig = abs(max(rDESeq\$log2FoldChange)) | |
482 ylimit = min(4,ysmall,ybig) | |
483 pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_")) | |
484 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) | |
485 dev.off() | |
486 rlogres = rlogTransformation(resDESeq) | |
487 sampledists = dist( t( assay(rlogres) ) ) | |
488 sdmat = as.matrix(sampledists) | |
489 pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_")) | |
490 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), | |
491 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) | |
492 dev.off() | |
493 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) | |
494 if ("try-error" %in% class(result)) { | |
495 print.noquote('DESeq2 plotPCA failed.') | |
496 } else { | |
497 pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_")) | |
498 #### wtf - print? Seems needed to get this to work | |
499 print(ppca) | |
500 dev.off() | |
501 } | |
502 sink() | |
503 return(DESeqcounts) | |
504 } | |
505 | |
506 | |
507 run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom) | |
508 { | |
509 logf = file('VOOM.log', open = "a") | |
510 sink(logf,type = c("output", "message")) | |
511 if (doedgeR == F) { | |
512 #### Setup myDGEList object | |
513 myDGEList = DGEList(counts=workCM, group = group) | |
514 myDGEList = calcNormFactors(myDGEList) | |
515 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
516 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
517 myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) | |
518 } | |
519 pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_')) | |
520 dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) | |
521 dev.off() | |
522 # Use limma to fit data | |
523 fit = lmFit(dat.voomed, mydesign) | |
524 fit = eBayes(fit) | |
525 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") | |
526 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_')) | |
527 rownames(rvoom) = rownames(workCM) | |
528 rvoom = cbind(Contig=rownames(workCM),rvoom,NReads=cmrowsums,URL=contigurls) | |
529 srvoom = rvoom[order(rvoom\$P.Value),] | |
530 cat("# VOOM top 50\n") | |
531 print(srvoom[1:50,]) | |
532 write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F) | |
533 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma | |
534 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] | |
535 voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) | |
536 voomcounts = rep(0, length(allgenes)) | |
537 voomcounts[voomcountsindex] = 1 | |
538 sink() | |
539 return(voomcounts) | |
540 } | |
541 | |
542 | |
543 #### data cleaning and analsis control starts here | |
544 | |
545 | |
546 # Error handling | |
547 nugroup = length(unique(group)) | |
548 if (nugroup!=2){ | |
549 print("Number of conditions identified in experiment does not equal 2") | |
550 q() | |
551 } | |
552 require(edgeR) | |
553 options(width = 512) | |
554 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") | |
555 allN = nrow(Count_Matrix) | |
556 nscut = round(ncol(Count_Matrix)/2) # half samples | |
557 colTotmillionreads = colSums(Count_Matrix)/1e6 | |
558 counts.dataframe = as.data.frame(c()) | |
559 rawrs = rowSums(Count_Matrix) | |
560 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes | |
561 nzN = nrow(nonzerod) | |
562 nzrs = rowSums(nonzerod) | |
563 zN = allN - nzN | |
564 print('@@@ Quantiles for non-zero row counts:',quote=F) | |
565 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F) | |
566 if (useNDF == T) | |
567 { | |
568 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut | |
569 lo = colSums(Count_Matrix[!gt1rpin3,]) | |
570 workCM = Count_Matrix[gt1rpin3,] | |
571 cleanrs = rowSums(workCM) | |
572 cleanN = length(cleanrs) | |
573 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="") | |
574 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F) | |
575 maint = paste('Filter >=1/million reads in >=',nscut,'samples') | |
576 } else { | |
577 useme = (nzrs > quantile(nzrs,filterquantile)) | |
578 workCM = nonzerod[useme,] | |
579 lo = colSums(nonzerod[!useme,]) | |
580 cleanrs = rowSums(workCM) | |
581 cleanN = length(cleanrs) | |
582 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="") | |
583 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) | |
584 maint = paste('Filter below',filterquantile,'quantile') | |
585 } | |
586 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) | |
587 allgenes = rownames(workCM) | |
588 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp | |
589 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" | |
590 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') | |
591 testreg = str_match(allgenes,reg) | |
592 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string | |
593 { | |
594 print("@@ using ucsc substitution for urls") | |
595 contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") | |
596 } else { | |
597 print("@@ using genecards substitution for urls") | |
598 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") | |
599 } | |
600 print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=','))) | |
601 cmrowsums = rowSums(workCM) | |
602 TName=unique(group)[1] | |
603 CName=unique(group)[2] | |
604 if (is.null(mydesign)) { | |
605 if (length(subjects) == 0) | |
606 { | |
607 mydesign = model.matrix(~group) | |
608 } | |
609 else { | |
610 subjf = factor(subjects) | |
611 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it | |
612 } | |
613 } | |
614 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) | |
615 print.noquote('Using design matrix:') | |
616 print.noquote(mydesign) | |
617 normData = cpm(workCM)*1e6 | |
618 colnames(normData) = paste( colnames(workCM),'N',sep="_") | |
619 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) | |
620 | |
621 if (doedgeR == T) { | |
622 eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod) | |
623 myDGEList = eres\$myDGEList | |
624 edgeRcounts = eres\$edgeRcounts | |
625 #### Plot MDS | |
626 sample_colors = match(group,levels(group)) | |
627 sampleTypes = levels(factor(group)) | |
628 print.noquote(sampleTypes) | |
629 pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_')) | |
630 plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) | |
631 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) | |
632 grid(col="blue") | |
633 dev.off() | |
634 scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors | |
635 normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale)) | |
636 try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T) | |
637 } | |
638 if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) } | |
639 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) } | |
640 | |
641 | |
642 if (doCamera) { | |
643 doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, | |
644 outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) | |
645 } | |
646 counts.dataframe = c() | |
647 vennmain = 'no venn' | |
648 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { | |
649 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { | |
650 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh) | |
651 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, | |
652 VOOM_limma = voomcounts, row.names = allgenes) | |
653 } else if ((doDESeq2==T) && (doedgeR==T)) { | |
654 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh) | |
655 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes) | |
656 } else if ((doVoom==T) && (doedgeR==T)) { | |
657 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh) | |
658 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes) | |
659 } | |
660 | |
661 if (nrow(counts.dataframe > 1)) { | |
662 counts.venn = vennCounts(counts.dataframe) | |
663 vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_") | |
664 pdf(vennf) | |
665 vennDiagram(counts.venn,main=vennmain,col="maroon") | |
666 dev.off() | |
667 } | |
668 } #### doDESeq2 or doVoom | |
669 sink() | |
670 } | |
671 #### Done | |
672 ]]> | |
673 builtin_gmt = "" | |
674 history_gmt = "" | |
675 history_gmt_name = "" | |
676 out_edgeR = F | |
677 out_DESeq2 = F | |
678 out_Voom = "$out_VOOM" | |
679 edgeR_robust_meth = "ordinary" | |
680 doDESeq2 = $DESeq2.doDESeq2 | |
681 doVoom = $doVoom | |
682 doCamera = F | |
683 doedgeR = $edgeR.doedgeR | |
684 edgeR_priordf = 10 | |
685 | |
686 | |
687 #if $doVoom == "T": | |
688 out_Voom = "$out_VOOM" | |
689 #end if | |
690 | |
691 #if $DESeq2.doDESeq2 == "T": | |
692 out_DESeq2 = "$out_DESeq2" | |
693 doDESeq2 = T | |
694 DESeq_fitType = "$DESeq2.DESeq_fitType" | |
695 #end if | |
696 | |
697 #if $edgeR.doedgeR == "T": | |
698 out_edgeR = "$out_edgeR" | |
699 edgeR_priordf = $edgeR.edgeR_priordf | |
700 edgeR_robust_meth = "$edgeR.edgeR_robust_method" | |
701 #end if | |
702 | |
703 | |
704 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) | |
705 { | |
706 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr()) | |
707 quit(save="no",status=2) | |
708 } | |
709 | |
710 Out_Dir = "$html_file.files_path" | |
711 Input = "$input1" | |
712 TreatmentName = "$treatment_name" | |
713 TreatmentCols = "$Treat_cols" | |
714 ControlName = "$control_name" | |
715 ControlCols= "$Control_cols" | |
716 org = "$input1.dbkey" | |
717 if (org == "") { org = "hg19"} | |
718 fdrtype = "$fdrtype" | |
719 fdrthresh = $fdrthresh | |
720 useNDF = $useNDF | |
721 fQ = $fQ # non-differential centile cutoff | |
722 myTitle = "$title" | |
723 sids = strsplit("$subjectids",',') | |
724 subjects = unlist(sids) | |
725 nsubj = length(subjects) | |
726 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 | |
727 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 | |
728 cat('Got TCols=') | |
729 cat(TCols) | |
730 cat('; CCols=') | |
731 cat(CCols) | |
732 cat('\n') | |
733 <![CDATA[ | |
734 useCols = c(TCols,CCols) | |
735 if (file.exists(Out_Dir) == F) dir.create(Out_Dir) | |
736 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') | |
737 snames = colnames(Count_Matrix) | |
738 nsamples = length(snames) | |
739 if (nsubj > 0 & nsubj != nsamples) { | |
740 options("show.error.messages"=T) | |
741 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','), | |
742 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=',')) | |
743 write(mess, stderr()) | |
744 quit(save="no",status=4) | |
745 } | |
746 if (length(subjects) != 0) {subjects = subjects[useCols]} | |
747 Count_Matrix = Count_Matrix[,useCols] ### reorder columns | |
748 rn = rownames(Count_Matrix) | |
749 islib = rn %in% c('librarySize','NotInBedRegions') | |
750 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first | |
751 Count_Matrix = Count_Matrix[subset(rn,! islib),] | |
752 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) | |
753 group = factor(group, levels=c(ControlName,TreatmentName)) | |
754 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") | |
755 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2, | |
756 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', | |
757 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName, | |
758 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, | |
759 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) | |
760 sessionInfo() | |
761 | |
762 sink() | |
763 ]]> | |
764 </configfile> | |
765 </configfiles> | |
14 <inputs> | 766 <inputs> |
15 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample" help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/> | 767 <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample" help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/> |
16 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain"> | 768 <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain"> |
17 <sanitizer invalid_char=""> | 769 <sanitizer invalid_char=""> |
18 <valid initial="string.letters,string.digits"> | 770 <valid initial="string.letters,string.digits"> |
91 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls"> | 843 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls"> |
92 <filter>doVoom == "T"</filter> | 844 <filter>doVoom == "T"</filter> |
93 </data> | 845 </data> |
94 <data format="html" name="html_file" label="${title}.html"/> | 846 <data format="html" name="html_file" label="${title}.html"/> |
95 </outputs> | 847 </outputs> |
96 <stdio> | |
97 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix"/> | |
98 </stdio> | |
99 <tests> | 848 <tests> |
100 <test> | 849 <test> |
101 <param name="input1" value="test_bams2mx.xls" ftype="tabular"/> | 850 <param name="input1" value="test_bams2mx.xls" ftype="tabular"/> |
102 <param name="treatment_name" value="liver"/> | 851 <param name="treatment_name" value="liver"/> |
103 <param name="title" value="edgeRtest"/> | 852 <param name="title" value="edgeRtest"/> |
115 <param name="Treat_cols" value="2,6,7,8"/> | 864 <param name="Treat_cols" value="2,6,7,8"/> |
116 <output name="out_edgeR" file="edgeRtest1out.xls" compare="diff" lines_diff="20"/> | 865 <output name="out_edgeR" file="edgeRtest1out.xls" compare="diff" lines_diff="20"/> |
117 <output name="html_file" file="edgeRtest1out.html" compare="diff" lines_diff="20"/> | 866 <output name="html_file" file="edgeRtest1out.html" compare="diff" lines_diff="20"/> |
118 </test> | 867 </test> |
119 </tests> | 868 </tests> |
120 <configfiles> | |
121 <configfile name="runme"><![CDATA[ | |
122 # | |
123 # edgeR.Rscript | |
124 # updated feb 2014 adding outlier-robust deviance estimate options by ross for R 3.0.2/bioc 2.13 | |
125 # updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross | |
126 # Performs DGE on a count table containing n replicates of two conditions | |
127 # | |
128 # Parameters | |
129 # | |
130 # 1 - Output Dir | |
131 | |
132 # Original edgeR code by: S.Lunke and A.Kaspi | |
133 reallybig = log10(.Machine\$double.xmax) | |
134 reallysmall = log10(.Machine\$double.xmin) | |
135 library("gplots") | |
136 library("edgeR") | |
137 library('stringr') | |
138 hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') | |
139 { | |
140 # Perform clustering for significant pvalues after controlling FWER | |
141 samples = colnames(cmat) | |
142 gu = unique(group) | |
143 gn = rownames(cmat) | |
144 if (length(gu) == 2) { | |
145 col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"} | |
146 pcols = unlist(lapply(group,col.map)) | |
147 } else { | |
148 colours = rainbow(length(gu),start=0,end=4/6) | |
149 pcols = colours[match(group,gu)] } | |
150 dm = cmat[(! is.na(gn)),] | |
151 # remove unlabelled hm rows | |
152 nprobes = nrow(dm) | |
153 # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance') | |
154 if (nprobes > nsamp) { | |
155 dm =dm[1:nsamp,] | |
156 #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total') | |
157 } | |
158 newcolnames = substr(colnames(dm),1,20) | |
159 colnames(dm) = newcolnames | |
160 pdf(outpdfname) | |
161 heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none', | |
162 Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5) | |
163 dev.off() | |
164 } | |
165 | |
166 hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here") | |
167 { | |
168 # for 2 groups only was | |
169 #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"} | |
170 #pcols = unlist(lapply(group,col.map)) | |
171 gu = unique(group) | |
172 colours = rainbow(length(gu),start=0.3,end=0.6) | |
173 pcols = colours[match(group,gu)] | |
174 nrows = nrow(cmat) | |
175 mtitle = paste(myTitle,'Heatmap: n contigs =',nrows) | |
176 if (nrows > nsamp) { | |
177 cmat = cmat[c(1:nsamp),] | |
178 mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='') | |
179 } | |
180 newcolnames = substr(colnames(cmat),1,20) | |
181 colnames(cmat) = newcolnames | |
182 pdf(outpdfname) | |
183 heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols) | |
184 dev.off() | |
185 } | |
186 | |
187 qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...) | |
188 # stolen from https://gist.github.com/703512 | |
189 { | |
190 o = -log10(sort(pvector,decreasing=F)) | |
191 e = -log10( 1:length(o)/length(o) ) | |
192 o[o==-Inf] = reallysmall | |
193 o[o==Inf] = reallybig | |
194 maint = descr | |
195 pdf(outpdf) | |
196 plot(e,o,pch=19,cex=1, main=maint, ..., | |
197 xlab=expression(Expected~~-log[10](italic(p))), | |
198 ylab=expression(Observed~~-log[10](italic(p))), | |
199 xlim=c(0,max(e)), ylim=c(0,max(o))) | |
200 lines(e,e,col="red") | |
201 grid(col = "lightgray", lty = "dotted") | |
202 dev.off() | |
203 } | |
204 | |
205 smearPlot = function(myDGEList,deTags, outSmear, outMain) | |
206 { | |
207 pdf(outSmear) | |
208 plotSmear(myDGEList,de.tags=deTags,main=outMain) | |
209 grid(col="lightgray", lty="dotted") | |
210 dev.off() | |
211 } | |
212 | |
213 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) | |
214 { | |
215 nc = ncol(rawrs) | |
216 ##### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA} | |
217 fullnames = colnames(rawrs) | |
218 newcolnames = substr(colnames(rawrs),1,20) | |
219 colnames(rawrs) = newcolnames | |
220 newcolnames = substr(colnames(cleanrs),1,20) | |
221 colnames(cleanrs) = newcolnames | |
222 defpar = par(no.readonly=T) | |
223 print.noquote('@@@ Raw contig counts by sample:') | |
224 print.noquote(summary(rawrs)) | |
225 print.noquote('@@@ Library size contig counts by sample:') | |
226 print.noquote(summary(cleanrs)) | |
227 pdf(pdfname) | |
228 par(mfrow=c(1,2)) | |
229 boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main='log2 raw counts') | |
230 grid(col="lightgray",lty="dotted") | |
231 boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('log2 counts after ',maint)) | |
232 grid(col="lightgray",lty="dotted") | |
233 dev.off() | |
234 pdfname = "sample_counts_histogram.pdf" | |
235 nc = ncol(rawrs) | |
236 print.noquote(paste('Using ncol rawrs=',nc)) | |
237 ncroot = round(sqrt(nc)) | |
238 if (ncroot*ncroot < nc) { ncroot = ncroot + 1 } | |
239 m = c() | |
240 for (i in c(1:nc)) { | |
241 rhist = hist(rawrs[,i],breaks=100,plot=F) | |
242 m = append(m,max(rhist\$counts)) | |
243 } | |
244 ymax = max(m) | |
245 ncols = length(fullnames) | |
246 if (ncols > 20) | |
247 { | |
248 scale = 7*ncols/20 | |
249 pdf(pdfname,width=scale,height=scale) | |
250 } else { | |
251 pdf(pdfname) | |
252 } | |
253 par(mfrow=c(ncroot,ncroot)) | |
254 for (i in c(1:nc)) { | |
255 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", | |
256 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax)) | |
257 } | |
258 dev.off() | |
259 par(defpar) | |
260 | |
261 } | |
262 | |
263 cumPlot = function(rawrs,cleanrs,maint,myTitle) | |
264 { # updated to use ecdf | |
265 pdfname = "Differential_rowsum_bar_charts.pdf" | |
266 defpar = par(no.readonly=T) | |
267 lrs = log(rawrs,10) | |
268 lim = max(lrs) | |
269 pdf(pdfname) | |
270 par(mfrow=c(2,1)) | |
271 hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)", | |
272 ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1) | |
273 grid(col="lightgray", lty="dotted") | |
274 lrs = log(cleanrs,10) | |
275 hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)", | |
276 ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1) | |
277 grid(col="lightgray", lty="dotted") | |
278 dev.off() | |
279 par(defpar) | |
280 } | |
281 | |
282 cumPlot1 = function(rawrs,cleanrs,maint,myTitle) | |
283 { # updated to use ecdf | |
284 pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_') | |
285 pdf(pdfname) | |
286 par(mfrow=c(2,1)) | |
287 lastx = max(rawrs) | |
288 rawe = knots(ecdf(rawrs)) | |
289 cleane = knots(ecdf(cleanrs)) | |
290 cy = 1:length(cleane)/length(cleane) | |
291 ry = 1:length(rawe)/length(rawe) | |
292 plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads", | |
293 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) | |
294 grid(col="blue") | |
295 plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads", | |
296 ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) | |
297 grid(col="blue") | |
298 dev.off() | |
299 } | |
300 | |
301 | |
302 | |
303 doGSEAold = function(y=NULL,design=NULL,histgmt="", | |
304 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
305 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") | |
306 { | |
307 sink('Camera.log') | |
308 genesets = c() | |
309 if (bigmt > "") | |
310 { | |
311 bigenesets = readLines(bigmt) | |
312 genesets = bigenesets | |
313 } | |
314 if (histgmt > "") | |
315 { | |
316 hgenesets = readLines(histgmt) | |
317 if (bigmt > "") { | |
318 genesets = rbind(genesets,hgenesets) | |
319 } else { | |
320 genesets = hgenesets | |
321 } # use only history if no bi | |
322 } | |
323 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) | |
324 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n | |
325 outf = outfname | |
326 head=paste(myTitle,'edgeR GSEA') | |
327 write(head,file=outfname,append=F) | |
328 ntest=length(genesets) | |
329 urownames = toupper(rownames(y)) | |
330 upcam = c() | |
331 downcam = c() | |
332 for (i in 1:ntest) { | |
333 gs = unlist(genesets[i]) | |
334 g = gs[1] # geneset_id | |
335 u = gs[2] | |
336 if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") } | |
337 glist = gs[3:length(gs)] # member gene symbols | |
338 glist = toupper(glist) | |
339 inglist = urownames %in% glist | |
340 nin = sum(inglist) | |
341 if ((nin > minnin) && (nin < maxnin)) { | |
342 ### print(paste('@@found',sum(inglist),'genes in glist')) | |
343 camres = camera(y=y,index=inglist,design=design) | |
344 if (! is.null(camres)) { | |
345 rownames(camres) = g # gene set name | |
346 camres = cbind(GeneSet=g,URL=u,camres) | |
347 if (camres\$Direction == "Up") | |
348 { | |
349 upcam = rbind(upcam,camres) } else { | |
350 downcam = rbind(downcam,camres) | |
351 } | |
352 } | |
353 } | |
354 } | |
355 uscam = upcam[order(upcam\$PValue),] | |
356 unadjp = uscam\$PValue | |
357 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
358 nup = max(10,sum((uscam\$adjPValue < fdrthresh))) | |
359 dscam = downcam[order(downcam\$PValue),] | |
360 unadjp = dscam\$PValue | |
361 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
362 ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) | |
363 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
364 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
365 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) | |
366 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) | |
367 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) | |
368 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) | |
369 sink() | |
370 } | |
371 | |
372 | |
373 | |
374 | |
375 doGSEA = function(y=NULL,design=NULL,histgmt="", | |
376 bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
377 ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") | |
378 { | |
379 sink('Camera.log') | |
380 genesets = c() | |
381 if (bigmt > "") | |
382 { | |
383 bigenesets = readLines(bigmt) | |
384 genesets = bigenesets | |
385 } | |
386 if (histgmt > "") | |
387 { | |
388 hgenesets = readLines(histgmt) | |
389 if (bigmt > "") { | |
390 genesets = rbind(genesets,hgenesets) | |
391 } else { | |
392 genesets = hgenesets | |
393 } # use only history if no bi | |
394 } | |
395 print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) | |
396 genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n | |
397 outf = outfname | |
398 head=paste(myTitle,'edgeR GSEA') | |
399 write(head,file=outfname,append=F) | |
400 ntest=length(genesets) | |
401 urownames = toupper(rownames(y)) | |
402 upcam = c() | |
403 downcam = c() | |
404 incam = c() | |
405 urls = c() | |
406 gsids = c() | |
407 for (i in 1:ntest) { | |
408 gs = unlist(genesets[i]) | |
409 gsid = gs[1] # geneset_id | |
410 url = gs[2] | |
411 if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") } | |
412 glist = gs[3:length(gs)] # member gene symbols | |
413 glist = toupper(glist) | |
414 inglist = urownames %in% glist | |
415 nin = sum(inglist) | |
416 if ((nin > minnin) && (nin < maxnin)) { | |
417 incam = c(incam,inglist) | |
418 gsids = c(gsids,gsid) | |
419 urls = c(urls,url) | |
420 } | |
421 } | |
422 incam = as.list(incam) | |
423 names(incam) = gsids | |
424 allcam = camera(y=y,index=incam,design=design) | |
425 allcamres = cbind(geneset=gsids,allcam,URL=urls) | |
426 for (i in 1:ntest) { | |
427 camres = allcamres[i] | |
428 res = try(test = (camres\$Direction == "Up")) | |
429 if ("try-error" %in% class(res)) { | |
430 cat("test failed, camres = :") | |
431 print.noquote(camres) | |
432 } else { if (camres\$Direction == "Up") | |
433 { upcam = rbind(upcam,camres) | |
434 } else { downcam = rbind(downcam,camres) | |
435 } | |
436 | |
437 } | |
438 } | |
439 uscam = upcam[order(upcam\$PValue),] | |
440 unadjp = uscam\$PValue | |
441 uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
442 nup = max(10,sum((uscam\$adjPValue < fdrthresh))) | |
443 dscam = downcam[order(downcam\$PValue),] | |
444 unadjp = dscam\$PValue | |
445 dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) | |
446 ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) | |
447 write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
448 write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) | |
449 print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) | |
450 write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) | |
451 print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) | |
452 write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) | |
453 sink() | |
454 } | |
455 | |
456 | |
457 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5, | |
458 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, | |
459 filterquantile=0.2, subjects=c(),TreatmentName="Rx",ControlName="Ctrl",mydesign=NULL, | |
460 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', | |
461 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", | |
462 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary') | |
463 { | |
464 | |
465 logf = file('Differential.log', open = "a") | |
466 sink(logf,type = c("output", "message")) | |
467 | |
468 | |
469 run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod) | |
470 { | |
471 logf = file('edgeR.log', open = "a") | |
472 sink(logf,type = c("output", "message")) | |
473 #### Setup myDGEList object | |
474 myDGEList = DGEList(counts=workCM, group = group) | |
475 myDGEList = calcNormFactors(myDGEList) | |
476 if (robust_meth == 'ordinary') { | |
477 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
478 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
479 if (priordf > 0) { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign,prior.df = priordf) | |
480 } else { myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) } | |
481 comdisp = myDGEList\$common.dispersion | |
482 estpriorn = getPriorN(myDGEList) | |
483 print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) | |
484 } else { | |
485 myDGEList = estimateGLMRobustDisp(myDGEList,design=mydesign, prior.df = priordf, maxit = 6, residual.type = robust_meth) | |
486 } | |
487 | |
488 | |
489 DGLM = glmFit(myDGEList,design=mydesign) | |
490 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed | |
491 normData = cpm(myDGEList) | |
492 uoutput = cbind( | |
493 Name=as.character(rownames(myDGEList\$counts)), | |
494 DE\$table, | |
495 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
496 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData, | |
497 myDGEList\$counts | |
498 ) | |
499 soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable | |
500 goodness = gof(DGLM, pcutoff=fdrthresh) | |
501 if (sum(goodness\$outlier) > 0) { | |
502 print.noquote('GLM outliers:') | |
503 print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) | |
504 } else { | |
505 print('No GLM fit outlier genes found\n') | |
506 } | |
507 z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) | |
508 pdf(paste("edgeR",mt,"GoodnessofFit.pdf",sep='_')) | |
509 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") | |
510 abline(0,1,lwd=3) | |
511 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") | |
512 dev.off() | |
513 uniqueg = unique(group) | |
514 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) | |
515 tt = cbind( | |
516 Name=as.character(rownames(myDGEList)), | |
517 DE\$table, | |
518 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), | |
519 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums | |
520 ) | |
521 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely | |
522 stt = tt[order(DE\$table\$PValue),] | |
523 print.noquote("@@ edgeR Top tags\n") | |
524 print.noquote(stt[1:50,]) | |
525 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) | |
526 nsig = length(deTags) | |
527 print.noquote(paste('@@',nsig,'tags significant at adj p=',fdrthresh)) | |
528 deColours = ifelse(deTags,'red','black') | |
529 pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_")) | |
530 plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance") | |
531 dev.off() | |
532 dg = myDGEList[order(DE\$table\$PValue),] | |
533 outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_") | |
534 ocpm = normData[order(DE\$table\$PValue),] | |
535 ocpm = ocpm[c(1:100),] | |
536 hmap2(ocpm,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap')) | |
537 outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_") | |
538 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') | |
539 smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) | |
540 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_')) | |
541 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] | |
542 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) | |
543 edgeRcounts = rep(0, length(allgenes)) | |
544 edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits | |
545 sink() | |
546 return(list(myDGEList=myDGEList,edgeRcounts=edgeRcounts)) | |
547 } ### run_edgeR | |
548 | |
549 | |
550 run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) | |
551 | |
552 { | |
553 logf = file("DESeq2.log", open = "a") | |
554 sink(logf,type = c("output", "message")) | |
555 # DESeq2 | |
556 require('DESeq2') | |
557 library('RColorBrewer') | |
558 if (length(subjects) == 0) | |
559 { | |
560 pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) | |
561 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) | |
562 } else { | |
563 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) | |
564 deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) | |
565 } | |
566 deSeqDatsizefac = estimateSizeFactors(deSEQds) | |
567 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) | |
568 resDESeq = nbinomWaldTest(deSeqDatdisp) | |
569 rDESeq = as.data.frame(results(resDESeq)) | |
570 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) | |
571 srDESeq = rDESeq[order(rDESeq\$pvalue),] | |
572 qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf=paste('DESeq2',mt,'qqplot.pdf',sep="_")) | |
573 cat("# DESeq top 50\n") | |
574 print.noquote(srDESeq[1:50,]) | |
575 write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) | |
576 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] | |
577 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) | |
578 DESeqcounts = rep(0, length(allgenes)) | |
579 DESeqcounts[DESeqcountsindex] = 1 | |
580 pdf(paste("DESeq2",mt,"dispersion_estimates.pdf",sep='_')) | |
581 plotDispEsts(resDESeq) | |
582 dev.off() | |
583 ysmall = abs(min(rDESeq\$log2FoldChange)) | |
584 ybig = abs(max(rDESeq\$log2FoldChange)) | |
585 ylimit = min(4,ysmall,ybig) | |
586 pdf(paste("DESeq2",mt,"MA_plot.pdf",sep="_")) | |
587 plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) | |
588 dev.off() | |
589 rlogres = rlogTransformation(resDESeq) | |
590 sampledists = dist( t( assay(rlogres) ) ) | |
591 sdmat = as.matrix(sampledists) | |
592 pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_")) | |
593 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), | |
594 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) | |
595 dev.off() | |
596 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) | |
597 if ("try-error" %in% class(result)) { | |
598 print.noquote('DESeq2 plotPCA failed.') | |
599 } else { | |
600 pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_")) | |
601 #### wtf - print? Seems needed to get this to work | |
602 print(ppca) | |
603 dev.off() | |
604 } | |
605 sink() | |
606 return(DESeqcounts) | |
607 } | |
608 | |
609 | |
610 run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom) | |
611 { | |
612 logf = file('VOOM.log', open = "a") | |
613 sink(logf,type = c("output", "message")) | |
614 if (doedgeR == F) { | |
615 #### Setup myDGEList object | |
616 myDGEList = DGEList(counts=workCM, group = group) | |
617 myDGEList = calcNormFactors(myDGEList) | |
618 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign) | |
619 myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign) | |
620 myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign) | |
621 } | |
622 pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_')) | |
623 dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) | |
624 dev.off() | |
625 # Use limma to fit data | |
626 fit = lmFit(dat.voomed, mydesign) | |
627 fit = eBayes(fit) | |
628 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") | |
629 qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_')) | |
630 rownames(rvoom) = rownames(workCM) | |
631 rvoom = cbind(Contig=rownames(workCM),rvoom,NReads=cmrowsums,URL=contigurls) | |
632 srvoom = rvoom[order(rvoom\$P.Value),] | |
633 cat("# VOOM top 50\n") | |
634 print(srvoom[1:50,]) | |
635 write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F) | |
636 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma | |
637 topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] | |
638 voomcountsindex <- which(allgenes %in% rownames(topresults.voom)) | |
639 voomcounts = rep(0, length(allgenes)) | |
640 voomcounts[voomcountsindex] = 1 | |
641 sink() | |
642 return(voomcounts) | |
643 } | |
644 | |
645 | |
646 #### data cleaning and analsis control starts here | |
647 | |
648 | |
649 # Error handling | |
650 nugroup = length(unique(group)) | |
651 if (nugroup!=2){ | |
652 print("Number of conditions identified in experiment does not equal 2") | |
653 q() | |
654 } | |
655 require(edgeR) | |
656 options(width = 512) | |
657 mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") | |
658 allN = nrow(Count_Matrix) | |
659 nscut = round(ncol(Count_Matrix)/2) # half samples | |
660 colTotmillionreads = colSums(Count_Matrix)/1e6 | |
661 counts.dataframe = as.data.frame(c()) | |
662 rawrs = rowSums(Count_Matrix) | |
663 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes | |
664 nzN = nrow(nonzerod) | |
665 nzrs = rowSums(nonzerod) | |
666 zN = allN - nzN | |
667 print('@@@ Quantiles for non-zero row counts:',quote=F) | |
668 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F) | |
669 if (useNDF == T) | |
670 { | |
671 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut | |
672 lo = colSums(Count_Matrix[!gt1rpin3,]) | |
673 workCM = Count_Matrix[gt1rpin3,] | |
674 cleanrs = rowSums(workCM) | |
675 cleanN = length(cleanrs) | |
676 meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="") | |
677 print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F) | |
678 maint = paste('Filter >=1/million reads in >=',nscut,'samples') | |
679 } else { | |
680 useme = (nzrs > quantile(nzrs,filterquantile)) | |
681 workCM = nonzerod[useme,] | |
682 lo = colSums(nonzerod[!useme,]) | |
683 cleanrs = rowSums(workCM) | |
684 cleanN = length(cleanrs) | |
685 meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="") | |
686 print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) | |
687 maint = paste('Filter below',filterquantile,'quantile') | |
688 } | |
689 cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) | |
690 allgenes = rownames(workCM) | |
691 reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" # ucsc chr:start-end regexp | |
692 genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" | |
693 ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') | |
694 testreg = str_match(allgenes,reg) | |
695 if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string | |
696 { | |
697 print("@@ using ucsc substitution for urls") | |
698 contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") | |
699 } else { | |
700 print("@@ using genecards substitution for urls") | |
701 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") | |
702 } | |
703 print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=','))) | |
704 cmrowsums = rowSums(workCM) | |
705 TName=unique(group)[1] | |
706 CName=unique(group)[2] | |
707 if (is.null(mydesign)) { | |
708 if (length(subjects) == 0) | |
709 { | |
710 mydesign = model.matrix(~group) | |
711 } | |
712 else { | |
713 subjf = factor(subjects) | |
714 mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it | |
715 } | |
716 } | |
717 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) | |
718 print.noquote('Using design matrix:') | |
719 print.noquote(mydesign) | |
720 normData = cpm(workCM)*1e6 | |
721 colnames(normData) = paste( colnames(workCM),'N',sep="_") | |
722 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) | |
723 | |
724 if (doedgeR == T) { | |
725 eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod) | |
726 myDGEList = eres\$myDGEList | |
727 edgeRcounts = eres\$edgeRcounts | |
728 #### Plot MDS | |
729 sample_colors = match(group,levels(group)) | |
730 sampleTypes = levels(factor(group)) | |
731 print.noquote(sampleTypes) | |
732 pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_')) | |
733 plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) | |
734 legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) | |
735 grid(col="blue") | |
736 dev.off() | |
737 scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors | |
738 normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale)) | |
739 try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T) | |
740 } | |
741 if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) } | |
742 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) } | |
743 | |
744 | |
745 if (doCamera) { | |
746 doGSEA(y=myDGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, | |
747 outfname=paste("GSEA_Camera",mt,"table.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) | |
748 } | |
749 counts.dataframe = c() | |
750 vennmain = 'no venn' | |
751 if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { | |
752 if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { | |
753 vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh) | |
754 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, | |
755 VOOM_limma = voomcounts, row.names = allgenes) | |
756 } else if ((doDESeq2==T) && (doedgeR==T)) { | |
757 vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh) | |
758 counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes) | |
759 } else if ((doVoom==T) && (doedgeR==T)) { | |
760 vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh) | |
761 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes) | |
762 } | |
763 | |
764 if (nrow(counts.dataframe > 1)) { | |
765 counts.venn = vennCounts(counts.dataframe) | |
766 vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_") | |
767 pdf(vennf) | |
768 vennDiagram(counts.venn,main=vennmain,col="maroon") | |
769 dev.off() | |
770 } | |
771 } #### doDESeq2 or doVoom | |
772 sink() | |
773 } | |
774 #### Done | |
775 ]]> | |
776 builtin_gmt = "" | |
777 history_gmt = "" | |
778 history_gmt_name = "" | |
779 out_edgeR = F | |
780 out_DESeq2 = F | |
781 out_Voom = "$out_VOOM" | |
782 edgeR_robust_meth = "ordinary" | |
783 doDESeq2 = $DESeq2.doDESeq2 | |
784 doVoom = $doVoom | |
785 doCamera = F | |
786 doedgeR = $edgeR.doedgeR | |
787 edgeR_priordf = 10 | |
788 | |
789 | |
790 #if $doVoom == "T": | |
791 out_Voom = "$out_VOOM" | |
792 #end if | |
793 | |
794 #if $DESeq2.doDESeq2 == "T": | |
795 out_DESeq2 = "$out_DESeq2" | |
796 doDESeq2 = T | |
797 DESeq_fitType = "$DESeq2.DESeq_fitType" | |
798 #end if | |
799 | |
800 #if $edgeR.doedgeR == "T": | |
801 out_edgeR = "$out_edgeR" | |
802 edgeR_priordf = $edgeR.edgeR_priordf | |
803 edgeR_robust_meth = "$edgeR.edgeR_robust_method" | |
804 #end if | |
805 | |
806 | |
807 if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) | |
808 { | |
809 write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr()) | |
810 quit(save="no",status=2) | |
811 } | |
812 | |
813 Out_Dir = "$html_file.files_path" | |
814 Input = "$input1" | |
815 TreatmentName = "$treatment_name" | |
816 TreatmentCols = "$Treat_cols" | |
817 ControlName = "$control_name" | |
818 ControlCols= "$Control_cols" | |
819 org = "$input1.dbkey" | |
820 if (org == "") { org = "hg19"} | |
821 fdrtype = "$fdrtype" | |
822 fdrthresh = $fdrthresh | |
823 useNDF = $useNDF | |
824 fQ = $fQ # non-differential centile cutoff | |
825 myTitle = "$title" | |
826 sids = strsplit("$subjectids",',') | |
827 subjects = unlist(sids) | |
828 nsubj = length(subjects) | |
829 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 | |
830 CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 | |
831 cat('Got TCols=') | |
832 cat(TCols) | |
833 cat('; CCols=') | |
834 cat(CCols) | |
835 cat('\n') | |
836 <![CDATA[ | |
837 useCols = c(TCols,CCols) | |
838 if (file.exists(Out_Dir) == F) dir.create(Out_Dir) | |
839 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') | |
840 snames = colnames(Count_Matrix) | |
841 nsamples = length(snames) | |
842 if (nsubj > 0 & nsubj != nsamples) { | |
843 options("show.error.messages"=T) | |
844 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','), | |
845 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=',')) | |
846 write(mess, stderr()) | |
847 quit(save="no",status=4) | |
848 } | |
849 if (length(subjects) != 0) {subjects = subjects[useCols]} | |
850 Count_Matrix = Count_Matrix[,useCols] ### reorder columns | |
851 rn = rownames(Count_Matrix) | |
852 islib = rn %in% c('librarySize','NotInBedRegions') | |
853 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first | |
854 Count_Matrix = Count_Matrix[subset(rn,! islib),] | |
855 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) | |
856 group = factor(group, levels=c(ControlName,TreatmentName)) | |
857 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") | |
858 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2, | |
859 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', | |
860 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName, | |
861 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, | |
862 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) | |
863 sessionInfo() | |
864 | |
865 sink() | |
866 ]]> | |
867 </configfile> | |
868 </configfiles> | |
869 <help> | 869 <help> |
870 | 870 |
871 **What it does** | 871 **What it does** |
872 | 872 |
873 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes. | 873 Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes. |
874 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design. | 874 Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design. |
875 | 875 |
876 **Input** | 876 **Input** |
877 | 877 |
878 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper | 878 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper |
879 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the | 879 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the |
880 non-negative integer count of reads from one sample overlapping the feature. | 880 non-negative integer count of reads from one sample overlapping the feature. |
881 | 881 |
882 The matrix must have a header row uniquely identifying the source samples, and unique row names in | 882 The matrix must have a header row uniquely identifying the source samples, and unique row names in |
883 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods. | 883 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods. |
884 They must be unique and R names or they will be mangled - please read the fine R docs for the rules on identifiers. | 884 They must be unique and R names or they will be mangled - please read the fine R docs for the rules on identifiers. |
885 | 885 |
886 **Specifying comparisons** | 886 **Specifying comparisons** |
887 | 887 |
888 This is basically dumbed down for two factors - case vs control. | 888 This is basically dumbed down for two factors - case vs control. |
889 | 889 |
890 More complex interfaces are possible but painful at present. | 890 More complex interfaces are possible but painful at present. |
891 Probably need to specify a phenotype file to do this better. | 891 Probably need to specify a phenotype file to do this better. |
892 Work in progress. Send code. | 892 Work in progress. Send code. |
893 | 893 |
894 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples), | 894 If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples), |
895 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or | 895 put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or |
896 A list of integers, one for each subject or an empty string if samples are all independent. | 896 A list of integers, one for each subject or an empty string if samples are all independent. |
897 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix. | 897 If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix. |
898 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used. | 898 Integers for samples that are not in the analysis *must* be present in the string as filler even if not used. |
899 | 899 |
900 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones | 900 So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones |
901 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use | 901 eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use |
902 8,9,1,1,2,2 | 902 8,9,1,1,2,2 |
903 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6 | 903 as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6 |
904 | 904 |
905 **Methods available** | 905 **Methods available** |
906 | 906 |
907 You can run 3 popular Bioconductor packages available for count data. | 907 You can run 3 popular Bioconductor packages available for count data. |
914 | 914 |
915 and optionally camera in edgeR which works better if MSigDB is installed. | 915 and optionally camera in edgeR which works better if MSigDB is installed. |
916 | 916 |
917 **Outputs** | 917 **Outputs** |
918 | 918 |
919 Some helpful plots and analysis results. Note that most of these are produced using R code | 919 Some helpful plots and analysis results. Note that most of these are produced using R code |
920 suggested by the excellent documentation and vignettes for the Bioconductor | 920 suggested by the excellent documentation and vignettes for the Bioconductor |
921 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy. | 921 packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy. |
922 | 922 |
923 **Note on Voom** | 923 **Note on Voom** |
924 | 924 |
959 vooma is a similar function but for microarrays instead of RNA-seq. | 959 vooma is a similar function but for microarrays instead of RNA-seq. |
960 | 960 |
961 | 961 |
962 ***old rant on changes to Bioconductor package variable names between versions*** | 962 ***old rant on changes to Bioconductor package variable names between versions*** |
963 | 963 |
964 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) | 964 The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue) |
965 breaking this and all other code that assumed the old name for this variable, | 965 breaking this and all other code that assumed the old name for this variable, |
966 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). | 966 between edgeR2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). |
967 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing | 967 This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing |
968 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly | 968 to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly |
969 when their old scripts break. This tool currently now works with 2.4.6. | 969 when their old scripts break. This tool currently now works with 2.4.6. |
970 | 970 |
971 **Note on prior.N** | 971 **Note on prior.N** |
972 | 972 |
973 http://seqanswers.com/forums/showthread.php?t=5591 says: | 973 http://seqanswers.com/forums/showthread.php?t=5591 says: |
974 | 974 |
975 *prior.n* | 975 *prior.n* |
976 | 976 |
977 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. | 977 The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. |
978 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood | 978 You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood |
979 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your | 979 in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your |
980 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the | 980 tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the |
981 common likelihood the weight of one observation. | 981 common likelihood the weight of one observation. |
982 | 982 |
983 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, | 983 In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, |
984 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that | 984 or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that |
985 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation | 985 you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation |
986 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment? | 986 (squeezing) of the tagwise dispersions. How many samples do you have in your experiment? |
987 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. | 987 What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. |
988 If you have more samples, then the tagwise dispersion estimates will be more reliable, | 988 If you have more samples, then the tagwise dispersion estimates will be more reliable, |
989 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. | 989 so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. |
990 | 990 |
991 | 991 |
992 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes: | 992 From Bioconductor Digest, Vol 118, Issue 5, Gordon writes: |
993 | 993 |
994 Dear Dorota, | 994 Dear Dorota, |
1021 | 1021 |
1022 ---- | 1022 ---- |
1023 | 1023 |
1024 **Attributions** | 1024 **Attributions** |
1025 | 1025 |
1026 edgeR - edgeR_ | 1026 edgeR - edgeR_ |
1027 | 1027 |
1028 VOOM/limma - limma_VOOM_ | 1028 VOOM/limma - limma_VOOM_ |
1029 | 1029 |
1030 DESeq2 - DESeq2_ for details | 1030 DESeq2 - DESeq2_ for details |
1031 | 1031 |
1032 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package. | 1032 See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package. |
1033 | 1033 |
1034 Galaxy_ (that's what you are using right now!) for gluing everything together | 1034 Galaxy_ (that's what you are using right now!) for gluing everything together |
1035 | 1035 |
1036 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is | 1036 Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is |
1037 licensed to you under the LGPL_ like other rgenetics artefacts | 1037 licensed to you under the LGPL_ like other rgenetics artefacts |
1038 | 1038 |
1039 .. _LGPL: http://www.gnu.org/copyleft/lesser.html | 1039 .. _LGPL: http://www.gnu.org/copyleft/lesser.html |
1040 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html | 1040 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html |
1041 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html | 1041 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html |