comparison rgedgeR/rgedgeRpaired.xml @ 7:8c0405de0695 draft

Uploaded
author fubar
date Sat, 27 Jul 2013 01:25:20 -0400
parents f1e6a5f8a611
children a4edc1360ea9
comparison
equal deleted inserted replaced
6:cd586457aced 7:8c0405de0695
91 <option value="hommel">Hommel</option> 91 <option value="hommel">Hommel</option>
92 <option value="none">no control for multiple tests</option> 92 <option value="none">no control for multiple tests</option>
93 </param> 93 </param>
94 </inputs> 94 </inputs>
95 <outputs> 95 <outputs>
96 <data format="tabular" name="outtab" label="${title}.xls"/> 96 <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls">
97 <filter>edgeR.doedgeR == "T"</filter>
98 </data>
99 <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls">
100 <filter>DESeq2.doDESeq2 == "T"</filter>
101 </data>
102 <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls">
103 <filter>doVoom == "T"</filter>
104 </data>
97 <data format="html" name="html_file" label="${title}.html"/> 105 <data format="html" name="html_file" label="${title}.html"/>
98 </outputs> 106 </outputs>
99 <stdio> 107 <stdio>
100 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" /> 108 <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" />
101 </stdio> 109 </stdio>
365 sink() 373 sink()
366 } 374 }
367 375
368 376
369 377
370 edgeIt = function (Count_Matrix,group,outputfilename,fdrtype='fdr',priordf=5, 378 edgeIt = function (Count_Matrix,group,out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
371 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, 379 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
372 filterquantile=0.2, subjects=c(),mydesign=NULL, 380 filterquantile=0.2, subjects=c(),mydesign=NULL,
373 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', 381 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
374 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", 382 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
375 doCook=F,DESeq_fitType="parameteric") 383 doCook=F,DESeq_fitType="parameteric")
502 dev.off() 510 dev.off()
503 colnames(normData) = paste( colnames(normData),'N',sep="_") 511 colnames(normData) = paste( colnames(normData),'N',sep="_")
504 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) 512 print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
505 nzd = data.frame(log(nonzerod + 1e-2,10)) 513 nzd = data.frame(log(nonzerod + 1e-2,10))
506 boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") 514 boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf")
507 write.table(soutput,outputfilename, quote=FALSE, sep="\t",row.names=F) 515 write.table(soutput,out_edgeR, quote=FALSE, sep="\t",row.names=F)
508 tt = cbind( 516 tt = cbind(
509 Name=as.character(rownames(DGEList\$counts)), 517 Name=as.character(rownames(DGEList\$counts)),
510 DE\$table, 518 DE\$table,
511 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), 519 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
512 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums 520 Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums
557 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) 565 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
558 srDESeq = rDESeq[order(rDESeq\$pvalue),] 566 srDESeq = rDESeq[order(rDESeq\$pvalue),]
559 qqPlot(descr=paste(myTitle,'DESeq2 qqplot'),pvector=rDESeq\$pvalue,outpdf='DESeq2_qqplot.pdf') 567 qqPlot(descr=paste(myTitle,'DESeq2 qqplot'),pvector=rDESeq\$pvalue,outpdf='DESeq2_qqplot.pdf')
560 cat("# DESeq top 50\n") 568 cat("# DESeq top 50\n")
561 print.noquote(srDESeq[1:50,]) 569 print.noquote(srDESeq[1:50,])
562 write.table(srDESeq,paste(mt,'DESeq2_TopTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F) 570 write.table(srDESeq,out_DESeq2, quote=FALSE, sep="\t",row.names=F)
563 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] 571 topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ]
564 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq)) 572 DESeqcountsindex = which(allgenes %in% rownames(topresults.DESeq))
565 DESeqcounts = rep(0, length(allgenes)) 573 DESeqcounts = rep(0, length(allgenes))
566 DESeqcounts[DESeqcountsindex] = 1 574 DESeqcounts[DESeqcountsindex] = 1
567 pdf("DESeq2_dispersion_estimates.pdf") 575 pdf("DESeq2_dispersion_estimates.pdf")
603 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") 611 rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
604 qqPlot(descr=paste(myTitle,'VOOM-limma QQ plot'),pvector=rvoom\$P.Value,outpdf='VOOM_qqplot.pdf') 612 qqPlot(descr=paste(myTitle,'VOOM-limma QQ plot'),pvector=rvoom\$P.Value,outpdf='VOOM_qqplot.pdf')
605 rownames(rvoom) = rownames(workCM) 613 rownames(rvoom) = rownames(workCM)
606 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) 614 rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
607 srvoom = rvoom[order(rvoom\$P.Value),] 615 srvoom = rvoom[order(rvoom\$P.Value),]
608 write.table(srvoom,paste(mt,'VOOM_topTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F) 616 write.table(srvoom,out_VOOM, quote=FALSE, sep="\t",row.names=F)
609 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma 617 # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
610 topresults.voom = srvoom[which(rvoom\$adj.P.Val < fdrthresh), ] 618 topresults.voom = srvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
611 voomcountsindex = which(allgenes %in% topresults.voom\$ID) 619 voomcountsindex = which(allgenes %in% topresults.voom\$ID)
612 voomcounts = rep(0, length(allgenes)) 620 voomcounts = rep(0, length(allgenes))
613 voomcounts[voomcountsindex] = 1 621 voomcounts[voomcountsindex] = 1
649 #### Done 657 #### Done
650 658
651 ###sink(stdout(),append=T,type="message") 659 ###sink(stdout(),append=T,type="message")
652 builtin_gmt="" 660 builtin_gmt=""
653 history_gmt="" 661 history_gmt=""
662 out_edgeR = F
663 out_DESeq2 = F
664 out_VOOM = F
654 doDESeq2 = $DESeq2.doDESeq2 # make these T or F 665 doDESeq2 = $DESeq2.doDESeq2 # make these T or F
655 doVoom = $doVoom 666 doVoom = $doVoom
656 doCamera = F 667 doCamera = F
657 doedgeR = $edgeR.doedgeR 668 doedgeR = $edgeR.doedgeR
658 edgeR_priordf = 0 669 edgeR_priordf = 0
659 670
660 #if $DESeq2.doDESeq2 == "T" 671 #if $DESeq2.doDESeq2 == "T"
672 out_DESeq2 = "$out_DESeq2"
661 DESeq_fitType = "$DESeq2.DESeq_fitType" 673 DESeq_fitType = "$DESeq2.DESeq_fitType"
662 #end if 674 #end if
663 #if $edgeR.doedgeR == "T" 675 #if $edgeR.doedgeR == "T"
664 edgeR_priordf = $edgeR.edgeR_priordf 676 out_edgeR = "$out_edgeR"
677 edgeR_priordf = $edgeR.edgeR_priordf
665 #end if 678 #end if
666 679 #if $doVoom == "T"
680 out_VOOM = "$out_VOOM"
681 #end if
667 682
668 Out_Dir = "$html_file.files_path" 683 Out_Dir = "$html_file.files_path"
669 Input = "$input1" 684 Input = "$input1"
670 TreatmentName = "$treatment_name" 685 TreatmentName = "$treatment_name"
671 TreatmentCols = "$Treat_cols" 686 TreatmentCols = "$Treat_cols"
714 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first 729 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
715 Count_Matrix = Count_Matrix[subset(rn,! islib),] 730 Count_Matrix = Count_Matrix[subset(rn,! islib),]
716 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor 731 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
717 group = factor(group, levels=c(ControlName,TreatmentName)) 732 group = factor(group, levels=c(ControlName,TreatmentName))
718 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns 733 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relable columns
719 results = edgeIt(Count_Matrix=Count_Matrix,group=group,outputfilename=outputfilename, 734 results = edgeIt(Count_Matrix=Count_Matrix,group=group,out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
720 fdrtype='BH',priordf=edgeR_priordf,fdrthresh=0.05,outputdir='.', 735 fdrtype='BH',priordf=edgeR_priordf,fdrthresh=0.05,outputdir='.',
721 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=c(), 736 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=c(),
722 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, 737 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
723 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType) 738 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
724 sessionInfo() 739 sessionInfo()