differential_count_models: rgedgeRpaired

comparison rgedgeRpaired_nocamera.xml @ 124:731315bd6e48 draft

Uploaded

author	fubar
date	Tue, 25 Nov 2014 05:50:07 -0500
parents	51f998262ada
children	999d4b5939bb

comparison

equal deleted inserted replaced

-:51f998262ada
+:731315bd6e48
 <requirement type="package" version="9.10">ghostscript</requirement>
 <requirement type="package" version="2.14">biocbasics</requirement>
 </requirements>
 <command interpreter="python">
-rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts"
+rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts"
 --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
 </command>
 <inputs>
 <param name="input1"  type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
 help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
 <valid initial="string.letters,string.digits"><add value="_" /> </valid>
 </sanitizer>
 </param>
 <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
 <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True"
-multiple="true" use_header_names="true" size="120" display="checkboxes">
+multiple="true" use_header_names="true" size="120" display="checkboxes"  force_select="True">
 <validator type="no_options" message="Please select at least one column."/>
 </param>
 <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
 <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True"
-multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
+multiple="true" use_header_names="true" size="120" display="checkboxes"  force_select="True">
 </param>
 <param name="subjectids" type="text" optional="true" size="120" value = ""
 label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input"
 help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'">
 <sanitizer>
 label="Run the same model with Voom/limma and compare findings"
 help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma">
 <option value="F" selected="true">Do not run VOOM</option>
 <option value="T">Run VOOM</option>
 </param>
-<!--
-<conditional name="camera">
-<param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets"
-help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
-<option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
-<option value="T">Run GSEA tests with the Camera algorithm</option>
-</param>
-<when value="T">
-<conditional name="gmtSource">
-<param name="refgmtSource" type="select"
-label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
-<option value="indexed" selected="true">Use a built-in gene set</option>
-<option value="history">Use a gene set from my history</option>
-<option value="both">Add a gene set from my history to a built in gene set</option>
-</param>
-<when value="indexed">
-<param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
-<options from_data_table="gseaGMT_3.1">
-<filter type="sort_by" column="2" />
-<validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
-</options>
-</param>
-</when>
-<when value="history">
-<param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
-</when>
-<when value="both">
-<param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
-<param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
-<options from_data_table="gseaGMT_4">
-<filter type="sort_by" column="2" />
-<validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/>
-</options>
-</param>
-</when>
-</conditional>
-</when>
-<when value="F">
-</when>
-</conditional>
--->
 <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"
 help="Conventional default value of 0.05 recommended"/>
 <param name="fdrtype" type="select" label="FDR (Type II error) control method"
 help="Use fdr or bh typically to control for the number of tests in a reliable way">
 <option value="fdr" selected="true">fdr</option>
 grid(col="lightgray", lty="dotted")
 dev.off()
 }
 boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname)
-{   #
+{
 nc = ncol(rawrs)
-for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
+##### for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
 fullnames = colnames(rawrs)
 newcolnames = substr(colnames(rawrs),1,20)
 colnames(rawrs) = newcolnames
 newcolnames = substr(colnames(cleanrs),1,20)
 colnames(cleanrs) = newcolnames
 defpar = par(no.readonly=T)
-print.noquote('raw contig counts by sample:')
+print.noquote('@@@ Raw contig counts by sample:')
 print.noquote(summary(rawrs))
-print.noquote('normalised contig counts by sample:')
+print.noquote('@@@ Library size contig counts by sample:')
 print.noquote(summary(cleanrs))
 pdf(pdfname)
 par(mfrow=c(1,2))
-boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
+boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main='log2 raw counts')
 grid(col="lightgray",lty="dotted")
-boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
+boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('log2 counts after ',maint))
 grid(col="lightgray",lty="dotted")
 dev.off()
 pdfname = "sample_counts_histogram.pdf"
 nc = ncol(rawrs)
 print.noquote(paste('Using ncol rawrs=',nc))
 }
 cumPlot = function(rawrs,cleanrs,maint,myTitle)
 {   # updated to use ecdf
-pdfname = "Filtering_rowsum_bar_charts.pdf"
+pdfname = "Differential_rowsum_bar_charts.pdf"
 defpar = par(no.readonly=T)
 lrs = log(rawrs,10)
 lim = max(lrs)
 pdf(pdfname)
 par(mfrow=c(2,1))
 }
 edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_Voom=F,out_DESeq2=F,fdrtype='fdr',priordf=5,
 fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F,
-filterquantile=0.2, subjects=c(),mydesign=NULL,
+filterquantile=0.2, subjects=c(),TreatmentName="Rx",ControlName="Ctrl",mydesign=NULL,
 doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19',
 histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
 doCook=F,DESeq_fitType="parameteric",robust_meth='ordinary')
 {
+logf = file('Differential.log', open = "a")
-run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
+sink(logf,type = c("output", "message"))
+run_edgeR = function(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
 {
-sink('edgeR.log')
+logf = file('edgeR.log', open = "a")
+sink(logf,type = c("output", "message"))
 #### Setup myDGEList object
 myDGEList = DGEList(counts=workCM, group = group)
 myDGEList = calcNormFactors(myDGEList)
 if (robust_meth == 'ordinary') {
 myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
 }
 DGLM = glmFit(myDGEList,design=mydesign)
 DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed
-efflib = myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
 normData = cpm(myDGEList)
-### normData = (1e+06*myDGEList\$counts/efflib)
 uoutput = cbind(
 Name=as.character(rownames(myDGEList\$counts)),
 DE\$table,
 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums,normData,
 qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
 abline(0,1,lwd=3)
 points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
 dev.off()
 uniqueg = unique(group)
-#### Plot MDS
-sample_colors =  match(group,levels(group))
-sampleTypes = levels(factor(group))
-print.noquote(sampleTypes)
-pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
-plotMDS.DGEList(myDGEList,main=paste("edgeR MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
-legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
-grid(col="blue")
-dev.off()
-colnames(normData) = paste( colnames(normData),'N',sep="_")
-print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
-nzd = data.frame(log(nonzerod + 1e-2,10))
-try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_') ))
 write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F)
 tt = cbind(
 Name=as.character(rownames(myDGEList)),
 DE\$table,
 adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
 Dispersion=myDGEList\$tagwise.dispersion,totreads=cmrowsums
 )
-print.noquote("# edgeR Top tags\n")
+print.noquote("@@ edgeR Top tags\n")
 tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely
-tt = tt[order(DE\$table\$PValue),]
 print.noquote(tt[1:50,])
 deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
 nsig = length(deTags)
-print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F)
+print.noquote(paste('@@',nsig,'tags significant at adj p=',fdrthresh))
 deColours = ifelse(deTags,'red','black')
 pdf(paste("edgeR",mt,"BCV_vs_abundance.pdf",sep="_"))
 plotBCV(myDGEList, cex=0.3, main="Biological CV vs abundance")
 dev.off()
 dg = myDGEList[order(DE\$table\$PValue),]
-#normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg)))
 outpdfname= paste("edgeR",mt,"top_100_heatmap.pdf",sep="_")
-hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
+ocpm = normData[order(DE\$table\$PValue),]
+ocpm = ocpm[c(1:100),]
+hmap2(ocpm,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste(myTitle,'Heatmap'))
 outSmear = paste("edgeR",mt,"smearplot.pdf",sep="_")
 outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
 smearPlot(myDGEList=myDGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
 qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf=paste('edgeR',mt,'qqplot.pdf',sep='_'))
-norm.factor = myDGEList\$samples\$norm.factors
 topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ]
 edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR))
 edgeRcounts = rep(0, length(allgenes))
 edgeRcounts[edgeRcountsindex] = 1  # Create venn diagram of hits
 sink()
 run_DESeq2 = function(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType)
 {
-sink("DESeq2.log")
+logf = file("DESeq2.log", open = "a")
+sink(logf,type = c("output", "message"))
 # DESeq2
 require('DESeq2')
 library('RColorBrewer')
 if (length(subjects) == 0)
 {
 deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
 } else {
 pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
 deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
 }
-#DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype)
-#rDESeq = results(DESeq2)
-#newCountDataSet(workCM, group)
 deSeqDatsizefac = estimateSizeFactors(deSEQds)
 deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType)
 resDESeq = nbinomWaldTest(deSeqDatdisp)
 rDESeq = as.data.frame(results(resDESeq))
 rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls)
 sdmat = as.matrix(sampledists)
 pdf(paste("DESeq2",mt,"sample_distance_plot.pdf",sep="_"))
 heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"),
 col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
 dev.off()
-###outpdfname=paste("DESeq2",mt,"top50_heatmap.pdf",sep="_")
-###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle))
-sink()
 result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) )
 if ("try-error" %in% class(result)) {
 print.noquote('DESeq2 plotPCA failed.')
 } else {
 pdf(paste("DESeq2",mt,"PCA_plot.pdf",sep="_"))
 #### wtf - print? Seems needed to get this to work
 print(ppca)
 dev.off()
 }
+sink()
 return(DESeqcounts)
 }
 run_Voom = function(workCM,pdata,subjects,group,mydesign,mt,out_Voom)
 {
-sink('VOOM.log')
+logf = file('VOOM.log', open = "a")
-if (doedgeR == F) {
+sink(logf,type = c("output", "message"))
-#### Setup myDGEList object
+if (doedgeR == F) {
-myDGEList = DGEList(counts=workCM, group = group)
+#### Setup myDGEList object
-myDGEList = calcNormFactors(myDGEList)
+myDGEList = DGEList(counts=workCM, group = group)
-myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
+myDGEList = calcNormFactors(myDGEList)
-myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
+myDGEList = estimateGLMCommonDisp(myDGEList,mydesign)
-myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
+myDGEList = estimateGLMTrendedDisp(myDGEList,mydesign)
-}
+myDGEList = estimateGLMTagwiseDisp(myDGEList,mydesign)
-pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
+}
-dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
+pdf(paste("VOOM",mt,"mean_variance_plot.pdf",sep='_'))
-dev.off()
+dat.voomed <- voom(myDGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL)
-# Use limma to fit data
+dev.off()
-fit = lmFit(dat.voomed, mydesign)
+# Use limma to fit data
-fit = eBayes(fit)
+fit = lmFit(dat.voomed, mydesign)
-rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
+fit = eBayes(fit)
-qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
+rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none")
-rownames(rvoom) = rownames(workCM)
+qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf=paste('VOOM',mt,'qqplot.pdf',sep='_'))
-rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls)
+rownames(rvoom) = rownames(workCM)
-srvoom = rvoom[order(rvoom\$P.Value),]
+rvoom = cbind(Contig=rownames(workCM),rvoom,NReads=cmrowsums,URL=contigurls)
-cat("# VOOM top 50\n")
+srvoom = rvoom[order(rvoom\$P.Value),]
-print(srvoom[1:50,])
+cat("# VOOM top 50\n")
-write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
+print(srvoom[1:50,])
-# Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
+write.table(srvoom,file=out_Voom, quote=FALSE, sep="\t",row.names=F)
-topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
+# Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma
-voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
+topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
-voomcounts = rep(0, length(allgenes))
+voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
-voomcounts[voomcountsindex] = 1
+voomcounts = rep(0, length(allgenes))
-sink()
+voomcounts[voomcountsindex] = 1
-return(voomcounts)
+sink()
-}
+return(voomcounts)
+}
 #### data cleaning and analsis control starts here
 # Error handling
 nugroup = length(unique(group))
 if (nugroup!=2){
 print("Number of conditions identified in experiment does not equal 2")
 rawrs = rowSums(Count_Matrix)
 nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes
 nzN = nrow(nonzerod)
 nzrs = rowSums(nonzerod)
 zN = allN - nzN
-print('# Quantiles for non-zero row counts:',quote=F)
+print('@@@ Quantiles for non-zero row counts:',quote=F)
 print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
 if (useNDF == T)
 {
 gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
 lo = colSums(Count_Matrix[!gt1rpin3,])
 contigurls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>")
 } else {
 print("@@ using genecards substitution for urls")
 contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>")
 }
-print.noquote("# urls sample")
+print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=',')))
-print.noquote(head(contigurls))
-print(paste("# Total low count contigs per sample = ",table(lo)),quote=F)
 cmrowsums = rowSums(workCM)
 TName=unique(group)[1]
 CName=unique(group)[2]
 if (is.null(mydesign)) {
 if (length(subjects) == 0)
 }
 }
 print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
 print.noquote('Using design matrix:')
 print.noquote(mydesign)
+normData = cpm(workCM)*1e6
+colnames(normData) = paste( colnames(workCM),'N',sep="_")
+print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=',')))
 if (doedgeR == T) {
-eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR)
+eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod)
 myDGEList = eres\$myDGEList
 edgeRcounts = eres\$edgeRcounts
+#### Plot MDS
+sample_colors =  match(group,levels(group))
+sampleTypes = levels(factor(group))
+print.noquote(sampleTypes)
+pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_'))
+plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
+legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
+grid(col="blue")
+dev.off()
+scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors
+normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale))
+try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T)
 }
 if (doDESeq2 == T) {  DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) }
 if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) }
 counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
 }
 if (nrow(counts.dataframe > 1)) {
 counts.venn = vennCounts(counts.dataframe)
-vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_")
+vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_")
 pdf(vennf)
 vennDiagram(counts.venn,main=vennmain,col="maroon")
 dev.off()
 }
 } #### doDESeq2 or doVoom
+sink()
 }
 #### Done
 ]]>
-###sink(stdout(),append=T,type="message")
 builtin_gmt = ""
 history_gmt = ""
 history_gmt_name = ""
 out_edgeR = F
 out_DESeq2 = F
 out_Voom = "$out_VOOM"
-edgeR_robust_meth = "ordinary" # control robust deviance options
+edgeR_robust_meth = "ordinary"
 doDESeq2 = $DESeq2.doDESeq2
 doVoom = $doVoom
 doCamera = F
 doedgeR = $edgeR.doedgeR
 edgeR_priordf = 10
 org = "$input1.dbkey"
 if (org == "") { org = "hg19"}
 fdrtype = "$fdrtype"
 fdrthresh = $fdrthresh
 useNDF = $useNDF
-fQ = $fQ
+fQ = $fQ # non-differential centile cutoff
 myTitle = "$title"
 sids = strsplit("$subjectids",',')
 subjects = unlist(sids)
 nsubj = length(subjects)
 TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
 cat('Got TCols=')
 cat(TCols)
 cat('; CCols=')
 cat(CCols)
 cat('\n')
+<![CDATA[
 useCols = c(TCols,CCols)
 if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
 Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t')
 snames = colnames(Count_Matrix)
 nsamples = length(snames)
-if (nsubj &gt; 0 &amp; nsubj != nsamples) {
+if (nsubj >  0 & nsubj != nsamples) {
 options("show.error.messages"=T)
 mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
 write(mess, stderr())
 quit(save="no",status=4)
 }
 if (length(subjects) != 0) {subjects = subjects[useCols]}
-Count_Matrix = Count_Matrix[,useCols]
+Count_Matrix = Count_Matrix[,useCols] ### reorder columns
 rn = rownames(Count_Matrix)
 islib = rn %in% c('librarySize','NotInBedRegions')
 LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
 Count_Matrix = Count_Matrix[subset(rn,! islib),]
 group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )
 group = factor(group, levels=c(ControlName,TreatmentName))
 colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")
 results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2,
 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
-myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
+myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName,
 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth)
 sessionInfo()
+sink()
+]]>
 </configfile>
 </configfiles>
 <help>
 **What it does**
 **Input**
 Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
 and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the
 non-negative integer count of reads from one sample overlapping the feature.
 The matrix must have a header row uniquely identifying the source samples, and unique row names in
 the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
+They must be unique and R names or they will be mangled - please read the fine R docs for the rules on identifiers.
 **Specifying comparisons**
 This is basically dumbed down for two factors - case vs control.

Mercurial > repos > fubar > differential_count_models

comparison rgedgeRpaired_nocamera.xml @ 124:731315bd6e48 draft