# HG changeset patch # User fubar # Date 1416912607 18000 # Node ID 731315bd6e4889d631e447a3961634c9cecf2a62 # Parent 51f998262ada8971362d7fba31a99de22a78dde2 Uploaded diff -r 51f998262ada -r 731315bd6e48 rgedgeRpaired_nocamera.xml --- a/rgedgeRpaired_nocamera.xml Tue Nov 25 05:36:47 2014 -0500 +++ b/rgedgeRpaired_nocamera.xml Tue Nov 25 05:50:07 2014 -0500 @@ -8,7 +8,7 @@ - rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" + rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "Differential_Counts" --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" @@ -22,12 +22,12 @@ + multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True"> + multiple="true" use_header_names="true" size="120" display="checkboxes" force_select="True"> Do not run VOOM - ",allgenes,"") } - print.noquote("# urls sample") - print.noquote(head(contigurls)) - print(paste("# Total low count contigs per sample = ",table(lo)),quote=F) + print.noquote(paste("@@ Total low count contigs per sample = ",paste(table(lo),collapse=','))) cmrowsums = rowSums(workCM) TName=unique(group)[1] CName=unique(group)[2] @@ -793,10 +736,26 @@ print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) print.noquote('Using design matrix:') print.noquote(mydesign) + normData = cpm(workCM)*1e6 + colnames(normData) = paste( colnames(workCM),'N',sep="_") + print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) + if (doedgeR == T) { - eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR) + eres = run_edgeR(workCM,pdata,subjects,group,priordf,robust_meth,mydesign,mt,cmrowsums,out_edgeR,nonzerod) myDGEList = eres\$myDGEList edgeRcounts = eres\$edgeRcounts + #### Plot MDS + sample_colors = match(group,levels(group)) + sampleTypes = levels(factor(group)) + print.noquote(sampleTypes) + pdf(paste("edgeR",mt,"MDSplot.pdf",sep='_')) + plotMDS.DGEList(myDGEList,main=paste("MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) + legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) + grid(col="blue") + dev.off() + scale <- myDGEList\$samples\$lib.size*myDGEList\$samples\$norm.factors + normCounts <- round(t(t(myDGEList\$counts)/scale)*mean(scale)) + try({boxPlot(rawrs=nzd,cleanrs=log2(normCounts+1),maint='Effects of TMM size normalisation',myTitle=myTitle,pdfname=paste("edgeR",mt,"raw_norm_counts_box.pdf",sep='_'))},T) } if (doDESeq2 == T) { DESeqcounts = run_DESeq2(workCM,pdata,subjects,group,out_DESeq2,mt,DESeq_fitType) } if (doVoom == T) { voomcounts = run_Voom(workCM,pdata,subjects,group,mydesign,mt,out_Voom) } @@ -823,25 +782,23 @@ if (nrow(counts.dataframe > 1)) { counts.venn = vennCounts(counts.dataframe) - vennf = paste("Venn",mt,"significant_genes_overlap.pdf",sep="_") + vennf = paste("Differential_venn",mt,"significant_genes_overlap.pdf",sep="_") pdf(vennf) vennDiagram(counts.venn,main=vennmain,col="maroon") dev.off() } } #### doDESeq2 or doVoom - +sink() } #### Done ]]> - -###sink(stdout(),append=T,type="message") builtin_gmt = "" history_gmt = "" history_gmt_name = "" out_edgeR = F out_DESeq2 = F out_Voom = "$out_VOOM" -edgeR_robust_meth = "ordinary" # control robust deviance options +edgeR_robust_meth = "ordinary" doDESeq2 = $DESeq2.doDESeq2 doVoom = $doVoom doCamera = F @@ -883,7 +840,7 @@ fdrtype = "$fdrtype" fdrthresh = $fdrthresh useNDF = $useNDF -fQ = $fQ +fQ = $fQ # non-differential centile cutoff myTitle = "$title" sids = strsplit("$subjectids",',') subjects = unlist(sids) @@ -895,12 +852,13 @@ cat('; CCols=') cat(CCols) cat('\n') + 0 & nsubj != nsamples) { options("show.error.messages"=T) mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','), 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=',')) @@ -908,21 +866,23 @@ quit(save="no",status=4) } if (length(subjects) != 0) {subjects = subjects[useCols]} -Count_Matrix = Count_Matrix[,useCols] +Count_Matrix = Count_Matrix[,useCols] ### reorder columns rn = rownames(Count_Matrix) islib = rn %in% c('librarySize','NotInBedRegions') LibSizes = Count_Matrix[subset(rn,islib),][1] # take first Count_Matrix = Count_Matrix[subset(rn,! islib),] group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) group = factor(group, levels=c(ControlName,TreatmentName)) -colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") +colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_Voom=out_Voom, out_DESeq2=out_DESeq2, fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', - myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, + myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,TreatmentName=TreatmentName,ControlName=ControlName, doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType,robust_meth=edgeR_robust_meth) sessionInfo() +sink() +]]> @@ -936,9 +896,11 @@ Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper and your fave gene model to generate inputs. Each row is a genomic feature (gene or exon eg) and each column the -non-negative integer count of reads from one sample overlapping the feature. +non-negative integer count of reads from one sample overlapping the feature. + The matrix must have a header row uniquely identifying the source samples, and unique row names in the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods. +They must be unique and R names or they will be mangled - please read the fine R docs for the rules on identifiers. **Specifying comparisons**