Mercurial > repos > yhoogstrate > edger_with_design_matrix
view edgeR_DGE.xml @ 7:61e42740b13a draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 04:38:21 -0500 |
parents | 149a52c74f39 |
children | b8b3d63b60ab |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?> <tool id="edger_dge" name="edgeR Differential GeneExpression Analysis"> <description>RNA-Seq expression analysis using edgeR (R package)</description> <command> <!-- The following script is written in the "Cheetah" language: http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html --> R CMD BATCH --vanilla --slave '--args $design_matrix $contrast $output_count_edgeR $output_cpm output_FPXM $output_raw_counts $qc $output_MDSplot $output_BCVplot $output_MAplot smearPlot ' $R_script $output_R </command> <inputs> <param name="design_matrix" type="data" format="tabular" help="Design matrix" /> <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info." /> <param name="qc" type="select" label="Quality control reports"> <option value="true">Yes</option> <option value="false" selected="true">No</option> </param> <param name="debug" type="select" label="R Debug output"> <option value="true" selected="true"> Yes</option> <option value="false">No</option> </param> </inputs> <configfiles> <configfile name="R_script"> library(edgeR) ## Fetch commandline arguments args <- commandArgs(trailingOnly = TRUE) designmatrix = args[1] contrast = args[2] output_1 = args[3] output_2 = args[4] output_3 = args[5] ##FPKM file - to be implemented output_4 = args[6] QC = nchar(args[7]) > 0 output_5 = args[8] output_6 = args[9] output_7 = args[10] output_8 = args[11] library(edgeR) raw_data <- read.delim(designmatrix,header=T,stringsAsFactors=T) ## Obtain read-counts read_counts = read.delim(as.character(raw_data[1,1]),header=F,stringsAsFactors=F,row.names=1) for(i in 2:length(raw_data[,1])) { print("parsing counts from:") print(raw_data[i,1]) read_counts = cbind(read_counts,read.delim(as.character(raw_data[i,1]),header=F,stringsAsFactors=F,row.names=1)) print(i) } ## Filter for HTSeq predifined counts: exclude_HTSeq = c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique") exclude_DEXSeq = c("_ambiguous","_empty","_lowaqual","_notaligned") exclude = match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts)) exclude = exclude[is.na(exclude)==0] if(length(exclude) != 0) { read_counts = read_counts[-exclude,] } colnames(read_counts) = raw_data[,2] dge = DGEList(counts=read_counts,genes=rownames(read_counts)) design_tmp <- raw_data[3:length(raw_data)] rownames(design_tmp) <- colnames(dge) formula = paste(c("~0",colnames(design_tmp)),collapse = " + ") design <- model.matrix(as.formula(formula),design_tmp) prefixes = colnames(design_tmp)[attr(design,"assign")] avoid = nchar(prefixes) == nchar(colnames(design)) replacements = substr(colnames(design),nchar(prefixes)+1,nchar(colnames(design))) replacements[avoid] = colnames(design)[avoid] colnames(design) = replacements print("Calculating normalization factors...") dge = calcNormFactors(dge) print("Estimating common dispersion...") dge = estimateGLMCommonDisp(dge,design) print("Estimating trended dispersion...") dge = estimateGLMTrendedDisp(dge,design) print("Estimating tagwise dispersion...") dge = estimateGLMTagwiseDisp(dge,design) if (QC == TRUE) { print("Creating QC plots...") #### MDS Plot pdf(output_5) plotMDS(dge, main="edgeR MDS Plot") dev.off() #### Biological coefficient of variation plot pdf(output_6) plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") dev.off() } print("Fitting GLM...") fit = glmFit(dge,design) print(paste("Performing likelihood ratio test: ",contrast,sep="")) cont <- c(contrast) cont <- makeContrasts(contrasts=cont, levels=design) lrt <- glmLRT(fit, contrast=cont[,1]) print(paste("Exporting to file: ",output_1,sep="")) write.table(file=output_1,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=T) write.table(file=output_2,cpm(dge,normalized.lib.sizes=TRUE),sep="\t") ## todo EXPORT FPKM write.table(file=output_4,dge\$counts,sep="\t") if (QC == TRUE) { print("Creating MA plots...") etable <- topTags(lrt, n=nrow(dge))\$table etable <- etable[order(etable\$FDR), ] pdf(output_7) with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) with(subset(etable, FDR<0.05), points(logCPM, logFC, pch=20, col="red")) abline(h=c(-1,1), col="blue") dev.off() } print("Done!") </configfile> </configfiles> <outputs> <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - table" /> <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" /> <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts" /> <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output" > <filter>(debug == "true")</filter> </data> <data format="pdf" name="output_MDSplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot"> <filter>(qc == "true")</filter> </data> <data format="pdf" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot"> <filter>(qc == "true")</filter> </data> <data format="pdf" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot"> <filter>(qc == "true")</filter> </data> </outputs> <help> input: Design matrix using "create Design matrix" tool input: contrast </help> </tool>