Mercurial > repos > vladimir-daric > ebio_deseq
view script_deseq.R @ 0:8c5de60b3c04 draft
Uploaded
author | vladimir-daric |
---|---|
date | Fri, 25 Apr 2014 05:05:39 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env Rscript ################################### # @author: Vladimir Daric, Rachel Legendre, Coline Billerey, Alban Ott, Claire Wallon # @copyright: eBio ebio@igmors.u-psud.fr # @license: GPL v3 ####################################################### # GetExecutingDirectory (for galaxy) ####################################################### getScriptDirectory = function() { cheminScript = unlist(strsplit(commandArgs()[4],split="="))[2] cheminScriptSplit = unlist(strsplit(cheminScript,split="/")) return(paste(cheminScriptSplit[1:(length(cheminScriptSplit)-1)],collapse="/")) } #################################################### # MAIN PROGRAM #################################################### scriptDirectory = getScriptDirectory() source(paste(scriptDirectory,"/GetOptions.R",sep="")) source(paste(scriptDirectory,"/function_deseq.R",sep="")) optArgs=getopt( rbind( c('IsReplicate' , 'r', 0, 'logical' , "FALSE" , "Is there any replicates ?"), c('help' , 'h', 0, 'logical' , "FALSE" , "Display help"), c('IsHeader' , 'b', 0, 'logical' , "FALSE" , "Is there a header in the HTSeq-count file ?"), c('alpha' , 'a', 1, 'numeric' , "0.05" , "p-value threshold"), c('foldchange' , 'f', 1, 'numeric' , "2" , "foldchange threshold"), c('HTSeqcount' , 'c', 1, 'character', "" , "HTSeq-count files"), c('condition' , 'g', 1, 'character', "" , "Biological conditions"), c('replicate' , 'l', 1, 'character', "" , "Names of each replicate"), c('OutputDir' , 'p', 1, 'character', "DESeq_out" , "Output directory"), c('ImageSize' , 's', 1, 'numeric' , "1960" , "Image size"), c('PPI' , 'i', 1, 'numeric' , "200" , "Image resolution"), c('logBase' , 'o', 1, 'numeric' , "10" , "log base for image and result"), c('geneNumber' , 'z', 1, 'numeric' , "30" , "Gene number for heatmap of most highly expressed genes."), c('sharingMode' , 'v', 1, 'character', "maximum" , "Sharing mode of estimate dispersion fonction (maximum, fit-only, gene-est-only)"), c('method' , 'w', 1, 'character', "pooled" , "Method of estimate dispersion fonction (pooled, per-condition or blind)"), c('fitType' , 't', 1, 'character', "parametric" , "Fit type of estimate dispersion fonction (local or parametric)") ) ) attach(optArgs) sample = data.frame(replicate=replicate, files=HTSeqcount, condition=condition,stringsAsFactors=F) if (help) { #AO : deprecated cat("DESeqTools version eBio Commande : script_deseq.R [options] file1 file2 file3 file4 .... Exemple: ./script_deseq.R --condition=Cond1 --replicate=replicate1 -HTSeqcount=file_count1.txt --condition=Cond1 --replicate=replicate2 -HTSeqcount=file_count2.txt --condition=Cond2 --replicate=replicate1 -HTSeqcount=file_count3.txt --condition=Cond2 --replicate=replicate2 -HTSeqcount=file_count4.txt") stop("",call.=F) } ## Supress warning options(warn=-1) if (length(unique(condition))!=2){ stop("Please specify exactly TWO and ONLY two diferent condition names",call.=F) } if (length(condition)!=length(HTSeqcount) || length(replicate) != length(HTSeqcount)){ stop("You must specify as many conditions and replicates as files. In your input I\'ve found ", length(HTSeqcount)," files, ", length(condition)," conditions, ", length(replicate)," replicates", call.=F) } # Chargement des packages et des fonctions suppressPackageStartupMessages(library("DESeq")) suppressPackageStartupMessages(library("RColorBrewer")) suppressPackageStartupMessages(library("gplots")) suppressPackageStartupMessages(library("plotrix")) ## conditions conds <- unique(sample$condition) ######################################################## # Counts matrix creation countTable = CreateCountMatrix(sample, IsHeader, IsReplicate) # suppress non expressed transcript in all conditions countTable=countTable[apply(countTable,1,sum)!=0,] # Make relations between names and conditions design = data.frame(names = sample$replicate, condition = sample$condition ) # DESeq main analysis, NB : type of analysis depend on IsReplicate cds = DE_analysis(countTable,design,IsReplicate,mode=sharingMode, method=method, fitType=fitType) DESeqResults = nbinomTest( cds,conds[1],conds[2]) # Plot hist of null counts NullCondPlot(cds,file=paste(OutputDir,"_NullCondPlot.png", sep=""),ImageSize=ImageSize, PPI=PPI) # plot of total read count per sample TotReadSamplePlot(cds, file=paste(OutputDir,"_TotSamplePlot.png", sep="") ,ImageSize=ImageSize, PPI=PPI) # plot hist of pvalues # |_->if there is replicate : adjusted pvalue # |_->if there is no replicate : raw pvalue PlotHistPval(DESeqResults,IsReplicate,file=paste(OutputDir,"_histPval.png", sep=""),ImageSize=ImageSize, PPI=PPI) ## Violin plot for raw data PlotViolin(cds, FALSE, paste(OutputDir,"_RawViolin.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase) ## Violin plot for raw data PlotViolin(cds, TRUE, paste(OutputDir,"_NormViolin.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase) # plot volcano PlotVolcano(res=DESeqResults, cond1=conds[1], cond2=conds[2], alpha=alpha, fold=foldchange, rep=IsReplicate, file=paste(OutputDir,"_volcanoplot.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase) # plot MA and dispersion MAPlot(DESeqResults,file=paste(OutputDir,"_MAplot.png", sep=""), ImageSize=ImageSize, PPI=PPI) EstDispPlot(cds, method, file=paste(OutputDir,"_EstDispPlot.png", sep=""), ImageSize=ImageSize, PPI=PPI) # compute clustering if (IsReplicate){ cdsBlind = estimateDispersions( cds, method = "blind",sharingMode=sharingMode, fitType=fitType ) }else{ cdsBlind = estimateDispersions( cds, method = "blind",sharingMode='fit-only', fitType=fitType ) } vsd = varianceStabilizingTransformation( cdsBlind ) # plot clustering and correlation HeatCountPlot(cds,vsd,file=paste(OutputDir,"_HeatCountPlot.png", sep=""), geneNumber, ImageSize=ImageSize, PPI=PPI) HeatDistPlot(vsd,cdsBlind,file=paste(OutputDir,"_HeatDistPlot.png", sep=""), ImageSize=ImageSize, PPI=PPI) ## export result WriteResults (DESeqResults, countTable, counts( cds, normalized=TRUE ), conds[1], conds[2], alpha, foldchange, IsReplicate, OutputDir, logBase=logBase)