view script_deseq.R @ 0:8c5de60b3c04 draft

Uploaded
author vladimir-daric
date Fri, 25 Apr 2014 05:05:39 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env Rscript

###################################
# @author: Vladimir Daric, Rachel Legendre, Coline Billerey, Alban Ott, Claire Wallon
# @copyright: eBio ebio@igmors.u-psud.fr
# @license: GPL v3



#######################################################
# GetExecutingDirectory (for galaxy)
#######################################################

getScriptDirectory = function() {
  cheminScript = unlist(strsplit(commandArgs()[4],split="="))[2]
  cheminScriptSplit = unlist(strsplit(cheminScript,split="/"))
  return(paste(cheminScriptSplit[1:(length(cheminScriptSplit)-1)],collapse="/"))
}


####################################################
#						MAIN PROGRAM 
####################################################


scriptDirectory = getScriptDirectory()
source(paste(scriptDirectory,"/GetOptions.R",sep=""))
source(paste(scriptDirectory,"/function_deseq.R",sep=""))

optArgs=getopt(
		rbind(
				c('IsReplicate'     , 'r', 0, 'logical'  , "FALSE"      , "Is there any replicates ?"),
				c('help'            , 'h', 0, 'logical'  , "FALSE"      , "Display help"),
				c('IsHeader'        , 'b', 0, 'logical'  , "FALSE"      , "Is there a header in the HTSeq-count file ?"),
				c('alpha'           , 'a', 1, 'numeric'  , "0.05"       , "p-value threshold"),
				c('foldchange'      , 'f', 1, 'numeric'  , "2"          , "foldchange threshold"),
				c('HTSeqcount'      , 'c', 1, 'character', ""           , "HTSeq-count files"),
				c('condition'       , 'g', 1, 'character', ""           , "Biological conditions"),
				c('replicate'       , 'l', 1, 'character', ""           , "Names of each replicate"),
				c('OutputDir'       , 'p', 1, 'character', "DESeq_out"  , "Output directory"),
				c('ImageSize'       , 's', 1, 'numeric'  , "1960"       , "Image size"),
				c('PPI'             , 'i', 1, 'numeric'  , "200"        , "Image resolution"),
				c('logBase'         , 'o', 1, 'numeric'  , "10"         , "log base for image and result"),
				c('geneNumber'      , 'z', 1, 'numeric'  , "30"         , "Gene number for heatmap of most highly expressed genes."),
				c('sharingMode'     , 'v', 1, 'character', "maximum"    , "Sharing mode of estimate dispersion fonction (maximum, fit-only, gene-est-only)"),
				c('method'	    	, 'w', 1, 'character', "pooled"   	, "Method of estimate dispersion fonction (pooled, per-condition or blind)"),
				c('fitType'         , 't', 1, 'character', "parametric" , "Fit type of estimate dispersion fonction (local or parametric)")
		)
)
attach(optArgs)
sample = data.frame(replicate=replicate, files=HTSeqcount, condition=condition,stringsAsFactors=F)

if (help) {
	#AO : deprecated
	cat("DESeqTools version eBio
					Commande : script_deseq.R [options] file1 file2 file3 file4 .... 
					Exemple: ./script_deseq.R --condition=Cond1 --replicate=replicate1 -HTSeqcount=file_count1.txt
					--condition=Cond1 --replicate=replicate2 -HTSeqcount=file_count2.txt  
					--condition=Cond2 --replicate=replicate1 -HTSeqcount=file_count3.txt 
					--condition=Cond2 --replicate=replicate2 -HTSeqcount=file_count4.txt")
	stop("",call.=F)
}

## Supress warning
options(warn=-1)

if (length(unique(condition))!=2){
	stop("Please specify exactly TWO and ONLY two diferent condition names",call.=F)
}

if (length(condition)!=length(HTSeqcount) || length(replicate) != length(HTSeqcount)){
	stop("You must specify as many conditions and replicates as files. In your input I\'ve found ",
			length(HTSeqcount)," files, ",
			length(condition)," conditions, ",
			length(replicate)," replicates", call.=F)
}

# Chargement des packages et des fonctions
suppressPackageStartupMessages(library("DESeq"))
suppressPackageStartupMessages(library("RColorBrewer"))
suppressPackageStartupMessages(library("gplots"))
suppressPackageStartupMessages(library("plotrix"))


## conditions
conds <- unique(sample$condition)

########################################################
# Counts matrix creation
countTable = CreateCountMatrix(sample, IsHeader, IsReplicate)

# suppress non expressed transcript in all conditions
countTable=countTable[apply(countTable,1,sum)!=0,]

# Make relations between names and conditions
design = data.frame(names = sample$replicate, condition = sample$condition )
 
# DESeq main analysis, NB : type of analysis depend on IsReplicate
cds = DE_analysis(countTable,design,IsReplicate,mode=sharingMode, method=method, fitType=fitType)
DESeqResults = nbinomTest( cds,conds[1],conds[2])

# Plot hist of null counts
NullCondPlot(cds,file=paste(OutputDir,"_NullCondPlot.png", sep=""),ImageSize=ImageSize, PPI=PPI)

# plot of total read count per sample
TotReadSamplePlot(cds, file=paste(OutputDir,"_TotSamplePlot.png", sep="") ,ImageSize=ImageSize, PPI=PPI)

# plot hist of pvalues
#	|_->if there is replicate    : adjusted pvalue
#	|_->if there is no replicate : raw pvalue
PlotHistPval(DESeqResults,IsReplicate,file=paste(OutputDir,"_histPval.png", sep=""),ImageSize=ImageSize, PPI=PPI)

## Violin plot for raw data
PlotViolin(cds, FALSE, paste(OutputDir,"_RawViolin.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase)

## Violin plot for raw data
PlotViolin(cds, TRUE, paste(OutputDir,"_NormViolin.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase)

# plot volcano
PlotVolcano(res=DESeqResults, cond1=conds[1], cond2=conds[2], alpha=alpha, fold=foldchange, rep=IsReplicate, file=paste(OutputDir,"_volcanoplot.png", sep=""), ImageSize=ImageSize, PPI=PPI, logBase=logBase)

# plot MA and dispersion
MAPlot(DESeqResults,file=paste(OutputDir,"_MAplot.png", sep=""), ImageSize=ImageSize, PPI=PPI)
EstDispPlot(cds, method, file=paste(OutputDir,"_EstDispPlot.png", sep=""), ImageSize=ImageSize, PPI=PPI)

# compute clustering
if (IsReplicate){
	cdsBlind = estimateDispersions( cds, method = "blind",sharingMode=sharingMode, fitType=fitType  )
}else{
	cdsBlind = estimateDispersions( cds, method = "blind",sharingMode='fit-only', fitType=fitType  )
}
vsd = varianceStabilizingTransformation( cdsBlind )

# plot clustering and correlation
HeatCountPlot(cds,vsd,file=paste(OutputDir,"_HeatCountPlot.png", sep=""), geneNumber, ImageSize=ImageSize, PPI=PPI)
HeatDistPlot(vsd,cdsBlind,file=paste(OutputDir,"_HeatDistPlot.png", sep=""), ImageSize=ImageSize, PPI=PPI)

## export result
WriteResults (DESeqResults, countTable, counts( cds, normalized=TRUE ), conds[1], conds[2], alpha, foldchange, IsReplicate, OutputDir, logBase=logBase)