changeset 19:2eff4cd2c205 draft

Uploaded
author jbrayet
date Wed, 19 Aug 2015 05:07:38 -0400
parents 70c764cca56f
children f6c6a0156c8a
files Sequenza_analysis.R
diffstat 1 files changed, 214 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Sequenza_analysis.R	Wed Aug 19 05:07:38 2015 -0400
@@ -0,0 +1,214 @@
+#!/usr/bin/env Rscript
+
+
+#parse command line
+args <- commandArgs(trailingOnly = TRUE)
+USAGE ="\n\nUSAGE: ./Sequenza_analysis.R -normal normal.mpileup -tumor tumor.mpileup -out resdir -gcContent gcContentFile -name sampleName -cellularity cellularityValue -ploidy ploidyValue\n\nkill execution"
+if("-h"%in%args | "--h"%in%args |"-help"%in%args | "--help"%in%args){
+	write(paste("\n---Help---",USAGE,sep=""),stdout())
+	quit()
+}
+if(length(args)!=14){
+	write(paste("\n---ERROR---\n\nThis script take exactly 7 arguments",USAGE,sep=""),stdout())	
+	quit()
+}
+
+#input files
+NORMALPILEUP =args[(which(args=="-normal")+1)]
+TUMORPILEUP =args[(which(args=="-tumor")+1)]
+SAMPLEID =args[(which(args=="-name")+1)]
+GCCONTENTFILE =args[(which(args=="-gcContent")+1)]
+CELLULARITY =args[(which(args=="-cellularity")+1)]
+PLOIDY =args[(which(args=="-ploidy")+1)]
+OUT_DIR =args[(which(args=="-out")+1)]
+
+###################Python PATH####################
+PYTHON_PATH = "python2.7"
+##################################################
+
+library("sequenza")
+
+#Note on the helper program sequenza-utils.py.
+UTILS =system.file("exec", "sequenza-utils.py", package="sequenza")
+
+##########################################################
+NORMALPILEUP_GZIP =NORMALPILEUP
+TUMORPILEUP_GZIP =TUMORPILEUP
+##########################################################
+
+#Generate a seqz file
+system(paste("time ",PYTHON_PATH," ",UTILS," pileup2seqz -gc ",GCCONTENTFILE," -n ",NORMALPILEUP_GZIP," -t ",TUMORPILEUP_GZIP," | gzip > ",OUT_DIR,"/out.seqz.gz",sep=""))
+system(paste("time ",PYTHON_PATH," ",UTILS," seqz-binning -w 50 -s ",OUT_DIR,"/out.seqz.gz | gzip > ",OUT_DIR,"/out.small.seqz.gz",sep=""))
+
+#### Exploring the seqz file and depth ratio normalization details
+##Read the seqz file
+data.file =paste(OUT_DIR,"/out.small.seqz.gz",sep="")
+seqz.data <- read.seqz(data.file)
+
+#Normalization of depth ratio
+gc.stats <- gc.sample.stats(data.file)
+
+gc.vect <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
+
+seqz.data$adjusted.ratio <- seqz.data$depth.ratio/gc.vect[as.character(seqz.data$GC.percent)]
+
+png(paste(OUT_DIR,"/",SAMPLEID,"_gc_stat.png",sep=""),width=1000, type = "cairo")
+    par(mfrow = c(1,2), cex = 1, las = 1, bty = 'l')
+    matplot(gc.stats$gc.values, gc.stats$raw, type = 'b', col = 1, pch = c(1, 19, 1), lty = c(2, 1, 2), xlab = 'GC content (%)', ylab = 'Uncorrected depth ratio')
+    legend('topright', legend = colnames(gc.stats$raw), pch = c(1, 19, 1))
+    hist2(seqz.data$depth.ratio, seqz.data$adjusted.ratio, breaks = prettyLog, key = vkey, panel.first = abline(0, 1, lty = 2), xlab = 'Uncorrected depth ratio', ylab = 'GC-adjusted depth ratio')
+dev.off()
+
+
+
+if (as.numeric(PLOIDY)==0){
+
+	#### Analyzing sequencing data with sequenza
+
+	##Extract the information from the seqz file.
+	test <- sequenza.extract(data.file)
+
+	#Plot chromosome view with mutations, BAF, depth ratio and segments
+	chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1])
+
+
+	#Inference of cellularity and ploidy
+	CP <- sequenza.fit(test)
+
+	#Results of model fitting
+	sequenza.results(sequenza.extract = test, cp.table = CP, sample.id =SAMPLEID, out.dir=OUT_DIR)
+
+	#Confidence intervals, confidence region and point estimate
+	cint <- get.ci(CP)
+	cp.plot(CP)
+	cp.plot.contours(CP, add = TRUE, likThresh = c(0.95))
+
+	EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE)
+
+	pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25)
+
+		for(i in 1:length(EstimatedValues[,1])){
+
+			cellularity=EstimatedValues[i,1]
+			ploidy=EstimatedValues[i,2]
+			avg.depth.ratio=mean(test$gc$adj[,2])
+			seg.tab=na.exclude(do.call(rbind,test$segments))
+
+			cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio)
+			seg.tab=cbind(seg.tab,cn.alleles)
+
+			write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t")
+
+			if (i==1){
+
+				par(mfrow=c(2,1))
+
+				genome.view(seg.cn=seg.tab, info.type="CNt")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
+				mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2)
+
+				genome.view(seg.cn=seg.tab, info.type="AB")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
+
+			}else{
+
+				genome.view(seg.cn=seg.tab, info.type="CNt")
+				mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
+
+				genome.view(seg.cn=seg.tab, info.type="AB")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE)
+			}
+
+			sequenza:::plotRawGenome(test, cellularity = cellularity,  ploidy = ploidy)
+			mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
+		}
+
+	dev.off()
+	
+	cellularity <- cint$max.cellularity
+	ploidy <- cint$max.ploidy
+	avg.depth.ratio <- mean(test$gc$adj[, 2])
+	write(paste("cellularity : ",cellularity,"\nploidy : ",ploidy,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep=""))
+
+} else {
+
+	cellularity.user <- as.numeric(CELLULARITY)
+	ploidy.user <- as.numeric(PLOIDY)
+
+	#### Analyzing sequencing data with sequenza
+
+	##Extract the information from the seqz file.
+	test <- sequenza.extract(data.file)
+	
+	#Plot chromosome view with mutations, BAF, depth ratio and segments
+	chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1])
+
+
+	#Inference of cellularity and ploidy
+	CP <- sequenza.fit(test)
+
+	#Results of model fitting
+	sequenza.results(sequenza.extract = test, cp.table = CP, sample.id=SAMPLEID, out.dir=OUT_DIR,ploidy=ploidy.user,cellularity=cellularity.user)
+
+
+	#Confidence intervals, confidence region and point estimate
+	cint <- get.ci(CP)
+	cp.plot(CP)
+	cp.plot.contours(CP, add = TRUE, likThresh = c(0.95))
+
+	EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE)
+	userValues=c(as.numeric(cellularity.user),as.numeric(ploidy.user),as.numeric(0.0))
+	EstimatedValues=rbind(userValues,EstimatedValues)
+
+	pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25)
+
+		for(i in 1:length(EstimatedValues[,1])){
+
+			cellularity=EstimatedValues[i,1]
+			ploidy=EstimatedValues[i,2]
+			avg.depth.ratio=mean(test$gc$adj[,2])
+			seg.tab=na.exclude(do.call(rbind,test$segments))
+
+			cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio)
+			seg.tab=cbind(seg.tab,cn.alleles)
+
+			write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t")
+
+			if (i==1){
+
+				par(mfrow=c(2,1))
+
+				genome.view(seg.cn=seg.tab, info.type="CNt")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
+				mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2)
+
+				genome.view(seg.cn=seg.tab, info.type="AB")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
+
+			}else{
+
+				genome.view(seg.cn=seg.tab, info.type="CNt")
+				mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
+
+				genome.view(seg.cn=seg.tab, info.type="AB")
+				legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE)
+			}
+
+			sequenza:::plotRawGenome(test, cellularity = cellularity,  ploidy = ploidy)
+			mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
+		}
+
+	dev.off()
+
+	#Call CNVs and mutations using the estimated parameters
+	
+	avg.depth.ratio <- mean(test$gc$adj[, 2])
+	write(paste("cellularity : ",cellularity.user,"\nploidy : ",ploidy.user,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep=""))
+
+
+
+}
+
+
+
+