Mercurial > repos > jbrayet > sequenza
changeset 16:457837475adc draft
Deleted selected files
author | jbrayet |
---|---|
date | Tue, 18 Aug 2015 10:14:47 -0400 |
parents | 417f971f592d |
children | de5cce939dae |
files | Sequenza_analysis.R |
diffstat | 1 files changed, 0 insertions(+), 214 deletions(-) [+] |
line wrap: on
line diff
--- a/Sequenza_analysis.R Tue Aug 18 09:52:48 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,214 +0,0 @@ -#!/usr/bin/env Rscript - - -#parse command line -args <- commandArgs(trailingOnly = TRUE) -USAGE ="\n\nUSAGE: ./Sequenza_analysis.R -normal normal.mpileup -tumor tumor.mpileup -out resdir -gcContent gcContentFile -name sampleName -cellularity cellularityValue -ploidy ploidyValue\n\nkill execution" -if("-h"%in%args | "--h"%in%args |"-help"%in%args | "--help"%in%args){ - write(paste("\n---Help---",USAGE,sep=""),stdout()) - quit() -} -if(length(args)!=14){ - write(paste("\n---ERROR---\n\nThis script take exactly 7 arguments",USAGE,sep=""),stdout()) - quit() -} - -#input files -NORMALPILEUP =args[(which(args=="-normal")+1)] -TUMORPILEUP =args[(which(args=="-tumor")+1)] -SAMPLEID =args[(which(args=="-name")+1)] -GCCONTENTFILE =args[(which(args=="-gcContent")+1)] -CELLULARITY =args[(which(args=="-cellularity")+1)] -PLOIDY =args[(which(args=="-ploidy")+1)] -OUT_DIR =args[(which(args=="-out")+1)] - -###################Python PATH#################### -PYTHON_PATH = "python2.7" -################################################## - -library("sequenza") - -#Note on the helper program sequenza-utils.py. -UTILS =system.file("exec", "sequenza-utils.py", package="sequenza") - -########################################################## -NORMALPILEUP_GZIP =NORMALPILEUP -TUMORPILEUP_GZIP =TUMORPILEUP -########################################################## - -#Generate a seqz file -system(paste("time ",PYTHON_PATH," ",UTILS," pileup2seqz -gc ",GCCONTENTFILE," -n ",NORMALPILEUP_GZIP," -t ",TUMORPILEUP_GZIP," | gzip > ",OUT_DIR,"/out.seqz.gz",sep="")) -system(paste("time ",PYTHON_PATH," ",UTILS," seqz-binning -w 50 -s ",OUT_DIR,"/out.seqz.gz | gzip > ",OUT_DIR,"/out.small.seqz.gz",sep="")) - -#### Exploring the seqz file and depth ratio normalization details -##Read the seqz file -data.file =paste(OUT_DIR,"/out.small.seqz.gz",sep="") -seqz.data <- read.seqz(data.file) - -#Normalization of depth ratio -gc.stats <- gc.sample.stats(data.file) - -gc.vect <- setNames(gc.stats$raw.mean, gc.stats$gc.values) - -seqz.data$adjusted.ratio <- seqz.data$depth.ratio/gc.vect[as.character(seqz.data$GC.percent)] - -png(paste(OUT_DIR,"/",SAMPLEID,"_gc_stat.png",sep=""),width=1000) - par(mfrow = c(1,2), cex = 1, las = 1, bty = 'l') - matplot(gc.stats$gc.values, gc.stats$raw, type = 'b', col = 1, pch = c(1, 19, 1), lty = c(2, 1, 2), xlab = 'GC content (%)', ylab = 'Uncorrected depth ratio') - legend('topright', legend = colnames(gc.stats$raw), pch = c(1, 19, 1)) - hist2(seqz.data$depth.ratio, seqz.data$adjusted.ratio, breaks = prettyLog, key = vkey, panel.first = abline(0, 1, lty = 2), xlab = 'Uncorrected depth ratio', ylab = 'GC-adjusted depth ratio') -dev.off() - - - -if (as.numeric(PLOIDY)==0){ - - #### Analyzing sequencing data with sequenza - - ##Extract the information from the seqz file. - test <- sequenza.extract(data.file) - - #Plot chromosome view with mutations, BAF, depth ratio and segments - chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1]) - - - #Inference of cellularity and ploidy - CP <- sequenza.fit(test) - - #Results of model fitting - sequenza.results(sequenza.extract = test, cp.table = CP, sample.id =SAMPLEID, out.dir=OUT_DIR) - - #Confidence intervals, confidence region and point estimate - cint <- get.ci(CP) - cp.plot(CP) - cp.plot.contours(CP, add = TRUE, likThresh = c(0.95)) - - EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE) - - pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25) - - for(i in 1:length(EstimatedValues[,1])){ - - cellularity=EstimatedValues[i,1] - ploidy=EstimatedValues[i,2] - avg.depth.ratio=mean(test$gc$adj[,2]) - seg.tab=na.exclude(do.call(rbind,test$segments)) - - cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio) - seg.tab=cbind(seg.tab,cn.alleles) - - write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t") - - if (i==1){ - - par(mfrow=c(2,1)) - - genome.view(seg.cn=seg.tab, info.type="CNt") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2) - - genome.view(seg.cn=seg.tab, info.type="AB") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) - - }else{ - - genome.view(seg.cn=seg.tab, info.type="CNt") - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) - - genome.view(seg.cn=seg.tab, info.type="AB") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE) - } - - sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy) - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) - } - - dev.off() - - cellularity <- cint$max.cellularity - ploidy <- cint$max.ploidy - avg.depth.ratio <- mean(test$gc$adj[, 2]) - write(paste("cellularity : ",cellularity,"\nploidy : ",ploidy,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep="")) - -} else { - - cellularity.user <- as.numeric(CELLULARITY) - ploidy.user <- as.numeric(PLOIDY) - - #### Analyzing sequencing data with sequenza - - ##Extract the information from the seqz file. - test <- sequenza.extract(data.file) - - #Plot chromosome view with mutations, BAF, depth ratio and segments - chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1]) - - - #Inference of cellularity and ploidy - CP <- sequenza.fit(test) - - #Results of model fitting - sequenza.results(sequenza.extract = test, cp.table = CP, sample.id=SAMPLEID, out.dir=OUT_DIR,ploidy=ploidy.user,cellularity=cellularity.user) - - - #Confidence intervals, confidence region and point estimate - cint <- get.ci(CP) - cp.plot(CP) - cp.plot.contours(CP, add = TRUE, likThresh = c(0.95)) - - EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE) - userValues=c(as.numeric(cellularity.user),as.numeric(ploidy.user),as.numeric(0.0)) - EstimatedValues=rbind(userValues,EstimatedValues) - - pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25) - - for(i in 1:length(EstimatedValues[,1])){ - - cellularity=EstimatedValues[i,1] - ploidy=EstimatedValues[i,2] - avg.depth.ratio=mean(test$gc$adj[,2]) - seg.tab=na.exclude(do.call(rbind,test$segments)) - - cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio) - seg.tab=cbind(seg.tab,cn.alleles) - - write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t") - - if (i==1){ - - par(mfrow=c(2,1)) - - genome.view(seg.cn=seg.tab, info.type="CNt") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2) - - genome.view(seg.cn=seg.tab, info.type="AB") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) - - }else{ - - genome.view(seg.cn=seg.tab, info.type="CNt") - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) - - genome.view(seg.cn=seg.tab, info.type="AB") - legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE) - } - - sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy) - mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) - } - - dev.off() - - #Call CNVs and mutations using the estimated parameters - - avg.depth.ratio <- mean(test$gc$adj[, 2]) - write(paste("cellularity : ",cellularity.user,"\nploidy : ",ploidy.user,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep="")) - - - -} - - - -