17
|
1 #!/usr/bin/env Rscript
|
|
2
|
|
3
|
|
4 #parse command line
|
|
5 args <- commandArgs(trailingOnly = TRUE)
|
|
6 USAGE ="\n\nUSAGE: ./Sequenza_analysis.R -normal normal.mpileup -tumor tumor.mpileup -out resdir -gcContent gcContentFile -name sampleName -cellularity cellularityValue -ploidy ploidyValue\n\nkill execution"
|
|
7 if("-h"%in%args | "--h"%in%args |"-help"%in%args | "--help"%in%args){
|
|
8 write(paste("\n---Help---",USAGE,sep=""),stdout())
|
|
9 quit()
|
|
10 }
|
|
11 if(length(args)!=14){
|
|
12 write(paste("\n---ERROR---\n\nThis script take exactly 7 arguments",USAGE,sep=""),stdout())
|
|
13 quit()
|
|
14 }
|
|
15
|
|
16 #input files
|
|
17 NORMALPILEUP =args[(which(args=="-normal")+1)]
|
|
18 TUMORPILEUP =args[(which(args=="-tumor")+1)]
|
|
19 SAMPLEID =args[(which(args=="-name")+1)]
|
|
20 GCCONTENTFILE =args[(which(args=="-gcContent")+1)]
|
|
21 CELLULARITY =args[(which(args=="-cellularity")+1)]
|
|
22 PLOIDY =args[(which(args=="-ploidy")+1)]
|
|
23 OUT_DIR =args[(which(args=="-out")+1)]
|
|
24
|
|
25 ###################Python PATH####################
|
|
26 PYTHON_PATH = "python2.7"
|
|
27 ##################################################
|
|
28
|
|
29 library("sequenza")
|
|
30
|
|
31 #Note on the helper program sequenza-utils.py.
|
|
32 UTILS =system.file("exec", "sequenza-utils.py", package="sequenza")
|
|
33
|
|
34 ##########################################################
|
|
35 NORMALPILEUP_GZIP =NORMALPILEUP
|
|
36 TUMORPILEUP_GZIP =TUMORPILEUP
|
|
37 ##########################################################
|
|
38
|
|
39 #Generate a seqz file
|
|
40 system(paste("time ",PYTHON_PATH," ",UTILS," pileup2seqz -gc ",GCCONTENTFILE," -n ",NORMALPILEUP_GZIP," -t ",TUMORPILEUP_GZIP," | gzip > ",OUT_DIR,"/out.seqz.gz",sep=""))
|
|
41 system(paste("time ",PYTHON_PATH," ",UTILS," seqz-binning -w 50 -s ",OUT_DIR,"/out.seqz.gz | gzip > ",OUT_DIR,"/out.small.seqz.gz",sep=""))
|
|
42
|
|
43 #### Exploring the seqz file and depth ratio normalization details
|
|
44 ##Read the seqz file
|
|
45 data.file =paste(OUT_DIR,"/out.small.seqz.gz",sep="")
|
|
46 seqz.data <- read.seqz(data.file)
|
|
47
|
|
48 #Normalization of depth ratio
|
|
49 gc.stats <- gc.sample.stats(data.file)
|
|
50
|
|
51 gc.vect <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
|
|
52
|
|
53 seqz.data$adjusted.ratio <- seqz.data$depth.ratio/gc.vect[as.character(seqz.data$GC.percent)]
|
|
54
|
|
55 png(paste(OUT_DIR,"/",SAMPLEID,"_gc_stat.png",sep=""),width=1000, type = "cairo")
|
|
56 par(mfrow = c(1,2), cex = 1, las = 1, bty = 'l')
|
|
57 matplot(gc.stats$gc.values, gc.stats$raw, type = 'b', col = 1, pch = c(1, 19, 1), lty = c(2, 1, 2), xlab = 'GC content (%)', ylab = 'Uncorrected depth ratio')
|
|
58 legend('topright', legend = colnames(gc.stats$raw), pch = c(1, 19, 1))
|
|
59 hist2(seqz.data$depth.ratio, seqz.data$adjusted.ratio, breaks = prettyLog, key = vkey, panel.first = abline(0, 1, lty = 2), xlab = 'Uncorrected depth ratio', ylab = 'GC-adjusted depth ratio')
|
|
60 dev.off()
|
|
61
|
|
62
|
|
63
|
|
64 if (as.numeric(PLOIDY)==0){
|
|
65
|
|
66 #### Analyzing sequencing data with sequenza
|
|
67
|
|
68 ##Extract the information from the seqz file.
|
|
69 test <- sequenza.extract(data.file)
|
|
70
|
|
71 #Plot chromosome view with mutations, BAF, depth ratio and segments
|
|
72 chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1])
|
|
73
|
|
74
|
|
75 #Inference of cellularity and ploidy
|
|
76 CP <- sequenza.fit(test)
|
|
77
|
|
78 #Results of model fitting
|
|
79 sequenza.results(sequenza.extract = test, cp.table = CP, sample.id =SAMPLEID, out.dir=OUT_DIR)
|
|
80
|
|
81 #Confidence intervals, confidence region and point estimate
|
|
82 cint <- get.ci(CP)
|
|
83 cp.plot(CP)
|
|
84 cp.plot.contours(CP, add = TRUE, likThresh = c(0.95))
|
|
85
|
|
86 EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE)
|
|
87
|
|
88 pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25)
|
|
89
|
|
90 for(i in 1:length(EstimatedValues[,1])){
|
|
91
|
|
92 cellularity=EstimatedValues[i,1]
|
|
93 ploidy=EstimatedValues[i,2]
|
|
94 avg.depth.ratio=mean(test$gc$adj[,2])
|
|
95 seg.tab=na.exclude(do.call(rbind,test$segments))
|
|
96
|
|
97 cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio)
|
|
98 seg.tab=cbind(seg.tab,cn.alleles)
|
|
99
|
|
100 write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t")
|
|
101
|
|
102 if (i==1){
|
|
103
|
|
104 par(mfrow=c(2,1))
|
|
105
|
|
106 genome.view(seg.cn=seg.tab, info.type="CNt")
|
|
107 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
|
|
108 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2)
|
|
109
|
|
110 genome.view(seg.cn=seg.tab, info.type="AB")
|
|
111 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
|
|
112
|
|
113 }else{
|
|
114
|
|
115 genome.view(seg.cn=seg.tab, info.type="CNt")
|
|
116 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
|
|
117
|
|
118 genome.view(seg.cn=seg.tab, info.type="AB")
|
|
119 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE)
|
|
120 }
|
|
121
|
|
122 sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy)
|
|
123 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
|
|
124 }
|
|
125
|
|
126 dev.off()
|
|
127
|
|
128 cellularity <- cint$max.cellularity
|
|
129 ploidy <- cint$max.ploidy
|
|
130 avg.depth.ratio <- mean(test$gc$adj[, 2])
|
|
131 write(paste("cellularity : ",cellularity,"\nploidy : ",ploidy,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep=""))
|
|
132
|
|
133 } else {
|
|
134
|
|
135 cellularity.user <- as.numeric(CELLULARITY)
|
|
136 ploidy.user <- as.numeric(PLOIDY)
|
|
137
|
|
138 #### Analyzing sequencing data with sequenza
|
|
139
|
|
140 ##Extract the information from the seqz file.
|
|
141 test <- sequenza.extract(data.file)
|
|
142
|
|
143 #Plot chromosome view with mutations, BAF, depth ratio and segments
|
|
144 chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1])
|
|
145
|
|
146
|
|
147 #Inference of cellularity and ploidy
|
|
148 CP <- sequenza.fit(test)
|
|
149
|
|
150 #Results of model fitting
|
|
151 sequenza.results(sequenza.extract = test, cp.table = CP, sample.id=SAMPLEID, out.dir=OUT_DIR,ploidy=ploidy.user,cellularity=cellularity.user)
|
|
152
|
|
153
|
|
154 #Confidence intervals, confidence region and point estimate
|
|
155 cint <- get.ci(CP)
|
|
156 cp.plot(CP)
|
|
157 cp.plot.contours(CP, add = TRUE, likThresh = c(0.95))
|
|
158
|
|
159 EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE)
|
|
160 userValues=c(as.numeric(cellularity.user),as.numeric(ploidy.user),as.numeric(0.0))
|
|
161 EstimatedValues=rbind(userValues,EstimatedValues)
|
|
162
|
|
163 pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25)
|
|
164
|
|
165 for(i in 1:length(EstimatedValues[,1])){
|
|
166
|
|
167 cellularity=EstimatedValues[i,1]
|
|
168 ploidy=EstimatedValues[i,2]
|
|
169 avg.depth.ratio=mean(test$gc$adj[,2])
|
|
170 seg.tab=na.exclude(do.call(rbind,test$segments))
|
|
171
|
|
172 cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio)
|
|
173 seg.tab=cbind(seg.tab,cn.alleles)
|
|
174
|
|
175 write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t")
|
|
176
|
|
177 if (i==1){
|
|
178
|
|
179 par(mfrow=c(2,1))
|
|
180
|
|
181 genome.view(seg.cn=seg.tab, info.type="CNt")
|
|
182 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
|
|
183 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2)
|
|
184
|
|
185 genome.view(seg.cn=seg.tab, info.type="AB")
|
|
186 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE)
|
|
187
|
|
188 }else{
|
|
189
|
|
190 genome.view(seg.cn=seg.tab, info.type="CNt")
|
|
191 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
|
|
192
|
|
193 genome.view(seg.cn=seg.tab, info.type="AB")
|
|
194 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE)
|
|
195 }
|
|
196
|
|
197 sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy)
|
|
198 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2)
|
|
199 }
|
|
200
|
|
201 dev.off()
|
|
202
|
|
203 #Call CNVs and mutations using the estimated parameters
|
|
204
|
|
205 avg.depth.ratio <- mean(test$gc$adj[, 2])
|
|
206 write(paste("cellularity : ",cellularity.user,"\nploidy : ",ploidy.user,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep=""))
|
|
207
|
|
208
|
|
209
|
|
210 }
|
|
211
|
|
212
|
|
213
|
|
214
|