Mercurial > repos > jbrayet > sequenza
comparison Sequenza_analysis.R @ 0:acdb95d9de7e draft
Uploaded
author | jbrayet |
---|---|
date | Fri, 14 Aug 2015 09:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acdb95d9de7e |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 | |
4 #parse command line | |
5 args <- commandArgs(trailingOnly = TRUE) | |
6 USAGE ="\n\nUSAGE: ./Sequenza_analysis.R -normal normal.mpileup -tumor tumor.mpileup -out resdir -gcContent gcContentFile -name sampleName -cellularity cellularityValue -ploidy ploidyValue\n\nkill execution" | |
7 if("-h"%in%args | "--h"%in%args |"-help"%in%args | "--help"%in%args){ | |
8 write(paste("\n---Help---",USAGE,sep=""),stdout()) | |
9 quit() | |
10 } | |
11 if(length(args)!=14){ | |
12 write(paste("\n---ERROR---\n\nThis script take exactly 7 arguments",USAGE,sep=""),stdout()) | |
13 quit() | |
14 } | |
15 | |
16 #input files | |
17 NORMALPILEUP =args[(which(args=="-normal")+1)] | |
18 TUMORPILEUP =args[(which(args=="-tumor")+1)] | |
19 SAMPLEID =args[(which(args=="-name")+1)] | |
20 GCCONTENTFILE =args[(which(args=="-gcContent")+1)] | |
21 CELLULARITY =args[(which(args=="-cellularity")+1)] | |
22 PLOIDY =args[(which(args=="-ploidy")+1)] | |
23 OUT_DIR =args[(which(args=="-out")+1)] | |
24 | |
25 ###################Python PATH#################### | |
26 PYTHON_PATH = "python2.7" | |
27 ################################################## | |
28 | |
29 library("sequenza") | |
30 | |
31 #Note on the helper program sequenza-utils.py. | |
32 UTILS =system.file("exec", "sequenza-utils.py", package="sequenza") | |
33 | |
34 ########################################################## | |
35 NORMALPILEUP_GZIP =NORMALPILEUP | |
36 TUMORPILEUP_GZIP =TUMORPILEUP | |
37 ########################################################## | |
38 | |
39 #Generate a seqz file | |
40 system(paste("time ",PYTHON_PATH," ",UTILS," pileup2seqz -gc ",GCCONTENTFILE," -n ",NORMALPILEUP_GZIP," -t ",TUMORPILEUP_GZIP," | gzip > ",OUT_DIR,"/out.seqz.gz",sep="")) | |
41 system(paste("time ",PYTHON_PATH," ",UTILS," seqz-binning -w 50 -s ",OUT_DIR,"/out.seqz.gz | gzip > ",OUT_DIR,"/out.small.seqz.gz",sep="")) | |
42 | |
43 #### Exploring the seqz file and depth ratio normalization details | |
44 ##Read the seqz file | |
45 data.file =paste(OUT_DIR,"/out.small.seqz.gz",sep="") | |
46 seqz.data <- read.seqz(data.file) | |
47 | |
48 #Normalization of depth ratio | |
49 gc.stats <- gc.sample.stats(data.file) | |
50 | |
51 gc.vect <- setNames(gc.stats$raw.mean, gc.stats$gc.values) | |
52 | |
53 seqz.data$adjusted.ratio <- seqz.data$depth.ratio/gc.vect[as.character(seqz.data$GC.percent)] | |
54 | |
55 png(paste(OUT_DIR,"/",SAMPLEID,"_gc_stat.png",sep=""),width=1000) | |
56 par(mfrow = c(1,2), cex = 1, las = 1, bty = 'l') | |
57 matplot(gc.stats$gc.values, gc.stats$raw, type = 'b', col = 1, pch = c(1, 19, 1), lty = c(2, 1, 2), xlab = 'GC content (%)', ylab = 'Uncorrected depth ratio') | |
58 legend('topright', legend = colnames(gc.stats$raw), pch = c(1, 19, 1)) | |
59 hist2(seqz.data$depth.ratio, seqz.data$adjusted.ratio, breaks = prettyLog, key = vkey, panel.first = abline(0, 1, lty = 2), xlab = 'Uncorrected depth ratio', ylab = 'GC-adjusted depth ratio') | |
60 dev.off() | |
61 | |
62 | |
63 | |
64 if (as.numeric(PLOIDY)==0){ | |
65 | |
66 #### Analyzing sequencing data with sequenza | |
67 | |
68 ##Extract the information from the seqz file. | |
69 test <- sequenza.extract(data.file) | |
70 | |
71 #Plot chromosome view with mutations, BAF, depth ratio and segments | |
72 chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1]) | |
73 | |
74 | |
75 #Inference of cellularity and ploidy | |
76 CP <- sequenza.fit(test) | |
77 | |
78 #Results of model fitting | |
79 sequenza.results(sequenza.extract = test, cp.table = CP, sample.id =SAMPLEID, out.dir=OUT_DIR) | |
80 | |
81 #Confidence intervals, confidence region and point estimate | |
82 cint <- get.ci(CP) | |
83 cp.plot(CP) | |
84 cp.plot.contours(CP, add = TRUE, likThresh = c(0.95)) | |
85 | |
86 EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE) | |
87 | |
88 pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25) | |
89 | |
90 for(i in 1:length(EstimatedValues[,1])){ | |
91 | |
92 cellularity=EstimatedValues[i,1] | |
93 ploidy=EstimatedValues[i,2] | |
94 avg.depth.ratio=mean(test$gc$adj[,2]) | |
95 seg.tab=na.exclude(do.call(rbind,test$segments)) | |
96 | |
97 cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio) | |
98 seg.tab=cbind(seg.tab,cn.alleles) | |
99 | |
100 write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t") | |
101 | |
102 if (i==1){ | |
103 | |
104 par(mfrow=c(2,1)) | |
105 | |
106 genome.view(seg.cn=seg.tab, info.type="CNt") | |
107 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) | |
108 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2) | |
109 | |
110 genome.view(seg.cn=seg.tab, info.type="AB") | |
111 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) | |
112 | |
113 }else{ | |
114 | |
115 genome.view(seg.cn=seg.tab, info.type="CNt") | |
116 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) | |
117 | |
118 genome.view(seg.cn=seg.tab, info.type="AB") | |
119 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE) | |
120 } | |
121 | |
122 sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy) | |
123 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) | |
124 } | |
125 | |
126 dev.off() | |
127 | |
128 cellularity <- cint$max.cellularity | |
129 ploidy <- cint$max.ploidy | |
130 avg.depth.ratio <- mean(test$gc$adj[, 2]) | |
131 write(paste("cellularity : ",cellularity,"\nploidy : ",ploidy,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep="")) | |
132 | |
133 } else { | |
134 | |
135 cellularity.user <- as.numeric(CELLULARITY) | |
136 ploidy.user <- as.numeric(PLOIDY) | |
137 | |
138 #### Analyzing sequencing data with sequenza | |
139 | |
140 ##Extract the information from the seqz file. | |
141 test <- sequenza.extract(data.file) | |
142 | |
143 #Plot chromosome view with mutations, BAF, depth ratio and segments | |
144 chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]], ratio.windows = test$ratio[[1]], min.N.ratio = 1, segments = test$segments[[1]], main = test$chromosomes[1]) | |
145 | |
146 | |
147 #Inference of cellularity and ploidy | |
148 CP <- sequenza.fit(test) | |
149 | |
150 #Results of model fitting | |
151 sequenza.results(sequenza.extract = test, cp.table = CP, sample.id=SAMPLEID, out.dir=OUT_DIR,ploidy=ploidy.user,cellularity=cellularity.user) | |
152 | |
153 | |
154 #Confidence intervals, confidence region and point estimate | |
155 cint <- get.ci(CP) | |
156 cp.plot(CP) | |
157 cp.plot.contours(CP, add = TRUE, likThresh = c(0.95)) | |
158 | |
159 EstimatedValues <- read.table(file=paste(OUT_DIR,"/",SAMPLEID,"_alternative_solutions.txt",sep=""),header=TRUE) | |
160 userValues=c(as.numeric(cellularity.user),as.numeric(ploidy.user),as.numeric(0.0)) | |
161 EstimatedValues=rbind(userValues,EstimatedValues) | |
162 | |
163 pdf(file=paste(OUT_DIR,"/",SAMPLEID,"_analyse.pdf",sep=""),height=10,width=25) | |
164 | |
165 for(i in 1:length(EstimatedValues[,1])){ | |
166 | |
167 cellularity=EstimatedValues[i,1] | |
168 ploidy=EstimatedValues[i,2] | |
169 avg.depth.ratio=mean(test$gc$adj[,2]) | |
170 seg.tab=na.exclude(do.call(rbind,test$segments)) | |
171 | |
172 cn.alleles<-baf.bayes(Bf=seg.tab$Bf,depth.ratio=seg.tab$depth.ratio,cellularity=cellularity,ploidy=ploidy,avg.depth.ratio=avg.depth.ratio) | |
173 seg.tab=cbind(seg.tab,cn.alleles) | |
174 | |
175 write.table(seg.tab, file = paste(OUT_DIR,"/segments_",i,".txt",sep=""), col.names = TRUE, row.names = FALSE, sep = "\t") | |
176 | |
177 if (i==1){ | |
178 | |
179 par(mfrow=c(2,1)) | |
180 | |
181 genome.view(seg.cn=seg.tab, info.type="CNt") | |
182 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) | |
183 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = -2, outer = TRUE, cex=1.5, font=2) | |
184 | |
185 genome.view(seg.cn=seg.tab, info.type="AB") | |
186 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.1),pch=15,xpd=TRUE) | |
187 | |
188 }else{ | |
189 | |
190 genome.view(seg.cn=seg.tab, info.type="CNt") | |
191 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) | |
192 | |
193 genome.view(seg.cn=seg.tab, info.type="AB") | |
194 legend("bottomright",bty="n",c("Tumor Copy Number"),col=c("red"),inset=c(0.0,-0.08),pch=15,xpd=TRUE) | |
195 } | |
196 | |
197 sequenza:::plotRawGenome(test, cellularity = cellularity, ploidy = ploidy) | |
198 mtext(text=paste("\n",SAMPLEID," Choice ",i," ploidy=",ploidy," cellularity=",cellularity,sep=""), side = 3, line = 1, outer = TRUE, cex=1.5, font=2) | |
199 } | |
200 | |
201 dev.off() | |
202 | |
203 #Call CNVs and mutations using the estimated parameters | |
204 | |
205 avg.depth.ratio <- mean(test$gc$adj[, 2]) | |
206 write(paste("cellularity : ",cellularity.user,"\nploidy : ",ploidy.user,"\naverage.depth.ratio : ",avg.depth.ratio,sep=""),file=paste(OUT_DIR,"/",SAMPLEID,"_cellularity_ploidy.txt",sep="")) | |
207 | |
208 | |
209 | |
210 } | |
211 | |
212 | |
213 | |
214 |