comparison sequenza_wrapper.py @ 1:aff64bdd2a36 draft

Uploaded
author jbrayet
date Fri, 14 Aug 2015 09:55:29 -0400
parents
children
comparison
equal deleted inserted replaced
0:acdb95d9de7e 1:aff64bdd2a36
1 #! /usr/bin/python
2 # -*- coding: utf8 -*-
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
4 #Copyright (C) 2015 Institut Curie
5 #
6 #This program is free software: you can redistribute it and/or modify
7 #it under the terms of the GNU General Public License as published by
8 #the Free Software Foundation, either version 3 of the License, or
9 #(at your option) any later version.
10 #
11 #This program is distributed in the hope that it will be useful,
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 #GNU General Public License for more details.
15 #
16 #You should have received a copy of the GNU General Public License
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #
19 ###########################################################'
20 #
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
23 # <FILE_FORMAT> -estimation <ESTIMATION>
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
27 # <OUT_GALAXY>
28 #
29 #Run Sequenza with Galaxy.
30 #
31 #optional arguments:
32 # -h, --help show this help message and exit
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
34 # Normal input file (BAM, pileup, pileup.gz).
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
36 # Tumor input file (BAM, pileup, pileup.gz).
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
38 # Sample Name.
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
40 # GC content file (txt).
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
42 # Files format.
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
44 # To use sequenza estimation or not.
45 # -ref_file <REF_FILE>, --refFile <REF_FILE>
46 # Index file to samtools mpileup.
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
48 # If estimation = no, cellularity used.
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
50 # If estimation = no, plody used.
51 # -selector_index <SELECTOR>, --selector <SELECTOR>
52 # Source for the reference list.
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
54 # Samtools options (-A, -B, -d, -q and -Q).
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
56 #
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
58 #
59 ###########################################################"""
60 __author__ = 'Jocelyn Brayet'
61
62 ###########################################################'
63 ## Import
64
65 import argparse
66 import glob
67 import os
68 import signal
69 import subprocess
70
71 ## Tested with python 2.6.6
72 sequenzaVersion = '1.2 - 05/08/2015'
73
74 ################################ functions ############################################################
75 ## Define a function to test arguments
76
77 def testNone(argument):
78 """
79 Test if argument is None or not.
80 argument -> argument gived by user (XML file)
81 """
82
83 if not argument is None:
84 variable = argument[0]
85 else:
86 variable = ""
87 return variable
88
89 def subprocess_setup():
90 """
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
92 """
93
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
95
96 def createHTML(resultFile,resultPath,sample):
97 """
98 Building a result HTML displayed in Galaxy.
99 resultFile -> HTML file
100 resultPath -> Galaxy result path
101 sample -> sample name
102 """
103
104 resultHTML=open(resultFile,"w")
105 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
106 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
107 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
108 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a></td>\n</tr>\n")
109 resultHTML.write("</font>\n</table>\n</p>\n")
110 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
111 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
112 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
113
114 fileList=glob.glob(resultPath+"/segments_*.txt")
115 alternativeSolutionsList=""
116
117 for segmentsFile in fileList:
118 if not "segments_1.txt" in segmentsFile:
119 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
120
121 os.system("mkdir "+resultPath+"/segments")
122 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
123 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
124 os.system(cmdLine)
125
126 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
127 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
128 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
129 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
130 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
131 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
132 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
133 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
134 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
135 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
136 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
137 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
138 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
139 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
140 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px height='300px src='"+sample+"_gc_stat.png'></a></p>\n")
141 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
142 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
143 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
144 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
145 resultHTML.write("</body>\n</html>")
146 resultHTML.close()
147
148 if __name__ == '__main__':
149
150 ########### sequenza arguments ####################
151 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
152
153 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
154 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
155 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
156 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
157 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
158 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
159 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
160 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
161 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
162 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False)
163 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
164
165 ################################ galaxy arguments ############################################################
166 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
167 ###########################################################'
168
169 args = parser.parse_args()
170
171 normalFile = testNone(args.normalFile)
172 tumorFile = testNone(args.tumorFile)
173 sampleName = testNone(args.sampleName)
174 gcContent = testNone(args.GCfile)
175 fileFormat = testNone(args.fileFormat)
176 usePersonalEstimation = testNone(args.usePersonalEstimation)
177 refFile = testNone(args.refFile)
178 cellularityToUsed = testNone(args.cellularityToUsed)
179 ploidyToUsed = testNone(args.ploidyToUsed)
180 selector = testNone(args.selector)
181 samtoolsOptions = testNone(args.samtoolsOptions)
182 outGalaxyValue = testNone(args.outGalaxy)
183
184 ################Rscrip PATH#################
185 RscriptPath = "Rscript --vanilla "
186 ############################################
187
188 pathFile = os.path.dirname(os.path.realpath(__file__))
189 samtoolsPath = pathFile.replace("copy_number","samtools/")
190 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
191 os.popen("mkdir "+outGalaxyValueDir)
192 os.popen("chmod 777 -R "+outGalaxyValueDir)
193
194 log = open(outGalaxyValueDir+"/logFile.txt", "w")
195
196 log.write(samtoolsOptions)
197
198 if fileFormat=="BAM" :
199 if selector=="cached":
200
201 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
202 proc = subprocess.Popen( args=cmd, shell=True)
203 proc.wait()
204 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
205 os.system("rm -f "+normalFile+".tmp")
206
207 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
208 proc = subprocess.Popen( args=cmd, shell=True)
209 proc.wait()
210 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
211 os.system("rm -f "+tumorFile+".tmp")
212
213 else:
214
215 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
216 proc = subprocess.Popen( args=cmd, shell=True)
217 proc.wait()
218 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
219 os.system("rm -f "+normalFile+".tmp")
220
221 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
222 proc = subprocess.Popen( args=cmd, shell=True)
223 proc.wait()
224 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
225 os.system("rm -f "+tumorFile+".tmp")
226
227 if fileFormat=="pileup" :
228 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
229 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
230
231 if fileFormat=="pileup_gz" :
232 os.system("cp -f "+normalFile+" "+normalFile+".gz")
233 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
234
235
236
237
238 #############################################################
239
240 if usePersonalEstimation=="yes":
241
242 cellularityToUsed = float(cellularityToUsed)/100
243 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
244 log.write(str(command))
245
246 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
247 Rscrpit.wait()
248
249 else :
250
251 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
252 log.write(str(command))
253
254 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
255 Rscrpit.wait()
256
257 #############################################################
258
259 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
260
261
262 log.close()
263
264
265
266
267
268
269
270
271
272
273
274