21
|
1 #! /usr/bin/python
|
|
2 # -*- coding: utf8 -*-
|
|
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
|
|
4 #Copyright (C) 2015 Institut Curie
|
|
5 #
|
|
6 #This program is free software: you can redistribute it and/or modify
|
|
7 #it under the terms of the GNU General Public License as published by
|
|
8 #the Free Software Foundation, either version 3 of the License, or
|
|
9 #(at your option) any later version.
|
|
10 #
|
|
11 #This program is distributed in the hope that it will be useful,
|
|
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 #GNU General Public License for more details.
|
|
15 #
|
|
16 #You should have received a copy of the GNU General Public License
|
|
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 #
|
|
19 ###########################################################'
|
|
20 #
|
|
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
|
|
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
|
|
23 # <FILE_FORMAT> -estimation <ESTIMATION>
|
|
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
|
|
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
|
|
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
|
|
27 # <OUT_GALAXY>
|
|
28 #
|
|
29 #Run Sequenza with Galaxy.
|
|
30 #
|
|
31 #optional arguments:
|
|
32 # -h, --help show this help message and exit
|
|
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
|
|
34 # Normal input file (BAM, pileup, pileup.gz).
|
|
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
|
|
36 # Tumor input file (BAM, pileup, pileup.gz).
|
|
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
|
|
38 # Sample Name.
|
|
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
|
|
40 # GC content file (txt).
|
|
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
|
|
42 # Files format.
|
|
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
|
|
44 # To use sequenza estimation or not.
|
|
45 # -ref_file <REF_FILE>, --refFile <REF_FILE>
|
|
46 # Index file to samtools mpileup.
|
|
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
|
|
48 # If estimation = no, cellularity used.
|
|
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
|
|
50 # If estimation = no, plody used.
|
|
51 # -selector_index <SELECTOR>, --selector <SELECTOR>
|
|
52 # Source for the reference list.
|
|
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
|
|
54 # Samtools options (-A, -B, -d, -q and -Q).
|
|
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
|
|
56 #
|
|
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
|
|
58 #
|
|
59 ###########################################################"""
|
|
60 __author__ = 'Jocelyn Brayet'
|
|
61
|
|
62 ###########################################################'
|
|
63 ## Import
|
|
64
|
|
65 import argparse
|
|
66 import glob
|
|
67 import os
|
|
68 import signal
|
|
69 import subprocess
|
|
70
|
|
71 ## Tested with python 2.6.6
|
|
72 sequenzaVersion = '1.2 - 05/08/2015'
|
|
73
|
|
74 ################################ functions ############################################################
|
|
75 ## Define a function to test arguments
|
|
76
|
|
77 def testNone(argument):
|
|
78 """
|
|
79 Test if argument is None or not.
|
|
80 argument -> argument gived by user (XML file)
|
|
81 """
|
|
82
|
|
83 if not argument is None:
|
|
84 variable = argument[0]
|
|
85 else:
|
|
86 variable = ""
|
|
87 return variable
|
|
88
|
|
89 def subprocess_setup():
|
|
90 """
|
|
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
|
|
92 """
|
|
93
|
|
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
95
|
|
96 def createHTML(resultFile,resultPath,sample):
|
|
97 """
|
|
98 Building a result HTML displayed in Galaxy.
|
|
99 resultFile -> HTML file
|
|
100 resultPath -> Galaxy result path
|
|
101 sample -> sample name
|
|
102 """
|
|
103
|
|
104 resultHTML=open(resultFile,"w")
|
|
105 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
|
|
106 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
|
|
107 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
|
|
108 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a></td>\n</tr>\n")
|
|
109 resultHTML.write("</font>\n</table>\n</p>\n")
|
|
110 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
|
|
111 resultHTML.write("<p align='center'><embed src='"+resultPath+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
|
|
112 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
|
|
113
|
|
114 fileList=glob.glob(resultPath+"/segments_*.txt")
|
|
115 alternativeSolutionsList=""
|
|
116
|
|
117 for segmentsFile in fileList:
|
|
118 if not "segments_1.txt" in segmentsFile:
|
|
119 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
|
|
120
|
|
121 os.system("mkdir "+resultPath+"/segments")
|
|
122 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
|
|
123 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
|
|
124 os.system(cmdLine)
|
|
125
|
|
126 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
|
|
127 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
|
|
128 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
|
|
129 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
|
|
130 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
|
|
131 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
|
|
132 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
|
|
133 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
|
|
134 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
|
|
135 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
|
|
136 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
|
|
137 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
|
|
138 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
|
|
139 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
|
|
140 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n")
|
|
141 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
|
|
142 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
|
|
143 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
|
|
144 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
|
|
145 resultHTML.write("</body>\n</html>")
|
|
146 resultHTML.close()
|
|
147
|
|
148 if __name__ == '__main__':
|
|
149
|
|
150 ########### sequenza arguments ####################
|
|
151 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
|
|
152
|
|
153 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
|
|
154 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
|
|
155 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
|
|
156 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
|
|
157 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
|
|
158 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
|
|
159 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
|
|
160 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
|
|
161 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
|
|
162 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False)
|
|
163 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
|
|
164
|
|
165 ################################ galaxy arguments ############################################################
|
|
166 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
|
|
167 ###########################################################'
|
|
168
|
|
169 args = parser.parse_args()
|
|
170
|
|
171 normalFile = testNone(args.normalFile)
|
|
172 tumorFile = testNone(args.tumorFile)
|
|
173 sampleName = testNone(args.sampleName)
|
|
174 gcContent = testNone(args.GCfile)
|
|
175 fileFormat = testNone(args.fileFormat)
|
|
176 usePersonalEstimation = testNone(args.usePersonalEstimation)
|
|
177 refFile = testNone(args.refFile)
|
|
178 cellularityToUsed = testNone(args.cellularityToUsed)
|
|
179 ploidyToUsed = testNone(args.ploidyToUsed)
|
|
180 selector = testNone(args.selector)
|
|
181 samtoolsOptions = testNone(args.samtoolsOptions)
|
|
182 outGalaxyValue = testNone(args.outGalaxy)
|
|
183
|
|
184 ################Rscrip PATH#################
|
|
185 RscriptPath = "Rscript --vanilla "
|
|
186 ############################################
|
|
187
|
|
188 pathFile = os.path.dirname(os.path.realpath(__file__))
|
|
189 samtoolsPath = pathFile.replace("copy_number","samtools/")
|
|
190 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
|
|
191 os.popen("mkdir "+outGalaxyValueDir)
|
|
192 os.popen("chmod 777 -R "+outGalaxyValueDir)
|
|
193
|
|
194 log = open(outGalaxyValueDir+"/logFile.txt", "w")
|
|
195
|
|
196 log.write(samtoolsOptions)
|
|
197
|
|
198 if fileFormat=="BAM" :
|
|
199 if selector=="cached":
|
|
200
|
|
201 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
|
|
202 proc = subprocess.Popen( args=cmd, shell=True)
|
|
203 proc.wait()
|
|
204 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
|
|
205 os.system("rm -f "+normalFile+".tmp")
|
|
206
|
|
207 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
|
|
208 proc = subprocess.Popen( args=cmd, shell=True)
|
|
209 proc.wait()
|
|
210 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
|
|
211 os.system("rm -f "+tumorFile+".tmp")
|
|
212
|
|
213 else:
|
|
214
|
|
215 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
|
|
216 proc = subprocess.Popen( args=cmd, shell=True)
|
|
217 proc.wait()
|
|
218 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
|
|
219 os.system("rm -f "+normalFile+".tmp")
|
|
220
|
|
221 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
|
|
222 proc = subprocess.Popen( args=cmd, shell=True)
|
|
223 proc.wait()
|
|
224 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
|
|
225 os.system("rm -f "+tumorFile+".tmp")
|
|
226
|
|
227 if fileFormat=="pileup" :
|
|
228 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
|
|
229 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
|
|
230
|
|
231 if fileFormat=="pileup_gz" :
|
|
232 os.system("cp -f "+normalFile+" "+normalFile+".gz")
|
|
233 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
|
|
234
|
|
235
|
|
236
|
|
237
|
|
238 #############################################################
|
|
239
|
|
240 if usePersonalEstimation=="yes":
|
|
241
|
|
242 cellularityToUsed = float(cellularityToUsed)/100
|
|
243 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
|
|
244 log.write(str(command))
|
|
245
|
|
246 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
247 Rscrpit.wait()
|
|
248
|
|
249 else :
|
|
250
|
|
251 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
|
|
252 log.write(str(command))
|
|
253
|
|
254 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
255 Rscrpit.wait()
|
|
256
|
|
257 #############################################################
|
|
258
|
|
259 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
|
|
260
|
|
261
|
|
262 log.close()
|
|
263
|
|
264
|
|
265
|
|
266
|
|
267
|
|
268
|
|
269
|
|
270
|
|
271
|
|
272
|
|
273
|
|
274
|