29
|
1 #! /usr/bin/python
|
|
2 # -*- coding: utf8 -*-
|
|
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
|
|
4 #Copyright (C) 2015 Institut Curie
|
|
5 #
|
|
6 #This program is free software: you can redistribute it and/or modify
|
|
7 #it under the terms of the GNU General Public License as published by
|
|
8 #the Free Software Foundation, either version 3 of the License, or
|
|
9 #(at your option) any later version.
|
|
10 #
|
|
11 #This program is distributed in the hope that it will be useful,
|
|
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 #GNU General Public License for more details.
|
|
15 #
|
|
16 #You should have received a copy of the GNU General Public License
|
|
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 #
|
|
19 ###########################################################'
|
|
20 #
|
|
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
|
|
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
|
|
23 # <FILE_FORMAT> -estimation <ESTIMATION>
|
|
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
|
|
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
|
|
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
|
|
27 # <OUT_GALAXY>
|
|
28 #
|
|
29 #Run Sequenza with Galaxy.
|
|
30 #
|
|
31 #optional arguments:
|
|
32 # -h, --help show this help message and exit
|
|
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
|
|
34 # Normal input file (BAM, pileup, pileup.gz).
|
|
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
|
|
36 # Tumor input file (BAM, pileup, pileup.gz).
|
|
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
|
|
38 # Sample Name.
|
|
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
|
|
40 # GC content file (txt).
|
|
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
|
|
42 # Files format.
|
|
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
|
|
44 # To use sequenza estimation or not.
|
|
45 # -ref_file <REF_FILE>, --refFile <REF_FILE>
|
|
46 # Index file to samtools mpileup.
|
|
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
|
|
48 # If estimation = no, cellularity used.
|
|
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
|
|
50 # If estimation = no, plody used.
|
|
51 # -selector_index <SELECTOR>, --selector <SELECTOR>
|
|
52 # Source for the reference list.
|
|
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
|
|
54 # Samtools options (-A, -B, -d, -q and -Q).
|
|
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
|
|
56 #
|
|
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
|
|
58 #
|
|
59 ###########################################################"""
|
|
60 __author__ = 'Jocelyn Brayet'
|
|
61
|
|
62 ###########################################################'
|
|
63 ## Import
|
|
64
|
|
65 import argparse
|
|
66 import glob
|
|
67 import os
|
|
68 import signal
|
|
69 import subprocess
|
|
70
|
|
71 ## Tested with python 2.6.6
|
|
72 sequenzaVersion = '1.2 - 05/08/2015'
|
|
73
|
|
74 ################################ functions ############################################################
|
|
75 ## Define a function to test arguments
|
|
76
|
|
77 def testNone(argument):
|
|
78 """
|
|
79 Test if argument is None or not.
|
|
80 argument -> argument gived by user (XML file)
|
|
81 """
|
|
82
|
|
83 if not argument is None:
|
|
84 variable = argument[0]
|
|
85 else:
|
|
86 variable = ""
|
|
87 return variable
|
|
88
|
|
89 def subprocess_setup():
|
|
90 """
|
|
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
|
|
92 """
|
|
93
|
|
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
95
|
|
96 def createHTML(resultFile,resultPath,sample):
|
|
97 """
|
|
98 Building a result HTML displayed in Galaxy.
|
|
99 resultFile -> HTML file
|
|
100 resultPath -> Galaxy result path
|
|
101 sample -> sample name
|
|
102 """
|
|
103
|
|
104 resultHTML=open(resultFile,"w")
|
|
105
|
|
106 cmdLine="cd "+resultPath.replace(os.path.basename(resultPath),"")+"; tar czf all_files.tar.gz "+os.path.basename(resultPath)
|
|
107 os.system(cmdLine)
|
|
108 os.system("mv "+resultPath.replace(os.path.basename(resultPath),"")+"all_files.tar.gz "+resultPath)
|
|
109
|
|
110 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
|
|
111 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
|
|
112 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
|
|
113 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n")
|
|
114 resultHTML.write("</font>\n</table>\n</p>\n")
|
|
115 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
|
|
116 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
|
|
117 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
|
|
118
|
|
119 fileList=glob.glob(resultPath+"/segments_*.txt")
|
|
120 alternativeSolutionsList=""
|
|
121
|
|
122 for segmentsFile in fileList:
|
|
123 if not "segments_1.txt" in segmentsFile:
|
|
124 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
|
|
125
|
|
126 os.system("mkdir "+resultPath+"/segments")
|
|
127 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
|
|
128 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
|
|
129 os.system(cmdLine)
|
|
130
|
|
131 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
|
|
132 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
|
|
133 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
|
|
134 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
|
|
135 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
|
|
136 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
|
|
137 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
|
|
138 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
|
|
139 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
|
|
140 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
|
|
141 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
|
|
142 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
|
|
143 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
|
|
144 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
|
|
145 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n")
|
|
146 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
|
|
147 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
|
|
148 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
|
|
149 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
|
|
150 resultHTML.write("</body>\n</html>")
|
|
151 resultHTML.close()
|
|
152
|
|
153 if __name__ == '__main__':
|
|
154
|
|
155 ########### sequenza arguments ####################
|
|
156 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
|
|
157
|
|
158 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
|
|
159 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
|
|
160 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
|
|
161 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
|
|
162 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
|
|
163 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
|
|
164 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
|
|
165 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
|
|
166 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
|
|
167 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False)
|
|
168 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
|
|
169
|
|
170 ################################ galaxy arguments ############################################################
|
|
171 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
|
|
172 ###########################################################'
|
|
173
|
|
174 args = parser.parse_args()
|
|
175
|
|
176 normalFile = testNone(args.normalFile)
|
|
177 tumorFile = testNone(args.tumorFile)
|
|
178 sampleName = testNone(args.sampleName)
|
|
179 gcContent = testNone(args.GCfile)
|
|
180 fileFormat = testNone(args.fileFormat)
|
|
181 usePersonalEstimation = testNone(args.usePersonalEstimation)
|
|
182 refFile = testNone(args.refFile)
|
|
183 cellularityToUsed = testNone(args.cellularityToUsed)
|
|
184 ploidyToUsed = testNone(args.ploidyToUsed)
|
|
185 selector = testNone(args.selector)
|
|
186 samtoolsOptions = testNone(args.samtoolsOptions)
|
|
187 outGalaxyValue = testNone(args.outGalaxy)
|
|
188
|
|
189 ################Rscrip PATH#################
|
|
190 RscriptPath = "Rscript --vanilla "
|
|
191 ############################################
|
|
192
|
|
193 pathFile = os.path.dirname(os.path.realpath(__file__))
|
|
194 samtoolsPath = pathFile.replace("copy_number","samtools/")
|
|
195 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
|
|
196 os.popen("mkdir "+outGalaxyValueDir)
|
|
197 os.popen("chmod 777 -R "+outGalaxyValueDir)
|
|
198
|
|
199 log = open(outGalaxyValueDir+"/logFile.txt", "w")
|
|
200
|
|
201 log.write(samtoolsOptions)
|
|
202
|
|
203 if fileFormat=="BAM" :
|
|
204 if selector=="cached":
|
|
205
|
|
206 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
|
|
207 proc = subprocess.Popen( args=cmd, shell=True)
|
|
208 proc.wait()
|
|
209 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
|
|
210 os.system("rm -f "+normalFile+".tmp")
|
|
211
|
|
212 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
|
|
213 proc = subprocess.Popen( args=cmd, shell=True)
|
|
214 proc.wait()
|
|
215 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
|
|
216 os.system("rm -f "+tumorFile+".tmp")
|
|
217
|
|
218 else:
|
|
219
|
|
220 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
|
|
221 proc = subprocess.Popen( args=cmd, shell=True)
|
|
222 proc.wait()
|
|
223 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
|
|
224 os.system("rm -f "+normalFile+".tmp")
|
|
225
|
|
226 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
|
|
227 proc = subprocess.Popen( args=cmd, shell=True)
|
|
228 proc.wait()
|
|
229 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
|
|
230 os.system("rm -f "+tumorFile+".tmp")
|
|
231
|
|
232 if fileFormat=="pileup" :
|
|
233 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
|
|
234 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
|
|
235
|
|
236 if fileFormat=="pileup_gz" :
|
|
237 os.system("cp -f "+normalFile+" "+normalFile+".gz")
|
|
238 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
|
|
239
|
|
240
|
|
241
|
|
242
|
|
243 #############################################################
|
|
244
|
|
245 if usePersonalEstimation=="yes":
|
|
246
|
|
247 cellularityToUsed = float(cellularityToUsed)/100
|
|
248 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
|
|
249 log.write(str(command))
|
|
250
|
|
251 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
252 Rscrpit.wait()
|
|
253
|
|
254 else :
|
|
255
|
|
256 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
|
|
257 log.write(str(command))
|
|
258
|
|
259 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
260 Rscrpit.wait()
|
|
261
|
|
262 #############################################################
|
|
263
|
|
264 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
|
|
265
|
|
266
|
|
267 log.close()
|
|
268
|
|
269
|
|
270
|
|
271
|
|
272
|
|
273
|
|
274
|
|
275
|
|
276
|
|
277
|
|
278
|
|
279
|