38
|
1 #! /usr/bin/python
|
|
2 # -*- coding: utf8 -*-
|
|
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
|
|
4 #Copyright (C) 2015 Institut Curie
|
|
5 #
|
|
6 #This program is free software: you can redistribute it and/or modify
|
|
7 #it under the terms of the GNU General Public License as published by
|
|
8 #the Free Software Foundation, either version 3 of the License, or
|
|
9 #(at your option) any later version.
|
|
10 #
|
|
11 #This program is distributed in the hope that it will be useful,
|
|
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 #GNU General Public License for more details.
|
|
15 #
|
|
16 #You should have received a copy of the GNU General Public License
|
|
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
18 #
|
|
19 ###########################################################'
|
|
20 #
|
|
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
|
|
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
|
|
23 # <FILE_FORMAT> -estimation <ESTIMATION>
|
|
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
|
|
25 # [-ploidy <PLOIDY>] [-samtools_options <SAMTOOLS_OPTIONS>]
|
|
26 # -outGalaxy <OUT_GALAXY>
|
|
27 #
|
|
28 #Run Sequenza with Galaxy.
|
|
29 #
|
|
30 #optional arguments:
|
|
31 # -h, --help show this help message and exit
|
|
32 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
|
|
33 # Normal input file (BAM, pileup, pileup.gz).
|
|
34 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
|
|
35 # Tumor input file (BAM, pileup, pileup.gz).
|
|
36 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
|
|
37 # Sample Name.
|
|
38 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
|
|
39 # GC content file (txt).
|
|
40 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
|
|
41 # Files format.
|
|
42 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
|
|
43 # To use sequenza estimation or not.
|
|
44 # -ref_file <REF_FILE>, --refFile <REF_FILE>
|
|
45 # Index file to samtools mpileup.
|
|
46 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
|
|
47 # If estimation = no, cellularity used.
|
|
48 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
|
|
49 # If estimation = no, plody used.
|
|
50 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
|
|
51 # Samtools options (-A, -B, -d, -q and -Q).
|
|
52 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
|
|
53 #
|
|
54 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
|
|
55 #
|
|
56 ###########################################################"""
|
|
57 __author__ = 'Jocelyn Brayet'
|
|
58
|
|
59 ###########################################################'
|
|
60 ## Import
|
|
61
|
|
62 import argparse
|
|
63 import glob
|
|
64 import os
|
|
65 import signal
|
|
66 import subprocess
|
|
67
|
|
68 ## Tested with python 2.6.6
|
|
69 sequenzaVersion = '1.2 - 05/08/2015'
|
|
70
|
|
71 ################################ functions ############################################################
|
|
72 ## Define a function to test arguments
|
|
73
|
|
74 def testNone(argument):
|
|
75 """
|
|
76 Test if argument is None or not.
|
|
77 argument -> argument gived by user (XML file)
|
|
78 """
|
|
79
|
|
80 if not argument is None:
|
|
81 variable = argument[0]
|
|
82 else:
|
|
83 variable = ""
|
|
84 return variable
|
|
85
|
|
86 def subprocess_setup():
|
|
87 """
|
|
88 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
|
|
89 """
|
|
90
|
|
91 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
|
92
|
|
93 def createHTML(resultFile,resultPath,sample):
|
|
94 """
|
|
95 Building a result HTML displayed in Galaxy.
|
|
96 resultFile -> HTML file
|
|
97 resultPath -> Galaxy result path
|
|
98 sample -> sample name
|
|
99 """
|
|
100
|
|
101 resultHTML=open(resultFile,"w")
|
|
102
|
|
103 cmdLine="cd "+resultPath.replace(os.path.basename(resultPath),"")+"; tar czf all_files.tar.gz "+os.path.basename(resultPath)
|
|
104 os.system(cmdLine)
|
|
105 os.system("mv "+resultPath.replace(os.path.basename(resultPath),"")+"all_files.tar.gz "+resultPath)
|
|
106
|
|
107 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
|
|
108 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
|
|
109 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
|
|
110 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n")
|
|
111 resultHTML.write("</font>\n</table>\n</p>\n")
|
|
112 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
|
|
113 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
|
|
114 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
|
|
115
|
|
116 fileList=glob.glob(resultPath+"/segments_*.txt")
|
|
117 alternativeSolutionsList=""
|
|
118
|
|
119 for segmentsFile in fileList:
|
|
120 if not "segments_1.txt" in segmentsFile:
|
|
121 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
|
|
122
|
|
123 os.system("mkdir "+resultPath+"/segments")
|
|
124 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
|
|
125 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
|
|
126 os.system(cmdLine)
|
|
127
|
|
128 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
|
|
129 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
|
|
130 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
|
|
131 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
|
|
132 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
|
|
133 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
|
|
134 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
|
|
135 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
|
|
136 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
|
|
137 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
|
|
138 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
|
|
139 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
|
|
140 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
|
|
141 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
|
|
142 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n")
|
|
143 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
|
|
144 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
|
|
145 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
|
|
146 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
|
|
147 resultHTML.write("</body>\n</html>")
|
|
148 resultHTML.close()
|
|
149
|
|
150 if __name__ == '__main__':
|
|
151
|
|
152 ########### sequenza arguments ####################
|
|
153 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
|
|
154
|
|
155 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
|
|
156 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
|
|
157 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
|
|
158 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
|
|
159 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
|
|
160 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
|
|
161 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
|
|
162 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
|
|
163 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
|
|
164 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
|
|
165
|
|
166 ################################ galaxy arguments ############################################################
|
|
167 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
|
|
168 ###########################################################'
|
|
169
|
|
170 args = parser.parse_args()
|
|
171
|
|
172 normalFile = testNone(args.normalFile)
|
|
173 tumorFile = testNone(args.tumorFile)
|
|
174 sampleName = testNone(args.sampleName)
|
|
175 gcContent = testNone(args.GCfile)
|
|
176 fileFormat = testNone(args.fileFormat)
|
|
177 usePersonalEstimation = testNone(args.usePersonalEstimation)
|
|
178 refFile = testNone(args.refFile)
|
|
179 cellularityToUsed = testNone(args.cellularityToUsed)
|
|
180 ploidyToUsed = testNone(args.ploidyToUsed)
|
|
181 samtoolsOptions = testNone(args.samtoolsOptions)
|
|
182 outGalaxyValue = testNone(args.outGalaxy)
|
|
183
|
|
184 ################Rscrip PATH#################
|
|
185 RscriptPath = "Rscript --vanilla "
|
|
186 samtoolsPath = "samtools mpileup "
|
|
187 ############################################
|
|
188
|
|
189 pathFile = os.path.dirname(os.path.realpath(__file__))
|
|
190 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
|
|
191 os.popen("mkdir "+outGalaxyValueDir)
|
|
192 os.popen("chmod 777 -R "+outGalaxyValueDir)
|
|
193
|
|
194 log = open(outGalaxyValueDir+"/logFile.txt", "w")
|
|
195
|
|
196 log.write(samtoolsOptions)
|
|
197
|
|
198 if fileFormat=="BAM" :
|
|
199
|
|
200 cmd=samtoolsPath+"-f "+refFile+" "+normalFile+" "+samtoolsOptions+" > "+normalFile+".tmp"
|
|
201 proc = subprocess.Popen( args=cmd, stdout=log, stderr=log, shell=True)
|
|
202 proc.wait()
|
|
203 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
|
|
204 os.system("rm -f "+normalFile+".tmp")
|
|
205
|
|
206 cmd=samtoolsPath+"-f "+refFile+" "+tumorFile+" "+samtoolsOptions+" > "+tumorFile+".tmp"
|
|
207 proc = subprocess.Popen( args=cmd, stdout=log, stderr=log, shell=True)
|
|
208 proc.wait()
|
|
209 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
|
|
210 os.system("rm -f "+tumorFile+".tmp")
|
|
211
|
|
212 if fileFormat=="pileup" :
|
|
213 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
|
|
214 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
|
|
215
|
|
216 if fileFormat=="pileup_gz" :
|
|
217 os.system("cp -f "+normalFile+" "+normalFile+".gz")
|
|
218 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
|
|
219
|
|
220
|
|
221
|
|
222
|
|
223 #############################################################
|
|
224
|
|
225 if usePersonalEstimation=="yes":
|
|
226
|
|
227 cellularityToUsed = float(cellularityToUsed)/100
|
|
228 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
|
|
229 log.write(str(command))
|
|
230
|
|
231 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
232 Rscrpit.wait()
|
|
233
|
|
234 else :
|
|
235
|
|
236 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
|
|
237 log.write(str(command))
|
|
238
|
|
239 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
|
|
240 Rscrpit.wait()
|
|
241
|
|
242 #############################################################
|
|
243
|
|
244 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
|
|
245
|
|
246
|
|
247 log.close()
|
|
248
|
|
249
|
|
250
|
|
251
|
|
252
|
|
253
|
|
254
|
|
255
|
|
256
|
|
257
|
|
258
|
|
259
|