comparison sequenza_wrapper.py @ 33:90e2f2e84ab2 draft

Uploaded
author jbrayet
date Fri, 21 Aug 2015 05:46:18 -0400
parents
children
comparison
equal deleted inserted replaced
32:046d256695d1 33:90e2f2e84ab2
1 #! /usr/bin/python
2 # -*- coding: utf8 -*-
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
4 #Copyright (C) 2015 Institut Curie
5 #
6 #This program is free software: you can redistribute it and/or modify
7 #it under the terms of the GNU General Public License as published by
8 #the Free Software Foundation, either version 3 of the License, or
9 #(at your option) any later version.
10 #
11 #This program is distributed in the hope that it will be useful,
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 #GNU General Public License for more details.
15 #
16 #You should have received a copy of the GNU General Public License
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #
19 ###########################################################'
20 #
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
23 # <FILE_FORMAT> -estimation <ESTIMATION>
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
27 # <OUT_GALAXY>
28 #
29 #Run Sequenza with Galaxy.
30 #
31 #optional arguments:
32 # -h, --help show this help message and exit
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
34 # Normal input file (BAM, pileup, pileup.gz).
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
36 # Tumor input file (BAM, pileup, pileup.gz).
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
38 # Sample Name.
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
40 # GC content file (txt).
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
42 # Files format.
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
44 # To use sequenza estimation or not.
45 # -ref_file <REF_FILE>, --refFile <REF_FILE>
46 # Index file to samtools mpileup.
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
48 # If estimation = no, cellularity used.
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
50 # If estimation = no, plody used.
51 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
52 # Samtools options (-A, -B, -d, -q and -Q).
53 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
54 #
55 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
56 #
57 ###########################################################"""
58 __author__ = 'Jocelyn Brayet'
59
60 ###########################################################'
61 ## Import
62
63 import argparse
64 import glob
65 import os
66 import signal
67 import subprocess
68
69 ## Tested with python 2.6.6
70 sequenzaVersion = '1.2 - 05/08/2015'
71
72 ################################ functions ############################################################
73 ## Define a function to test arguments
74
75 def testNone(argument):
76 """
77 Test if argument is None or not.
78 argument -> argument gived by user (XML file)
79 """
80
81 if not argument is None:
82 variable = argument[0]
83 else:
84 variable = ""
85 return variable
86
87 def subprocess_setup():
88 """
89 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
90 """
91
92 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
93
94 def createHTML(resultFile,resultPath,sample):
95 """
96 Building a result HTML displayed in Galaxy.
97 resultFile -> HTML file
98 resultPath -> Galaxy result path
99 sample -> sample name
100 """
101
102 resultHTML=open(resultFile,"w")
103
104 cmdLine="cd "+resultPath.replace(os.path.basename(resultPath),"")+"; tar czf all_files.tar.gz "+os.path.basename(resultPath)
105 os.system(cmdLine)
106 os.system("mv "+resultPath.replace(os.path.basename(resultPath),"")+"all_files.tar.gz "+resultPath)
107
108 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
109 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
110 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
111 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n")
112 resultHTML.write("</font>\n</table>\n</p>\n")
113 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
114 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
115 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
116
117 fileList=glob.glob(resultPath+"/segments_*.txt")
118 alternativeSolutionsList=""
119
120 for segmentsFile in fileList:
121 if not "segments_1.txt" in segmentsFile:
122 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
123
124 os.system("mkdir "+resultPath+"/segments")
125 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
126 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
127 os.system(cmdLine)
128
129 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
130 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
131 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
132 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
133 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
134 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
135 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
136 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
137 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
138 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
139 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
140 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
141 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
142 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
143 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n")
144 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
145 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
146 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
147 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
148 resultHTML.write("</body>\n</html>")
149 resultHTML.close()
150
151 if __name__ == '__main__':
152
153 ########### sequenza arguments ####################
154 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
155
156 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
157 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
158 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
159 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
160 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
161 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
162 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
163 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
164 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
165 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
166
167 ################################ galaxy arguments ############################################################
168 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
169 ###########################################################'
170
171 args = parser.parse_args()
172
173 normalFile = testNone(args.normalFile)
174 tumorFile = testNone(args.tumorFile)
175 sampleName = testNone(args.sampleName)
176 gcContent = testNone(args.GCfile)
177 fileFormat = testNone(args.fileFormat)
178 usePersonalEstimation = testNone(args.usePersonalEstimation)
179 refFile = testNone(args.refFile)
180 cellularityToUsed = testNone(args.cellularityToUsed)
181 ploidyToUsed = testNone(args.ploidyToUsed)
182 samtoolsOptions = testNone(args.samtoolsOptions)
183 outGalaxyValue = testNone(args.outGalaxy)
184
185 ################Rscrip PATH#################
186 RscriptPath = "Rscript --vanilla "
187 samtoolsPath = "samtools mpileup "
188 ############################################
189
190 pathFile = os.path.dirname(os.path.realpath(__file__))
191 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
192 os.popen("mkdir "+outGalaxyValueDir)
193 os.popen("chmod 777 -R "+outGalaxyValueDir)
194
195 log = open(outGalaxyValueDir+"/logFile.txt", "w")
196
197 log.write(samtoolsOptions)
198
199 if fileFormat=="BAM" :
200
201 cmd=samtoolsPath+"-f "+refFile+" "+normalFile+" "+samtoolsOptions+" > "+normalFile+".tmp"
202 proc = subprocess.Popen( args=cmd, shell=True)
203 proc.wait()
204 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
205 os.system("rm -f "+normalFile+".tmp")
206
207 cmd=samtoolsPath+"-f "+refFile+" "+tumorFile+" "+samtoolsOptions+" > "+tumorFile+".tmp"
208 proc = subprocess.Popen( args=cmd, shell=True)
209 proc.wait()
210 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
211 os.system("rm -f "+tumorFile+".tmp")
212
213 if fileFormat=="pileup" :
214 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
215 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
216
217 if fileFormat=="pileup_gz" :
218 os.system("cp -f "+normalFile+" "+normalFile+".gz")
219 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
220
221
222
223
224 #############################################################
225
226 if usePersonalEstimation=="yes":
227
228 cellularityToUsed = float(cellularityToUsed)/100
229 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
230 log.write(str(command))
231
232 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
233 Rscrpit.wait()
234
235 else :
236
237 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
238 log.write(str(command))
239
240 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
241 Rscrpit.wait()
242
243 #############################################################
244
245 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
246
247
248 log.close()
249
250
251
252
253
254
255
256
257
258
259
260