comparison sequenza_wrapper.py @ 29:8687649c8ec3 draft

Uploaded
author jbrayet
date Thu, 20 Aug 2015 05:44:55 -0400
parents
children
comparison
equal deleted inserted replaced
28:1448b2b18bee 29:8687649c8ec3
1 #! /usr/bin/python
2 # -*- coding: utf8 -*-
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
4 #Copyright (C) 2015 Institut Curie
5 #
6 #This program is free software: you can redistribute it and/or modify
7 #it under the terms of the GNU General Public License as published by
8 #the Free Software Foundation, either version 3 of the License, or
9 #(at your option) any later version.
10 #
11 #This program is distributed in the hope that it will be useful,
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 #GNU General Public License for more details.
15 #
16 #You should have received a copy of the GNU General Public License
17 #along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #
19 ###########################################################'
20 #
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
23 # <FILE_FORMAT> -estimation <ESTIMATION>
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
27 # <OUT_GALAXY>
28 #
29 #Run Sequenza with Galaxy.
30 #
31 #optional arguments:
32 # -h, --help show this help message and exit
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
34 # Normal input file (BAM, pileup, pileup.gz).
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
36 # Tumor input file (BAM, pileup, pileup.gz).
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
38 # Sample Name.
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE>
40 # GC content file (txt).
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
42 # Files format.
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
44 # To use sequenza estimation or not.
45 # -ref_file <REF_FILE>, --refFile <REF_FILE>
46 # Index file to samtools mpileup.
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
48 # If estimation = no, cellularity used.
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
50 # If estimation = no, plody used.
51 # -selector_index <SELECTOR>, --selector <SELECTOR>
52 # Source for the reference list.
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
54 # Samtools options (-A, -B, -d, -q and -Q).
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
56 #
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
58 #
59 ###########################################################"""
60 __author__ = 'Jocelyn Brayet'
61
62 ###########################################################'
63 ## Import
64
65 import argparse
66 import glob
67 import os
68 import signal
69 import subprocess
70
71 ## Tested with python 2.6.6
72 sequenzaVersion = '1.2 - 05/08/2015'
73
74 ################################ functions ############################################################
75 ## Define a function to test arguments
76
77 def testNone(argument):
78 """
79 Test if argument is None or not.
80 argument -> argument gived by user (XML file)
81 """
82
83 if not argument is None:
84 variable = argument[0]
85 else:
86 variable = ""
87 return variable
88
89 def subprocess_setup():
90 """
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
92 """
93
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
95
96 def createHTML(resultFile,resultPath,sample):
97 """
98 Building a result HTML displayed in Galaxy.
99 resultFile -> HTML file
100 resultPath -> Galaxy result path
101 sample -> sample name
102 """
103
104 resultHTML=open(resultFile,"w")
105
106 cmdLine="cd "+resultPath.replace(os.path.basename(resultPath),"")+"; tar czf all_files.tar.gz "+os.path.basename(resultPath)
107 os.system(cmdLine)
108 os.system("mv "+resultPath.replace(os.path.basename(resultPath),"")+"all_files.tar.gz "+resultPath)
109
110 resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
111 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
112 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
113 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n")
114 resultHTML.write("</font>\n</table>\n</p>\n")
115 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
116 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
117 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")
118
119 fileList=glob.glob(resultPath+"/segments_*.txt")
120 alternativeSolutionsList=""
121
122 for segmentsFile in fileList:
123 if not "segments_1.txt" in segmentsFile:
124 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile
125
126 os.system("mkdir "+resultPath+"/segments")
127 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
128 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
129 os.system(cmdLine)
130
131 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
132 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
133 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
134 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
135 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
136 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
137 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
138 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
139 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
140 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
141 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
142 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")
143 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
144 resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
145 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n")
146 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
147 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
148 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
149 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
150 resultHTML.write("</body>\n</html>")
151 resultHTML.close()
152
153 if __name__ == '__main__':
154
155 ########### sequenza arguments ####################
156 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
157
158 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
159 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
160 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
161 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
162 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
163 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
164 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
165 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
166 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
167 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False)
168 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)
169
170 ################################ galaxy arguments ############################################################
171 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
172 ###########################################################'
173
174 args = parser.parse_args()
175
176 normalFile = testNone(args.normalFile)
177 tumorFile = testNone(args.tumorFile)
178 sampleName = testNone(args.sampleName)
179 gcContent = testNone(args.GCfile)
180 fileFormat = testNone(args.fileFormat)
181 usePersonalEstimation = testNone(args.usePersonalEstimation)
182 refFile = testNone(args.refFile)
183 cellularityToUsed = testNone(args.cellularityToUsed)
184 ploidyToUsed = testNone(args.ploidyToUsed)
185 selector = testNone(args.selector)
186 samtoolsOptions = testNone(args.samtoolsOptions)
187 outGalaxyValue = testNone(args.outGalaxy)
188
189 ################Rscrip PATH#################
190 RscriptPath = "Rscript --vanilla "
191 ############################################
192
193 pathFile = os.path.dirname(os.path.realpath(__file__))
194 samtoolsPath = pathFile.replace("copy_number","samtools/")
195 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
196 os.popen("mkdir "+outGalaxyValueDir)
197 os.popen("chmod 777 -R "+outGalaxyValueDir)
198
199 log = open(outGalaxyValueDir+"/logFile.txt", "w")
200
201 log.write(samtoolsOptions)
202
203 if fileFormat=="BAM" :
204 if selector=="cached":
205
206 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
207 proc = subprocess.Popen( args=cmd, shell=True)
208 proc.wait()
209 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
210 os.system("rm -f "+normalFile+".tmp")
211
212 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
213 proc = subprocess.Popen( args=cmd, shell=True)
214 proc.wait()
215 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
216 os.system("rm -f "+tumorFile+".tmp")
217
218 else:
219
220 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
221 proc = subprocess.Popen( args=cmd, shell=True)
222 proc.wait()
223 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
224 os.system("rm -f "+normalFile+".tmp")
225
226 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
227 proc = subprocess.Popen( args=cmd, shell=True)
228 proc.wait()
229 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
230 os.system("rm -f "+tumorFile+".tmp")
231
232 if fileFormat=="pileup" :
233 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
234 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")
235
236 if fileFormat=="pileup_gz" :
237 os.system("cp -f "+normalFile+" "+normalFile+".gz")
238 os.system("cp -f "+tumorFile+" "+tumorFile+".gz")
239
240
241
242
243 #############################################################
244
245 if usePersonalEstimation=="yes":
246
247 cellularityToUsed = float(cellularityToUsed)/100
248 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
249 log.write(str(command))
250
251 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
252 Rscrpit.wait()
253
254 else :
255
256 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
257 log.write(str(command))
258
259 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
260 Rscrpit.wait()
261
262 #############################################################
263
264 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)
265
266
267 log.close()
268
269
270
271
272
273
274
275
276
277
278
279