Mercurial > repos > jbrayet > sequenza
comparison sequenza_wrapper.py @ 27:e61ad7276f18 draft
Uploaded
author | jbrayet |
---|---|
date | Wed, 19 Aug 2015 09:09:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
26:20e6f88018f7 | 27:e61ad7276f18 |
---|---|
1 #! /usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr> | |
4 #Copyright (C) 2015 Institut Curie | |
5 # | |
6 #This program is free software: you can redistribute it and/or modify | |
7 #it under the terms of the GNU General Public License as published by | |
8 #the Free Software Foundation, either version 3 of the License, or | |
9 #(at your option) any later version. | |
10 # | |
11 #This program is distributed in the hope that it will be useful, | |
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 #GNU General Public License for more details. | |
15 # | |
16 #You should have received a copy of the GNU General Public License | |
17 #along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 # | |
19 ###########################################################' | |
20 # | |
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE> | |
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format | |
23 # <FILE_FORMAT> -estimation <ESTIMATION> | |
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>] | |
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>] | |
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy | |
27 # <OUT_GALAXY> | |
28 # | |
29 #Run Sequenza with Galaxy. | |
30 # | |
31 #optional arguments: | |
32 # -h, --help show this help message and exit | |
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE> | |
34 # Normal input file (BAM, pileup, pileup.gz). | |
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE> | |
36 # Tumor input file (BAM, pileup, pileup.gz). | |
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME> | |
38 # Sample Name. | |
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE> | |
40 # GC content file (txt). | |
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT> | |
42 # Files format. | |
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION> | |
44 # To use sequenza estimation or not. | |
45 # -ref_file <REF_FILE>, --refFile <REF_FILE> | |
46 # Index file to samtools mpileup. | |
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY> | |
48 # If estimation = no, cellularity used. | |
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY> | |
50 # If estimation = no, plody used. | |
51 # -selector_index <SELECTOR>, --selector <SELECTOR> | |
52 # Source for the reference list. | |
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS> | |
54 # Samtools options (-A, -B, -d, -q and -Q). | |
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY> | |
56 # | |
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team | |
58 # | |
59 ###########################################################""" | |
60 __author__ = 'Jocelyn Brayet' | |
61 | |
62 ###########################################################' | |
63 ## Import | |
64 | |
65 import argparse | |
66 import glob | |
67 import os | |
68 import signal | |
69 import subprocess | |
70 | |
71 ## Tested with python 2.6.6 | |
72 sequenzaVersion = '1.2 - 05/08/2015' | |
73 | |
74 ################################ functions ############################################################ | |
75 ## Define a function to test arguments | |
76 | |
77 def testNone(argument): | |
78 """ | |
79 Test if argument is None or not. | |
80 argument -> argument gived by user (XML file) | |
81 """ | |
82 | |
83 if not argument is None: | |
84 variable = argument[0] | |
85 else: | |
86 variable = "" | |
87 return variable | |
88 | |
89 def subprocess_setup(): | |
90 """ | |
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect. | |
92 """ | |
93 | |
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL) | |
95 | |
96 def createHTML(resultFile,resultPath,sample): | |
97 """ | |
98 Building a result HTML displayed in Galaxy. | |
99 resultFile -> HTML file | |
100 resultPath -> Galaxy result path | |
101 sample -> sample name | |
102 """ | |
103 | |
104 resultHTML=open(resultFile,"w") | |
105 | |
106 cmdLine="tar czf all_files.tar.gz "+resultPath | |
107 os.system(cmdLine) | |
108 | |
109 resultHTML.write("<html>\n<head>\n</head>\n<body>\n") | |
110 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n") | |
111 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n") | |
112 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n") | |
113 resultHTML.write("</font>\n</table>\n</p>\n") | |
114 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n") | |
115 resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n") | |
116 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n") | |
117 | |
118 fileList=glob.glob(resultPath+"/segments_*.txt") | |
119 alternativeSolutionsList="" | |
120 | |
121 for segmentsFile in fileList: | |
122 if not "segments_1.txt" in segmentsFile: | |
123 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile | |
124 | |
125 os.system("mkdir "+resultPath+"/segments") | |
126 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments") | |
127 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments" | |
128 os.system(cmdLine) | |
129 | |
130 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n") | |
131 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n") | |
132 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n") | |
133 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n") | |
134 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n") | |
135 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n") | |
136 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n") | |
137 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n") | |
138 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n") | |
139 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n") | |
140 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n") | |
141 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n") | |
142 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n") | |
143 resultHTML.write("<h2>Normalization of depth ratio</h2>\n") | |
144 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n") | |
145 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n") | |
146 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n") | |
147 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n") | |
148 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n") | |
149 resultHTML.write("</body>\n</html>") | |
150 resultHTML.close() | |
151 | |
152 if __name__ == '__main__': | |
153 | |
154 ########### sequenza arguments #################### | |
155 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion) | |
156 | |
157 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True) | |
158 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True) | |
159 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True) | |
160 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True) | |
161 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True) | |
162 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True) | |
163 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False) | |
164 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False) | |
165 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False) | |
166 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False) | |
167 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False) | |
168 | |
169 ################################ galaxy arguments ############################################################ | |
170 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True) | |
171 ###########################################################' | |
172 | |
173 args = parser.parse_args() | |
174 | |
175 normalFile = testNone(args.normalFile) | |
176 tumorFile = testNone(args.tumorFile) | |
177 sampleName = testNone(args.sampleName) | |
178 gcContent = testNone(args.GCfile) | |
179 fileFormat = testNone(args.fileFormat) | |
180 usePersonalEstimation = testNone(args.usePersonalEstimation) | |
181 refFile = testNone(args.refFile) | |
182 cellularityToUsed = testNone(args.cellularityToUsed) | |
183 ploidyToUsed = testNone(args.ploidyToUsed) | |
184 selector = testNone(args.selector) | |
185 samtoolsOptions = testNone(args.samtoolsOptions) | |
186 outGalaxyValue = testNone(args.outGalaxy) | |
187 | |
188 ################Rscrip PATH################# | |
189 RscriptPath = "Rscript --vanilla " | |
190 ############################################ | |
191 | |
192 pathFile = os.path.dirname(os.path.realpath(__file__)) | |
193 samtoolsPath = pathFile.replace("copy_number","samtools/") | |
194 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files") | |
195 os.popen("mkdir "+outGalaxyValueDir) | |
196 os.popen("chmod 777 -R "+outGalaxyValueDir) | |
197 | |
198 log = open(outGalaxyValueDir+"/logFile.txt", "w") | |
199 | |
200 log.write(samtoolsOptions) | |
201 | |
202 if fileFormat=="BAM" : | |
203 if selector=="cached": | |
204 | |
205 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'" | |
206 proc = subprocess.Popen( args=cmd, shell=True) | |
207 proc.wait() | |
208 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz") | |
209 os.system("rm -f "+normalFile+".tmp") | |
210 | |
211 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'" | |
212 proc = subprocess.Popen( args=cmd, shell=True) | |
213 proc.wait() | |
214 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz") | |
215 os.system("rm -f "+tumorFile+".tmp") | |
216 | |
217 else: | |
218 | |
219 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'" | |
220 proc = subprocess.Popen( args=cmd, shell=True) | |
221 proc.wait() | |
222 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz") | |
223 os.system("rm -f "+normalFile+".tmp") | |
224 | |
225 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'" | |
226 proc = subprocess.Popen( args=cmd, shell=True) | |
227 proc.wait() | |
228 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz") | |
229 os.system("rm -f "+tumorFile+".tmp") | |
230 | |
231 if fileFormat=="pileup" : | |
232 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz") | |
233 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz") | |
234 | |
235 if fileFormat=="pileup_gz" : | |
236 os.system("cp -f "+normalFile+" "+normalFile+".gz") | |
237 os.system("cp -f "+tumorFile+" "+tumorFile+".gz") | |
238 | |
239 | |
240 | |
241 | |
242 ############################################################# | |
243 | |
244 if usePersonalEstimation=="yes": | |
245 | |
246 cellularityToUsed = float(cellularityToUsed)/100 | |
247 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed) | |
248 log.write(str(command)) | |
249 | |
250 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True) | |
251 Rscrpit.wait() | |
252 | |
253 else : | |
254 | |
255 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0" | |
256 log.write(str(command)) | |
257 | |
258 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True) | |
259 Rscrpit.wait() | |
260 | |
261 ############################################################# | |
262 | |
263 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName) | |
264 | |
265 | |
266 log.close() | |
267 | |
268 | |
269 | |
270 | |
271 | |
272 | |
273 | |
274 | |
275 | |
276 | |
277 | |
278 |