Mercurial > repos > jbrayet > sequenza
comparison sequenza_wrapper.py @ 25:cf8f1324a967 draft
Uploaded
author | jbrayet |
---|---|
date | Wed, 19 Aug 2015 08:12:59 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
24:1f3034e3fb3a | 25:cf8f1324a967 |
---|---|
1 #! /usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 """#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr> | |
4 #Copyright (C) 2015 Institut Curie | |
5 # | |
6 #This program is free software: you can redistribute it and/or modify | |
7 #it under the terms of the GNU General Public License as published by | |
8 #the Free Software Foundation, either version 3 of the License, or | |
9 #(at your option) any later version. | |
10 # | |
11 #This program is distributed in the hope that it will be useful, | |
12 #but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 #GNU General Public License for more details. | |
15 # | |
16 #You should have received a copy of the GNU General Public License | |
17 #along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 # | |
19 ###########################################################' | |
20 # | |
21 #usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE> | |
22 # -name <SAMPLE_NAME> -gcContent <GC_FILE> -format | |
23 # <FILE_FORMAT> -estimation <ESTIMATION> | |
24 # [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>] | |
25 # [-ploidy <PLOIDY>] [-selector_index <SELECTOR>] | |
26 # [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy | |
27 # <OUT_GALAXY> | |
28 # | |
29 #Run Sequenza with Galaxy. | |
30 # | |
31 #optional arguments: | |
32 # -h, --help show this help message and exit | |
33 # -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE> | |
34 # Normal input file (BAM, pileup, pileup.gz). | |
35 # -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE> | |
36 # Tumor input file (BAM, pileup, pileup.gz). | |
37 # -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME> | |
38 # Sample Name. | |
39 # -gcContent <GC_FILE>, --GCfile <GC_FILE> | |
40 # GC content file (txt). | |
41 # -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT> | |
42 # Files format. | |
43 # -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION> | |
44 # To use sequenza estimation or not. | |
45 # -ref_file <REF_FILE>, --refFile <REF_FILE> | |
46 # Index file to samtools mpileup. | |
47 # -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY> | |
48 # If estimation = no, cellularity used. | |
49 # -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY> | |
50 # If estimation = no, plody used. | |
51 # -selector_index <SELECTOR>, --selector <SELECTOR> | |
52 # Source for the reference list. | |
53 # -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS> | |
54 # Samtools options (-A, -B, -d, -q and -Q). | |
55 # -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY> | |
56 # | |
57 #Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team | |
58 # | |
59 ###########################################################""" | |
60 __author__ = 'Jocelyn Brayet' | |
61 | |
62 ###########################################################' | |
63 ## Import | |
64 | |
65 import argparse | |
66 import glob | |
67 import os | |
68 import signal | |
69 import subprocess | |
70 | |
71 ## Tested with python 2.6.6 | |
72 sequenzaVersion = '1.2 - 05/08/2015' | |
73 | |
74 ################################ functions ############################################################ | |
75 ## Define a function to test arguments | |
76 | |
77 def testNone(argument): | |
78 """ | |
79 Test if argument is None or not. | |
80 argument -> argument gived by user (XML file) | |
81 """ | |
82 | |
83 if not argument is None: | |
84 variable = argument[0] | |
85 else: | |
86 variable = "" | |
87 return variable | |
88 | |
89 def subprocess_setup(): | |
90 """ | |
91 Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect. | |
92 """ | |
93 | |
94 signal.signal(signal.SIGPIPE, signal.SIG_DFL) | |
95 | |
96 def createHTML(resultFile,resultPath,sample): | |
97 """ | |
98 Building a result HTML displayed in Galaxy. | |
99 resultFile -> HTML file | |
100 resultPath -> Galaxy result path | |
101 sample -> sample name | |
102 """ | |
103 | |
104 resultHTML=open(resultFile,"w") | |
105 | |
106 cmdLine="tar czf all_files.tar.gz "+resultPath | |
107 | |
108 resultHTML.write("<html>\n<head>\n</head>\n<body>\n") | |
109 resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n") | |
110 resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n") | |
111 resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a><br><a href='all_files.tar.gz'>All files</a></td>\n</tr>\n") | |
112 resultHTML.write("</font>\n</table>\n</p>\n") | |
113 resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n") | |
114 resultHTML.write("<p align='center'><embed src='"+resultPath+sample+"_analyse.pdf' width='800px' height='590px'></p>\n") | |
115 resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n") | |
116 | |
117 fileList=glob.glob(resultPath+"/segments_*.txt") | |
118 alternativeSolutionsList="" | |
119 | |
120 for segmentsFile in fileList: | |
121 if not "segments_1.txt" in segmentsFile: | |
122 alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile | |
123 | |
124 os.system("mkdir "+resultPath+"/segments") | |
125 os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments") | |
126 cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments" | |
127 os.system(cmdLine) | |
128 | |
129 resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n") | |
130 resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n") | |
131 resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n") | |
132 resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n") | |
133 resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n") | |
134 resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n") | |
135 resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n") | |
136 resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n") | |
137 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n") | |
138 resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n") | |
139 resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n") | |
140 resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n") | |
141 resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n") | |
142 resultHTML.write("<h2>Normalization of depth ratio</h2>\n") | |
143 resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px' height='300px' src='"+sample+"_gc_stat.png'></a></p>\n") | |
144 resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n") | |
145 resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n") | |
146 resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n") | |
147 resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n") | |
148 resultHTML.write("</body>\n</html>") | |
149 resultHTML.close() | |
150 | |
151 if __name__ == '__main__': | |
152 | |
153 ########### sequenza arguments #################### | |
154 parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion) | |
155 | |
156 parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True) | |
157 parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True) | |
158 parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True) | |
159 parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True) | |
160 parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True) | |
161 parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True) | |
162 parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False) | |
163 parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False) | |
164 parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False) | |
165 parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False) | |
166 parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False) | |
167 | |
168 ################################ galaxy arguments ############################################################ | |
169 parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True) | |
170 ###########################################################' | |
171 | |
172 args = parser.parse_args() | |
173 | |
174 normalFile = testNone(args.normalFile) | |
175 tumorFile = testNone(args.tumorFile) | |
176 sampleName = testNone(args.sampleName) | |
177 gcContent = testNone(args.GCfile) | |
178 fileFormat = testNone(args.fileFormat) | |
179 usePersonalEstimation = testNone(args.usePersonalEstimation) | |
180 refFile = testNone(args.refFile) | |
181 cellularityToUsed = testNone(args.cellularityToUsed) | |
182 ploidyToUsed = testNone(args.ploidyToUsed) | |
183 selector = testNone(args.selector) | |
184 samtoolsOptions = testNone(args.samtoolsOptions) | |
185 outGalaxyValue = testNone(args.outGalaxy) | |
186 | |
187 ################Rscrip PATH################# | |
188 RscriptPath = "Rscript --vanilla " | |
189 ############################################ | |
190 | |
191 pathFile = os.path.dirname(os.path.realpath(__file__)) | |
192 samtoolsPath = pathFile.replace("copy_number","samtools/") | |
193 outGalaxyValueDir = outGalaxyValue.replace(".dat","_files") | |
194 os.popen("mkdir "+outGalaxyValueDir) | |
195 os.popen("chmod 777 -R "+outGalaxyValueDir) | |
196 | |
197 log = open(outGalaxyValueDir+"/logFile.txt", "w") | |
198 | |
199 log.write(samtoolsOptions) | |
200 | |
201 if fileFormat=="BAM" : | |
202 if selector=="cached": | |
203 | |
204 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'" | |
205 proc = subprocess.Popen( args=cmd, shell=True) | |
206 proc.wait() | |
207 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz") | |
208 os.system("rm -f "+normalFile+".tmp") | |
209 | |
210 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'" | |
211 proc = subprocess.Popen( args=cmd, shell=True) | |
212 proc.wait() | |
213 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz") | |
214 os.system("rm -f "+tumorFile+".tmp") | |
215 | |
216 else: | |
217 | |
218 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'" | |
219 proc = subprocess.Popen( args=cmd, shell=True) | |
220 proc.wait() | |
221 os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz") | |
222 os.system("rm -f "+normalFile+".tmp") | |
223 | |
224 cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'" | |
225 proc = subprocess.Popen( args=cmd, shell=True) | |
226 proc.wait() | |
227 os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz") | |
228 os.system("rm -f "+tumorFile+".tmp") | |
229 | |
230 if fileFormat=="pileup" : | |
231 os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz") | |
232 os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz") | |
233 | |
234 if fileFormat=="pileup_gz" : | |
235 os.system("cp -f "+normalFile+" "+normalFile+".gz") | |
236 os.system("cp -f "+tumorFile+" "+tumorFile+".gz") | |
237 | |
238 | |
239 | |
240 | |
241 ############################################################# | |
242 | |
243 if usePersonalEstimation=="yes": | |
244 | |
245 cellularityToUsed = float(cellularityToUsed)/100 | |
246 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed) | |
247 log.write(str(command)) | |
248 | |
249 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True) | |
250 Rscrpit.wait() | |
251 | |
252 else : | |
253 | |
254 command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0" | |
255 log.write(str(command)) | |
256 | |
257 Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True) | |
258 Rscrpit.wait() | |
259 | |
260 ############################################################# | |
261 | |
262 createHTML(outGalaxyValue,outGalaxyValueDir,sampleName) | |
263 | |
264 | |
265 log.close() | |
266 | |
267 | |
268 | |
269 | |
270 | |
271 | |
272 | |
273 | |
274 | |
275 | |
276 | |
277 |