view sequenza_wrapper.py @ 11:3b7cb5a0ebf8 draft

Uploaded
author jbrayet
date Tue, 18 Aug 2015 09:05:33 -0400
parents aff64bdd2a36
children
line wrap: on
line source

#! /usr/bin/python
# -*- coding: utf8 -*-
"""#Sequenza Galaxy - developed by Jocelyn Brayet <jocelyn.brayet@curie.fr>
#Copyright (C) 2015  Institut Curie
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###########################################################'
#
#usage: sequenza_wrapper.py [-h] -normal <NORMAL_FILE> -tumor <TUMOR_FILE>
#                           -name <SAMPLE_NAME> -gcContent <GC_FILE> -format
#                           <FILE_FORMAT> -estimation <ESTIMATION>
#                           [-ref_file <REF_FILE>] [-cellularity <CELLULARITY>]
#                           [-ploidy <PLOIDY>] [-selector_index <SELECTOR>]
#                           [-samtools_options <SAMTOOLS_OPTIONS>] -outGalaxy
#                           <OUT_GALAXY>
#
#Run Sequenza with Galaxy.
#
#optional arguments:
#  -h, --help            show this help message and exit
#  -normal <NORMAL_FILE>, --normalFile <NORMAL_FILE>
#                        Normal input file (BAM, pileup, pileup.gz).
#  -tumor <TUMOR_FILE>, --tumorFile <TUMOR_FILE>
#                        Tumor input file (BAM, pileup, pileup.gz).
#  -name <SAMPLE_NAME>, --sampleName <SAMPLE_NAME>
#                        Sample Name.
#  -gcContent <GC_FILE>, --GCfile <GC_FILE>
#                        GC content file (txt).
#  -format <FILE_FORMAT>, --fileFormat <FILE_FORMAT>
#                        Files format.
#  -estimation <ESTIMATION>, --usePersonalEstimation <ESTIMATION>
#                        To use sequenza estimation or not.
#  -ref_file <REF_FILE>, --refFile <REF_FILE>
#                        Index file to samtools mpileup.
#  -cellularity <CELLULARITY>, --cellularityToUsed <CELLULARITY>
#                        If estimation = no, cellularity used.
#  -ploidy <PLOIDY>, --ploidyToUsed <PLOIDY>
#                        If estimation = no, plody used.
#  -selector_index <SELECTOR>, --selector <SELECTOR>
#                        Source for the reference list.
#  -samtools_options <SAMTOOLS_OPTIONS>, --samtoolsOptions <SAMTOOLS_OPTIONS>
#                        Samtools options (-A, -B, -d, -q and -Q).
#  -outGalaxy <OUT_GALAXY>, --outGalaxy <OUT_GALAXY>
#
#Version 1.2 - 05/08/2015 - Adapted from Jocelyn Brayet, France Genomique team
#
###########################################################"""
__author__ =  'Jocelyn Brayet'

###########################################################'
## Import

import argparse
import glob
import os
import signal
import subprocess

## Tested with python 2.6.6
sequenzaVersion = '1.2 - 05/08/2015'

################################ functions ############################################################
## Define a function to test arguments

def testNone(argument):
	"""
	Test if argument is None or not.
		argument -> argument gived by user (XML file)
	"""

	if not argument is None:
		variable = argument[0]
	else:
		variable = ""
	return variable

def subprocess_setup():
	""" 
	Python installs a SIGPIPE handler by default. This is usually not what non-Python subprocesses expect.
	"""

	signal.signal(signal.SIGPIPE, signal.SIG_DFL)

def createHTML(resultFile,resultPath,sample):
	"""
	Building a result HTML displayed in Galaxy.
		resultFile -> HTML file
		resultPath -> Galaxy result path
		sample -> sample name
	"""

	resultHTML=open(resultFile,"w")
	resultHTML.write("<html>\n<head>\n</head>\n<body>\n")
	resultHTML.write("<h1><a target='_top' href='http://cran.r-project.org/web/packages/sequenza/index.html'>Sequenza</a> - results</h1>\n")
	resultHTML.write("<p align='center'>\n<table width=70% border=1>\n<font size='30pt'><tr>\n<th colspan=3>Additional files</th>\n</tr>\n")
	resultHTML.write("<tr>\n<td><a href='out.seqz.gz'>Out Sequenza</a><br><a href='out.small.seqz.gz'>Out small Sequenza</a></td>\n<td><a href='"+sample+"_sequenza_cp_table.RData'>Cellularity and ploidy matrix (RData)</a><br><a href='"+sample+"_sequenza_extract.RData'>Sequenza extract (RData)</a></td><td><a href='logFile.txt'>Log file</a></td>\n</tr>\n")
	resultHTML.write("</font>\n</table>\n</p>\n")
	resultHTML.write("<h2>Genome-wide view of the allele and copy number state</h2>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_analyse.pdf' width='800px' height='590px'></p>\n")
	resultHTML.write("<p align='center'>Genome-wide allele-specific copy number profile obtained from exome sequencing (top), genome-wide absolute copy number profile obtained from exome sequencing (middle) and depth ratio (bottom).</p>\n")

	fileList=glob.glob(resultPath+"/segments_*.txt")
	alternativeSolutionsList=""

	for segmentsFile in fileList:
		if not "segments_1.txt" in segmentsFile:
			alternativeSolutionsList = alternativeSolutionsList+" "+segmentsFile

	os.system("mkdir "+resultPath+"/segments")
	os.system("mv "+alternativeSolutionsList+" "+resultPath+"/segments")
	cmdLine="cd "+resultPath+"; tar czf "+sample+"_segments.tar.gz segments"
	os.system(cmdLine)

	resultHTML.write("<p align='center'>File (best solution or user solution): <a href='"+sample+"_segments.txt'>Segments</a></p>\n")
	resultHTML.write("<p align='center'>File (alternative solutions): <a href='"+sample+"_segments.tar.gz'>Segments</a></p>\n")
	resultHTML.write("<p align='center'>File : <a href='"+sample+"_mutations.txt'>Mutations</a></p>\n")
	resultHTML.write("<h2>Confidence intervals, confidence region and point estimate</h2>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_CP_contours.pdf' width='580px' height='580px'></p>\n")
	resultHTML.write("<p align='center'>Result from the inference over the defined range of cellularity and ploidy. Color intensity indicates the log posterior probability of corresponding cellularity/ploidy values.</p>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_CN_bars.pdf' width='580px' height='580px'></p>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_model_fit.pdf' width='580px' height='580px'></p>\n")
	resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the best cellularity and ploidy.</p>\n")
	resultHTML.write("<p align='center'>File : <a href='"+sample+"_cellularity_ploidy.txt'>Cellularity and ploidy</a></p>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_alternative_fit.pdf' width='580px' height='580px'></p>\n")
	resultHTML.write("<p align='center'>Observed depth ratio and BAF values for each genomic segment (black circles and dots) along with the representative joint LPP density (colors). The representative joint LPP density is calculated for the alternative cellularity and ploidy (scroll).</p>\n")        
	resultHTML.write("<p align='center'>File : <a href='"+sample+"_alternative_solutions.txt'>Alternative solutions</a></p>\n")
	resultHTML.write("<h2>Normalization of depth ratio</h2>\n")
	resultHTML.write("<p align='center'><a href='"+sample+"_gc_stat.png'><img border='1' width='680px height='300px src='"+sample+"_gc_stat.png'></a></p>\n")
	resultHTML.write("<p align='center'>Visualization of depth.ratio bias in relation of GC content (left), and resulting normalization effect (right)</p>\n")
	resultHTML.write("<h2>Plot chromosome view with mutations, BAF, depth ratio and segments (best solution or user solution)</h2>\n")
	resultHTML.write("<p align='center'><embed src='"+sample+"_chromosome_view.pdf' width='680px' height='680px'></p>\n")
	resultHTML.write("<p align='center'>Plots of mutant allele frequency (top), B allele frequency (middle) and depth ratio (bottom) vs. chromosome position (scroll).</p>\n")
	resultHTML.write("</body>\n</html>")
	resultHTML.close()

if __name__ == '__main__':

	########### sequenza arguments ####################
	parser = argparse.ArgumentParser(description='Run Sequenza with Galaxy.', epilog='Version '+sequenzaVersion)
	
	parser.add_argument('-normal', '--normalFile', metavar='<NORMAL_FILE>', type=str, nargs=1, help='Normal input file (BAM, pileup, pileup.gz).', required=True)
	parser.add_argument('-tumor', '--tumorFile', metavar='<TUMOR_FILE>', type=str, nargs=1, help='Tumor input file (BAM, pileup, pileup.gz).', required=True)
	parser.add_argument('-name', '--sampleName', metavar='<SAMPLE_NAME>', type=str, nargs=1, help='Sample Name.', required=True)
	parser.add_argument('-gcContent', '--GCfile', metavar='<GC_FILE>', type=str, nargs=1, help='GC content file (txt).', required=True)
	parser.add_argument('-format', '--fileFormat', metavar='<FILE_FORMAT>', type=str, nargs=1, help='Files format.', required=True)
	parser.add_argument('-estimation', '--usePersonalEstimation', metavar='<ESTIMATION>', type=str, nargs=1, help='To use sequenza estimation or not.', required=True)
	parser.add_argument('-ref_file', '--refFile', metavar='<REF_FILE>', type=str, nargs=1, help='Index file to samtools mpileup.', required=False)
	parser.add_argument('-cellularity', '--cellularityToUsed', metavar='<CELLULARITY>', type=int, nargs=1, help='If estimation = no, cellularity used.', required=False)
	parser.add_argument('-ploidy', '--ploidyToUsed', metavar='<PLOIDY>', type=int, nargs=1, help='If estimation = no, plody used.', required=False)
	parser.add_argument('-selector_index', '--selector', metavar='<SELECTOR>', type=str, nargs=1, help='Source for the reference list.', required=False)
	parser.add_argument('-samtools_options', '--samtoolsOptions', metavar='<SAMTOOLS_OPTIONS>', type=str, nargs=1, help='Samtools options (-A, -B, -d, -q and -Q).', required=False)

	################################ galaxy arguments ############################################################
	parser.add_argument('-outGalaxy', '--outGalaxy', metavar='<OUT_GALAXY>', type=str, nargs=1, required=True)
	###########################################################'

	args = parser.parse_args()

	normalFile = testNone(args.normalFile)
	tumorFile = testNone(args.tumorFile)
	sampleName = testNone(args.sampleName)
	gcContent = testNone(args.GCfile)
	fileFormat = testNone(args.fileFormat)
	usePersonalEstimation = testNone(args.usePersonalEstimation)
	refFile = testNone(args.refFile)
	cellularityToUsed = testNone(args.cellularityToUsed)
	ploidyToUsed = testNone(args.ploidyToUsed)
	selector = testNone(args.selector)
	samtoolsOptions = testNone(args.samtoolsOptions)
	outGalaxyValue = testNone(args.outGalaxy)

	################Rscrip PATH#################
	RscriptPath = "Rscript --vanilla "
	############################################

	pathFile = os.path.dirname(os.path.realpath(__file__))
	samtoolsPath = pathFile.replace("copy_number","samtools/")
        outGalaxyValueDir = outGalaxyValue.replace(".dat","_files")
	os.popen("mkdir "+outGalaxyValueDir)
	os.popen("chmod 777 -R "+outGalaxyValueDir)

	log = open(outGalaxyValueDir+"/logFile.txt", "w")

	log.write(samtoolsOptions)

	if fileFormat=="BAM" :
		if selector=="cached":
			
			cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
			proc = subprocess.Popen( args=cmd, shell=True)
        		proc.wait()
			os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
			os.system("rm -f "+normalFile+".tmp")

			cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -p \'-f \""+refFile+"\"\' -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
			proc = subprocess.Popen( args=cmd, shell=True)
        		proc.wait()
			os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
			os.system("rm -f "+tumorFile+".tmp")

		else:

			cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+normalFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+normalFile+".tmp\" \'"
			proc = subprocess.Popen( args=cmd, shell=True)
        		proc.wait()
			os.system("gzip -f -c "+normalFile+".tmp > "+normalFile+".gz")
			os.system("rm -f "+normalFile+".tmp")

			cmd=samtoolsPath+"samtools_wrapper.py -p 'samtools mpileup' -d \'-f \""+refFile+"\"\' \"fa\" \"reference_input\" -d \" \" \""+tumorFile+"\" \"bam\" \"bam_normal\" -p\' "+samtoolsOptions+" > \""+tumorFile+".tmp\" \'"
			proc = subprocess.Popen( args=cmd, shell=True)
        		proc.wait()
			os.system("gzip -f -c "+tumorFile+".tmp > "+tumorFile+".gz")
			os.system("rm -f "+tumorFile+".tmp")

	if fileFormat=="pileup" :
		os.system("gzip -f -c "+normalFile+" > "+ normalFile+".gz")
		os.system("gzip -f -c "+tumorFile+" > "+tumorFile+".gz")

	if fileFormat=="pileup_gz" :
		os.system("cp -f "+normalFile+" "+normalFile+".gz")
		os.system("cp -f "+tumorFile+" "+tumorFile+".gz")

	


	#############################################################

	if usePersonalEstimation=="yes":

		cellularityToUsed = float(cellularityToUsed)/100
		command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity "+str(cellularityToUsed)+" -ploidy "+str(ploidyToUsed)
		log.write(str(command))

		Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
		Rscrpit.wait()

	else :

		command = RscriptPath+pathFile+"/Sequenza_analysis.R -normal "+normalFile+".gz"+" -tumor "+tumorFile+".gz"+" -out "+outGalaxyValueDir+" -gcContent "+gcContent+" -name "+sampleName+ " -cellularity 0 -ploidy 0"
		log.write(str(command))

		Rscrpit = subprocess.Popen(command, preexec_fn=subprocess_setup, stdout=log, stderr=log, shell=True)
		Rscrpit.wait()

	#############################################################

	createHTML(outGalaxyValue,outGalaxyValueDir,sampleName)


	log.close()