view breakdancer.py @ 25:d05d0c77b021 draft

Uploaded
author jeremie
date Tue, 01 Jul 2014 10:35:15 -0400
parents b4f641515363
children acec6ba9884a
line wrap: on
line source

#!/usr/bin/python

import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time


parser = argparse.ArgumentParser(description='')

parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )


tmp_dir = tempfile.mkdtemp()
errorFile = tmp_dir+"/errorLog"

currentDir = os.path.dirname(os.path.abspath(__file__))

binPath = os.environ['BREAKDANCER_BIN']
print(binPath)
print(str(binPath))
bam2cfg = str(binPath)+"/bam2cfg.pl"
breakdancer = binPath+"/breakdancer-max"
breakdancer2vcf = binPath+"/breakdancer2vcf.py"


def bam2cfg(args):
	config = tmp_dir+"/breakdancer_config"
	cmd = 'perl %s %s' % (bam2cfg, args.input_bam)
	execute(cmd, output=config)
	print ("\ncmd = %s  \n" ) %(cmd)
	return config


def breakdancer(args, config):
	cmd = '%s %s' % (breakdancer, config)
	execute(cmd, output=args.output_raw)


def breakdancer2vcf(args):
	cmd = "python %s -i %s -o %s" % ( breakdancer2vcf, args.output_raw, args.output_vcf )
	execute(cmd)


def execute( cmd, output="" ):
	try: 
		err = open(tmp_dir+"/errorLog", 'a')
		if output != "":
			out = open(output, 'w')
		else:
			out = subprocess.PIPE
		process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
		process.wait()
		err.close()
		if out != subprocess.PIPE:
			out.close()
	except Exception, e:
		sys.stderr.write("problem doing : %s\n" %(cmd))
		sys.stderr.write( '%s\n\n' % str(e) )


def check( output ):
	if (os.path.getsize(output)>0):
		return True
	else:
		sys.stderr.write('The output file is empty : %s\n' % (output))


def getLine(file):
	try:
		f = open(file, 'r')
		lignes = f.readlines()
		n=0
		for ligne in lignes:
			if ligne.strip()[0]!="#":
				n+=1
		# n = len(lignes)
		f.close()
	except Exception, e:
		sys.stderr.write( '%s\n' % str(e) )
	return n


def compare( output1, output2 ):
	# compare le nombre de ligne entre 2 fichiers
	# pour verifier qu'aucune ligne n'a ete saute
	num_raw = getLine(output1)
	num_vcf = getLine(output2)
	if (num_raw==num_vcf):
		return True
	else:
		sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))


def __main__():

	time0 = time.time()
	args = parser.parse_args()

	try:
		config = bam2cfg(args)
		breakdancer(args, config)
		breakdancer2vcf(args)

		#	quelques tests
		# verifier que les fichiers de sorties ne sont pas vides
		check(args.output_raw)
		check(args.output_vcf)
		# comparer le nombre de ligne entre les 2 fichiers
		# pour etre sur que toute les variations ont ete prises en compte
		compare(args.output_raw, args.output_vcf)
			
		sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))

		if ( os.path.getsize(errorFile)>0 ):
			sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
			try:
				err = open(errorFile, 'r')
				errors = err.read()
				err.close()
				sys.stdout.write( "Errors :\n%s" %(errors))
			except Exception, e:
				sys.stderr.write( '%s\n' % str(e) )
		else:
			sys.stdout.write( 'BreakDancer successful' )

	except Exception, e:
		sys.stdout.write( 'BreakDancer fail\n' )
		sys.stderr.write( '%s\n' % str(e) )
		sys.exit()

	finally:
		if os.path.exists( tmp_dir ):
			shutil.rmtree( tmp_dir )

if __name__=="__main__":
	__main__()