changeset 2:01bb7558d3b3 draft

Uploaded
author jeremie
date Thu, 26 Jun 2014 06:10:48 -0400
parents d7254319236b
children b129309b3cb9
files breakdancer.py
diffstat 1 files changed, 131 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/breakdancer.py	Thu Jun 26 06:10:48 2014 -0400
@@ -0,0 +1,131 @@
+#!/usr/bin/python
+
+import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
+
+
+parser = argparse.ArgumentParser(description='')
+
+parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
+parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
+parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )
+
+
+tmp_dir = tempfile.mkdtemp()
+errorFile = tmp_dir+"/errorLog"
+
+bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl"
+breakdancer_path = "/usr/bin/breakdancer-max"
+breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py"
+
+
+def bam2cfg(args):
+	config = tmp_dir+"/breakdancer_config"
+	cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam)
+	execute(cmd, output=config)
+	print ("\ncmd = %s  \n" ) %(cmd)
+	return config
+
+
+def breakdancer(args, config):
+	cmd = '%s %s' % (breakdancer_path, config)
+	execute(cmd, output=args.output_raw)
+
+
+def breakdancer2vcf(args):
+	cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf )
+	execute(cmd)
+
+
+def execute( cmd, output="" ):
+	try: 
+		err = open(tmp_dir+"/errorLog", 'a')
+		if output != "":
+			out = open(output, 'w')
+		else:
+			out = subprocess.PIPE
+		process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
+		process.wait()
+		err.close()
+		if out != subprocess.PIPE:
+			out.close()
+	except Exception, e:
+		sys.stderr.write("problem doing : %s\n" %(cmd))
+		sys.stderr.write( '%s\n\n' % str(e) )
+
+
+def check( output ):
+	if (os.path.getsize(output)>0):
+		return True
+	else:
+		sys.stderr.write('The output file is empty : %s\n' % (output))
+
+
+def getLine(file):
+	try:
+		f = open(file, 'r')
+		lignes = f.readlines()
+		n=0
+		for ligne in lignes:
+			if ligne.strip()[0]!="#":
+				n+=1
+		# n = len(lignes)
+		f.close()
+	except Exception, e:
+		sys.stderr.write( '%s\n' % str(e) )
+	return n
+
+
+def compare( output1, output2 ):
+	# compare le nombre de ligne entre 2 fichiers
+	# pour verifier qu'aucune ligne n'a ete saute
+	num_raw = getLine(output1)
+	num_vcf = getLine(output2)
+	if (num_raw==num_vcf):
+		return True
+	else:
+		sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
+
+
+def __main__():
+
+	time0 = time.time()
+	args = parser.parse_args()
+
+	try:
+		config = bam2cfg(args)
+		breakdancer(args, config)
+		breakdancer2vcf(args)
+
+		#	quelques tests
+		# verifier que les fichiers de sorties ne sont pas vides
+		check(args.output_raw)
+		check(args.output_vcf)
+		# comparer le nombre de ligne entre les 2 fichiers
+		# pour etre sur que toute les variations ont ete prises en compte
+		compare(args.output_raw, args.output_vcf)
+			
+		sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))
+
+		if ( os.path.getsize(errorFile)>0 ):
+			sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
+			try:
+				err = open(errorFile, 'r')
+				errors = err.read()
+				err.close()
+				sys.stdout.write( "Errors :\n%s" %(errors))
+			except Exception, e:
+				sys.stderr.write( '%s\n' % str(e) )
+		else:
+			sys.stdout.write( 'BreakDancer successful' )
+
+	except Exception, e:
+		sys.stdout.write( 'BreakDancer fail\n' )
+		sys.stderr.write( '%s\n' % str(e) )
+		sys.exit()
+
+	finally:
+		if os.path.exists( tmp_dir ):
+			shutil.rmtree( tmp_dir )
+
+if __name__=="__main__":
+	__main__()