changeset 7:c971e3f6de9f draft

Uploaded
author jeremie
date Wed, 02 Jul 2014 09:46:11 -0400
parents 4a5a1765951d
children 7cd61ef578b5
files breakdancer.py
diffstat 1 files changed, 120 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/breakdancer.py	Thu Jun 26 07:26:50 2014 -0400
+++ b/breakdancer.py	Wed Jul 02 09:46:11 2014 -0400
@@ -1,59 +1,89 @@
 #!/usr/bin/python
 
-import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
-
+import argparse, os, shutil, subprocess, sys, tempfile, shlex, time
 
 parser = argparse.ArgumentParser(description='')
-
-parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
-parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
-parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )
+# required
+parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file')
+parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file')
+# optional
+parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file')
+parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome')
+parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7)
+parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3)
+parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000)
+parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35)
+parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2)
+parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000)
+parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100)
+parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False)
+parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library')
+parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse')
+parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library')
+# parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False)
+# parser.add_argument('-h', dest='', action='store_true', help='print out Allele Frequency column', default=False)
+parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30)
 
 
-tmp_dir = tempfile.mkdtemp()
-errorFile = tmp_dir+"/errorLog"
+
+binPath = os.environ['BREAKDANCER_BIN']
 
-bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl"
-breakdancer_path = "/usr/bin/breakdancer-max"
-breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py"
+bam2cfgPath = binPath+"/bam2cfg.pl"
+breakdancer2vcfPath = binPath+"/breakdancer2vcf.py"
 
 
-def bam2cfg(args):
-	config = tmp_dir+"/breakdancer_config"
-	cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam)
-	execute(cmd, output=config)
-	print ("\ncmd = %s  \n" ) %(cmd)
-	return config
+# def bam2cfg(args, tempDir):
+# 	config = tempDir+"/breakdancer_config"
+# 	cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile)
+# 	execute(cmd, output=config)
+# 	return config
 
 
-def breakdancer(args, config):
-	cmd = '%s %s' % (breakdancer_path, config)
-	execute(cmd, output=args.output_raw)
-
-
-def breakdancer2vcf(args):
-	cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf )
-	execute(cmd)
+# def breakdancer(args, config):
+# 	cmd = 'breakdancer-max %s' % (config)
+# 	execute(cmd, output=args.outputRawFile)
 
 
-def execute( cmd, output="" ):
+# def breakdancer2vcf(args):
+# 	cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile)
+# 	execute(cmd)
+
+def execute(cmd, output=None):
+	# function to execute a cmd and report if an error occur
+	# print(cmd)
 	try: 
-		err = open(tmp_dir+"/errorLog", 'a')
-		if output != "":
-			out = open(output, 'w')
-		else:
-			out = subprocess.PIPE
-		process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
+		process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 		process.wait()
-		err.close()
-		if out != subprocess.PIPE:
-			out.close()
-	except Exception, e:
-		sys.stderr.write("problem doing : %s\n" %(cmd))
-		sys.stderr.write( '%s\n\n' % str(e) )
+		stdout,stderr = process.communicate()
+	except Exception, e: # une erreur de ma commande : stderr
+		sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e))
+		return
+	if output:
+		output = open(output, 'w')
+		output.write(stdout)
+		output.close()
+	if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes)
+		sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr))
 
 
-def check( output ):
+# def execute(cmd, output=""):
+# 	try: 
+# 		err = open(tempDir+"/errorLog", 'a')
+# 		if output != "":
+# 			out = open(output, 'w')
+# 		else:
+# 			out = subprocess.PIPE
+# 		process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err)
+# 		process.wait()
+# 		err.close()
+# 		if out != subprocess.PIPE:
+# 			out.close()
+# 	except Exception, e:
+# 		sys.stderr.write("problem doing : %s\n" %(cmd))
+# 		sys.stderr.write('%s\n\n' % str(e))
+
+
+def check(output):
 	if (os.path.getsize(output)>0):
 		return True
 	else:
@@ -71,11 +101,11 @@
 		# n = len(lignes)
 		f.close()
 	except Exception, e:
-		sys.stderr.write( '%s\n' % str(e) )
+		sys.stderr.write('%s\n' % str(e))
 	return n
 
 
-def compare( output1, output2 ):
+def compare(output1, output2):
 	# compare le nombre de ligne entre 2 fichiers
 	# pour verifier qu'aucune ligne n'a ete saute
 	num_raw = getLine(output1)
@@ -85,47 +115,80 @@
 	else:
 		sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
 
+def getVersion(program):
+	import tempfile, subprocess
+	try:
+		temp = tempfile.NamedTemporaryFile().name
+		tempStdout = open(temp, 'wb')
+		proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout)
+		tempStdout.close()
+		returncode = proc.wait()
+		stdout = None
+		for line in open(tempStdout.name, 'rb'):
+			if line.lower().find('version') >= 0:
+				stdout = line.strip()
+				break
+		if stdout:
+			sys.stdout.write('%s\n' % stdout)
+	except:
+		sys.stdout.write('Could not determine %s version\n' % (program))
 
 def __main__():
 
 	time0 = time.time()
+	tempDir = tempfile.mkdtemp()
 	args = parser.parse_args()
+	getVersion("breakdancer-max")
 
 	try:
-		config = bam2cfg(args)
-		breakdancer(args, config)
-		breakdancer2vcf(args)
+		# config = bam2cfg(args, tempDir)
+		# breakdancer(args, config)
+		# breakdancer2vcf(args)
+
+		# bam2cfg
+		config = tempDir+"/breakdancer_config"
+		cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile)
+		execute(cmd, output=config)
+
+		# breakdancer
+		cmd = 'breakdancer-max %s' % (config)
+		execute(cmd, output=args.outputRawFile)
+
+		# breakdancer2vcf
+		if args.outputVcfFile:
+			cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile)
+			execute(cmd)
 
 		#	quelques tests
 		# verifier que les fichiers de sorties ne sont pas vides
-		check(args.output_raw)
-		check(args.output_vcf)
+		check(args.outputRawFile)
+		check(args.outputVcfFile)
 		# comparer le nombre de ligne entre les 2 fichiers
 		# pour etre sur que toute les variations ont ete prises en compte
-		compare(args.output_raw, args.output_vcf)
+		compare(args.outputRawFile, args.outputVcfFile)
 			
-		sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))
+		sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0)))
 
-		if ( os.path.getsize(errorFile)>0 ):
-			sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
+		if (os.path.getsize(errorFile)>0):
+			sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile))
 			try:
 				err = open(errorFile, 'r')
 				errors = err.read()
 				err.close()
-				sys.stdout.write( "Errors :\n%s" %(errors))
+				sys.stdout.write("Errors :\n%s" %(errors))
 			except Exception, e:
-				sys.stderr.write( '%s\n' % str(e) )
+				sys.stderr.write('%s\n' % str(e))
 		else:
-			sys.stdout.write( 'BreakDancer successful' )
+			sys.stdout.write('BreakDancer successful')
 
 	except Exception, e:
-		sys.stdout.write( 'BreakDancer fail\n' )
-		sys.stderr.write( '%s\n' % str(e) )
+		sys.stdout.write('BreakDancer fail\n')
+		sys.stderr.write('%s\n' % str(e))
 		sys.exit()
 
 	finally:
-		if os.path.exists( tmp_dir ):
-			shutil.rmtree( tmp_dir )
+		if os.path.exists(tempDir):
+			shutil.rmtree(tempDir)
 
 if __name__=="__main__":
 	__main__()