# HG changeset patch # User jeremie # Date 1404308771 14400 # Node ID c971e3f6de9ff35e1741aa92404991d2094d317a # Parent 4a5a1765951d754353f5067e896ee7e68e36a3a7 Uploaded diff -r 4a5a1765951d -r c971e3f6de9f breakdancer.py --- a/breakdancer.py Thu Jun 26 07:26:50 2014 -0400 +++ b/breakdancer.py Wed Jul 02 09:46:11 2014 -0400 @@ -1,59 +1,89 @@ #!/usr/bin/python -import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time - +import argparse, os, shutil, subprocess, sys, tempfile, shlex, time parser = argparse.ArgumentParser(description='') - -parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' ) -parser.add_argument ( '-o1', dest='output_raw', help='the output file' ) -parser.add_argument ( '-o2', dest='output_vcf', help='the output file' ) +# required +parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file') +parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file') +# optional +parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file') +parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome') +parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7) +parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3) +parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000) +parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35) +parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2) +parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000) +parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100) +parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False) +parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library') +parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse') +parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library') +# parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False) +# parser.add_argument('-h', dest='', action='store_true', help='print out Allele Frequency column', default=False) +parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30) -tmp_dir = tempfile.mkdtemp() -errorFile = tmp_dir+"/errorLog" + +binPath = os.environ['BREAKDANCER_BIN'] -bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl" -breakdancer_path = "/usr/bin/breakdancer-max" -breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py" +bam2cfgPath = binPath+"/bam2cfg.pl" +breakdancer2vcfPath = binPath+"/breakdancer2vcf.py" -def bam2cfg(args): - config = tmp_dir+"/breakdancer_config" - cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam) - execute(cmd, output=config) - print ("\ncmd = %s \n" ) %(cmd) - return config +# def bam2cfg(args, tempDir): +# config = tempDir+"/breakdancer_config" +# cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile) +# execute(cmd, output=config) +# return config -def breakdancer(args, config): - cmd = '%s %s' % (breakdancer_path, config) - execute(cmd, output=args.output_raw) - - -def breakdancer2vcf(args): - cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf ) - execute(cmd) +# def breakdancer(args, config): +# cmd = 'breakdancer-max %s' % (config) +# execute(cmd, output=args.outputRawFile) -def execute( cmd, output="" ): +# def breakdancer2vcf(args): +# cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile) +# execute(cmd) + +def execute(cmd, output=None): + # function to execute a cmd and report if an error occur + # print(cmd) try: - err = open(tmp_dir+"/errorLog", 'a') - if output != "": - out = open(output, 'w') - else: - out = subprocess.PIPE - process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err ) + process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) process.wait() - err.close() - if out != subprocess.PIPE: - out.close() - except Exception, e: - sys.stderr.write("problem doing : %s\n" %(cmd)) - sys.stderr.write( '%s\n\n' % str(e) ) + stdout,stderr = process.communicate() + except Exception, e: # une erreur de ma commande : stderr + sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e)) + return + if output: + output = open(output, 'w') + output.write(stdout) + output.close() + if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes) + sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr)) -def check( output ): +# def execute(cmd, output=""): +# try: +# err = open(tempDir+"/errorLog", 'a') +# if output != "": +# out = open(output, 'w') +# else: +# out = subprocess.PIPE +# process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err) +# process.wait() +# err.close() +# if out != subprocess.PIPE: +# out.close() +# except Exception, e: +# sys.stderr.write("problem doing : %s\n" %(cmd)) +# sys.stderr.write('%s\n\n' % str(e)) + + +def check(output): if (os.path.getsize(output)>0): return True else: @@ -71,11 +101,11 @@ # n = len(lignes) f.close() except Exception, e: - sys.stderr.write( '%s\n' % str(e) ) + sys.stderr.write('%s\n' % str(e)) return n -def compare( output1, output2 ): +def compare(output1, output2): # compare le nombre de ligne entre 2 fichiers # pour verifier qu'aucune ligne n'a ete saute num_raw = getLine(output1) @@ -85,47 +115,80 @@ else: sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf)) +def getVersion(program): + import tempfile, subprocess + try: + temp = tempfile.NamedTemporaryFile().name + tempStdout = open(temp, 'wb') + proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout) + tempStdout.close() + returncode = proc.wait() + stdout = None + for line in open(tempStdout.name, 'rb'): + if line.lower().find('version') >= 0: + stdout = line.strip() + break + if stdout: + sys.stdout.write('%s\n' % stdout) + except: + sys.stdout.write('Could not determine %s version\n' % (program)) def __main__(): time0 = time.time() + tempDir = tempfile.mkdtemp() args = parser.parse_args() + getVersion("breakdancer-max") try: - config = bam2cfg(args) - breakdancer(args, config) - breakdancer2vcf(args) + # config = bam2cfg(args, tempDir) + # breakdancer(args, config) + # breakdancer2vcf(args) + + # bam2cfg + config = tempDir+"/breakdancer_config" + cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile) + execute(cmd, output=config) + + # breakdancer + cmd = 'breakdancer-max %s' % (config) + execute(cmd, output=args.outputRawFile) + + # breakdancer2vcf + if args.outputVcfFile: + cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile) + execute(cmd) # quelques tests # verifier que les fichiers de sorties ne sont pas vides - check(args.output_raw) - check(args.output_vcf) + check(args.outputRawFile) + check(args.outputVcfFile) # comparer le nombre de ligne entre les 2 fichiers # pour etre sur que toute les variations ont ete prises en compte - compare(args.output_raw, args.output_vcf) + compare(args.outputRawFile, args.outputVcfFile) - sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0))) + sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0))) - if ( os.path.getsize(errorFile)>0 ): - sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) ) + if (os.path.getsize(errorFile)>0): + sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile)) try: err = open(errorFile, 'r') errors = err.read() err.close() - sys.stdout.write( "Errors :\n%s" %(errors)) + sys.stdout.write("Errors :\n%s" %(errors)) except Exception, e: - sys.stderr.write( '%s\n' % str(e) ) + sys.stderr.write('%s\n' % str(e)) else: - sys.stdout.write( 'BreakDancer successful' ) + sys.stdout.write('BreakDancer successful') except Exception, e: - sys.stdout.write( 'BreakDancer fail\n' ) - sys.stderr.write( '%s\n' % str(e) ) + sys.stdout.write('BreakDancer fail\n') + sys.stderr.write('%s\n' % str(e)) sys.exit() finally: - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) + if os.path.exists(tempDir): + shutil.rmtree(tempDir) if __name__=="__main__": __main__()