Mercurial > repos > jeremie > breakdancer_bin
view breakdancer.py @ 22:96e938d6904e draft default tip
Uploaded
author | jeremie |
---|---|
date | Mon, 07 Jul 2014 10:32:05 -0400 |
parents | b83483e9b1e8 |
children |
line wrap: on
line source
#!/usr/bin/python import argparse, os, shutil, subprocess, sys, tempfile, shlex, time parser = argparse.ArgumentParser(description='') # required parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file') parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file') # optional parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file') parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome') parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7) parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3) parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000) parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35) parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2) parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000) parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100) parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False) parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library') parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse') parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library') parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False) # parser.add_argument('-h', dest='AFColumn', action='store_true', help='print out Allele Frequency column', default=False) parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30) # binPath = os.environ['BREAKDANCER_BIN'] # bam2cfgPath = binPath+"/bam2cfg.pl" # breakdancer2vcfPath = binPath+"/breakdancer2vcf.py" # def bam2cfg(args, tempDir): # config = tempDir+"/breakdancer_config" # cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile) # execute(cmd, output=config) # return config # def breakdancer(args, config): # cmd = 'breakdancer-max %s' % (config) # execute(cmd, output=args.outputRawFile) # def breakdancer2vcf(args): # cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile) # execute(cmd) def execute(cmd, output=None): # function to execute a cmd and report if an error occur # print(cmd) try: process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) process.wait() stdout,stderr = process.communicate() except Exception, e: # une erreur de ma commande : stderr sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e)) return if output: output = open(output, 'w') output.write(stdout) output.close() if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes) sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr)) # def execute(cmd, output=""): # try: # err = open(tempDir+"/errorLog", 'a') # if output != "": # out = open(output, 'w') # else: # out = subprocess.PIPE # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err) # process.wait() # err.close() # if out != subprocess.PIPE: # out.close() # except Exception, e: # sys.stderr.write("problem doing : %s\n" %(cmd)) # sys.stderr.write('%s\n\n' % str(e)) def check(output): if (os.path.getsize(output)>0): return True else: sys.stderr.write('The output file is empty : %s\n' % (output)) def getLine(file): try: f = open(file, 'r') lignes = f.readlines() n=0 for ligne in lignes: if ligne.strip()[0]!="#": n+=1 # n = len(lignes) f.close() except Exception, e: sys.stderr.write('%s\n' % str(e)) return n def compare(output1, output2): # compare le nombre de ligne entre 2 fichiers # pour verifier qu'aucune ligne n'a ete saute num_raw = getLine(output1) num_vcf = getLine(output2) if (num_raw==num_vcf): return True else: sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf)) def getVersion(program): import tempfile, subprocess try: temp = tempfile.NamedTemporaryFile().name tempStdout = open(temp, 'wb') proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout, stderr=tempStdout) tempStdout.close() returncode = proc.wait() stdout = None for line in open(tempStdout.name, 'rb'): if line.lower().find('version') >= 0: stdout = line.strip() break if stdout: sys.stdout.write('%s\n' % stdout) except: sys.stdout.write('Could not determine %s version\n' % (program)) def __main__(): time0 = time.time() tempDir = tempfile.mkdtemp() args = parser.parse_args() getVersion("breakdancer-max") os.system('echo $PATH') try: # config = bam2cfg(args, tempDir) # breakdancer(args, config) # breakdancer2vcf(args) # bam2cfg config = tempDir+"/breakdancer_config" cmd = 'bam2cfg.pl %s' % (args.inputBamFile) execute(cmd, output=config) # breakdancer cmd = 'breakdancer-max %s' % (config) cmd += ' -s %d -c %d -m %d -q %d -r %d -x %d -b %d -y %d' % (args.minLength, args.cutoff, args.maxSvSize, args.minMapQuality, args.minReadDepth, args.maxHaploidCov, args.bufferSize, args.scoreFilter) if args.chromosome: cmd += ' -o %s ' % (args.chromosome) if args.onlyTrans: cmd += ' -t ' if args.prefix: cmd += ' -d %s ' % (args.prefix) if args.bedFormat: cmd += ' -g %s ' % (args.bedFormat) if args.matePair: cmd += ' -l ' if args.sortByLibrary: cmd += ' -a ' # if args.AFColumn: # cmd += ' -h ' execute(cmd, output=args.outputRawFile) # breakdancer2vcf if args.outputVcfFile: cmd = "breakdancer2vcf.py -i %s -o %s" % (args.outputRawFile, args.outputVcfFile) execute(cmd) # quelques tests # verifier que les fichiers de sorties ne sont pas vides check(args.outputRawFile) check(args.outputVcfFile) # comparer le nombre de ligne entre les 2 fichiers # pour etre sur que toute les variations ont ete prises en compte compare(args.outputRawFile, args.outputVcfFile) sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0))) # if (os.path.getsize(errorFile)>0): # sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile)) # try: # err = open(errorFile, 'r') # errors = err.read() # err.close() # sys.stdout.write("Errors :\n%s" %(errors)) # except Exception, e: # sys.stderr.write('%s\n' % str(e)) # else: sys.stdout.write('BreakDancer successful') except Exception, e: sys.stdout.write('BreakDancer fail\n') sys.stderr.write('%s\n' % str(e)) sys.exit() finally: if os.path.exists(tempDir): shutil.rmtree(tempDir) if __name__=="__main__": __main__()