comparison breakdancer.py @ 7:c971e3f6de9f draft

Uploaded
author jeremie
date Wed, 02 Jul 2014 09:46:11 -0400
parents 01bb7558d3b3
children 62bf6aed1908
comparison
equal deleted inserted replaced
6:4a5a1765951d 7:c971e3f6de9f
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time 3 import argparse, os, shutil, subprocess, sys, tempfile, shlex, time
4
5 parser = argparse.ArgumentParser(description='')
6 # required
7 parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file')
8 parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file')
9 # optional
10 parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file')
11 parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome')
12 parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7)
13 parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3)
14 parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000)
15 parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35)
16 parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2)
17 parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000)
18 parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100)
19 parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False)
20 parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library')
21 parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse')
22 parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library')
23 # parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False)
24 # parser.add_argument('-h', dest='', action='store_true', help='print out Allele Frequency column', default=False)
25 parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30)
4 26
5 27
6 parser = argparse.ArgumentParser(description='')
7 28
8 parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' ) 29 binPath = os.environ['BREAKDANCER_BIN']
9 parser.add_argument ( '-o1', dest='output_raw', help='the output file' ) 30
10 parser.add_argument ( '-o2', dest='output_vcf', help='the output file' ) 31 bam2cfgPath = binPath+"/bam2cfg.pl"
32 breakdancer2vcfPath = binPath+"/breakdancer2vcf.py"
11 33
12 34
13 tmp_dir = tempfile.mkdtemp() 35 # def bam2cfg(args, tempDir):
14 errorFile = tmp_dir+"/errorLog" 36 # config = tempDir+"/breakdancer_config"
15 37 # cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile)
16 bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl" 38 # execute(cmd, output=config)
17 breakdancer_path = "/usr/bin/breakdancer-max" 39 # return config
18 breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py"
19 40
20 41
21 def bam2cfg(args): 42 # def breakdancer(args, config):
22 config = tmp_dir+"/breakdancer_config" 43 # cmd = 'breakdancer-max %s' % (config)
23 cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam) 44 # execute(cmd, output=args.outputRawFile)
24 execute(cmd, output=config)
25 print ("\ncmd = %s \n" ) %(cmd)
26 return config
27 45
28 46
29 def breakdancer(args, config): 47 # def breakdancer2vcf(args):
30 cmd = '%s %s' % (breakdancer_path, config) 48 # cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile)
31 execute(cmd, output=args.output_raw) 49 # execute(cmd)
50
51 def execute(cmd, output=None):
52 # function to execute a cmd and report if an error occur
53 # print(cmd)
54 try:
55 process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
56 process.wait()
57 stdout,stderr = process.communicate()
58 except Exception, e: # une erreur de ma commande : stderr
59 sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e))
60 return
61 if output:
62 output = open(output, 'w')
63 output.write(stdout)
64 output.close()
65 if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes)
66 sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr))
32 67
33 68
34 def breakdancer2vcf(args): 69 # def execute(cmd, output=""):
35 cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf ) 70 # try:
36 execute(cmd) 71 # err = open(tempDir+"/errorLog", 'a')
72 # if output != "":
73 # out = open(output, 'w')
74 # else:
75 # out = subprocess.PIPE
76 # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err)
77 # process.wait()
78 # err.close()
79 # if out != subprocess.PIPE:
80 # out.close()
81 # except Exception, e:
82 # sys.stderr.write("problem doing : %s\n" %(cmd))
83 # sys.stderr.write('%s\n\n' % str(e))
37 84
38 85
39 def execute( cmd, output="" ): 86 def check(output):
40 try:
41 err = open(tmp_dir+"/errorLog", 'a')
42 if output != "":
43 out = open(output, 'w')
44 else:
45 out = subprocess.PIPE
46 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
47 process.wait()
48 err.close()
49 if out != subprocess.PIPE:
50 out.close()
51 except Exception, e:
52 sys.stderr.write("problem doing : %s\n" %(cmd))
53 sys.stderr.write( '%s\n\n' % str(e) )
54
55
56 def check( output ):
57 if (os.path.getsize(output)>0): 87 if (os.path.getsize(output)>0):
58 return True 88 return True
59 else: 89 else:
60 sys.stderr.write('The output file is empty : %s\n' % (output)) 90 sys.stderr.write('The output file is empty : %s\n' % (output))
61 91
69 if ligne.strip()[0]!="#": 99 if ligne.strip()[0]!="#":
70 n+=1 100 n+=1
71 # n = len(lignes) 101 # n = len(lignes)
72 f.close() 102 f.close()
73 except Exception, e: 103 except Exception, e:
74 sys.stderr.write( '%s\n' % str(e) ) 104 sys.stderr.write('%s\n' % str(e))
75 return n 105 return n
76 106
77 107
78 def compare( output1, output2 ): 108 def compare(output1, output2):
79 # compare le nombre de ligne entre 2 fichiers 109 # compare le nombre de ligne entre 2 fichiers
80 # pour verifier qu'aucune ligne n'a ete saute 110 # pour verifier qu'aucune ligne n'a ete saute
81 num_raw = getLine(output1) 111 num_raw = getLine(output1)
82 num_vcf = getLine(output2) 112 num_vcf = getLine(output2)
83 if (num_raw==num_vcf): 113 if (num_raw==num_vcf):
84 return True 114 return True
85 else: 115 else:
86 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf)) 116 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
87 117
118 def getVersion(program):
119 import tempfile, subprocess
120 try:
121 temp = tempfile.NamedTemporaryFile().name
122 tempStdout = open(temp, 'wb')
123 proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout)
124 tempStdout.close()
125 returncode = proc.wait()
126 stdout = None
127 for line in open(tempStdout.name, 'rb'):
128 if line.lower().find('version') >= 0:
129 stdout = line.strip()
130 break
131 if stdout:
132 sys.stdout.write('%s\n' % stdout)
133 except:
134 sys.stdout.write('Could not determine %s version\n' % (program))
88 135
89 def __main__(): 136 def __main__():
90 137
91 time0 = time.time() 138 time0 = time.time()
139 tempDir = tempfile.mkdtemp()
92 args = parser.parse_args() 140 args = parser.parse_args()
141 getVersion("breakdancer-max")
93 142
94 try: 143 try:
95 config = bam2cfg(args) 144 # config = bam2cfg(args, tempDir)
96 breakdancer(args, config) 145 # breakdancer(args, config)
97 breakdancer2vcf(args) 146 # breakdancer2vcf(args)
147
148 # bam2cfg
149 config = tempDir+"/breakdancer_config"
150 cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile)
151 execute(cmd, output=config)
152
153 # breakdancer
154 cmd = 'breakdancer-max %s' % (config)
155 execute(cmd, output=args.outputRawFile)
156
157 # breakdancer2vcf
158 if args.outputVcfFile:
159 cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile)
160 execute(cmd)
98 161
99 # quelques tests 162 # quelques tests
100 # verifier que les fichiers de sorties ne sont pas vides 163 # verifier que les fichiers de sorties ne sont pas vides
101 check(args.output_raw) 164 check(args.outputRawFile)
102 check(args.output_vcf) 165 check(args.outputVcfFile)
103 # comparer le nombre de ligne entre les 2 fichiers 166 # comparer le nombre de ligne entre les 2 fichiers
104 # pour etre sur que toute les variations ont ete prises en compte 167 # pour etre sur que toute les variations ont ete prises en compte
105 compare(args.output_raw, args.output_vcf) 168 compare(args.outputRawFile, args.outputVcfFile)
106 169
107 sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0))) 170 sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0)))
108 171
109 if ( os.path.getsize(errorFile)>0 ): 172 if (os.path.getsize(errorFile)>0):
110 sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) ) 173 sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile))
111 try: 174 try:
112 err = open(errorFile, 'r') 175 err = open(errorFile, 'r')
113 errors = err.read() 176 errors = err.read()
114 err.close() 177 err.close()
115 sys.stdout.write( "Errors :\n%s" %(errors)) 178 sys.stdout.write("Errors :\n%s" %(errors))
116 except Exception, e: 179 except Exception, e:
117 sys.stderr.write( '%s\n' % str(e) ) 180 sys.stderr.write('%s\n' % str(e))
118 else: 181 else:
119 sys.stdout.write( 'BreakDancer successful' ) 182 sys.stdout.write('BreakDancer successful')
120 183
121 except Exception, e: 184 except Exception, e:
122 sys.stdout.write( 'BreakDancer fail\n' ) 185 sys.stdout.write('BreakDancer fail\n')
123 sys.stderr.write( '%s\n' % str(e) ) 186 sys.stderr.write('%s\n' % str(e))
124 sys.exit() 187 sys.exit()
125 188
126 finally: 189 finally:
127 if os.path.exists( tmp_dir ): 190 if os.path.exists(tempDir):
128 shutil.rmtree( tmp_dir ) 191 shutil.rmtree(tempDir)
129 192
130 if __name__=="__main__": 193 if __name__=="__main__":
131 __main__() 194 __main__()