Mercurial > repos > jeremie > breakdancer_bin
comparison breakdancer.py @ 7:c971e3f6de9f draft
Uploaded
author | jeremie |
---|---|
date | Wed, 02 Jul 2014 09:46:11 -0400 |
parents | 01bb7558d3b3 |
children | 62bf6aed1908 |
comparison
equal
deleted
inserted
replaced
6:4a5a1765951d | 7:c971e3f6de9f |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time | 3 import argparse, os, shutil, subprocess, sys, tempfile, shlex, time |
4 | |
5 parser = argparse.ArgumentParser(description='') | |
6 # required | |
7 parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file') | |
8 parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file') | |
9 # optional | |
10 parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file') | |
11 parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome') | |
12 parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7) | |
13 parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3) | |
14 parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000) | |
15 parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35) | |
16 parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2) | |
17 parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000) | |
18 parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100) | |
19 parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False) | |
20 parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library') | |
21 parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse') | |
22 parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library') | |
23 # parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False) | |
24 # parser.add_argument('-h', dest='', action='store_true', help='print out Allele Frequency column', default=False) | |
25 parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30) | |
4 | 26 |
5 | 27 |
6 parser = argparse.ArgumentParser(description='') | |
7 | 28 |
8 parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' ) | 29 binPath = os.environ['BREAKDANCER_BIN'] |
9 parser.add_argument ( '-o1', dest='output_raw', help='the output file' ) | 30 |
10 parser.add_argument ( '-o2', dest='output_vcf', help='the output file' ) | 31 bam2cfgPath = binPath+"/bam2cfg.pl" |
32 breakdancer2vcfPath = binPath+"/breakdancer2vcf.py" | |
11 | 33 |
12 | 34 |
13 tmp_dir = tempfile.mkdtemp() | 35 # def bam2cfg(args, tempDir): |
14 errorFile = tmp_dir+"/errorLog" | 36 # config = tempDir+"/breakdancer_config" |
15 | 37 # cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile) |
16 bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl" | 38 # execute(cmd, output=config) |
17 breakdancer_path = "/usr/bin/breakdancer-max" | 39 # return config |
18 breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py" | |
19 | 40 |
20 | 41 |
21 def bam2cfg(args): | 42 # def breakdancer(args, config): |
22 config = tmp_dir+"/breakdancer_config" | 43 # cmd = 'breakdancer-max %s' % (config) |
23 cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam) | 44 # execute(cmd, output=args.outputRawFile) |
24 execute(cmd, output=config) | |
25 print ("\ncmd = %s \n" ) %(cmd) | |
26 return config | |
27 | 45 |
28 | 46 |
29 def breakdancer(args, config): | 47 # def breakdancer2vcf(args): |
30 cmd = '%s %s' % (breakdancer_path, config) | 48 # cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile) |
31 execute(cmd, output=args.output_raw) | 49 # execute(cmd) |
50 | |
51 def execute(cmd, output=None): | |
52 # function to execute a cmd and report if an error occur | |
53 # print(cmd) | |
54 try: | |
55 process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
56 process.wait() | |
57 stdout,stderr = process.communicate() | |
58 except Exception, e: # une erreur de ma commande : stderr | |
59 sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e)) | |
60 return | |
61 if output: | |
62 output = open(output, 'w') | |
63 output.write(stdout) | |
64 output.close() | |
65 if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes) | |
66 sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr)) | |
32 | 67 |
33 | 68 |
34 def breakdancer2vcf(args): | 69 # def execute(cmd, output=""): |
35 cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf ) | 70 # try: |
36 execute(cmd) | 71 # err = open(tempDir+"/errorLog", 'a') |
72 # if output != "": | |
73 # out = open(output, 'w') | |
74 # else: | |
75 # out = subprocess.PIPE | |
76 # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err) | |
77 # process.wait() | |
78 # err.close() | |
79 # if out != subprocess.PIPE: | |
80 # out.close() | |
81 # except Exception, e: | |
82 # sys.stderr.write("problem doing : %s\n" %(cmd)) | |
83 # sys.stderr.write('%s\n\n' % str(e)) | |
37 | 84 |
38 | 85 |
39 def execute( cmd, output="" ): | 86 def check(output): |
40 try: | |
41 err = open(tmp_dir+"/errorLog", 'a') | |
42 if output != "": | |
43 out = open(output, 'w') | |
44 else: | |
45 out = subprocess.PIPE | |
46 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err ) | |
47 process.wait() | |
48 err.close() | |
49 if out != subprocess.PIPE: | |
50 out.close() | |
51 except Exception, e: | |
52 sys.stderr.write("problem doing : %s\n" %(cmd)) | |
53 sys.stderr.write( '%s\n\n' % str(e) ) | |
54 | |
55 | |
56 def check( output ): | |
57 if (os.path.getsize(output)>0): | 87 if (os.path.getsize(output)>0): |
58 return True | 88 return True |
59 else: | 89 else: |
60 sys.stderr.write('The output file is empty : %s\n' % (output)) | 90 sys.stderr.write('The output file is empty : %s\n' % (output)) |
61 | 91 |
69 if ligne.strip()[0]!="#": | 99 if ligne.strip()[0]!="#": |
70 n+=1 | 100 n+=1 |
71 # n = len(lignes) | 101 # n = len(lignes) |
72 f.close() | 102 f.close() |
73 except Exception, e: | 103 except Exception, e: |
74 sys.stderr.write( '%s\n' % str(e) ) | 104 sys.stderr.write('%s\n' % str(e)) |
75 return n | 105 return n |
76 | 106 |
77 | 107 |
78 def compare( output1, output2 ): | 108 def compare(output1, output2): |
79 # compare le nombre de ligne entre 2 fichiers | 109 # compare le nombre de ligne entre 2 fichiers |
80 # pour verifier qu'aucune ligne n'a ete saute | 110 # pour verifier qu'aucune ligne n'a ete saute |
81 num_raw = getLine(output1) | 111 num_raw = getLine(output1) |
82 num_vcf = getLine(output2) | 112 num_vcf = getLine(output2) |
83 if (num_raw==num_vcf): | 113 if (num_raw==num_vcf): |
84 return True | 114 return True |
85 else: | 115 else: |
86 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf)) | 116 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf)) |
87 | 117 |
118 def getVersion(program): | |
119 import tempfile, subprocess | |
120 try: | |
121 temp = tempfile.NamedTemporaryFile().name | |
122 tempStdout = open(temp, 'wb') | |
123 proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout) | |
124 tempStdout.close() | |
125 returncode = proc.wait() | |
126 stdout = None | |
127 for line in open(tempStdout.name, 'rb'): | |
128 if line.lower().find('version') >= 0: | |
129 stdout = line.strip() | |
130 break | |
131 if stdout: | |
132 sys.stdout.write('%s\n' % stdout) | |
133 except: | |
134 sys.stdout.write('Could not determine %s version\n' % (program)) | |
88 | 135 |
89 def __main__(): | 136 def __main__(): |
90 | 137 |
91 time0 = time.time() | 138 time0 = time.time() |
139 tempDir = tempfile.mkdtemp() | |
92 args = parser.parse_args() | 140 args = parser.parse_args() |
141 getVersion("breakdancer-max") | |
93 | 142 |
94 try: | 143 try: |
95 config = bam2cfg(args) | 144 # config = bam2cfg(args, tempDir) |
96 breakdancer(args, config) | 145 # breakdancer(args, config) |
97 breakdancer2vcf(args) | 146 # breakdancer2vcf(args) |
147 | |
148 # bam2cfg | |
149 config = tempDir+"/breakdancer_config" | |
150 cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile) | |
151 execute(cmd, output=config) | |
152 | |
153 # breakdancer | |
154 cmd = 'breakdancer-max %s' % (config) | |
155 execute(cmd, output=args.outputRawFile) | |
156 | |
157 # breakdancer2vcf | |
158 if args.outputVcfFile: | |
159 cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile) | |
160 execute(cmd) | |
98 | 161 |
99 # quelques tests | 162 # quelques tests |
100 # verifier que les fichiers de sorties ne sont pas vides | 163 # verifier que les fichiers de sorties ne sont pas vides |
101 check(args.output_raw) | 164 check(args.outputRawFile) |
102 check(args.output_vcf) | 165 check(args.outputVcfFile) |
103 # comparer le nombre de ligne entre les 2 fichiers | 166 # comparer le nombre de ligne entre les 2 fichiers |
104 # pour etre sur que toute les variations ont ete prises en compte | 167 # pour etre sur que toute les variations ont ete prises en compte |
105 compare(args.output_raw, args.output_vcf) | 168 compare(args.outputRawFile, args.outputVcfFile) |
106 | 169 |
107 sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0))) | 170 sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0))) |
108 | 171 |
109 if ( os.path.getsize(errorFile)>0 ): | 172 if (os.path.getsize(errorFile)>0): |
110 sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) ) | 173 sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile)) |
111 try: | 174 try: |
112 err = open(errorFile, 'r') | 175 err = open(errorFile, 'r') |
113 errors = err.read() | 176 errors = err.read() |
114 err.close() | 177 err.close() |
115 sys.stdout.write( "Errors :\n%s" %(errors)) | 178 sys.stdout.write("Errors :\n%s" %(errors)) |
116 except Exception, e: | 179 except Exception, e: |
117 sys.stderr.write( '%s\n' % str(e) ) | 180 sys.stderr.write('%s\n' % str(e)) |
118 else: | 181 else: |
119 sys.stdout.write( 'BreakDancer successful' ) | 182 sys.stdout.write('BreakDancer successful') |
120 | 183 |
121 except Exception, e: | 184 except Exception, e: |
122 sys.stdout.write( 'BreakDancer fail\n' ) | 185 sys.stdout.write('BreakDancer fail\n') |
123 sys.stderr.write( '%s\n' % str(e) ) | 186 sys.stderr.write('%s\n' % str(e)) |
124 sys.exit() | 187 sys.exit() |
125 | 188 |
126 finally: | 189 finally: |
127 if os.path.exists( tmp_dir ): | 190 if os.path.exists(tempDir): |
128 shutil.rmtree( tmp_dir ) | 191 shutil.rmtree(tempDir) |
129 | 192 |
130 if __name__=="__main__": | 193 if __name__=="__main__": |
131 __main__() | 194 __main__() |