2
|
1 #!/usr/bin/python
|
|
2
|
7
|
3 import argparse, os, shutil, subprocess, sys, tempfile, shlex, time
|
2
|
4
|
|
5 parser = argparse.ArgumentParser(description='')
|
7
|
6 # required
|
|
7 parser.add_argument('-i1', dest='inputBamFile', required=True, help='the bam input file')
|
|
8 parser.add_argument('-o1', dest='outputRawFile', required=True, help='the raw output file')
|
|
9 # optional
|
|
10 parser.add_argument('-o2', dest='outputVcfFile', required=False, help='the vcf output file')
|
|
11 parser.add_argument('-o', dest='chromosome', required=False, help='operate on a single chromosome')
|
|
12 parser.add_argument('-s', dest='minLength', type=int, required=False, help='minimum length of a region', default=7)
|
|
13 parser.add_argument('-c', dest='cutoff', type=int, required=False, help='cutoff in unit of standard deviation', default=3)
|
|
14 parser.add_argument('-m', dest='maxSvSize', type=int, required=False, help='maximum SV size', default=1000000000)
|
|
15 parser.add_argument('-q', dest='minMapQuality', type=int, required=False, help='minimum alternative mapping quality', default=35)
|
|
16 parser.add_argument('-r', dest='minReadDepth', type=int, required=False, help='minimum number of read pairs required to establish a connection', default=2)
|
|
17 parser.add_argument('-x', dest='maxHaploidCov', type=int, required=False, help='maximum threshold of haploid sequence coverage for regions to be ignored', default=1000)
|
|
18 parser.add_argument('-b', dest='bufferSize', type=int, required=False, help='buffer size for building connection', default=100)
|
|
19 parser.add_argument('-t', dest='onlyTrans', action='store_true', help='only detect transchromosomal rearrangement', default=False)
|
|
20 parser.add_argument('-d', dest='prefix', required=False, help='prefix of fastq files that SV supporting reads will be saved by library')
|
|
21 parser.add_argument('-g', dest='bedFormat', required=False, help='dump SVs and supporting reads in BED format for GBrowse')
|
|
22 parser.add_argument('-l', dest='matePair', required=False, help='analyze Illumina long insert (mate-pair) library')
|
13
|
23 parser.add_argument('-a', dest='sortByLibrary', action='store_true', help='print out copy number and support reads per library rather than per bam', default=False)
|
14
|
24 # parser.add_argument('-h', dest='AFColumn', action='store_true', help='print out Allele Frequency column', default=False)
|
7
|
25 parser.add_argument('-y', dest='scoreFilter', type=int, required=False, help='output score filter', default=30)
|
2
|
26
|
|
27
|
7
|
28
|
19
|
29 # binPath = os.environ['BREAKDANCER_BIN']
|
2
|
30
|
19
|
31 # bam2cfgPath = binPath+"/bam2cfg.pl"
|
|
32 # breakdancer2vcfPath = binPath+"/breakdancer2vcf.py"
|
2
|
33
|
|
34
|
7
|
35 # def bam2cfg(args, tempDir):
|
|
36 # config = tempDir+"/breakdancer_config"
|
|
37 # cmd = 'perl %s %s' % (bam2cfgPath, args.inputBamFile)
|
|
38 # execute(cmd, output=config)
|
|
39 # return config
|
2
|
40
|
|
41
|
7
|
42 # def breakdancer(args, config):
|
|
43 # cmd = 'breakdancer-max %s' % (config)
|
|
44 # execute(cmd, output=args.outputRawFile)
|
2
|
45
|
|
46
|
7
|
47 # def breakdancer2vcf(args):
|
|
48 # cmd = "python %s -i %s -o %s" % (breakdancer2vcfPath, args.outputRawFile, args.outputVcfFile)
|
|
49 # execute(cmd)
|
|
50
|
|
51 def execute(cmd, output=None):
|
|
52 # function to execute a cmd and report if an error occur
|
|
53 # print(cmd)
|
2
|
54 try:
|
7
|
55 process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
2
|
56 process.wait()
|
7
|
57 stdout,stderr = process.communicate()
|
|
58 except Exception, e: # une erreur de ma commande : stderr
|
|
59 sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e))
|
|
60 return
|
|
61 if output:
|
|
62 output = open(output, 'w')
|
|
63 output.write(stdout)
|
|
64 output.close()
|
|
65 if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes)
|
|
66 sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr))
|
2
|
67
|
|
68
|
7
|
69 # def execute(cmd, output=""):
|
|
70 # try:
|
|
71 # err = open(tempDir+"/errorLog", 'a')
|
|
72 # if output != "":
|
|
73 # out = open(output, 'w')
|
|
74 # else:
|
|
75 # out = subprocess.PIPE
|
|
76 # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err)
|
|
77 # process.wait()
|
|
78 # err.close()
|
|
79 # if out != subprocess.PIPE:
|
|
80 # out.close()
|
|
81 # except Exception, e:
|
|
82 # sys.stderr.write("problem doing : %s\n" %(cmd))
|
|
83 # sys.stderr.write('%s\n\n' % str(e))
|
|
84
|
|
85
|
|
86 def check(output):
|
2
|
87 if (os.path.getsize(output)>0):
|
|
88 return True
|
|
89 else:
|
|
90 sys.stderr.write('The output file is empty : %s\n' % (output))
|
|
91
|
|
92
|
|
93 def getLine(file):
|
|
94 try:
|
|
95 f = open(file, 'r')
|
|
96 lignes = f.readlines()
|
|
97 n=0
|
|
98 for ligne in lignes:
|
|
99 if ligne.strip()[0]!="#":
|
|
100 n+=1
|
|
101 # n = len(lignes)
|
|
102 f.close()
|
|
103 except Exception, e:
|
7
|
104 sys.stderr.write('%s\n' % str(e))
|
2
|
105 return n
|
|
106
|
|
107
|
7
|
108 def compare(output1, output2):
|
2
|
109 # compare le nombre de ligne entre 2 fichiers
|
|
110 # pour verifier qu'aucune ligne n'a ete saute
|
|
111 num_raw = getLine(output1)
|
|
112 num_vcf = getLine(output2)
|
|
113 if (num_raw==num_vcf):
|
|
114 return True
|
|
115 else:
|
|
116 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
|
|
117
|
7
|
118 def getVersion(program):
|
|
119 import tempfile, subprocess
|
|
120 try:
|
|
121 temp = tempfile.NamedTemporaryFile().name
|
|
122 tempStdout = open(temp, 'wb')
|
18
|
123 proc = subprocess.Popen(args=program, shell=True, stdout=tempStdout, stderr=tempStdout)
|
7
|
124 tempStdout.close()
|
|
125 returncode = proc.wait()
|
|
126 stdout = None
|
|
127 for line in open(tempStdout.name, 'rb'):
|
|
128 if line.lower().find('version') >= 0:
|
|
129 stdout = line.strip()
|
|
130 break
|
|
131 if stdout:
|
|
132 sys.stdout.write('%s\n' % stdout)
|
|
133 except:
|
|
134 sys.stdout.write('Could not determine %s version\n' % (program))
|
2
|
135
|
|
136 def __main__():
|
|
137
|
|
138 time0 = time.time()
|
7
|
139 tempDir = tempfile.mkdtemp()
|
2
|
140 args = parser.parse_args()
|
7
|
141 getVersion("breakdancer-max")
|
2
|
142
|
22
|
143 os.system('echo $PATH')
|
2
|
144 try:
|
7
|
145 # config = bam2cfg(args, tempDir)
|
|
146 # breakdancer(args, config)
|
|
147 # breakdancer2vcf(args)
|
|
148
|
|
149 # bam2cfg
|
|
150 config = tempDir+"/breakdancer_config"
|
19
|
151 cmd = 'bam2cfg.pl %s' % (args.inputBamFile)
|
7
|
152 execute(cmd, output=config)
|
|
153
|
|
154 # breakdancer
|
|
155 cmd = 'breakdancer-max %s' % (config)
|
13
|
156 cmd += ' -s %d -c %d -m %d -q %d -r %d -x %d -b %d -y %d' % (args.minLength, args.cutoff, args.maxSvSize, args.minMapQuality, args.minReadDepth, args.maxHaploidCov, args.bufferSize, args.scoreFilter)
|
|
157 if args.chromosome:
|
|
158 cmd += ' -o %s ' % (args.chromosome)
|
|
159 if args.onlyTrans:
|
|
160 cmd += ' -t '
|
|
161 if args.prefix:
|
|
162 cmd += ' -d %s ' % (args.prefix)
|
|
163 if args.bedFormat:
|
|
164 cmd += ' -g %s ' % (args.bedFormat)
|
|
165 if args.matePair:
|
|
166 cmd += ' -l '
|
|
167 if args.sortByLibrary:
|
|
168 cmd += ' -a '
|
14
|
169 # if args.AFColumn:
|
|
170 # cmd += ' -h '
|
7
|
171 execute(cmd, output=args.outputRawFile)
|
|
172
|
|
173 # breakdancer2vcf
|
|
174 if args.outputVcfFile:
|
19
|
175 cmd = "breakdancer2vcf.py -i %s -o %s" % (args.outputRawFile, args.outputVcfFile)
|
7
|
176 execute(cmd)
|
2
|
177
|
|
178 # quelques tests
|
|
179 # verifier que les fichiers de sorties ne sont pas vides
|
7
|
180 check(args.outputRawFile)
|
|
181 check(args.outputVcfFile)
|
2
|
182 # comparer le nombre de ligne entre les 2 fichiers
|
|
183 # pour etre sur que toute les variations ont ete prises en compte
|
7
|
184 compare(args.outputRawFile, args.outputVcfFile)
|
2
|
185
|
7
|
186 sys.stdout.write('\nDone in %d seconds\n' %(int(time.time()-time0)))
|
2
|
187
|
17
|
188 # if (os.path.getsize(errorFile)>0):
|
|
189 # sys.stdout.write('At least one non fatal error was send, check the file : %s\n' %(errorFile))
|
|
190 # try:
|
|
191 # err = open(errorFile, 'r')
|
|
192 # errors = err.read()
|
|
193 # err.close()
|
|
194 # sys.stdout.write("Errors :\n%s" %(errors))
|
|
195 # except Exception, e:
|
|
196 # sys.stderr.write('%s\n' % str(e))
|
|
197 # else:
|
|
198 sys.stdout.write('BreakDancer successful')
|
2
|
199
|
|
200 except Exception, e:
|
7
|
201 sys.stdout.write('BreakDancer fail\n')
|
|
202 sys.stderr.write('%s\n' % str(e))
|
2
|
203 sys.exit()
|
|
204
|
|
205 finally:
|
7
|
206 if os.path.exists(tempDir):
|
|
207 shutil.rmtree(tempDir)
|
2
|
208
|
|
209 if __name__=="__main__":
|
|
210 __main__()
|