annotate breakdancer.py @ 25:d05d0c77b021 draft

Uploaded
author jeremie
date Tue, 01 Jul 2014 10:35:15 -0400
parents b4f641515363
children acec6ba9884a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
1 #!/usr/bin/python
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
2
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
3 import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
4
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
5
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
6 parser = argparse.ArgumentParser(description='')
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
7
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
8 parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
9 parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
10 parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
11
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
12
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
13 tmp_dir = tempfile.mkdtemp()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
14 errorFile = tmp_dir+"/errorLog"
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
15
16
0fb455068d3c Uploaded
jeremie
parents: 10
diff changeset
16 currentDir = os.path.dirname(os.path.abspath(__file__))
21
199c0c7d3093 Uploaded
jeremie
parents: 19
diff changeset
17
22
b4f641515363 Uploaded
jeremie
parents: 21
diff changeset
18 binPath = os.environ['BREAKDANCER_BIN']
25
d05d0c77b021 Uploaded
jeremie
parents: 22
diff changeset
19 print(binPath)
d05d0c77b021 Uploaded
jeremie
parents: 22
diff changeset
20 print(str(binPath))
d05d0c77b021 Uploaded
jeremie
parents: 22
diff changeset
21 bam2cfg = str(binPath)+"/bam2cfg.pl"
21
199c0c7d3093 Uploaded
jeremie
parents: 19
diff changeset
22 breakdancer = binPath+"/breakdancer-max"
199c0c7d3093 Uploaded
jeremie
parents: 19
diff changeset
23 breakdancer2vcf = binPath+"/breakdancer2vcf.py"
10
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
24
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
25
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
26 def bam2cfg(args):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
27 config = tmp_dir+"/breakdancer_config"
19
87a6ec1e2400 Uploaded
jeremie
parents: 16
diff changeset
28 cmd = 'perl %s %s' % (bam2cfg, args.input_bam)
10
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
29 execute(cmd, output=config)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
30 print ("\ncmd = %s \n" ) %(cmd)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
31 return config
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
32
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
33
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
34 def breakdancer(args, config):
19
87a6ec1e2400 Uploaded
jeremie
parents: 16
diff changeset
35 cmd = '%s %s' % (breakdancer, config)
10
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
36 execute(cmd, output=args.output_raw)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
37
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
38
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
39 def breakdancer2vcf(args):
19
87a6ec1e2400 Uploaded
jeremie
parents: 16
diff changeset
40 cmd = "python %s -i %s -o %s" % ( breakdancer2vcf, args.output_raw, args.output_vcf )
10
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
41 execute(cmd)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
42
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
43
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
44 def execute( cmd, output="" ):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
45 try:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
46 err = open(tmp_dir+"/errorLog", 'a')
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
47 if output != "":
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
48 out = open(output, 'w')
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
49 else:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
50 out = subprocess.PIPE
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
51 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
52 process.wait()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
53 err.close()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
54 if out != subprocess.PIPE:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
55 out.close()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
56 except Exception, e:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
57 sys.stderr.write("problem doing : %s\n" %(cmd))
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
58 sys.stderr.write( '%s\n\n' % str(e) )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
59
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
60
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
61 def check( output ):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
62 if (os.path.getsize(output)>0):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
63 return True
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
64 else:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
65 sys.stderr.write('The output file is empty : %s\n' % (output))
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
66
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
67
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
68 def getLine(file):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
69 try:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
70 f = open(file, 'r')
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
71 lignes = f.readlines()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
72 n=0
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
73 for ligne in lignes:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
74 if ligne.strip()[0]!="#":
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
75 n+=1
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
76 # n = len(lignes)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
77 f.close()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
78 except Exception, e:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
79 sys.stderr.write( '%s\n' % str(e) )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
80 return n
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
81
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
82
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
83 def compare( output1, output2 ):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
84 # compare le nombre de ligne entre 2 fichiers
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
85 # pour verifier qu'aucune ligne n'a ete saute
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
86 num_raw = getLine(output1)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
87 num_vcf = getLine(output2)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
88 if (num_raw==num_vcf):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
89 return True
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
90 else:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
91 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
92
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
93
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
94 def __main__():
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
95
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
96 time0 = time.time()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
97 args = parser.parse_args()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
98
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
99 try:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
100 config = bam2cfg(args)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
101 breakdancer(args, config)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
102 breakdancer2vcf(args)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
103
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
104 # quelques tests
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
105 # verifier que les fichiers de sorties ne sont pas vides
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
106 check(args.output_raw)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
107 check(args.output_vcf)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
108 # comparer le nombre de ligne entre les 2 fichiers
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
109 # pour etre sur que toute les variations ont ete prises en compte
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
110 compare(args.output_raw, args.output_vcf)
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
111
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
112 sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
113
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
114 if ( os.path.getsize(errorFile)>0 ):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
115 sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
116 try:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
117 err = open(errorFile, 'r')
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
118 errors = err.read()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
119 err.close()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
120 sys.stdout.write( "Errors :\n%s" %(errors))
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
121 except Exception, e:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
122 sys.stderr.write( '%s\n' % str(e) )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
123 else:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
124 sys.stdout.write( 'BreakDancer successful' )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
125
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
126 except Exception, e:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
127 sys.stdout.write( 'BreakDancer fail\n' )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
128 sys.stderr.write( '%s\n' % str(e) )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
129 sys.exit()
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
130
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
131 finally:
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
132 if os.path.exists( tmp_dir ):
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
133 shutil.rmtree( tmp_dir )
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
134
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
135 if __name__=="__main__":
00952c82ebc5 Uploaded
jeremie
parents:
diff changeset
136 __main__()