comparison breakdancer.py @ 10:00952c82ebc5 draft

Uploaded
author jeremie
date Fri, 27 Jun 2014 05:32:01 -0400
parents
children 0fb455068d3c
comparison
equal deleted inserted replaced
9:5c3ba387e329 10:00952c82ebc5
1 #!/usr/bin/python
2
3 import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
4
5
6 parser = argparse.ArgumentParser(description='')
7
8 parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
9 parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
10 parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )
11
12
13 tmp_dir = tempfile.mkdtemp()
14 errorFile = tmp_dir+"/errorLog"
15
16 bam2cfg_path = "bam2cfg.pl"
17 # breakdancer_path = "/usr/bin/breakdancer-max"
18 breakdancer_path = "breakdancer-max"
19 breakdancer2vcf_path = "breakdancer2vcf.py"
20
21
22 def bam2cfg(args):
23 config = tmp_dir+"/breakdancer_config"
24 cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam)
25 execute(cmd, output=config)
26 print ("\ncmd = %s \n" ) %(cmd)
27 return config
28
29
30 def breakdancer(args, config):
31 cmd = '%s %s' % (breakdancer_path, config)
32 execute(cmd, output=args.output_raw)
33
34
35 def breakdancer2vcf(args):
36 cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf )
37 execute(cmd)
38
39
40 def execute( cmd, output="" ):
41 try:
42 err = open(tmp_dir+"/errorLog", 'a')
43 if output != "":
44 out = open(output, 'w')
45 else:
46 out = subprocess.PIPE
47 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
48 process.wait()
49 err.close()
50 if out != subprocess.PIPE:
51 out.close()
52 except Exception, e:
53 sys.stderr.write("problem doing : %s\n" %(cmd))
54 sys.stderr.write( '%s\n\n' % str(e) )
55
56
57 def check( output ):
58 if (os.path.getsize(output)>0):
59 return True
60 else:
61 sys.stderr.write('The output file is empty : %s\n' % (output))
62
63
64 def getLine(file):
65 try:
66 f = open(file, 'r')
67 lignes = f.readlines()
68 n=0
69 for ligne in lignes:
70 if ligne.strip()[0]!="#":
71 n+=1
72 # n = len(lignes)
73 f.close()
74 except Exception, e:
75 sys.stderr.write( '%s\n' % str(e) )
76 return n
77
78
79 def compare( output1, output2 ):
80 # compare le nombre de ligne entre 2 fichiers
81 # pour verifier qu'aucune ligne n'a ete saute
82 num_raw = getLine(output1)
83 num_vcf = getLine(output2)
84 if (num_raw==num_vcf):
85 return True
86 else:
87 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
88
89
90 def __main__():
91
92 time0 = time.time()
93 args = parser.parse_args()
94
95 try:
96 config = bam2cfg(args)
97 breakdancer(args, config)
98 breakdancer2vcf(args)
99
100 # quelques tests
101 # verifier que les fichiers de sorties ne sont pas vides
102 check(args.output_raw)
103 check(args.output_vcf)
104 # comparer le nombre de ligne entre les 2 fichiers
105 # pour etre sur que toute les variations ont ete prises en compte
106 compare(args.output_raw, args.output_vcf)
107
108 sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))
109
110 if ( os.path.getsize(errorFile)>0 ):
111 sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
112 try:
113 err = open(errorFile, 'r')
114 errors = err.read()
115 err.close()
116 sys.stdout.write( "Errors :\n%s" %(errors))
117 except Exception, e:
118 sys.stderr.write( '%s\n' % str(e) )
119 else:
120 sys.stdout.write( 'BreakDancer successful' )
121
122 except Exception, e:
123 sys.stdout.write( 'BreakDancer fail\n' )
124 sys.stderr.write( '%s\n' % str(e) )
125 sys.exit()
126
127 finally:
128 if os.path.exists( tmp_dir ):
129 shutil.rmtree( tmp_dir )
130
131 if __name__=="__main__":
132 __main__()