annotate breakdancer.py @ 2:01bb7558d3b3 draft

Uploaded
author jeremie
date Thu, 26 Jun 2014 06:10:48 -0400
parents
children c971e3f6de9f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
1 #!/usr/bin/python
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
2
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
3 import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
4
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
5
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
6 parser = argparse.ArgumentParser(description='')
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
7
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
8 parser.add_argument ( '-i1', dest='input_bam', help='the bam input file' )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
9 parser.add_argument ( '-o1', dest='output_raw', help='the output file' )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
10 parser.add_argument ( '-o2', dest='output_vcf', help='the output file' )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
11
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
12
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
13 tmp_dir = tempfile.mkdtemp()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
14 errorFile = tmp_dir+"/errorLog"
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
15
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
16 bam2cfg_path = "/home/jeremie/Tools/breakdancer-1.1.2/perl/bam2cfg.pl"
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
17 breakdancer_path = "/usr/bin/breakdancer-max"
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
18 breakdancer2vcf_path = "/home/jeremie/Tools/breakdancer-1.1.2/breakdancer2vcf.py"
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
19
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
20
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
21 def bam2cfg(args):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
22 config = tmp_dir+"/breakdancer_config"
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
23 cmd = 'perl %s %s' % (bam2cfg_path, args.input_bam)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
24 execute(cmd, output=config)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
25 print ("\ncmd = %s \n" ) %(cmd)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
26 return config
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
27
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
28
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
29 def breakdancer(args, config):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
30 cmd = '%s %s' % (breakdancer_path, config)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
31 execute(cmd, output=args.output_raw)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
32
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
33
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
34 def breakdancer2vcf(args):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
35 cmd = "python %s -i %s -o %s" % ( breakdancer2vcf_path, args.output_raw, args.output_vcf )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
36 execute(cmd)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
37
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
38
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
39 def execute( cmd, output="" ):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
40 try:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
41 err = open(tmp_dir+"/errorLog", 'a')
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
42 if output != "":
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
43 out = open(output, 'w')
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
44 else:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
45 out = subprocess.PIPE
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
46 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
47 process.wait()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
48 err.close()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
49 if out != subprocess.PIPE:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
50 out.close()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
51 except Exception, e:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
52 sys.stderr.write("problem doing : %s\n" %(cmd))
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
53 sys.stderr.write( '%s\n\n' % str(e) )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
54
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
55
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
56 def check( output ):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
57 if (os.path.getsize(output)>0):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
58 return True
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
59 else:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
60 sys.stderr.write('The output file is empty : %s\n' % (output))
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
61
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
62
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
63 def getLine(file):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
64 try:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
65 f = open(file, 'r')
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
66 lignes = f.readlines()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
67 n=0
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
68 for ligne in lignes:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
69 if ligne.strip()[0]!="#":
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
70 n+=1
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
71 # n = len(lignes)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
72 f.close()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
73 except Exception, e:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
74 sys.stderr.write( '%s\n' % str(e) )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
75 return n
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
76
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
77
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
78 def compare( output1, output2 ):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
79 # compare le nombre de ligne entre 2 fichiers
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
80 # pour verifier qu'aucune ligne n'a ete saute
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
81 num_raw = getLine(output1)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
82 num_vcf = getLine(output2)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
83 if (num_raw==num_vcf):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
84 return True
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
85 else:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
86 sys.stderr.write('Not the same number of variant between the raw file and the vcf file : %d vs %d\n' % (num_raw, num_vcf))
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
87
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
88
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
89 def __main__():
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
90
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
91 time0 = time.time()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
92 args = parser.parse_args()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
93
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
94 try:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
95 config = bam2cfg(args)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
96 breakdancer(args, config)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
97 breakdancer2vcf(args)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
98
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
99 # quelques tests
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
100 # verifier que les fichiers de sorties ne sont pas vides
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
101 check(args.output_raw)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
102 check(args.output_vcf)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
103 # comparer le nombre de ligne entre les 2 fichiers
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
104 # pour etre sur que toute les variations ont ete prises en compte
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
105 compare(args.output_raw, args.output_vcf)
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
106
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
107 sys.stdout.write( '\nDone in %d seconds\n' %(int(time.time()-time0)))
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
108
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
109 if ( os.path.getsize(errorFile)>0 ):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
110 sys.stdout.write( 'At least one non fatal error was send, check the file : %s\n' %(errorFile) )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
111 try:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
112 err = open(errorFile, 'r')
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
113 errors = err.read()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
114 err.close()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
115 sys.stdout.write( "Errors :\n%s" %(errors))
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
116 except Exception, e:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
117 sys.stderr.write( '%s\n' % str(e) )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
118 else:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
119 sys.stdout.write( 'BreakDancer successful' )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
120
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
121 except Exception, e:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
122 sys.stdout.write( 'BreakDancer fail\n' )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
123 sys.stderr.write( '%s\n' % str(e) )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
124 sys.exit()
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
125
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
126 finally:
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
127 if os.path.exists( tmp_dir ):
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
128 shutil.rmtree( tmp_dir )
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
129
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
130 if __name__=="__main__":
01bb7558d3b3 Uploaded
jeremie
parents:
diff changeset
131 __main__()