annotate pindel.py @ 4:3e64e2294f4b draft

Uploaded
author jeremie
date Wed, 02 Jul 2014 04:44:12 -0400
parents 544c5266fc0f
children de8a643a41e3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
1
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
2 import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
3
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
4 parser = argparse.ArgumentParser(description='')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
5 parser.add_argument('-r', dest='input_reference', required=True, help='the reference file')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
6 parser.add_argument('-b', dest='input_bam', required=True, help='the bam file')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
7 parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
8 parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
9 parser.add_argument('-o', dest='output_vcf', help='the output vcf', required=True)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
10 parser.add_argument('--name', dest='name', default='defaults')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
11 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
12 parser.add_argument('--window_size', dest='window_size', type=int, default='5')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
13 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
14 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
15 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
16 parser.add_argument('--report_duplications', dest='report_duplications', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
17 parser.add_argument('--report_inversions', dest='report_inversions', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
18 parser.add_argument('--report_breakpoints', dest='report_breakpoints', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
19
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
20 tmp_dir = tempfile.mkdtemp()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
21 pindel_dir=tmp_dir+"/pindel"
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
22 errorFile = tmp_dir+"/errorLog"
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
23
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
24
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
25 def execute( cmd, output="" ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
26 try:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
27 err = open(tmp_dir+"/errorLog", 'a')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
28 if output != "":
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
29 out = open(output, 'w')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
30 else:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
31 out = subprocess.PIPE
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
32 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
33 process.wait()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
34 err.close()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
35 if out != subprocess.PIPE:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
36 out.close()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
37 except Exception, e:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
38 sys.stderr.write("problem doing : %s\n" %(cmd))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
39 sys.stderr.write( '%s\n\n' % str(e) )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
40
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
41
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
42 def index (args):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
43 cmd = "samtools index %s" % (args.input_bam)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
44 # print( "commande = "+cmd )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
45 # os.system(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
46 execute(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
47
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
48 cmd = "samtools faidx %s" % (args.input_reference)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
49 # print( "commande = "+cmd )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
50 # os.system(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
51 execute(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
52
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
53
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
54 def config( args ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
55 # print("config")
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
56 config = tmp_dir+"/pindel_config"
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
57 # cmd = 'echo %s\t%s\t%s > %s' % (bam, size, name, config)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
58 # print("commande = "+cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
59 # os.system(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
60 fil = open(config, 'w')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
61 fil.write("%s\t%s\t%s" % (args.input_bam, args.insert_size, args.name) )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
62 fil.close()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
63 return config
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
64
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
65
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
66 # def pindel( reference, config, output ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
67 # print("pindel")
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
68 # cmd="pindel -f %s -i %s -o %s" % (reference, config, output)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
69 # print("commande = "+cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
70 # os.system(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
71
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
72 # os.system("rm %s" % (output))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
73 # if not os.path.exists(output):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
74 # os.makedirs(output)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
75 # os.system("mv %s_* %s" %(output, output) )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
76
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
77
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
78 def pindel( reference, config, temp, args ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
79 cmd = "pindel -f %s -i %s -o %s " % (reference, config, pindel_dir)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
80 cmd += "--number_of_threads %d --window_size %d --sequencing_error_rate %f --sensitivity %f" % (args.number_of_threads, args.window_size, args.sequencing_error_rate, args.sensitivity)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
81 if args.report_long_insertions:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
82 cmd += ' --report_long_insertions '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
83 if args.report_duplications:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
84 cmd += ' --report_duplications '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
85 if args.report_inversions:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
86 cmd += ' --report_inversions '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
87 if args.report_breakpoints:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
88 cmd += ' --report_breakpoints '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
89 print("commande = "+cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
90 execute( cmd )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
91
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
92
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
93
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
94
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
95
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
96 def move( avant, apres ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
97 if os.path.exists(avant):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
98 execute("mv %s %s" %(avant, apres))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
99
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
100
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
101 def pindel2vcf( args ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
102 date = str(time.strftime('%d/%m/%y',time.localtime()))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
103 name = "test"
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
104 cmd = "pindel2vcf -P %s -r %s -R %s -d %s" % (pindel_dir, args.input_reference, name, date )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
105 print ("commande = " + cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
106 execute(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
107 if os.path.exists(pindel_dir+".vcf"):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
108 execute("cp %s %s" %(pindel_dir+".vcf", args.output_vcf))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
109
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
110
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
111 def __main__():
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
112
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
113 args = parser.parse_args()
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
114
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
115 # try:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
116 # cmd = 'pindel'
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
117 # cmd += ' -i1 %s -i2 %s -o %s ' % (args.input1, args.input2, args.output1)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
118 # except:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
119 # sys.stdout.write( 'problem args : %s' % (cmd) )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
120 # try:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
121 # os.system(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
122 # except:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
123 # sys.stdout.write( 'problem cmd' )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
124 # pipeline(args.reference, args.bam, args.output_raw, args.output_vcf, size, name, args)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
125
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
126 reference = args.input_reference
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
127 bam = args.input_bam
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
128 temp = args.output_vcf+"_temp/temp"
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
129 output_vcf = args.output_vcf
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
130
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
131 try:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
132 index( args )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
133 conf = config( args )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
134
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
135 pindel( reference, conf, temp, args )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
136 pindel2vcf(args)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
137
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
138
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
139 finally:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
140 if os.path.exists( tmp_dir ):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
141 shutil.rmtree( tmp_dir )
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
142
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
143 if __name__=="__main__":
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
144 __main__()