0
|
1
|
|
2 import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex
|
|
3
|
|
4 parser = argparse.ArgumentParser(description='')
|
|
5 parser.add_argument('-r', dest='input_reference', required=True, help='the reference file')
|
|
6 parser.add_argument('-b', dest='input_bam', required=True, help='the bam file')
|
|
7 parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size')
|
|
8 parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag')
|
|
9 parser.add_argument('-o', dest='output_vcf', help='the output vcf', required=True)
|
|
10 parser.add_argument('--name', dest='name', default='defaults')
|
|
11 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
|
|
12 parser.add_argument('--window_size', dest='window_size', type=int, default='5')
|
|
13 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
|
|
14 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float)
|
|
15 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False)
|
|
16 parser.add_argument('--report_duplications', dest='report_duplications', action='store_true', default=False)
|
|
17 parser.add_argument('--report_inversions', dest='report_inversions', action='store_true', default=False)
|
|
18 parser.add_argument('--report_breakpoints', dest='report_breakpoints', action='store_true', default=False)
|
|
19
|
|
20 tmp_dir = tempfile.mkdtemp()
|
|
21 pindel_dir=tmp_dir+"/pindel"
|
|
22 errorFile = tmp_dir+"/errorLog"
|
|
23
|
|
24
|
|
25 def execute( cmd, output="" ):
|
|
26 try:
|
|
27 err = open(tmp_dir+"/errorLog", 'a')
|
|
28 if output != "":
|
|
29 out = open(output, 'w')
|
|
30 else:
|
|
31 out = subprocess.PIPE
|
|
32 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
|
|
33 process.wait()
|
|
34 err.close()
|
|
35 if out != subprocess.PIPE:
|
|
36 out.close()
|
|
37 except Exception, e:
|
|
38 sys.stderr.write("problem doing : %s\n" %(cmd))
|
|
39 sys.stderr.write( '%s\n\n' % str(e) )
|
|
40
|
|
41
|
|
42 def index (args):
|
|
43 cmd = "samtools index %s" % (args.input_bam)
|
|
44 # print( "commande = "+cmd )
|
|
45 # os.system(cmd)
|
|
46 execute(cmd)
|
|
47
|
|
48 cmd = "samtools faidx %s" % (args.input_reference)
|
|
49 # print( "commande = "+cmd )
|
|
50 # os.system(cmd)
|
|
51 execute(cmd)
|
|
52
|
|
53
|
|
54 def config( args ):
|
|
55 # print("config")
|
|
56 config = tmp_dir+"/pindel_config"
|
|
57 # cmd = 'echo %s\t%s\t%s > %s' % (bam, size, name, config)
|
|
58 # print("commande = "+cmd)
|
|
59 # os.system(cmd)
|
|
60 fil = open(config, 'w')
|
|
61 fil.write("%s\t%s\t%s" % (args.input_bam, args.insert_size, args.name) )
|
|
62 fil.close()
|
|
63 return config
|
|
64
|
|
65
|
|
66 # def pindel( reference, config, output ):
|
|
67 # print("pindel")
|
|
68 # cmd="pindel -f %s -i %s -o %s" % (reference, config, output)
|
|
69 # print("commande = "+cmd)
|
|
70 # os.system(cmd)
|
|
71
|
|
72 # os.system("rm %s" % (output))
|
|
73 # if not os.path.exists(output):
|
|
74 # os.makedirs(output)
|
|
75 # os.system("mv %s_* %s" %(output, output) )
|
|
76
|
|
77
|
|
78 def pindel( reference, config, temp, args ):
|
|
79 cmd = "pindel -f %s -i %s -o %s " % (reference, config, pindel_dir)
|
|
80 cmd += "--number_of_threads %d --window_size %d --sequencing_error_rate %f --sensitivity %f" % (args.number_of_threads, args.window_size, args.sequencing_error_rate, args.sensitivity)
|
|
81 if args.report_long_insertions:
|
|
82 cmd += ' --report_long_insertions '
|
|
83 if args.report_duplications:
|
|
84 cmd += ' --report_duplications '
|
|
85 if args.report_inversions:
|
|
86 cmd += ' --report_inversions '
|
|
87 if args.report_breakpoints:
|
|
88 cmd += ' --report_breakpoints '
|
|
89 print("commande = "+cmd)
|
|
90 execute( cmd )
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96 def move( avant, apres ):
|
|
97 if os.path.exists(avant):
|
|
98 execute("mv %s %s" %(avant, apres))
|
|
99
|
|
100
|
|
101 def pindel2vcf( args ):
|
|
102 date = str(time.strftime('%d/%m/%y',time.localtime()))
|
|
103 name = "test"
|
|
104 cmd = "pindel2vcf -P %s -r %s -R %s -d %s" % (pindel_dir, args.input_reference, name, date )
|
|
105 print ("commande = " + cmd)
|
|
106 execute(cmd)
|
|
107 if os.path.exists(pindel_dir+".vcf"):
|
|
108 execute("cp %s %s" %(pindel_dir+".vcf", args.output_vcf))
|
|
109
|
|
110
|
|
111 def __main__():
|
|
112
|
|
113 args = parser.parse_args()
|
|
114
|
|
115 # try:
|
|
116 # cmd = 'pindel'
|
|
117 # cmd += ' -i1 %s -i2 %s -o %s ' % (args.input1, args.input2, args.output1)
|
|
118 # except:
|
|
119 # sys.stdout.write( 'problem args : %s' % (cmd) )
|
|
120 # try:
|
|
121 # os.system(cmd)
|
|
122 # except:
|
|
123 # sys.stdout.write( 'problem cmd' )
|
|
124 # pipeline(args.reference, args.bam, args.output_raw, args.output_vcf, size, name, args)
|
|
125
|
|
126 reference = args.input_reference
|
|
127 bam = args.input_bam
|
|
128 temp = args.output_vcf+"_temp/temp"
|
|
129 output_vcf = args.output_vcf
|
|
130
|
|
131 try:
|
|
132 index( args )
|
|
133 conf = config( args )
|
|
134
|
|
135 pindel( reference, conf, temp, args )
|
|
136 pindel2vcf(args)
|
|
137
|
|
138
|
|
139 finally:
|
|
140 if os.path.exists( tmp_dir ):
|
|
141 shutil.rmtree( tmp_dir )
|
|
142
|
|
143 if __name__=="__main__":
|
|
144 __main__()
|