annotate pindel.py @ 14:ee3a2e0cc270 draft default tip

Uploaded
author jeremie
date Wed, 09 Jul 2014 10:17:54 -0400
parents 52d89fc2e7fe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
1
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
2 import argparse, os, shutil, subprocess, sys, tempfile, time, shlex
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
3
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
4 parser = argparse.ArgumentParser(description='')
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
5 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
6 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file')
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
7 parser.add_argument('-s', dest='insert_size', type=int, default=None, required=False, help='the insert size')
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
8 parser.add_argument('-t', dest='sampleTag', default='default', required=False, help='the sample tag')
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
9 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
10 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
11 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
12 parser.add_argument('--window_size', dest='window_size', type=int, default='5')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
13 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
14 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
15 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
16 parser.add_argument('--report_duplications', dest='report_duplications', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
17 parser.add_argument('--report_inversions', dest='report_inversions', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
18 parser.add_argument('--report_breakpoints', dest='report_breakpoints', action='store_true', default=False)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
19
9
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
20 parser.add_argument('-u', '--maximum_allowed_mismatch_rate', dest='maximum_allowed_mismatch_rate', type=float, default='0.02')
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
21 parser.add_argument('--report_close_mapped_reads', dest='report_close_mapped_reads', action='store_true', default=False)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
22 parser.add_argument('--report_only_close_mapped_reads', dest='report_only_close_mapped_reads', action='store_true', default=False)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
23 parser.add_argument('--report_interchromosomal_events', dest='report_interchromosomal_events', action='store_true', default=False)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
24 parser.add_argument('--IndelCorrection', dest='IndelCorrection', action='store_true', default=False)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
25 parser.add_argument('--NormalSamples', dest='NormalSamples', action='store_true', default=False)
9
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
26 parser.add_argument('-a', '--additional_mismatch', dest='additional_mismatch', type=int, default='1')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
27 parser.add_argument('-m', '--min_perfect_match_around_BP', dest='min_perfect_match_around_BP', type=int, default='3')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
28 parser.add_argument('-v', '--min_inversion_size', dest='min_inversion_size', type=int, default='50')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
29 parser.add_argument('-d', '--min_num_matched_bases', dest='min_num_matched_bases', type=int, default='30')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
30 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
31 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0')
844ad5ce74cc Uploaded
jeremie
parents: 8
diff changeset
32 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3')
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
33 parser.add_argument('-n', '--NM', dest='NM', type=int, default='2')
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
34 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
35 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
36 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
37 parser.add_argument('--MIN_DD_CLUSTER_SIZE', dest='MIN_DD_CLUSTER_SIZE', type=int, default='3')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
38 parser.add_argument('--MIN_DD_BREAKPOINT_SUPPORT', dest='MIN_DD_BREAKPOINT_SUPPORT', type=int, default='3')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
39 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
40 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
41
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
42 parser.add_argument('-z', '--input_SV_Calls_for_assembly', dest='input_SV_Calls_for_assembly', action='store_true', default=False)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
43
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
44
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
45 def execute(cmd, output=None):
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
46 import subprocess, sys, shlex
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
47 # function to execute a cmd and report if an error occur
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
48 print(cmd)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
49 try:
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
50 process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
51 process.wait()
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
52 stdout,stderr = process.communicate()
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
53 except Exception, e: # une erreur de ma commande : stderr
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
54 sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e))
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
55 return
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
56 if output:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
57 output = open(output, 'w')
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
58 output.write(stdout)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
59 output.close()
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
60 if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
61 sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr))
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
62
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
63
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
64 def indexBam(inputFastaFile, inputBamFile):
10
accf671c1f3d Uploaded
jeremie
parents: 9
diff changeset
65 cmd = "samtools index %s" %(inputBamFile)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
66 execute(cmd)
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
67 cmd = "samtools faidx %s" %(inputFastaFile)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
68 execute(cmd)
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
69
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
70
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
71 def config(inputBamFile, meanInsertSize, tag, tempDir):
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
72 configFile = tempDir+"/pindel_configFile"
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
73 fil = open(configFile, 'w')
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
74 fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, tag))
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
75 fil.close()
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
76 return configFile
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
77
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
78
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
79 def pindel(reference, configFile, args, tempDir):
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
80 pindelTempDir=tempDir+"/pindel"
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
81 cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
82 cmd += " --number_of_threads %d --window_size %d --sequencing_error_rate %f --sensitivity %f" %(args.number_of_threads, args.window_size, args.sequencing_error_rate, args.sensitivity)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
83 cmd += " -u %f -n %d -a %d -m %d -v %d -d %d -B %d -A %d -M %d " %(args.maximum_allowed_mismatch_rate, args.NM, args.additional_mismatch, args.min_perfect_match_around_BP, args.min_inversion_size, args.min_num_matched_bases, args.balance_cutoff, args.anchor_quality, args.minimum_support_for_event)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
84
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
85 if args.report_long_insertions:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
86 cmd += ' --report_long_insertions '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
87 if args.report_duplications:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
88 cmd += ' --report_duplications '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
89 if args.report_inversions:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
90 cmd += ' --report_inversions '
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
91 if args.report_breakpoints:
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
92 cmd += ' --report_breakpoints '
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
93
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
94 if args.report_close_mapped_reads:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
95 cmd += ' --report_close_mapped_reads '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
96 if args.report_only_close_mapped_reads:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
97 cmd += ' --report_only_close_mapped_reads '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
98 if args.report_interchromosomal_events:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
99 cmd += ' --report_interchromosomal_events '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
100 if args.IndelCorrection:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
101 cmd += ' --IndelCorrection '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
102 if args.NormalSamples:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
103 cmd += ' --NormalSamples '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
104 if args.input_SV_Calls_for_assembly:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
105 cmd += ' --input_SV_Calls_for_assembly %s ' %(args.input_SV_Calls_for_assembly)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
106
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
107 if args.detect_DD:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
108 cmd += ' -q '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
109 cmd += ' --MAX_DD_BREAKPOINT_DISTANCE '+str(args.MAX_DD_BREAKPOINT_DISTANCE)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
110 cmd += ' --MAX_DISTANCE_CLUSTER_READS '+str(args.MAX_DISTANCE_CLUSTER_READS)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
111 cmd += ' --MIN_DD_CLUSTER_SIZE '+str(args.MIN_DD_CLUSTER_SIZE)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
112 cmd += ' --MIN_DD_BREAKPOINT_SUPPORT '+str(args.MIN_DD_BREAKPOINT_SUPPORT)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
113 cmd += ' --MIN_DD_MAP_DISTANCE '+str(args.MIN_DD_MAP_DISTANCE)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
114 if args.DD_REPORT_DUPLICATION_READS:
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
115 cmd += ' --DD_REPORT_DUPLICATION_READS '
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
116
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
117 execute(cmd)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
118 return pindelTempDir
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
119
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
120
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
121 def move(avant, apres):
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
122 if os.path.exists(avant):
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
123 execute("mv %s %s" %(avant, apres))
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
124
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
125
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
126 def pindel2vcf(outputVcf, inputFastaFile, name, pindelTempDir):
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
127 date = str(time.strftime('%d/%m/%y',time.localtime()))
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
128 cmd = "pindel2vcf -P %s -r %s -R %s -d %s" %(pindelTempDir, inputFastaFile, name, date)
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
129 print("commande = " + cmd)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
130 execute(cmd)
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
131 if os.path.exists(pindelTempDir+".vcf"):
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
132 execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf))
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
133
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
134
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
135 # def getMeanInsertSize(bamFile, tempDir):
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
136 # samFile = tempDir+"/sam"
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
137 # histogramFile = tempDir+"/histogram"
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
138 # outputFile = tempDir+"/output"
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
139 # cmd = "samtools view -h %s -o %s" % (bamFile, samFile)
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
140 # execute(cmd, )
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
141 # cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile)
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
142 # execute(cmd)
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
143 # try:
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
144 # f = open(outputFile, 'r')
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
145 # lignes = f.readlines()
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
146 # f.close()
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
147 # meanInsertSize = lignes[7].split('\t')[0]
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
148 # except:
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
149 # sys.stderr.write("problem while opening file %s " %(outputFile))
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
150 # return 0
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
151 # return meanInsertSize
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
152
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
153
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
154 def __main__():
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
155
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
156 args = parser.parse_args()
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
157 tempDir = tempfile.mkdtemp()
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
158
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
159 try:
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
160 indexBam(args.inputFastaFile, args.inputBamFile)
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
161 # if args.insert_size==None:
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
162 # meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir))
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
163 # else:
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
164 meanInsertSize=args.insert_size
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
165 configFile = config(args.inputBamFile, meanInsertSize, args.sampleTag, tempDir)
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
166 pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir)
14
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
167 pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.sampleTag, pindelTempDir)
ee3a2e0cc270 Uploaded
jeremie
parents: 11
diff changeset
168
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
169 finally:
8
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
170 if os.path.exists(tempDir):
de8a643a41e3 Uploaded
jeremie
parents: 0
diff changeset
171 shutil.rmtree(tempDir)
0
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
172
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
173 if __name__=="__main__":
544c5266fc0f Uploaded
jeremie
parents:
diff changeset
174 __main__()