Mercurial > repos > jeremie > pindel_test
comparison pindel.py @ 14:ee3a2e0cc270 draft default tip
Uploaded
author | jeremie |
---|---|
date | Wed, 09 Jul 2014 10:17:54 -0400 |
parents | 52d89fc2e7fe |
children |
comparison
equal
deleted
inserted
replaced
13:077a2dbd906a | 14:ee3a2e0cc270 |
---|---|
1 | 1 |
2 import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex | 2 import argparse, os, shutil, subprocess, sys, tempfile, time, shlex |
3 | 3 |
4 parser = argparse.ArgumentParser(description='') | 4 parser = argparse.ArgumentParser(description='') |
5 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file') | 5 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file') |
6 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file') | 6 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file') |
7 parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size') | 7 parser.add_argument('-s', dest='insert_size', type=int, default=None, required=False, help='the insert size') |
8 parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag') | 8 parser.add_argument('-t', dest='sampleTag', default='default', required=False, help='the sample tag') |
9 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True) | 9 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True) |
10 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True) | 10 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True) |
11 parser.add_argument('--name', dest='name', default='name') | |
12 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1') | 11 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1') |
13 parser.add_argument('--window_size', dest='window_size', type=int, default='5') | 12 parser.add_argument('--window_size', dest='window_size', type=int, default='5') |
14 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01') | 13 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01') |
15 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float) | 14 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float) |
16 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False) | 15 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False) |
29 parser.add_argument('-v', '--min_inversion_size', dest='min_inversion_size', type=int, default='50') | 28 parser.add_argument('-v', '--min_inversion_size', dest='min_inversion_size', type=int, default='50') |
30 parser.add_argument('-d', '--min_num_matched_bases', dest='min_num_matched_bases', type=int, default='30') | 29 parser.add_argument('-d', '--min_num_matched_bases', dest='min_num_matched_bases', type=int, default='30') |
31 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0') | 30 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0') |
32 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0') | 31 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0') |
33 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3') | 32 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3') |
33 parser.add_argument('-n', '--NM', dest='NM', type=int, default='2') | |
34 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False) | 34 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False) |
35 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350') | 35 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350') |
36 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100') | 36 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100') |
37 parser.add_argument('--MIN_DD_CLUSTER_SIZE', dest='MIN_DD_CLUSTER_SIZE', type=int, default='3') | 37 parser.add_argument('--MIN_DD_CLUSTER_SIZE', dest='MIN_DD_CLUSTER_SIZE', type=int, default='3') |
38 parser.add_argument('--MIN_DD_BREAKPOINT_SUPPORT', dest='MIN_DD_BREAKPOINT_SUPPORT', type=int, default='3') | 38 parser.add_argument('--MIN_DD_BREAKPOINT_SUPPORT', dest='MIN_DD_BREAKPOINT_SUPPORT', type=int, default='3') |
39 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000') | 39 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000') |
40 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False) | 40 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False) |
41 | 41 |
42 parser.add_argument('-z', '--input_SV_Calls_for_assembly', dest='input_SV_Calls_for_assembly', action='store_true', default=False) | |
42 | 43 |
43 # def execute(cmd, output=""): | |
44 # try: | |
45 # err = open(tempDir+"/errorLog", 'a') | |
46 # if output != "": | |
47 # out = open(output, 'w') | |
48 # else: | |
49 # out = subprocess.PIPE | |
50 # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err) | |
51 # process.wait() | |
52 # err.close() | |
53 # if out != subprocess.PIPE: | |
54 # out.close() | |
55 # except Exception, e: | |
56 # sys.stderr.write("problem doing : %s\n" %(cmd)) | |
57 # sys.stderr.write('%s\n\n' % str(e)) | |
58 | 44 |
59 def execute(cmd, output=None): | 45 def execute(cmd, output=None): |
60 import subprocess, sys, shlex | 46 import subprocess, sys, shlex |
61 # function to execute a cmd and report if an error occur | 47 # function to execute a cmd and report if an error occur |
62 print(cmd) | 48 print(cmd) |
80 execute(cmd) | 66 execute(cmd) |
81 cmd = "samtools faidx %s" %(inputFastaFile) | 67 cmd = "samtools faidx %s" %(inputFastaFile) |
82 execute(cmd) | 68 execute(cmd) |
83 | 69 |
84 | 70 |
85 def config(inputBamFile, meanInsertSize, name, tempDir): | 71 def config(inputBamFile, meanInsertSize, tag, tempDir): |
86 configFile = tempDir+"/pindel_configFile" | 72 configFile = tempDir+"/pindel_configFile" |
87 # cmd = 'echo %s\t%s\t%s > %s' %(bam, size, name, configFile) | |
88 # print("commande = "+cmd) | |
89 # os.system(cmd) | |
90 fil = open(configFile, 'w') | 73 fil = open(configFile, 'w') |
91 fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, name)) | 74 fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, tag)) |
92 fil.close() | 75 fil.close() |
93 return configFile | 76 return configFile |
94 | |
95 | |
96 # def pindel(reference, configFile, output): | |
97 # print("pindel") | |
98 # cmd="pindel -f %s -i %s -o %s" %(reference, configFile, output) | |
99 # print("commande = "+cmd) | |
100 # os.system(cmd) | |
101 | |
102 # os.system("rm %s" %(output)) | |
103 # if not os.path.exists(output): | |
104 # os.makedirs(output) | |
105 # os.system("mv %s_* %s" %(output, output)) | |
106 | 77 |
107 | 78 |
108 def pindel(reference, configFile, args, tempDir): | 79 def pindel(reference, configFile, args, tempDir): |
109 pindelTempDir=tempDir+"/pindel" | 80 pindelTempDir=tempDir+"/pindel" |
110 cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir) | 81 cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir) |
159 execute(cmd) | 130 execute(cmd) |
160 if os.path.exists(pindelTempDir+".vcf"): | 131 if os.path.exists(pindelTempDir+".vcf"): |
161 execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf)) | 132 execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf)) |
162 | 133 |
163 | 134 |
164 def getMeanInsertSize(bamFile, tempDir): | 135 # def getMeanInsertSize(bamFile, tempDir): |
165 samFile = tempDir+"/sam" | 136 # samFile = tempDir+"/sam" |
166 histogramFile = tempDir+"/histogram" | 137 # histogramFile = tempDir+"/histogram" |
167 outputFile = tempDir+"/output" | 138 # outputFile = tempDir+"/output" |
168 cmd = "samtools view -h -o %s %s" % (bamFile, samFile) | 139 # cmd = "samtools view -h %s -o %s" % (bamFile, samFile) |
169 execute(cmd, ) | 140 # execute(cmd, ) |
170 cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile) | 141 # cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile) |
171 execute(cmd) | 142 # execute(cmd) |
172 try: | 143 # try: |
173 f = open(outputFile, 'r') | 144 # f = open(outputFile, 'r') |
174 lignes = f.readlines() | 145 # lignes = f.readlines() |
175 f.close() | 146 # f.close() |
176 meanInsertSize = lignes[7].split('\t')[0] | 147 # meanInsertSize = lignes[7].split('\t')[0] |
177 except: | 148 # except: |
178 sys.stderr.write("problem while opening file %s " %(outputFile)) | 149 # sys.stderr.write("problem while opening file %s " %(outputFile)) |
179 return 0 | 150 # return 0 |
180 return meanInsertSize | 151 # return meanInsertSize |
181 | 152 |
182 | 153 |
183 def __main__(): | 154 def __main__(): |
184 | 155 |
185 args = parser.parse_args() | 156 args = parser.parse_args() |
186 tempDir = tempfile.mkdtemp() | 157 tempDir = tempfile.mkdtemp() |
187 | 158 |
188 # try: | |
189 # cmd = 'pindel' | |
190 # cmd += ' -i1 %s -i2 %s -o %s ' %(args.input1, args.input2, args.output1) | |
191 # except: | |
192 # sys.stdout.write('problem args : %s' %(cmd)) | |
193 # try: | |
194 # os.system(cmd) | |
195 # except: | |
196 # sys.stdout.write('problem cmd') | |
197 # pipeline(args.reference, args.bam, args.output_raw, args.outputVcfFile, size, name, args) | |
198 | |
199 try: | 159 try: |
200 indexBam(args.inputFastaFile, args.inputBamFile) | 160 indexBam(args.inputFastaFile, args.inputBamFile) |
201 meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir)) | 161 # if args.insert_size==None: |
202 configFile = config(args.inputBamFile, meanInsertSize, args.name, tempDir) | 162 # meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir)) |
163 # else: | |
164 meanInsertSize=args.insert_size | |
165 configFile = config(args.inputBamFile, meanInsertSize, args.sampleTag, tempDir) | |
203 pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir) | 166 pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir) |
204 pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.name, pindelTempDir) | 167 pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.sampleTag, pindelTempDir) |
205 | 168 |
206 finally: | 169 finally: |
207 if os.path.exists(tempDir): | 170 if os.path.exists(tempDir): |
208 shutil.rmtree(tempDir) | 171 shutil.rmtree(tempDir) |
209 | 172 |
210 if __name__=="__main__": | 173 if __name__=="__main__": |