comparison pindel.py @ 14:ee3a2e0cc270 draft default tip

Uploaded
author jeremie
date Wed, 09 Jul 2014 10:17:54 -0400
parents 52d89fc2e7fe
children
comparison
equal deleted inserted replaced
13:077a2dbd906a 14:ee3a2e0cc270
1 1
2 import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex 2 import argparse, os, shutil, subprocess, sys, tempfile, time, shlex
3 3
4 parser = argparse.ArgumentParser(description='') 4 parser = argparse.ArgumentParser(description='')
5 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file') 5 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file')
6 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file') 6 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file')
7 parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size') 7 parser.add_argument('-s', dest='insert_size', type=int, default=None, required=False, help='the insert size')
8 parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag') 8 parser.add_argument('-t', dest='sampleTag', default='default', required=False, help='the sample tag')
9 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True) 9 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True)
10 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True) 10 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True)
11 parser.add_argument('--name', dest='name', default='name')
12 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1') 11 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
13 parser.add_argument('--window_size', dest='window_size', type=int, default='5') 12 parser.add_argument('--window_size', dest='window_size', type=int, default='5')
14 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01') 13 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
15 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float) 14 parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float)
16 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False) 15 parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False)
29 parser.add_argument('-v', '--min_inversion_size', dest='min_inversion_size', type=int, default='50') 28 parser.add_argument('-v', '--min_inversion_size', dest='min_inversion_size', type=int, default='50')
30 parser.add_argument('-d', '--min_num_matched_bases', dest='min_num_matched_bases', type=int, default='30') 29 parser.add_argument('-d', '--min_num_matched_bases', dest='min_num_matched_bases', type=int, default='30')
31 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0') 30 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0')
32 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0') 31 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0')
33 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3') 32 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3')
33 parser.add_argument('-n', '--NM', dest='NM', type=int, default='2')
34 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False) 34 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False)
35 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350') 35 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350')
36 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100') 36 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100')
37 parser.add_argument('--MIN_DD_CLUSTER_SIZE', dest='MIN_DD_CLUSTER_SIZE', type=int, default='3') 37 parser.add_argument('--MIN_DD_CLUSTER_SIZE', dest='MIN_DD_CLUSTER_SIZE', type=int, default='3')
38 parser.add_argument('--MIN_DD_BREAKPOINT_SUPPORT', dest='MIN_DD_BREAKPOINT_SUPPORT', type=int, default='3') 38 parser.add_argument('--MIN_DD_BREAKPOINT_SUPPORT', dest='MIN_DD_BREAKPOINT_SUPPORT', type=int, default='3')
39 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000') 39 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000')
40 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False) 40 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False)
41 41
42 parser.add_argument('-z', '--input_SV_Calls_for_assembly', dest='input_SV_Calls_for_assembly', action='store_true', default=False)
42 43
43 # def execute(cmd, output=""):
44 # try:
45 # err = open(tempDir+"/errorLog", 'a')
46 # if output != "":
47 # out = open(output, 'w')
48 # else:
49 # out = subprocess.PIPE
50 # process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err)
51 # process.wait()
52 # err.close()
53 # if out != subprocess.PIPE:
54 # out.close()
55 # except Exception, e:
56 # sys.stderr.write("problem doing : %s\n" %(cmd))
57 # sys.stderr.write('%s\n\n' % str(e))
58 44
59 def execute(cmd, output=None): 45 def execute(cmd, output=None):
60 import subprocess, sys, shlex 46 import subprocess, sys, shlex
61 # function to execute a cmd and report if an error occur 47 # function to execute a cmd and report if an error occur
62 print(cmd) 48 print(cmd)
80 execute(cmd) 66 execute(cmd)
81 cmd = "samtools faidx %s" %(inputFastaFile) 67 cmd = "samtools faidx %s" %(inputFastaFile)
82 execute(cmd) 68 execute(cmd)
83 69
84 70
85 def config(inputBamFile, meanInsertSize, name, tempDir): 71 def config(inputBamFile, meanInsertSize, tag, tempDir):
86 configFile = tempDir+"/pindel_configFile" 72 configFile = tempDir+"/pindel_configFile"
87 # cmd = 'echo %s\t%s\t%s > %s' %(bam, size, name, configFile)
88 # print("commande = "+cmd)
89 # os.system(cmd)
90 fil = open(configFile, 'w') 73 fil = open(configFile, 'w')
91 fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, name)) 74 fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, tag))
92 fil.close() 75 fil.close()
93 return configFile 76 return configFile
94
95
96 # def pindel(reference, configFile, output):
97 # print("pindel")
98 # cmd="pindel -f %s -i %s -o %s" %(reference, configFile, output)
99 # print("commande = "+cmd)
100 # os.system(cmd)
101
102 # os.system("rm %s" %(output))
103 # if not os.path.exists(output):
104 # os.makedirs(output)
105 # os.system("mv %s_* %s" %(output, output))
106 77
107 78
108 def pindel(reference, configFile, args, tempDir): 79 def pindel(reference, configFile, args, tempDir):
109 pindelTempDir=tempDir+"/pindel" 80 pindelTempDir=tempDir+"/pindel"
110 cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir) 81 cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir)
159 execute(cmd) 130 execute(cmd)
160 if os.path.exists(pindelTempDir+".vcf"): 131 if os.path.exists(pindelTempDir+".vcf"):
161 execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf)) 132 execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf))
162 133
163 134
164 def getMeanInsertSize(bamFile, tempDir): 135 # def getMeanInsertSize(bamFile, tempDir):
165 samFile = tempDir+"/sam" 136 # samFile = tempDir+"/sam"
166 histogramFile = tempDir+"/histogram" 137 # histogramFile = tempDir+"/histogram"
167 outputFile = tempDir+"/output" 138 # outputFile = tempDir+"/output"
168 cmd = "samtools view -h -o %s %s" % (bamFile, samFile) 139 # cmd = "samtools view -h %s -o %s" % (bamFile, samFile)
169 execute(cmd, ) 140 # execute(cmd, )
170 cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile) 141 # cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile)
171 execute(cmd) 142 # execute(cmd)
172 try: 143 # try:
173 f = open(outputFile, 'r') 144 # f = open(outputFile, 'r')
174 lignes = f.readlines() 145 # lignes = f.readlines()
175 f.close() 146 # f.close()
176 meanInsertSize = lignes[7].split('\t')[0] 147 # meanInsertSize = lignes[7].split('\t')[0]
177 except: 148 # except:
178 sys.stderr.write("problem while opening file %s " %(outputFile)) 149 # sys.stderr.write("problem while opening file %s " %(outputFile))
179 return 0 150 # return 0
180 return meanInsertSize 151 # return meanInsertSize
181 152
182 153
183 def __main__(): 154 def __main__():
184 155
185 args = parser.parse_args() 156 args = parser.parse_args()
186 tempDir = tempfile.mkdtemp() 157 tempDir = tempfile.mkdtemp()
187 158
188 # try:
189 # cmd = 'pindel'
190 # cmd += ' -i1 %s -i2 %s -o %s ' %(args.input1, args.input2, args.output1)
191 # except:
192 # sys.stdout.write('problem args : %s' %(cmd))
193 # try:
194 # os.system(cmd)
195 # except:
196 # sys.stdout.write('problem cmd')
197 # pipeline(args.reference, args.bam, args.output_raw, args.outputVcfFile, size, name, args)
198
199 try: 159 try:
200 indexBam(args.inputFastaFile, args.inputBamFile) 160 indexBam(args.inputFastaFile, args.inputBamFile)
201 meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir)) 161 # if args.insert_size==None:
202 configFile = config(args.inputBamFile, meanInsertSize, args.name, tempDir) 162 # meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir))
163 # else:
164 meanInsertSize=args.insert_size
165 configFile = config(args.inputBamFile, meanInsertSize, args.sampleTag, tempDir)
203 pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir) 166 pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir)
204 pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.name, pindelTempDir) 167 pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.sampleTag, pindelTempDir)
205 168
206 finally: 169 finally:
207 if os.path.exists(tempDir): 170 if os.path.exists(tempDir):
208 shutil.rmtree(tempDir) 171 shutil.rmtree(tempDir)
209 172
210 if __name__=="__main__": 173 if __name__=="__main__":