annotate mutect_wrapper.py @ 12:c7d1f62af8da draft

Added required tool_data_sample file.
author geert-vandeweyer
date Mon, 17 Feb 2014 05:36:26 -0500
parents 59c387dcb2d2
children 37a8219b62de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
1 #!/usr/bin/env python
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
2
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
3 """
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
4 Runs muTect on normal/tumor bam pair.
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
5
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
6 usage: mutect_wrapper.py [options]
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
7
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
8 See below for options
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
9 """
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
10
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
11 import optparse, os, shutil, subprocess, sys, tempfile
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
# Maps a Galaxy datatype extension to the extension GATK expects.
# Items not listed here will use the galaxy extension as-is.
GALAXY_EXT_TO_GATK_EXT = dict(
    gatk_interval='intervals',
    bam_index='bam.bai',
    gatk_dbsnp='dbSNP',
    picard_interval_list='interval_list',
)

# For now the file-type table is the very same object as the extension table;
# rebind this name to a separate dict if the two ever need to differ.
GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT

DEFAULT_GATK_PREFIX = 'gatk_file'

CHUNK_SIZE = 1 << 20  # 1mb (2**20)
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
16
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the process.

    Writes ``msg`` followed by a newline to stderr, then raises
    ``SystemExit`` with status 1 so that the caller of this script sees a
    failing exit code (a bare ``sys.exit()`` would report success).
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
20
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
21
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
def __main__():
    """Run muTect on a normal/tumor BAM pair described by the command line.

    Steps:
      1. Parse the wrapper options from ``sys.argv``.
      2. Symlink both BAM files into a fresh scratch directory (its path is
         written to stdout) and index them there with ``samtools index``.
      3. Build the muTect argument list and run the jar through ``java``.

    Exits via ``parser.error`` (status 2, usage on stderr) when --ref,
    --input_normal, --input_tumor or --jar is missing.  Exits via
    ``sys.exit`` with a message (status 1) when samtools or java cannot be
    started or returns a non-zero status.  The scratch directory is removed
    on every exit path.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-R', '--ref', dest='ref', help='The reference genome to use ' )
    parser.add_option( '-n', '--input_normal', dest='normal', help='The Normal Tissue BAM file' )
    parser.add_option( '-t', '--input_tumor', dest='tumor', help='The Tumor Tissue BAM file' )
    parser.add_option( '--callstats', dest='callstats', help='The file to save the call statistics (txt format)' )
    parser.add_option( '--coverage', dest='coverage', help='The file to save the coverage wig (wig format)' )
    parser.add_option( '--vcf', dest='vcf', help='output VCF file.' )
    parser.add_option( '--cosmic', dest='cosmic', help='COSMIC VCF file' )
    parser.add_option( '--dbsnp', dest='dbsnp', help='dbSNP VCF file' )
    parser.add_option( '-L', '--intervals', dest='intervals', help='Interval file (-L)' )
    # The help text always promised MuTect as the default; make it so instead
    # of passing the literal "-T None" when the option is omitted.
    parser.add_option( '-T', '--analysis_type', dest='analysis', default='MuTect', help='Analysis Type(default = MuTect)' )
    parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)', default='pre_set' )
    parser.add_option( '-j', '--jar', dest='jar', help='path to the mutect jar file.' )
    parser.add_option( '--artifact', dest='artifact', help='Disable HC filter' )
    parser.add_option( '--pon', dest='pon', help='Panel of Normal (VCF file)' )

    (options, args) = parser.parse_args()

    # Fail with a usage message before touching the file system, rather than
    # with a TypeError from os.symlink( None, ... ) further down.
    required = (
        ( '--ref', options.ref ),
        ( '--input_normal', options.normal ),
        ( '--input_tumor', options.tumor ),
        ( '--jar', options.jar ),
    )
    missing = [ flag for flag, value in required if not value ]
    if missing:
        parser.error( 'missing required option(s): %s' % ', '.join( missing ) )

    def is_set( value ):
        # The intervals handling has always treated the literal string 'None'
        # as "not provided"; apply the same rule to the optional VCF inputs.
        return bool( value ) and value != 'None'

    # make temp directory for placement of the bam symlinks and their indices
    tmp_dir = tempfile.mkdtemp( prefix='tmp-muTect-' )
    sys.stdout.write( '%s\n' % tmp_dir )
    try:
        # input: absolute link targets, so relative paths given on the
        # command line do not turn into dangling links inside tmp_dir.
        normalpath = os.path.join( tmp_dir, 'normal.bam' )
        tumorpath = os.path.join( tmp_dir, 'tumor.bam' )
        os.symlink( os.path.abspath( options.normal ), normalpath )
        os.symlink( os.path.abspath( options.tumor ), tumorpath )

        # Index next to the symlinks.  Argument lists (no shell) keep paths
        # with spaces or shell metacharacters intact.
        for bam in ( 'normal.bam', 'tumor.bam' ):
            try:
                status = subprocess.call( [ 'samtools', 'index', bam ], cwd=tmp_dir )
            except OSError as e:
                # samtools could not be started at all (e.g. not on PATH)
                sys.exit( 'indexing of bam files failed. ' + str( e ) )
            if status != 0:
                sys.exit( 'indexing of bam files failed. samtools index %s exited with status %s' % ( bam, status ) )

        arguments = [
            '-T', options.analysis,
            '--enable_extended_output',
            '--input_file:normal', normalpath,
            '--input_file:tumor', tumorpath,
            '-R', options.ref,
        ]
        if is_set( options.dbsnp ):
            arguments += [ '--dbsnp', options.dbsnp ]
        if is_set( options.cosmic ):
            arguments += [ '--cosmic', options.cosmic ]
        if options.intervals:
            if os.path.isfile( options.intervals ):
                # a file on disk: expose it under a .bed name inside tmp_dir
                intervalpath = os.path.join( tmp_dir, 'intervals.bed' )
                os.symlink( os.path.abspath( options.intervals ), intervalpath )
                arguments += [ '-L', intervalpath ]
            elif options.intervals != 'None':
                # not a file: hand the value to -L unchanged
                arguments += [ '-L', options.intervals ]
        ## disable HC filters
        if options.artifact:
            arguments.append( '--artifact_detection_mode' )
        ## enable PON
        if is_set( options.pon ):
            arguments += [ '--normal_panel', options.pon ]
        # output: only request the files that were actually asked for
        outputs = (
            ( '--out', options.callstats ),
            ( '--coverage_file', options.coverage ),
            ( '--vcf', options.vcf ),
        )
        for flag, path in outputs:
            if path:
                arguments += [ flag, path ]

        # final command
        command = [ 'java', '-Xmx2G', '-jar', options.jar ] + arguments
        try:
            status = subprocess.call( command )
        except OSError as e:
            # java could not be started at all
            sys.exit( 'muTect Failed: ' + str( e ) )
        if status != 0:
            sys.exit( 'muTect Failed: java exited with status %s' % status )
    finally:
        # Only symlinks and bam indices live here; never let a cleanup
        # problem hide the real outcome of the run.
        shutil.rmtree( tmp_dir, ignore_errors=True )
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
99
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
100
59c387dcb2d2 Uploaded
geert-vandeweyer
parents:
diff changeset
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()