comparison shear_wrapper.py @ 4:a82400332451

Use shear_wrapper.py to generate reference indexes when needed
author Jim Johnson <jj@umn.edu>
date Tue, 16 Jul 2013 12:38:48 -0500
parents
children 0158f7356ffd
comparison
equal deleted inserted replaced
3:630532975fa9 4:a82400332451
1 #!/usr/bin/env python
2
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path
4
5 def stop_err( msg, code ):
6 sys.stderr.write( '%s\n' % msg )
7 sys.exit(code)
8
9 def __main__():
10 #Parse Command Line
11 parser = optparse.OptionParser()
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' )
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' )
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' )
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' )
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' )
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' )
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' )
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' )
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' )
21 parser.add_option( '-r', '--report', dest='report', help='The SHEAR output report' )
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' )
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' )
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' )
25 (options, args) = parser.parse_args()
26
27 def make_ref(src, dest, copy=False):
28 if copy:
29 shutil.copy( src, dest )
30 else:
31 os.symlink( src, dest )
32
33 # output version # of tool
34 #try:
35 #except:
36 # sys.stdout.write( 'Could not determine BWA version\n' )
37 if not options.jar_path:
38 stop_err('path to SHEAR.jar is required',1)
39 if options.command:
40 command = options.command
41 else:
42 stop_err('SHEAR command is required',1)
43 args = [ 'java','-jar' ]
44 args.append( options.jar_path )
45 args.append( command )
46 # Check required params for command
47 buffsize = 1048576
48 copy_ref = False
49 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref'
50 if command in ['sv']:
51 args.append( '-p' )
52 args.append( prefix )
53 if options.svidx_dir and command in ['sv']:
54 if not os.path.isdir(options.svidx_dir):
55 os.makedirs(options.svidx_dir)
56 ref_prefix = os.path.join(options.svidx_dir,prefix)
57 copy_ref = True
58 else:
59 ref_prefix = os.path.join(os.getcwd(),prefix)
60
61 if command in ['sv','assemble']:
62 ref_fasta = ref_prefix + '.fa'
63 ref_index = ref_fasta + '.fai'
64 if options.fasta:
65 make_ref( options.fasta, ref_fasta, copy=copy_ref )
66 else:
67 stop_err('Reference fasta file is required',3)
68 # if reference fasta index is not supllied, generate it
69 if options.fai:
70 make_ref( options.fai, ref_index, copy=copy_ref )
71 elif os.path.exists(options.fasta + '.fai'):
72 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref )
73 else:
74 # generate fasta fai index
75 cmd1 = 'samtools faidx %s' % (ref_fasta )
76 try:
77 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name
78 tmp_stderr = open( tmp_name, 'wb' )
79 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
80 returncode = proc.wait()
81 tmp_stderr.close()
82 if returncode != 0:
83 stderr = ''
84 # get stderr, allowing for case where it's very large
85 tmp_stderr = open( tmp, 'rb' )
86 try:
87 while True:
88 stderr += tmp_stderr.read( buffsize )
89 if not stderr or len( stderr ) % buffsize != 0:
90 break
91 except OverflowError:
92 pass
93 raise Exception, stderr
94 except Exception, e:
95 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 )
96 args.append( '-f' )
97 args.append( ref_fasta )
98
99 if command in ['sv']:
100 if not options.bam:
101 stop_err('BAM alignment file is required',2)
102
103 # if reference 2bit is not supplied, generate it
104 twobit = ref_prefix + '.2bit'
105 if options.twobit:
106 make_ref( options.twobit, twobit, copy=copy_ref )
107 else:
108 # generate 2bit index
109 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit )
110 try:
111 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name
112 tmp_stderr = open( tmp_name, 'wb' )
113 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
114 returncode = proc.wait()
115 tmp_stderr.close()
116 if returncode != 0:
117 stderr = ''
118 # get stderr, allowing for case where it's very large
119 tmp_stderr = open( tmp, 'rb' )
120 try:
121 while True:
122 stderr += tmp_stderr.read( buffsize )
123 if not stderr or len( stderr ) % buffsize != 0:
124 break
125 except OverflowError:
126 pass
127 raise Exception, stderr
128 except Exception, e:
129 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 )
130 args.append( '-t' )
131 args.append( twobit )
132
133 # if bwa index is not supplied, generate it
134 bwa_index = ref_fasta
135 if options.bwa_index:
136 if copy_ref:
137 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir)
138 bwa_index = options.bwa_index
139 else:
140 ONE_GB = 2**30
141 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary
142 index_algorithm = 'is'
143 else:
144 index_algorithm = 'bwtsw'
145 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta )
146 try:
147 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name
148 tmp_stderr = open( tmp_name, 'wb' )
149 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
150 returncode = proc.wait()
151 tmp_stderr.close()
152 if returncode != 0:
153 stderr = ''
154 # get stderr, allowing for case where it's very large
155 tmp_stderr = open( tmp, 'rb' )
156 try:
157 while True:
158 stderr += tmp_stderr.read( buffsize )
159 if not stderr or len( stderr ) % buffsize != 0:
160 break
161 except OverflowError:
162 pass
163 raise Exception, stderr
164 bwa_index = ref_fasta
165 except Exception, e:
166 # clean up temp dirs
167 stop_err( 'Error creating bwa index. ' + str( e ),7 )
168 args.append( '-i' )
169 args.append( bwa_index )
170
171 bam_file = os.path.join(os.getcwd(),prefix + '.bam')
172 bai_file = bam_file + '.bai'
173 if options.bam:
174 os.symlink( options.bam, bam_file )
175 else:
176 stop_err('BAM alignment file is required',2)
177 # if bam index is not supplied, generate it
178 if options.bai:
179 os.symlink( options.bai, bai_file )
180 elif os.path.exists(options.bam + '.bai'):
181 os.symlink( options.bam + '.bai', bai_file )
182 else:
183 # generate bam index
184 cmd1 = 'samtools index %s %s' % (bam_file, bai_file )
185 try:
186 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name
187 tmp_stderr = open( tmp_name, 'wb' )
188 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
189 returncode = proc.wait()
190 tmp_stderr.close()
191 if returncode != 0:
192 stderr = ''
193 # get stderr, allowing for case where it's very large
194 tmp_stderr = open( tmp, 'rb' )
195 try:
196 while True:
197 stderr += tmp_stderr.read( buffsize )
198 if not stderr or len( stderr ) % buffsize != 0:
199 break
200 except OverflowError:
201 pass
202 raise Exception, stderr
203 except Exception, e:
204 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4)
205 args.append( '-b' )
206 args.append( bam_file )
207
208 if command in ['assemble']:
209 if not options.sdi or not options.output:
210 stop_err('SHEAR .sdi file and output file are required for assemble',2)
211 args.append( '-s' )
212 args.append( options.sdi )
213 args.append( '-o' )
214 args.append( options.output )
215
216 # Run SHEAR command
217 print >> sys.stdout, "%s" % ' '.join(args)
218 try:
219 proc = subprocess.Popen( args=args, shell=False )
220 returncode = proc.wait()
221 if returncode != 0:
222 stop_err( 'Error running SHEAR ' + command, returncode )
223 if command in ['sv']:
224 report_path = os.path.join(os.getcwd(),prefix + '.report')
225 if os.path.exists(report_path):
226 if options.report:
227 shutil.copy(report_path,options.report)
228 else:
229 raise Exception, 'no report file'
230 sdi_path = os.path.join(os.getcwd(),prefix + '.sdi')
231 if os.path.exists(sdi_path):
232 if options.sdi:
233 shutil.copy(sdi_path,options.sdi)
234 else:
235 raise Exception, 'no sdi file'
236 except Exception, e:
237 # clean up temp dirs
238 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 )
239
240 if __name__=="__main__": __main__()
241