Mercurial > repos > jjohnson > shear
comparison shear_wrapper.py @ 4:a82400332451
Use shear_wrapper.py to generate reference indexes when needed
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 16 Jul 2013 12:38:48 -0500 |
parents | |
children | 0158f7356ffd |
comparison
equal
deleted
inserted
replaced
3:630532975fa9 | 4:a82400332451 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path | |
4 | |
5 def stop_err( msg, code ): | |
6 sys.stderr.write( '%s\n' % msg ) | |
7 sys.exit(code) | |
8 | |
9 def __main__(): | |
10 #Parse Command Line | |
11 parser = optparse.OptionParser() | |
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' ) | |
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' ) | |
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' ) | |
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' ) | |
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' ) | |
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' ) | |
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' ) | |
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' ) | |
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' ) | |
21 parser.add_option( '-r', '--report', dest='report', help='The SHEAR output report' ) | |
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' ) | |
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' ) | |
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' ) | |
25 (options, args) = parser.parse_args() | |
26 | |
27 def make_ref(src, dest, copy=False): | |
28 if copy: | |
29 shutil.copy( src, dest ) | |
30 else: | |
31 os.symlink( src, dest ) | |
32 | |
33 # output version # of tool | |
34 #try: | |
35 #except: | |
36 # sys.stdout.write( 'Could not determine BWA version\n' ) | |
37 if not options.jar_path: | |
38 stop_err('path to SHEAR.jar is required',1) | |
39 if options.command: | |
40 command = options.command | |
41 else: | |
42 stop_err('SHEAR command is required',1) | |
43 args = [ 'java','-jar' ] | |
44 args.append( options.jar_path ) | |
45 args.append( command ) | |
46 # Check required params for command | |
47 buffsize = 1048576 | |
48 copy_ref = False | |
49 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref' | |
50 if command in ['sv']: | |
51 args.append( '-p' ) | |
52 args.append( prefix ) | |
53 if options.svidx_dir and command in ['sv']: | |
54 if not os.path.isdir(options.svidx_dir): | |
55 os.makedirs(options.svidx_dir) | |
56 ref_prefix = os.path.join(options.svidx_dir,prefix) | |
57 copy_ref = True | |
58 else: | |
59 ref_prefix = os.path.join(os.getcwd(),prefix) | |
60 | |
61 if command in ['sv','assemble']: | |
62 ref_fasta = ref_prefix + '.fa' | |
63 ref_index = ref_fasta + '.fai' | |
64 if options.fasta: | |
65 make_ref( options.fasta, ref_fasta, copy=copy_ref ) | |
66 else: | |
67 stop_err('Reference fasta file is required',3) | |
68 # if reference fasta index is not supllied, generate it | |
69 if options.fai: | |
70 make_ref( options.fai, ref_index, copy=copy_ref ) | |
71 elif os.path.exists(options.fasta + '.fai'): | |
72 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref ) | |
73 else: | |
74 # generate fasta fai index | |
75 cmd1 = 'samtools faidx %s' % (ref_fasta ) | |
76 try: | |
77 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name | |
78 tmp_stderr = open( tmp_name, 'wb' ) | |
79 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
80 returncode = proc.wait() | |
81 tmp_stderr.close() | |
82 if returncode != 0: | |
83 stderr = '' | |
84 # get stderr, allowing for case where it's very large | |
85 tmp_stderr = open( tmp, 'rb' ) | |
86 try: | |
87 while True: | |
88 stderr += tmp_stderr.read( buffsize ) | |
89 if not stderr or len( stderr ) % buffsize != 0: | |
90 break | |
91 except OverflowError: | |
92 pass | |
93 raise Exception, stderr | |
94 except Exception, e: | |
95 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 ) | |
96 args.append( '-f' ) | |
97 args.append( ref_fasta ) | |
98 | |
99 if command in ['sv']: | |
100 if not options.bam: | |
101 stop_err('BAM alignment file is required',2) | |
102 | |
103 # if reference 2bit is not supplied, generate it | |
104 twobit = ref_prefix + '.2bit' | |
105 if options.twobit: | |
106 make_ref( options.twobit, twobit, copy=copy_ref ) | |
107 else: | |
108 # generate 2bit index | |
109 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit ) | |
110 try: | |
111 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name | |
112 tmp_stderr = open( tmp_name, 'wb' ) | |
113 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
114 returncode = proc.wait() | |
115 tmp_stderr.close() | |
116 if returncode != 0: | |
117 stderr = '' | |
118 # get stderr, allowing for case where it's very large | |
119 tmp_stderr = open( tmp, 'rb' ) | |
120 try: | |
121 while True: | |
122 stderr += tmp_stderr.read( buffsize ) | |
123 if not stderr or len( stderr ) % buffsize != 0: | |
124 break | |
125 except OverflowError: | |
126 pass | |
127 raise Exception, stderr | |
128 except Exception, e: | |
129 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 ) | |
130 args.append( '-t' ) | |
131 args.append( twobit ) | |
132 | |
133 # if bwa index is not supplied, generate it | |
134 bwa_index = ref_fasta | |
135 if options.bwa_index: | |
136 if copy_ref: | |
137 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir) | |
138 bwa_index = options.bwa_index | |
139 else: | |
140 ONE_GB = 2**30 | |
141 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary | |
142 index_algorithm = 'is' | |
143 else: | |
144 index_algorithm = 'bwtsw' | |
145 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta ) | |
146 try: | |
147 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name | |
148 tmp_stderr = open( tmp_name, 'wb' ) | |
149 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
150 returncode = proc.wait() | |
151 tmp_stderr.close() | |
152 if returncode != 0: | |
153 stderr = '' | |
154 # get stderr, allowing for case where it's very large | |
155 tmp_stderr = open( tmp, 'rb' ) | |
156 try: | |
157 while True: | |
158 stderr += tmp_stderr.read( buffsize ) | |
159 if not stderr or len( stderr ) % buffsize != 0: | |
160 break | |
161 except OverflowError: | |
162 pass | |
163 raise Exception, stderr | |
164 bwa_index = ref_fasta | |
165 except Exception, e: | |
166 # clean up temp dirs | |
167 stop_err( 'Error creating bwa index. ' + str( e ),7 ) | |
168 args.append( '-i' ) | |
169 args.append( bwa_index ) | |
170 | |
171 bam_file = os.path.join(os.getcwd(),prefix + '.bam') | |
172 bai_file = bam_file + '.bai' | |
173 if options.bam: | |
174 os.symlink( options.bam, bam_file ) | |
175 else: | |
176 stop_err('BAM alignment file is required',2) | |
177 # if bam index is not supplied, generate it | |
178 if options.bai: | |
179 os.symlink( options.bai, bai_file ) | |
180 elif os.path.exists(options.bam + '.bai'): | |
181 os.symlink( options.bam + '.bai', bai_file ) | |
182 else: | |
183 # generate bam index | |
184 cmd1 = 'samtools index %s %s' % (bam_file, bai_file ) | |
185 try: | |
186 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name | |
187 tmp_stderr = open( tmp_name, 'wb' ) | |
188 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
189 returncode = proc.wait() | |
190 tmp_stderr.close() | |
191 if returncode != 0: | |
192 stderr = '' | |
193 # get stderr, allowing for case where it's very large | |
194 tmp_stderr = open( tmp, 'rb' ) | |
195 try: | |
196 while True: | |
197 stderr += tmp_stderr.read( buffsize ) | |
198 if not stderr or len( stderr ) % buffsize != 0: | |
199 break | |
200 except OverflowError: | |
201 pass | |
202 raise Exception, stderr | |
203 except Exception, e: | |
204 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4) | |
205 args.append( '-b' ) | |
206 args.append( bam_file ) | |
207 | |
208 if command in ['assemble']: | |
209 if not options.sdi or not options.output: | |
210 stop_err('SHEAR .sdi file and output file are required for assemble',2) | |
211 args.append( '-s' ) | |
212 args.append( options.sdi ) | |
213 args.append( '-o' ) | |
214 args.append( options.output ) | |
215 | |
216 # Run SHEAR command | |
217 print >> sys.stdout, "%s" % ' '.join(args) | |
218 try: | |
219 proc = subprocess.Popen( args=args, shell=False ) | |
220 returncode = proc.wait() | |
221 if returncode != 0: | |
222 stop_err( 'Error running SHEAR ' + command, returncode ) | |
223 if command in ['sv']: | |
224 report_path = os.path.join(os.getcwd(),prefix + '.report') | |
225 if os.path.exists(report_path): | |
226 if options.report: | |
227 shutil.copy(report_path,options.report) | |
228 else: | |
229 raise Exception, 'no report file' | |
230 sdi_path = os.path.join(os.getcwd(),prefix + '.sdi') | |
231 if os.path.exists(sdi_path): | |
232 if options.sdi: | |
233 shutil.copy(sdi_path,options.sdi) | |
234 else: | |
235 raise Exception, 'no sdi file' | |
236 except Exception, e: | |
237 # clean up temp dirs | |
238 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 ) | |
239 | |
240 if __name__=="__main__": __main__() | |
241 |