comparison bowtie_wrapper.py @ 4:bdd9fcdb398e draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit 397bc85634156862fdaa3d74b8f1e53752876619
author devteam
date Thu, 04 May 2017 14:59:41 -0400
parents ba29251df197
children
comparison
equal deleted inserted replaced
3:9326d6e6882a 4:bdd9fcdb398e
62 -x, --indexSettings=x: Whether or not indexing options are to be set 62 -x, --indexSettings=x: Whether or not indexing options are to be set
63 -H, --suppressHeader=H: Suppress header 63 -H, --suppressHeader=H: Suppress header
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' 64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
65 """ 65 """
66 66
67 import optparse, os, shutil, subprocess, sys, tempfile 67 import optparse
68 68 import os
69 #Allow more than Sanger encoded variants 69 import shutil
70 import subprocess
71 import sys
72 import tempfile
73
74 # Allow more than Sanger encoded variants
70 DEFAULT_ASCII_ENCODING = '--phred33-quals' 75 DEFAULT_ASCII_ENCODING = '--phred33-quals'
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' } 76 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'}
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config 77 # FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
78
73 79
74 def stop_err( msg ): 80 def stop_err( msg ):
75 sys.stderr.write( '%s\n' % msg ) 81 sys.exit('%s\n' % msg)
76 sys.exit() 82
77 83
78 def __main__(): 84 def __main__():
79 #Parse Command Line
80 parser = optparse.OptionParser() 85 parser = optparse.OptionParser()
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) 86 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
82 parser.add_option( '-o', '--output', dest='output', help='The output file' ) 87 parser.add_option( '-o', '--output', dest='output', help='The output file' )
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' ) 88 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' ) 89 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
150 else: 155 else:
151 colorspace = '' 156 colorspace = ''
152 # index if necessary 157 # index if necessary
153 if options.genomeSource == 'history' and not options.do_not_build_index: 158 if options.genomeSource == 'history' and not options.do_not_build_index:
154 # set up commands 159 # set up commands
155 if options.index_settings =='indexPreSet': 160 if options.index_settings == 'indexPreSet':
156 indexing_cmds = '%s' % colorspace 161 indexing_cmds = '%s' % colorspace
157 else: 162 else:
158 try: 163 try:
159 if options.iautoB and options.iautoB == 'set': 164 if options.iautoB and options.iautoB == 'set':
160 iautoB = '--noauto' 165 iautoB = '--noauto'
202 iseed = '' 207 iseed = ''
203 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ 208 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
204 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, 209 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
205 inoref, options.ioffrate, iftab, intoa, iendian, 210 inoref, options.ioffrate, iftab, intoa, iendian,
206 iseed, colorspace ) 211 iseed, colorspace )
207 except ValueError, e: 212 except ValueError as e:
208 # clean up temp dir 213 # clean up temp dir
209 if os.path.exists( tmp_index_dir ): 214 if os.path.exists( tmp_index_dir ):
210 shutil.rmtree( tmp_index_dir ) 215 shutil.rmtree( tmp_index_dir )
211 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) 216 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
212 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) 217 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
214 ref_file.close() 219 ref_file.close()
215 os.symlink( options.ref, ref_file_name ) 220 os.symlink( options.ref, ref_file_name )
216 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) 221 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
217 try: 222 try:
218 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name 223 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
219 tmp_stderr = open( tmp, 'wb' ) 224 with open(tmp, 'w') as tmp_stderr:
220 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) 225 returncode = subprocess.call(args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
221 returncode = proc.wait()
222 tmp_stderr.close()
223 # get stderr, allowing for case where it's very large
224 tmp_stderr = open( tmp, 'rb' )
225 stderr = ''
226 buffsize = 1048576
227 try:
228 while True:
229 stderr += tmp_stderr.read( buffsize )
230 if not stderr or len( stderr ) % buffsize != 0:
231 break
232 except OverflowError:
233 pass
234 tmp_stderr.close()
235 if returncode != 0: 226 if returncode != 0:
236 raise Exception, stderr 227 # get stderr, allowing for case where it's very large
237 except Exception, e: 228 stderr = ''
229 buffsize = 1048576
230 with open(tmp, 'r') as tmp_stderr:
231 try:
232 while True:
233 stderr += tmp_stderr.read(buffsize)
234 if not stderr or len(stderr) % buffsize != 0:
235 break
236 except OverflowError:
237 pass
238 raise Exception(stderr)
239 except Exception as e:
238 # clean up temp dir 240 # clean up temp dir
239 if os.path.exists( tmp_index_dir ): 241 if os.path.exists( tmp_index_dir ):
240 shutil.rmtree( tmp_index_dir ) 242 shutil.rmtree( tmp_index_dir )
241 stop_err( 'Error indexing reference sequence\n' + str( e ) ) 243 stop_err( 'Error indexing reference sequence\n' + str( e ) )
242 stdout += 'File indexed. ' 244 stdout += 'File indexed. '
259 else: 261 else:
260 mateOrient = '' 262 mateOrient = ''
261 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) 263 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
262 if options.params == 'preSet': 264 if options.params == 'preSet':
263 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \ 265 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
264 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding ) 266 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
265 else: 267 else:
266 try: 268 try:
267 if options.skip and int( options.skip ) > 0: 269 if options.skip and int( options.skip ) > 0:
268 skip = '-s %s' % options.skip 270 skip = '-s %s' % options.skip
269 else: 271 else:
278 trimH = '' 280 trimH = ''
279 if options.trimL and int( options.trimL ) > 0: 281 if options.trimL and int( options.trimL ) > 0:
280 trimL = '-3 %s' % options.trimL 282 trimL = '-3 %s' % options.trimL
281 else: 283 else:
282 trimL = '' 284 trimL = ''
283 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ 285 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' or
284 or options.maxMismatches == '2' or options.maxMismatches == '3'): 286 options.maxMismatches == '2' or options.maxMismatches == '3'):
285 maxMismatches = '-v %s' % options.maxMismatches 287 maxMismatches = '-v %s' % options.maxMismatches
286 else: 288 else:
287 maxMismatches = '' 289 maxMismatches = ''
288 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ 290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' or
289 or options.mismatchSeed == '2' or options.mismatchSeed == '3'): 291 options.mismatchSeed == '2' or options.mismatchSeed == '3'):
290 mismatchSeed = '-n %s' % options.mismatchSeed 292 mismatchSeed = '-n %s' % options.mismatchSeed
291 else: 293 else:
292 mismatchSeed = '' 294 mismatchSeed = ''
293 if options.mismatchQual and int( options.mismatchQual ) >= 1: 295 if options.mismatchQual and int( options.mismatchQual ) >= 1:
294 mismatchQual = '-e %s' % options.mismatchQual 296 mismatchQual = '-e %s' % options.mismatchQual
398 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, 400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
399 tryHard, valAlign, allValAligns, suppressAlign, best, 401 tryHard, valAlign, allValAligns, suppressAlign, best,
400 strata, offrate, seed, snpphred, snpfrac, keepends, 402 strata, offrate, seed, snpphred, snpfrac, keepends,
401 output_unmapped_reads, output_suppressed_reads, 403 output_unmapped_reads, output_suppressed_reads,
402 quality_score_encoding ) 404 quality_score_encoding )
403 except ValueError, e: 405 except ValueError as e:
404 # clean up temp dir 406 # clean up temp dir
405 if os.path.exists( tmp_index_dir ): 407 if os.path.exists( tmp_index_dir ):
406 shutil.rmtree( tmp_index_dir ) 408 shutil.rmtree( tmp_index_dir )
407 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) 409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
408 try: 410 try:
413 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) 415 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
414 else: 416 else:
415 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) 417 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
416 # align 418 # align
417 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name 419 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
418 tmp_stderr = open( tmp, 'wb' ) 420 with open(tmp, 'w') as tmp_stderr:
419 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stdout=sys.stdout, stderr=tmp_stderr.fileno() ) 421 returncode = subprocess.call(args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
420 returncode = proc.wait()
421 tmp_stderr.close()
422 # get stderr, allowing for case where it's very large 422 # get stderr, allowing for case where it's very large
423 tmp_stderr = open( tmp, 'rb' )
424 stderr = '' 423 stderr = ''
425 buffsize = 1048576 424 buffsize = 1048576
426 try: 425 with open(tmp, 'r') as tmp_stderr:
427 while True: 426 try:
428 stderr += tmp_stderr.read( buffsize ) 427 while True:
429 if not stderr or len( stderr ) % buffsize != 0: 428 stderr += tmp_stderr.read(buffsize)
430 break 429 if not stderr or len(stderr) % buffsize != 0:
431 except OverflowError: 430 break
432 pass 431 except OverflowError:
433 tmp_stderr.close() 432 pass
434 if returncode != 0: 433 if returncode != 0:
435 raise Exception, stderr 434 raise Exception(stderr)
436 elif options.output_mapping_stats is not None: 435 elif options.output_mapping_stats is not None:
437 # Write stderr (containing the mapping statistics) to a named file 436 # Write stderr (containing the mapping statistics) to a named file
438 with open(options.output_mapping_stats, 'w') as mapping_stats: 437 with open(options.output_mapping_stats, 'w') as mapping_stats:
439 mapping_stats.write( stderr ) 438 mapping_stats.write( stderr )
440 # get suppressed and unmapped reads output files in place if appropriate 439 # get suppressed and unmapped reads output files in place if appropriate
441 if options.paired == 'paired' and tmp_suppressed_file_name and \ 440 if options.paired == 'paired' and tmp_suppressed_file_name and \
442 options.output_suppressed_reads_l and options.output_suppressed_reads_r: 441 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
443 try: 442 try:
444 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) 443 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
445 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) 444 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
446 shutil.move( left, options.output_suppressed_reads_l ) 445 shutil.move( left, options.output_suppressed_reads_l )
447 shutil.move( right, options.output_suppressed_reads_r ) 446 shutil.move( right, options.output_suppressed_reads_r )
448 except Exception, e: 447 except Exception as e:
449 sys.stdout.write( 'Error producing the suppressed output file.\n' ) 448 sys.stdout.write( 'Error producing the suppressed output file.\n' )
450 if options.paired == 'paired' and tmp_unmapped_file_name and \ 449 if options.paired == 'paired' and tmp_unmapped_file_name and \
451 options.output_unmapped_reads_l and options.output_unmapped_reads_r: 450 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
452 try: 451 try:
453 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' ) 452 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
454 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' ) 453 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
455 shutil.move( left, options.output_unmapped_reads_l ) 454 shutil.move( left, options.output_unmapped_reads_l )
456 shutil.move( right, options.output_unmapped_reads_r ) 455 shutil.move( right, options.output_unmapped_reads_r )
457 except Exception, e: 456 except Exception as e:
458 sys.stdout.write( 'Error producing the unmapped output file.\n' ) 457 sys.stdout.write( 'Error producing the unmapped output file.\n' )
459 # check that there are results in the output file 458 # check that there are results in the output file
460 if os.path.getsize( options.output ) == 0: 459 if os.path.getsize( options.output ) == 0:
461 raise Exception, 'The output file is empty, there may be an error with your input file or settings.' 460 raise Exception('The output file is empty, there may be an error with your input file or settings.')
462 except Exception, e: 461 except Exception as e:
463 stop_err( 'Error aligning sequence. ' + str( e ) ) 462 stop_err( 'Error aligning sequence. ' + str( e ) )
464 finally: 463 finally:
465 # clean up temp dir 464 # clean up temp dir
466 if os.path.exists( tmp_index_dir ): 465 if os.path.exists( tmp_index_dir ):
467 shutil.rmtree( tmp_index_dir ) 466 shutil.rmtree( tmp_index_dir )
468 stdout += 'Sequence file aligned.\n' 467 stdout += 'Sequence file aligned.\n'
469 sys.stdout.write( stdout ) 468 sys.stdout.write( stdout )
470 469
470
471 if __name__ == "__main__": 471 if __name__ == "__main__":
472 __main__() 472 __main__()