Mercurial > repos > devteam > bowtie_wrappers
comparison bowtie_wrapper.py @ 4:bdd9fcdb398e draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit 397bc85634156862fdaa3d74b8f1e53752876619
author | devteam |
---|---|
date | Thu, 04 May 2017 14:59:41 -0400 |
parents | ba29251df197 |
children |
comparison
equal
deleted
inserted
replaced
3:9326d6e6882a | 4:bdd9fcdb398e |
---|---|
62 -x, --indexSettings=x: Whether or not indexing options are to be set | 62 -x, --indexSettings=x: Whether or not indexing options are to be set |
63 -H, --suppressHeader=H: Suppress header | 63 -H, --suppressHeader=H: Suppress header |
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' | 64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' |
65 """ | 65 """ |
66 | 66 |
67 import optparse, os, shutil, subprocess, sys, tempfile | 67 import optparse |
68 | 68 import os |
69 #Allow more than Sanger encoded variants | 69 import shutil |
70 import subprocess | |
71 import sys | |
72 import tempfile | |
73 | |
74 # Allow more than Sanger encoded variants | |
70 DEFAULT_ASCII_ENCODING = '--phred33-quals' | 75 DEFAULT_ASCII_ENCODING = '--phred33-quals' |
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' } | 76 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'} |
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config | 77 # FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config |
78 | |
73 | 79 |
74 def stop_err( msg ): | 80 def stop_err( msg ): |
75 sys.stderr.write( '%s\n' % msg ) | 81 sys.exit('%s\n' % msg) |
76 sys.exit() | 82 |
77 | 83 |
78 def __main__(): | 84 def __main__(): |
79 #Parse Command Line | |
80 parser = optparse.OptionParser() | 85 parser = optparse.OptionParser() |
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) | 86 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) |
82 parser.add_option( '-o', '--output', dest='output', help='The output file' ) | 87 parser.add_option( '-o', '--output', dest='output', help='The output file' ) |
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' ) | 88 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' ) |
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' ) | 89 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' ) |
150 else: | 155 else: |
151 colorspace = '' | 156 colorspace = '' |
152 # index if necessary | 157 # index if necessary |
153 if options.genomeSource == 'history' and not options.do_not_build_index: | 158 if options.genomeSource == 'history' and not options.do_not_build_index: |
154 # set up commands | 159 # set up commands |
155 if options.index_settings =='indexPreSet': | 160 if options.index_settings == 'indexPreSet': |
156 indexing_cmds = '%s' % colorspace | 161 indexing_cmds = '%s' % colorspace |
157 else: | 162 else: |
158 try: | 163 try: |
159 if options.iautoB and options.iautoB == 'set': | 164 if options.iautoB and options.iautoB == 'set': |
160 iautoB = '--noauto' | 165 iautoB = '--noauto' |
202 iseed = '' | 207 iseed = '' |
203 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ | 208 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ |
204 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, | 209 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, |
205 inoref, options.ioffrate, iftab, intoa, iendian, | 210 inoref, options.ioffrate, iftab, intoa, iendian, |
206 iseed, colorspace ) | 211 iseed, colorspace ) |
207 except ValueError, e: | 212 except ValueError as e: |
208 # clean up temp dir | 213 # clean up temp dir |
209 if os.path.exists( tmp_index_dir ): | 214 if os.path.exists( tmp_index_dir ): |
210 shutil.rmtree( tmp_index_dir ) | 215 shutil.rmtree( tmp_index_dir ) |
211 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) | 216 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) ) |
212 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) | 217 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir ) |
214 ref_file.close() | 219 ref_file.close() |
215 os.symlink( options.ref, ref_file_name ) | 220 os.symlink( options.ref, ref_file_name ) |
216 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) | 221 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name ) |
217 try: | 222 try: |
218 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | 223 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name |
219 tmp_stderr = open( tmp, 'wb' ) | 224 with open(tmp, 'w') as tmp_stderr: |
220 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | 225 returncode = subprocess.call(args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno()) |
221 returncode = proc.wait() | |
222 tmp_stderr.close() | |
223 # get stderr, allowing for case where it's very large | |
224 tmp_stderr = open( tmp, 'rb' ) | |
225 stderr = '' | |
226 buffsize = 1048576 | |
227 try: | |
228 while True: | |
229 stderr += tmp_stderr.read( buffsize ) | |
230 if not stderr or len( stderr ) % buffsize != 0: | |
231 break | |
232 except OverflowError: | |
233 pass | |
234 tmp_stderr.close() | |
235 if returncode != 0: | 226 if returncode != 0: |
236 raise Exception, stderr | 227 # get stderr, allowing for case where it's very large |
237 except Exception, e: | 228 stderr = '' |
229 buffsize = 1048576 | |
230 with open(tmp, 'r') as tmp_stderr: | |
231 try: | |
232 while True: | |
233 stderr += tmp_stderr.read(buffsize) | |
234 if not stderr or len(stderr) % buffsize != 0: | |
235 break | |
236 except OverflowError: | |
237 pass | |
238 raise Exception(stderr) | |
239 except Exception as e: | |
238 # clean up temp dir | 240 # clean up temp dir |
239 if os.path.exists( tmp_index_dir ): | 241 if os.path.exists( tmp_index_dir ): |
240 shutil.rmtree( tmp_index_dir ) | 242 shutil.rmtree( tmp_index_dir ) |
241 stop_err( 'Error indexing reference sequence\n' + str( e ) ) | 243 stop_err( 'Error indexing reference sequence\n' + str( e ) ) |
242 stdout += 'File indexed. ' | 244 stdout += 'File indexed. ' |
259 else: | 261 else: |
260 mateOrient = '' | 262 mateOrient = '' |
261 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) | 263 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) |
262 if options.params == 'preSet': | 264 if options.params == 'preSet': |
263 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \ | 265 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \ |
264 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding ) | 266 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding ) |
265 else: | 267 else: |
266 try: | 268 try: |
267 if options.skip and int( options.skip ) > 0: | 269 if options.skip and int( options.skip ) > 0: |
268 skip = '-s %s' % options.skip | 270 skip = '-s %s' % options.skip |
269 else: | 271 else: |
278 trimH = '' | 280 trimH = '' |
279 if options.trimL and int( options.trimL ) > 0: | 281 if options.trimL and int( options.trimL ) > 0: |
280 trimL = '-3 %s' % options.trimL | 282 trimL = '-3 %s' % options.trimL |
281 else: | 283 else: |
282 trimL = '' | 284 trimL = '' |
283 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ | 285 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' or |
284 or options.maxMismatches == '2' or options.maxMismatches == '3'): | 286 options.maxMismatches == '2' or options.maxMismatches == '3'): |
285 maxMismatches = '-v %s' % options.maxMismatches | 287 maxMismatches = '-v %s' % options.maxMismatches |
286 else: | 288 else: |
287 maxMismatches = '' | 289 maxMismatches = '' |
288 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ | 290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' or |
289 or options.mismatchSeed == '2' or options.mismatchSeed == '3'): | 291 options.mismatchSeed == '2' or options.mismatchSeed == '3'): |
290 mismatchSeed = '-n %s' % options.mismatchSeed | 292 mismatchSeed = '-n %s' % options.mismatchSeed |
291 else: | 293 else: |
292 mismatchSeed = '' | 294 mismatchSeed = '' |
293 if options.mismatchQual and int( options.mismatchQual ) >= 1: | 295 if options.mismatchQual and int( options.mismatchQual ) >= 1: |
294 mismatchQual = '-e %s' % options.mismatchQual | 296 mismatchQual = '-e %s' % options.mismatchQual |
398 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, | 400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, |
399 tryHard, valAlign, allValAligns, suppressAlign, best, | 401 tryHard, valAlign, allValAligns, suppressAlign, best, |
400 strata, offrate, seed, snpphred, snpfrac, keepends, | 402 strata, offrate, seed, snpphred, snpfrac, keepends, |
401 output_unmapped_reads, output_suppressed_reads, | 403 output_unmapped_reads, output_suppressed_reads, |
402 quality_score_encoding ) | 404 quality_score_encoding ) |
403 except ValueError, e: | 405 except ValueError as e: |
404 # clean up temp dir | 406 # clean up temp dir |
405 if os.path.exists( tmp_index_dir ): | 407 if os.path.exists( tmp_index_dir ): |
406 shutil.rmtree( tmp_index_dir ) | 408 shutil.rmtree( tmp_index_dir ) |
407 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) | 409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) |
408 try: | 410 try: |
413 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) | 415 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output ) |
414 else: | 416 else: |
415 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) | 417 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output ) |
416 # align | 418 # align |
417 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | 419 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name |
418 tmp_stderr = open( tmp, 'wb' ) | 420 with open(tmp, 'w') as tmp_stderr: |
419 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stdout=sys.stdout, stderr=tmp_stderr.fileno() ) | 421 returncode = subprocess.call(args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno()) |
420 returncode = proc.wait() | |
421 tmp_stderr.close() | |
422 # get stderr, allowing for case where it's very large | 422 # get stderr, allowing for case where it's very large |
423 tmp_stderr = open( tmp, 'rb' ) | |
424 stderr = '' | 423 stderr = '' |
425 buffsize = 1048576 | 424 buffsize = 1048576 |
426 try: | 425 with open(tmp, 'r') as tmp_stderr: |
427 while True: | 426 try: |
428 stderr += tmp_stderr.read( buffsize ) | 427 while True: |
429 if not stderr or len( stderr ) % buffsize != 0: | 428 stderr += tmp_stderr.read(buffsize) |
430 break | 429 if not stderr or len(stderr) % buffsize != 0: |
431 except OverflowError: | 430 break |
432 pass | 431 except OverflowError: |
433 tmp_stderr.close() | 432 pass |
434 if returncode != 0: | 433 if returncode != 0: |
435 raise Exception, stderr | 434 raise Exception(stderr) |
436 elif options.output_mapping_stats is not None: | 435 elif options.output_mapping_stats is not None: |
437 # Write stderr (containing the mapping statistics) to a named file | 436 # Write stderr (containing the mapping statistics) to a named file |
438 with open(options.output_mapping_stats, 'w') as mapping_stats: | 437 with open(options.output_mapping_stats, 'w') as mapping_stats: |
439 mapping_stats.write( stderr ) | 438 mapping_stats.write( stderr ) |
440 # get suppressed and unmapped reads output files in place if appropriate | 439 # get suppressed and unmapped reads output files in place if appropriate |
441 if options.paired == 'paired' and tmp_suppressed_file_name and \ | 440 if options.paired == 'paired' and tmp_suppressed_file_name and \ |
442 options.output_suppressed_reads_l and options.output_suppressed_reads_r: | 441 options.output_suppressed_reads_l and options.output_suppressed_reads_r: |
443 try: | 442 try: |
444 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) | 443 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) |
445 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) | 444 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' ) |
446 shutil.move( left, options.output_suppressed_reads_l ) | 445 shutil.move( left, options.output_suppressed_reads_l ) |
447 shutil.move( right, options.output_suppressed_reads_r ) | 446 shutil.move( right, options.output_suppressed_reads_r ) |
448 except Exception, e: | 447 except Exception as e: |
449 sys.stdout.write( 'Error producing the suppressed output file.\n' ) | 448 sys.stdout.write( 'Error producing the suppressed output file.\n' ) |
450 if options.paired == 'paired' and tmp_unmapped_file_name and \ | 449 if options.paired == 'paired' and tmp_unmapped_file_name and \ |
451 options.output_unmapped_reads_l and options.output_unmapped_reads_r: | 450 options.output_unmapped_reads_l and options.output_unmapped_reads_r: |
452 try: | 451 try: |
453 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' ) | 452 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' ) |
454 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' ) | 453 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' ) |
455 shutil.move( left, options.output_unmapped_reads_l ) | 454 shutil.move( left, options.output_unmapped_reads_l ) |
456 shutil.move( right, options.output_unmapped_reads_r ) | 455 shutil.move( right, options.output_unmapped_reads_r ) |
457 except Exception, e: | 456 except Exception as e: |
458 sys.stdout.write( 'Error producing the unmapped output file.\n' ) | 457 sys.stdout.write( 'Error producing the unmapped output file.\n' ) |
459 # check that there are results in the output file | 458 # check that there are results in the output file |
460 if os.path.getsize( options.output ) == 0: | 459 if os.path.getsize( options.output ) == 0: |
461 raise Exception, 'The output file is empty, there may be an error with your input file or settings.' | 460 raise Exception('The output file is empty, there may be an error with your input file or settings.') |
462 except Exception, e: | 461 except Exception as e: |
463 stop_err( 'Error aligning sequence. ' + str( e ) ) | 462 stop_err( 'Error aligning sequence. ' + str( e ) ) |
464 finally: | 463 finally: |
465 # clean up temp dir | 464 # clean up temp dir |
466 if os.path.exists( tmp_index_dir ): | 465 if os.path.exists( tmp_index_dir ): |
467 shutil.rmtree( tmp_index_dir ) | 466 shutil.rmtree( tmp_index_dir ) |
468 stdout += 'Sequence file aligned.\n' | 467 stdout += 'Sequence file aligned.\n' |
469 sys.stdout.write( stdout ) | 468 sys.stdout.write( stdout ) |
470 | 469 |
470 | |
471 if __name__ == "__main__": | 471 if __name__ == "__main__": |
472 __main__() | 472 __main__() |