Mercurial > repos > qfab > bowtie_for_fasta
changeset 3:d33922d2b6e0 draft
Uploaded
author | qfab |
---|---|
date | Thu, 28 Aug 2014 20:20:48 -0400 |
parents | a7fb6ab26a74 |
children | 89563c2b1f71 |
files | bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py |
diffstat | 1 files changed, 37 insertions(+), 156 deletions(-) [+] |
line wrap: on
line diff
--- a/bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py Thu Aug 28 20:20:12 2014 -0400 +++ b/bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py Thu Aug 28 20:20:48 2014 -0400 @@ -15,29 +15,16 @@ #Parse Command Line parser = optparse.OptionParser() parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' ) - parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-n', '-n', dest='validal', help='The n param' ) + parser.add_option( '-m', '-m', dest='suppal', help='The m param' ) + parser.add_option( '-k', '-k', dest='reportvalal', help='The k param' ) + parser.add_option( '-l', '-l', dest='seedlength', help='The l param' ) + parser.add_option( '-e', '-e', dest='maqerr', help='The e param' ) parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' ) parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' ) parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' ) - parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' ) - parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' ) - parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' ) - parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' ) - parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' ) - parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) - parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) - parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) - parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' ) - parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) - parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' ) - parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' ) - parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) - parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) - parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) - parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' ) parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) - parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' ) parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' ) parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' ) parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' ) @@ -50,11 +37,10 @@ parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) - parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference') - parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' ) - parser.add_option( '-O', '--seed', dest='seed', help='Seed for pseudo-random number generator' ) + parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference') + parser.add_option( '-f', '-f', dest='input', help='The input file' ) parser.add_option( '-S', '-S', dest='samformat', help='The output format' ) - parser.add_option( '-o', '--output', dest='output', help='The output file' ) + parser.add_option( '-o', '-o', dest='output', help='The output file' ) parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fasta", help='galaxy input format' ) (options, args) = parser.parse_args() stdout = '' @@ -178,142 +164,37 @@ tmp_suppressed_file_name = None tmp_unmapped_file_name = None quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING ) - try: - if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ - or options.mismatchSeed == '2' or options.mismatchSeed == '3'): - mismatchSeed = '-n %s' % options.mismatchSeed - else: - mismatchSeed = '' - if options.mismatchQual and int( options.mismatchQual ) >= 0: - mismatchQual = '-e %s' % options.mismatchQual - else: - mismatchQual = '' - print "seedLen is %s" % options.seedLen - if options.seedLen and int( options.seedLen ) >= 5: - print "seedLen1 is %s" % options.seedLen - seedLen = '-l %s' % options.seedLen - else: - print "seedLen2 is %s" % options.seedLen - seedLen = '' - if options.valAlign and int( options.valAlign ) >= 0: - valAlign = '-k %s' % options.valAlign - else: - valAlign = '' - if options.suppressAlign and int( options.suppressAlign ) >= 0: - suppressAlign = '-m %s' % options.suppressAlign - else: - suppressAlign = '' - except ValueError, e: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stop_err( 'Something is wrong \n' + str( e ) ) - if options.params: - aligning_cmds = '-f -p %s -S %s %s %s %s %s %s ' % \ - (options.threads, mismatchSeed,mismatchQual,seedLen,valAlign,suppressAlign, - quality_score_encoding ) - else: + # have to nest try-except in try-finally to handle 2.4 try: - if options.skip and int( options.skip ) > 0: - skip = '-s %s' % options.skip - else: - skip = '' - if options.alignLimit and int( options.alignLimit ) >= 0: - alignLimit = '-u %s' % options.alignLimit - else: - alignLimit = '' - if options.trimH and int( options.trimH ) > 0: - trimH = '-5 %s' % options.trimH - else: - trimH = '' - if options.trimL and int( options.trimL ) > 0: - trimL = '-3 %s' % options.trimL - else: - trimL = '' - if int(options.maqSoapAlign) != '-1' and int(options.maqSoapAlign) >= '0': - maqSoapAlign = '-v %s' % options.maqSoapAlign - else: - maqSoapAlign = '' - if options.rounding == 'noRound': - rounding = '--nomaqround' - else: - rounding = '' - if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \ - ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ): - maxBacktracks = '--maxbts %s' % options.maxBacktracks - else: - maxBacktracks = '' - if options.tryHard == 'doTryHard': - tryHard = '-y' - else: - tryHard = '' - if options.valAlign and int( options.valAlign ) >= 0: - valAlign = '-k %s' % options.valAlign - else: - valAlign = '' - if options.allValAligns == 'doAllValAligns': - allValAligns = '-a' - else: - allValAligns = '' - if options.best == 'doBest': - best = '--best' - else: - best = '' - if options.strata == 'doStrata': - strata = '--strata' - else: - strata = '' - if options.offrate and int( options.offrate ) >= 0: - offrate = '-o %s' % options.offrate - else: - offrate = '' - if options.seed and int( options.seed ) > 0: - seed = '--seed %s' % options.seed - else: - iseed = '' - aligning_cmds = '-f -p %s %s %s %s %s %s %s %s %s %s %s %s ' \ - '%s %s %s %s %s %s %s %s' % \ - ( options.threads, skip, alignLimit, - trimH, trimL, maqSoapAlign,mismatchSeed, mismatchQual, seedLen, rounding, - maxBacktracks,tryHard, valAlign, allValAligns, suppressAlign, best, - strata, offrate,seed, quality_score_encoding ) - except ValueError, e: - # clean up temp dir - if os.path.exists( tmp_index_dir ): - shutil.rmtree( tmp_index_dir ) - stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) ) - - # have to nest try-except in try-finally to handle 2.4 - try: - # prepare actual mapping commands - cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output) - print(cmd2) - # align - tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - # check that there are results in the output file - if os.path.getsize( options.output ) == 0: - raise Exception, 'The output file is empty, there may be an error with your input file or settings.' - except Exception, e: - stop_err( 'Error aligning sequence. ' + str( e ) ) + # prepare actual mapping commands + cmd2 = 'bowtie -n %s -m %s -k %s -l %s -e %s %s -f %s %s > %s' % ( options.validal, options.suppal, options.reportvalal, options.seedlength, options.maqerr,options.ref, options.input, format, options.output) + print(cmd2) + # align + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + # check that there are results in the output file + if os.path.getsize( options.output ) == 0: + raise Exception, 'The output file is empty, there may be an error with your input file or settings.' + except Exception, e: + stop_err( 'Error aligning sequence. ' + str( e ) ) finally: # clean up temp dir if os.path.exists( tmp_index_dir ):