changeset 3:d33922d2b6e0 draft

Uploaded
author qfab
date Thu, 28 Aug 2014 20:20:48 -0400
parents a7fb6ab26a74
children 89563c2b1f71
files bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py
diffstat 1 files changed, 37 insertions(+), 156 deletions(-) [+]
line wrap: on
line diff
--- a/bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py	Thu Aug 28 20:20:12 2014 -0400
+++ b/bowtie_for_fasta-254d8bd5d21a/bowtie_for_fasta_wrapper.py	Thu Aug 28 20:20:48 2014 -0400
@@ -15,29 +15,16 @@
     #Parse Command Line
     parser = optparse.OptionParser()
     parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
-    parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
+    parser.add_option( '-n', '-n', dest='validal', help='The n param' )
+    parser.add_option( '-m', '-m', dest='suppal', help='The m param' )
+    parser.add_option( '-k', '-k', dest='reportvalal', help='The k param' )
+    parser.add_option( '-l', '-l', dest='seedlength', help='The l param' )
+    parser.add_option( '-e', '-e', dest='maqerr', help='The e param' )
     parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
     parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
     parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
-    parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
-    parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
-    parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
-    parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
-    parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
-    parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
-    parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
-    parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
-    parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
-    parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
-    parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' )
-    parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
-    parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
-    parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
-    parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
-    parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
     parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
     parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
-    parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
     parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
     parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
     parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
@@ -50,11 +37,10 @@
     parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
     parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
     parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
-    parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference')
-    parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' ) 
-    parser.add_option( '-O', '--seed', dest='seed', help='Seed for pseudo-random number generator' ) 
+    parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference') 
+    parser.add_option( '-f', '-f', dest='input', help='The input file' )
     parser.add_option( '-S', '-S', dest='samformat', help='The output format' )
-    parser.add_option( '-o', '--output', dest='output', help='The output file' )
+    parser.add_option( '-o', '-o', dest='output', help='The output file' )
     parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fasta", help='galaxy input format' )
     (options, args) = parser.parse_args()
     stdout = ''
@@ -178,142 +164,37 @@
     tmp_suppressed_file_name = None
     tmp_unmapped_file_name = None
     quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
-
     try:
-     if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
-	or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
-	 mismatchSeed = '-n %s' % options.mismatchSeed
-     else:
-	 mismatchSeed = ''
-     if options.mismatchQual and int( options.mismatchQual ) >= 0:
-       mismatchQual = '-e %s' % options.mismatchQual
-     else:
-       mismatchQual = ''
-     print "seedLen is %s" % options.seedLen
-     if options.seedLen and int( options.seedLen ) >= 5:
-       print "seedLen1 is %s" % options.seedLen
-       seedLen = '-l %s' % options.seedLen
-     else:
-       print "seedLen2 is %s" % options.seedLen
-       seedLen = ''
-     if options.valAlign and int( options.valAlign ) >= 0:
-       valAlign = '-k %s' % options.valAlign
-     else:
-       valAlign = ''
-     if options.suppressAlign and int( options.suppressAlign ) >= 0:
-       suppressAlign = '-m %s' % options.suppressAlign
-     else:
-       suppressAlign = ''
-    except ValueError, e:
-    # clean up temp dir
-      if os.path.exists( tmp_index_dir ):
-        shutil.rmtree( tmp_index_dir )
-      stop_err( 'Something is wrong \n' + str( e ) )
-    if options.params:
-        aligning_cmds = '-f -p %s -S %s %s %s %s %s %s ' % \
-                (options.threads, mismatchSeed,mismatchQual,seedLen,valAlign,suppressAlign,
-		 quality_score_encoding )
-    else:
+        # have to nest try-except in try-finally to handle 2.4
         try:
-            if options.skip and int( options.skip ) > 0:
-                skip = '-s %s' % options.skip
-            else:
-                skip = ''
-            if options.alignLimit and int( options.alignLimit ) >= 0:
-                alignLimit = '-u %s' % options.alignLimit
-            else:
-                alignLimit = ''
-            if options.trimH and int( options.trimH ) > 0:
-                trimH = '-5 %s' % options.trimH
-            else:
-                trimH = ''
-            if options.trimL and int( options.trimL ) > 0:
-                trimL = '-3 %s' % options.trimL
-            else:
-                trimL = ''
-            if int(options.maqSoapAlign) != '-1' and int(options.maqSoapAlign) >= '0':
-                maqSoapAlign = '-v %s' % options.maqSoapAlign
-            else:
-                maqSoapAlign = ''
-            if options.rounding == 'noRound':
-                rounding = '--nomaqround'
-            else:
-                rounding = ''
-            if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
-                    ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
-                maxBacktracks = '--maxbts %s' % options.maxBacktracks
-            else:
-                maxBacktracks = ''
-            if options.tryHard == 'doTryHard':
-                tryHard = '-y'
-            else:
-                tryHard = ''
-            if options.valAlign and int( options.valAlign ) >= 0:
-                valAlign = '-k %s' % options.valAlign
-            else:
-                valAlign = ''
-            if options.allValAligns == 'doAllValAligns':
-                allValAligns = '-a'
-            else:
-                allValAligns = ''
-            if options.best == 'doBest':
-                best = '--best'
-            else:
-                best = ''
-            if options.strata == 'doStrata':
-                strata = '--strata'
-            else:
-                strata = ''
-            if options.offrate and int( options.offrate ) >= 0:
-                offrate = '-o %s' % options.offrate
-            else:
-                offrate = ''
-            if options.seed and int( options.seed ) > 0:
-              seed = '--seed %s' % options.seed
-            else:
-              iseed = ''
-	    aligning_cmds = '-f -p %s %s %s %s %s %s %s %s %s %s %s %s ' \
-                            '%s %s %s %s %s %s %s %s' % \
-                            ( options.threads, skip, alignLimit, 
-                              trimH, trimL, maqSoapAlign,mismatchSeed, mismatchQual, seedLen, rounding,
-                              maxBacktracks,tryHard, valAlign, allValAligns, suppressAlign, best,
-                              strata, offrate,seed, quality_score_encoding )
-        except ValueError, e:
-         # clean up temp dir
-           if os.path.exists( tmp_index_dir ):
-             shutil.rmtree( tmp_index_dir )
-           stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
- 
-       # have to nest try-except in try-finally to handle 2.4
-    try:
-      # prepare actual mapping commands
-      cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output)
-      print(cmd2)
-      # align
-      tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-      tmp_stderr = open( tmp, 'wb' )
-      proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
-      returncode = proc.wait()
-      tmp_stderr.close()
-      # get stderr, allowing for case where it's very large
-      tmp_stderr = open( tmp, 'rb' )
-      stderr = ''
-      buffsize = 1048576
-      try:
-        while True:
-          stderr += tmp_stderr.read( buffsize )
-          if not stderr or len( stderr ) % buffsize != 0:
-            break
-      except OverflowError:
-        pass
-      tmp_stderr.close()
-      if returncode != 0:
-        raise Exception, stderr
-      # check that there are results in the output file
-      if os.path.getsize( options.output ) == 0:
-        raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
-    except Exception, e:
-      stop_err( 'Error aligning sequence. ' + str( e ) )
+            # prepare actual mapping commands
+            cmd2 = 'bowtie -n %s -m %s -k %s -l %s -e %s %s -f %s %s > %s' % ( options.validal, options.suppal, options.reportvalal, options.seedlength, options.maqerr,options.ref, options.input, format, options.output)
+            print(cmd2)
+            # align
+            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
+            tmp_stderr = open( tmp, 'wb' )
+            proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
+            returncode = proc.wait()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+            # check that there are results in the output file
+            if os.path.getsize( options.output ) == 0:
+                raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
+        except Exception, e:
+            stop_err( 'Error aligning sequence. ' + str( e ) )
     finally:
         # clean up temp dir
         if os.path.exists( tmp_index_dir ):