comparison bowtie_wrapper.py @ 0:4926b3e1e2fe draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author devteam
date Mon, 09 Nov 2015 11:18:30 -0500
parents
children 867a8c8e870e
comparison
equal deleted inserted replaced
-1:000000000000 0:4926b3e1e2fe
1 #!/usr/bin/env python
2
3 """
4 Runs Bowtie on single-end or paired-end data.
5 For use with Bowtie v. 0.12.7
6
7 usage: bowtie_wrapper.py [options]
8 -t, --threads=t: The number of threads to run
9 -o, --output=o: The output file
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
19 -2, --paired=2: Whether the data is single- or paired-end
20 -g, --genomeSource=g: The type of reference provided
21 -r, --ref=r: The reference genome to use or index
22 -s, --skip=s: Skip the first n reads
23 -a, --alignLimit=a: Only align the first n reads
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
28 -l, --seedLen=l: Seed length
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
44 -F, --offrate=F: Override the offrate of the index to n
45 -8, --snpphred=8: SNP penalty on Phred scale
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
48 -S, --seed=S: Seed for pseudo-random number generator
49 -C, --params=C: Whether to use default or specified parameters
50 -u, --iautoB=u: Automatic or specified behavior
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
54 -D, --idcv=D: The period for the difference-cover sample
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
62 -x, --indexSettings=x: Whether or not indexing options are to be set
63 -H, --suppressHeader=H: Suppress header
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
65 """
66
67 import optparse, os, shutil, subprocess, sys, tempfile
68
69 #Allow more than Sanger encoded variants
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
73
74 def stop_err( msg ):
75 sys.stderr.write( '%s\n' % msg )
76 sys.exit()
77
78 def __main__():
79 #Parse Command Line
80 parser = optparse.OptionParser()
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
91 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
92 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
93 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
94 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
95 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
96 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
97 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
98 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
99 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
100 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
101 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
102 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
103 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
104 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
105 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
107 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
108 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
109 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
110 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
111 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
112 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
113 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
114 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
115 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
116 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
117 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
118 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
119 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
120 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
121 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
122 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
123 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
124 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
125 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
126 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
127 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
128 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
129 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
130 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
131 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
132 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
133 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
134 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
135 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
136 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
137 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
138 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
139 (options, args) = parser.parse_args()
140 if options.mismatchSeed and options.maxMismatches:
141 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
142 stdout = ''
143
144 # make temp directory for placement of indices and copy reference file there if necessary
145 tmp_index_dir = tempfile.mkdtemp()
146 # get type of data (solid or solexa)
147 if options.dataType == 'solid':
148 colorspace = '-C'
149 else:
150 colorspace = ''
151 # index if necessary
152 if options.genomeSource == 'history' and not options.do_not_build_index:
153 # set up commands
154 if options.index_settings =='indexPreSet':
155 indexing_cmds = '%s' % colorspace
156 else:
157 try:
158 if options.iautoB and options.iautoB == 'set':
159 iautoB = '--noauto'
160 else:
161 iautoB = ''
162 if options.ipacked and options.ipacked == 'packed':
163 ipacked = '--packed'
164 else:
165 ipacked = ''
166 if options.ibmax and int( options.ibmax ) >= 1:
167 ibmax = '--bmax %s' % options.ibmax
168 else:
169 ibmax = ''
170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
172 else:
173 ibmaxdivn = ''
174 if options.idcv and int( options.idcv ) >= 3:
175 idcv = '--dcv %s' % options.idcv
176 else:
177 idcv = ''
178 if options.inodc and options.inodc == 'nodc':
179 inodc = '--nodc'
180 else:
181 inodc = ''
182 if options.inoref and options.inoref == 'noref':
183 inoref = '--noref'
184 else:
185 inoref = ''
186 if options.iftab and int( options.iftab ) >= 1:
187 iftab = '--ftabchars %s' % options.iftab
188 else:
189 iftab = ''
190 if options.intoa and options.intoa == 'yes':
191 intoa = '--ntoa'
192 else:
193 intoa = ''
194 if options.iendian and options.iendian == 'big':
195 iendian = '--big'
196 else:
197 iendian = '--little'
198 if options.iseed and int( options.iseed ) > 0:
199 iseed = '--seed %s' % options.iseed
200 else:
201 iseed = ''
202 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
203 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
204 inoref, options.ioffrate, iftab, intoa, iendian,
205 iseed, colorspace )
206 except ValueError, e:
207 # clean up temp dir
208 if os.path.exists( tmp_index_dir ):
209 shutil.rmtree( tmp_index_dir )
210 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
211 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
212 ref_file_name = ref_file.name
213 ref_file.close()
214 os.symlink( options.ref, ref_file_name )
215 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
216 try:
217 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
218 tmp_stderr = open( tmp, 'wb' )
219 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
220 returncode = proc.wait()
221 tmp_stderr.close()
222 # get stderr, allowing for case where it's very large
223 tmp_stderr = open( tmp, 'rb' )
224 stderr = ''
225 buffsize = 1048576
226 try:
227 while True:
228 stderr += tmp_stderr.read( buffsize )
229 if not stderr or len( stderr ) % buffsize != 0:
230 break
231 except OverflowError:
232 pass
233 tmp_stderr.close()
234 if returncode != 0:
235 raise Exception, stderr
236 except Exception, e:
237 # clean up temp dir
238 if os.path.exists( tmp_index_dir ):
239 shutil.rmtree( tmp_index_dir )
240 stop_err( 'Error indexing reference sequence\n' + str( e ) )
241 stdout += 'File indexed. '
242 else:
243 ref_file_name = options.ref
244 # set up aligning and generate aligning command options
245 # automatically set threads in both cases
246 tmp_suppressed_file_name = None
247 tmp_unmapped_file_name = None
248 if options.suppressHeader == 'true':
249 suppressHeader = '--sam-nohead'
250 else:
251 suppressHeader = ''
252 if options.maxInsert and int( options.maxInsert ) > 0:
253 maxInsert = '-X %s' % options.maxInsert
254 else:
255 maxInsert = ''
256 if options.mateOrient:
257 mateOrient = '--%s' % options.mateOrient
258 else:
259 mateOrient = ''
260 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
261 if options.params == 'preSet':
262 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
263 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
264 else:
265 try:
266 if options.skip and int( options.skip ) > 0:
267 skip = '-s %s' % options.skip
268 else:
269 skip = ''
270 if options.alignLimit and int( options.alignLimit ) >= 0:
271 alignLimit = '-u %s' % options.alignLimit
272 else:
273 alignLimit = ''
274 if options.trimH and int( options.trimH ) > 0:
275 trimH = '-5 %s' % options.trimH
276 else:
277 trimH = ''
278 if options.trimL and int( options.trimL ) > 0:
279 trimL = '-3 %s' % options.trimL
280 else:
281 trimL = ''
282 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
283 or options.maxMismatches == '2' or options.maxMismatches == '3'):
284 maxMismatches = '-v %s' % options.maxMismatches
285 else:
286 maxMismatches = ''
287 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
288 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
289 mismatchSeed = '-n %s' % options.mismatchSeed
290 else:
291 mismatchSeed = ''
292 if options.mismatchQual and int( options.mismatchQual ) >= 1:
293 mismatchQual = '-e %s' % options.mismatchQual
294 else:
295 mismatchQual = ''
296 if options.seedLen and int( options.seedLen ) >= 5:
297 seedLen = '-l %s' % options.seedLen
298 else:
299 seedLen = ''
300 if options.rounding == 'noRound':
301 rounding = '--nomaqround'
302 else:
303 rounding = ''
304 if options.minInsert and int( options.minInsert ) > 0:
305 minInsert = '-I %s' % options.minInsert
306 else:
307 minInsert = ''
308 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
309 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
310 else:
311 maxAlignAttempt = ''
312 if options.forwardAlign == 'noForward':
313 forwardAlign = '--nofw'
314 else:
315 forwardAlign = ''
316 if options.reverseAlign == 'noReverse':
317 reverseAlign = '--norc'
318 else:
319 reverseAlign = ''
320 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
321 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
322 maxBacktracks = '--maxbts %s' % options.maxBacktracks
323 else:
324 maxBacktracks = ''
325 if options.tryHard == 'doTryHard':
326 tryHard = '-y'
327 else:
328 tryHard = ''
329 if options.valAlign and int( options.valAlign ) >= 0:
330 valAlign = '-k %s' % options.valAlign
331 else:
332 valAlign = ''
333 if options.allValAligns == 'doAllValAligns':
334 allValAligns = '-a'
335 else:
336 allValAligns = ''
337 if options.suppressAlign and int( options.suppressAlign ) >= 0:
338 suppressAlign = '-m %s' % options.suppressAlign
339 else:
340 suppressAlign = ''
341 if options.best == 'doBest':
342 best = '--best'
343 else:
344 best = ''
345 if options.strata == 'doStrata':
346 strata = '--strata'
347 else:
348 strata = ''
349 if options.offrate and int( options.offrate ) >= 0:
350 offrate = '-o %s' % options.offrate
351 else:
352 offrate = ''
353 if options.seed and int( options.seed ) >= 0:
354 seed = '--seed %s' % options.seed
355 else:
356 seed = ''
357 if options.paired == 'paired':
358 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
359 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
360 tmp_unmapped_file_name = tmp_unmapped_file.name
361 tmp_unmapped_file.close()
362 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
363 else:
364 output_unmapped_reads = ''
365 if options.output_suppressed_reads:
366 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
367 tmp_suppressed_file_name = tmp_suppressed_file.name
368 tmp_suppressed_file.close()
369 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
370 else:
371 output_suppressed_reads = ''
372 else:
373 if options.output_unmapped_reads:
374 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
375 else:
376 output_unmapped_reads = ''
377 if options.output_suppressed_reads:
378 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
379 else:
380 output_suppressed_reads = ''
381 snpfrac = ''
382 if options.snpphred and int( options.snpphred ) >= 0:
383 snpphred = '--snpphred %s' % options.snpphred
384 else:
385 snpphred = ''
386 if options.snpfrac and float( options.snpfrac ) >= 0:
387 snpfrac = '--snpfrac %s' % options.snpfrac
388 if options.keepends and options.keepends == 'doKeepends':
389 keepends = '--col-keepends'
390 else:
391 keepends = ''
392 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
393 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
394 ( maxInsert, mateOrient, options.threads, suppressHeader,
395 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
396 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
397 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
398 tryHard, valAlign, allValAligns, suppressAlign, best,
399 strata, offrate, seed, snpphred, snpfrac, keepends,
400 output_unmapped_reads, output_suppressed_reads,
401 quality_score_encoding )
402 except ValueError, e:
403 # clean up temp dir
404 if os.path.exists( tmp_index_dir ):
405 shutil.rmtree( tmp_index_dir )
406 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
407 try:
408 # have to nest try-except in try-finally to handle 2.4
409 try:
410 # prepare actual mapping commands
411 if options.paired == 'paired':
412 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
413 else:
414 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
415 # align
416 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
417 tmp_stderr = open( tmp, 'wb' )
418 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
419 returncode = proc.wait()
420 tmp_stderr.close()
421 # get stderr, allowing for case where it's very large
422 tmp_stderr = open( tmp, 'rb' )
423 stderr = ''
424 buffsize = 1048576
425 try:
426 while True:
427 stderr += tmp_stderr.read( buffsize )
428 if not stderr or len( stderr ) % buffsize != 0:
429 break
430 except OverflowError:
431 pass
432 tmp_stderr.close()
433 if returncode != 0:
434 raise Exception, stderr
435 # get suppressed and unmapped reads output files in place if appropriate
436 if options.paired == 'paired' and tmp_suppressed_file_name and \
437 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
438 try:
439 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
440 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
441 shutil.move( left, options.output_suppressed_reads_l )
442 shutil.move( right, options.output_suppressed_reads_r )
443 except Exception, e:
444 sys.stdout.write( 'Error producing the suppressed output file.\n' )
445 if options.paired == 'paired' and tmp_unmapped_file_name and \
446 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
447 try:
448 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
449 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
450 shutil.move( left, options.output_unmapped_reads_l )
451 shutil.move( right, options.output_unmapped_reads_r )
452 except Exception, e:
453 sys.stdout.write( 'Error producing the unmapped output file.\n' )
454 # check that there are results in the output file
455 if os.path.getsize( options.output ) == 0:
456 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
457 except Exception, e:
458 stop_err( 'Error aligning sequence. ' + str( e ) )
459 finally:
460 # clean up temp dir
461 if os.path.exists( tmp_index_dir ):
462 shutil.rmtree( tmp_index_dir )
463 stdout += 'Sequence file aligned.\n'
464 sys.stdout.write( stdout )
465
466 if __name__ == "__main__":
467 __main__()