annotate tools/smalt_wrapper.py @ 1:b857363cc1db draft

Uploaded
author cjav
date Wed, 13 Feb 2013 13:23:09 -0500
parents 87f4f05b44bb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
1 #!/usr/bin/env python
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
2
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
3 """
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
4 Runs Smalt on single-end or paired-end data.
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
5 Produces a SAM file containing the mappings.
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
6 Works with Smalt version 0.7.1.
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
7
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
8 usage: smalt_wrapper.py [options]
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
9
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
10 See below for options
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
11 """
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
12
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
13 import optparse, os, shutil, subprocess, sys, tempfile
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
14
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
15 def stop_err( msg ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
16 sys.stderr.write( '%s\n' % msg )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
17 sys.exit()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
18
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
19 def __main__():
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
20 #Parse Command Line
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
21 parser = optparse.OptionParser()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
22 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to use' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
23 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
24 parser.add_option( '-f', '--input1', dest='fastq', help='The (forward) fastq file to use for the mapping' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
25 parser.add_option( '-F', '--input2', dest='rfastq', help='The reverse fastq file to use for mapping if paired-end data' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
26 parser.add_option( '-u', '--output', dest='output', help='The file to save the output (SAM format)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
27 parser.add_option( '-g', '--genAlignType', dest='genAlignType', help='The type of pairing (single or paired)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
28 parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
29 parser.add_option( '-s', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one form history (indexed or history)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
30 parser.add_option( '-x', '--exhaustiveSearch', dest='exhaustiveSearch', help='This flag triggers a more exhaustive search for alignments at the cost of decreased speed' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
31 parser.add_option( '-c', '--minCover', dest='minCover', help='Only consider mappings where the k-mer word seeds cover the query read to a minimum extent' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
32 parser.add_option( '-d', '--scorDiff', dest='scorDiff', help='Set a threshold of the Smith-Waterman alignment score relative to the maximum score' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
33 parser.add_option( '-i', '--insertMax', dest='insertMax', help='Maximum insert size (Only in paired-end mode)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
34 parser.add_option( '-j', '--insertMin', dest='insertMin', help='Minimum insert size (Only in paired-end mode)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
35 parser.add_option( '-l', '--pairTyp', dest='pairTyp', help='Type of read pair library, can be either pe, mp or pp' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
36 parser.add_option( '-m', '--minScor', dest='minScor', help='Sets an absolute threshold of the Smith-Waterman scores' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
37 parser.add_option( '-a', '--partialAlignments', dest='partialAlignments', help='Report partial alignments if they are complementary on the read (split reads)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
38 parser.add_option( '-q', '--minBasq', dest='minBasq', help='Sets a base quality threshold (0 <= minbasq <= 10, default 0)' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
39 parser.add_option( '-e', '--seed', dest='seed', help='If <seed> >= 0 report an alignment selected at random where there are multiple mappings with the same best alignment score. With <seed> = 0 (default) a seed is derived from the current calendar time. If <seed> < 0 reads with multiple best mappings are reported as "not mapped".' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
40 parser.add_option( '-w', '--complexityWeighted', dest='complexityWeighted', help='Smith-Waterman scores are complexity weighted' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
41 parser.add_option( '-y', '--minId', dest='minId', help='Sets an identity threshold for a mapping to be reported' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
42 parser.add_option( '-D', '--dbkey', dest='dbkey', help='Dbkey for reference genome' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
43 parser.add_option( '-X', '--do_not_build_index', dest='do_not_build_index', action='store_true', help="Don't build index" )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
44 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
45 (options, args) = parser.parse_args()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
46
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
47 # output version # of tool
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
48 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
49 tmp = tempfile.NamedTemporaryFile().name
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
50 tmp_stdout = open( tmp, 'wb' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
51 proc = subprocess.Popen( args='smalt 2>&1', shell=True, stdout=tmp_stdout )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
52 tmp_stdout.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
53 returncode = proc.wait()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
54 stdout = None
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
55 for line in open( tmp_stdout.name, 'rb' ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
56 if line.lower().find( 'version' ) >= 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
57 stdout = line.strip()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
58 break
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
59 if stdout:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
60 sys.stdout.write( 'SMALT %s\n' % stdout )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
61 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
62 raise Exception
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
63 except:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
64 sys.stdout.write( 'Could not determine SMALT version\n' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
65
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
66 fastq = options.fastq
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
67 if options.rfastq:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
68 rfastq = options.rfastq
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
69
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
70 # make temp directory for placement of indices
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
71 tmp_index_dir = tempfile.mkdtemp()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
72 tmp_dir = tempfile.mkdtemp()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
73 # index if necessary
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
74 if options.fileSource == 'history' and not options.do_not_build_index:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
75 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
76 ref_file_name = ref_file.name
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
77 ref_file.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
78 os.symlink( options.ref, ref_file_name )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
79 cmd1 = 'smalt index %s %s' % ( ref_file_name, ref_file_name )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
80 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
81 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
82 tmp_stderr = open( tmp, 'wb' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
83 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
84 returncode = proc.wait()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
85 tmp_stderr.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
86 # get stderr, allowing for case where it's very large
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
87 tmp_stderr = open( tmp, 'rb' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
88 stderr = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
89 buffsize = 1048576
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
90 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
91 while True:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
92 stderr += tmp_stderr.read( buffsize )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
93 if not stderr or len( stderr ) % buffsize != 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
94 break
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
95 except OverflowError:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
96 pass
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
97 tmp_stderr.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
98 if returncode != 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
99 raise Exception, stderr
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
100 except Exception, e:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
101 # clean up temp dirs
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
102 if os.path.exists( tmp_index_dir ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
103 shutil.rmtree( tmp_index_dir )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
104 if os.path.exists( tmp_dir ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
105 shutil.rmtree( tmp_dir )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
106 stop_err( 'Error indexing reference sequence. ' + str( e ) )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
107 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
108 ref_file_name = options.ref
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
109
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
110 # set up aligning and generate aligning command options
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
111 if options.params == 'pre_set':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
112 aligning_cmds = '-n %s ' % ( options.threads )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
113 gen_alignment_cmds = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
114 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
115 if options.exhaustiveSearch == 'true':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
116 exhaustiveSearch = '-x'
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
117 minCover = '-c %s' % options.minCover
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
118 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
119 exhaustiveSearch = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
120 minCover = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
121 if options.partialAlignments == 'true':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
122 partialAlignments = '-x'
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
123 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
124 partialAlignments = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
125 if options.complexityWeighted == 'true':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
126 complexityWeighted = '-w'
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
127 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
128 complexityWeighted = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
129 aligning_cmds = '-d %s -m %s -q %s -r %s -y %s' % \
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
130 ( options.scorDiff, options.minScor, options.minBasq, options.seed, options.minId )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
131 if options.genAlignType == 'paired':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
132 gen_alignment_cmds = '-i %s -j %s -l %s' % ( options.insertMax, options.insertMin, options.pairTyp )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
133 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
134 gen_alignment_cmds = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
135 # prepare actual aligning and generate aligning commands
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
136 if options.genAlignType == 'paired':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
137 cmd = 'smalt map %s %s -o %s %s %s ' % ( aligning_cmds, gen_alignment_cmds, options.output, ref_file_name, fastq, rfastq )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
138 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
139 cmd = 'smalt map %s -o %s %s %s ' % ( aligning_cmds, options.output, ref_file_name, fastq )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
140 # perform alignments
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
141 buffsize = 1048576
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
142 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
143 # need to nest try-except in try-finally to handle 2.4
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
144 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
145 # align
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
146 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
147 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
148 tmp_stderr = open( tmp, 'wb' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
149 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
150 returncode = proc.wait()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
151 tmp_stderr.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
152 # get stderr, allowing for case where it's very large
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
153 tmp_stderr = open( tmp, 'rb' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
154 stderr = ''
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
155 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
156 while True:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
157 stderr += tmp_stderr.read( buffsize )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
158 if not stderr or len( stderr ) % buffsize != 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
159 break
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
160 except OverflowError:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
161 pass
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
162 tmp_stderr.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
163 if returncode != 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
164 raise Exception, stderr
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
165 except Exception, e:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
166 raise Exception, 'Error aligning sequence. ' + str( e )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
167 # remove header if necessary
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
168 if options.suppressHeader == 'true':
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
169 tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
170 tmp_out_name = tmp_out.name
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
171 tmp_out.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
172 try:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
173 shutil.move( options.output, tmp_out_name )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
174 except Exception, e:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
175 raise Exception, 'Error moving output file before removing headers. ' + str( e )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
176 fout = file( options.output, 'w' )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
177 for line in file( tmp_out.name, 'r' ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
178 if not ( line.startswith( '@HD' ) or line.startswith( '@SQ' ) or line.startswith( '@RG' ) or line.startswith( '@PG' ) or line.startswith( '@CO' ) ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
179 fout.write( line )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
180 fout.close()
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
181 # check that there are results in the output file
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
182 if os.path.getsize( options.output ) > 0:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
183 sys.stdout.write( 'SMALT run on %s-end data' % options.genAlignType )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
184 else:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
185 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
186 except Exception, e:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
187 stop_err( 'The alignment failed.\n' + str( e ) )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
188 finally:
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
189 # clean up temp dir
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
190 if os.path.exists( tmp_index_dir ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
191 shutil.rmtree( tmp_index_dir )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
192 if os.path.exists( tmp_dir ):
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
193 shutil.rmtree( tmp_dir )
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
194
87f4f05b44bb Uploaded
cjav
parents:
diff changeset
195 if __name__=="__main__": __main__()