comparison tmap_wrapper_0.3.3/tmap_wrapper.py @ 2:e2640d81157a default tip

* moving to TMAP 0.3.3
author Nils Homer <nilshomer@gmail.com>
date Sun, 26 Feb 2012 20:28:27 -0500
parents tmap_wrapper_0.0.19/tmap_wrapper.py@de2efe4dda3f
children
comparison
equal deleted inserted replaced
1:d18b5f2ada22 2:e2640d81157a
1 #!/usr/bin/env python
2
3 # TODO
4 # - map1/map2/map3 specific options
5
6 """
7 Runs TMAP on Ion Torrent data.
8 Produces a SAM file containing the mappings.
9 Works with TMAP version 0.3.3 or higher.
10
11 usage: tmap_wrapper.py [options]
12 --threads: The number of threads to use
13 --ref: The reference genome to use or index
14 --input: The input FASTQ/SFF file to use for the mapping
15 --inputtype: The input type (FASTQ/SFF)
16 --output: The file to save the output (SAM format)
17 --params: Parameter setting to use (pre_set or full)
18 --fileSource: Whether to use a previously indexed reference sequence or one from history (indexed or history)
19 --algorithm: The algorithm (ex. mapall, map1, map2, map3, map4, ...)
20 --globalOptions: The global options
21 --flowspaceOptions: The flowspace options
22 --pairingOptions: The pairing options
23 --algorithmOptions: The algorithm options
24 --suppressHeader: Suppress header
25 --dbkey: Dbkey for reference genome
26 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
27 """
28
29 import optparse, os, shutil, subprocess, sys, tempfile
30
31 def stop_err( msg ):
32 sys.stderr.write( '%s\n' % msg )
33 sys.exit()
34
35 def __main__():
36 #Parse Command Line
37 parser = optparse.OptionParser()
38 # Global options
39 parser.add_option( '--threads', dest='threads', help='The number of threads to use' )
40 parser.add_option( '--ref', dest='ref', help='The reference genome to use or index' )
41 parser.add_option( '--input', dest='input', help='The input file to use for the mapping' )
42 parser.add_option( '--inputtype', dest='inputtype', help='The input file type' )
43 parser.add_option( '--output', dest='output', help='The file to save the output (SAM format)' )
44 parser.add_option( '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
45 parser.add_option( '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one from history (indexed or history)' )
46 parser.add_option( '--algorithm', dest='algorithm', help='The algorithm (ex. mapall, map1, map2, map3, map4, ...)')
47 parser.add_option( '--globalOptions', dest='globalOptions', help='The global options ' )
48 parser.add_option( '--flowspaceOptions', dest='flowspaceOptions', help='The flowspace options' )
49 parser.add_option( '--pairingOptions', dest='pairingOptions', help='The pairing options' )
50 parser.add_option( '--algorithmOptions', dest='algorithmOptions', help='The algorithm options' )
51 parser.add_option( '--suppressHeader', dest='suppressHeader', help='Suppress header' )
52 parser.add_option( '--dbkey', dest='dbkey', help='Dbkey for reference genome' )
53 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action='store_true', help="Don't build index" )
54
55 # parse the options
56 (options, args) = parser.parse_args()
57
58 # output version # of tool
59 try:
60 tmp = tempfile.NamedTemporaryFile().name
61 tmp_stdout = open( tmp, 'wb' )
62 proc = subprocess.Popen( args='tmap --version 2>&1', shell=True, stdout=tmp_stdout )
63 tmp_stdout.close()
64 returncode = proc.wait()
65 stdout = None
66 for line in open( tmp_stdout.name, 'rb' ):
67 if line.lower().find( 'version' ) >= 0:
68 stdout = line.strip()
69 break
70 if stdout:
71 sys.stdout.write( 'TMAP %s\n' % stdout )
72 else:
73 raise Exception
74 except:
75 sys.stdout.write( 'Could not determine TMAP version\n' )
76
77 # make temp directory for placement of indices
78 tmp_index_dir = tempfile.mkdtemp()
79 tmp_dir = tempfile.mkdtemp()
80
81 # index if necessary
82 if options.fileSource == 'history' and not options.do_not_build_index:
83 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
84 ref_file_name = ref_file.name
85 ref_file.close()
86 os.symlink( options.ref, ref_file_name )
87 cmd1 = 'tmap index -f %s -v ' % ( ref_file_name )
88 try:
89 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
90 tmp_stderr = open( tmp, 'wb' )
91 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
92 returncode = proc.wait()
93 tmp_stderr.close()
94 # get stderr, allowing for case where it's very large
95 tmp_stderr = open( tmp, 'rb' )
96 stderr = ''
97 buffsize = 1048576
98 try:
99 while True:
100 stderr += tmp_stderr.read( buffsize )
101 if not stderr or len( stderr ) % buffsize != 0:
102 break
103 except OverflowError:
104 pass
105 tmp_stderr.close()
106 if returncode != 0:
107 raise Exception, stderr
108 except Exception, e:
109 # clean up temp dirs
110 if os.path.exists( tmp_index_dir ):
111 shutil.rmtree( tmp_index_dir )
112 if os.path.exists( tmp_dir ):
113 shutil.rmtree( tmp_dir )
114 stop_err( 'Error indexing reference sequence. ' + str( e ) )
115 else:
116 ref_file_name = options.ref
117
118 # set up mapping and generate mapping command options
119 if options.params == 'pre_set':
120 options.algorithm = 'mapall'
121 options.flowspaceOptions = ''
122 options.pairingOptions = ''
123 options.algorithmOptions = 'stage1 map1 map2 map3'
124
125 #mapping_cmds
126 # prepare actual mapping and generate mapping commands
127 cmd = 'tmap %s -f %s -r %s -i %s -n %s %s %s %s > %s' % \
128 ( options.algorithm, options.ref, options.input, options.inputtype, \
129 options.threads, options.flowspaceOptions, \
130 options.pairingOptions, options.algorithmOptions, options.output )
131 # perform alignments
132 buffsize = 1048576
133 try:
134 # need to nest try-except in try-finally to handle 2.4
135 try:
136 # align
137 try:
138 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
139 tmp_stderr = open( tmp, 'wb' )
140 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
141 returncode = proc.wait()
142 tmp_stderr.close()
143 # get stderr, allowing for case where it's very large
144 tmp_stderr = open( tmp, 'rb' )
145 stderr = ''
146 try:
147 stderr += cmd + '\n'
148 while True:
149 stderr += tmp_stderr.read( buffsize )
150 if not stderr or len( stderr ) % buffsize != 0:
151 break
152 except OverflowError:
153 pass
154 tmp_stderr.close()
155 if returncode != 0:
156 raise Exception, stderr
157 except Exception, e:
158 raise Exception, 'Error mapping sequence. ' + str( e )
159 # remove header if necessary
160 if options.suppressHeader == 'true':
161 tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)
162 tmp_out_name = tmp_out.name
163 tmp_out.close()
164 try:
165 shutil.move( options.output, tmp_out_name )
166 except Exception, e:
167 raise Exception, 'Error moving output file before removing headers. ' + str( e )
168 fout = file( options.output, 'w' )
169 for line in file( tmp_out.name, 'r' ):
170 if not ( line.startswith( '@HD' ) or line.startswith( '@SQ' ) or line.startswith( '@RG' ) or line.startswith( '@PG' ) or line.startswith( '@CO' ) ):
171 fout.write( line )
172 fout.close()
173 # check that there are results in the output file
174 if os.path.getsize( options.output ) > 0:
175 sys.stdout.write( 'TMAP completed' )
176 else:
177 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
178 except Exception, e:
179 stop_err( 'The alignment failed.\n' + str( e ) )
180 finally:
181 # clean up temp dir
182 if os.path.exists( tmp_index_dir ):
183 shutil.rmtree( tmp_index_dir )
184 if os.path.exists( tmp_dir ):
185 shutil.rmtree( tmp_dir )
186
187 if __name__=="__main__": __main__()