Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Index/Blast.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Blast.pm,v 1.8.2.1 2003/06/28 21:57:04 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::Index::Blast | |
4 # | |
5 # Cared for by Jason Stajich <jason@cgt.mc.duke.edu> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Index::Blast - Indexes Blast reports and supports retrieval based on query accession(s) | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use strict; | |
20 use Bio::Index::Blast; | |
21 my $index = new Bio::Index::Blast(-filename => $indexfile, | |
22 -write_flag => 1); | |
23 $index->make_index($file1, $file2); | |
24 | |
25 my $data = $index->get_stream($id); | |
26 | |
27 my $bplite_report = $index->fetch_report($id); | |
28 print "query is ", $bplite_report->query, "\n"; | |
29 while( my $sbjct = $bplite_report->nextSbjct ) { | |
30 print $sbjct->name, "\n"; | |
31 while( my $hsp = $sbjct->nextHSP ) { | |
32 print "\t e-value ", $hsp->P; | |
33 } | |
34 print "\n"; | |
35 } | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 This object allows one to build an index on a blast file (or files) | |
40 and provide quick access to the blast report for that accession. | |
41 Note: for best results 'use strict'. | |
42 | |
43 =head1 FEEDBACK | |
44 | |
45 =head2 Mailing Lists | |
46 | |
47 User feedback is an integral part of the evolution of this and other | |
48 Bioperl modules. Send your comments and suggestions preferably to | |
49 the Bioperl mailing list. Your participation is much appreciated. | |
50 | |
51 bioperl-l@bioperl.org - General discussion | |
52 http://bioperl.org/MailList.shtml - About the mailing lists | |
53 | |
54 =head2 Reporting Bugs | |
55 | |
56 Report bugs to the Bioperl bug tracking system to help us keep track | |
57 of the bugs and their resolution. Bug reports can be submitted via | |
58 email or the web: | |
59 | |
60 bioperl-bugs@bioperl.org | |
61 http://bugzilla.bioperl.org/ | |
62 | |
63 =head1 AUTHOR - Jason Stajich | |
64 | |
65 Email jason@cgt.mc.duke.edu | |
66 | |
67 Describe contact details here | |
68 | |
69 =head1 APPENDIX | |
70 | |
71 The rest of the documentation details each of the object methods. | |
72 Internal methods are usually preceded with a _ | |
73 | |
74 =cut | |
75 | |
76 | |
77 # Let the code begin... | |
78 | |
79 | |
80 package Bio::Index::Blast; | |
81 use vars qw(@ISA $VERSION); | |
82 use strict; | |
83 | |
84 use Bio::Root::Root; | |
85 use Bio::Index::Abstract; | |
86 use Bio::Tools::BPlite; | |
87 use IO::String; | |
88 | |
89 @ISA = qw(Bio::Index::Abstract Bio::Root::Root ); | |
90 | |
# The module version is assigned inside BEGIN, i.e. at compile time.
BEGIN { $VERSION = 0.1 }

# Returns the version of this index module.  Per the module's own
# documentation, this value is used to identify an index file as having
# been created by a particular version of the index module.
sub _version { return $VERSION }
98 | |
99 =head2 new | |
100 | |
101 Usage : $index = Bio::Index::Blast->new( | |
102 -filename => $dbm_file, | |
103 -write_flag => 0, | |
104 -dbm_package => 'DB_File', | |
105 -verbose => 0); | |
106 Function: Returns a new index object. If filename is | |
107 specified, then open_dbm() is immediately called. | |
108 Bio::Index::Abstract->new() will usually be called | |
109 directly only when opening an existing index. | |
110 Returns : A new index object | |
111 Args : -filename The name of the dbm index file. | |
112 -write_flag TRUE if write access to the dbm file is | |
113 needed. | |
114 -dbm_package The Perl dbm module to use for the | |
115 index. | |
116 -verbose Print debugging output to STDERR if | |
117 TRUE. | |
118 | |
119 =cut | |
120 | |
# Constructor.  Forwards all arguments unchanged to the parent class
# constructor (resolved through @ISA) and returns the resulting object.
#
# Args    : the named arguments documented for the constructor, e.g.
#           -filename, -write_flag, -dbm_package, -verbose
# Returns : the new index object
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # Return explicitly instead of relying on the value of the last
    # evaluated statement.
    return $self;
}
128 | |
129 =head2 Bio::Index::Blast implemented methods | |
130 | |
131 =cut | |
132 | |
133 =head2 fetch_report | |
134 | |
135 Title : fetch_report | |
136 Usage : my $blastreport = $idx->fetch_report($id); | |
137 Function: Returns a Bio::Tools::BPlite report object | |
138 for a specific blast report | |
139 Returns : Bio::Tools::BPlite | |
140 Args : valid id | |
141 | |
142 =cut | |
143 | |
# Builds a Bio::Tools::BPlite parser for the report stored under $id.
#
# Args    : $id - the id whose report is wanted
# Returns : a Bio::Tools::BPlite object constructed with the handle
#           returned by get_stream($id) and with -noclose => 1
sub fetch_report {
    my ($self, $id) = @_;

    my $fh = $self->get_stream($id);

    # Direct class-method call rather than the ambiguous indirect-object
    # form "new Bio::Tools::BPlite(...)".
    my $report = Bio::Tools::BPlite->new(
        -fh      => $fh,
        -noclose => 1,
    );

    return $report;
}
151 | |
152 | |
153 # shamelessly stolen from Bio::Index::Fasta | |
154 | |
155 =head2 id_parser | |
156 | |
157 Title : id_parser | |
158 Usage : $index->id_parser( CODE ) | |
159 Function: Stores or returns the code used by record_id to | |
160 parse the ID for record from a string. Useful | |
161 for (for instance) specifying a different | |
162 parser for different flavours of blast dbs. | |
163 Returns \&default_id_parser (see below) if not | |
164 set. If you supply your own id_parser | |
165 subroutine, then it should expect a fasta | |
166 description line. An entry will be added to | |
167 the index for each string in the list returned. | |
168 Example : $index->id_parser( \&my_id_parser ) | |
169 Returns : ref to CODE if called without arguments | |
170 Args : CODE | |
171 | |
172 =cut | |
173 | |
# Combined getter/setter for the code reference used to extract ids.
#
# Args    : $code - optional; when true it is stored as the parser
# Returns : the stored parser if one is set, otherwise a reference to
#           default_id_parser
sub id_parser {
    my $self = shift;
    my $code = shift;

    $self->{'_id_parser'} = $code if $code;

    return $self->{'_id_parser'}
        ? $self->{'_id_parser'}
        : \&default_id_parser;
}
182 | |
183 | |
184 | |
185 =head2 default_id_parser | |
186 | |
187 Title : default_id_parser | |
188 Usage : $id = default_id_parser( $header ) | |
189 Function: The default Blast Query ID parser for Bio::Index::Blast.pm | |
190 Returns $1 from applying the regexp /^\s*(\S+)/ | |
191 to $header. | |
192 Returns : ID string | |
193 Args : a header line string | |
194 | |
195 =cut | |
196 | |
# Default id parser: extracts the first run of non-whitespace characters
# from a header string, ignoring leading whitespace.
#
# Args    : $header - the header string to parse
# Returns : the matched id; an empty list (undef in scalar context) when
#           $header is undefined or contains no non-whitespace characters
sub default_id_parser {
    my ($header) = @_;

    # Guard against a missing header so that no match is attempted on an
    # undefined value.
    return unless defined $header;

    return $1 if $header =~ /^\s*(\S+)/;
    return;
}
204 | |
205 =head2 Require methods from Bio::Index::Abstract | |
206 | |
207 =cut | |
208 | |
209 =head2 _index_file | |
210 | |
211 Title : _index_file | |
212 Usage : $index->_index_file( $file_name, $i ) | |
213 Function: Specialist function to index BLAST report file(s). | |
214 Is provided with a filename and an integer | |
215 by make_index in its SUPER class. | |
216 Example : | |
217 Returns : | |
218 Args : | |
219 | |
220 =cut | |
221 | |
# Scans one BLAST output file and registers every report found in it.
#
# A new report begins at each line matching /(T)?BLAST[PNX]/.  The lines
# collected since the previous such line (or since the start of the file)
# are handed to _process_report() together with the offset at which that
# group of lines began and the file number $i.  Lines that precede the
# first matching line are passed on as a group of their own, with offset 0.
#
# Args    : $file - name of the file to index
#           $i    - index number of the file being indexed
# Returns : 1 (the original code had no deliberate return value)
# Throws  : dies if $file cannot be opened
sub _index_file {
    my ($self, $file, $i) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as part of the open mode, and no global handle is used.
    open(my $blast_fh, '<', $file)
        or die("cannot open file $file: $!\n");

    my @data;               # lines of the report currently being read
    my $indexpoint = 0;     # offset at which the current report starts
    my $lastline   = 0;     # offset just past the most recently read line

    # A named lexical is used for the line so the caller's $_ is untouched.
    while (my $line = <$blast_fh>) {
        if ($line =~ /(T)?BLAST[PNX]/) {
            # Start of a new report: flush what has been collected so far.
            # Nothing is flushed when no lines were read before this one
            # (fencepost at the beginning).
            if (@data) {
                $self->_process_report($indexpoint, $i, join("", @data));
            }

            # The new report starts where the previous line ended.
            $indexpoint = $lastline;
            @data       = ();
        }
        push @data, $line;
        $lastline = tell($blast_fh);
    }

    # Fencepost at the end: the final report has no following header line.
    if (@data) {
        $self->_process_report($indexpoint, $i, join("", @data));
    }

    close($blast_fh);

    return 1;
}
262 | |
# Parses the text of one report and adds an index record for each id
# that the configured id parser extracts from the report's query.
#
# Args    : $begin - offset of the report, stored in the index record
#           $i     - index number of the file containing the report
#           $data  - the complete text of the report
# Returns : nothing
# Notes   : warns and returns without indexing when $data is false.
#           With verbose > 0, a debug line is printed for each id.
sub _process_report {
    my ($self, $begin, $i, $data) = @_;

    if (!$data) {
        $self->warn("calling _process_report without a valid data string");
        return;
    }

    my $id_parser = $self->id_parser;

    # Let BPlite read the report text through an in-memory handle.
    # Direct class-method calls replace the indirect-object "new Class(...)"
    # form.
    my $data_fh = IO::String->new($data);
    my $report  = Bio::Tools::BPlite->new(
        -fh      => $data_fh,
        -noclose => 1,
    );

    my $query = $report->query;
    foreach my $id ($id_parser->($query)) {
        # Debug output goes to STDERR, as the constructor documentation
        # for -verbose states.
        print STDERR "id is $id, begin is $begin\n" if $self->verbose > 0;
        $self->add_record($id, $i, $begin);
    }

    return;
}
282 =head2 Bio::Index::Abstract methods | |
283 | |
284 =head2 filename | |
285 | |
286 Title : filename | |
287 Usage : $value = $self->filename(); | |
288 $self->filename($value); | |
289 Function: Gets or sets the name of the dbm index file. | |
290 Returns : The current value of filename | |
291 Args : Value of filename if setting, or none if | |
292 getting the value. | |
293 | |
294 =head2 write_flag | |
295 | |
296 Title : write_flag | |
297 Usage : $value = $self->write_flag(); | |
298 $self->write_flag($value); | |
299 Function: Gets or sets the value of write_flag, which | |
300 is whether the dbm file should be opened with | |
301 write access. | |
302 Returns : The current value of write_flag (default 0) | |
303 Args : Value of write_flag if setting, or none if | |
304 getting the value. | |
305 | |
306 =head2 dbm_package | |
307 | |
308 Usage : $value = $self->dbm_package(); | |
309 $self->dbm_package($value); | |
310 | |
311 Function: Gets or sets the name of the Perl dbm module used. | |
312 If the value is unset, then it returns the value of | |
313 the package variable $USE_DBM_TYPE or if that is | |
314 unset, then it chooses the best available dbm type, | |
315 choosing 'DB_File' in preference to 'SDBM_File'. | |
316 Bio::Index::Abstract may work with other dbm file | |
317 types. | |
318 | |
319 Returns : The current value of dbm_package | |
320 Args : Value of dbm_package if setting, or none if | |
321 getting the value. | |
322 | |
323 | |
324 =head2 get_stream | |
325 | |
326 Title : get_stream | |
327 Usage : $stream = $index->get_stream( $id ); | |
328 Function: Returns a file handle with the file pointer | |
329 at the appropriate place | |
330 | |
331 This provides for a way to get the actual | |
332 file contents and not an object | |
333 | |
334 WARNING: you must parse the record delimiter | |
335 *yourself*. Abstract won't do this for you | |
336 So this code | |
337 | |
338 $fh = $index->get_stream($myid); | |
339 while( <$fh> ) { | |
340 # do something | |
341 } | |
342 will parse the entire file if you don't put in | |
343 a last statement in, like | |
344 | |
345 while( <$fh> ) { | |
346 /^\/\// && last; # end of record | |
347 # do something | |
348 } | |
349 | |
350 Returns : A filehandle object | |
351 Args : string represents the accession number | |
352 Notes : This method should not be used without forethought | |
353 | |
354 | |
355 =head2 open_dbm | |
356 | |
357 Usage : $index->open_dbm() | |
358 Function: Opens the dbm file associated with the index | |
359 object. Write access is only given if explicitly | |
360 asked for by calling new(-write => 1) or having set | |
361 the write_flag(1) on the index object. The type of | |
362 dbm file opened is that returned by dbm_package(). | |
363 The name of the file to be opened is obtained by | |
364 calling the filename() method. | |
365 | |
366 Example : $index->_open_dbm() | |
367 Returns : 1 on success | |
368 | |
369 | |
370 =head2 _version | |
371 | |
372 Title : _version | |
373 Usage : $type = $index->_version() | |
374 Function: Returns a string which identifies the version of an | |
375 index module. Used to permanently identify an index | |
376 file as having been created by a particular version | |
377 of the index module. Must be provided by the sub class | |
378 Example : | |
379 Returns : | |
380 Args : none | |
381 | |
382 =head2 _filename | |
383 | |
384 Title : _filename | |
385 Usage : $index->_filename( FILE INT ) | |
386 Function: Indexes the file | |
387 Example : | |
388 Returns : | |
389 Args : | |
390 | |
391 =head2 _file_handle | |
392 | |
393 Title : _file_handle | |
394 Usage : $fh = $index->_file_handle( INT ) | |
395 Function: Returns an open filehandle for the file | |
396 index INT. On opening a new filehandle it | |
397 caches it in the @{$index->_filehandle} array. | |
398 If the requested filehandle is already open, | |
399 it simply returns it from the array. | |
400 Example : $first_file_indexed = $index->_file_handle( 0 ); | |
401 Returns : ref to a filehandle | |
402 Args : INT | |
403 | |
404 =head2 _file_count | |
405 | |
406 Title : _file_count | |
407 Usage : $index->_file_count( INT ) | |
408 Function: Used by the index building sub in a sub class to | |
409 track the number of files indexed. Sets or gets | |
410 the number of files indexed when called with or | |
411 without an argument. | |
412 Example : | |
413 Returns : INT | |
414 Args : INT | |
415 | |
416 | |
417 =head2 add_record | |
418 | |
419 Title : add_record | |
420 Usage : $index->add_record( $id, @stuff ); | |
421 Function: Calls pack_record on @stuff, and adds the result | |
422 of pack_record to the index database under key $id. | |
423 If $id is a reference to an array, then a new entry | |
424 is added under a key corresponding to each element | |
425 of the array. | |
426 Example : $index->add_record( $id, $fileNumber, $begin, $end ) | |
427 Returns : TRUE on success or FALSE on failure | |
428 Args : ID LIST | |
429 | |
430 =head2 pack_record | |
431 | |
432 Title : pack_record | |
433 Usage : $packed_string = $index->pack_record( LIST ) | |
434 Function: Packs an array of scalars into a single string | |
435 joined by ASCII 034 (which is unlikely to be used | |
436 in any of the strings), and returns it. | |
437 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end ) | |
438 Returns : STRING or undef | |
439 Args : LIST | |
440 | |
441 =head2 unpack_record | |
442 | |
443 Title : unpack_record | |
444 Usage : $index->unpack_record( STRING ) | |
445 Function: Splits the string provided into an array, | |
446 splitting on ASCII 034. | |
447 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} ) | |
448 Returns : A 3 element ARRAY | |
449 Args : STRING containing ASCII 034 | |
450 | |
451 =head2 DESTROY | |
452 | |
453 Title : DESTROY | |
454 Usage : Called automatically when index goes out of scope | |
455 Function: Closes connection to database and handles to | |
456 sequence files | |
457 Returns : NEVER | |
458 Args : NONE | |
459 | |
460 | |
461 =cut | |
462 | |
463 | |
464 1; | |
465 | |
466 | |
467 | |
468 1; |