0
|
1 # $Id: Blast.pm,v 1.8.2.1 2003/06/28 21:57:04 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Index::Blast
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@cgt.mc.duke.edu>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Index::Blast - Indexes Blast reports and supports retrieval based on query accession(s)
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use strict;
|
|
20 use Bio::Index::Blast;
|
|
21 my $index = new Bio::Index::Blast(-filename => $indexfile,
|
|
22 -write_flag => 1);
|
|
23 $index->make_index($file1, $file2);
|
|
24
|
|
25 my $data = $index->get_stream($id);
|
|
26
|
|
27 my $bplite_report = $index->fetch_report($id);
|
|
28 print "query is ", $bplite_report->query, "\n";
|
|
29 while( my $sbjct = $bplite_report->nextSbjct ) {
|
|
30 print $sbjct->name, "\n";
|
|
31 while( my $hsp = $sbjct->nextHSP ) {
|
|
32 print "\t e-value ", $hsp->P;
|
|
33 }
|
|
34 print "\n";
|
|
35 }
|
|
36
|
|
37 =head1 DESCRIPTION
|
|
38
|
|
39 This object allows one to build an index on a blast file (or files)
|
|
40 and provide quick access to the blast report for that accession.
|
|
41 Note: for best results 'use strict'.
|
|
42
|
|
43 =head1 FEEDBACK
|
|
44
|
|
45 =head2 Mailing Lists
|
|
46
|
|
47 User feedback is an integral part of the evolution of this and other
|
|
48 Bioperl modules. Send your comments and suggestions preferably to
|
|
49 the Bioperl mailing list. Your participation is much appreciated.
|
|
50
|
|
51 bioperl-l@bioperl.org - General discussion
|
|
52 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
53
|
|
54 =head2 Reporting Bugs
|
|
55
|
|
56 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
57 of the bugs and their resolution. Bug reports can be submitted via
|
|
58 email or the web:
|
|
59
|
|
60 bioperl-bugs@bioperl.org
|
|
61 http://bugzilla.bioperl.org/
|
|
62
|
|
63 =head1 AUTHOR - Jason Stajich
|
|
64
|
|
65 Email jason@cgt.mc.duke.edu
|
|
66
|
|
67 Describe contact details here
|
|
68
|
|
69 =head1 APPENDIX
|
|
70
|
|
71 The rest of the documentation details each of the object methods.
|
|
72 Internal methods are usually preceded with a _
|
|
73
|
|
74 =cut
|
|
75
|
|
76
|
|
77 # Let the code begin...
|
|
78
|
|
79
|
|
80 package Bio::Index::Blast;
|
|
81 use vars qw(@ISA $VERSION);
|
|
82 use strict;
|
|
83
|
|
84 use Bio::Root::Root;
|
|
85 use Bio::Index::Abstract;
|
|
86 use Bio::Tools::BPlite;
|
|
87 use IO::String;
|
|
88
|
|
89 @ISA = qw(Bio::Index::Abstract Bio::Root::Root );
|
|
90
|
|
# The version number is assigned inside BEGIN, i.e. while the module is
# still being compiled.
BEGIN {
    $VERSION = 0.1;
}

# Return the version number of this index module (the package variable
# $VERSION assigned in the BEGIN block directly above).
sub _version {
    my $module_version = $VERSION;
    return $module_version;
}
|
|
98
|
|
99 =head2 new
|
|
100
|
|
101 Usage : $index = Bio::Index::Blast->new(
|
|
102 -filename => $dbm_file,
|
|
103 -write_flag => 0,
|
|
104 -dbm_package => 'DB_File',
|
|
105 -verbose => 0);
|
|
106 Function: Returns a new index object. If filename is
|
|
107 specified, then open_dbm() is immediately called.
|
|
108 Bio::Index::Abstract->new() will usually be called
|
|
109 directly only when opening an existing index.
|
|
110 Returns : A new index object
|
|
111 Args : -filename The name of the dbm index file.
|
|
112 -write_flag TRUE if write access to the dbm file is
|
|
113 needed.
|
|
114 -dbm_package The Perl dbm module to use for the
|
|
115 index.
|
|
116 -verbose Print debugging output to STDERR if
|
|
117 TRUE.
|
|
118
|
|
119 =cut
|
|
120
|
|
# Constructor. All arguments are passed through unchanged to the parent
# class constructor, and the object it builds is returned.
#
# The original body ended on the assignment and relied on Perl returning
# the value of the last evaluated statement; the return is now explicit so
# that adding a statement after the assignment cannot silently change what
# the constructor hands back.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    return $self;
}
|
|
128
|
|
129 =head2 Bio::Index::Blast implemented methods
|
|
130
|
|
131 =cut
|
|
132
|
|
133 =head2 fetch_report
|
|
134
|
|
135 Title : fetch_report
|
|
136 Usage : my $blastreport = $idx->fetch_report($id);
|
|
137 Function: Returns a Bio::Tools::BPlite report object
|
|
138 for a specific blast report
|
|
139 Returns : Bio::Tools::BPlite
|
|
140 Args : valid id
|
|
141
|
|
142 =cut
|
|
143
|
|
# Build a Bio::Tools::BPlite parser for the report stored under $id.
# The stream comes from get_stream($id) and is handed to BPlite with
# -noclose set.
sub fetch_report {
    my ($self, $id) = @_;

    # Keep this as a scalar assignment so get_stream is called in scalar
    # context, exactly one value ending up in the argument list below.
    my $stream = $self->get_stream($id);

    my $parser = Bio::Tools::BPlite->new(-fh      => $stream,
                                         -noclose => 1);
    return $parser;
}
|
|
151
|
|
152
|
|
153 # shamelessly stolen from Bio::Index::Fasta
|
|
154
|
|
155 =head2 id_parser
|
|
156
|
|
157 Title : id_parser
|
|
158 Usage : $index->id_parser( CODE )
|
|
159 Function: Stores or returns the code used by record_id to
|
|
160 parse the ID for record from a string. Useful
|
|
161 for (for instance) specifying a different
|
|
162 parser for different flavours of blast dbs.
|
|
163 Returns \&default_id_parser (see below) if not
|
|
164 set. If you supply your own id_parser
|
|
165 subroutine, then it should expect a fasta
|
|
166 description line. An entry will be added to
|
|
167 the index for each string in the list returned.
|
|
168 Example : $index->id_parser( \&my_id_parser )
|
|
169 Returns : ref to CODE if called without arguments
|
|
170 Args : CODE
|
|
171
|
|
172 =cut
|
|
173
|
|
# Get or set the code reference used to extract ids from a query line.
# A true $code argument is stored on the object; the stored parser is
# returned, or a reference to default_id_parser when none has been stored.
sub id_parser {
    my ($self, $code) = @_;

    $self->{'_id_parser'} = $code if $code;

    my $stored = $self->{'_id_parser'};
    return $stored ? $stored : \&default_id_parser;
}
|
|
182
|
|
183
|
|
184
|
|
185 =head2 default_id_parser
|
|
186
|
|
187 Title : default_id_parser
|
|
188 Usage : $id = default_id_parser( $header )
|
|
189 Function: The default Blast Query ID parser for Bio::Index::Blast.pm
|
|
190 Returns $1 from applying the regexp /^\s*(\S+)/
|
|
191 to $header.
|
|
192 Returns : ID string
|
|
193 Args : a header line string
|
|
194
|
|
195 =cut
|
|
196
|
|
# Default id extractor: skip any leading whitespace and return the first
# run of non-whitespace characters of the given string. Returns nothing
# (empty list / undef) when the string holds no such run.
sub default_id_parser {
    my ($header) = @_;

    return unless $header =~ /^\s*(\S+)/;
    return $1;
}
|
|
204
|
|
205 =head2 Require methods from Bio::Index::Abstract
|
|
206
|
|
207 =cut
|
|
208
|
|
209 =head2 _index_file
|
|
210
|
|
211 Title : _index_file
|
|
212 Usage : $index->_index_file( $file_name, $i )
|
|
213 Function: Specialist function to index BLAST report file(s).
|
|
214 Is provided with a filename and an integer
|
|
215 by make_index in its SUPER class.
|
|
216 Example :
|
|
217 Returns :
|
|
218 Args :
|
|
219
|
|
220 =cut
|
|
221
|
|
# Index every BLAST report found in one file.
#
#   $file - name of the file to read
#   $i    - index number of that file, passed on with every record
#
# The file is read line by line. A line matching /T?BLAST[PNX]/ marks the
# start of a new report; the lines collected so far are then handed to
# _process_report together with the byte offset at which that report
# began. The last report is flushed after the loop.
#
# Dies with "cannot open file ..." (now including the OS error) when the
# file cannot be opened. Returns 1 once the whole file has been read.
sub _index_file {
    my ($self, $file, $i) = @_;

    # Three-argument open on a lexical handle: the file name can never be
    # read as an open mode, and the handle is private to this call.
    open(my $blast_fh, '<', $file)
        or die("cannot open file $file: $!\n");

    my @report_lines;          # lines of the report being collected
    my $report_start = 0;      # byte offset where that report begins
    my $line_start   = 0;      # byte offset of the line being examined

    # The line lives in a lexical rather than the global $_, so code run
    # inside _process_report cannot overwrite it before it is stored.
    while ( my $line = <$blast_fh> ) {
        if ( $line =~ /T?BLAST[PNX]/ ) {
            # Beginning of a new report: flush the previous one, if any.
            # Nothing has been collected yet when this is the first
            # header of the file.
            if (@report_lines) {
                $self->_process_report($report_start, $i,
                                       join("", @report_lines));
            }

            # This header line opens the next report.
            $report_start = $line_start;
            @report_lines = ();
        }
        push @report_lines, $line;
        $line_start = tell($blast_fh);
    }

    # Flush the final report, which has no following header to trigger it.
    if (@report_lines) {
        $self->_process_report($report_start, $i, join("", @report_lines));
    }

    close($blast_fh);
    return 1;
}
|
|
262
|
|
# Parse one BLAST report held in memory and register it in the index.
#
#   $begin - byte offset at which the report starts in its file
#   $i     - index number of that file
#   $data  - complete text of the report
#
# The report's query string is passed to the code reference returned by
# id_parser, and add_record($id, $i, $begin) is called for every id that
# comes back. When $data is empty (or otherwise false) a warning is issued
# and nothing is indexed.
sub _process_report {
    my ($self, $begin, $i, $data) = @_;

    if ( !$data ) {
        $self->warn("calling _process_report without a valid data string");
        return;
    }

    my $id_parser = $self->id_parser;

    # BPlite is given a filehandle, so wrap the in-memory text in one.
    my $data_fh = IO::String->new($data);
    my $report  = Bio::Tools::BPlite->new(-fh      => $data_fh,
                                          -noclose => 1);

    my $query = $report->query;
    foreach my $id ( $id_parser->($query) ) {
        # Debugging output goes to STDERR so that it never mixes with
        # whatever the calling program writes to STDOUT.
        print STDERR "id is $id, begin is $begin\n" if $self->verbose > 0;
        $self->add_record($id, $i, $begin);
    }

    return;
}
|
|
282 =head2 Bio::Index::Abstract methods
|
|
283
|
|
284 =head2 filename
|
|
285
|
|
286 Title : filename
|
|
287 Usage : $value = $self->filename();
|
|
288 $self->filename($value);
|
|
289 Function: Gets or sets the name of the dbm index file.
|
|
290 Returns : The current value of filename
|
|
291 Args : Value of filename if setting, or none if
|
|
292 getting the value.
|
|
293
|
|
294 =head2 write_flag
|
|
295
|
|
296 Title : write_flag
|
|
297 Usage : $value = $self->write_flag();
|
|
298 $self->write_flag($value);
|
|
299 Function: Gets or sets the value of write_flag, which
|
|
300 is whether the dbm file should be opened with
|
|
301 write access.
|
|
302 Returns : The current value of write_flag (default 0)
|
|
303 Args : Value of write_flag if setting, or none if
|
|
304 getting the value.
|
|
305
|
|
306 =head2 dbm_package
|
|
307
|
|
308 Usage : $value = $self->dbm_package();
|
|
309 $self->dbm_package($value);
|
|
310
|
|
311 Function: Gets or sets the name of the Perl dbm module used.
|
|
312 If the value is unset, then it returns the value of
|
|
313 the package variable $USE_DBM_TYPE or if that is
|
|
314 unset, then it chooses the best available dbm type,
|
|
315 choosing 'DB_File' in preference to 'SDBM_File'.
|
|
316 Bio::Index::Abstract may work with other dbm file
|
|
317 types.
|
|
318
|
|
319 Returns : The current value of dbm_package
|
|
320 Args : Value of dbm_package if setting, or none if
|
|
321 getting the value.
|
|
322
|
|
323
|
|
324 =head2 get_stream
|
|
325
|
|
326 Title : get_stream
|
|
327 Usage : $stream = $index->get_stream( $id );
|
|
328 Function: Returns a file handle with the file pointer
|
|
329 at the appropriate place
|
|
330
|
|
331 This provides for a way to get the actual
|
|
332 file contents and not an object
|
|
333
|
|
334 WARNING: you must parse the record delimiter
|
|
335 *yourself*. Abstract won't do this for you
|
|
336 So this code
|
|
337
|
|
338 $fh = $index->get_stream($myid);
|
|
339 while( <$fh> ) {
|
|
340 # do something
|
|
341 }
|
|
342 will parse the entire file if you don't put in
|
|
343 a last statement in, like
|
|
344
|
|
345 while( <$fh> ) {
|
|
346 /^\/\// && last; # end of record
|
|
347 # do something
|
|
348 }
|
|
349
|
|
350 Returns : A filehandle object
|
|
351 Args : string represents the accession number
|
|
352 Notes : This method should not be used without forethought
|
|
353
|
|
354
|
|
355 =head2 open_dbm
|
|
356
|
|
357 Usage : $index->open_dbm()
|
|
358 Function: Opens the dbm file associated with the index
|
|
359 object. Write access is only given if explicitly
|
|
360 asked for by calling new(-write_flag => 1) or having set
|
|
361 the write_flag(1) on the index object. The type of
|
|
362 dbm file opened is that returned by dbm_package().
|
|
363 The name of the file to be opened is obtained by
|
|
364 calling the filename() method.
|
|
365
|
|
366 Example : $index->open_dbm()
|
|
367 Returns : 1 on success
|
|
368
|
|
369
|
|
370 =head2 _version
|
|
371
|
|
372 Title : _version
|
|
373 Usage : $type = $index->_version()
|
|
374 Function: Returns a string which identifies the version of an
|
|
375 index module. Used to permanently identify an index
|
|
376 file as having been created by a particular version
|
|
377 of the index module. Must be provided by the sub class
|
|
378 Example :
|
|
379 Returns :
|
|
380 Args : none
|
|
381
|
|
382 =head2 _filename
|
|
383
|
|
384 Title : _filename
|
|
385 Usage : $index->_filename( FILE INT )
|
|
386 Function: Indexes the file
|
|
387 Example :
|
|
388 Returns :
|
|
389 Args :
|
|
390
|
|
391 =head2 _file_handle
|
|
392
|
|
393 Title : _file_handle
|
|
394 Usage : $fh = $index->_file_handle( INT )
|
|
395 Function: Returns an open filehandle for the file
|
|
396 index INT. On opening a new filehandle it
|
|
397 caches it in the @{$index->_filehandle} array.
|
|
398 If the requested filehandle is already open,
|
|
399 it simply returns it from the array.
|
|
400 Example : $first_file_indexed = $index->_file_handle( 0 );
|
|
401 Returns : ref to a filehandle
|
|
402 Args : INT
|
|
403
|
|
404 =head2 _file_count
|
|
405
|
|
406 Title : _file_count
|
|
407 Usage : $index->_file_count( INT )
|
|
408 Function: Used by the index building sub in a sub class to
|
|
409 track the number of files indexed. Sets or gets
|
|
410 the number of files indexed when called with or
|
|
411 without an argument.
|
|
412 Example :
|
|
413 Returns : INT
|
|
414 Args : INT
|
|
415
|
|
416
|
|
417 =head2 add_record
|
|
418
|
|
419 Title : add_record
|
|
420 Usage : $index->add_record( $id, @stuff );
|
|
421 Function: Calls pack_record on @stuff, and adds the result
|
|
422 of pack_record to the index database under key $id.
|
|
423 If $id is a reference to an array, then a new entry
|
|
424 is added under a key corresponding to each element
|
|
425 of the array.
|
|
426 Example : $index->add_record( $id, $fileNumber, $begin, $end )
|
|
427 Returns : TRUE on success or FALSE on failure
|
|
428 Args : ID LIST
|
|
429
|
|
430 =head2 pack_record
|
|
431
|
|
432 Title : pack_record
|
|
433 Usage : $packed_string = $index->pack_record( LIST )
|
|
434 Function: Packs an array of scalars into a single string
|
|
435 joined by ASCII 034 (which is unlikely to be used
|
|
436 in any of the strings), and returns it.
|
|
437 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
|
|
438 Returns : STRING or undef
|
|
439 Args : LIST
|
|
440
|
|
441 =head2 unpack_record
|
|
442
|
|
443 Title : unpack_record
|
|
444 Usage : $index->unpack_record( STRING )
|
|
445 Function: Splits the string provided into an array,
|
|
446 splitting on ASCII 034.
|
|
447 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
|
|
448 Returns : A 3 element ARRAY
|
|
449 Args : STRING containing ASCII 034
|
|
450
|
|
451 =head2 DESTROY
|
|
452
|
|
453 Title : DESTROY
|
|
454 Usage : Called automatically when index goes out of scope
|
|
455 Function: Closes connection to database and handles to
|
|
456 sequence files
|
|
457 Returns : NEVER
|
|
458 Args : NONE
|
|
459
|
|
460
|
|
461 =cut
|
|
462
|
|
463
|
|
464 1;
|
|
465
|
|
466
|
|
467
|
|
468 1;
|