0
|
1 # $Id: SearchResultEventBuilder.pm,v 1.25.2.1 2003/01/17 20:32:54 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SearchIO::SearchResultEventBuilder
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events.
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # Do not use this object directly, this object is part of the SearchIO
|
|
20 # event based parsing system.
|
|
21
|
|
22 =head1 DESCRIPTION
|
|
23
|
|
24 This object handles Search Events generated by the SearchIO classes
|
|
25 and build appropriate Bio::Search::* objects from them.
|
|
26
|
|
27 =head1 FEEDBACK
|
|
28
|
|
29 =head2 Mailing Lists
|
|
30
|
|
31 User feedback is an integral part of the evolution of this and other
|
|
32 Bioperl modules. Send your comments and suggestions preferably to
|
|
33 the Bioperl mailing list. Your participation is much appreciated.
|
|
34
|
|
35 bioperl-l@bioperl.org - General discussion
|
|
36 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
37
|
|
38 =head2 Reporting Bugs
|
|
39
|
|
40 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
41 of the bugs and their resolution. Bug reports can be submitted via
|
|
42 email or the web:
|
|
43
|
|
44 bioperl-bugs@bioperl.org
|
|
45 http://bugzilla.bioperl.org/
|
|
46
|
|
47 =head1 AUTHOR - Jason Stajich
|
|
48
|
|
49 Email jason@bioperl.org
|
|
50
|
|
51 Describe contact details here
|
|
52
|
|
53 =head1 CONTRIBUTORS
|
|
54
|
|
55 Additional contributors names and emails here
|
|
56
|
|
57 =head1 APPENDIX
|
|
58
|
|
59 The rest of the documentation details each of the object methods.
|
|
60 Internal methods are usually preceded with a _
|
|
61
|
|
62 =cut
|
|
63
|
|
64
|
|
65 # Let the code begin...
|
|
66
|
|
67
|
|
68 package Bio::SearchIO::SearchResultEventBuilder;
|
|
69 use vars qw(@ISA %KNOWNEVENTS);
|
|
70 use strict;
|
|
71
|
|
72 use Bio::Root::Root;
|
|
73 use Bio::SearchIO::EventHandlerI;
|
|
74 use Bio::Search::HSP::HSPFactory;
|
|
75 use Bio::Search::Hit::HitFactory;
|
|
76 use Bio::Search::Result::ResultFactory;
|
|
77
|
|
78 @ISA = qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
|
|
79
|
|
80 =head2 new
|
|
81
|
|
82 Title : new
|
|
83 Usage : my $obj = new Bio::SearchIO::SearchResultEventBuilder();
|
|
84 Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object
|
|
85 Returns : Bio::SearchIO::SearchResultEventBuilder
|
|
86 Args : -hsp_factory => Bio::Factory::ObjectFactoryI
|
|
87 -hit_factory => Bio::Factory::ObjectFactoryI
|
|
88 -result_factory => Bio::Factory::ObjectFactoryI
|
|
89
|
|
90 See L<Bio::Factory::ObjectFactoryI> for more information
|
|
91
|
|
92 =cut
|
|
93
|
|
94 sub new {
|
|
95 my ($class,@args) = @_;
|
|
96 my $self = $class->SUPER::new(@args);
|
|
97 my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
|
|
98 HIT_FACTORY
|
|
99 RESULT_FACTORY)],@args);
|
|
100 $self->register_factory('hsp', $hspF || Bio::Search::HSP::HSPFactory->new());
|
|
101 $self->register_factory('hit', $hitF || Bio::Search::Hit::HitFactory->new());
|
|
102 $self->register_factory('result', $resultF || Bio::Search::Result::ResultFactory->new());
|
|
103
|
|
104 return $self;
|
|
105 }
|
|
106
|
|
107 # new comes from the superclass
|
|
108
|
|
109 =head2 will_handle
|
|
110
|
|
111 Title : will_handle
|
|
112 Usage : if( $handler->will_handle($event_type) ) { ... }
|
|
113 Function: Tests if this event builder knows how to process a specific event
|
|
114 Returns : boolean
|
|
115 Args : event type name
|
|
116
|
|
117
|
|
118 =cut
|
|
119
|
|
120 sub will_handle{
|
|
121 my ($self,$type) = @_;
|
|
122 # these are the events we recognize
|
|
123 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' );
|
|
124 }
|
|
125
|
|
126 =head2 SAX methods
|
|
127
|
|
128 =cut
|
|
129
|
|
130 =head2 start_result
|
|
131
|
|
132 Title : start_result
|
|
133 Usage : $handler->start_result($resulttype)
|
|
134 Function: Begins a result event cycle
|
|
135 Returns : none
|
|
136 Args : Type of Report
|
|
137
|
|
138 =cut
|
|
139
|
|
140 sub start_result {
|
|
141 my ($self,$type) = @_;
|
|
142 $self->{'_resulttype'} = $type;
|
|
143 $self->{'_hits'} = [];
|
|
144 $self->{'_hsps'} = [];
|
|
145 return;
|
|
146 }
|
|
147
|
|
148 =head2 end_result
|
|
149
|
|
150 Title : end_result
|
|
151 Usage : my @results = $parser->end_result
|
|
152 Function: Finishes a result handler cycle
|
|
153 Returns : A Bio::Search::Result::ResultI
|
|
154 Args : none
|
|
155
|
|
156 =cut
|
|
157
|
|
158 sub end_result {
|
|
159 my ($self,$type,$data) = @_;
|
|
160 if( defined $data->{'runid'} &&
|
|
161 $data->{'runid'} !~ /^\s+$/ ) {
|
|
162
|
|
163 if( $data->{'runid'} !~ /^lcl\|/) {
|
|
164 $data->{"RESULT-query_name"}= $data->{'runid'};
|
|
165 } else {
|
|
166 ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) = split(/\s+/,$data->{"RESULT-query_description"},2);
|
|
167 }
|
|
168
|
|
169 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
|
|
170 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
|
|
171 # this is for |123|gb|ABC1.1|
|
|
172 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
|
|
173 $data->{"RESULT-query_accession"}= $acc;
|
|
174 }
|
|
175 delete $data->{'runid'};
|
|
176 }
|
|
177 my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
|
|
178 grep { /^RESULT/ } keys %{$data};
|
|
179
|
|
180 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
|
|
181 $data->{'RESULT-algorithm_name'} || $type);
|
|
182 $args{'-hits'} = $self->{'_hits'};
|
|
183 my $result = $self->factory('result')->create(%args);
|
|
184 $self->{'_hits'} = [];
|
|
185 return $result;
|
|
186 }
|
|
187
|
|
188 =head2 start_hsp
|
|
189
|
|
190 Title : start_hsp
|
|
191 Usage : $handler->start_hsp($name,$data)
|
|
192 Function: Begins processing a HSP event
|
|
193 Returns : none
|
|
194 Args : type of element
|
|
195 associated data (hashref)
|
|
196
|
|
197 =cut
|
|
198
|
|
199 sub start_hsp {
|
|
200 my ($self,@args) = @_;
|
|
201 return;
|
|
202 }
|
|
203
|
|
204 =head2 end_hsp
|
|
205
|
|
206 Title : end_hsp
|
|
207 Usage : $handler->end_hsp()
|
|
208 Function: Finish processing a HSP event
|
|
209 Returns : none
|
|
210 Args : type of event and associated hashref
|
|
211
|
|
212
|
|
213 =cut
|
|
214
|
|
215 sub end_hsp {
|
|
216 my ($self,$type,$data) = @_;
|
|
217 # this code is to deal with the fact that Blast XML data
|
|
218 # always has start < end and one has to infer strandedness
|
|
219 # from the frame which is a problem for the Search::HSP object
|
|
220 # which expect to be able to infer strand from the order of
|
|
221 # of the begin/end of the query and hit coordinates
|
|
222 if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs
|
|
223 (( $data->{'HSP-query_frame'} < 0 &&
|
|
224 $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||
|
|
225 $data->{'HSP-query_frame'} > 0 &&
|
|
226 ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) )
|
|
227 )
|
|
228 {
|
|
229 # swap
|
|
230 ($data->{'HSP-query_start'},
|
|
231 $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'},
|
|
232 $data->{'HSP-query_start'});
|
|
233 }
|
|
234 if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs
|
|
235 ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 &&
|
|
236 $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||
|
|
237 defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 &&
|
|
238 ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) )
|
|
239 )
|
|
240 {
|
|
241 # swap
|
|
242 ($data->{'HSP-hit_start'},
|
|
243 $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'},
|
|
244 $data->{'HSP-hit_start'});
|
|
245 }
|
|
246 $data->{'HSP-query_frame'} ||= 0;
|
|
247 $data->{'HSP-hit_frame'} ||= 0;
|
|
248 # handle Blast 2.1.2 which did not support data member: hsp_align-len
|
|
249 $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || '');
|
|
250 $data->{'HSP-hit_length'} ||= length ($data->{'HSP-hit_seq'} || '');
|
|
251 $data->{'HSP-hsp_length'} ||= length ($data->{'HSP-homology_seq'} || '');
|
|
252
|
|
253 my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) }
|
|
254 grep { /^HSP/ } keys %{$data};
|
|
255
|
|
256 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
|
|
257 $data->{'RESULT-algorithm_name'} || $type);
|
|
258 # copy this over from result
|
|
259 $args{'-query_name'} = $data->{'RESULT-query_name'};
|
|
260 $args{'-hit_name'} = $data->{'HIT-name'};
|
|
261 my ($rank) = scalar @{$self->{'_hsps'}} + 1;
|
|
262 $args{'-rank'} = $rank;
|
|
263
|
|
264 my $hsp = $self->factory('hsp')->create(%args);
|
|
265 push @{$self->{'_hsps'}}, $hsp;
|
|
266 return $hsp;
|
|
267 }
|
|
268
|
|
269
|
|
270 =head2 start_hit
|
|
271
|
|
272 Title : start_hit
|
|
273 Usage : $handler->start_hit()
|
|
274 Function: Starts a Hit event cycle
|
|
275 Returns : none
|
|
276 Args : type of event and associated hashref
|
|
277
|
|
278
|
|
279 =cut
|
|
280
|
|
281 sub start_hit{
|
|
282 my ($self,$type) = @_;
|
|
283 $self->{'_hsps'} = [];
|
|
284 return;
|
|
285 }
|
|
286
|
|
287
|
|
288 =head2 end_hit
|
|
289
|
|
290 Title : end_hit
|
|
291 Usage : $handler->end_hit()
|
|
292 Function: Ends a Hit event cycle
|
|
293 Returns : Bio::Search::Hit::HitI object
|
|
294 Args : type of event and associated hashref
|
|
295
|
|
296
|
|
297 =cut
|
|
298
|
|
299 sub end_hit{
|
|
300 my ($self,$type,$data) = @_;
|
|
301 my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data};
|
|
302
|
|
303 # I hate special cases, but this is here because NCBI BLAST XML
|
|
304 # doesn't play nice and is undergoing mutation -jason
|
|
305 if( $args{'-name'} =~ /BL_ORD_ID/ ) {
|
|
306 ($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2);
|
|
307 }
|
|
308 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
|
|
309 $data->{'RESULT-algorithm_name'} || $type);
|
|
310 $args{'-hsps'} = $self->{'_hsps'};
|
|
311 $args{'-query_len'} = $data->{'RESULT-query_length'};
|
|
312 my ($hitrank) = scalar @{$self->{'_hits'}} + 1;
|
|
313 $args{'-rank'} = $hitrank;
|
|
314 my $hit = $self->factory('hit')->create(%args);
|
|
315 push @{$self->{'_hits'}}, $hit;
|
|
316 $self->{'_hsps'} = [];
|
|
317 return $hit;
|
|
318 }
|
|
319
|
|
320 =head2 Factory methods
|
|
321
|
|
322 =cut
|
|
323
|
|
324 =head2 register_factory
|
|
325
|
|
326 Title : register_factory
|
|
327 Usage : $handler->register_factory('TYPE',$factory);
|
|
328 Function: Register a specific factory for a object type class
|
|
329 Returns : none
|
|
330 Args : string representing the class and
|
|
331 Bio::Factory::ObjectFactoryI
|
|
332
|
|
333 See L<Bio::Factory::ObjectFactoryI> for more information
|
|
334
|
|
335 =cut
|
|
336
|
|
337 sub register_factory{
|
|
338 my ($self, $type,$f) = @_;
|
|
339 if( ! defined $f || ! ref($f) ||
|
|
340 ! $f->isa('Bio::Factory::ObjectFactoryI') ) {
|
|
341 $self->throw("Cannot set factory to value $f".ref($f)."\n");
|
|
342 }
|
|
343 $self->{'_factories'}->{lc($type)} = $f;
|
|
344 }
|
|
345
|
|
346
|
|
347 =head2 factory
|
|
348
|
|
349 Title : factory
|
|
350 Usage : my $f = $handler->factory('TYPE');
|
|
351 Function: Retrieves the associated factory for requested 'TYPE'
|
|
352 Returns : a Bio::Factory::ObjectFactoryI or undef if none registered
|
|
353 Args : name of factory class to retrieve
|
|
354
|
|
355 See L<Bio::Factory::ObjectFactoryI> for more information
|
|
356
|
|
357 =cut
|
|
358
|
|
359 sub factory{
|
|
360 my ($self,$type) = @_;
|
|
361 return $self->{'_factories'}->{lc($type)} || $self->throw("No factory registered for $type");
|
|
362 }
|
|
363
|
|
364
|
|
365 1;
|