Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/SearchResultEventBuilder.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: SearchResultEventBuilder.pm,v 1.25.2.1 2003/01/17 20:32:54 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SearchIO::SearchResultEventBuilder | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events. | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # Do not use this object directly, this object is part of the SearchIO | |
20 # event based parsing system. | |
21 | |
22 =head1 DESCRIPTION | |
23 | |
24 This object handles Search Events generated by the SearchIO classes | |
25 and build appropriate Bio::Search::* objects from them. | |
26 | |
27 =head1 FEEDBACK | |
28 | |
29 =head2 Mailing Lists | |
30 | |
31 User feedback is an integral part of the evolution of this and other | |
32 Bioperl modules. Send your comments and suggestions preferably to | |
33 the Bioperl mailing list. Your participation is much appreciated. | |
34 | |
35 bioperl-l@bioperl.org - General discussion | |
36 http://bioperl.org/MailList.shtml - About the mailing lists | |
37 | |
38 =head2 Reporting Bugs | |
39 | |
40 Report bugs to the Bioperl bug tracking system to help us keep track | |
41 of the bugs and their resolution. Bug reports can be submitted via | |
42 email or the web: | |
43 | |
44 bioperl-bugs@bioperl.org | |
45 http://bugzilla.bioperl.org/ | |
46 | |
47 =head1 AUTHOR - Jason Stajich | |
48 | |
49 Email jason@bioperl.org | |
50 | |
51 Describe contact details here | |
52 | |
53 =head1 CONTRIBUTORS | |
54 | |
55 Additional contributors names and emails here | |
56 | |
57 =head1 APPENDIX | |
58 | |
59 The rest of the documentation details each of the object methods. | |
60 Internal methods are usually preceded with a _ | |
61 | |
62 =cut | |
63 | |
64 | |
65 # Let the code begin... | |
66 | |
67 | |
68 package Bio::SearchIO::SearchResultEventBuilder; | |
69 use vars qw(@ISA %KNOWNEVENTS); | |
70 use strict; | |
71 | |
72 use Bio::Root::Root; | |
73 use Bio::SearchIO::EventHandlerI; | |
74 use Bio::Search::HSP::HSPFactory; | |
75 use Bio::Search::Hit::HitFactory; | |
76 use Bio::Search::Result::ResultFactory; | |
77 | |
78 @ISA = qw(Bio::Root::Root Bio::SearchIO::EventHandlerI); | |
79 | |
80 =head2 new | |
81 | |
82 Title : new | |
83 Usage : my $obj = new Bio::SearchIO::SearchResultEventBuilder(); | |
84 Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object | |
85 Returns : Bio::SearchIO::SearchResultEventBuilder | |
86 Args : -hsp_factory => Bio::Factory::ObjectFactoryI | |
87 -hit_factory => Bio::Factory::ObjectFactoryI | |
88 -result_factory => Bio::Factory::ObjectFactoryI | |
89 | |
90 See L<Bio::Factory::ObjectFactoryI> for more information | |
91 | |
92 =cut | |
93 | |
94 sub new { | |
95 my ($class,@args) = @_; | |
96 my $self = $class->SUPER::new(@args); | |
97 my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY | |
98 HIT_FACTORY | |
99 RESULT_FACTORY)],@args); | |
100 $self->register_factory('hsp', $hspF || Bio::Search::HSP::HSPFactory->new()); | |
101 $self->register_factory('hit', $hitF || Bio::Search::Hit::HitFactory->new()); | |
102 $self->register_factory('result', $resultF || Bio::Search::Result::ResultFactory->new()); | |
103 | |
104 return $self; | |
105 } | |
106 | |
107 # new comes from the superclass | |
108 | |
109 =head2 will_handle | |
110 | |
111 Title : will_handle | |
112 Usage : if( $handler->will_handle($event_type) ) { ... } | |
113 Function: Tests if this event builder knows how to process a specific event | |
114 Returns : boolean | |
115 Args : event type name | |
116 | |
117 | |
118 =cut | |
119 | |
120 sub will_handle{ | |
121 my ($self,$type) = @_; | |
122 # these are the events we recognize | |
123 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' ); | |
124 } | |
125 | |
126 =head2 SAX methods | |
127 | |
128 =cut | |
129 | |
130 =head2 start_result | |
131 | |
132 Title : start_result | |
133 Usage : $handler->start_result($resulttype) | |
134 Function: Begins a result event cycle | |
135 Returns : none | |
136 Args : Type of Report | |
137 | |
138 =cut | |
139 | |
140 sub start_result { | |
141 my ($self,$type) = @_; | |
142 $self->{'_resulttype'} = $type; | |
143 $self->{'_hits'} = []; | |
144 $self->{'_hsps'} = []; | |
145 return; | |
146 } | |
147 | |
148 =head2 end_result | |
149 | |
150 Title : end_result | |
151 Usage : my @results = $parser->end_result | |
152 Function: Finishes a result handler cycle | |
153 Returns : A Bio::Search::Result::ResultI | |
154 Args : none | |
155 | |
156 =cut | |
157 | |
158 sub end_result { | |
159 my ($self,$type,$data) = @_; | |
160 if( defined $data->{'runid'} && | |
161 $data->{'runid'} !~ /^\s+$/ ) { | |
162 | |
163 if( $data->{'runid'} !~ /^lcl\|/) { | |
164 $data->{"RESULT-query_name"}= $data->{'runid'}; | |
165 } else { | |
166 ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) = split(/\s+/,$data->{"RESULT-query_description"},2); | |
167 } | |
168 | |
169 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) { | |
170 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1 | |
171 # this is for |123|gb|ABC1.1| | |
172 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/); | |
173 $data->{"RESULT-query_accession"}= $acc; | |
174 } | |
175 delete $data->{'runid'}; | |
176 } | |
177 my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); } | |
178 grep { /^RESULT/ } keys %{$data}; | |
179 | |
180 $args{'-algorithm'} = uc( $args{'-algorithm_name'} || | |
181 $data->{'RESULT-algorithm_name'} || $type); | |
182 $args{'-hits'} = $self->{'_hits'}; | |
183 my $result = $self->factory('result')->create(%args); | |
184 $self->{'_hits'} = []; | |
185 return $result; | |
186 } | |
187 | |
188 =head2 start_hsp | |
189 | |
190 Title : start_hsp | |
191 Usage : $handler->start_hsp($name,$data) | |
192 Function: Begins processing a HSP event | |
193 Returns : none | |
194 Args : type of element | |
195 associated data (hashref) | |
196 | |
197 =cut | |
198 | |
199 sub start_hsp { | |
200 my ($self,@args) = @_; | |
201 return; | |
202 } | |
203 | |
204 =head2 end_hsp | |
205 | |
206 Title : end_hsp | |
207 Usage : $handler->end_hsp() | |
208 Function: Finish processing a HSP event | |
209 Returns : none | |
210 Args : type of event and associated hashref | |
211 | |
212 | |
213 =cut | |
214 | |
215 sub end_hsp { | |
216 my ($self,$type,$data) = @_; | |
217 # this code is to deal with the fact that Blast XML data | |
218 # always has start < end and one has to infer strandedness | |
219 # from the frame which is a problem for the Search::HSP object | |
220 # which expect to be able to infer strand from the order of | |
221 # of the begin/end of the query and hit coordinates | |
222 if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs | |
223 (( $data->{'HSP-query_frame'} < 0 && | |
224 $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) || | |
225 $data->{'HSP-query_frame'} > 0 && | |
226 ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) ) | |
227 ) | |
228 { | |
229 # swap | |
230 ($data->{'HSP-query_start'}, | |
231 $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'}, | |
232 $data->{'HSP-query_start'}); | |
233 } | |
234 if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs | |
235 ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 && | |
236 $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) || | |
237 defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 && | |
238 ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) ) | |
239 ) | |
240 { | |
241 # swap | |
242 ($data->{'HSP-hit_start'}, | |
243 $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'}, | |
244 $data->{'HSP-hit_start'}); | |
245 } | |
246 $data->{'HSP-query_frame'} ||= 0; | |
247 $data->{'HSP-hit_frame'} ||= 0; | |
248 # handle Blast 2.1.2 which did not support data member: hsp_align-len | |
249 $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || ''); | |
250 $data->{'HSP-hit_length'} ||= length ($data->{'HSP-hit_seq'} || ''); | |
251 $data->{'HSP-hsp_length'} ||= length ($data->{'HSP-homology_seq'} || ''); | |
252 | |
253 my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) } | |
254 grep { /^HSP/ } keys %{$data}; | |
255 | |
256 $args{'-algorithm'} = uc( $args{'-algorithm_name'} || | |
257 $data->{'RESULT-algorithm_name'} || $type); | |
258 # copy this over from result | |
259 $args{'-query_name'} = $data->{'RESULT-query_name'}; | |
260 $args{'-hit_name'} = $data->{'HIT-name'}; | |
261 my ($rank) = scalar @{$self->{'_hsps'}} + 1; | |
262 $args{'-rank'} = $rank; | |
263 | |
264 my $hsp = $self->factory('hsp')->create(%args); | |
265 push @{$self->{'_hsps'}}, $hsp; | |
266 return $hsp; | |
267 } | |
268 | |
269 | |
270 =head2 start_hit | |
271 | |
272 Title : start_hit | |
273 Usage : $handler->start_hit() | |
274 Function: Starts a Hit event cycle | |
275 Returns : none | |
276 Args : type of event and associated hashref | |
277 | |
278 | |
279 =cut | |
280 | |
281 sub start_hit{ | |
282 my ($self,$type) = @_; | |
283 $self->{'_hsps'} = []; | |
284 return; | |
285 } | |
286 | |
287 | |
288 =head2 end_hit | |
289 | |
290 Title : end_hit | |
291 Usage : $handler->end_hit() | |
292 Function: Ends a Hit event cycle | |
293 Returns : Bio::Search::Hit::HitI object | |
294 Args : type of event and associated hashref | |
295 | |
296 | |
297 =cut | |
298 | |
299 sub end_hit{ | |
300 my ($self,$type,$data) = @_; | |
301 my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data}; | |
302 | |
303 # I hate special cases, but this is here because NCBI BLAST XML | |
304 # doesn't play nice and is undergoing mutation -jason | |
305 if( $args{'-name'} =~ /BL_ORD_ID/ ) { | |
306 ($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2); | |
307 } | |
308 $args{'-algorithm'} = uc( $args{'-algorithm_name'} || | |
309 $data->{'RESULT-algorithm_name'} || $type); | |
310 $args{'-hsps'} = $self->{'_hsps'}; | |
311 $args{'-query_len'} = $data->{'RESULT-query_length'}; | |
312 my ($hitrank) = scalar @{$self->{'_hits'}} + 1; | |
313 $args{'-rank'} = $hitrank; | |
314 my $hit = $self->factory('hit')->create(%args); | |
315 push @{$self->{'_hits'}}, $hit; | |
316 $self->{'_hsps'} = []; | |
317 return $hit; | |
318 } | |
319 | |
320 =head2 Factory methods | |
321 | |
322 =cut | |
323 | |
324 =head2 register_factory | |
325 | |
326 Title : register_factory | |
327 Usage : $handler->register_factory('TYPE',$factory); | |
328 Function: Register a specific factory for a object type class | |
329 Returns : none | |
330 Args : string representing the class and | |
331 Bio::Factory::ObjectFactoryI | |
332 | |
333 See L<Bio::Factory::ObjectFactoryI> for more information | |
334 | |
335 =cut | |
336 | |
337 sub register_factory{ | |
338 my ($self, $type,$f) = @_; | |
339 if( ! defined $f || ! ref($f) || | |
340 ! $f->isa('Bio::Factory::ObjectFactoryI') ) { | |
341 $self->throw("Cannot set factory to value $f".ref($f)."\n"); | |
342 } | |
343 $self->{'_factories'}->{lc($type)} = $f; | |
344 } | |
345 | |
346 | |
347 =head2 factory | |
348 | |
349 Title : factory | |
350 Usage : my $f = $handler->factory('TYPE'); | |
351 Function: Retrieves the associated factory for requested 'TYPE' | |
352 Returns : a Bio::Factory::ObjectFactoryI or undef if none registered | |
353 Args : name of factory class to retrieve | |
354 | |
355 See L<Bio::Factory::ObjectFactoryI> for more information | |
356 | |
357 =cut | |
358 | |
359 sub factory{ | |
360 my ($self,$type) = @_; | |
361 return $self->{'_factories'}->{lc($type)} || $self->throw("No factory registered for $type"); | |
362 } | |
363 | |
364 | |
365 1; |