comparison variant_effect_predictor/Bio/SearchIO/hmmer.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: hmmer.pm,v 1.13.2.3 2003/08/07 15:40:36 jason Exp $
2 #
3 # BioPerl module for Bio::SearchIO::hmmer
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch)
16
17 =head1 SYNOPSIS
18
19 # do not use this class directly it is available through Bio::SearchIO
20 use Bio::SearchIO;
21 my $in = new Bio::SearchIO(-format => 'hmmer',
22 -file => 't/data/L77119.hmmer');
23 while( my $result = $in->next_result ) {
24 # this is a Bio::Search::Result::HMMERResult object
25 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
26 while( my $hit = $result->next_hit ) {
27 print $hit->name(), "\n";
28 while( my $hsp = $hit->next_hsp ) {
29 print "length is ", $hsp->length(), "\n";
30 }
31 }
32 }
33
34 =head1 DESCRIPTION
35
36 This object implements a parser for HMMER output.
37
38 =head1 FEEDBACK
39
40 =head2 Mailing Lists
41
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to
44 the Bioperl mailing list. Your participation is much appreciated.
45
46 bioperl-l@bioperl.org - General discussion
47 http://bioperl.org/MailList.shtml - About the mailing lists
48
49 =head2 Reporting Bugs
50
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 of the bugs and their resolution. Bug reports can be submitted via
53 email or the web:
54
55 bioperl-bugs@bioperl.org
56 http://bugzilla.bioperl.org/
57
58 =head1 AUTHOR - Jason Stajich
59
60 Email jason@bioperl.org
61
62 Describe contact details here
63
64 =head1 CONTRIBUTORS
65
66 Additional contributors names and emails here
67
68 =head1 APPENDIX
69
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
72
73 =cut
74
75 package Bio::SearchIO::hmmer;
76 use vars qw(@ISA);
77 use strict;
78 use vars qw(@ISA %MAPPING %MODEMAP
79 $DEFAULT_HSP_FACTORY_CLASS
80 $DEFAULT_HIT_FACTORY_CLASS
81 $DEFAULT_RESULT_FACTORY_CLASS
82 );
83 use Bio::SearchIO;
84 use Bio::Factory::ObjectFactory;
85
86 @ISA = qw(Bio::SearchIO );
87
88 BEGIN {
89 # mapping of HMMER items to Bioperl hash keys
90 %MODEMAP = ('HMMER_Output' => 'result',
91 'Hit' => 'hit',
92 'Hsp' => 'hsp'
93 );
94
95 %MAPPING = (
96 'Hsp_bit-score' => 'HSP-bits',
97 'Hsp_score' => 'HSP-score',
98 'Hsp_evalue' => 'HSP-evalue',
99 'Hsp_query-from' => 'HSP-query_start',
100 'Hsp_query-to' => 'HSP-query_end',
101 'Hsp_hit-from' => 'HSP-hit_start',
102 'Hsp_hit-to' => 'HSP-hit_end',
103 'Hsp_positive' => 'HSP-conserved',
104 'Hsp_identity' => 'HSP-identical',
105 'Hsp_gaps' => 'HSP-hsp_gaps',
106 'Hsp_hitgaps' => 'HSP-hit_gaps',
107 'Hsp_querygaps' => 'HSP-query_gaps',
108 'Hsp_qseq' => 'HSP-query_seq',
109 'Hsp_hseq' => 'HSP-hit_seq',
110 'Hsp_midline' => 'HSP-homology_seq',
111 'Hsp_align-len' => 'HSP-hsp_length',
112 'Hsp_query-frame'=> 'HSP-query_frame',
113 'Hsp_hit-frame' => 'HSP-hit_frame',
114
115 'Hit_id' => 'HIT-name',
116 'Hit_len' => 'HIT-length',
117 'Hit_accession' => 'HIT-accession',
118 'Hit_desc' => 'HIT-description',
119 'Hit_signif' => 'HIT-significance',
120 'Hit_score' => 'HIT-score',
121
122 'HMMER_program' => 'RESULT-algorithm_name',
123 'HMMER_version' => 'RESULT-algorithm_version',
124 'HMMER_query-def' => 'RESULT-query_name',
125 'HMMER_query-len' => 'RESULT-query_length',
126 'HMMER_query-acc' => 'RESULT-query_accession',
127 'HMMER_querydesc' => 'RESULT-query_description',
128 'HMMER_hmm' => 'RESULT-hmm_name',
129 'HMMER_seqfile' => 'RESULT-sequence_file',
130 );
131 $DEFAULT_HIT_FACTORY_CLASS = 'Bio::Factory::HMMERHitFactory';
132 $DEFAULT_HSP_FACTORY_CLASS = 'Bio::Factory::HMMERHSPFactory';
133 $DEFAULT_RESULT_FACTORY_CLASS = 'Bio::Factory::HMMERResultFactory';
134 }
135
136
137 =head2 new
138
139 Title : new
140 Usage : my $obj = new Bio::SearchIO::hmmer();
141 Function: Builds a new Bio::SearchIO::hmmer object
142 Returns : Bio::SearchIO::hmmer
143 Args : -fh/-file => HMMER filename
144 -format => 'hmmer'
145
146 =cut
147
148 sub _initialize {
149 my ($self,@args) = @_;
150 $self->SUPER::_initialize(@args);
151 my $handler = $self->_eventHandler;
152 # new object initialization code
153 $handler->register_factory('result',
154 Bio::Factory::ObjectFactory->new(
155 -type => 'Bio::Search::Result::HMMERResult',
156 -interface => 'Bio::Search::Result::ResultI'));
157
158 $handler->register_factory('hit',
159 Bio::Factory::ObjectFactory->new(
160 -type => 'Bio::Search::Hit::HMMERHit',
161 -interface => 'Bio::Search::Hit::HitI'));
162
163 $handler->register_factory('hsp',
164 Bio::Factory::ObjectFactory->new(
165 -type => 'Bio::Search::HSP::HMMERHSP',
166 -interface => 'Bio::Search::HSP::HSPI'));
167 $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)';
168 }
169 =head2 next_result
170
171 Title : next_result
172 Usage : my $hit = $searchio->next_result;
173 Function: Returns the next Result from a search
174 Returns : Bio::Search::Result::ResultI object
175 Args : none
176
177 =cut
178
179 sub next_result{
180 my ($self) = @_;
181 my $seentop = 0;
182 my $reporttype;
183 my ($last,@hitinfo,@hspinfo,%hspinfo,%hitinfo);
184
185 my @alignemnt_lines;
186
187 $self->start_document();
188 while( defined ($_ = $self->_readline )) {
189 my $lineorig = $_;
190 chomp;
191 if( /^HMMER\s+(\S+)\s+\((.+)\)/o ) {
192 my ($prog,$version) = split;
193 if( $seentop ) {
194 $self->_pushback($_);
195 $self->end_element({'Name' => 'HMMER_Output'});
196 return $self->end_document();
197 }
198 $self->{'_hmmidline'} = $_;
199 $self->start_element({'Name' => 'HMMER_Output'});
200 $self->{'_result_count'}++;
201 $seentop = 1;
202 if( defined $last ) {
203 ($reporttype) = split(/\s+/,$last);
204 $self->element({'Name' => 'HMMER_program',
205 'Data' => uc ($reporttype)});
206 }
207 $self->element({'Name' => 'HMMER_version',
208 'Data' => $version});
209 } elsif( s/^HMM file:\s+//o ) {
210 $self->{'_hmmfileline'} = $lineorig;
211 $self->element({'Name' => 'HMMER_hmm',
212 'Data' => $_});
213 } elsif( s/^Sequence\s+(file|database):\s+//o ) {
214 $self->{'_hmmseqline'} = $lineorig;
215 $self->element({'Name' => 'HMMER_seqfile',
216 'Data' => $_});
217 } elsif( s/^Query(\s+(sequence|HMM))?:\s+//o) {
218 if( ! $seentop ) {
219 # we're in a multi-query report
220 $self->_pushback($self->{'_hmmidline'});
221 $self->_pushback($self->{'_hmmfileline'});
222 $self->_pushback($self->{'_hmmseqline'});
223 $self->_pushback($lineorig);
224 next;
225 }
226 s/\s+$//;
227 $self->element({'Name' => 'HMMER_query-def',
228 'Data' => $_});
229 } elsif( s/^Accession:\s+//o ) {
230 s/\s+$//;
231 $self->element({'Name' => 'HMMER_query-acc',
232 'Data' => $_});
233 } elsif( s/^Description:\s+//o ) {
234 s/\s+$//;
235 $self->element({'Name' => 'HMMER_querydesc',
236 'Data' => $_});
237 } elsif( defined $self->{'_reporttype'} &&
238 $self->{'_reporttype'} eq 'HMMSEARCH' ) {
239 # PROCESS HMMSEARCH RESULTS HERE
240 if( /^Scores for complete sequences/o ) {
241 while( defined($_ = $self->_readline) ) {
242 last if( /^\s+$/);
243 next if( /^Sequence\s+Description/o || /^\-\-\-/o );
244 my @line = split;
245 my ($name, $n,$evalue,$score)= (shift @line,
246 pop @line,
247 pop @line,
248 pop @line);
249 my $desc = join(' ', @line);
250 push @hitinfo, [ $name, $desc,$evalue,$score];
251 $hitinfo{$name} = $#hitinfo;
252 }
253 } elsif( /^Parsed for domains:/o ) {
254 @hspinfo = ();
255
256 while( defined($_ = $self->_readline) ) {
257 last if( /^\s+$/);
258 next if( /^(Model|Sequence)\s+Domain/o || /^\-\-\-/o );
259 if( my ($n,$domainnum,$domainct, @vals) =
260 (m!^(\S+)\s+ # host name
261 (\d+)/(\d+)\s+ # num/num (ie 1 of 2)
262 (\d+)\s+(\d+).+? # sequence start and end
263 (\d+)\s+(\d+)\s+ # hmm start and end
264 \S+\s+ # []
265 (\S+)\s+ # score
266 (\S+) # evalue
267 \s*$!ox ) ) {
268 # array lookup so that we can get rid of things
269 # when they've been processed
270 my $info = $hitinfo[$hitinfo{$n}];
271 if( !defined $info ) {
272 $self->warn("Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}");
273 next;
274 }
275 push @hspinfo, [ $n, @vals];
276 }
277 }
278 } elsif( /^Alignments of top/o ) {
279 my ($prelength,$lastdomain,$count,$width);
280 $count = 0;
281 my %domaincounter;
282 my $second_tier=0;
283 while( defined($_ = $self->_readline) ) {
284 next if( /^Align/o ||
285 /^\s+RF\s+[x\s]+$/o);
286 if( /^Histogram/o || m!^//!o ) {
287 if( $self->in_element('hsp')) {
288 $self->end_element({'Name' => 'Hsp'});
289 }
290 if( $self->within_element('hit')) {
291 $self->end_element({'Name' => 'Hit'});
292 }
293 last;
294 }
295 chomp;
296
297 if( /^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+from\s+(\d+)\s+to\s+(\d+)/o ) {
298 my ($name,$domainct,$domaintotal,
299 $from,$to) = ($1,$2,$3,$4,$5);
300 $domaincounter{$name}++;
301 if( ! defined $lastdomain || $lastdomain ne $name ) {
302 if( $self->within_element('hit') ) {
303 if( $self->within_element('hsp') ) {
304 $self->end_element({'Name' => 'Hsp'});
305 }
306 $self->end_element({'Name' => 'Hit'});
307 }
308 $self->start_element({'Name' => 'Hit'});
309 my $info = [@{$hitinfo[$hitinfo{$name}] || $self->throw("Could not find hit info for $name: Insure that your database contains only unique sequence names")}];
310 if( $info->[0] ne $name ) {
311 $self->throw("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name)");
312 }
313 $self->element({'Name' => 'Hit_id',
314 'Data' => shift @{$info}});
315 $self->element({'Name' => 'Hit_desc',
316 'Data' => shift @{$info}});
317 $self->element({'Name' => 'Hit_signif',
318 'Data' => shift @{$info}});
319 $self->element({'Name' => 'Hit_score',
320 'Data' => shift @{$info}});
321 }
322 $self->end_element({'Name' => 'Hsp'})
323 if $self->in_element('hsp');
324
325 $self->start_element({'Name' => 'Hsp'});
326 $self->element({'Name' => 'Hsp_identity',
327 'Data' => 0});
328 $self->element({'Name' => 'Hsp_positive',
329 'Data' => 0});
330 my $HSPinfo = shift @hspinfo;
331 my $id = shift @$HSPinfo;
332 if( $id ne $name ) {
333 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)");
334 }
335 if( $domaincounter{$name} == $domaintotal) {
336 $hitinfo[$hitinfo{$name}] = undef;
337 }
338
339 $self->element({'Name' => 'Hsp_hit-from',
340 'Data' => shift @$HSPinfo});
341 $self->element({'Name' => 'Hsp_hit-to',
342 'Data' => shift @$HSPinfo});
343 $self->element({'Name' => 'Hsp_query-from',
344 'Data' => shift @$HSPinfo});
345 $self->element({'Name' => 'Hsp_query-to',
346 'Data' => shift @$HSPinfo});
347 $self->element({'Name' => 'Hsp_score',
348 'Data' => shift @$HSPinfo});
349 $self->element({'Name' => 'Hsp_evalue',
350 'Data' => shift @$HSPinfo});
351 $lastdomain = $name;
352 } else {
353 # Might want to change this so that it
354 # accumulates all the of the alignment lines into
355 # three array slots and then tests for the
356 # end of the line
357 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain
358 $prelength = CORE::length($1);
359 $width = 0;
360 # $width = CORE::length($2);
361 $self->element({'Name' =>'Hsp_qseq',
362 'Data' => $2});
363 $count = 0;
364 $second_tier = 0;
365 } elsif ( /^(\s+)(\S+)\<\-\*\s*$/o ) { #end of domain
366 $self->element({'Name' =>'Hsp_qseq',
367 'Data' => $2});
368 $width = CORE::length($2);
369 $count = 0;
370 } elsif( ($count != 1 && /^\s+$/o) ||
371 CORE::length($_) == 0 ||
372 /^\s+\-?\*\s*$/ ) {
373 next;
374 } elsif( $count == 0 ) {
375 $prelength -= 3 unless ($second_tier++);
376 unless( defined $prelength) {
377 # $self->warn("prelength not set");
378 next;
379 }
380 $self->element({'Name' => 'Hsp_qseq',
381 'Data' => substr($_,$prelength)});
382 } elsif( $count == 1) {
383 if( ! defined $prelength ) {
384 $self->warn("prelength not set");
385 }
386 if( $width ) {
387 $self->element({'Name' => 'Hsp_midline',
388 'Data' => substr($_,
389 $prelength,
390 $width)});
391 } else {
392 $self->debug( "midline is $_\n") if( CORE::length($_) <= $prelength && $self->verbose > 0);
393 $self->element({'Name' => 'Hsp_midline',
394 'Data' => substr($_,
395 $prelength)});
396 }
397 } elsif( $count == 2) {
398 if( /^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o ) {
399 $self->element({'Name' => 'Hsp_hseq',
400 'Data' => $3});
401 } else {
402 $self->warn("unrecognized line: $_\n");
403 }
404 }
405 $count = 0 if $count++ >= 2;
406 }
407 }
408 } elsif( /^Histogram/o || m!^//!o ) {
409
410 while( my $HSPinfo = shift @hspinfo ) {
411 my $id = shift @$HSPinfo;
412 my $info = [@{$hitinfo[$hitinfo{$id}]}];
413 next unless defined $info;
414 $self->start_element({'Name' => 'Hit'});
415 $self->element({'Name' => 'Hit_id',
416 'Data' => shift @{$info}});
417 $self->element({'Name' => 'Hit_desc',
418 'Data' => shift @{$info}});
419 $self->element({'Name' => 'Hit_signif',
420 'Data' => shift @{$info}});
421 $self->element({'Name' => 'Hit_score',
422 'Data' => shift @{$info}});
423 $self->start_element({'Name' => 'Hsp'});
424 $self->element({'Name' => 'Hsp_query-from',
425 'Data' => shift @$HSPinfo});
426 $self->element({'Name' => 'Hsp_query-to',
427 'Data' => shift @$HSPinfo});
428 $self->element({'Name' => 'Hsp_hit-from',
429 'Data' => shift @$HSPinfo});
430 $self->element({'Name' => 'Hsp_hit-to',
431 'Data' => shift @$HSPinfo});
432 $self->element({'Name' => 'Hsp_score',
433 'Data' => shift @$HSPinfo});
434 $self->element({'Name' => 'Hsp_evalue',
435 'Data' => shift @$HSPinfo});
436 $self->element({'Name' => 'Hsp_identity',
437 'Data' => 0});
438 $self->element({'Name' => 'Hsp_positive',
439 'Data' => 0});
440 $self->element({'Name' => 'Hsp_positive',
441 'Data' => 0});
442 $self->end_element({'Name' => 'Hsp'});
443 $self->end_element({'Name' => 'Hit'});
444 }
445 @hitinfo = ();
446 %hitinfo = ();
447 last;
448 }
449 } elsif( defined $self->{'_reporttype'} &&
450 $self->{'_reporttype'} eq 'HMMPFAM' ) {
451 if( /^Scores for sequence family/o ) {
452 while( defined($_ = $self->_readline) ) {
453 last if( /^\s+$/);
454 next if( /^Model\s+Description/o || /^\-\-\-/o );
455 chomp;
456 my @line = split;
457 my ($model,$n,$evalue,$score) = (shift @line,
458 pop @line,
459 pop @line,
460 pop @line);
461 my $desc = join(' ', @line);
462 push @hitinfo, [ $model, $desc,$score,$evalue,$n];
463 $hitinfo{$model} = $#hitinfo;
464 }
465 } elsif( /^Parsed for domains:/o ) {
466 @hspinfo = ();
467 while( defined($_ = $self->_readline) ) {
468 last if( /^\s+$/);
469 next if( /^Model\s+Domain/o || /^\-\-\-/o );
470 chomp;
471 if( my ($n,$domainnum,$domainct,@vals) =
472 (m!^(\S+)\s+ # domain name
473 \s+(\d+)/(\d+)\s+ # domain num out of num
474 (\d+)\s+(\d+).+? # seq start, end
475 (\d+)\s+(\d+)\s+ # hmm start, end
476 \S+\s+ # []
477 (\S+)\s+ # score
478 (\S+) # evalue
479 \s*$!ox ) ) {
480 my $hindex = $hitinfo{$n};
481 if( ! defined $hindex ) {
482 push @hitinfo, [ $n, '',$vals[5],$vals[6],
483 $domainct];
484 $hitinfo{$n} = $#hitinfo;
485 $hindex = $#hitinfo;
486 }
487 my $info = $hitinfo[$hindex];
488 if( ! defined $info ) {
489 $self->warn("incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}");
490 next;
491 }
492 push @hspinfo, [ $n, @vals];
493 }
494 }
495 } elsif( /^Alignments of top/o ) {
496 my ($prelength,$lastdomain,$count,$width);
497 $count = 0;
498 my $second_tier=0;
499 while( defined($_ = $self->_readline) ) {
500 next if( /^Align/o || /^\s+RF\s+[x\s]+$/o);
501
502 if( /^Histogram/o || m!^//!o || /^Query sequence/o ) {
503 if( $self->in_element('hsp')) {
504 $self->end_element({'Name' => 'Hsp'});
505 }
506 if( $self->in_element('hit') ) {
507 $self->end_element({'Name' => 'Hit'});
508 }
509 $self->end_element({'Name' => 'HMMER_Output'});
510 if( /^Query sequence/o ) { $self->_pushback($_); }
511 return $self->end_document();
512 last;
513 }
514 chomp;
515 if( m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o ) {
516 my ($name,$from,$to) = ($1,$2,$3);
517 if( ! defined $lastdomain || $lastdomain ne $name ) {
518
519 if( $self->within_element('hit') ) {
520 if( $self->in_element('hsp') ) {
521 $self->end_element({'Name' => 'Hsp'});
522 }
523 $self->end_element({'Name' => 'Hit'});
524 }
525 $self->start_element({'Name' => 'Hit'});
526 my $info = [@{$hitinfo[$hitinfo{$name}]}];
527 if( ! defined $info ||
528 $info->[0] ne $name ) {
529 $self->warn("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name). We're
530 back loading this from the alignment information instead");
531 $info = [$name, '',
532 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox];
533 push @hitinfo, $info;
534 $hitinfo{$name} = $#hitinfo;
535 }
536
537 $self->element({'Name' => 'Hit_id',
538 'Data' => shift @{$info}});
539 $self->element({'Name' => 'Hit_desc',
540 'Data' => shift @{$info}});
541 $self->element({'Name' => 'Hit_score',
542 'Data' => shift @{$info}});
543 $self->element({'Name' => 'Hit_signif',
544 'Data' => shift @{$info}});
545 }
546 if( $self->within_element('hsp') ) {
547 #if( defined $lastdomain ) {
548 $self->end_element({'Name' => 'Hsp'});
549 }
550 $self->start_element({'Name' => 'Hsp'});
551 $self->element({'Name' => 'Hsp_identity',
552 'Data' => 0});
553 $self->element({'Name' => 'Hsp_positive',
554 'Data' => 0});
555 my $HSPinfo = shift @hspinfo;
556 my $id = shift @$HSPinfo;
557
558 if( $id ne $name ) {
559 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)");
560 }
561 $self->element({'Name' => 'Hsp_query-from',
562 'Data' => shift @$HSPinfo});
563 $self->element({'Name' => 'Hsp_query-to',
564 'Data' => shift @$HSPinfo});
565 $self->element({'Name' => 'Hsp_hit-from',
566 'Data' => shift @$HSPinfo});
567 $self->element({'Name' => 'Hsp_hit-to',
568 'Data' => shift @$HSPinfo});
569 $self->element({'Name' => 'Hsp_score',
570 'Data' => shift @$HSPinfo});
571 $self->element({'Name' => 'Hsp_evalue',
572 'Data' => shift @$HSPinfo});
573
574 $lastdomain = $name;
575 } else {
576 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain
577 $prelength = CORE::length($1);
578 $width = 0;
579 # $width = CORE::length($2);
580 $self->element({'Name' =>'Hsp_hseq',
581 'Data' => $2});
582 $count = 0;
583 $second_tier = 0;
584 } elsif ( /^(\s+)(\S+)\<\-?\*?\s*$/o ) { #end of domain
585 $prelength -= 3 unless ($second_tier++);
586 $self->element({'Name' =>'Hsp_hseq',
587 'Data' => $2});
588 $width = CORE::length($2);
589 $count = 0;
590 } elsif( CORE::length($_) == 0 ||
591 ($count != 1 && /^\s+$/o) ||
592 /^\s+\-?\*\s*$/ ) {
593 next;
594 } elsif( $count == 0 ) {
595 $prelength -= 3 unless ($second_tier++);
596 unless( defined $prelength) {
597 # $self->warn("prelength not set");
598 next;
599 }
600 $self->element({'Name' => 'Hsp_hseq',
601 'Data' => substr($_,$prelength)});
602 } elsif( $count == 1 ) {
603 if( ! defined $prelength ) {
604 $self->warn("prelength not set");
605 }
606 if( $width ) {
607 $self->element({'Name' => 'Hsp_midline',
608 'Data' => substr($_,$prelength,$width)});
609 } else {
610 $self->element({'Name' => 'Hsp_midline',
611 'Data' => substr($_,$prelength)});
612 }
613 } elsif( $count == 2 ) {
614 if( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o ||
615 /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o
616 ) {
617 $self->element({'Name' => 'Hsp_qseq',
618 'Data' => $3});
619 } else {
620 $self->warn("unrecognized line ($count): $_\n");
621 }
622 }
623 $count = 0 if $count++ >= 2;
624 }
625 }
626 } else {
627 $self->debug($_);
628 }
629 }
630 $last = $_;
631 }
632
633 $self->end_element({'Name' => 'HMMER_Output'}) unless ! $seentop;
634 return $self->end_document();
635 }
636
637 =head2 start_element
638
639 Title : start_element
640 Usage : $eventgenerator->start_element
641 Function: Handles a start element event
642 Returns : none
643 Args : hashref with at least 2 keys 'Data' and 'Name'
644
645
646 =cut
647
648 sub start_element{
649 my ($self,$data) = @_;
650 # we currently don't care about attributes
651 my $nm = $data->{'Name'};
652 my $type = $MODEMAP{$nm};
653 if( $type ) {
654 if( $self->_eventHandler->will_handle($type) ) {
655 my $func = sprintf("start_%s",lc $type);
656 $self->_eventHandler->$func($data->{'Attributes'});
657 }
658 unshift @{$self->{'_elements'}}, $type;
659 }
660 if(defined $type &&
661 $type eq 'result') {
662 $self->{'_values'} = {};
663 $self->{'_result'}= undef;
664 }
665 }
666
667 =head2 end_element
668
669 Title : start_element
670 Usage : $eventgenerator->end_element
671 Function: Handles an end element event
672 Returns : none
673 Args : hashref with at least 2 keys 'Data' and 'Name'
674
675
676 =cut
677
678 sub end_element {
679 my ($self,$data) = @_;
680 my $nm = $data->{'Name'};
681 my $type = $MODEMAP{$nm};
682 my $rc;
683
684 if($nm eq 'HMMER_program' ) {
685 if( $self->{'_last_data'} =~ /(HMM\S+)/i ) {
686 $self->{'_reporttype'} = uc $1;
687 }
688 }
689 # Hsp are sort of weird, in that they end when another
690 # object begins so have to detect this in end_element for now
691 if( $nm eq 'Hsp' ) {
692 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
693 $self->element({'Name' => $_,
694 'Data' => $self->{'_last_hspdata'}->{$_}});
695 }
696 $self->{'_last_hspdata'} = {};
697 }
698 if( $type ) {
699 if( $self->_eventHandler->will_handle($type) ) {
700 my $func = sprintf("end_%s",lc $type);
701 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
702 $self->{'_values'});
703 }
704 my $lastelem = shift @{$self->{'_elements'}};
705 } elsif( $MAPPING{$nm} ) {
706 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
707 my $key = (keys %{$MAPPING{$nm}})[0];
708 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
709 } else {
710 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
711 }
712 } else {
713 $self->debug( "unknown nm $nm, ignoring\n");
714 }
715 $self->{'_last_data'} = ''; # remove read data if we are at
716 # end of an element
717 $self->{'_result'} = $rc if( defined $type && $type eq 'result' );
718 return $rc;
719 }
720
721 =head2 element
722
723 Title : element
724 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
725 Function: Convience method that calls start_element, characters, end_element
726 Returns : none
727 Args : Hash ref with the keys 'Name' and 'Data'
728
729
730 =cut
731
732 sub element{
733 my ($self,$data) = @_;
734 $self->start_element($data);
735 $self->characters($data);
736 $self->end_element($data);
737 }
738
739 =head2 characters
740
741 Title : characters
742 Usage : $eventgenerator->characters($str)
743 Function: Send a character events
744 Returns : none
745 Args : string
746
747
748 =cut
749
750 sub characters{
751 my ($self,$data) = @_;
752
753 if( $self->in_element('hsp') &&
754 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o &&
755 defined $data->{'Data'} ) {
756 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
757 }
758 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
759
760 $self->{'_last_data'} = $data->{'Data'};
761 }
762
763 =head2 within_element
764
765 Title : within_element
766 Usage : if( $eventgenerator->within_element($element) ) {}
767 Function: Test if we are within a particular element
768 This is different than 'in' because within can be tested
769 for a whole block.
770 Returns : boolean
771 Args : string element name
772
773
774 =cut
775
776 sub within_element{
777 my ($self,$name) = @_;
778 return 0 if ( ! defined $name ||
779 ! defined $self->{'_elements'} ||
780 scalar @{$self->{'_elements'}} == 0) ;
781 foreach ( @{$self->{'_elements'}} ) {
782 return 1 if( $_ eq $name );
783 }
784 return 0;
785 }
786
787
788 =head2 in_element
789
790 Title : in_element
791 Usage : if( $eventgenerator->in_element($element) ) {}
792 Function: Test if we are in a particular element
793 This is different than 'within' because 'in' only
794 tests its immediete parent.
795 Returns : boolean
796 Args : string element name
797
798
799 =cut
800
801 sub in_element{
802 my ($self,$name) = @_;
803 return 0 if ! defined $self->{'_elements'}->[0];
804 return ( $self->{'_elements'}->[0] eq $name)
805 }
806
807 =head2 start_document
808
809 Title : start_document
810 Usage : $eventgenerator->start_document
811 Function: Handle a start document event
812 Returns : none
813 Args : none
814
815
816 =cut
817
818 sub start_document{
819 my ($self) = @_;
820 $self->{'_lasttype'} = '';
821 $self->{'_values'} = {};
822 $self->{'_result'}= undef;
823 $self->{'_elements'} = [];
824 }
825
826 =head2 end_document
827
828 Title : end_document
829 Usage : $eventgenerator->end_document
830 Function: Handles an end document event
831 Returns : Bio::Search::Result::ResultI object
832 Args : none
833
834
835 =cut
836
837 sub end_document{
838 my ($self) = @_;
839 return $self->{'_result'};
840 }
841
842 =head2 result_count
843
844 Title : result_count
845 Usage : my $count = $searchio->result_count
846 Function: Returns the number of results we have processed
847 Returns : integer
848 Args : none
849
850
851 =cut
852
853 sub result_count {
854 my $self = shift;
855 return $self->{'_result_count'};
856 }
857
858 1;