Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/SearchIO/hmmer.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/SearchIO/hmmer.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,858 @@ +# $Id: hmmer.pm,v 1.13.2.3 2003/08/07 15:40:36 jason Exp $ +# +# BioPerl module for Bio::SearchIO::hmmer +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch) + +=head1 SYNOPSIS + + # do not use this class directly it is available through Bio::SearchIO + use Bio::SearchIO; + my $in = new Bio::SearchIO(-format => 'hmmer', + -file => 't/data/L77119.hmmer'); + while( my $result = $in->next_result ) { + # this is a Bio::Search::Result::HMMERResult object + print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; + while( my $hit = $result->next_hit ) { + print $hit->name(), "\n"; + while( my $hsp = $hit->next_hsp ) { + print "length is ", $hsp->length(), "\n"; + } + } + } + +=head1 DESCRIPTION + +This object implements a parser for HMMER output. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +package Bio::SearchIO::hmmer; +use vars qw(@ISA); +use strict; +use vars qw(@ISA %MAPPING %MODEMAP + $DEFAULT_HSP_FACTORY_CLASS + $DEFAULT_HIT_FACTORY_CLASS + $DEFAULT_RESULT_FACTORY_CLASS + ); +use Bio::SearchIO; +use Bio::Factory::ObjectFactory; + +@ISA = qw(Bio::SearchIO ); + +BEGIN { + # mapping of HMMER items to Bioperl hash keys + %MODEMAP = ('HMMER_Output' => 'result', + 'Hit' => 'hit', + 'Hsp' => 'hsp' + ); + + %MAPPING = ( + 'Hsp_bit-score' => 'HSP-bits', + 'Hsp_score' => 'HSP-score', + 'Hsp_evalue' => 'HSP-evalue', + 'Hsp_query-from' => 'HSP-query_start', + 'Hsp_query-to' => 'HSP-query_end', + 'Hsp_hit-from' => 'HSP-hit_start', + 'Hsp_hit-to' => 'HSP-hit_end', + 'Hsp_positive' => 'HSP-conserved', + 'Hsp_identity' => 'HSP-identical', + 'Hsp_gaps' => 'HSP-hsp_gaps', + 'Hsp_hitgaps' => 'HSP-hit_gaps', + 'Hsp_querygaps' => 'HSP-query_gaps', + 'Hsp_qseq' => 'HSP-query_seq', + 'Hsp_hseq' => 'HSP-hit_seq', + 'Hsp_midline' => 'HSP-homology_seq', + 'Hsp_align-len' => 'HSP-hsp_length', + 'Hsp_query-frame'=> 'HSP-query_frame', + 'Hsp_hit-frame' => 'HSP-hit_frame', + + 'Hit_id' => 'HIT-name', + 'Hit_len' => 'HIT-length', + 'Hit_accession' => 'HIT-accession', + 'Hit_desc' => 'HIT-description', + 'Hit_signif' => 'HIT-significance', + 'Hit_score' => 'HIT-score', + + 'HMMER_program' => 'RESULT-algorithm_name', + 'HMMER_version' => 'RESULT-algorithm_version', + 'HMMER_query-def' => 'RESULT-query_name', + 'HMMER_query-len' => 'RESULT-query_length', + 'HMMER_query-acc' => 'RESULT-query_accession', + 'HMMER_querydesc' => 'RESULT-query_description', + 'HMMER_hmm' => 'RESULT-hmm_name', + 'HMMER_seqfile' => 'RESULT-sequence_file', + ); + $DEFAULT_HIT_FACTORY_CLASS = 'Bio::Factory::HMMERHitFactory'; + $DEFAULT_HSP_FACTORY_CLASS = 'Bio::Factory::HMMERHSPFactory'; + $DEFAULT_RESULT_FACTORY_CLASS = 'Bio::Factory::HMMERResultFactory'; +} + + +=head2 new + + Title : new + Usage : my $obj = new Bio::SearchIO::hmmer(); + Function: Builds a new Bio::SearchIO::hmmer object + Returns : Bio::SearchIO::hmmer + Args : -fh/-file => HMMER filename + -format => 'hmmer' + +=cut + +sub _initialize { + my ($self,@args) = @_; + $self->SUPER::_initialize(@args); + my $handler = $self->_eventHandler; + # new object initialization code + $handler->register_factory('result', + Bio::Factory::ObjectFactory->new( + -type => 'Bio::Search::Result::HMMERResult', + -interface => 'Bio::Search::Result::ResultI')); + + $handler->register_factory('hit', + Bio::Factory::ObjectFactory->new( + -type => 'Bio::Search::Hit::HMMERHit', + -interface => 'Bio::Search::Hit::HitI')); + + $handler->register_factory('hsp', + Bio::Factory::ObjectFactory->new( + -type => 'Bio::Search::HSP::HMMERHSP', + -interface => 'Bio::Search::HSP::HSPI')); + $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)'; +} +=head2 next_result + + Title : next_result + Usage : my $hit = $searchio->next_result; + Function: Returns the next Result from a search + Returns : Bio::Search::Result::ResultI object + Args : none + +=cut + +sub next_result{ + my ($self) = @_; + my $seentop = 0; + my $reporttype; + my ($last,@hitinfo,@hspinfo,%hspinfo,%hitinfo); + + my @alignemnt_lines; + + $self->start_document(); + while( defined ($_ = $self->_readline )) { + my $lineorig = $_; + chomp; + if( /^HMMER\s+(\S+)\s+\((.+)\)/o ) { + my ($prog,$version) = split; + if( $seentop ) { + $self->_pushback($_); + $self->end_element({'Name' => 'HMMER_Output'}); + return $self->end_document(); + } + $self->{'_hmmidline'} = $_; + $self->start_element({'Name' => 'HMMER_Output'}); + $self->{'_result_count'}++; + $seentop = 1; + if( defined $last ) { + ($reporttype) = split(/\s+/,$last); + $self->element({'Name' => 'HMMER_program', + 'Data' => uc ($reporttype)}); + } + $self->element({'Name' => 'HMMER_version', + 'Data' => $version}); + } elsif( s/^HMM file:\s+//o ) { + $self->{'_hmmfileline'} = $lineorig; + $self->element({'Name' => 'HMMER_hmm', + 'Data' => $_}); + } elsif( s/^Sequence\s+(file|database):\s+//o ) { + $self->{'_hmmseqline'} = $lineorig; + $self->element({'Name' => 'HMMER_seqfile', + 'Data' => $_}); + } elsif( s/^Query(\s+(sequence|HMM))?:\s+//o) { + if( ! $seentop ) { + # we're in a multi-query report + $self->_pushback($self->{'_hmmidline'}); + $self->_pushback($self->{'_hmmfileline'}); + $self->_pushback($self->{'_hmmseqline'}); + $self->_pushback($lineorig); + next; + } + s/\s+$//; + $self->element({'Name' => 'HMMER_query-def', + 'Data' => $_}); + } elsif( s/^Accession:\s+//o ) { + s/\s+$//; + $self->element({'Name' => 'HMMER_query-acc', + 'Data' => $_}); + } elsif( s/^Description:\s+//o ) { + s/\s+$//; + $self->element({'Name' => 'HMMER_querydesc', + 'Data' => $_}); + } elsif( defined $self->{'_reporttype'} && + $self->{'_reporttype'} eq 'HMMSEARCH' ) { + # PROCESS HMMSEARCH RESULTS HERE + if( /^Scores for complete sequences/o ) { + while( defined($_ = $self->_readline) ) { + last if( /^\s+$/); + next if( /^Sequence\s+Description/o || /^\-\-\-/o ); + my @line = split; + my ($name, $n,$evalue,$score)= (shift @line, + pop @line, + pop @line, + pop @line); + my $desc = join(' ', @line); + push @hitinfo, [ $name, $desc,$evalue,$score]; + $hitinfo{$name} = $#hitinfo; + } + } elsif( /^Parsed for domains:/o ) { + @hspinfo = (); + + while( defined($_ = $self->_readline) ) { + last if( /^\s+$/); + next if( /^(Model|Sequence)\s+Domain/o || /^\-\-\-/o ); + if( my ($n,$domainnum,$domainct, @vals) = + (m!^(\S+)\s+ # host name + (\d+)/(\d+)\s+ # num/num (ie 1 of 2) + (\d+)\s+(\d+).+? # sequence start and end + (\d+)\s+(\d+)\s+ # hmm start and end + \S+\s+ # [] + (\S+)\s+ # score + (\S+) # evalue + \s*$!ox ) ) { + # array lookup so that we can get rid of things + # when they've been processed + my $info = $hitinfo[$hitinfo{$n}]; + if( !defined $info ) { + $self->warn("Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"); + next; + } + push @hspinfo, [ $n, @vals]; + } + } + } elsif( /^Alignments of top/o ) { + my ($prelength,$lastdomain,$count,$width); + $count = 0; + my %domaincounter; + my $second_tier=0; + while( defined($_ = $self->_readline) ) { + next if( /^Align/o || + /^\s+RF\s+[x\s]+$/o); + if( /^Histogram/o || m!^//!o ) { + if( $self->in_element('hsp')) { + $self->end_element({'Name' => 'Hsp'}); + } + if( $self->within_element('hit')) { + $self->end_element({'Name' => 'Hit'}); + } + last; + } + chomp; + + if( /^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+from\s+(\d+)\s+to\s+(\d+)/o ) { + my ($name,$domainct,$domaintotal, + $from,$to) = ($1,$2,$3,$4,$5); + $domaincounter{$name}++; + if( ! defined $lastdomain || $lastdomain ne $name ) { + if( $self->within_element('hit') ) { + if( $self->within_element('hsp') ) { + $self->end_element({'Name' => 'Hsp'}); + } + $self->end_element({'Name' => 'Hit'}); + } + $self->start_element({'Name' => 'Hit'}); + my $info = [@{$hitinfo[$hitinfo{$name}] || $self->throw("Could not find hit info for $name: Insure that your database contains only unique sequence names")}]; + if( $info->[0] ne $name ) { + $self->throw("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name)"); + } + $self->element({'Name' => 'Hit_id', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_desc', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_signif', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_score', + 'Data' => shift @{$info}}); + } + $self->end_element({'Name' => 'Hsp'}) + if $self->in_element('hsp'); + + $self->start_element({'Name' => 'Hsp'}); + $self->element({'Name' => 'Hsp_identity', + 'Data' => 0}); + $self->element({'Name' => 'Hsp_positive', + 'Data' => 0}); + my $HSPinfo = shift @hspinfo; + my $id = shift @$HSPinfo; + if( $id ne $name ) { + $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); + } + if( $domaincounter{$name} == $domaintotal) { + $hitinfo[$hitinfo{$name}] = undef; + } + + $self->element({'Name' => 'Hsp_hit-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_hit-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_query-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_query-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_score', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_evalue', + 'Data' => shift @$HSPinfo}); + $lastdomain = $name; + } else { + # Might want to change this so that it + # accumulates all the of the alignment lines into + # three array slots and then tests for the + # end of the line + if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain + $prelength = CORE::length($1); + $width = 0; + # $width = CORE::length($2); + $self->element({'Name' =>'Hsp_qseq', + 'Data' => $2}); + $count = 0; + $second_tier = 0; + } elsif ( /^(\s+)(\S+)\<\-\*\s*$/o ) { #end of domain + $self->element({'Name' =>'Hsp_qseq', + 'Data' => $2}); + $width = CORE::length($2); + $count = 0; + } elsif( ($count != 1 && /^\s+$/o) || + CORE::length($_) == 0 || + /^\s+\-?\*\s*$/ ) { + next; + } elsif( $count == 0 ) { + $prelength -= 3 unless ($second_tier++); + unless( defined $prelength) { + # $self->warn("prelength not set"); + next; + } + $self->element({'Name' => 'Hsp_qseq', + 'Data' => substr($_,$prelength)}); + } elsif( $count == 1) { + if( ! defined $prelength ) { + $self->warn("prelength not set"); + } + if( $width ) { + $self->element({'Name' => 'Hsp_midline', + 'Data' => substr($_, + $prelength, + $width)}); + } else { + $self->debug( "midline is $_\n") if( CORE::length($_) <= $prelength && $self->verbose > 0); + $self->element({'Name' => 'Hsp_midline', + 'Data' => substr($_, + $prelength)}); + } + } elsif( $count == 2) { + if( /^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o ) { + $self->element({'Name' => 'Hsp_hseq', + 'Data' => $3}); + } else { + $self->warn("unrecognized line: $_\n"); + } + } + $count = 0 if $count++ >= 2; + } + } + } elsif( /^Histogram/o || m!^//!o ) { + + while( my $HSPinfo = shift @hspinfo ) { + my $id = shift @$HSPinfo; + my $info = [@{$hitinfo[$hitinfo{$id}]}]; + next unless defined $info; + $self->start_element({'Name' => 'Hit'}); + $self->element({'Name' => 'Hit_id', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_desc', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_signif', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_score', + 'Data' => shift @{$info}}); + $self->start_element({'Name' => 'Hsp'}); + $self->element({'Name' => 'Hsp_query-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_query-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_hit-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_hit-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_score', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_evalue', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_identity', + 'Data' => 0}); + $self->element({'Name' => 'Hsp_positive', + 'Data' => 0}); + $self->element({'Name' => 'Hsp_positive', + 'Data' => 0}); + $self->end_element({'Name' => 'Hsp'}); + $self->end_element({'Name' => 'Hit'}); + } + @hitinfo = (); + %hitinfo = (); + last; + } + } elsif( defined $self->{'_reporttype'} && + $self->{'_reporttype'} eq 'HMMPFAM' ) { + if( /^Scores for sequence family/o ) { + while( defined($_ = $self->_readline) ) { + last if( /^\s+$/); + next if( /^Model\s+Description/o || /^\-\-\-/o ); + chomp; + my @line = split; + my ($model,$n,$evalue,$score) = (shift @line, + pop @line, + pop @line, + pop @line); + my $desc = join(' ', @line); + push @hitinfo, [ $model, $desc,$score,$evalue,$n]; + $hitinfo{$model} = $#hitinfo; + } + } elsif( /^Parsed for domains:/o ) { + @hspinfo = (); + while( defined($_ = $self->_readline) ) { + last if( /^\s+$/); + next if( /^Model\s+Domain/o || /^\-\-\-/o ); + chomp; + if( my ($n,$domainnum,$domainct,@vals) = + (m!^(\S+)\s+ # domain name + \s+(\d+)/(\d+)\s+ # domain num out of num + (\d+)\s+(\d+).+? # seq start, end + (\d+)\s+(\d+)\s+ # hmm start, end + \S+\s+ # [] + (\S+)\s+ # score + (\S+) # evalue + \s*$!ox ) ) { + my $hindex = $hitinfo{$n}; + if( ! defined $hindex ) { + push @hitinfo, [ $n, '',$vals[5],$vals[6], + $domainct]; + $hitinfo{$n} = $#hitinfo; + $hindex = $#hitinfo; + } + my $info = $hitinfo[$hindex]; + if( ! defined $info ) { + $self->warn("incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"); + next; + } + push @hspinfo, [ $n, @vals]; + } + } + } elsif( /^Alignments of top/o ) { + my ($prelength,$lastdomain,$count,$width); + $count = 0; + my $second_tier=0; + while( defined($_ = $self->_readline) ) { + next if( /^Align/o || /^\s+RF\s+[x\s]+$/o); + + if( /^Histogram/o || m!^//!o || /^Query sequence/o ) { + if( $self->in_element('hsp')) { + $self->end_element({'Name' => 'Hsp'}); + } + if( $self->in_element('hit') ) { + $self->end_element({'Name' => 'Hit'}); + } + $self->end_element({'Name' => 'HMMER_Output'}); + if( /^Query sequence/o ) { $self->_pushback($_); } + return $self->end_document(); + last; + } + chomp; + if( m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o ) { + my ($name,$from,$to) = ($1,$2,$3); + if( ! defined $lastdomain || $lastdomain ne $name ) { + + if( $self->within_element('hit') ) { + if( $self->in_element('hsp') ) { + $self->end_element({'Name' => 'Hsp'}); + } + $self->end_element({'Name' => 'Hit'}); + } + $self->start_element({'Name' => 'Hit'}); + my $info = [@{$hitinfo[$hitinfo{$name}]}]; + if( ! defined $info || + $info->[0] ne $name ) { + $self->warn("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name). We're +back loading this from the alignment information instead"); + $info = [$name, '', + /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox]; + push @hitinfo, $info; + $hitinfo{$name} = $#hitinfo; + } + + $self->element({'Name' => 'Hit_id', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_desc', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_score', + 'Data' => shift @{$info}}); + $self->element({'Name' => 'Hit_signif', + 'Data' => shift @{$info}}); + } + if( $self->within_element('hsp') ) { + #if( defined $lastdomain ) { + $self->end_element({'Name' => 'Hsp'}); + } + $self->start_element({'Name' => 'Hsp'}); + $self->element({'Name' => 'Hsp_identity', + 'Data' => 0}); + $self->element({'Name' => 'Hsp_positive', + 'Data' => 0}); + my $HSPinfo = shift @hspinfo; + my $id = shift @$HSPinfo; + + if( $id ne $name ) { + $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); + } + $self->element({'Name' => 'Hsp_query-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_query-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_hit-from', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_hit-to', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_score', + 'Data' => shift @$HSPinfo}); + $self->element({'Name' => 'Hsp_evalue', + 'Data' => shift @$HSPinfo}); + + $lastdomain = $name; + } else { + if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain + $prelength = CORE::length($1); + $width = 0; + # $width = CORE::length($2); + $self->element({'Name' =>'Hsp_hseq', + 'Data' => $2}); + $count = 0; + $second_tier = 0; + } elsif ( /^(\s+)(\S+)\<\-?\*?\s*$/o ) { #end of domain + $prelength -= 3 unless ($second_tier++); + $self->element({'Name' =>'Hsp_hseq', + 'Data' => $2}); + $width = CORE::length($2); + $count = 0; + } elsif( CORE::length($_) == 0 || + ($count != 1 && /^\s+$/o) || + /^\s+\-?\*\s*$/ ) { + next; + } elsif( $count == 0 ) { + $prelength -= 3 unless ($second_tier++); + unless( defined $prelength) { + # $self->warn("prelength not set"); + next; + } + $self->element({'Name' => 'Hsp_hseq', + 'Data' => substr($_,$prelength)}); + } elsif( $count == 1 ) { + if( ! defined $prelength ) { + $self->warn("prelength not set"); + } + if( $width ) { + $self->element({'Name' => 'Hsp_midline', + 'Data' => substr($_,$prelength,$width)}); + } else { + $self->element({'Name' => 'Hsp_midline', + 'Data' => substr($_,$prelength)}); + } + } elsif( $count == 2 ) { + if( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o || + /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o + ) { + $self->element({'Name' => 'Hsp_qseq', + 'Data' => $3}); + } else { + $self->warn("unrecognized line ($count): $_\n"); + } + } + $count = 0 if $count++ >= 2; + } + } + } else { + $self->debug($_); + } + } + $last = $_; + } + + $self->end_element({'Name' => 'HMMER_Output'}) unless ! $seentop; + return $self->end_document(); +} + +=head2 start_element + + Title : start_element + Usage : $eventgenerator->start_element + Function: Handles a start element event + Returns : none + Args : hashref with at least 2 keys 'Data' and 'Name' + + +=cut + +sub start_element{ + my ($self,$data) = @_; + # we currently don't care about attributes + my $nm = $data->{'Name'}; + my $type = $MODEMAP{$nm}; + if( $type ) { + if( $self->_eventHandler->will_handle($type) ) { + my $func = sprintf("start_%s",lc $type); + $self->_eventHandler->$func($data->{'Attributes'}); + } + unshift @{$self->{'_elements'}}, $type; + } + if(defined $type && + $type eq 'result') { + $self->{'_values'} = {}; + $self->{'_result'}= undef; + } +} + +=head2 end_element + + Title : start_element + Usage : $eventgenerator->end_element + Function: Handles an end element event + Returns : none + Args : hashref with at least 2 keys 'Data' and 'Name' + + +=cut + +sub end_element { + my ($self,$data) = @_; + my $nm = $data->{'Name'}; + my $type = $MODEMAP{$nm}; + my $rc; + + if($nm eq 'HMMER_program' ) { + if( $self->{'_last_data'} =~ /(HMM\S+)/i ) { + $self->{'_reporttype'} = uc $1; + } + } + # Hsp are sort of weird, in that they end when another + # object begins so have to detect this in end_element for now + if( $nm eq 'Hsp' ) { + foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) { + $self->element({'Name' => $_, + 'Data' => $self->{'_last_hspdata'}->{$_}}); + } + $self->{'_last_hspdata'} = {}; + } + if( $type ) { + if( $self->_eventHandler->will_handle($type) ) { + my $func = sprintf("end_%s",lc $type); + $rc = $self->_eventHandler->$func($self->{'_reporttype'}, + $self->{'_values'}); + } + my $lastelem = shift @{$self->{'_elements'}}; + } elsif( $MAPPING{$nm} ) { + if ( ref($MAPPING{$nm}) =~ /hash/i ) { + my $key = (keys %{$MAPPING{$nm}})[0]; + $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'}; + } else { + $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'}; + } + } else { + $self->debug( "unknown nm $nm, ignoring\n"); + } + $self->{'_last_data'} = ''; # remove read data if we are at + # end of an element + $self->{'_result'} = $rc if( defined $type && $type eq 'result' ); + return $rc; +} + +=head2 element + + Title : element + Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); + Function: Convience method that calls start_element, characters, end_element + Returns : none + Args : Hash ref with the keys 'Name' and 'Data' + + +=cut + +sub element{ + my ($self,$data) = @_; + $self->start_element($data); + $self->characters($data); + $self->end_element($data); +} + +=head2 characters + + Title : characters + Usage : $eventgenerator->characters($str) + Function: Send a character events + Returns : none + Args : string + + +=cut + +sub characters{ + my ($self,$data) = @_; + + if( $self->in_element('hsp') && + $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o && + defined $data->{'Data'} ) { + $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'}; + } + return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); + + $self->{'_last_data'} = $data->{'Data'}; +} + +=head2 within_element + + Title : within_element + Usage : if( $eventgenerator->within_element($element) ) {} + Function: Test if we are within a particular element + This is different than 'in' because within can be tested + for a whole block. + Returns : boolean + Args : string element name + + +=cut + +sub within_element{ + my ($self,$name) = @_; + return 0 if ( ! defined $name || + ! defined $self->{'_elements'} || + scalar @{$self->{'_elements'}} == 0) ; + foreach ( @{$self->{'_elements'}} ) { + return 1 if( $_ eq $name ); + } + return 0; +} + + +=head2 in_element + + Title : in_element + Usage : if( $eventgenerator->in_element($element) ) {} + Function: Test if we are in a particular element + This is different than 'within' because 'in' only + tests its immediete parent. + Returns : boolean + Args : string element name + + +=cut + +sub in_element{ + my ($self,$name) = @_; + return 0 if ! defined $self->{'_elements'}->[0]; + return ( $self->{'_elements'}->[0] eq $name) +} + +=head2 start_document + + Title : start_document + Usage : $eventgenerator->start_document + Function: Handle a start document event + Returns : none + Args : none + + +=cut + +sub start_document{ + my ($self) = @_; + $self->{'_lasttype'} = ''; + $self->{'_values'} = {}; + $self->{'_result'}= undef; + $self->{'_elements'} = []; +} + +=head2 end_document + + Title : end_document + Usage : $eventgenerator->end_document + Function: Handles an end document event + Returns : Bio::Search::Result::ResultI object + Args : none + + +=cut + +sub end_document{ + my ($self) = @_; + return $self->{'_result'}; +} + +=head2 result_count + + Title : result_count + Usage : my $count = $searchio->result_count + Function: Returns the number of results we have processed + Returns : integer + Args : none + + +=cut + +sub result_count { + my $self = shift; + return $self->{'_result_count'}; +} + +1;