Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/hmmer.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: hmmer.pm,v 1.13.2.3 2003/08/07 15:40:36 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SearchIO::hmmer | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch) | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # do not use this class directly it is available through Bio::SearchIO | |
| 20 use Bio::SearchIO; | |
| 21 my $in = new Bio::SearchIO(-format => 'hmmer', | |
| 22 -file => 't/data/L77119.hmmer'); | |
| 23 while( my $result = $in->next_result ) { | |
| 24 # this is a Bio::Search::Result::HMMERResult object | |
| 25 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; | |
| 26 while( my $hit = $result->next_hit ) { | |
| 27 print $hit->name(), "\n"; | |
| 28 while( my $hsp = $hit->next_hsp ) { | |
| 29 print "length is ", $hsp->length(), "\n"; | |
| 30 } | |
| 31 } | |
| 32 } | |
| 33 | |
| 34 =head1 DESCRIPTION | |
| 35 | |
| 36 This object implements a parser for HMMER output. | |
| 37 | |
| 38 =head1 FEEDBACK | |
| 39 | |
| 40 =head2 Mailing Lists | |
| 41 | |
| 42 User feedback is an integral part of the evolution of this and other | |
| 43 Bioperl modules. Send your comments and suggestions preferably to | |
| 44 the Bioperl mailing list. Your participation is much appreciated. | |
| 45 | |
| 46 bioperl-l@bioperl.org - General discussion | |
| 47 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 48 | |
| 49 =head2 Reporting Bugs | |
| 50 | |
| 51 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 52 of the bugs and their resolution. Bug reports can be submitted via | |
| 53 email or the web: | |
| 54 | |
| 55 bioperl-bugs@bioperl.org | |
| 56 http://bugzilla.bioperl.org/ | |
| 57 | |
| 58 =head1 AUTHOR - Jason Stajich | |
| 59 | |
| 60 Email jason@bioperl.org | |
| 61 | |
| 62 Describe contact details here | |
| 63 | |
| 64 =head1 CONTRIBUTORS | |
| 65 | |
| 66 Additional contributors names and emails here | |
| 67 | |
| 68 =head1 APPENDIX | |
| 69 | |
| 70 The rest of the documentation details each of the object methods. | |
| 71 Internal methods are usually preceded with a _ | |
| 72 | |
| 73 =cut | |
| 74 | |
| 75 package Bio::SearchIO::hmmer; | |
| 76 use vars qw(@ISA); | |
| 77 use strict; | |
| 78 use vars qw(@ISA %MAPPING %MODEMAP | |
| 79 $DEFAULT_HSP_FACTORY_CLASS | |
| 80 $DEFAULT_HIT_FACTORY_CLASS | |
| 81 $DEFAULT_RESULT_FACTORY_CLASS | |
| 82 ); | |
| 83 use Bio::SearchIO; | |
| 84 use Bio::Factory::ObjectFactory; | |
| 85 | |
| 86 @ISA = qw(Bio::SearchIO ); | |
| 87 | |
| 88 BEGIN { | |
| 89 # mapping of HMMER items to Bioperl hash keys | |
| 90 %MODEMAP = ('HMMER_Output' => 'result', | |
| 91 'Hit' => 'hit', | |
| 92 'Hsp' => 'hsp' | |
| 93 ); | |
| 94 | |
| 95 %MAPPING = ( | |
| 96 'Hsp_bit-score' => 'HSP-bits', | |
| 97 'Hsp_score' => 'HSP-score', | |
| 98 'Hsp_evalue' => 'HSP-evalue', | |
| 99 'Hsp_query-from' => 'HSP-query_start', | |
| 100 'Hsp_query-to' => 'HSP-query_end', | |
| 101 'Hsp_hit-from' => 'HSP-hit_start', | |
| 102 'Hsp_hit-to' => 'HSP-hit_end', | |
| 103 'Hsp_positive' => 'HSP-conserved', | |
| 104 'Hsp_identity' => 'HSP-identical', | |
| 105 'Hsp_gaps' => 'HSP-hsp_gaps', | |
| 106 'Hsp_hitgaps' => 'HSP-hit_gaps', | |
| 107 'Hsp_querygaps' => 'HSP-query_gaps', | |
| 108 'Hsp_qseq' => 'HSP-query_seq', | |
| 109 'Hsp_hseq' => 'HSP-hit_seq', | |
| 110 'Hsp_midline' => 'HSP-homology_seq', | |
| 111 'Hsp_align-len' => 'HSP-hsp_length', | |
| 112 'Hsp_query-frame'=> 'HSP-query_frame', | |
| 113 'Hsp_hit-frame' => 'HSP-hit_frame', | |
| 114 | |
| 115 'Hit_id' => 'HIT-name', | |
| 116 'Hit_len' => 'HIT-length', | |
| 117 'Hit_accession' => 'HIT-accession', | |
| 118 'Hit_desc' => 'HIT-description', | |
| 119 'Hit_signif' => 'HIT-significance', | |
| 120 'Hit_score' => 'HIT-score', | |
| 121 | |
| 122 'HMMER_program' => 'RESULT-algorithm_name', | |
| 123 'HMMER_version' => 'RESULT-algorithm_version', | |
| 124 'HMMER_query-def' => 'RESULT-query_name', | |
| 125 'HMMER_query-len' => 'RESULT-query_length', | |
| 126 'HMMER_query-acc' => 'RESULT-query_accession', | |
| 127 'HMMER_querydesc' => 'RESULT-query_description', | |
| 128 'HMMER_hmm' => 'RESULT-hmm_name', | |
| 129 'HMMER_seqfile' => 'RESULT-sequence_file', | |
| 130 ); | |
| 131 $DEFAULT_HIT_FACTORY_CLASS = 'Bio::Factory::HMMERHitFactory'; | |
| 132 $DEFAULT_HSP_FACTORY_CLASS = 'Bio::Factory::HMMERHSPFactory'; | |
| 133 $DEFAULT_RESULT_FACTORY_CLASS = 'Bio::Factory::HMMERResultFactory'; | |
| 134 } | |
| 135 | |
| 136 | |
| 137 =head2 new | |
| 138 | |
| 139 Title : new | |
| 140 Usage : my $obj = new Bio::SearchIO::hmmer(); | |
| 141 Function: Builds a new Bio::SearchIO::hmmer object | |
| 142 Returns : Bio::SearchIO::hmmer | |
| 143 Args : -fh/-file => HMMER filename | |
| 144 -format => 'hmmer' | |
| 145 | |
| 146 =cut | |
| 147 | |
| 148 sub _initialize { | |
| 149 my ($self,@args) = @_; | |
| 150 $self->SUPER::_initialize(@args); | |
| 151 my $handler = $self->_eventHandler; | |
| 152 # new object initialization code | |
| 153 $handler->register_factory('result', | |
| 154 Bio::Factory::ObjectFactory->new( | |
| 155 -type => 'Bio::Search::Result::HMMERResult', | |
| 156 -interface => 'Bio::Search::Result::ResultI')); | |
| 157 | |
| 158 $handler->register_factory('hit', | |
| 159 Bio::Factory::ObjectFactory->new( | |
| 160 -type => 'Bio::Search::Hit::HMMERHit', | |
| 161 -interface => 'Bio::Search::Hit::HitI')); | |
| 162 | |
| 163 $handler->register_factory('hsp', | |
| 164 Bio::Factory::ObjectFactory->new( | |
| 165 -type => 'Bio::Search::HSP::HMMERHSP', | |
| 166 -interface => 'Bio::Search::HSP::HSPI')); | |
| 167 $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)'; | |
| 168 } | |
| 169 =head2 next_result | |
| 170 | |
| 171 Title : next_result | |
| 172 Usage : my $hit = $searchio->next_result; | |
| 173 Function: Returns the next Result from a search | |
| 174 Returns : Bio::Search::Result::ResultI object | |
| 175 Args : none | |
| 176 | |
| 177 =cut | |
| 178 | |
| 179 sub next_result{ | |
| 180 my ($self) = @_; | |
| 181 my $seentop = 0; | |
| 182 my $reporttype; | |
| 183 my ($last,@hitinfo,@hspinfo,%hspinfo,%hitinfo); | |
| 184 | |
| 185 my @alignemnt_lines; | |
| 186 | |
| 187 $self->start_document(); | |
| 188 while( defined ($_ = $self->_readline )) { | |
| 189 my $lineorig = $_; | |
| 190 chomp; | |
| 191 if( /^HMMER\s+(\S+)\s+\((.+)\)/o ) { | |
| 192 my ($prog,$version) = split; | |
| 193 if( $seentop ) { | |
| 194 $self->_pushback($_); | |
| 195 $self->end_element({'Name' => 'HMMER_Output'}); | |
| 196 return $self->end_document(); | |
| 197 } | |
| 198 $self->{'_hmmidline'} = $_; | |
| 199 $self->start_element({'Name' => 'HMMER_Output'}); | |
| 200 $self->{'_result_count'}++; | |
| 201 $seentop = 1; | |
| 202 if( defined $last ) { | |
| 203 ($reporttype) = split(/\s+/,$last); | |
| 204 $self->element({'Name' => 'HMMER_program', | |
| 205 'Data' => uc ($reporttype)}); | |
| 206 } | |
| 207 $self->element({'Name' => 'HMMER_version', | |
| 208 'Data' => $version}); | |
| 209 } elsif( s/^HMM file:\s+//o ) { | |
| 210 $self->{'_hmmfileline'} = $lineorig; | |
| 211 $self->element({'Name' => 'HMMER_hmm', | |
| 212 'Data' => $_}); | |
| 213 } elsif( s/^Sequence\s+(file|database):\s+//o ) { | |
| 214 $self->{'_hmmseqline'} = $lineorig; | |
| 215 $self->element({'Name' => 'HMMER_seqfile', | |
| 216 'Data' => $_}); | |
| 217 } elsif( s/^Query(\s+(sequence|HMM))?:\s+//o) { | |
| 218 if( ! $seentop ) { | |
| 219 # we're in a multi-query report | |
| 220 $self->_pushback($self->{'_hmmidline'}); | |
| 221 $self->_pushback($self->{'_hmmfileline'}); | |
| 222 $self->_pushback($self->{'_hmmseqline'}); | |
| 223 $self->_pushback($lineorig); | |
| 224 next; | |
| 225 } | |
| 226 s/\s+$//; | |
| 227 $self->element({'Name' => 'HMMER_query-def', | |
| 228 'Data' => $_}); | |
| 229 } elsif( s/^Accession:\s+//o ) { | |
| 230 s/\s+$//; | |
| 231 $self->element({'Name' => 'HMMER_query-acc', | |
| 232 'Data' => $_}); | |
| 233 } elsif( s/^Description:\s+//o ) { | |
| 234 s/\s+$//; | |
| 235 $self->element({'Name' => 'HMMER_querydesc', | |
| 236 'Data' => $_}); | |
| 237 } elsif( defined $self->{'_reporttype'} && | |
| 238 $self->{'_reporttype'} eq 'HMMSEARCH' ) { | |
| 239 # PROCESS HMMSEARCH RESULTS HERE | |
| 240 if( /^Scores for complete sequences/o ) { | |
| 241 while( defined($_ = $self->_readline) ) { | |
| 242 last if( /^\s+$/); | |
| 243 next if( /^Sequence\s+Description/o || /^\-\-\-/o ); | |
| 244 my @line = split; | |
| 245 my ($name, $n,$evalue,$score)= (shift @line, | |
| 246 pop @line, | |
| 247 pop @line, | |
| 248 pop @line); | |
| 249 my $desc = join(' ', @line); | |
| 250 push @hitinfo, [ $name, $desc,$evalue,$score]; | |
| 251 $hitinfo{$name} = $#hitinfo; | |
| 252 } | |
| 253 } elsif( /^Parsed for domains:/o ) { | |
| 254 @hspinfo = (); | |
| 255 | |
| 256 while( defined($_ = $self->_readline) ) { | |
| 257 last if( /^\s+$/); | |
| 258 next if( /^(Model|Sequence)\s+Domain/o || /^\-\-\-/o ); | |
| 259 if( my ($n,$domainnum,$domainct, @vals) = | |
| 260 (m!^(\S+)\s+ # host name | |
| 261 (\d+)/(\d+)\s+ # num/num (ie 1 of 2) | |
| 262 (\d+)\s+(\d+).+? # sequence start and end | |
| 263 (\d+)\s+(\d+)\s+ # hmm start and end | |
| 264 \S+\s+ # [] | |
| 265 (\S+)\s+ # score | |
| 266 (\S+) # evalue | |
| 267 \s*$!ox ) ) { | |
| 268 # array lookup so that we can get rid of things | |
| 269 # when they've been processed | |
| 270 my $info = $hitinfo[$hitinfo{$n}]; | |
| 271 if( !defined $info ) { | |
| 272 $self->warn("Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"); | |
| 273 next; | |
| 274 } | |
| 275 push @hspinfo, [ $n, @vals]; | |
| 276 } | |
| 277 } | |
| 278 } elsif( /^Alignments of top/o ) { | |
| 279 my ($prelength,$lastdomain,$count,$width); | |
| 280 $count = 0; | |
| 281 my %domaincounter; | |
| 282 my $second_tier=0; | |
| 283 while( defined($_ = $self->_readline) ) { | |
| 284 next if( /^Align/o || | |
| 285 /^\s+RF\s+[x\s]+$/o); | |
| 286 if( /^Histogram/o || m!^//!o ) { | |
| 287 if( $self->in_element('hsp')) { | |
| 288 $self->end_element({'Name' => 'Hsp'}); | |
| 289 } | |
| 290 if( $self->within_element('hit')) { | |
| 291 $self->end_element({'Name' => 'Hit'}); | |
| 292 } | |
| 293 last; | |
| 294 } | |
| 295 chomp; | |
| 296 | |
| 297 if( /^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+from\s+(\d+)\s+to\s+(\d+)/o ) { | |
| 298 my ($name,$domainct,$domaintotal, | |
| 299 $from,$to) = ($1,$2,$3,$4,$5); | |
| 300 $domaincounter{$name}++; | |
| 301 if( ! defined $lastdomain || $lastdomain ne $name ) { | |
| 302 if( $self->within_element('hit') ) { | |
| 303 if( $self->within_element('hsp') ) { | |
| 304 $self->end_element({'Name' => 'Hsp'}); | |
| 305 } | |
| 306 $self->end_element({'Name' => 'Hit'}); | |
| 307 } | |
| 308 $self->start_element({'Name' => 'Hit'}); | |
| 309 my $info = [@{$hitinfo[$hitinfo{$name}] || $self->throw("Could not find hit info for $name: Insure that your database contains only unique sequence names")}]; | |
| 310 if( $info->[0] ne $name ) { | |
| 311 $self->throw("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name)"); | |
| 312 } | |
| 313 $self->element({'Name' => 'Hit_id', | |
| 314 'Data' => shift @{$info}}); | |
| 315 $self->element({'Name' => 'Hit_desc', | |
| 316 'Data' => shift @{$info}}); | |
| 317 $self->element({'Name' => 'Hit_signif', | |
| 318 'Data' => shift @{$info}}); | |
| 319 $self->element({'Name' => 'Hit_score', | |
| 320 'Data' => shift @{$info}}); | |
| 321 } | |
| 322 $self->end_element({'Name' => 'Hsp'}) | |
| 323 if $self->in_element('hsp'); | |
| 324 | |
| 325 $self->start_element({'Name' => 'Hsp'}); | |
| 326 $self->element({'Name' => 'Hsp_identity', | |
| 327 'Data' => 0}); | |
| 328 $self->element({'Name' => 'Hsp_positive', | |
| 329 'Data' => 0}); | |
| 330 my $HSPinfo = shift @hspinfo; | |
| 331 my $id = shift @$HSPinfo; | |
| 332 if( $id ne $name ) { | |
| 333 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); | |
| 334 } | |
| 335 if( $domaincounter{$name} == $domaintotal) { | |
| 336 $hitinfo[$hitinfo{$name}] = undef; | |
| 337 } | |
| 338 | |
| 339 $self->element({'Name' => 'Hsp_hit-from', | |
| 340 'Data' => shift @$HSPinfo}); | |
| 341 $self->element({'Name' => 'Hsp_hit-to', | |
| 342 'Data' => shift @$HSPinfo}); | |
| 343 $self->element({'Name' => 'Hsp_query-from', | |
| 344 'Data' => shift @$HSPinfo}); | |
| 345 $self->element({'Name' => 'Hsp_query-to', | |
| 346 'Data' => shift @$HSPinfo}); | |
| 347 $self->element({'Name' => 'Hsp_score', | |
| 348 'Data' => shift @$HSPinfo}); | |
| 349 $self->element({'Name' => 'Hsp_evalue', | |
| 350 'Data' => shift @$HSPinfo}); | |
| 351 $lastdomain = $name; | |
| 352 } else { | |
| 353 # Might want to change this so that it | |
| 354 # accumulates all the of the alignment lines into | |
| 355 # three array slots and then tests for the | |
| 356 # end of the line | |
| 357 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain | |
| 358 $prelength = CORE::length($1); | |
| 359 $width = 0; | |
| 360 # $width = CORE::length($2); | |
| 361 $self->element({'Name' =>'Hsp_qseq', | |
| 362 'Data' => $2}); | |
| 363 $count = 0; | |
| 364 $second_tier = 0; | |
| 365 } elsif ( /^(\s+)(\S+)\<\-\*\s*$/o ) { #end of domain | |
| 366 $self->element({'Name' =>'Hsp_qseq', | |
| 367 'Data' => $2}); | |
| 368 $width = CORE::length($2); | |
| 369 $count = 0; | |
| 370 } elsif( ($count != 1 && /^\s+$/o) || | |
| 371 CORE::length($_) == 0 || | |
| 372 /^\s+\-?\*\s*$/ ) { | |
| 373 next; | |
| 374 } elsif( $count == 0 ) { | |
| 375 $prelength -= 3 unless ($second_tier++); | |
| 376 unless( defined $prelength) { | |
| 377 # $self->warn("prelength not set"); | |
| 378 next; | |
| 379 } | |
| 380 $self->element({'Name' => 'Hsp_qseq', | |
| 381 'Data' => substr($_,$prelength)}); | |
| 382 } elsif( $count == 1) { | |
| 383 if( ! defined $prelength ) { | |
| 384 $self->warn("prelength not set"); | |
| 385 } | |
| 386 if( $width ) { | |
| 387 $self->element({'Name' => 'Hsp_midline', | |
| 388 'Data' => substr($_, | |
| 389 $prelength, | |
| 390 $width)}); | |
| 391 } else { | |
| 392 $self->debug( "midline is $_\n") if( CORE::length($_) <= $prelength && $self->verbose > 0); | |
| 393 $self->element({'Name' => 'Hsp_midline', | |
| 394 'Data' => substr($_, | |
| 395 $prelength)}); | |
| 396 } | |
| 397 } elsif( $count == 2) { | |
| 398 if( /^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o ) { | |
| 399 $self->element({'Name' => 'Hsp_hseq', | |
| 400 'Data' => $3}); | |
| 401 } else { | |
| 402 $self->warn("unrecognized line: $_\n"); | |
| 403 } | |
| 404 } | |
| 405 $count = 0 if $count++ >= 2; | |
| 406 } | |
| 407 } | |
| 408 } elsif( /^Histogram/o || m!^//!o ) { | |
| 409 | |
| 410 while( my $HSPinfo = shift @hspinfo ) { | |
| 411 my $id = shift @$HSPinfo; | |
| 412 my $info = [@{$hitinfo[$hitinfo{$id}]}]; | |
| 413 next unless defined $info; | |
| 414 $self->start_element({'Name' => 'Hit'}); | |
| 415 $self->element({'Name' => 'Hit_id', | |
| 416 'Data' => shift @{$info}}); | |
| 417 $self->element({'Name' => 'Hit_desc', | |
| 418 'Data' => shift @{$info}}); | |
| 419 $self->element({'Name' => 'Hit_signif', | |
| 420 'Data' => shift @{$info}}); | |
| 421 $self->element({'Name' => 'Hit_score', | |
| 422 'Data' => shift @{$info}}); | |
| 423 $self->start_element({'Name' => 'Hsp'}); | |
| 424 $self->element({'Name' => 'Hsp_query-from', | |
| 425 'Data' => shift @$HSPinfo}); | |
| 426 $self->element({'Name' => 'Hsp_query-to', | |
| 427 'Data' => shift @$HSPinfo}); | |
| 428 $self->element({'Name' => 'Hsp_hit-from', | |
| 429 'Data' => shift @$HSPinfo}); | |
| 430 $self->element({'Name' => 'Hsp_hit-to', | |
| 431 'Data' => shift @$HSPinfo}); | |
| 432 $self->element({'Name' => 'Hsp_score', | |
| 433 'Data' => shift @$HSPinfo}); | |
| 434 $self->element({'Name' => 'Hsp_evalue', | |
| 435 'Data' => shift @$HSPinfo}); | |
| 436 $self->element({'Name' => 'Hsp_identity', | |
| 437 'Data' => 0}); | |
| 438 $self->element({'Name' => 'Hsp_positive', | |
| 439 'Data' => 0}); | |
| 440 $self->element({'Name' => 'Hsp_positive', | |
| 441 'Data' => 0}); | |
| 442 $self->end_element({'Name' => 'Hsp'}); | |
| 443 $self->end_element({'Name' => 'Hit'}); | |
| 444 } | |
| 445 @hitinfo = (); | |
| 446 %hitinfo = (); | |
| 447 last; | |
| 448 } | |
| 449 } elsif( defined $self->{'_reporttype'} && | |
| 450 $self->{'_reporttype'} eq 'HMMPFAM' ) { | |
| 451 if( /^Scores for sequence family/o ) { | |
| 452 while( defined($_ = $self->_readline) ) { | |
| 453 last if( /^\s+$/); | |
| 454 next if( /^Model\s+Description/o || /^\-\-\-/o ); | |
| 455 chomp; | |
| 456 my @line = split; | |
| 457 my ($model,$n,$evalue,$score) = (shift @line, | |
| 458 pop @line, | |
| 459 pop @line, | |
| 460 pop @line); | |
| 461 my $desc = join(' ', @line); | |
| 462 push @hitinfo, [ $model, $desc,$score,$evalue,$n]; | |
| 463 $hitinfo{$model} = $#hitinfo; | |
| 464 } | |
| 465 } elsif( /^Parsed for domains:/o ) { | |
| 466 @hspinfo = (); | |
| 467 while( defined($_ = $self->_readline) ) { | |
| 468 last if( /^\s+$/); | |
| 469 next if( /^Model\s+Domain/o || /^\-\-\-/o ); | |
| 470 chomp; | |
| 471 if( my ($n,$domainnum,$domainct,@vals) = | |
| 472 (m!^(\S+)\s+ # domain name | |
| 473 \s+(\d+)/(\d+)\s+ # domain num out of num | |
| 474 (\d+)\s+(\d+).+? # seq start, end | |
| 475 (\d+)\s+(\d+)\s+ # hmm start, end | |
| 476 \S+\s+ # [] | |
| 477 (\S+)\s+ # score | |
| 478 (\S+) # evalue | |
| 479 \s*$!ox ) ) { | |
| 480 my $hindex = $hitinfo{$n}; | |
| 481 if( ! defined $hindex ) { | |
| 482 push @hitinfo, [ $n, '',$vals[5],$vals[6], | |
| 483 $domainct]; | |
| 484 $hitinfo{$n} = $#hitinfo; | |
| 485 $hindex = $#hitinfo; | |
| 486 } | |
| 487 my $info = $hitinfo[$hindex]; | |
| 488 if( ! defined $info ) { | |
| 489 $self->warn("incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"); | |
| 490 next; | |
| 491 } | |
| 492 push @hspinfo, [ $n, @vals]; | |
| 493 } | |
| 494 } | |
| 495 } elsif( /^Alignments of top/o ) { | |
| 496 my ($prelength,$lastdomain,$count,$width); | |
| 497 $count = 0; | |
| 498 my $second_tier=0; | |
| 499 while( defined($_ = $self->_readline) ) { | |
| 500 next if( /^Align/o || /^\s+RF\s+[x\s]+$/o); | |
| 501 | |
| 502 if( /^Histogram/o || m!^//!o || /^Query sequence/o ) { | |
| 503 if( $self->in_element('hsp')) { | |
| 504 $self->end_element({'Name' => 'Hsp'}); | |
| 505 } | |
| 506 if( $self->in_element('hit') ) { | |
| 507 $self->end_element({'Name' => 'Hit'}); | |
| 508 } | |
| 509 $self->end_element({'Name' => 'HMMER_Output'}); | |
| 510 if( /^Query sequence/o ) { $self->_pushback($_); } | |
| 511 return $self->end_document(); | |
| 512 last; | |
| 513 } | |
| 514 chomp; | |
| 515 if( m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o ) { | |
| 516 my ($name,$from,$to) = ($1,$2,$3); | |
| 517 if( ! defined $lastdomain || $lastdomain ne $name ) { | |
| 518 | |
| 519 if( $self->within_element('hit') ) { | |
| 520 if( $self->in_element('hsp') ) { | |
| 521 $self->end_element({'Name' => 'Hsp'}); | |
| 522 } | |
| 523 $self->end_element({'Name' => 'Hit'}); | |
| 524 } | |
| 525 $self->start_element({'Name' => 'Hit'}); | |
| 526 my $info = [@{$hitinfo[$hitinfo{$name}]}]; | |
| 527 if( ! defined $info || | |
| 528 $info->[0] ne $name ) { | |
| 529 $self->warn("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name). We're | |
| 530 back loading this from the alignment information instead"); | |
| 531 $info = [$name, '', | |
| 532 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox]; | |
| 533 push @hitinfo, $info; | |
| 534 $hitinfo{$name} = $#hitinfo; | |
| 535 } | |
| 536 | |
| 537 $self->element({'Name' => 'Hit_id', | |
| 538 'Data' => shift @{$info}}); | |
| 539 $self->element({'Name' => 'Hit_desc', | |
| 540 'Data' => shift @{$info}}); | |
| 541 $self->element({'Name' => 'Hit_score', | |
| 542 'Data' => shift @{$info}}); | |
| 543 $self->element({'Name' => 'Hit_signif', | |
| 544 'Data' => shift @{$info}}); | |
| 545 } | |
| 546 if( $self->within_element('hsp') ) { | |
| 547 #if( defined $lastdomain ) { | |
| 548 $self->end_element({'Name' => 'Hsp'}); | |
| 549 } | |
| 550 $self->start_element({'Name' => 'Hsp'}); | |
| 551 $self->element({'Name' => 'Hsp_identity', | |
| 552 'Data' => 0}); | |
| 553 $self->element({'Name' => 'Hsp_positive', | |
| 554 'Data' => 0}); | |
| 555 my $HSPinfo = shift @hspinfo; | |
| 556 my $id = shift @$HSPinfo; | |
| 557 | |
| 558 if( $id ne $name ) { | |
| 559 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); | |
| 560 } | |
| 561 $self->element({'Name' => 'Hsp_query-from', | |
| 562 'Data' => shift @$HSPinfo}); | |
| 563 $self->element({'Name' => 'Hsp_query-to', | |
| 564 'Data' => shift @$HSPinfo}); | |
| 565 $self->element({'Name' => 'Hsp_hit-from', | |
| 566 'Data' => shift @$HSPinfo}); | |
| 567 $self->element({'Name' => 'Hsp_hit-to', | |
| 568 'Data' => shift @$HSPinfo}); | |
| 569 $self->element({'Name' => 'Hsp_score', | |
| 570 'Data' => shift @$HSPinfo}); | |
| 571 $self->element({'Name' => 'Hsp_evalue', | |
| 572 'Data' => shift @$HSPinfo}); | |
| 573 | |
| 574 $lastdomain = $name; | |
| 575 } else { | |
| 576 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain | |
| 577 $prelength = CORE::length($1); | |
| 578 $width = 0; | |
| 579 # $width = CORE::length($2); | |
| 580 $self->element({'Name' =>'Hsp_hseq', | |
| 581 'Data' => $2}); | |
| 582 $count = 0; | |
| 583 $second_tier = 0; | |
| 584 } elsif ( /^(\s+)(\S+)\<\-?\*?\s*$/o ) { #end of domain | |
| 585 $prelength -= 3 unless ($second_tier++); | |
| 586 $self->element({'Name' =>'Hsp_hseq', | |
| 587 'Data' => $2}); | |
| 588 $width = CORE::length($2); | |
| 589 $count = 0; | |
| 590 } elsif( CORE::length($_) == 0 || | |
| 591 ($count != 1 && /^\s+$/o) || | |
| 592 /^\s+\-?\*\s*$/ ) { | |
| 593 next; | |
| 594 } elsif( $count == 0 ) { | |
| 595 $prelength -= 3 unless ($second_tier++); | |
| 596 unless( defined $prelength) { | |
| 597 # $self->warn("prelength not set"); | |
| 598 next; | |
| 599 } | |
| 600 $self->element({'Name' => 'Hsp_hseq', | |
| 601 'Data' => substr($_,$prelength)}); | |
| 602 } elsif( $count == 1 ) { | |
| 603 if( ! defined $prelength ) { | |
| 604 $self->warn("prelength not set"); | |
| 605 } | |
| 606 if( $width ) { | |
| 607 $self->element({'Name' => 'Hsp_midline', | |
| 608 'Data' => substr($_,$prelength,$width)}); | |
| 609 } else { | |
| 610 $self->element({'Name' => 'Hsp_midline', | |
| 611 'Data' => substr($_,$prelength)}); | |
| 612 } | |
| 613 } elsif( $count == 2 ) { | |
| 614 if( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o || | |
| 615 /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o | |
| 616 ) { | |
| 617 $self->element({'Name' => 'Hsp_qseq', | |
| 618 'Data' => $3}); | |
| 619 } else { | |
| 620 $self->warn("unrecognized line ($count): $_\n"); | |
| 621 } | |
| 622 } | |
| 623 $count = 0 if $count++ >= 2; | |
| 624 } | |
| 625 } | |
| 626 } else { | |
| 627 $self->debug($_); | |
| 628 } | |
| 629 } | |
| 630 $last = $_; | |
| 631 } | |
| 632 | |
| 633 $self->end_element({'Name' => 'HMMER_Output'}) unless ! $seentop; | |
| 634 return $self->end_document(); | |
| 635 } | |
| 636 | |
| 637 =head2 start_element | |
| 638 | |
| 639 Title : start_element | |
| 640 Usage : $eventgenerator->start_element | |
| 641 Function: Handles a start element event | |
| 642 Returns : none | |
| 643 Args : hashref with at least 2 keys 'Data' and 'Name' | |
| 644 | |
| 645 | |
| 646 =cut | |
| 647 | |
| 648 sub start_element{ | |
| 649 my ($self,$data) = @_; | |
| 650 # we currently don't care about attributes | |
| 651 my $nm = $data->{'Name'}; | |
| 652 my $type = $MODEMAP{$nm}; | |
| 653 if( $type ) { | |
| 654 if( $self->_eventHandler->will_handle($type) ) { | |
| 655 my $func = sprintf("start_%s",lc $type); | |
| 656 $self->_eventHandler->$func($data->{'Attributes'}); | |
| 657 } | |
| 658 unshift @{$self->{'_elements'}}, $type; | |
| 659 } | |
| 660 if(defined $type && | |
| 661 $type eq 'result') { | |
| 662 $self->{'_values'} = {}; | |
| 663 $self->{'_result'}= undef; | |
| 664 } | |
| 665 } | |
| 666 | |
| 667 =head2 end_element | |
| 668 | |
| 669 Title : start_element | |
| 670 Usage : $eventgenerator->end_element | |
| 671 Function: Handles an end element event | |
| 672 Returns : none | |
| 673 Args : hashref with at least 2 keys 'Data' and 'Name' | |
| 674 | |
| 675 | |
| 676 =cut | |
| 677 | |
| 678 sub end_element { | |
| 679 my ($self,$data) = @_; | |
| 680 my $nm = $data->{'Name'}; | |
| 681 my $type = $MODEMAP{$nm}; | |
| 682 my $rc; | |
| 683 | |
| 684 if($nm eq 'HMMER_program' ) { | |
| 685 if( $self->{'_last_data'} =~ /(HMM\S+)/i ) { | |
| 686 $self->{'_reporttype'} = uc $1; | |
| 687 } | |
| 688 } | |
| 689 # Hsp are sort of weird, in that they end when another | |
| 690 # object begins so have to detect this in end_element for now | |
| 691 if( $nm eq 'Hsp' ) { | |
| 692 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) { | |
| 693 $self->element({'Name' => $_, | |
| 694 'Data' => $self->{'_last_hspdata'}->{$_}}); | |
| 695 } | |
| 696 $self->{'_last_hspdata'} = {}; | |
| 697 } | |
| 698 if( $type ) { | |
| 699 if( $self->_eventHandler->will_handle($type) ) { | |
| 700 my $func = sprintf("end_%s",lc $type); | |
| 701 $rc = $self->_eventHandler->$func($self->{'_reporttype'}, | |
| 702 $self->{'_values'}); | |
| 703 } | |
| 704 my $lastelem = shift @{$self->{'_elements'}}; | |
| 705 } elsif( $MAPPING{$nm} ) { | |
| 706 if ( ref($MAPPING{$nm}) =~ /hash/i ) { | |
| 707 my $key = (keys %{$MAPPING{$nm}})[0]; | |
| 708 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'}; | |
| 709 } else { | |
| 710 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'}; | |
| 711 } | |
| 712 } else { | |
| 713 $self->debug( "unknown nm $nm, ignoring\n"); | |
| 714 } | |
| 715 $self->{'_last_data'} = ''; # remove read data if we are at | |
| 716 # end of an element | |
| 717 $self->{'_result'} = $rc if( defined $type && $type eq 'result' ); | |
| 718 return $rc; | |
| 719 } | |
| 720 | |
| 721 =head2 element | |
| 722 | |
| 723 Title : element | |
| 724 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); | |
| 725 Function: Convience method that calls start_element, characters, end_element | |
| 726 Returns : none | |
| 727 Args : Hash ref with the keys 'Name' and 'Data' | |
| 728 | |
| 729 | |
| 730 =cut | |
| 731 | |
| 732 sub element{ | |
| 733 my ($self,$data) = @_; | |
| 734 $self->start_element($data); | |
| 735 $self->characters($data); | |
| 736 $self->end_element($data); | |
| 737 } | |
| 738 | |
| 739 =head2 characters | |
| 740 | |
| 741 Title : characters | |
| 742 Usage : $eventgenerator->characters($str) | |
| 743 Function: Send a character events | |
| 744 Returns : none | |
| 745 Args : string | |
| 746 | |
| 747 | |
| 748 =cut | |
| 749 | |
| 750 sub characters{ | |
| 751 my ($self,$data) = @_; | |
| 752 | |
| 753 if( $self->in_element('hsp') && | |
| 754 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o && | |
| 755 defined $data->{'Data'} ) { | |
| 756 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'}; | |
| 757 } | |
| 758 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); | |
| 759 | |
| 760 $self->{'_last_data'} = $data->{'Data'}; | |
| 761 } | |
| 762 | |
| 763 =head2 within_element | |
| 764 | |
| 765 Title : within_element | |
| 766 Usage : if( $eventgenerator->within_element($element) ) {} | |
| 767 Function: Test if we are within a particular element | |
| 768 This is different than 'in' because within can be tested | |
| 769 for a whole block. | |
| 770 Returns : boolean | |
| 771 Args : string element name | |
| 772 | |
| 773 | |
| 774 =cut | |
| 775 | |
| 776 sub within_element{ | |
| 777 my ($self,$name) = @_; | |
| 778 return 0 if ( ! defined $name || | |
| 779 ! defined $self->{'_elements'} || | |
| 780 scalar @{$self->{'_elements'}} == 0) ; | |
| 781 foreach ( @{$self->{'_elements'}} ) { | |
| 782 return 1 if( $_ eq $name ); | |
| 783 } | |
| 784 return 0; | |
| 785 } | |
| 786 | |
| 787 | |
| 788 =head2 in_element | |
| 789 | |
| 790 Title : in_element | |
| 791 Usage : if( $eventgenerator->in_element($element) ) {} | |
| 792 Function: Test if we are in a particular element | |
| 793 This is different than 'within' because 'in' only | |
| 794 tests its immediete parent. | |
| 795 Returns : boolean | |
| 796 Args : string element name | |
| 797 | |
| 798 | |
| 799 =cut | |
| 800 | |
| 801 sub in_element{ | |
| 802 my ($self,$name) = @_; | |
| 803 return 0 if ! defined $self->{'_elements'}->[0]; | |
| 804 return ( $self->{'_elements'}->[0] eq $name) | |
| 805 } | |
| 806 | |
| 807 =head2 start_document | |
| 808 | |
| 809 Title : start_document | |
| 810 Usage : $eventgenerator->start_document | |
| 811 Function: Handle a start document event | |
| 812 Returns : none | |
| 813 Args : none | |
| 814 | |
| 815 | |
| 816 =cut | |
| 817 | |
| 818 sub start_document{ | |
| 819 my ($self) = @_; | |
| 820 $self->{'_lasttype'} = ''; | |
| 821 $self->{'_values'} = {}; | |
| 822 $self->{'_result'}= undef; | |
| 823 $self->{'_elements'} = []; | |
| 824 } | |
| 825 | |
| 826 =head2 end_document | |
| 827 | |
| 828 Title : end_document | |
| 829 Usage : $eventgenerator->end_document | |
| 830 Function: Handles an end document event | |
| 831 Returns : Bio::Search::Result::ResultI object | |
| 832 Args : none | |
| 833 | |
| 834 | |
| 835 =cut | |
| 836 | |
| 837 sub end_document{ | |
| 838 my ($self) = @_; | |
| 839 return $self->{'_result'}; | |
| 840 } | |
| 841 | |
| 842 =head2 result_count | |
| 843 | |
| 844 Title : result_count | |
| 845 Usage : my $count = $searchio->result_count | |
| 846 Function: Returns the number of results we have processed | |
| 847 Returns : integer | |
| 848 Args : none | |
| 849 | |
| 850 | |
| 851 =cut | |
| 852 | |
| 853 sub result_count { | |
| 854 my $self = shift; | |
| 855 return $self->{'_result_count'}; | |
| 856 } | |
| 857 | |
| 858 1; |
