Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/SearchIO/hmmer.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: hmmer.pm,v 1.13.2.3 2003/08/07 15:40:36 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SearchIO::hmmer | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch) | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # do not use this class directly it is available through Bio::SearchIO | |
20 use Bio::SearchIO; | |
21 my $in = new Bio::SearchIO(-format => 'hmmer', | |
22 -file => 't/data/L77119.hmmer'); | |
23 while( my $result = $in->next_result ) { | |
24 # this is a Bio::Search::Result::HMMERResult object | |
25 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n"; | |
26 while( my $hit = $result->next_hit ) { | |
27 print $hit->name(), "\n"; | |
28 while( my $hsp = $hit->next_hsp ) { | |
29 print "length is ", $hsp->length(), "\n"; | |
30 } | |
31 } | |
32 } | |
33 | |
34 =head1 DESCRIPTION | |
35 | |
36 This object implements a parser for HMMER output. | |
37 | |
38 =head1 FEEDBACK | |
39 | |
40 =head2 Mailing Lists | |
41 | |
42 User feedback is an integral part of the evolution of this and other | |
43 Bioperl modules. Send your comments and suggestions preferably to | |
44 the Bioperl mailing list. Your participation is much appreciated. | |
45 | |
46 bioperl-l@bioperl.org - General discussion | |
47 http://bioperl.org/MailList.shtml - About the mailing lists | |
48 | |
49 =head2 Reporting Bugs | |
50 | |
51 Report bugs to the Bioperl bug tracking system to help us keep track | |
52 of the bugs and their resolution. Bug reports can be submitted via | |
53 email or the web: | |
54 | |
55 bioperl-bugs@bioperl.org | |
56 http://bugzilla.bioperl.org/ | |
57 | |
58 =head1 AUTHOR - Jason Stajich | |
59 | |
60 Email jason@bioperl.org | |
61 | |
62 Describe contact details here | |
63 | |
64 =head1 CONTRIBUTORS | |
65 | |
66 Additional contributors names and emails here | |
67 | |
68 =head1 APPENDIX | |
69 | |
70 The rest of the documentation details each of the object methods. | |
71 Internal methods are usually preceded with a _ | |
72 | |
73 =cut | |
74 | |
75 package Bio::SearchIO::hmmer; | |
76 use vars qw(@ISA); | |
77 use strict; | |
78 use vars qw(@ISA %MAPPING %MODEMAP | |
79 $DEFAULT_HSP_FACTORY_CLASS | |
80 $DEFAULT_HIT_FACTORY_CLASS | |
81 $DEFAULT_RESULT_FACTORY_CLASS | |
82 ); | |
83 use Bio::SearchIO; | |
84 use Bio::Factory::ObjectFactory; | |
85 | |
86 @ISA = qw(Bio::SearchIO ); | |
87 | |
88 BEGIN { | |
89 # mapping of HMMER items to Bioperl hash keys | |
90 %MODEMAP = ('HMMER_Output' => 'result', | |
91 'Hit' => 'hit', | |
92 'Hsp' => 'hsp' | |
93 ); | |
94 | |
95 %MAPPING = ( | |
96 'Hsp_bit-score' => 'HSP-bits', | |
97 'Hsp_score' => 'HSP-score', | |
98 'Hsp_evalue' => 'HSP-evalue', | |
99 'Hsp_query-from' => 'HSP-query_start', | |
100 'Hsp_query-to' => 'HSP-query_end', | |
101 'Hsp_hit-from' => 'HSP-hit_start', | |
102 'Hsp_hit-to' => 'HSP-hit_end', | |
103 'Hsp_positive' => 'HSP-conserved', | |
104 'Hsp_identity' => 'HSP-identical', | |
105 'Hsp_gaps' => 'HSP-hsp_gaps', | |
106 'Hsp_hitgaps' => 'HSP-hit_gaps', | |
107 'Hsp_querygaps' => 'HSP-query_gaps', | |
108 'Hsp_qseq' => 'HSP-query_seq', | |
109 'Hsp_hseq' => 'HSP-hit_seq', | |
110 'Hsp_midline' => 'HSP-homology_seq', | |
111 'Hsp_align-len' => 'HSP-hsp_length', | |
112 'Hsp_query-frame'=> 'HSP-query_frame', | |
113 'Hsp_hit-frame' => 'HSP-hit_frame', | |
114 | |
115 'Hit_id' => 'HIT-name', | |
116 'Hit_len' => 'HIT-length', | |
117 'Hit_accession' => 'HIT-accession', | |
118 'Hit_desc' => 'HIT-description', | |
119 'Hit_signif' => 'HIT-significance', | |
120 'Hit_score' => 'HIT-score', | |
121 | |
122 'HMMER_program' => 'RESULT-algorithm_name', | |
123 'HMMER_version' => 'RESULT-algorithm_version', | |
124 'HMMER_query-def' => 'RESULT-query_name', | |
125 'HMMER_query-len' => 'RESULT-query_length', | |
126 'HMMER_query-acc' => 'RESULT-query_accession', | |
127 'HMMER_querydesc' => 'RESULT-query_description', | |
128 'HMMER_hmm' => 'RESULT-hmm_name', | |
129 'HMMER_seqfile' => 'RESULT-sequence_file', | |
130 ); | |
131 $DEFAULT_HIT_FACTORY_CLASS = 'Bio::Factory::HMMERHitFactory'; | |
132 $DEFAULT_HSP_FACTORY_CLASS = 'Bio::Factory::HMMERHSPFactory'; | |
133 $DEFAULT_RESULT_FACTORY_CLASS = 'Bio::Factory::HMMERResultFactory'; | |
134 } | |
135 | |
136 | |
137 =head2 new | |
138 | |
139 Title : new | |
140 Usage : my $obj = new Bio::SearchIO::hmmer(); | |
141 Function: Builds a new Bio::SearchIO::hmmer object | |
142 Returns : Bio::SearchIO::hmmer | |
143 Args : -fh/-file => HMMER filename | |
144 -format => 'hmmer' | |
145 | |
146 =cut | |
147 | |
148 sub _initialize { | |
149 my ($self,@args) = @_; | |
150 $self->SUPER::_initialize(@args); | |
151 my $handler = $self->_eventHandler; | |
152 # new object initialization code | |
153 $handler->register_factory('result', | |
154 Bio::Factory::ObjectFactory->new( | |
155 -type => 'Bio::Search::Result::HMMERResult', | |
156 -interface => 'Bio::Search::Result::ResultI')); | |
157 | |
158 $handler->register_factory('hit', | |
159 Bio::Factory::ObjectFactory->new( | |
160 -type => 'Bio::Search::Hit::HMMERHit', | |
161 -interface => 'Bio::Search::Hit::HitI')); | |
162 | |
163 $handler->register_factory('hsp', | |
164 Bio::Factory::ObjectFactory->new( | |
165 -type => 'Bio::Search::HSP::HMMERHSP', | |
166 -interface => 'Bio::Search::HSP::HSPI')); | |
167 $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)'; | |
168 } | |
169 =head2 next_result | |
170 | |
171 Title : next_result | |
172 Usage : my $hit = $searchio->next_result; | |
173 Function: Returns the next Result from a search | |
174 Returns : Bio::Search::Result::ResultI object | |
175 Args : none | |
176 | |
177 =cut | |
178 | |
179 sub next_result{ | |
180 my ($self) = @_; | |
181 my $seentop = 0; | |
182 my $reporttype; | |
183 my ($last,@hitinfo,@hspinfo,%hspinfo,%hitinfo); | |
184 | |
185 my @alignemnt_lines; | |
186 | |
187 $self->start_document(); | |
188 while( defined ($_ = $self->_readline )) { | |
189 my $lineorig = $_; | |
190 chomp; | |
191 if( /^HMMER\s+(\S+)\s+\((.+)\)/o ) { | |
192 my ($prog,$version) = split; | |
193 if( $seentop ) { | |
194 $self->_pushback($_); | |
195 $self->end_element({'Name' => 'HMMER_Output'}); | |
196 return $self->end_document(); | |
197 } | |
198 $self->{'_hmmidline'} = $_; | |
199 $self->start_element({'Name' => 'HMMER_Output'}); | |
200 $self->{'_result_count'}++; | |
201 $seentop = 1; | |
202 if( defined $last ) { | |
203 ($reporttype) = split(/\s+/,$last); | |
204 $self->element({'Name' => 'HMMER_program', | |
205 'Data' => uc ($reporttype)}); | |
206 } | |
207 $self->element({'Name' => 'HMMER_version', | |
208 'Data' => $version}); | |
209 } elsif( s/^HMM file:\s+//o ) { | |
210 $self->{'_hmmfileline'} = $lineorig; | |
211 $self->element({'Name' => 'HMMER_hmm', | |
212 'Data' => $_}); | |
213 } elsif( s/^Sequence\s+(file|database):\s+//o ) { | |
214 $self->{'_hmmseqline'} = $lineorig; | |
215 $self->element({'Name' => 'HMMER_seqfile', | |
216 'Data' => $_}); | |
217 } elsif( s/^Query(\s+(sequence|HMM))?:\s+//o) { | |
218 if( ! $seentop ) { | |
219 # we're in a multi-query report | |
220 $self->_pushback($self->{'_hmmidline'}); | |
221 $self->_pushback($self->{'_hmmfileline'}); | |
222 $self->_pushback($self->{'_hmmseqline'}); | |
223 $self->_pushback($lineorig); | |
224 next; | |
225 } | |
226 s/\s+$//; | |
227 $self->element({'Name' => 'HMMER_query-def', | |
228 'Data' => $_}); | |
229 } elsif( s/^Accession:\s+//o ) { | |
230 s/\s+$//; | |
231 $self->element({'Name' => 'HMMER_query-acc', | |
232 'Data' => $_}); | |
233 } elsif( s/^Description:\s+//o ) { | |
234 s/\s+$//; | |
235 $self->element({'Name' => 'HMMER_querydesc', | |
236 'Data' => $_}); | |
237 } elsif( defined $self->{'_reporttype'} && | |
238 $self->{'_reporttype'} eq 'HMMSEARCH' ) { | |
239 # PROCESS HMMSEARCH RESULTS HERE | |
240 if( /^Scores for complete sequences/o ) { | |
241 while( defined($_ = $self->_readline) ) { | |
242 last if( /^\s+$/); | |
243 next if( /^Sequence\s+Description/o || /^\-\-\-/o ); | |
244 my @line = split; | |
245 my ($name, $n,$evalue,$score)= (shift @line, | |
246 pop @line, | |
247 pop @line, | |
248 pop @line); | |
249 my $desc = join(' ', @line); | |
250 push @hitinfo, [ $name, $desc,$evalue,$score]; | |
251 $hitinfo{$name} = $#hitinfo; | |
252 } | |
253 } elsif( /^Parsed for domains:/o ) { | |
254 @hspinfo = (); | |
255 | |
256 while( defined($_ = $self->_readline) ) { | |
257 last if( /^\s+$/); | |
258 next if( /^(Model|Sequence)\s+Domain/o || /^\-\-\-/o ); | |
259 if( my ($n,$domainnum,$domainct, @vals) = | |
260 (m!^(\S+)\s+ # host name | |
261 (\d+)/(\d+)\s+ # num/num (ie 1 of 2) | |
262 (\d+)\s+(\d+).+? # sequence start and end | |
263 (\d+)\s+(\d+)\s+ # hmm start and end | |
264 \S+\s+ # [] | |
265 (\S+)\s+ # score | |
266 (\S+) # evalue | |
267 \s*$!ox ) ) { | |
268 # array lookup so that we can get rid of things | |
269 # when they've been processed | |
270 my $info = $hitinfo[$hitinfo{$n}]; | |
271 if( !defined $info ) { | |
272 $self->warn("Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"); | |
273 next; | |
274 } | |
275 push @hspinfo, [ $n, @vals]; | |
276 } | |
277 } | |
278 } elsif( /^Alignments of top/o ) { | |
279 my ($prelength,$lastdomain,$count,$width); | |
280 $count = 0; | |
281 my %domaincounter; | |
282 my $second_tier=0; | |
283 while( defined($_ = $self->_readline) ) { | |
284 next if( /^Align/o || | |
285 /^\s+RF\s+[x\s]+$/o); | |
286 if( /^Histogram/o || m!^//!o ) { | |
287 if( $self->in_element('hsp')) { | |
288 $self->end_element({'Name' => 'Hsp'}); | |
289 } | |
290 if( $self->within_element('hit')) { | |
291 $self->end_element({'Name' => 'Hit'}); | |
292 } | |
293 last; | |
294 } | |
295 chomp; | |
296 | |
297 if( /^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+from\s+(\d+)\s+to\s+(\d+)/o ) { | |
298 my ($name,$domainct,$domaintotal, | |
299 $from,$to) = ($1,$2,$3,$4,$5); | |
300 $domaincounter{$name}++; | |
301 if( ! defined $lastdomain || $lastdomain ne $name ) { | |
302 if( $self->within_element('hit') ) { | |
303 if( $self->within_element('hsp') ) { | |
304 $self->end_element({'Name' => 'Hsp'}); | |
305 } | |
306 $self->end_element({'Name' => 'Hit'}); | |
307 } | |
308 $self->start_element({'Name' => 'Hit'}); | |
309 my $info = [@{$hitinfo[$hitinfo{$name}] || $self->throw("Could not find hit info for $name: Insure that your database contains only unique sequence names")}]; | |
310 if( $info->[0] ne $name ) { | |
311 $self->throw("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name)"); | |
312 } | |
313 $self->element({'Name' => 'Hit_id', | |
314 'Data' => shift @{$info}}); | |
315 $self->element({'Name' => 'Hit_desc', | |
316 'Data' => shift @{$info}}); | |
317 $self->element({'Name' => 'Hit_signif', | |
318 'Data' => shift @{$info}}); | |
319 $self->element({'Name' => 'Hit_score', | |
320 'Data' => shift @{$info}}); | |
321 } | |
322 $self->end_element({'Name' => 'Hsp'}) | |
323 if $self->in_element('hsp'); | |
324 | |
325 $self->start_element({'Name' => 'Hsp'}); | |
326 $self->element({'Name' => 'Hsp_identity', | |
327 'Data' => 0}); | |
328 $self->element({'Name' => 'Hsp_positive', | |
329 'Data' => 0}); | |
330 my $HSPinfo = shift @hspinfo; | |
331 my $id = shift @$HSPinfo; | |
332 if( $id ne $name ) { | |
333 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); | |
334 } | |
335 if( $domaincounter{$name} == $domaintotal) { | |
336 $hitinfo[$hitinfo{$name}] = undef; | |
337 } | |
338 | |
339 $self->element({'Name' => 'Hsp_hit-from', | |
340 'Data' => shift @$HSPinfo}); | |
341 $self->element({'Name' => 'Hsp_hit-to', | |
342 'Data' => shift @$HSPinfo}); | |
343 $self->element({'Name' => 'Hsp_query-from', | |
344 'Data' => shift @$HSPinfo}); | |
345 $self->element({'Name' => 'Hsp_query-to', | |
346 'Data' => shift @$HSPinfo}); | |
347 $self->element({'Name' => 'Hsp_score', | |
348 'Data' => shift @$HSPinfo}); | |
349 $self->element({'Name' => 'Hsp_evalue', | |
350 'Data' => shift @$HSPinfo}); | |
351 $lastdomain = $name; | |
352 } else { | |
353 # Might want to change this so that it | |
354 # accumulates all the of the alignment lines into | |
355 # three array slots and then tests for the | |
356 # end of the line | |
357 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain | |
358 $prelength = CORE::length($1); | |
359 $width = 0; | |
360 # $width = CORE::length($2); | |
361 $self->element({'Name' =>'Hsp_qseq', | |
362 'Data' => $2}); | |
363 $count = 0; | |
364 $second_tier = 0; | |
365 } elsif ( /^(\s+)(\S+)\<\-\*\s*$/o ) { #end of domain | |
366 $self->element({'Name' =>'Hsp_qseq', | |
367 'Data' => $2}); | |
368 $width = CORE::length($2); | |
369 $count = 0; | |
370 } elsif( ($count != 1 && /^\s+$/o) || | |
371 CORE::length($_) == 0 || | |
372 /^\s+\-?\*\s*$/ ) { | |
373 next; | |
374 } elsif( $count == 0 ) { | |
375 $prelength -= 3 unless ($second_tier++); | |
376 unless( defined $prelength) { | |
377 # $self->warn("prelength not set"); | |
378 next; | |
379 } | |
380 $self->element({'Name' => 'Hsp_qseq', | |
381 'Data' => substr($_,$prelength)}); | |
382 } elsif( $count == 1) { | |
383 if( ! defined $prelength ) { | |
384 $self->warn("prelength not set"); | |
385 } | |
386 if( $width ) { | |
387 $self->element({'Name' => 'Hsp_midline', | |
388 'Data' => substr($_, | |
389 $prelength, | |
390 $width)}); | |
391 } else { | |
392 $self->debug( "midline is $_\n") if( CORE::length($_) <= $prelength && $self->verbose > 0); | |
393 $self->element({'Name' => 'Hsp_midline', | |
394 'Data' => substr($_, | |
395 $prelength)}); | |
396 } | |
397 } elsif( $count == 2) { | |
398 if( /^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o ) { | |
399 $self->element({'Name' => 'Hsp_hseq', | |
400 'Data' => $3}); | |
401 } else { | |
402 $self->warn("unrecognized line: $_\n"); | |
403 } | |
404 } | |
405 $count = 0 if $count++ >= 2; | |
406 } | |
407 } | |
408 } elsif( /^Histogram/o || m!^//!o ) { | |
409 | |
410 while( my $HSPinfo = shift @hspinfo ) { | |
411 my $id = shift @$HSPinfo; | |
412 my $info = [@{$hitinfo[$hitinfo{$id}]}]; | |
413 next unless defined $info; | |
414 $self->start_element({'Name' => 'Hit'}); | |
415 $self->element({'Name' => 'Hit_id', | |
416 'Data' => shift @{$info}}); | |
417 $self->element({'Name' => 'Hit_desc', | |
418 'Data' => shift @{$info}}); | |
419 $self->element({'Name' => 'Hit_signif', | |
420 'Data' => shift @{$info}}); | |
421 $self->element({'Name' => 'Hit_score', | |
422 'Data' => shift @{$info}}); | |
423 $self->start_element({'Name' => 'Hsp'}); | |
424 $self->element({'Name' => 'Hsp_query-from', | |
425 'Data' => shift @$HSPinfo}); | |
426 $self->element({'Name' => 'Hsp_query-to', | |
427 'Data' => shift @$HSPinfo}); | |
428 $self->element({'Name' => 'Hsp_hit-from', | |
429 'Data' => shift @$HSPinfo}); | |
430 $self->element({'Name' => 'Hsp_hit-to', | |
431 'Data' => shift @$HSPinfo}); | |
432 $self->element({'Name' => 'Hsp_score', | |
433 'Data' => shift @$HSPinfo}); | |
434 $self->element({'Name' => 'Hsp_evalue', | |
435 'Data' => shift @$HSPinfo}); | |
436 $self->element({'Name' => 'Hsp_identity', | |
437 'Data' => 0}); | |
438 $self->element({'Name' => 'Hsp_positive', | |
439 'Data' => 0}); | |
440 $self->element({'Name' => 'Hsp_positive', | |
441 'Data' => 0}); | |
442 $self->end_element({'Name' => 'Hsp'}); | |
443 $self->end_element({'Name' => 'Hit'}); | |
444 } | |
445 @hitinfo = (); | |
446 %hitinfo = (); | |
447 last; | |
448 } | |
449 } elsif( defined $self->{'_reporttype'} && | |
450 $self->{'_reporttype'} eq 'HMMPFAM' ) { | |
451 if( /^Scores for sequence family/o ) { | |
452 while( defined($_ = $self->_readline) ) { | |
453 last if( /^\s+$/); | |
454 next if( /^Model\s+Description/o || /^\-\-\-/o ); | |
455 chomp; | |
456 my @line = split; | |
457 my ($model,$n,$evalue,$score) = (shift @line, | |
458 pop @line, | |
459 pop @line, | |
460 pop @line); | |
461 my $desc = join(' ', @line); | |
462 push @hitinfo, [ $model, $desc,$score,$evalue,$n]; | |
463 $hitinfo{$model} = $#hitinfo; | |
464 } | |
465 } elsif( /^Parsed for domains:/o ) { | |
466 @hspinfo = (); | |
467 while( defined($_ = $self->_readline) ) { | |
468 last if( /^\s+$/); | |
469 next if( /^Model\s+Domain/o || /^\-\-\-/o ); | |
470 chomp; | |
471 if( my ($n,$domainnum,$domainct,@vals) = | |
472 (m!^(\S+)\s+ # domain name | |
473 \s+(\d+)/(\d+)\s+ # domain num out of num | |
474 (\d+)\s+(\d+).+? # seq start, end | |
475 (\d+)\s+(\d+)\s+ # hmm start, end | |
476 \S+\s+ # [] | |
477 (\S+)\s+ # score | |
478 (\S+) # evalue | |
479 \s*$!ox ) ) { | |
480 my $hindex = $hitinfo{$n}; | |
481 if( ! defined $hindex ) { | |
482 push @hitinfo, [ $n, '',$vals[5],$vals[6], | |
483 $domainct]; | |
484 $hitinfo{$n} = $#hitinfo; | |
485 $hindex = $#hitinfo; | |
486 } | |
487 my $info = $hitinfo[$hindex]; | |
488 if( ! defined $info ) { | |
489 $self->warn("incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"); | |
490 next; | |
491 } | |
492 push @hspinfo, [ $n, @vals]; | |
493 } | |
494 } | |
495 } elsif( /^Alignments of top/o ) { | |
496 my ($prelength,$lastdomain,$count,$width); | |
497 $count = 0; | |
498 my $second_tier=0; | |
499 while( defined($_ = $self->_readline) ) { | |
500 next if( /^Align/o || /^\s+RF\s+[x\s]+$/o); | |
501 | |
502 if( /^Histogram/o || m!^//!o || /^Query sequence/o ) { | |
503 if( $self->in_element('hsp')) { | |
504 $self->end_element({'Name' => 'Hsp'}); | |
505 } | |
506 if( $self->in_element('hit') ) { | |
507 $self->end_element({'Name' => 'Hit'}); | |
508 } | |
509 $self->end_element({'Name' => 'HMMER_Output'}); | |
510 if( /^Query sequence/o ) { $self->_pushback($_); } | |
511 return $self->end_document(); | |
512 last; | |
513 } | |
514 chomp; | |
515 if( m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o ) { | |
516 my ($name,$from,$to) = ($1,$2,$3); | |
517 if( ! defined $lastdomain || $lastdomain ne $name ) { | |
518 | |
519 if( $self->within_element('hit') ) { | |
520 if( $self->in_element('hsp') ) { | |
521 $self->end_element({'Name' => 'Hsp'}); | |
522 } | |
523 $self->end_element({'Name' => 'Hit'}); | |
524 } | |
525 $self->start_element({'Name' => 'Hit'}); | |
526 my $info = [@{$hitinfo[$hitinfo{$name}]}]; | |
527 if( ! defined $info || | |
528 $info->[0] ne $name ) { | |
529 $self->warn("Somehow the Model table order does not match the order in the domains (got ".$info->[0].", expected $name). We're | |
530 back loading this from the alignment information instead"); | |
531 $info = [$name, '', | |
532 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox]; | |
533 push @hitinfo, $info; | |
534 $hitinfo{$name} = $#hitinfo; | |
535 } | |
536 | |
537 $self->element({'Name' => 'Hit_id', | |
538 'Data' => shift @{$info}}); | |
539 $self->element({'Name' => 'Hit_desc', | |
540 'Data' => shift @{$info}}); | |
541 $self->element({'Name' => 'Hit_score', | |
542 'Data' => shift @{$info}}); | |
543 $self->element({'Name' => 'Hit_signif', | |
544 'Data' => shift @{$info}}); | |
545 } | |
546 if( $self->within_element('hsp') ) { | |
547 #if( defined $lastdomain ) { | |
548 $self->end_element({'Name' => 'Hsp'}); | |
549 } | |
550 $self->start_element({'Name' => 'Hsp'}); | |
551 $self->element({'Name' => 'Hsp_identity', | |
552 'Data' => 0}); | |
553 $self->element({'Name' => 'Hsp_positive', | |
554 'Data' => 0}); | |
555 my $HSPinfo = shift @hspinfo; | |
556 my $id = shift @$HSPinfo; | |
557 | |
558 if( $id ne $name ) { | |
559 $self->throw("Somehow the domain list details do not match the table (got $id, expected $name)"); | |
560 } | |
561 $self->element({'Name' => 'Hsp_query-from', | |
562 'Data' => shift @$HSPinfo}); | |
563 $self->element({'Name' => 'Hsp_query-to', | |
564 'Data' => shift @$HSPinfo}); | |
565 $self->element({'Name' => 'Hsp_hit-from', | |
566 'Data' => shift @$HSPinfo}); | |
567 $self->element({'Name' => 'Hsp_hit-to', | |
568 'Data' => shift @$HSPinfo}); | |
569 $self->element({'Name' => 'Hsp_score', | |
570 'Data' => shift @$HSPinfo}); | |
571 $self->element({'Name' => 'Hsp_evalue', | |
572 'Data' => shift @$HSPinfo}); | |
573 | |
574 $lastdomain = $name; | |
575 } else { | |
576 if( /^(\s+\*\-\>)(\S+)/o ) { # start of domain | |
577 $prelength = CORE::length($1); | |
578 $width = 0; | |
579 # $width = CORE::length($2); | |
580 $self->element({'Name' =>'Hsp_hseq', | |
581 'Data' => $2}); | |
582 $count = 0; | |
583 $second_tier = 0; | |
584 } elsif ( /^(\s+)(\S+)\<\-?\*?\s*$/o ) { #end of domain | |
585 $prelength -= 3 unless ($second_tier++); | |
586 $self->element({'Name' =>'Hsp_hseq', | |
587 'Data' => $2}); | |
588 $width = CORE::length($2); | |
589 $count = 0; | |
590 } elsif( CORE::length($_) == 0 || | |
591 ($count != 1 && /^\s+$/o) || | |
592 /^\s+\-?\*\s*$/ ) { | |
593 next; | |
594 } elsif( $count == 0 ) { | |
595 $prelength -= 3 unless ($second_tier++); | |
596 unless( defined $prelength) { | |
597 # $self->warn("prelength not set"); | |
598 next; | |
599 } | |
600 $self->element({'Name' => 'Hsp_hseq', | |
601 'Data' => substr($_,$prelength)}); | |
602 } elsif( $count == 1 ) { | |
603 if( ! defined $prelength ) { | |
604 $self->warn("prelength not set"); | |
605 } | |
606 if( $width ) { | |
607 $self->element({'Name' => 'Hsp_midline', | |
608 'Data' => substr($_,$prelength,$width)}); | |
609 } else { | |
610 $self->element({'Name' => 'Hsp_midline', | |
611 'Data' => substr($_,$prelength)}); | |
612 } | |
613 } elsif( $count == 2 ) { | |
614 if( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o || | |
615 /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o | |
616 ) { | |
617 $self->element({'Name' => 'Hsp_qseq', | |
618 'Data' => $3}); | |
619 } else { | |
620 $self->warn("unrecognized line ($count): $_\n"); | |
621 } | |
622 } | |
623 $count = 0 if $count++ >= 2; | |
624 } | |
625 } | |
626 } else { | |
627 $self->debug($_); | |
628 } | |
629 } | |
630 $last = $_; | |
631 } | |
632 | |
633 $self->end_element({'Name' => 'HMMER_Output'}) unless ! $seentop; | |
634 return $self->end_document(); | |
635 } | |
636 | |
637 =head2 start_element | |
638 | |
639 Title : start_element | |
640 Usage : $eventgenerator->start_element | |
641 Function: Handles a start element event | |
642 Returns : none | |
643 Args : hashref with at least 2 keys 'Data' and 'Name' | |
644 | |
645 | |
646 =cut | |
647 | |
648 sub start_element{ | |
649 my ($self,$data) = @_; | |
650 # we currently don't care about attributes | |
651 my $nm = $data->{'Name'}; | |
652 my $type = $MODEMAP{$nm}; | |
653 if( $type ) { | |
654 if( $self->_eventHandler->will_handle($type) ) { | |
655 my $func = sprintf("start_%s",lc $type); | |
656 $self->_eventHandler->$func($data->{'Attributes'}); | |
657 } | |
658 unshift @{$self->{'_elements'}}, $type; | |
659 } | |
660 if(defined $type && | |
661 $type eq 'result') { | |
662 $self->{'_values'} = {}; | |
663 $self->{'_result'}= undef; | |
664 } | |
665 } | |
666 | |
667 =head2 end_element | |
668 | |
669 Title : start_element | |
670 Usage : $eventgenerator->end_element | |
671 Function: Handles an end element event | |
672 Returns : none | |
673 Args : hashref with at least 2 keys 'Data' and 'Name' | |
674 | |
675 | |
676 =cut | |
677 | |
678 sub end_element { | |
679 my ($self,$data) = @_; | |
680 my $nm = $data->{'Name'}; | |
681 my $type = $MODEMAP{$nm}; | |
682 my $rc; | |
683 | |
684 if($nm eq 'HMMER_program' ) { | |
685 if( $self->{'_last_data'} =~ /(HMM\S+)/i ) { | |
686 $self->{'_reporttype'} = uc $1; | |
687 } | |
688 } | |
689 # Hsp are sort of weird, in that they end when another | |
690 # object begins so have to detect this in end_element for now | |
691 if( $nm eq 'Hsp' ) { | |
692 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) { | |
693 $self->element({'Name' => $_, | |
694 'Data' => $self->{'_last_hspdata'}->{$_}}); | |
695 } | |
696 $self->{'_last_hspdata'} = {}; | |
697 } | |
698 if( $type ) { | |
699 if( $self->_eventHandler->will_handle($type) ) { | |
700 my $func = sprintf("end_%s",lc $type); | |
701 $rc = $self->_eventHandler->$func($self->{'_reporttype'}, | |
702 $self->{'_values'}); | |
703 } | |
704 my $lastelem = shift @{$self->{'_elements'}}; | |
705 } elsif( $MAPPING{$nm} ) { | |
706 if ( ref($MAPPING{$nm}) =~ /hash/i ) { | |
707 my $key = (keys %{$MAPPING{$nm}})[0]; | |
708 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'}; | |
709 } else { | |
710 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'}; | |
711 } | |
712 } else { | |
713 $self->debug( "unknown nm $nm, ignoring\n"); | |
714 } | |
715 $self->{'_last_data'} = ''; # remove read data if we are at | |
716 # end of an element | |
717 $self->{'_result'} = $rc if( defined $type && $type eq 'result' ); | |
718 return $rc; | |
719 } | |
720 | |
721 =head2 element | |
722 | |
723 Title : element | |
724 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); | |
725 Function: Convience method that calls start_element, characters, end_element | |
726 Returns : none | |
727 Args : Hash ref with the keys 'Name' and 'Data' | |
728 | |
729 | |
730 =cut | |
731 | |
732 sub element{ | |
733 my ($self,$data) = @_; | |
734 $self->start_element($data); | |
735 $self->characters($data); | |
736 $self->end_element($data); | |
737 } | |
738 | |
739 =head2 characters | |
740 | |
741 Title : characters | |
742 Usage : $eventgenerator->characters($str) | |
743 Function: Send a character events | |
744 Returns : none | |
745 Args : string | |
746 | |
747 | |
748 =cut | |
749 | |
750 sub characters{ | |
751 my ($self,$data) = @_; | |
752 | |
753 if( $self->in_element('hsp') && | |
754 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o && | |
755 defined $data->{'Data'} ) { | |
756 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'}; | |
757 } | |
758 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); | |
759 | |
760 $self->{'_last_data'} = $data->{'Data'}; | |
761 } | |
762 | |
763 =head2 within_element | |
764 | |
765 Title : within_element | |
766 Usage : if( $eventgenerator->within_element($element) ) {} | |
767 Function: Test if we are within a particular element | |
768 This is different than 'in' because within can be tested | |
769 for a whole block. | |
770 Returns : boolean | |
771 Args : string element name | |
772 | |
773 | |
774 =cut | |
775 | |
776 sub within_element{ | |
777 my ($self,$name) = @_; | |
778 return 0 if ( ! defined $name || | |
779 ! defined $self->{'_elements'} || | |
780 scalar @{$self->{'_elements'}} == 0) ; | |
781 foreach ( @{$self->{'_elements'}} ) { | |
782 return 1 if( $_ eq $name ); | |
783 } | |
784 return 0; | |
785 } | |
786 | |
787 | |
788 =head2 in_element | |
789 | |
790 Title : in_element | |
791 Usage : if( $eventgenerator->in_element($element) ) {} | |
792 Function: Test if we are in a particular element | |
793 This is different than 'within' because 'in' only | |
794 tests its immediete parent. | |
795 Returns : boolean | |
796 Args : string element name | |
797 | |
798 | |
799 =cut | |
800 | |
801 sub in_element{ | |
802 my ($self,$name) = @_; | |
803 return 0 if ! defined $self->{'_elements'}->[0]; | |
804 return ( $self->{'_elements'}->[0] eq $name) | |
805 } | |
806 | |
807 =head2 start_document | |
808 | |
809 Title : start_document | |
810 Usage : $eventgenerator->start_document | |
811 Function: Handle a start document event | |
812 Returns : none | |
813 Args : none | |
814 | |
815 | |
816 =cut | |
817 | |
818 sub start_document{ | |
819 my ($self) = @_; | |
820 $self->{'_lasttype'} = ''; | |
821 $self->{'_values'} = {}; | |
822 $self->{'_result'}= undef; | |
823 $self->{'_elements'} = []; | |
824 } | |
825 | |
826 =head2 end_document | |
827 | |
828 Title : end_document | |
829 Usage : $eventgenerator->end_document | |
830 Function: Handles an end document event | |
831 Returns : Bio::Search::Result::ResultI object | |
832 Args : none | |
833 | |
834 | |
835 =cut | |
836 | |
837 sub end_document{ | |
838 my ($self) = @_; | |
839 return $self->{'_result'}; | |
840 } | |
841 | |
842 =head2 result_count | |
843 | |
844 Title : result_count | |
845 Usage : my $count = $searchio->result_count | |
846 Function: Returns the number of results we have processed | |
847 Returns : integer | |
848 Args : none | |
849 | |
850 | |
851 =cut | |
852 | |
853 sub result_count { | |
854 my $self = shift; | |
855 return $self->{'_result_count'}; | |
856 } | |
857 | |
858 1; |