comparison variant_effect_predictor/Bio/Tools/HMMER/Results.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Results.pm,v 1.22.2.1 2003/01/07 13:58:01 jason Exp $
2 #
3 # Perl Module for HMMResults
4 #
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
6 #
7 #Copyright Genome Research Limited (1997).
8
9 =head1 NAME
10
11 Bio::Tools::HMMER::Results - Object representing HMMER output results
12
13 =head1 SYNOPSIS
14
15 # parse a hmmsearch file (can also parse a hmmpfam file)
16 $res = new Bio::Tools::HMMER::Results( -file => 'output.hmm' , -type => 'hmmsearch');
17
18 # print out the results for each sequence
19 foreach $seq ( $res->each_Set ) {
20 print "Sequence bit score is",$seq->bits,"\n";
21 foreach $domain ( $seq->each_Domain ) {
22 print " Domain start ",$domain->start," end ",$domain->end,
23 " score ",$domain->bits,"\n";
24 }
25 }
26
27 # new result object on a sequence/domain cutoff of 25 bits sequence, 15 bits domain
28 $newresult = $res->filter_on_cutoff(25,15);
29
30 # alternative way of getting out all domains directly
31 foreach $domain ( $res->each_Domain ) {
32 print "Domain on ",$domain->seq_id," with score ",
33 $domain->bits," evalue ",$domain->evalue,"\n";
34 }
35
36 =head1 DESCRIPTION
37
38 This object represents HMMER output, either from hmmsearch or
39 hmmpfam. For hmmsearch, a series of HMMER::Set objects are made, one
40 for each sequence, which have the the bits score for the object. For
41 hmmpfam searches, only one Set object is made.
42
43
44 These objects come from the original HMMResults modules used
45 internally in Pfam, written by Ewan. Ewan then converted them to
46 bioperl objects in 1999. That conversion is meant to be backwardly
47 compatible, but may not be (caveat emptor).
48
49 =head1 FEEDBACK
50
51 =head2 Mailing Lists
52
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation is much appreciated.
56
57 bioperl-l@bioperl.org - General discussion
58 http://www.bioperl.org/MailList.html - About the mailing lists
59
60 =head2 Reporting Bugs
61
62 Report bugs to the Bioperl bug tracking system to help us keep track
63 the bugs and their resolution. Bug reports can be submitted via email
64 or the web:
65
66 bioperl-bugs@bio.perl.org
67 http://www.bugzilla.bioperl.org/
68
69 =head1 AUTHOR - Ewan Birney
70
71 Email birney@ebi.ac.uk
72
73 =head1 CONTRIBUTORS
74
75 Jason Stajich, jason@bioperl.org
76
77 =head1 APPENDIX
78
79 The rest of the documentation details each of the object
80 methods. Internal methods are usually preceded with a _
81
82 =cut
83
84 package Bio::Tools::HMMER::Results;
85
86 use vars qw(@ISA);
87 use Carp;
88 use strict;
89
90 use Bio::Root::Root;
91 use Bio::Root::IO;
92 use Bio::Tools::HMMER::Domain;
93 use Bio::Tools::HMMER::Set;
94 use Bio::SeqAnalysisParserI;
95 use Symbol;
96
97 @ISA = qw(Bio::Root::Root Bio::Root::IO Bio::SeqAnalysisParserI);
98
99 sub new {
100 my($class,@args) = @_;
101
102 my $self = $class->SUPER::new(@args);
103
104 $self->{'domain'} = []; # array of HMMUnits
105 $self->{'seq'} = {};
106
107 my ($parsetype) = $self->_rearrange([qw(TYPE)],@args);
108 $self->_initialize_io(@args);
109 if( !defined $parsetype ) {
110 $self->throw("No parse type provided. should be hmmsearch or hmmpfam");
111 }
112 $self->parsetype($parsetype);
113 if( defined $self->_fh() ) {
114 if( $parsetype eq 'hmmsearch' ) {
115 $self->_parse_hmmsearch($self->_fh());
116 } elsif ( $parsetype eq 'hmmpfam' ) {
117 $self->_parse_hmmpfam($self->_fh());
118 } else {
119 $self->throw("Did not recoginise type $parsetype");
120 }
121 }
122
123 return $self; # success - we hope!
124 }
125
126
127 =head2 next_feature
128
129 Title : next_feature
130 Usage : while( my $feat = $res->next_feature ) { # do something }
131 Function: SeqAnalysisParserI implementing function
132 Example :
133 Returns : A Bio::SeqFeatureI compliant object, in this case,
134 each DomainUnit object, ie, flattening the Sequence
135 aspect of this.
136 Args : None
137
138
139 =cut
140
141 sub next_feature{
142 my ($self) = @_;
143
144 if( $self->{'_started_next_feature'} == 1 ) {
145 return shift @{$self->{'_next_feature_array'}};
146 } else {
147 $self->{'_started_next_feature'} = 1;
148 my @array;
149 foreach my $seq ( $self->each_Set() ) {
150 foreach my $unit ( $seq->each_Domain() ) {
151 push(@array,$unit);
152 }
153 }
154 my $res = shift @array;
155 $self->{'_next_feature_array'} = \@array;
156 return $res;
157 }
158
159 $self->throw("Should not reach here! Error!");
160 }
161
162
163 =head2 number
164
165 Title : number
166 Usage : print "There are ",$res->number," domains hit\n";
167 Function: provides the number of domains in the HMMER report
168
169 =cut
170
171 sub number {
172 my $self = shift;
173 my @val;
174 my $ref;
175 $ref = $self->{'domain'};
176
177
178 @val = @{$self->{'domain'}};
179 return scalar @val;
180 }
181
182 =head2 seqfile
183
184 Title : seqfile
185 Usage : $obj->seqfile($newval)
186 Function:
187 Example :
188 Returns : value of seqfile
189 Args : newvalue (optional)
190
191
192 =cut
193
194 sub seqfile{
195 my ($self,$value) = @_;
196 if( defined $value) {
197 $self->{'seqfile'} = $value;
198 }
199 return $self->{'seqfile'};
200
201 }
202
203 =head2 hmmfile
204
205 Title : hmmfile
206 Usage : $obj->hmmfile($newval)
207 Function:
208 Example :
209 Returns : value of hmmfile
210 Args : newvalue (optional)
211
212
213 =cut
214
215 sub hmmfile{
216 my ($self,$value) = @_;
217 if( defined $value) {
218 $self->{'hmmfile'} = $value;
219 }
220 return $self->{'hmmfile'};
221
222 }
223
224 =head2 add_Domain
225
226 Title : add_Domain
227 Usage : $res->add_Domain($unit)
228 Function: adds a domain to the results array. Mainly used internally.
229 Args : A Bio::Tools::HMMER::Domain
230
231
232 =cut
233
234 sub add_Domain {
235 my $self = shift;
236 my $unit = shift;
237 my $name;
238
239 $name = $unit->seq_id();
240
241 if( ! exists $self->{'seq'}->{$name} ) {
242 $self->warn("Adding a domain of $name but with no HMMSequence. Will be kept in domain array but not added to a HMMSequence");
243 } else {
244 $self->{'seq'}->{$name}->add_Domain($unit);
245 }
246 push(@{$self->{'domain'}},$unit);
247 }
248
249
250 =head2 each_Domain
251
252 Title : each_Domain
253 Usage : foreach $domain ( $res->each_Domain() )
254 Function: array of Domain units which are held in this report
255 Returns : array
256 Args : none
257
258
259 =cut
260
261 sub each_Domain {
262 my $self = shift;
263 my (@arr,$u);
264
265 foreach $u ( @{$self->{'domain'}} ) {
266 push(@arr,$u);
267 }
268
269 return @arr;
270 }
271
272
273 =head2 domain_bits_cutoff_from_evalue
274
275 Title : domain_bits_cutoff_from_evalue
276 Usage : $cutoff = domain_bits_cutoff_from_evalue(0.01);
277 Function: return a bits cutoff from an evalue using the
278 scores here. Somewhat interesting logic:
279 Find the two bit score which straddle the evalue
280 if( 25 is between these two points) return 25
281 else return the midpoint.
282
283 This logic tries to ensure that with large signal to
284 noise separation one still has sensible 25 bit cutoff
285 Returns :
286 Args :
287
288 =cut
289
290 sub domain_bits_cutoff_from_evalue {
291 my $self = shift;
292 my $eval = shift;
293 my ($dom,$prev,@doms,$cutoff,$sep,$seen);
294
295 @doms = $self->each_Domain;
296
297
298 @doms = map { $_->[0] }
299 sort { $b->[1] <=> $a->[1] }
300 map { [ $_, $_->bits] } @doms;
301 $seen = 0;
302 foreach $_ ( @doms ) {
303 if( $_->evalue > $eval ) {
304 $seen = 1;
305 $dom = $_;
306 last;
307 }
308 $prev = $_;
309 }
310
311 if( ! defined $prev || $seen == 0) {
312 $self->throw("Evalue is either above or below the list...");
313 return undef;
314 }
315
316 $sep = $prev->bits - $dom->bits ;
317
318 if( $sep < 1 ) {
319 return $prev->bits();
320 }
321 if( $dom->bits < 25 && $prev->bits > 25 ) {
322 return 25;
323 }
324
325 return int( $dom->bits + $sep/2 ) ;
326
327 }
328
329
330 sub dictate_hmm_acc {
331 my $self = shift;
332 my $acc = shift;
333 my ($unit);
334
335
336 foreach $unit ( $self->eachHMMUnit() ) {
337 $unit->hmmacc($acc);
338 }
339 }
340
341 =head2 write_FT_output
342
343 Title : write_FT_output
344 Usage : $res->write_FT_output(\*STDOUT,'DOMAIN')
345 Function: writes feature table output ala swissprot
346 Returns :
347 Args :
348
349
350 =cut
351
352 sub write_FT_output {
353 my $self = shift;
354 my $file = shift;
355 my $idt = shift;
356 my ($seq,$unit);
357
358 if( !defined $idt ) {
359 $idt = "DOMAIN";
360 }
361
362 foreach $seq ( $self->each_Set() ) {
363 print $file sprintf("ID %s\n",$seq->name());
364 foreach $unit ( $seq->each_Domain() ) {
365 print $file sprintf("FT %s %d %d %s\n",$idt,
366 $unit->start,$unit->end,$unit->hmmname);
367 }
368 print $file "//\n";
369 }
370 }
371
372 =head2 filter_on_cutoff
373
374 Title : filter_on_cutoff
375 Usage : $newresults = $results->filter_on_cutoff(25,15);
376 Function: Produces a new HMMER::Results module which has
377 been trimmed at the cutoff.
378 Returns : a Bio::Tools::HMMER::Results module
379 Args : sequence cutoff and domain cutoff. in bits score
380 if you want one cutoff, simply use same number both places
381
382 =cut
383
384 sub filter_on_cutoff {
385 my $self = shift;
386 my $seqthr = shift;
387 my $domthr = shift;
388 my ($new,$seq,$unit,@array,@narray);
389
390 if( !defined $domthr ) {
391 $self->throw("hmmresults filter on cutoff needs two arguments");
392 }
393
394 $new = Bio::Tools::HMMER::Results->new(-type => $self->parsetype);
395
396 foreach $seq ( $self->each_Set()) {
397 next if( $seq->bits() < $seqthr );
398 $new->add_Set($seq);
399 foreach $unit ( $seq->each_Domain() ) {
400 next if( $unit->bits() < $domthr );
401 $new->add_Domain($unit);
402 }
403 }
404 $new;
405 }
406
407 =head2 write_ascii_out
408
409 Title : write_ascii_out
410 Usage : $res->write_ascii_out(\*STDOUT)
411 Function: writes as
412 seq seq_start seq_end model-acc model_start model_end model_name
413 Returns :
414 Args :
415
416 FIXME: Now that we have no modelacc, this is probably a bad thing.
417
418 =cut
419
420 # writes as seq sstart send modelacc hstart hend modelname
421
422 sub write_ascii_out {
423 my $self = shift;
424 my $fh = shift;
425 my ($unit,$seq);
426
427 if( !defined $fh) {
428 $fh = \*STDOUT;
429 }
430
431
432 foreach $seq ( $self->each_Set()) {
433 foreach $unit ( $seq->each_Domain()) {
434 print $fh sprintf("%s %4d %4d %s %4d %4d %4.2f %4.2g %s\n",
435 $unit->seq_id(),$unit->start(),$unit->end(),
436 $unit->hmmacc,$unit->hstart,$unit->hend,
437 $unit->bits,$unit->evalue,$unit->hmmname);
438 }
439 }
440
441 }
442
443 =head2 write_GDF_bits
444
445 Title : write_GDF_bits
446 Usage : $res->write_GDF_bits(25,15,\*STDOUT)
447 Function: writes GDF format with a sequence,domain threshold
448 Returns :
449 Args :
450
451 =cut
452
453 sub write_GDF_bits {
454 my $self = shift;
455 my $seqt = shift;
456 my $domt = shift;
457 my $file = shift;
458 my $seq;
459 my $unit;
460 my (@array,@narray);
461
462 if( !defined $file ) {
463 $self->throw("Attempting to use write_GDF_bits without passing in correct arguments!");
464 return;
465 }
466
467 foreach $seq ( $self->each_Set()) {
468
469 if( $seq->bits() < $seqt ) {
470 next;
471 }
472
473 foreach $unit ( $seq->each_Domain() ) {
474 if( $unit->bits() < $domt ) {
475 next;
476 }
477 push(@array,$unit);
478 }
479
480 }
481
482 @narray = sort { my ($aa,$bb,$st_a,$st_b);
483 $aa = $a->seq_id();
484 $bb = $b->seq_id();
485 if ( $aa eq $bb) {
486 $st_a = $a->start();
487 $st_b = $b->start();
488 return $st_a <=> $st_b;
489 }
490 else {
491 return $aa cmp $bb;
492 } } @array;
493
494 foreach $unit ( @narray ) {
495 print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue);
496 }
497
498 }
499
500 sub write_scores_bits {
501 my $self = shift;
502 my $seqt = shift;
503 my $domt = shift;
504 my $file = shift;
505 my $seq;
506 my $unit;
507 my (@array,@narray);
508
509 if( !defined $file ) {
510 carp("Attempting to use write_scores_bits without passing in correct arguments!");
511 return;
512 }
513
514 foreach $seq ( $self->eachHMMSequence()) {
515
516 if( $seq->bits() < $seqt ) {
517 next;
518 }
519
520 foreach $unit ( $seq->eachHMMUnit() ) {
521 if( $unit->bits() < $domt ) {
522 next;
523 }
524 push(@array,$unit);
525 }
526
527 }
528
529 @narray = sort { my ($aa,$bb,$st_a,$st_b);
530 $aa = $a->bits();
531 $bb = $b->bits();
532 return $aa <=> $bb;
533 } @array;
534
535 foreach $unit ( @narray ) {
536 print $file sprintf("%4.2f %s\n",$unit->bits(),$unit->get_nse());
537 }
538
539 }
540
541 sub write_GDF {
542 my $self = shift;
543 my $file = shift;
544 my $unit;
545
546 if( !defined $file ) {
547 $file = \*STDOUT;
548 }
549
550
551 foreach $unit ( $self->eachHMMUnit() ) {
552 print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue);
553 }
554
555 }
556
557 sub highest_noise {
558 my $self = shift;
559 my $seqt = shift;
560 my $domt = shift;
561 my ($seq,$unit,$hseq,$hdom,$noiseseq,$noisedom);
562
563 $hseq = $hdom = -100000;
564
565 foreach $seq ( $self->eachHMMSequence()) {
566 if( $seq->bits() < $seqt && $seq->bits() > $hseq ) {
567 $hseq = $seq->bits();
568 $noiseseq = $seq;
569 }
570 foreach $unit ( $seq->eachHMMUnit() ) {
571 if( (($seq->bits() < $seqt) || ($seq->bits() > $seqt && $unit->bits < $domt)) && $unit->bits() > $hdom ) {
572 $hdom = $unit->bits();
573 $noisedom = $unit;
574 }
575 }
576 }
577
578
579 return ($noiseseq,$noisedom);
580
581 }
582
583
584 sub lowest_true {
585 my $self = shift;
586 my $seqt = shift;
587 my $domt = shift;
588 my ($seq,$unit,$lowseq,$lowdom,$trueseq,$truedom);
589
590 if( ! defined $domt ) {
591 carp "lowest true needs at least a domain threshold cut-off";
592 return (0,0);
593 }
594
595 $lowseq = $lowdom = 100000;
596
597 foreach $seq ( $self->eachHMMSequence()) {
598
599 if( $seq->bits() >= $seqt && $seq->bits() < $lowseq ) {
600 $lowseq = $seq->bits();
601 $trueseq = $seq;
602 }
603 if( $seq->bits() < $seqt ) {
604 next;
605 }
606
607 foreach $unit ( $seq->eachHMMUnit() ) {
608 if( $unit->bits() >= $domt && $unit->bits() < $lowdom ) {
609 $lowdom = $unit->bits();
610 $truedom = $unit;
611 }
612 }
613 }
614
615
616 return ($trueseq,$truedom);
617
618 }
619
620
621
622 =head2 add_Set
623
624 Title : add_Set
625 Usage : Mainly internal function
626 Function:
627 Returns :
628 Args :
629
630
631 =cut
632
633 sub add_Set {
634 my $self = shift;
635 my $seq = shift;
636 my $name;
637
638 $name = $seq->name();
639
640 if( exists $self->{'seq'}->{$name} ) {
641 $self->throw("You alredy have $name in HMMResults!");
642 }
643 $self->{'seq'}->{$name} = $seq;
644 }
645
646
647 =head2 each_Set
648
649 Title : each_Set
650 Usage :
651 Function:
652 Returns :
653 Args :
654
655
656 =cut
657
658 sub each_Set {
659 my $self = shift;
660 my (@array,$name);
661
662
663 foreach $name ( keys %{$self->{'seq'}} ) {
664 push(@array,$self->{'seq'}->{$name});
665 }
666 return @array;
667 }
668
669
670 =head2 get_Set
671
672 Title : get_Set
673 Usage : $set = $res->get_Set('sequence-name');
674 Function: returns the Set for a particular sequence
675 Returns : a HMMER::Set object
676 Args : name of the sequence
677
678
679 =cut
680
681 sub get_Set {
682 my $self = shift;
683 my $name = shift;
684
685 return $self->{'seq'}->{$name};
686 }
687
688
689 =head2 _parse_hmmpfam
690
691 Title : _parse_hmmpfam
692 Usage : $res->_parse_hmmpfam($filehandle)
693 Function:
694 Returns :
695 Args :
696
697
698 =cut
699
700 sub _parse_hmmpfam {
701 my $self = shift;
702 my $file = shift;
703
704 my ($id,$sqfrom,$sqto,$hmmf,$hmmt,$sc,$ev,
705 $unit,$nd,$seq,$name,$seqname,$from,
706 $to,%hash,%acc,$acc);
707 my $count = 0;
708
709 while(<$file>) {
710 if( /^HMM file:\s+(\S+)/ ) { $self->hmmfile($1); next; }
711 elsif( /^Sequence file:\s+(\S+)/ ) { $self->seqfile($1); next }
712 elsif( /^Query(\s+sequence)?:\s+(\S+)/ ) {
713
714 $seqname = $2;
715
716 $seq = Bio::Tools::HMMER::Set->new();
717
718 $seq ->name($seqname);
719 $self->add_Set($seq);
720 %hash = ();
721
722 while(<$file>){
723
724 if( /Accession:\s+(\S+)/ ) { $seq->accession($1); next }
725 elsif( s/^Description:\s+// ) { chomp; $seq->desc($_); next }
726 /^Parsed for domains/ && last;
727
728 # This is to parse out the accession numbers in old Pfam format.
729 # now not support due to changes in HMMER.
730
731 if( (($id,$acc, $sc, $ev, $nd) = /^\s*(\S+)\s+(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) {
732 $hash{$id} = $sc; # we need this for the sequence
733 # core of the domains below!
734 $acc {$id} = $acc;
735
736 # this is the more common parsing routine
737 } elsif ( (($id,$sc, $ev, $nd) =
738 /^\s*(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/) ) {
739
740 $hash{$id} = $sc; # we need this for the
741 # sequence score of hte domains below!
742
743 }
744 }
745
746 while(<$file>) {
747 /^Align/ && last;
748 /^\/\// && last;
749 # this is meant to match
750
751 #Sequence Domain seq-f seq-t hmm-f hmm-t score E-value
752 #-------- ------- ----- ----- ----- ----- ----- -------
753 #PF00621 1/1 198 372 .. 1 207 [] 281.6 1e-80
754
755 if( (($id, $sqfrom, $sqto, $hmmf,$hmmt,$sc, $ev) =
756 /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) {
757 $unit = Bio::Tools::HMMER::Domain->new();
758 $unit->seq_id ($seqname);
759 $unit->hmmname ($id);
760 $unit->start ($sqfrom);
761 $unit->end ($sqto);
762 $unit->hstart($hmmf);
763 $unit->hend ($hmmt);
764 $unit->bits ($sc);
765 $unit->evalue ($ev);
766
767 if( !exists($hash{$id}) ) {
768 $self->throw("HMMResults parsing error in hmmpfam for $id - can't find sequecne score");
769 }
770
771 $unit->seqbits($hash{$id});
772
773 if( defined $acc{$id} ) {
774 $unit->hmmacc($acc{$id});
775 }
776
777 # this should find it's own sequence!
778 $self->add_Domain($unit);
779 }
780 }
781 if( /^\/\// ) { next; }
782
783 $_ = <$file>;
784 # parses alignment lines. Icky as we have to break on the same line
785 # that we need to read to place the alignment lines with the unit.
786
787 while(1) {
788 (!defined $_ || /^\/\//) && last;
789
790 # matches:
791 # PF00621: domain 1 of 1, from 198 to 372
792 if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) {
793
794 $name = $1;
795 $from = $2;
796 $to = $3;
797
798 # find the HMMUnit which this alignment is from
799
800 $unit = $self->get_unit_nse($seqname,$name,$from,$to);
801 if( !defined $unit ) {
802 $self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!");
803 $_ = <$file>;
804 next;
805 }
806 while(<$file>) {
807 /^\/\// && last;
808 /^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last;
809 $unit->add_alignment_line($_);
810 }
811 } else {
812 $_ = <$file>;
813 }
814 }
815
816 # back to main 'Query:' loop
817 }
818 }
819 }
820
821 # mainly internal function
822
823 sub get_unit_nse {
824 my $self = shift;
825 my $seqname = shift;
826 my $domname = shift;
827 my $start = shift;
828 my $end = shift;
829
830 my($seq,$unit);
831
832 $seq = $self->get_Set($seqname);
833
834 if( !defined $seq ) {
835 $self->throw("Could not get sequence name $seqname - so can't get its unit");
836 }
837
838 foreach $unit ( $seq->each_Domain() ) {
839 if( $unit->hmmname() eq $domname && $unit->start() == $start && $unit->end() == $end ) {
840 return $unit;
841 }
842 }
843
844 return undef;
845 }
846
847
848 =head2 _parse_hmmsearch
849
850 Title : _parse_hmmsearch
851 Usage : $res->_parse_hmmsearch($filehandle)
852 Function:
853 Returns :
854 Args :
855
856
857 =cut
858
859 sub _parse_hmmsearch {
860 my $self = shift;
861 my $file = shift;
862 my ($id,$sqfrom,$sqto,$sc,$ev,$unit,$nd,$seq,$hmmf,$hmmt,
863 $hmmfname,$hmmacc, $hmmid, %seqh);
864 my $count = 0;
865
866 while(<$file>) {
867 /^HMM file:\s+(\S+)/ and do { $self->hmmfile($1); $hmmfname = $1 };
868 /^Accession:\s+(\S+)/ and do { $hmmacc = $1 };
869 /^Query HMM:\s+(\S+)/ and do { $hmmid = $1 };
870 /^Sequence database:\s+(\S+)/ and do { $self->seqfile($1) };
871 /^Scores for complete sequences/ && last;
872 }
873
874 $hmmfname = "given" if not $hmmfname;
875
876 while(<$file>) {
877 /^Parsed for domains/ && last;
878 if( (($id, $sc, $ev, $nd) = /(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) {
879 $seq = Bio::Tools::HMMER::Set->new();
880 $seq->name($id);
881 $seq->bits($sc);
882 $seqh{$id} = $sc;
883 $seq->evalue($ev);
884 $self->add_Set($seq);
885 $seq->accession($hmmacc);
886 }
887 }
888
889 while(<$file>) {
890 /^Alignments of top-scoring domains/ && last;
891 if( (($id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev) = /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) {
892 $unit = Bio::Tools::HMMER::Domain->new();
893
894 $unit->seq_id($id);
895 $unit->hmmname($hmmfname);
896 $unit->start($sqfrom);
897 $unit->end($sqto);
898 $unit->bits($sc);
899 $unit->hstart($hmmf);
900 $unit->hend($hmmt);
901 $unit->evalue($ev);
902 $unit->seqbits($seqh{$id});
903 $self->add_Domain($unit);
904 $count++;
905 }
906 }
907
908 $_ = <$file>;
909
910 ## Recognize and store domain alignments
911
912 while(1) {
913 if( !defined $_ ) {
914 last;
915 }
916 /^Histogram of all scores/ && last;
917
918 # matches:
919 # PF00621: domain 1 of 1, from 198 to 372
920 if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) {
921 my $name = $1;
922 my $from = $2;
923 my $to = $3;
924
925 # find the HMMUnit which this alignment is from
926 $unit = $self->get_unit_nse($name,$hmmfname,$from,$to);
927
928 if( !defined $unit ) {
929 $self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!");
930 next;
931 }
932 while(<$file>) {
933 /^Histogram of all scores/ && last;
934 /^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last;
935 $unit->add_alignment_line($_);
936 }
937 }
938 else {
939 $_ = <$file>;
940 }
941 }
942
943 return $count;
944 }
945
946 =head2 parsetype
947
948 Title : parsetype
949 Usage : $obj->parsetype($newval)
950 Function:
951 Returns : value of parsetype
952 Args : newvalue (optional)
953
954
955 =cut
956
957 sub parsetype{
958 my ($self,$value) = @_;
959 if( defined $value) {
960 $self->{'_parsetype'} = $value;
961 }
962 return $self->{'_parsetype'};
963 }
964
965 1; # says use was ok
966 __END__
967
968