diff variant_effect_predictor/Bio/Search/HSP/GenericHSP.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Search/HSP/GenericHSP.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1125 @@
+# $Id: GenericHSP.pm,v 1.40.2.3 2003/03/24 20:44:45 jason Exp $
+#
+# BioPerl module for Bio::Search::HSP::GenericHSP
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair 
+
+=head1 SYNOPSIS
+
+    my $hsp = new Bio::Search::HSP::GenericHSP( -algorithm => 'blastp',
+						-evalue    => '1e-30',
+						);
+
+    $r_type = $hsp->algorithm
+
+    $pvalue = $hsp->p();
+
+    $evalue = $hsp->evalue();
+
+    $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+
+    $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+
+    $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+
+    $qseq = $hsp->query_string;
+
+    $hseq = $hsp->hit_string;
+
+    $homo_string = $hsp->homology_string;
+
+    $len = $hsp->length( ['query'|'hit'|'total'] );
+
+    $len = $hsp->length( ['query'|'hit'|'total'] );
+
+    $rank = $hsp->rank;
+
+
+=head1 DESCRIPTION
+
+This implementation is "Generic", meaning it is is suitable for
+holding information about High Scoring pairs from most Search reports
+such as BLAST and FastA.  Specialized objects can be derived from
+this.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason@bioperl.org
+Email sac@bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::GenericHSP;
+use vars qw(@ISA $GAP_SYMBOL);
+use strict;
+
+use Bio::Root::Root;
+use Bio::SeqFeature::Similarity;
+use Bio::Search::HSP::HSPI;
+
+@ISA = qw(Bio::Search::HSP::HSPI Bio::Root::Root );
+
+BEGIN {
+    $GAP_SYMBOL = '-';
+}
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::GenericHSP();
+ Function: Builds a new Bio::Search::HSP::GenericHSP object 
+ Returns : Bio::Search::HSP::GenericHSP
+ Args    : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
+           -evalue    => evalue
+           -pvalue    => pvalue
+           -bits      => bit value for HSP
+           -score     => score value for HSP (typically z-score but depends on
+					      analysis)
+           -hsp_length=> Length of the HSP (including gaps)
+           -identical => # of residues that that matched identically
+           -conserved => # of residues that matched conservatively 
+                           (only protein comparisions; 
+			    conserved == identical in nucleotide comparisons)
+           -hsp_gaps   => # of gaps in the HSP
+           -query_gaps => # of gaps in the query in the alignment
+           -hit_gaps   => # of gaps in the subject in the alignment    
+           -query_name  => HSP Query sequence name (if available)
+           -query_start => HSP Query start (in original query sequence coords)
+           -query_end   => HSP Query end (in original query sequence coords)
+           -hit_name    => HSP Hit sequence name (if available)
+           -hit_start   => HSP Hit start (in original hit sequence coords)
+           -hit_end     => HSP Hit end (in original hit sequence coords)
+           -hit_length  => total length of the hit sequence
+           -query_length=> total length of the query sequence
+           -query_seq   => query sequence portion of the HSP
+           -hit_seq     => hit sequence portion of the HSP
+           -homology_seq=> homology sequence for the HSP
+           -hit_frame   => hit frame (only if hit is translated protein)
+           -query_frame => query frame (only if query is translated protein)
+           -rank        => HSP rank
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    my ($algo, $evalue, $pvalue, $identical, $conserved, 
+	$gaps, $query_gaps, $hit_gaps,
+	$hit_seq, $query_seq, $homology_seq,
+	$hsp_len, $query_len,$hit_len,
+	$hit_name,$query_name,$bits,$score,
+	$hs,$he,$qs,$qe,
+	$qframe,$hframe,
+	$rank) = $self->_rearrange([qw(ALGORITHM
+				       EVALUE
+				       PVALUE
+				       IDENTICAL
+				       CONSERVED
+				       HSP_GAPS
+				       QUERY_GAPS
+				       HIT_GAPS
+				       HIT_SEQ
+				       QUERY_SEQ
+				       HOMOLOGY_SEQ
+				       HSP_LENGTH
+				       QUERY_LENGTH
+				       HIT_LENGTH
+				       HIT_NAME
+				       QUERY_NAME
+				       BITS
+				       SCORE
+				       HIT_START
+				       HIT_END
+				       QUERY_START
+				       QUERY_END
+				       QUERY_FRAME
+				       HIT_FRAME
+				       RANK
+				       )], @args);
+
+    $algo = 'GENERIC' unless defined $algo;
+    $self->algorithm($algo);
+
+#    defined $evalue    && $self->evalue($evalue)
+#    $hsp->significance is initialized by the 
+#    the SimilarityPair object - let's only keep one
+#    value, don't need 2 slots.
+
+    defined $pvalue    && $self->pvalue($pvalue);
+    defined $bits      && $self->bits($bits);
+    defined $score     && $self->score($score);
+    my ($queryfactor, $hitfactor) = (0,0);
+
+    if( $algo =~ /^(PSI)?T(BLAST|FAST)[NY]/oi ) {
+	$hitfactor = 1;	
+    } elsif ($algo =~ /^(FAST|BLAST)(X|Y|XY)/oi ) {
+	$queryfactor = 1;	
+    } elsif ($algo =~ /^T(BLAST|FAST)(X|Y|XY)/oi ||
+	     $algo =~ /^(BLAST|FAST)N/oi ||
+	     $algo eq 'WABA' || 
+	     $algo eq 'EXONERATE' || $algo eq 'MEGABLAST' ||
+	     $algo eq 'SMITH-WATERMAN' ){
+	$hitfactor = 1;
+	$queryfactor = 1;
+    } elsif( $algo eq 'RPSBLAST' ) {	
+	$queryfactor = $hitfactor = 0;
+	$qframe = $hframe = 0;
+    }
+    # Store the aligned query as sequence feature
+    my $strand;
+    unless(  defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin @args ($qs,$qe)"); }
+    unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
+    if ($qe > $qs) {  # normal query: start < end
+	if ($queryfactor) { $strand = 1; } else { $strand = undef; }	
+    } else { # reverse query (i dont know if this is possible, 
+	     # but feel free to correct)
+	if ($queryfactor) { $strand = -1; } else { $strand = undef; }
+	($qs,$qe) = ($qe,$qs);
+    
+    }
+    $self->query( new  Bio::SeqFeature::Similarity
+		  ('-primary'  => $self->primary_tag,
+		   '-start'    => $qs,
+		   '-expect'   => $evalue,
+		   '-bits'     => $bits,
+		   '-end'      => $qe,
+		   '-strand'   => $strand,
+		   '-seq_id'   => $query_name,
+		   '-seqlength'=> $query_len,
+		   '-source'   => $algo,
+		   ) );
+    
+    # to determine frame from something like FASTXY which doesn't
+    # report the frame
+    if( defined $strand && ! defined $qframe && $queryfactor ) {
+	$qframe = ( $self->query->start % 3 ) * $strand;
+    } elsif( ! defined $strand ) { 
+	$qframe = 0;
+    }
+    # store the aligned subject as sequence feature
+    if ($he > $hs) {		# normal subject
+	if ($hitfactor) { $strand = 1; } else { $strand = undef; }
+    } else {
+	if ($hitfactor) { $strand = -1; } else { $strand = undef; }
+	($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
+    }
+
+    $self->hit( Bio::SeqFeature::Similarity->new
+		('-start'     => $hs,
+		 '-end'       => $he,
+		 '-strand'    => $strand,
+		 '-expect'    => $evalue,
+		 '-bits'      => $bits,
+		 '-source'    => $algo,
+		 '-seq_id'    => $hit_name,
+		 '-seqlength' => $hit_len,
+		 '-primary'   => $self->primary_tag ));
+    
+    if( defined $strand && ! defined $hframe && $hitfactor ) {
+	$hframe = ( $hs % 3 ) * $strand;
+    } elsif( ! defined $strand ) { 
+	$hframe = 0;
+    }
+
+    $self->frame($qframe,$hframe);
+
+    if( ! defined $query_len || ! defined $hit_len ) { 
+	$self->throw("Must defined hit and query length");
+    }
+
+    if( ! defined $identical ) { 
+	$self->warn("Did not defined the number of identical matches in the HSP assuming 0");
+	$identical = 0;
+    } 
+    if( ! defined $conserved ) {
+	$self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)") 
+	    if( $algo !~ /^((FAST|BLAST)N)|Exonerate/oi);
+	$conserved = $identical;
+    } 
+    # protect for divide by zero if user does not specify 
+    # hsp_len, query_len, or hit_len
+    
+    $self->num_identical($identical);
+    $self->num_conserved($conserved);
+    
+    if( $hsp_len ) {
+	$self->length('total', $hsp_len);
+	$self->frac_identical( 'total', $identical / $self->length('total'));
+	$self->frac_conserved( 'total', $conserved / $self->length('total'));
+    }
+    if( $hit_len ) {
+#	$self->length('hit', $self->hit->length);
+	$self->frac_identical( 'hit', $identical / $self->length('hit'));
+	$self->frac_conserved( 'hit', $conserved / $self->length('hit'));
+    }
+    if( $query_len ) {
+#	$self->length('query', $self->query->length);	
+	$self->frac_identical( 'query', $identical / $self->length('query')) ;
+	$self->frac_conserved( 'query', $conserved / $self->length('query'));
+    }
+    $self->query_string($query_seq);
+    $self->hit_string($hit_seq);
+    $self->homology_string($homology_seq);
+    
+    if( defined $query_gaps ) {
+	$self->gaps('query', $query_gaps);
+    } elsif( defined $query_seq ) {
+	$self->gaps('query', scalar ( $query_seq =~ tr/\-//));
+    }
+    if( defined $hit_gaps ) {
+	$self->gaps('hit', $hit_gaps);
+    } elsif( defined $hit_seq ) {
+	$self->gaps('hit', scalar ( $hit_seq =~ tr/\-//));
+    }
+    if( ! defined $gaps ) {
+	$gaps = $self->gaps("query") + $self->gaps("hit");
+    }
+    $self->gaps('total', $gaps);
+    $self->percent_identity($identical / $hsp_len ) if( $hsp_len > 0 );
+
+    $rank && $self->rank($rank);
+    return $self;
+}
+
+
+
+=head2 Bio::Search::HSP::HSPI methods
+
+Implementation of Bio::Search::HSP::HSPI methods follow
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the HSP
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+sub algorithm{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_algorithm'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_algorithm'} = $value;
+    } 
+
+    return $previous;   
+}
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP or undef 
+ Returns : float or exponential (2e-10)
+           P-value is not defined with NCBI Blast2 reports.
+ Args    : [optional] numeric to set value
+
+=cut
+
+sub pvalue {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_pvalue'};
+    if( defined $value  ) { 	
+	$self->{'_pvalue'} = $value;
+    } 
+    return $previous;   
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : my $evalue = $hsp->evalue();
+ Function: Returns the e-value for this HSP
+ Returns : float or exponential (2e-10)
+ Args    : [optional] numeric to set value
+
+=cut
+
+sub evalue { shift->significance(@_) }
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+ Function: Returns the fraction of identitical positions for this HSP 
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1:  'query' = num identical / length of query seq (without gaps)
+                   'hit'   = num identical / length of hit seq (without gaps)
+                   'total' = num identical / length of alignment (with gaps)
+                   default = 'total' 
+           arg 2: [optional] frac identical value to set for the type requested
+
+=cut
+
+sub frac_identical {
+   my ($self, $type,$value) = @_;
+
+   $type = lc $type if defined $type;
+   $type = 'total' if( ! defined $type ||
+		       $type !~ /query|hit|total/); 
+   my $previous = $self->{'_frac_identical'}->{$type};
+   if( defined $value || ! defined $previous ) { 
+       $value = $previous = '' unless defined $value;
+       if( $type eq 'hit' || $type eq 'query' ) {
+	   $self->$type()->frac_identical( $value);
+       }
+       $self->{'_frac_identical'}->{$type} = $value;
+   } 
+   return $previous;   
+
+}
+
+=head2 frac_conserved
+
+ Title    : frac_conserved
+ Usage    : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+ Function : Returns the fraction of conserved positions for this HSP.
+            This is the fraction of symbols in the alignment with a 
+            positive score.
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1: 'query' = num conserved / length of query seq (without gaps)
+                  'hit'   = num conserved / length of hit seq (without gaps)
+                  'total' = num conserved / length of alignment (with gaps)
+                  default = 'total' 
+           arg 2: [optional] frac conserved value to set for the type requested
+
+=cut
+
+sub frac_conserved {
+    my ($self, $type,$value) = @_;
+    $type = lc $type if defined $type;
+    $type = 'total' if( ! defined $type ||
+			$type !~ /query|hit|total/);
+    my $previous = $self->{'_frac_conserved'}->{$type};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_frac_conserved'}->{$type} = $value;
+    } 
+    return $previous;   
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : arg 1: 'query' = num gaps in query seq
+                   'hit'   = num gaps in hit seq
+                   'total' = num gaps in whole alignment 
+                   default = 'total' 
+            arg 2: [optional] integer gap value to set for the type requested
+
+=cut
+
+sub gaps        {
+    my ($self, $type,$value) = @_;
+    $type = lc $type if defined $type;
+    $type = 'total' if( ! defined $type ||
+			$type !~ /query|hit|subject|sbjct|total/);
+    $type = 'hit' if $type =~ /sbjct|subject/;
+    my $previous = $self->{'_gaps'}->{$type};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_gaps'}->{$type} = $value;
+    }
+    return $previous || 0;
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for query sequence
+
+
+=cut
+
+sub query_string{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_query_string'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_query_string'} = $value;
+	# do some housekeeping so we know when to 
+	# re-run _calculate_seq_positions
+	$self->{'_sequenceschanged'} = 1;
+    } 
+    return $previous;   
+}
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for hit sequence
+
+
+=cut
+
+sub hit_string{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_hit_string'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_hit_string'} = $value;
+	# do some housekeeping so we know when to 
+	# re-run _calculate_seq_positions
+	$self->{'_sequenceschanged'} = 1;
+    } 
+    return $previous;   
+}
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the 
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : [optional] string to set for homology sequence
+
+=cut
+
+sub homology_string{
+    my ($self,$value) = @_;
+    my $previous = $self->{'_homology_string'};
+    if( defined $value || ! defined $previous ) { 
+	$value = $previous = '' unless defined $value;
+	$self->{'_homology_string'} = $value;
+	# do some housekeeping so we know when to 
+	# re-run _calculate_seq_positions
+	$self->{'_sequenceschanged'} = 1;
+    } 
+    return $previous;   
+}
+
+=head2 length
+
+ Title    : length
+ Usage    : my $len = $hsp->length( ['query'|'hit'|'total'] );
+ Function : Returns the length of the query or hit in the alignment 
+            (without gaps) 
+            or the aggregate length of the HSP (including gaps;
+            this may be greater than either hit or query )
+ Returns  : integer
+ Args     : arg 1: 'query' = length of query seq (without gaps)
+                   'hit'   = length of hit seq (without gaps)
+                   'total' = length of alignment (with gaps)
+                   default = 'total' 
+            arg 2: [optional] integer length value to set for specific type
+
+=cut
+
+sub length {
+
+    my $self = shift;
+    my $type = shift;
+
+    $type = 'total' unless defined $type;
+    $type = lc $type;
+
+    if( $type =~ /^q/i ) {
+	return $self->query()->length(shift);
+    } elsif( $type =~ /^(hit|subject|sbjct)/ ) {
+	return $self->hit()->length(shift);
+    } else { 
+	my $v = shift;
+	if( defined $v ) { 
+	    $self->{'_hsplength'} = $v;
+	}
+	return $self->{'_hsplength'};
+   }
+    return 0; # should never get here
+}
+
+=head2 hsp_length
+
+ Title   : hsp_length
+ Usage   : my $len = $hsp->hsp_length()
+ Function: shortcut  length('hsp')
+ Returns : floating point between 0 and 100 
+ Args    : none
+
+=cut
+
+sub hsp_length { return shift->length('hsp', shift); }
+
+=head2 percent_identity
+
+ Title   : percent_identity
+ Usage   : my $percentid = $hsp->percent_identity()
+ Function: Returns the calculated percent identity for an HSP
+ Returns : floating point between 0 and 100 
+ Args    : none
+
+
+=cut
+
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $hsp->frame($queryframe,$subjectframe)
+ Function: Set the Frame for both query and subject and insure that
+           they agree.
+           This overrides the frame() method implementation in
+           FeaturePair.
+ Returns : array of query and subjects if return type wants an array
+           or query frame if defined or subject frame
+ Args    : none
+ Note    : Frames are stored in the GFF way (0-2) not 1-3
+           as they are in BLAST (negative frames are deduced by checking 
+				 the strand of the query or hit)
+
+=cut
+
+sub frame {
+    my ($self, $qframe, $sframe) = @_;
+    if( defined $qframe ) {
+	if( $qframe == 0 ) {
+	    $qframe = 0;
+	} elsif( $qframe !~ /^([+-])?([1-3])/ ) {
+	    $self->warn("Specifying an invalid query frame ($qframe)");
+	    $qframe = undef;
+	} else {
+	    my $dir = $1;
+	    $dir = '+' unless defined $dir;
+	    if( ($dir eq '-' && $self->query->strand >= 0) ||
+		($dir eq '+' && $self->query->strand <= 0) ) {
+		$self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
+	    }
+	    # Set frame to GFF [0-2] -
+	    # what if someone tries to put in a GFF frame!
+	    $qframe = $2 - 1;	    
+	}
+	$self->query->frame($qframe);
+    }
+    if( defined $sframe ) {
+	  if( $sframe == 0 ) {
+	    $sframe = 0;
+	  } elsif( $sframe !~ /^([+-])?([1-3])/ ) {
+	    $self->warn("Specifying an invalid subject frame ($sframe)");
+	    $sframe = undef;
+	  } else {
+	      my $dir = $1;
+	      $dir = '+' unless defined $dir;
+	      if( ($dir eq '-' && $self->hit->strand >= 0) ||
+		  ($dir eq '+' && $self->hit->strand <= 0) )
+	      {
+		  $self->warn("Subject frame ($sframe) did not match strand of subject (". $self->hit->strand() . ")");
+	      }
+
+	      # Set frame to GFF [0-2]
+	      $sframe = $2 - 1;
+	  }
+	  $self->hit->frame($sframe);
+      }
+    if (wantarray() && $self->algorithm =~ /^T(BLAST|FAST)(X|Y|XY)/oi)
+    {
+	return ($self->query->frame(), $self->hit->frame());
+    } elsif (wantarray())  {
+	($self->query->frame() &&
+	 return ($self->query->frame(), undef)) ||
+	     ($self->hit->frame() &&
+	      return (undef, $self->hit->frame()));
+    } else {
+	($self->query->frame() &&
+	 return $self->query->frame()) ||
+	($self->hit->frame() &&
+	 return $self->hit->frame());
+    }
+}
+
+
+=head2 get_aln
+
+ Title   : get_aln
+ Usage   : my $aln = $hsp->gel_aln
+ Function: Returns a Bio::SimpleAlign representing the HSP alignment
+ Returns : Bio::SimpleAlign
+ Args    : none
+
+=cut
+
+sub get_aln {
+    my ($self) = @_;
+    require Bio::LocatableSeq;
+    require Bio::SimpleAlign;
+    my $aln = new Bio::SimpleAlign;
+    my $hs = $self->hit_string();
+    my $qs = $self->query_string();
+    # FASTA specific stuff moved to the FastaHSP object
+    my $seqonly = $qs;
+    $seqonly =~ s/[\-\s]//g;
+    my ($q_nm,$s_nm) = ($self->query->seq_id(),
+			$self->hit->seq_id());
+    unless( defined $q_nm && CORE::length ($q_nm) ) {
+	$q_nm = 'query';
+    }
+    unless( defined $s_nm && CORE::length ($s_nm) ) {
+	$s_nm = 'hit';
+    }
+    my $query = new Bio::LocatableSeq('-seq'   => $qs,
+				      '-id'    => $q_nm,
+				      '-start' => 1,
+				      '-end' => CORE::length($seqonly),
+				      );
+    $seqonly = $hs;
+    $seqonly =~ s/[\-\s]//g;
+    my $hit =  new Bio::LocatableSeq('-seq'   => $hs,
+				      '-id'    => $s_nm,
+				      '-start' => 1,
+				      '-end' => CORE::length($seqonly),
+				      );
+    $aln->add_seq($query);
+    $aln->add_seq($hit);
+    return $aln;
+}
+
+=head2 num_conserved
+
+ Title   : num_conserved
+ Usage   : $obj->num_conserved($newval)
+ Function: returns the number of conserved residues in the alignment
+ Returns : inetger
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_conserved{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $self->{'num_conserved'} = $value;
+   }
+   return $self->{'num_conserved'};
+}
+
+=head2 num_identical
+
+ Title   : num_identical
+ Usage   : $obj->num_identical($newval)
+ Function: returns the number of identical residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_identical{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       $self->{'_num_identical'} = $value;
+   }
+   return $self->{'_num_identical'};
+}
+
+=head2 rank
+
+ Usage     : $hsp->rank( [string] );
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+sub rank { 
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'_rank'} = $value;
+    }
+    return $self->{'_rank'};
+}
+
+
+=head2 seq_inds
+
+ Title   : seq_inds
+ Purpose   : Get a list of residue positions (indices) for all identical 
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+             @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical'); 
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers 
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+           :  ('sbjct' is synonymous with 'hit') 
+           : class     = 'identical' or 'conserved' or 'nomatch' or 'gap'
+           :              (default = identical)
+           :              (can be shortened to 'id' or 'cons')
+           :             or 'conserved-not-identical'
+           : collapse  = boolean, if true, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+           :             collapses to "1-5 7 9-11". This is useful for 
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : 
+
+See Also   : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>, 
+             L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub seq_inds{
+   my ($self, $seqType, $class, $collapse) = @_;
+
+   # prepare the internal structures - this is cached so
+   # if the strings have not changed we're okay
+   $self->_calculate_seq_positions();
+
+   $seqType  ||= 'query';
+   $class ||= 'identical';
+   $collapse ||= 0;
+   $seqType = 'sbjct' if $seqType eq 'hit';
+   my $t = lc(substr($seqType,0,1));
+   if( $t eq 'q' ) {
+       $seqType = 'query';
+   } elsif ( $t eq 's' || $t eq 'h' ) {
+       $seqType = 'sbjct';
+   } else { 
+       $self->warn("unknown seqtype $seqType using 'query'");
+       $seqType = 'query';
+   }
+   $t = lc(substr($class,0,1));
+ 
+   if( $t eq 'c' ) {
+       if( $class =~ /conserved\-not\-identical/ ) {
+	   $class = 'conserved';
+       } else { 
+	   $class = 'conservedall';
+       }
+   } elsif( $t eq 'i' ) {
+       $class = 'identical';
+   } elsif( $t eq 'n' ) {
+       $class = 'nomatch';
+   } elsif( $t eq 'g' ) {
+       $class = 'gap';
+   } else { 
+       $self->warn("unknown sequence class $class using 'identical'");
+       $class = 'identical';
+   }
+   
+   ## Sensitive to member name changes.
+   $seqType  = "_\L$seqType\E";
+   $class = "_\L$class\E";
+   my @ary;
+
+   if( $class eq '_gap' ) {
+       # this means that we are remapping the gap length that is stored
+       # in the hash (for example $self->{'_gapRes_query'} ) 
+       # so we'll return an array which has the values of the position of the 
+       # of the gap (the key in the hash) + the gap length (value in the
+       # hash for this key - 1.
+
+       @ary = map { $_ > 1 ?
+			$_..($_ + $self->{"${class}Res$seqType"}->{$_} - 1) : 
+			$_ }
+              sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
+   } elsif( $class eq '_conservedall' ) {
+       @ary = sort { $a <=> $b } 
+       keys %{ $self->{"_conservedRes$seqType"}},
+       keys %{ $self->{"_identicalRes$seqType"}},
+   }  else { 
+       @ary = sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
+   } 
+   require Bio::Search::BlastUtils if $collapse;
+   
+   return $collapse ? &Bio::Search::SearchUtils::collapse_nums(@ary) : @ary;
+}
+
+
+=head2 Inherited from Bio::SeqFeature::SimilarityPair
+
+These methods come from Bio::SeqFeature::SimilarityPair
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: Get/Set the significance value
+ Returns : numeric
+ Args    : [optional] new value to set
+
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score = $hsp->score();
+ Function: Returns the score for this HSP or undef 
+ Returns : numeric           
+ Args    : [optional] numeric to set value
+
+=cut 
+
+# overriding 
+
+sub score {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_score'};
+    if( defined $value ) { 
+        $self->{'_score'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 bits
+
+ Title   : bits
+ Usage   : my $bits = $hsp->bits();
+ Function: Returns the bit value for this HSP or undef 
+ Returns : numeric
+ Args    : none
+
+=cut
+
+# overriding 
+
+sub bits {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_bits'};
+    if( defined $value ) { 
+        $self->{'_bits'} = $value;
+    } 
+    return $previous;
+}
+
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $hsp->strand('quer')
+ Function: Retrieves the strand for the HSP component requested
+ Returns : +1 or -1 (0 if unknown)
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
+           'query' to retrieve the query strand (default)
+
+=cut
+
+=head1 Private methods
+
+=cut
+
+=head2 _calculate_seq_positions
+
+ Title   : _calculate_seq_positions
+ Usage   : $self->_calculate_seq_positions
+ Function: Internal function
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _calculate_seq_positions {
+    my ($self,@args) = @_;
+    return unless ( $self->{'_sequenceschanged'} );
+    $self->{'_sequenceschanged'} = 0;
+    my ($mchar, $schar, $qchar);
+    my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
+				     $self->query_string(),
+				     $self->hit_string() );
+
+    # Using hashes to avoid saving duplicate residue numbers.
+    my %identicalList_query = ();
+    my %identicalList_sbjct = ();
+    my %conservedList_query = ();
+    my %conservedList_sbjct = ();
+    
+    my %gapList_query = ();
+    my %gapList_sbjct = ();
+    my %nomatchList_query = ();
+    my %nomatchList_sbjct = ();
+
+    my $qdir = $self->query->strand || 1;
+    my $sdir = $self->hit->strand || 1;
+    my $resCount_query = ($qdir >=0) ? $self->query->end : $self->query->start;
+    my $resCount_sbjct = ($sdir >=0) ? $self->hit->end : $self->hit->start;
+    
+    my $prog = $self->algorithm;
+    if( $prog  =~ /FAST/i ) {
+	# fasta reports some extra 'regional' sequence information
+	# we need to clear out first
+	# this seemed a bit insane to me at first, but it appears to 
+	# work --jason
+	
+	# we infer the end of the regional sequence where the first
+	# non space is in the homology string
+	# then we use the HSP->length to tell us how far to read
+	# to cut off the end of the sequence
+
+	# one possible problem is the sequence which 
+	
+	my ($start) = (0);
+	if( $seqString =~ /^(\s+)/ ) {
+	    $start = CORE::length($1);
+	}
+
+	$seqString = substr($seqString, $start,$self->length('total'));
+	$qseq = substr($qseq, $start,$self->length('total'));
+	$sseq = substr($sseq, $start,$self->length('total'));
+
+	$qseq =~ s![\\\/]!!g;
+	$sseq =~ s![\\\/]!!g;
+    }
+    
+    if($prog =~ /^(PSI)?T(BLAST|FAST)N/oi ) {
+	$resCount_sbjct = int($resCount_sbjct / 3);
+    } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi  ) {
+	$resCount_query = int($resCount_query / 3);
+    } elsif($prog =~ /^T(BLAST|FAST)(X|Y|XY)/oi ) {
+	$resCount_query = int($resCount_query / 3);
+	$resCount_sbjct = int($resCount_sbjct / 3);
+    }    
+    while( $mchar = chop($seqString) ) {
+	($qchar, $schar) = (chop($qseq), chop($sseq));
+	if( $mchar eq '+' || $mchar eq '.' || $mchar eq ':' ) { 
+	    $conservedList_query{ $resCount_query } = 1; 
+	    $conservedList_sbjct{ $resCount_sbjct } = 1;
+	} elsif( $mchar ne ' ' ) { 
+	    $identicalList_query{ $resCount_query } = 1; 
+	    $identicalList_sbjct{ $resCount_sbjct } = 1;
+	} elsif( $mchar eq ' ') { 
+	    $nomatchList_query{ $resCount_query } = 1;
+	    $nomatchList_sbjct{ $resCount_sbjct } = 1;
+	}
+	if( $qchar eq $GAP_SYMBOL ) {
+	    $gapList_query{ $resCount_query } ++;
+	} else { 	    
+	    $resCount_query -= $qdir;
+	}
+	if( $schar eq $GAP_SYMBOL ) {
+	    $gapList_sbjct{ $resCount_query } ++;
+	} else {
+	    $resCount_sbjct -=$sdir;
+	}
+    }
+    $self->{'_identicalRes_query'} = \%identicalList_query;
+    $self->{'_conservedRes_query'} = \%conservedList_query;
+    $self->{'_nomatchRes_query'}   = \%nomatchList_query;
+    $self->{'_gapRes_query'}       = \%gapList_query;
+
+    $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
+    $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
+    $self->{'_nomatchRes_sbjct'}   = \%nomatchList_sbjct;
+    $self->{'_gapRes_sbjct'}       = \%gapList_sbjct;
+    return 1;
+}
+
+=head2 n
+
+See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
+
+=cut
+
+#-----
+sub n { 
+    my $self = shift; 
+    if(@_) { $self->{'_n'} = shift; }
+    defined $self->{'_n'} ? $self->{'_n'} : '';
+}
+
+=head2 range
+
+See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
+
+=cut
+
+#----------
+sub range {
+#----------
+    my ($self, $seqType) = @_;
+
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my ($start, $end);
+    if( $seqType eq 'query' ) {
+        $start = $self->query->start;
+        $end = $self->query->end;
+    }
+    else {
+        $start = $self->hit->start;
+        $end = $self->hit->end;
+    }
+    return ($start, $end);
+}
+
+
+1;