Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Search/HSP/HSPI.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #----------------------------------------------------------------- | |
| 2 # $Id: HSPI.pm,v 1.21.2.1 2003/01/22 22:51:00 jason Exp $ | |
| 3 # | |
| 4 # BioPerl module for Bio::Search::HSP::HSPI | |
| 5 # | |
| 6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
| 7 # and Jason Stajich <jason@bioperl.org> | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 #----------------------------------------------------------------- | |
| 11 | |
| 12 # POD documentation - main docs before the code | |
| 13 | |
| 14 =head1 NAME | |
| 15 | |
| 16 Bio::Search::HSP::HSPI - Interface for a High Scoring Pair in a similarity search result | |
| 17 | |
| 18 =head1 SYNOPSIS | |
| 19 | |
| 20 $r_type = $hsp->algorithm | |
| 21 | |
| 22 $pvalue = $hsp->pvalue(); | |
| 23 | |
| 24 $evalue = $hsp->evalue(); | |
| 25 | |
| 26 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] ); | |
| 27 | |
| 28 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] ); | |
| 29 | |
| 30 $gaps = $hsp->gaps( ['query'|'hit'|'total'] ); | |
| 31 | |
| 32 $qseq = $hsp->query_string; | |
| 33 | |
| 34 $hseq = $hsp->hit_string; | |
| 35 | |
| 36 $homology_string = $hsp->homology_string; | |
| 37 | |
| 38 $len = $hsp->length( ['query'|'hit'|'total'] ); | |
| 39 | |
| 40 $rank = $hsp->rank; | |
| 41 | |
| 42 =head1 DESCRIPTION | |
| 43 | |
| 44 Bio::Search::HSP::HSPI objects cannot be instantiated since this | |
| 45 module defines a pure interface. | |
| 46 | |
| 47 Given an object that implements the Bio::Search::HSP::HSPI interface, | |
| 48 you can do the following things with it: | |
| 49 | |
| 50 =head1 SEE ALSO | |
| 51 | |
| 52 This interface inherits methods from these other modules: | |
| 53 | |
| 54 L<Bio::SeqFeatureI>, | |
| 55 L<Bio::SeqFeature::FeaturePair> | |
| 56 L<Bio::SeqFeature::SimilarityPair> | |
| 57 | |
| 58 Please refer to these modules for documentation of the | |
| 59 many additional inherited methods. | |
| 60 | |
| 61 =head1 FEEDBACK | |
| 62 | |
| 63 =head2 Mailing Lists | |
| 64 | |
| 65 User feedback is an integral part of the evolution of this and other | |
| 66 Bioperl modules. Send your comments and suggestions preferably to | |
| 67 the Bioperl mailing list. Your participation is much appreciated. | |
| 68 | |
| 69 bioperl-l@bioperl.org - General discussion | |
| 70 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 71 | |
| 72 =head2 Reporting Bugs | |
| 73 | |
| 74 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 75 of the bugs and their resolution. Bug reports can be submitted via | |
| 76 email or the web: | |
| 77 | |
| 78 bioperl-bugs@bioperl.org | |
| 79 http://bugzilla.bioperl.org/ | |
| 80 | |
| 81 =head1 AUTHOR - Steve Chervitz, Jason Stajich | |
| 82 | |
| 83 Email sac@bioperl.org | |
| 84 Email jason@bioperl.org | |
| 85 | |
| 86 =head1 COPYRIGHT | |
| 87 | |
| 88 Copyright (c) 2001 Steve Chervitz, Jason Stajich. All Rights Reserved. | |
| 89 | |
| 90 =head1 DISCLAIMER | |
| 91 | |
| 92 This software is provided "as is" without warranty of any kind. | |
| 93 | |
| 94 =head1 APPENDIX | |
| 95 | |
| 96 The rest of the documentation details each of the object methods. | |
| 97 Internal methods are usually preceded with a _ | |
| 98 | |
| 99 =cut | |
| 100 | |
| 101 | |
| 102 # Let the code begin... | |
| 103 | |
| 104 | |
| 105 package Bio::Search::HSP::HSPI; | |
| 106 use vars qw(@ISA); | |
| 107 | |
| 108 use Bio::Root::RootI; | |
| 109 use Bio::SeqFeature::SimilarityPair; | |
| 110 | |
| 111 use strict; | |
| 112 use Carp; | |
| 113 | |
| 114 @ISA = qw(Bio::SeqFeature::SimilarityPair Bio::Root::RootI); | |
| 115 | |
| 116 | |
| 117 =head2 algorithm | |
| 118 | |
| 119 Title : algorithm | |
| 120 Usage : my $r_type = $hsp->algorithm | |
| 121 Function: Obtain the name of the algorithm used to obtain the HSP | |
| 122 Returns : string (e.g., BLASTP) | |
| 123 Args : none | |
| 124 | |
| 125 =cut | |
| 126 | |
| 127 sub algorithm{ | |
| 128 my ($self,@args) = @_; | |
| 129 $self->throw_not_implemented; | |
| 130 } | |
| 131 | |
| 132 =head2 pvalue | |
| 133 | |
| 134 Title : pvalue | |
| 135 Usage : my $pvalue = $hsp->pvalue(); | |
| 136 Function: Returns the P-value for this HSP or undef | |
| 137 Returns : float or exponential (2e-10) | |
| 138 P-value is not defined with NCBI Blast2 reports. | |
| 139 Args : none | |
| 140 | |
| 141 =cut | |
| 142 | |
| 143 sub pvalue { | |
| 144 my ($self) = @_; | |
| 145 $self->throw_not_implemented; | |
| 146 } | |
| 147 | |
| 148 =head2 evalue | |
| 149 | |
| 150 Title : evalue | |
| 151 Usage : my $evalue = $hsp->evalue(); | |
| 152 Function: Returns the e-value for this HSP | |
| 153 Returns : float or exponential (2e-10) | |
| 154 Args : none | |
| 155 | |
| 156 =cut | |
| 157 | |
| 158 sub evalue { | |
| 159 my ($self) = @_; | |
| 160 $self->throw_not_implemented; | |
| 161 } | |
| 162 | |
| 163 =head2 frac_identical | |
| 164 | |
| 165 Title : frac_identical | |
| 166 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] ); | |
| 167 Function: Returns the fraction of identitical positions for this HSP | |
| 168 Returns : Float in range 0.0 -> 1.0 | |
| 169 Args : 'query' = num identical / length of query seq (without gaps) | |
| 170 'hit' = num identical / length of hit seq (without gaps) | |
| 171 'total' = num identical / length of alignment (with gaps) | |
| 172 default = 'total' | |
| 173 | |
| 174 =cut | |
| 175 | |
| 176 sub frac_identical { | |
| 177 my ($self, $type) = @_; | |
| 178 $self->throw_not_implemented; | |
| 179 } | |
| 180 | |
| 181 =head2 frac_conserved | |
| 182 | |
| 183 Title : frac_conserved | |
| 184 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] ); | |
| 185 Function : Returns the fraction of conserved positions for this HSP. | |
| 186 This is the fraction of symbols in the alignment with a | |
| 187 positive score. | |
| 188 Returns : Float in range 0.0 -> 1.0 | |
| 189 Args : 'query' = num conserved / length of query seq (without gaps) | |
| 190 'hit' = num conserved / length of hit seq (without gaps) | |
| 191 'total' = num conserved / length of alignment (with gaps) | |
| 192 default = 'total' | |
| 193 | |
| 194 =cut | |
| 195 | |
| 196 sub frac_conserved { | |
| 197 my ($self, $type) = @_; | |
| 198 $self->throw_not_implemented; | |
| 199 } | |
| 200 | |
| 201 =head2 num_identical | |
| 202 | |
| 203 Title : num_identical | |
| 204 Usage : $obj->num_identical($newval) | |
| 205 Function: returns the number of identical residues in the alignment | |
| 206 Returns : integer | |
| 207 Args : integer (optional) | |
| 208 | |
| 209 | |
| 210 =cut | |
| 211 | |
| 212 sub num_identical{ | |
| 213 shift->throw_not_implemented; | |
| 214 } | |
| 215 | |
| 216 =head2 num_conserved | |
| 217 | |
| 218 Title : num_conserved | |
| 219 Usage : $obj->num_conserved($newval) | |
| 220 Function: returns the number of conserved residues in the alignment | |
| 221 Returns : inetger | |
| 222 Args : integer (optional) | |
| 223 | |
| 224 | |
| 225 =cut | |
| 226 | |
| 227 sub num_conserved{ | |
| 228 shift->throw_not_implemented(); | |
| 229 } | |
| 230 | |
| 231 =head2 gaps | |
| 232 | |
| 233 Title : gaps | |
| 234 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] ); | |
| 235 Function : Get the number of gaps in the query, hit, or total alignment. | |
| 236 Returns : Integer, number of gaps or 0 if none | |
| 237 Args : 'query' = num conserved / length of query seq (without gaps) | |
| 238 'hit' = num conserved / length of hit seq (without gaps) | |
| 239 'total' = num conserved / length of alignment (with gaps) | |
| 240 default = 'total' | |
| 241 | |
| 242 =cut | |
| 243 | |
| 244 sub gaps { | |
| 245 my ($self, $type) = @_; | |
| 246 $self->throw_not_implemented; | |
| 247 } | |
| 248 | |
| 249 =head2 query_string | |
| 250 | |
| 251 Title : query_string | |
| 252 Usage : my $qseq = $hsp->query_string; | |
| 253 Function: Retrieves the query sequence of this HSP as a string | |
| 254 Returns : string | |
| 255 Args : none | |
| 256 | |
| 257 | |
| 258 =cut | |
| 259 | |
| 260 sub query_string{ | |
| 261 my ($self) = @_; | |
| 262 $self->throw_not_implemented; | |
| 263 } | |
| 264 | |
| 265 =head2 hit_string | |
| 266 | |
| 267 Title : hit_string | |
| 268 Usage : my $hseq = $hsp->hit_string; | |
| 269 Function: Retrieves the hit sequence of this HSP as a string | |
| 270 Returns : string | |
| 271 Args : none | |
| 272 | |
| 273 | |
| 274 =cut | |
| 275 | |
| 276 sub hit_string{ | |
| 277 my ($self) = @_; | |
| 278 $self->throw_not_implemented; | |
| 279 } | |
| 280 | |
| 281 =head2 homology_string | |
| 282 | |
| 283 Title : homology_string | |
| 284 Usage : my $homo_string = $hsp->homology_string; | |
| 285 Function: Retrieves the homology sequence for this HSP as a string. | |
| 286 : The homology sequence is the string of symbols in between the | |
| 287 : query and hit sequences in the alignment indicating the degree | |
| 288 : of conservation (e.g., identical, similar, not similar). | |
| 289 Returns : string | |
| 290 Args : none | |
| 291 | |
| 292 =cut | |
| 293 | |
| 294 sub homology_string{ | |
| 295 my ($self) = @_; | |
| 296 $self->throw_not_implemented; | |
| 297 } | |
| 298 | |
| 299 =head2 length | |
| 300 | |
| 301 Title : length | |
| 302 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] ); | |
| 303 Function : Returns the length of the query or hit in the alignment (without gaps) | |
| 304 or the aggregate length of the HSP (including gaps; | |
| 305 this may be greater than either hit or query ) | |
| 306 Returns : integer | |
| 307 Args : 'query' = length of query seq (without gaps) | |
| 308 'hit' = length of hit seq (without gaps) | |
| 309 'total' = length of alignment (with gaps) | |
| 310 default = 'total' | |
| 311 Args : none | |
| 312 | |
| 313 =cut | |
| 314 | |
| 315 sub length{ | |
| 316 shift->throw_not_implemented(); | |
| 317 } | |
| 318 | |
| 319 =head2 percent_identity | |
| 320 | |
| 321 Title : percent_identity | |
| 322 Usage : my $percentid = $hsp->percent_identity() | |
| 323 Function: Returns the calculated percent identity for an HSP | |
| 324 Returns : floating point between 0 and 100 | |
| 325 Args : none | |
| 326 | |
| 327 | |
| 328 =cut | |
| 329 | |
| 330 sub percent_identity{ | |
| 331 my ($self) = @_; | |
| 332 return $self->frac_identical('hsp') * 100; | |
| 333 } | |
| 334 | |
| 335 =head2 get_aln | |
| 336 | |
| 337 Title : get_aln | |
| 338 Usage : my $aln = $hsp->gel_aln | |
| 339 Function: Returns a Bio::SimpleAlign representing the HSP alignment | |
| 340 Returns : Bio::SimpleAlign | |
| 341 Args : none | |
| 342 | |
| 343 =cut | |
| 344 | |
| 345 sub get_aln { | |
| 346 my ($self) = @_; | |
| 347 $self->throw_not_implemented; | |
| 348 } | |
| 349 | |
| 350 | |
| 351 =head2 seq_inds | |
| 352 | |
| 353 Title : seq_inds | |
| 354 Purpose : Get a list of residue positions (indices) for all identical | |
| 355 : or conserved residues in the query or sbjct sequence. | |
| 356 Example : @s_ind = $hsp->seq_inds('query', 'identical'); | |
| 357 : @h_ind = $hsp->seq_inds('hit', 'conserved'); | |
| 358 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1); | |
| 359 Returns : List of integers | |
| 360 : May include ranges if collapse is true. | |
| 361 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query) | |
| 362 ('sbjct' is synonymous with 'hit') | |
| 363 class = 'identical' or 'conserved' or 'nomatch' or 'gap' | |
| 364 (default = identical) | |
| 365 (can be shortened to 'id' or 'cons') | |
| 366 | |
| 367 collapse = boolean, if true, consecutive positions are merged | |
| 368 using a range notation, e.g., "1 2 3 4 5 7 9 10 11" | |
| 369 collapses to "1-5 7 9-11". This is useful for | |
| 370 consolidating long lists. Default = no collapse. | |
| 371 Throws : n/a. | |
| 372 Comments : | |
| 373 | |
| 374 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI> | |
| 375 | |
| 376 =cut | |
| 377 | |
| 378 sub seq_inds { | |
| 379 shift->throw_not_implemented(); | |
| 380 } | |
| 381 | |
| 382 =head2 Inherited from Bio::SeqFeature::SimilarityPair | |
| 383 | |
| 384 These methods come from Bio::SeqFeature::SimilarityPair | |
| 385 | |
| 386 =head2 query | |
| 387 | |
| 388 Title : query | |
| 389 Usage : my $query = $hsp->query | |
| 390 Function: Returns a SeqFeature representing the query in the HSP | |
| 391 Returns : Bio::SeqFeature::Similarity | |
| 392 Args : [optional] new value to set | |
| 393 | |
| 394 | |
| 395 =head2 hit | |
| 396 | |
| 397 Title : hit | |
| 398 Usage : my $hit = $hsp->hit | |
| 399 Function: Returns a SeqFeature representing the hit in the HSP | |
| 400 Returns : Bio::SeqFeature::Similarity | |
| 401 Args : [optional] new value to set | |
| 402 | |
| 403 | |
| 404 =head2 significance | |
| 405 | |
| 406 Title : significance | |
| 407 Usage : $evalue = $obj->significance(); | |
| 408 $obj->significance($evalue); | |
| 409 Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair) | |
| 410 Returns : significance value (scientific notation string) | |
| 411 Args : significance value (sci notation string) | |
| 412 | |
| 413 | |
| 414 =head2 score | |
| 415 | |
| 416 Title : score | |
| 417 Usage : my $score = $hsp->score(); | |
| 418 Function: Returns the score for this HSP or undef | |
| 419 Returns : numeric | |
| 420 Args : [optional] numeric to set value | |
| 421 | |
| 422 =head2 bits | |
| 423 | |
| 424 Title : bits | |
| 425 Usage : my $bits = $hsp->bits(); | |
| 426 Function: Returns the bit value for this HSP or undef | |
| 427 Returns : numeric | |
| 428 Args : none | |
| 429 | |
| 430 =cut | |
| 431 | |
| 432 # override | |
| 433 | |
| 434 =head2 strand | |
| 435 | |
| 436 Title : strand | |
| 437 Usage : $hsp->strand('query') | |
| 438 Function: Retrieves the strand for the HSP component requested | |
| 439 Returns : +1 or -1 (0 if unknown) | |
| 440 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject | |
| 441 'query' to retrieve the query strand (default) | |
| 442 'list' or 'array' to retreive both query and hit together | |
| 443 | |
| 444 =cut | |
| 445 | |
| 446 sub strand { | |
| 447 my $self = shift; | |
| 448 my $val = shift; | |
| 449 $val = 'query' unless defined $val; | |
| 450 $val =~ s/^\s+//; | |
| 451 | |
| 452 if( $val =~ /^q/i ) { | |
| 453 return $self->query->strand(shift); | |
| 454 } elsif( $val =~ /^(hi|s)/i ) { | |
| 455 return $self->hit->strand(shift); | |
| 456 } elsif ( $val =~ m/^(list|array)/) { | |
| 457 return ($self->query->strand(shift), $self->hit->strand(shift)); | |
| 458 } else { | |
| 459 $self->warn("unrecognized component $val requested\n"); | |
| 460 } | |
| 461 return 0; | |
| 462 } | |
| 463 | |
| 464 =head2 start | |
| 465 | |
| 466 Title : start | |
| 467 Usage : $hsp->start('query') | |
| 468 Function: Retrieves the start for the HSP component requested | |
| 469 Returns : integer | |
| 470 Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject | |
| 471 'query' to retrieve the query start (default) | |
| 472 | |
| 473 =cut | |
| 474 | |
| 475 sub start { | |
| 476 my $self = shift; | |
| 477 my $val = shift; | |
| 478 $val = 'query' unless defined $val; | |
| 479 $val =~ s/^\s+//; | |
| 480 | |
| 481 if( $val =~ /^q/i ) { | |
| 482 return $self->query->start(shift); | |
| 483 } elsif( $val =~ /^(hi|s)/i ) { | |
| 484 return $self->hit->start(shift); | |
| 485 } else { | |
| 486 $self->warn("unrecognized component $val requested\n"); | |
| 487 } | |
| 488 return 0; | |
| 489 } | |
| 490 | |
| 491 =head2 end | |
| 492 | |
| 493 Title : end | |
| 494 Usage : $hsp->end('query') | |
| 495 Function: Retrieves the end for the HSP component requested | |
| 496 Returns : integer | |
| 497 Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject | |
| 498 'query' to retrieve the query end (default) | |
| 499 | |
| 500 =cut | |
| 501 | |
| 502 sub end { | |
| 503 my $self = shift; | |
| 504 my $val = shift; | |
| 505 $val = 'query' unless defined $val; | |
| 506 $val =~ s/^\s+//; | |
| 507 | |
| 508 if( $val =~ /^q/i ) { | |
| 509 return $self->query->end(shift); | |
| 510 } elsif( $val =~ /^(hi|s)/i ) { | |
| 511 return $self->hit->end(shift); | |
| 512 } else { | |
| 513 $self->warn("unrecognized component $val requested\n"); | |
| 514 } | |
| 515 return 0; | |
| 516 } | |
| 517 | |
| 518 =head2 seq_str | |
| 519 | |
| 520 Usage : $hsp->seq_str( seq_type ); | |
| 521 Purpose : Get the full query, sbjct, or 'match' sequence as a string. | |
| 522 : The 'match' sequence is the string of symbols in between the | |
| 523 : query and sbjct sequences. | |
| 524 Example : $str = $hsp->seq_str('query'); | |
| 525 Returns : String | |
| 526 Argument : seq_Type = 'query' or 'hit' or 'sbjct' or 'match' | |
| 527 : ('sbjct' is synonymous with 'hit') | |
| 528 : default is 'query' | |
| 529 Throws : Exception if the argument does not match an accepted seq_type. | |
| 530 Comments : | |
| 531 | |
| 532 See Also : L<seq()|seq>, L<seq_inds()|seq_inds>, B<_set_match_seq()> | |
| 533 | |
| 534 =cut | |
| 535 | |
| 536 sub seq_str { | |
| 537 my $self = shift; | |
| 538 my $type = shift; | |
| 539 if( $type =~ /^q/i ) { return $self->query_string(shift) } | |
| 540 elsif( $type =~ /^(s|hi)/i ) { return $self->hit_string(shift)} | |
| 541 elsif ( $type =~ /^(ho|ma)/i ) { return $self->hit_string(shift) } | |
| 542 else { | |
| 543 $self->warn("unknown sequence type $type"); | |
| 544 } | |
| 545 return ''; | |
| 546 } | |
| 547 | |
| 548 | |
| 549 =head2 rank | |
| 550 | |
| 551 Usage : $hsp->rank( [string] ); | |
| 552 Purpose : Get the rank of the HSP within a given Blast hit. | |
| 553 Example : $rank = $hsp->rank; | |
| 554 Returns : Integer (1..n) corresponding to the order in which the HSP | |
| 555 appears in the BLAST report. | |
| 556 | |
| 557 =cut | |
| 558 | |
| 559 sub rank { shift->throw_not_implemented } | |
| 560 | |
| 561 =head2 matches | |
| 562 | |
| 563 Usage : $hsp->matches([seq_type], [start], [stop]); | |
| 564 Purpose : Get the total number of identical and conservative matches | |
| 565 : in the query or sbjct sequence for the given HSP. Optionally can | |
| 566 : report data within a defined interval along the seq. | |
| 567 : (Note: 'conservative' matches are called 'positives' in the | |
| 568 : Blast report.) | |
| 569 Example : ($id,$cons) = $hsp_object->matches('hit'); | |
| 570 : ($id,$cons) = $hsp_object->matches('query',300,400); | |
| 571 Returns : 2-element array of integers | |
| 572 Argument : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query) | |
| 573 : ('sbjct' is synonymous with 'hit') | |
| 574 : (2) start = Starting coordinate (optional) | |
| 575 : (3) stop = Ending coordinate (optional) | |
| 576 Throws : Exception if the supplied coordinates are out of range. | |
| 577 Comments : Relies on seq_str('match') to get the string of alignment symbols | |
| 578 : between the query and sbjct lines which are used for determining | |
| 579 : the number of identical and conservative matches. | |
| 580 | |
| 581 See Also : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit> | |
| 582 | |
| 583 =cut | |
| 584 | |
| 585 #----------- | |
| 586 sub matches { | |
| 587 #----------- | |
| 588 my( $self, %param ) = @_; | |
| 589 my(@data); | |
| 590 my($seqType, $beg, $end) = ($param{-SEQ}, $param{-START}, $param{-STOP}); | |
| 591 $seqType ||= 'query'; | |
| 592 $seqType = 'sbjct' if $seqType eq 'hit'; | |
| 593 | |
| 594 if(!defined $beg && !defined $end) { | |
| 595 ## Get data for the whole alignment. | |
| 596 push @data, ($self->num_identical, $self->num_conserved); | |
| 597 } else { | |
| 598 ## Get the substring representing the desired sub-section of aln. | |
| 599 $beg ||= 0; | |
| 600 $end ||= 0; | |
| 601 my($start,$stop) = $self->range($seqType); | |
| 602 if($beg == 0) { $beg = $start; $end = $beg+$end; } | |
| 603 elsif($end == 0) { $end = $stop; $beg = $end-$beg; } | |
| 604 | |
| 605 if($end >= $stop) { $end = $stop; } ##ML changed from if (end >stop) | |
| 606 else { $end += 1;} ##ML moved from commented position below, makes | |
| 607 ##more sense here | |
| 608 # if($end > $stop) { $end = $stop; } | |
| 609 if($beg < $start) { $beg = $start; } | |
| 610 # else { $end += 1;} | |
| 611 | |
| 612 # my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg)); | |
| 613 | |
| 614 ## ML: START fix for substr out of range error ------------------ | |
| 615 my $seq = ""; | |
| 616 if (($self->algorithm eq 'TBLASTN') and ($seqType eq 'sbjct')) | |
| 617 { | |
| 618 $seq = substr($self->seq_str('match'), | |
| 619 int(($beg-$start)/3), int(($end-$beg+1)/3)); | |
| 620 | |
| 621 } elsif (($self->algorithm eq 'BLASTX') and ($seqType eq 'query')) | |
| 622 { | |
| 623 $seq = substr($self->seq_str('match'), | |
| 624 int(($beg-$start)/3), int(($end-$beg+1)/3)); | |
| 625 } else { | |
| 626 $seq = substr($self->seq_str('match'), | |
| 627 $beg-$start, ($end-$beg)); | |
| 628 } | |
| 629 ## ML: End of fix for substr out of range error ----------------- | |
| 630 | |
| 631 | |
| 632 ## ML: debugging code | |
| 633 ## This is where we get our exception. Try printing out the values going | |
| 634 ## into this: | |
| 635 ## | |
| 636 # print STDERR | |
| 637 # qq(*------------MY EXCEPTION --------------------\nSeq: ") , | |
| 638 # $self->seq_str("$seqType"), qq("\n),$self->rank,",( index:"; | |
| 639 # print STDERR $beg-$start, ", len: ", $end-$beg," ), (HSPRealLen:", | |
| 640 # CORE::length $self->seq_str("$seqType"); | |
| 641 # print STDERR ", HSPCalcLen: ", $stop - $start +1 ," ), | |
| 642 # ( beg: $beg, end: $end ), ( start: $start, stop: stop )\n"; | |
| 643 ## ML: END DEBUGGING CODE---------- | |
| 644 | |
| 645 if(!CORE::length $seq) { | |
| 646 $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop"); | |
| 647 } | |
| 648 ## Get data for a substring. | |
| 649 # printf "Collecting HSP subsection data: beg,end = %d,%d; start,stop = %d,%d\n%s<---\n", $beg, $end, $start, $stop, $seq; | |
| 650 # printf "Original match seq:\n%s\n",$self->seq_str('match'); | |
| 651 $seq =~ s/ //g; # remove space (no info). | |
| 652 my $len_cons = CORE::length $seq; | |
| 653 $seq =~ s/\+//g; # remove '+' characters (conservative substitutions) | |
| 654 my $len_id = CORE::length $seq; | |
| 655 push @data, ($len_id, $len_cons); | |
| 656 # printf " HSP = %s\n id = %d; cons = %d\n", $self->rank, $len_id, $len_cons; <STDIN>; | |
| 657 } | |
| 658 @data; | |
| 659 } | |
| 660 | |
| 661 =head2 n | |
| 662 | |
| 663 Usage : $hsp_obj->n() | |
| 664 Purpose : Get the N value (num HSPs on which P/Expect is based). | |
| 665 : This value is not defined with NCBI Blast2 with gapping. | |
| 666 Returns : Integer or null string if not defined. | |
| 667 Argument : n/a | |
| 668 Throws : n/a | |
| 669 Comments : The 'N' value is listed in parenthesis with P/Expect value: | |
| 670 : e.g., P(3) = 1.2e-30 ---> (N = 3). | |
| 671 : Not defined in NCBI Blast2 with gaps. | |
| 672 : This typically is equal to the number of HSPs but not always. | |
| 673 : To obtain the number of HSPs, use Bio::Search::Hit::HitI::num_hsps(). | |
| 674 | |
| 675 See Also : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair> | |
| 676 | |
| 677 =cut | |
| 678 | |
| 679 sub n { shift->throw_not_implemented } | |
| 680 | |
| 681 =head2 range | |
| 682 | |
| 683 Usage : $hsp->range( [seq_type] ); | |
| 684 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence | |
| 685 : in the HSP alignment. | |
| 686 Example : ($query_beg, $query_end) = $hsp->range('query'); | |
| 687 : ($hit_beg, $hit_end) = $hsp->range('hit'); | |
| 688 Returns : Two-element array of integers | |
| 689 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query') | |
| 690 : ('sbjct' is synonymous with 'hit') | |
| 691 Throws : n/a | |
| 692 Comments : This is a convenience method for constructions such as | |
| 693 ($hsp->query->start, $hsp->query->end) | |
| 694 | |
| 695 =cut | |
| 696 | |
| 697 sub range { shift->throw_not_implemented } | |
| 698 | |
| 699 | |
| 700 1; | |
| 701 |
