Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/RangeI.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: RangeI.pm,v 1.30 2002/11/05 02:55:12 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::RangeI | |
| 4 # | |
| 5 # Cared for by Lehvaslaiho <heikki@ebi.ac.uk> | |
| 6 # | |
| 7 # Copyright Matthew Pocock | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::RangeI - Range interface | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 #Do not run this module directly | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This provides a standard BioPerl range interface that should be | |
| 24 implemented by any object that wants to be treated as a range. This | |
| 25 serves purely as an abstract base class for implementers and can not | |
| 26 be instantiated. | |
| 27 | |
| 28 Ranges are modeled as having (start, end, length, strand). They use | |
| 29 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the | |
| 30 range. End is always greater-than or equal-to start, and length is | |
| 31 greather than or equal to 1. The behaviour of a range is undefined if | |
| 32 ranges with negative numbers or zero are used. | |
| 33 | |
| 34 So, in summary: | |
| 35 | |
| 36 length = end - start + 1 | |
| 37 end >= start | |
| 38 strand = (-1 | 0 | +1) | |
| 39 | |
| 40 =head1 FEEDBACK | |
| 41 | |
| 42 =head2 Mailing Lists | |
| 43 | |
| 44 User feedback is an integral part of the evolution of this and other | |
| 45 Bioperl modules. Send your comments and suggestions preferably to one | |
| 46 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 47 | |
| 48 bioperl-l@bioperl.org - General discussion | |
| 49 http://bio.perl.org/MailList.html - About the mailing lists | |
| 50 | |
| 51 =head2 Reporting Bugs | |
| 52 | |
| 53 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 54 the bugs and their resolution. Bug reports can be submitted via email | |
| 55 or the web: | |
| 56 | |
| 57 bioperl-bugs@bio.perl.org | |
| 58 http://bugzilla.bioperl.org/ | |
| 59 | |
| 60 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 61 | |
| 62 Email: heikki@ebi.ac.uk | |
| 63 | |
| 64 =head1 CONTRIBUTORS | |
| 65 | |
| 66 Juha Muilu (muilu@ebi.ac.uk) | |
| 67 | |
| 68 =head1 APPENDIX | |
| 69 | |
| 70 The rest of the documentation details each of the object | |
| 71 methods. Internal methods are usually preceded with a _ | |
| 72 | |
| 73 =cut | |
| 74 | |
| 75 package Bio::RangeI; | |
| 76 | |
| 77 use strict; | |
| 78 use Carp; | |
| 79 use Bio::Root::RootI; | |
| 80 use vars qw(@ISA); | |
| 81 use integer; | |
| 82 use vars qw( @ISA %STRAND_OPTIONS ); | |
| 83 | |
| 84 @ISA = qw( Bio::Root::RootI ); | |
| 85 | |
| 86 BEGIN { | |
| 87 # STRAND_OPTIONS contains the legal values for the strand options | |
| 88 %STRAND_OPTIONS = map { $_, '_'.$_ } | |
| 89 ( | |
| 90 'strong', # ranges must have the same strand | |
| 91 'weak', # ranges must have the same strand or no strand | |
| 92 'ignore', # ignore strand information | |
| 93 ); | |
| 94 } | |
| 95 | |
| 96 # utility methods | |
| 97 # | |
| 98 | |
| 99 # returns true if strands are equal and non-zero | |
| 100 sub _strong { | |
| 101 my ($r1, $r2) = @_; | |
| 102 my ($s1, $s2) = ($r1->strand(), $r2->strand()); | |
| 103 | |
| 104 return 1 if $s1 != 0 && $s1 == $s2; | |
| 105 } | |
| 106 | |
| 107 # returns true if strands are equal or either is zero | |
| 108 sub _weak { | |
| 109 my ($r1, $r2) = @_; | |
| 110 my ($s1, $s2) = ($r1->strand(), $r2->strand()); | |
| 111 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2; | |
| 112 } | |
| 113 | |
| 114 # returns true for any strandedness | |
| 115 sub _ignore { | |
| 116 return 1; | |
| 117 } | |
| 118 | |
| 119 # works out what test to use for the strictness and returns true/false | |
| 120 # e.g. $r1->_testStrand($r2, 'strong') | |
| 121 sub _testStrand() { | |
| 122 my ($r1, $r2, $comp) = @_; | |
| 123 return 1 unless $comp; | |
| 124 my $func = $STRAND_OPTIONS{$comp}; | |
| 125 return $r1->$func($r2); | |
| 126 } | |
| 127 | |
| 128 =head1 Abstract methods | |
| 129 | |
| 130 These methods must be implemented in all subclasses. | |
| 131 | |
| 132 =head2 start | |
| 133 | |
| 134 Title : start | |
| 135 Usage : $start = $range->start(); | |
| 136 Function: get/set the start of this range | |
| 137 Returns : the start of this range | |
| 138 Args : optionaly allows the start to be set | |
| 139 using $range->start($start) | |
| 140 | |
| 141 =cut | |
| 142 | |
| 143 sub start { | |
| 144 shift->throw_not_implemented(); | |
| 145 } | |
| 146 | |
| 147 =head2 end | |
| 148 | |
| 149 Title : end | |
| 150 Usage : $end = $range->end(); | |
| 151 Function: get/set the end of this range | |
| 152 Returns : the end of this range | |
| 153 Args : optionaly allows the end to be set | |
| 154 using $range->end($end) | |
| 155 | |
| 156 =cut | |
| 157 | |
| 158 sub end { | |
| 159 shift->throw_not_implemented(); | |
| 160 } | |
| 161 | |
| 162 =head2 length | |
| 163 | |
| 164 Title : length | |
| 165 Usage : $length = $range->length(); | |
| 166 Function: get/set the length of this range | |
| 167 Returns : the length of this range | |
| 168 Args : optionaly allows the length to be set | |
| 169 using $range->length($length) | |
| 170 | |
| 171 =cut | |
| 172 | |
| 173 sub length { | |
| 174 shift->throw_not_implemented(); | |
| 175 } | |
| 176 | |
| 177 =head2 strand | |
| 178 | |
| 179 Title : strand | |
| 180 Usage : $strand = $range->strand(); | |
| 181 Function: get/set the strand of this range | |
| 182 Returns : the strandidness (-1, 0, +1) | |
| 183 Args : optionaly allows the strand to be set | |
| 184 using $range->strand($strand) | |
| 185 | |
| 186 =cut | |
| 187 | |
| 188 sub strand { | |
| 189 shift->throw_not_implemented(); | |
| 190 } | |
| 191 | |
| 192 =head1 Boolean Methods | |
| 193 | |
| 194 These methods return true or false. They throw an error if start and | |
| 195 end are not defined. | |
| 196 | |
| 197 $range->overlaps($otherRange) && print "Ranges overlap\n"; | |
| 198 | |
| 199 =head2 overlaps | |
| 200 | |
| 201 Title : overlaps | |
| 202 Usage : if($r1->overlaps($r2)) { do stuff } | |
| 203 Function: tests if $r2 overlaps $r1 | |
| 204 Args : arg #1 = a range to compare this one to (mandatory) | |
| 205 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
| 206 Returns : true if the ranges overlap, false otherwise | |
| 207 | |
| 208 =cut | |
| 209 | |
| 210 sub overlaps { | |
| 211 my ($self, $other, $so) = @_; | |
| 212 | |
| 213 $self->throw("start is undefined") unless defined $self->start; | |
| 214 $self->throw("end is undefined") unless defined $self->end; | |
| 215 $self->throw("not a Bio::RangeI object") unless defined $other && | |
| 216 $other->isa('Bio::RangeI'); | |
| 217 $other->throw("start is undefined") unless defined $other->start; | |
| 218 $other->throw("end is undefined") unless defined $other->end; | |
| 219 | |
| 220 return | |
| 221 ($self->_testStrand($other, $so) | |
| 222 and not ( | |
| 223 ($self->start() > $other->end() or | |
| 224 $self->end() < $other->start() ) | |
| 225 )); | |
| 226 } | |
| 227 | |
| 228 =head2 contains | |
| 229 | |
| 230 Title : contains | |
| 231 Usage : if($r1->contains($r2) { do stuff } | |
| 232 Function: tests whether $r1 totally contains $r2 | |
| 233 Args : arg #1 = a range to compare this one to (mandatory) | |
| 234 alternatively, integer scalar to test | |
| 235 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
| 236 Returns : true if the argument is totaly contained within this range | |
| 237 | |
| 238 =cut | |
| 239 | |
| 240 sub contains { | |
| 241 my ($self, $other, $so) = @_; | |
| 242 $self->throw("start is undefined") unless defined $self->start; | |
| 243 $self->throw("end is undefined") unless defined $self->end; | |
| 244 | |
| 245 if(defined $other && ref $other) { # a range object? | |
| 246 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
| 247 $other->throw("start is undefined") unless defined $other->start; | |
| 248 $other->throw("end is undefined") unless defined $other->end; | |
| 249 | |
| 250 return ($self->_testStrand($other, $so) and | |
| 251 $other->start() >= $self->start() and | |
| 252 $other->end() <= $self->end()); | |
| 253 } else { # a scalar? | |
| 254 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/; | |
| 255 return ($other >= $self->start() and $other <= $self->end()); | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 =head2 equals | |
| 260 | |
| 261 Title : equals | |
| 262 Usage : if($r1->equals($r2)) | |
| 263 Function: test whether $r1 has the same start, end, length as $r2 | |
| 264 Args : a range to test for equality | |
| 265 Returns : true if they are describing the same range | |
| 266 | |
| 267 =cut | |
| 268 | |
| 269 sub equals { | |
| 270 my ($self, $other, $so) = @_; | |
| 271 | |
| 272 $self->throw("start is undefined") unless defined $self->start; | |
| 273 $self->throw("end is undefined") unless defined $self->end; | |
| 274 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
| 275 $other->throw("start is undefined") unless defined $other->start; | |
| 276 $other->throw("end is undefined") unless defined $other->end; | |
| 277 | |
| 278 return ($self->_testStrand($other, $so) and | |
| 279 $self->start() == $other->start() and | |
| 280 $self->end() == $other->end() ); | |
| 281 } | |
| 282 | |
| 283 =head1 Geometrical methods | |
| 284 | |
| 285 These methods do things to the geometry of ranges, and return | |
| 286 Bio::RangeI compliant objects or triplets (start, stop, strand) from | |
| 287 which new ranges could be built. | |
| 288 | |
| 289 | |
| 290 =head2 intersection | |
| 291 | |
| 292 Title : intersection | |
| 293 Usage : ($start, $stop, $strand) = $r1->intersection($r2) | |
| 294 Function: gives the range that is contained by both ranges | |
| 295 Args : arg #1 = a range to compare this one to (mandatory) | |
| 296 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
| 297 Returns : undef if they do not overlap, | |
| 298 or the range that they do overlap | |
| 299 (in an objectlike the calling one) | |
| 300 | |
| 301 =cut | |
| 302 | |
| 303 sub intersection { | |
| 304 my ($self, $other, $so) = @_; | |
| 305 return unless $self->_testStrand($other, $so); | |
| 306 | |
| 307 $self->throw("start is undefined") unless defined $self->start; | |
| 308 $self->throw("end is undefined") unless defined $self->end; | |
| 309 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
| 310 $other->throw("start is undefined") unless defined $other->start; | |
| 311 $other->throw("end is undefined") unless defined $other->end; | |
| 312 | |
| 313 my @start = sort {$a<=>$b} | |
| 314 ($self->start(), $other->start()); | |
| 315 my @end = sort {$a<=>$b} | |
| 316 ($self->end(), $other->end()); | |
| 317 | |
| 318 my $start = pop @start; | |
| 319 my $end = shift @end; | |
| 320 | |
| 321 my $union_strand; # Strand for the union range object. | |
| 322 | |
| 323 if($self->strand == $other->strand) { | |
| 324 $union_strand = $other->strand; | |
| 325 } else { | |
| 326 $union_strand = 0; | |
| 327 } | |
| 328 | |
| 329 if($start > $end) { | |
| 330 return undef; | |
| 331 } else { | |
| 332 return $self->new('-start' => $start, | |
| 333 '-end' => $end, | |
| 334 '-strand' => $union_strand | |
| 335 ); | |
| 336 #return ($start, $end, $union_strand); | |
| 337 } | |
| 338 } | |
| 339 | |
| 340 =head2 union | |
| 341 | |
| 342 Title : union | |
| 343 Usage : ($start, $stop, $strand) = $r1->union($r2); | |
| 344 : ($start, $stop, $strand) = Bio::RangeI->union(@ranges); | |
| 345 my $newrange = Bio::RangeI->union(@ranges); | |
| 346 Function: finds the minimal range that contains all of the ranges | |
| 347 Args : a range or list of ranges to find the union of | |
| 348 Returns : the range object containing all of the ranges | |
| 349 | |
| 350 =cut | |
| 351 | |
| 352 sub union { | |
| 353 my $self = shift; | |
| 354 my @ranges = @_; | |
| 355 if(ref $self) { | |
| 356 unshift @ranges, $self; | |
| 357 } | |
| 358 | |
| 359 my @start = sort {$a<=>$b} | |
| 360 map( { $_->start() } @ranges); | |
| 361 my @end = sort {$a<=>$b} | |
| 362 map( { $_->end() } @ranges); | |
| 363 | |
| 364 my $start = shift @start; | |
| 365 while( !defined $start ) { | |
| 366 $start = shift @start; | |
| 367 } | |
| 368 | |
| 369 my $end = pop @end; | |
| 370 | |
| 371 my $union_strand; # Strand for the union range object. | |
| 372 | |
| 373 foreach(@ranges) { | |
| 374 if(! defined $union_strand) { | |
| 375 $union_strand = $_->strand; | |
| 376 next; | |
| 377 } else { | |
| 378 if($union_strand ne $_->strand) { | |
| 379 $union_strand = 0; | |
| 380 last; | |
| 381 } | |
| 382 } | |
| 383 } | |
| 384 return undef unless $start or $end; | |
| 385 if( wantarray() ) { | |
| 386 return ( $start,$end,$union_strand); | |
| 387 } else { | |
| 388 return $self->new('-start' => $start, | |
| 389 '-end' => $end, | |
| 390 '-strand' => $union_strand | |
| 391 ); | |
| 392 } | |
| 393 } | |
| 394 | |
| 395 =head2 overlap_extent | |
| 396 | |
| 397 Title : overlap_extent | |
| 398 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b) | |
| 399 Function: Provides actual amount of overlap between two different | |
| 400 ranges. | |
| 401 Example : | |
| 402 Returns : array of values for | |
| 403 - the amount unique to a | |
| 404 - the amount common to both | |
| 405 - the amount unique to b | |
| 406 Args : a range | |
| 407 | |
| 408 | |
| 409 =cut | |
| 410 | |
| 411 sub overlap_extent{ | |
| 412 my ($a,$b) = @_; | |
| 413 | |
| 414 $a->throw("start is undefined") unless defined $a->start; | |
| 415 $a->throw("end is undefined") unless defined $a->end; | |
| 416 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI'); | |
| 417 $b->throw("start is undefined") unless defined $b->start; | |
| 418 $b->throw("end is undefined") unless defined $b->end; | |
| 419 | |
| 420 my ($au,$bu,$is,$ie); | |
| 421 if( ! $a->overlaps($b) ) { | |
| 422 return ($a->length,0,$b->length); | |
| 423 } | |
| 424 | |
| 425 if( $a->start < $b->start ) { | |
| 426 $au = $b->start - $a->start; | |
| 427 } else { | |
| 428 $bu = $a->start - $b->start; | |
| 429 } | |
| 430 | |
| 431 if( $a->end > $b->end ) { | |
| 432 $au += $a->end - $b->end; | |
| 433 } else { | |
| 434 $bu += $b->end - $a->end; | |
| 435 } | |
| 436 my $intersect = $a->intersection($b); | |
| 437 $ie = $intersect->end; | |
| 438 $is = $intersect->start; | |
| 439 | |
| 440 return ($au,$ie-$is+1,$bu); | |
| 441 } | |
| 442 | |
| 443 1; |
