Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/RangeI.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: RangeI.pm,v 1.30 2002/11/05 02:55:12 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::RangeI | |
4 # | |
5 # Cared for by Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Matthew Pocock | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::RangeI - Range interface | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 #Do not run this module directly | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 This provides a standard BioPerl range interface that should be | |
24 implemented by any object that wants to be treated as a range. This | |
25 serves purely as an abstract base class for implementers and can not | |
26 be instantiated. | |
27 | |
28 Ranges are modeled as having (start, end, length, strand). They use | |
29 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the | |
30 range. End is always greater-than or equal-to start, and length is | |
31 greather than or equal to 1. The behaviour of a range is undefined if | |
32 ranges with negative numbers or zero are used. | |
33 | |
34 So, in summary: | |
35 | |
36 length = end - start + 1 | |
37 end >= start | |
38 strand = (-1 | 0 | +1) | |
39 | |
40 =head1 FEEDBACK | |
41 | |
42 =head2 Mailing Lists | |
43 | |
44 User feedback is an integral part of the evolution of this and other | |
45 Bioperl modules. Send your comments and suggestions preferably to one | |
46 of the Bioperl mailing lists. Your participation is much appreciated. | |
47 | |
48 bioperl-l@bioperl.org - General discussion | |
49 http://bio.perl.org/MailList.html - About the mailing lists | |
50 | |
51 =head2 Reporting Bugs | |
52 | |
53 Report bugs to the Bioperl bug tracking system to help us keep track | |
54 the bugs and their resolution. Bug reports can be submitted via email | |
55 or the web: | |
56 | |
57 bioperl-bugs@bio.perl.org | |
58 http://bugzilla.bioperl.org/ | |
59 | |
60 =head1 AUTHOR - Heikki Lehvaslaiho | |
61 | |
62 Email: heikki@ebi.ac.uk | |
63 | |
64 =head1 CONTRIBUTORS | |
65 | |
66 Juha Muilu (muilu@ebi.ac.uk) | |
67 | |
68 =head1 APPENDIX | |
69 | |
70 The rest of the documentation details each of the object | |
71 methods. Internal methods are usually preceded with a _ | |
72 | |
73 =cut | |
74 | |
75 package Bio::RangeI; | |
76 | |
77 use strict; | |
78 use Carp; | |
79 use Bio::Root::RootI; | |
80 use vars qw(@ISA); | |
81 use integer; | |
82 use vars qw( @ISA %STRAND_OPTIONS ); | |
83 | |
84 @ISA = qw( Bio::Root::RootI ); | |
85 | |
86 BEGIN { | |
87 # STRAND_OPTIONS contains the legal values for the strand options | |
88 %STRAND_OPTIONS = map { $_, '_'.$_ } | |
89 ( | |
90 'strong', # ranges must have the same strand | |
91 'weak', # ranges must have the same strand or no strand | |
92 'ignore', # ignore strand information | |
93 ); | |
94 } | |
95 | |
96 # utility methods | |
97 # | |
98 | |
99 # returns true if strands are equal and non-zero | |
100 sub _strong { | |
101 my ($r1, $r2) = @_; | |
102 my ($s1, $s2) = ($r1->strand(), $r2->strand()); | |
103 | |
104 return 1 if $s1 != 0 && $s1 == $s2; | |
105 } | |
106 | |
107 # returns true if strands are equal or either is zero | |
108 sub _weak { | |
109 my ($r1, $r2) = @_; | |
110 my ($s1, $s2) = ($r1->strand(), $r2->strand()); | |
111 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2; | |
112 } | |
113 | |
114 # returns true for any strandedness | |
115 sub _ignore { | |
116 return 1; | |
117 } | |
118 | |
119 # works out what test to use for the strictness and returns true/false | |
120 # e.g. $r1->_testStrand($r2, 'strong') | |
121 sub _testStrand() { | |
122 my ($r1, $r2, $comp) = @_; | |
123 return 1 unless $comp; | |
124 my $func = $STRAND_OPTIONS{$comp}; | |
125 return $r1->$func($r2); | |
126 } | |
127 | |
128 =head1 Abstract methods | |
129 | |
130 These methods must be implemented in all subclasses. | |
131 | |
132 =head2 start | |
133 | |
134 Title : start | |
135 Usage : $start = $range->start(); | |
136 Function: get/set the start of this range | |
137 Returns : the start of this range | |
138 Args : optionaly allows the start to be set | |
139 using $range->start($start) | |
140 | |
141 =cut | |
142 | |
143 sub start { | |
144 shift->throw_not_implemented(); | |
145 } | |
146 | |
147 =head2 end | |
148 | |
149 Title : end | |
150 Usage : $end = $range->end(); | |
151 Function: get/set the end of this range | |
152 Returns : the end of this range | |
153 Args : optionaly allows the end to be set | |
154 using $range->end($end) | |
155 | |
156 =cut | |
157 | |
158 sub end { | |
159 shift->throw_not_implemented(); | |
160 } | |
161 | |
162 =head2 length | |
163 | |
164 Title : length | |
165 Usage : $length = $range->length(); | |
166 Function: get/set the length of this range | |
167 Returns : the length of this range | |
168 Args : optionaly allows the length to be set | |
169 using $range->length($length) | |
170 | |
171 =cut | |
172 | |
173 sub length { | |
174 shift->throw_not_implemented(); | |
175 } | |
176 | |
177 =head2 strand | |
178 | |
179 Title : strand | |
180 Usage : $strand = $range->strand(); | |
181 Function: get/set the strand of this range | |
182 Returns : the strandidness (-1, 0, +1) | |
183 Args : optionaly allows the strand to be set | |
184 using $range->strand($strand) | |
185 | |
186 =cut | |
187 | |
188 sub strand { | |
189 shift->throw_not_implemented(); | |
190 } | |
191 | |
192 =head1 Boolean Methods | |
193 | |
194 These methods return true or false. They throw an error if start and | |
195 end are not defined. | |
196 | |
197 $range->overlaps($otherRange) && print "Ranges overlap\n"; | |
198 | |
199 =head2 overlaps | |
200 | |
201 Title : overlaps | |
202 Usage : if($r1->overlaps($r2)) { do stuff } | |
203 Function: tests if $r2 overlaps $r1 | |
204 Args : arg #1 = a range to compare this one to (mandatory) | |
205 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
206 Returns : true if the ranges overlap, false otherwise | |
207 | |
208 =cut | |
209 | |
210 sub overlaps { | |
211 my ($self, $other, $so) = @_; | |
212 | |
213 $self->throw("start is undefined") unless defined $self->start; | |
214 $self->throw("end is undefined") unless defined $self->end; | |
215 $self->throw("not a Bio::RangeI object") unless defined $other && | |
216 $other->isa('Bio::RangeI'); | |
217 $other->throw("start is undefined") unless defined $other->start; | |
218 $other->throw("end is undefined") unless defined $other->end; | |
219 | |
220 return | |
221 ($self->_testStrand($other, $so) | |
222 and not ( | |
223 ($self->start() > $other->end() or | |
224 $self->end() < $other->start() ) | |
225 )); | |
226 } | |
227 | |
228 =head2 contains | |
229 | |
230 Title : contains | |
231 Usage : if($r1->contains($r2) { do stuff } | |
232 Function: tests whether $r1 totally contains $r2 | |
233 Args : arg #1 = a range to compare this one to (mandatory) | |
234 alternatively, integer scalar to test | |
235 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
236 Returns : true if the argument is totaly contained within this range | |
237 | |
238 =cut | |
239 | |
240 sub contains { | |
241 my ($self, $other, $so) = @_; | |
242 $self->throw("start is undefined") unless defined $self->start; | |
243 $self->throw("end is undefined") unless defined $self->end; | |
244 | |
245 if(defined $other && ref $other) { # a range object? | |
246 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
247 $other->throw("start is undefined") unless defined $other->start; | |
248 $other->throw("end is undefined") unless defined $other->end; | |
249 | |
250 return ($self->_testStrand($other, $so) and | |
251 $other->start() >= $self->start() and | |
252 $other->end() <= $self->end()); | |
253 } else { # a scalar? | |
254 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/; | |
255 return ($other >= $self->start() and $other <= $self->end()); | |
256 } | |
257 } | |
258 | |
259 =head2 equals | |
260 | |
261 Title : equals | |
262 Usage : if($r1->equals($r2)) | |
263 Function: test whether $r1 has the same start, end, length as $r2 | |
264 Args : a range to test for equality | |
265 Returns : true if they are describing the same range | |
266 | |
267 =cut | |
268 | |
269 sub equals { | |
270 my ($self, $other, $so) = @_; | |
271 | |
272 $self->throw("start is undefined") unless defined $self->start; | |
273 $self->throw("end is undefined") unless defined $self->end; | |
274 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
275 $other->throw("start is undefined") unless defined $other->start; | |
276 $other->throw("end is undefined") unless defined $other->end; | |
277 | |
278 return ($self->_testStrand($other, $so) and | |
279 $self->start() == $other->start() and | |
280 $self->end() == $other->end() ); | |
281 } | |
282 | |
283 =head1 Geometrical methods | |
284 | |
285 These methods do things to the geometry of ranges, and return | |
286 Bio::RangeI compliant objects or triplets (start, stop, strand) from | |
287 which new ranges could be built. | |
288 | |
289 | |
290 =head2 intersection | |
291 | |
292 Title : intersection | |
293 Usage : ($start, $stop, $strand) = $r1->intersection($r2) | |
294 Function: gives the range that is contained by both ranges | |
295 Args : arg #1 = a range to compare this one to (mandatory) | |
296 arg #2 = strand option ('strong', 'weak', 'ignore') (optional) | |
297 Returns : undef if they do not overlap, | |
298 or the range that they do overlap | |
299 (in an objectlike the calling one) | |
300 | |
301 =cut | |
302 | |
303 sub intersection { | |
304 my ($self, $other, $so) = @_; | |
305 return unless $self->_testStrand($other, $so); | |
306 | |
307 $self->throw("start is undefined") unless defined $self->start; | |
308 $self->throw("end is undefined") unless defined $self->end; | |
309 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI'); | |
310 $other->throw("start is undefined") unless defined $other->start; | |
311 $other->throw("end is undefined") unless defined $other->end; | |
312 | |
313 my @start = sort {$a<=>$b} | |
314 ($self->start(), $other->start()); | |
315 my @end = sort {$a<=>$b} | |
316 ($self->end(), $other->end()); | |
317 | |
318 my $start = pop @start; | |
319 my $end = shift @end; | |
320 | |
321 my $union_strand; # Strand for the union range object. | |
322 | |
323 if($self->strand == $other->strand) { | |
324 $union_strand = $other->strand; | |
325 } else { | |
326 $union_strand = 0; | |
327 } | |
328 | |
329 if($start > $end) { | |
330 return undef; | |
331 } else { | |
332 return $self->new('-start' => $start, | |
333 '-end' => $end, | |
334 '-strand' => $union_strand | |
335 ); | |
336 #return ($start, $end, $union_strand); | |
337 } | |
338 } | |
339 | |
340 =head2 union | |
341 | |
342 Title : union | |
343 Usage : ($start, $stop, $strand) = $r1->union($r2); | |
344 : ($start, $stop, $strand) = Bio::RangeI->union(@ranges); | |
345 my $newrange = Bio::RangeI->union(@ranges); | |
346 Function: finds the minimal range that contains all of the ranges | |
347 Args : a range or list of ranges to find the union of | |
348 Returns : the range object containing all of the ranges | |
349 | |
350 =cut | |
351 | |
352 sub union { | |
353 my $self = shift; | |
354 my @ranges = @_; | |
355 if(ref $self) { | |
356 unshift @ranges, $self; | |
357 } | |
358 | |
359 my @start = sort {$a<=>$b} | |
360 map( { $_->start() } @ranges); | |
361 my @end = sort {$a<=>$b} | |
362 map( { $_->end() } @ranges); | |
363 | |
364 my $start = shift @start; | |
365 while( !defined $start ) { | |
366 $start = shift @start; | |
367 } | |
368 | |
369 my $end = pop @end; | |
370 | |
371 my $union_strand; # Strand for the union range object. | |
372 | |
373 foreach(@ranges) { | |
374 if(! defined $union_strand) { | |
375 $union_strand = $_->strand; | |
376 next; | |
377 } else { | |
378 if($union_strand ne $_->strand) { | |
379 $union_strand = 0; | |
380 last; | |
381 } | |
382 } | |
383 } | |
384 return undef unless $start or $end; | |
385 if( wantarray() ) { | |
386 return ( $start,$end,$union_strand); | |
387 } else { | |
388 return $self->new('-start' => $start, | |
389 '-end' => $end, | |
390 '-strand' => $union_strand | |
391 ); | |
392 } | |
393 } | |
394 | |
395 =head2 overlap_extent | |
396 | |
397 Title : overlap_extent | |
398 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b) | |
399 Function: Provides actual amount of overlap between two different | |
400 ranges. | |
401 Example : | |
402 Returns : array of values for | |
403 - the amount unique to a | |
404 - the amount common to both | |
405 - the amount unique to b | |
406 Args : a range | |
407 | |
408 | |
409 =cut | |
410 | |
411 sub overlap_extent{ | |
412 my ($a,$b) = @_; | |
413 | |
414 $a->throw("start is undefined") unless defined $a->start; | |
415 $a->throw("end is undefined") unless defined $a->end; | |
416 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI'); | |
417 $b->throw("start is undefined") unless defined $b->start; | |
418 $b->throw("end is undefined") unless defined $b->end; | |
419 | |
420 my ($au,$bu,$is,$ie); | |
421 if( ! $a->overlaps($b) ) { | |
422 return ($a->length,0,$b->length); | |
423 } | |
424 | |
425 if( $a->start < $b->start ) { | |
426 $au = $b->start - $a->start; | |
427 } else { | |
428 $bu = $a->start - $b->start; | |
429 } | |
430 | |
431 if( $a->end > $b->end ) { | |
432 $au += $a->end - $b->end; | |
433 } else { | |
434 $bu += $b->end - $a->end; | |
435 } | |
436 my $intersect = $a->intersection($b); | |
437 $ie = $intersect->end; | |
438 $is = $intersect->start; | |
439 | |
440 return ($au,$ie-$is+1,$bu); | |
441 } | |
442 | |
443 1; |