Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Location/Split.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Split.pm,v 1.35 2002/12/28 03:26:32 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Location::SplitLocation | |
4 # Cared for by Jason Stajich <jason@bioperl.org> | |
5 # | |
6 # Copyright Jason Stajich | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::Location::Split - Implementation of a Location on a Sequence | |
14 which has multiple locations (start/end points) | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 use Bio::Location::Split; | |
19 | |
20 my $splitlocation = new Bio::Location::Split(); | |
21 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>1, | |
22 -end=>30, | |
23 -strand=>1)); | |
24 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>50, | |
25 -end=>61, | |
26 -strand=>1)); | |
27 my @sublocs = $splitlocation->sub_Location(); | |
28 | |
29 my $count = 1; | |
30 # print the start/end points of the sub locations | |
31 foreach my $location ( sort { $a->start <=> $b->start } | |
32 @sublocs ) { | |
33 printf "sub feature %d [%d..%d]\n", | |
34 $count, $location->start,$location->end, "\n"; | |
35 $count++; | |
36 } | |
37 | |
38 =head1 DESCRIPTION | |
39 | |
40 This implementation handles locations which span more than one | |
41 start/end location, or and/or lie on different sequences. | |
42 | |
43 =head1 FEEDBACK | |
44 | |
45 User feedback is an integral part of the evolution of this and other | |
46 Bioperl modules. Send your comments and suggestions preferably to one | |
47 of the Bioperl mailing lists. Your participation is much appreciated. | |
48 | |
49 bioperl-l@bioperl.org - General discussion | |
50 http://bio.perl.org/MailList.html - About the mailing lists | |
51 | |
52 =head2 Reporting Bugs | |
53 | |
54 Report bugs to the Bioperl bug tracking system to help us keep track | |
55 the bugs and their resolution. Bug reports can be submitted via email | |
56 or the web: | |
57 | |
58 bioperl-bugs@bio.perl.org | |
59 http://bugzilla.bioperl.org/ | |
60 | |
61 =head1 AUTHOR - Jason Stajich | |
62 | |
63 Email jason@bioperl.org | |
64 | |
65 =head1 APPENDIX | |
66 | |
67 The rest of the documentation details each of the object | |
68 methods. Internal methods are usually preceded with a _ | |
69 | |
70 =cut | |
71 | |
72 # Let the code begin... | |
73 | |
74 | |
75 package Bio::Location::Split; | |
76 use vars qw(@ISA @CORBALOCATIONOPERATOR); | |
77 use strict; | |
78 | |
79 use Bio::Root::Root; | |
80 use Bio::Location::SplitLocationI; | |
81 use Bio::Location::Atomic; | |
82 | |
83 @ISA = qw(Bio::Location::Atomic Bio::Location::SplitLocationI ); | |
84 | |
85 BEGIN { | |
86 # as defined by BSANE 0.03 | |
87 @CORBALOCATIONOPERATOR= ('NONE','JOIN', undef, 'ORDER'); | |
88 } | |
89 | |
90 sub new { | |
91 my ($class, @args) = @_; | |
92 my $self = $class->SUPER::new(@args); | |
93 # initialize | |
94 $self->{'_sublocations'} = []; | |
95 my ( $type, $seqid, $locations ) = | |
96 $self->_rearrange([qw(SPLITTYPE | |
97 SEQ_ID | |
98 LOCATIONS | |
99 )], @args); | |
100 if( defined $locations && ref($locations) =~ /array/i ) { | |
101 $self->add_sub_Location(@$locations); | |
102 } | |
103 $seqid && $self->seq_id($seqid); | |
104 $type = lc ($type); | |
105 $self->splittype($type || 'JOIN'); | |
106 return $self; | |
107 } | |
108 | |
109 =head2 each_Location | |
110 | |
111 Title : each_Location | |
112 Usage : @locations = $locObject->each_Location($order); | |
113 Function: Conserved function call across Location:: modules - will | |
114 return an array containing the component Location(s) in | |
115 that object, regardless if the calling object is itself a | |
116 single location or one containing sublocations. | |
117 Returns : an array of Bio::LocationI implementing objects | |
118 Args : Optional sort order to be passed to sub_Location() | |
119 | |
120 =cut | |
121 | |
122 sub each_Location { | |
123 my ($self, $order) = @_; | |
124 my @locs = (); | |
125 foreach my $subloc ($self->sub_Location($order)) { | |
126 # Recursively check to get hierarchical split locations: | |
127 push @locs, $subloc->each_Location($order); | |
128 } | |
129 return @locs; | |
130 } | |
131 | |
132 =head2 sub_Location | |
133 | |
134 Title : sub_Location | |
135 Usage : @sublocs = $splitloc->sub_Location(); | |
136 Function: Returns the array of sublocations making up this compound (split) | |
137 location. Those sublocations referring to the same sequence as | |
138 the root split location will be sorted by start position (forward | |
139 sort) or end position (reverse sort) and come first (before | |
140 those on other sequences). | |
141 | |
142 The sort order can be optionally specified or suppressed by the | |
143 value of the first argument. The default is no sort. | |
144 | |
145 Returns : an array of Bio::LocationI implementing objects | |
146 Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse | |
147 sort order | |
148 | |
149 =cut | |
150 | |
151 sub sub_Location { | |
152 my ($self, $order) = @_; | |
153 $order = 0 unless defined $order; | |
154 if( defined($order) && ($order !~ /^-?\d+$/) ) { | |
155 $self->throw("value $order passed in to sub_Location is $order, an invalid value"); | |
156 } | |
157 $order = 1 if($order > 1); | |
158 $order = -1 if($order < -1); | |
159 | |
160 my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : (); | |
161 | |
162 # return the array if no ordering requested | |
163 return @sublocs if( ($order == 0) || (! @sublocs) ); | |
164 | |
165 # sort those locations that are on the same sequence as the top (`master') | |
166 # if the top seq is undefined, we take the first defined in a sublocation | |
167 my $seqid = $self->seq_id(); | |
168 my $i = 0; | |
169 while((! defined($seqid)) && ($i <= $#sublocs)) { | |
170 $seqid = $sublocs[$i++]->seq_id(); | |
171 } | |
172 if((! $self->seq_id()) && $seqid) { | |
173 $self->warn("sorted sublocation array requested but ". | |
174 "root location doesn't define seq_id ". | |
175 "(at least one sublocation does!)"); | |
176 } | |
177 my @locs = ($seqid ? | |
178 grep { $_->seq_id() eq $seqid; } @sublocs : | |
179 @sublocs); | |
180 if(@locs) { | |
181 if($order == 1) { | |
182 # Schwartzian transforms for performance boost | |
183 @locs = map { $_->[0] } | |
184 sort { (defined $a && defined $b) ? | |
185 $a->[1] <=> $b->[1] : $a ? -1 : 1 } | |
186 map { [$_, $_->start] } @locs; | |
187 | |
188 } else { # $order == -1 | |
189 @locs = map {$_->[0]} | |
190 sort { | |
191 (defined $a && defined $b) ? | |
192 $b->[1] <=> $a->[1] : $a ? -1 : 1 } | |
193 map { [$_, $_->end] } @locs; | |
194 } | |
195 } | |
196 # push the rest unsorted | |
197 if($seqid) { | |
198 push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs); | |
199 } | |
200 # done! | |
201 return @locs; | |
202 } | |
203 | |
204 =head2 add_sub_Location | |
205 | |
206 Title : add_sub_Location | |
207 Usage : $splitloc->add_sub_Location(@locationIobjs); | |
208 Function: add an additional sublocation | |
209 Returns : number of current sub locations | |
210 Args : list of Bio::LocationI implementing object(s) to add | |
211 | |
212 =cut | |
213 | |
214 sub add_sub_Location { | |
215 my ($self,@args) = @_; | |
216 my @locs; | |
217 foreach my $loc ( @args ) { | |
218 if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) { | |
219 $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!"); | |
220 next; | |
221 } | |
222 push @{$self->{'_sublocations'}}, $loc; | |
223 } | |
224 | |
225 return scalar @{$self->{'_sublocations'}}; | |
226 } | |
227 | |
228 =head2 splittype | |
229 | |
230 Title : splittype | |
231 Usage : $splittype = $fuzzy->splittype(); | |
232 Function: get/set the split splittype | |
233 Returns : the splittype of split feature (join, order) | |
234 Args : splittype to set | |
235 | |
236 =cut | |
237 | |
238 sub splittype { | |
239 my ($self, $value) = @_; | |
240 if( defined $value || ! defined $self->{'_splittype'} ) { | |
241 $value = 'JOIN' unless( defined $value ); | |
242 $self->{'_splittype'} = uc ($value); | |
243 } | |
244 return $self->{'_splittype'}; | |
245 } | |
246 | |
247 =head2 is_single_sequence | |
248 | |
249 Title : is_single_sequence | |
250 Usage : if($splitloc->is_single_sequence()) { | |
251 print "Location object $splitloc is split ". | |
252 "but only across a single sequence\n"; | |
253 } | |
254 Function: Determine whether this location is split across a single or | |
255 multiple sequences. | |
256 | |
257 This implementation ignores (sub-)locations that do not define | |
258 seq_id(). The same holds true for the root location. | |
259 | |
260 Returns : TRUE if all sublocations lie on the same sequence as the root | |
261 location (feature), and FALSE otherwise. | |
262 Args : none | |
263 | |
264 =cut | |
265 | |
266 sub is_single_sequence { | |
267 my ($self) = @_; | |
268 | |
269 my $seqid = $self->seq_id(); | |
270 foreach my $loc ($self->sub_Location(0)) { | |
271 $seqid = $loc->seq_id() if(! $seqid); | |
272 if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) { | |
273 return 0; | |
274 } | |
275 } | |
276 return 1; | |
277 } | |
278 | |
279 =head1 LocationI methods | |
280 | |
281 =head2 strand | |
282 | |
283 Title : strand | |
284 Usage : $obj->strand($newval) | |
285 Function: For SplitLocations, setting the strand of the container | |
286 (this object) is a short-cut for setting the strand of all | |
287 sublocations. | |
288 | |
289 In get-mode, checks if no sub-location is remote, and if | |
290 all have the same strand. If so, it returns that shared | |
291 strand value. Otherwise it returns undef. | |
292 | |
293 Example : | |
294 Returns : on get, value of strand if identical between sublocations | |
295 (-1, 1, or undef) | |
296 Args : new value (-1 or 1, optional) | |
297 | |
298 | |
299 =cut | |
300 | |
301 sub strand{ | |
302 my ($self,$value) = @_; | |
303 if( defined $value) { | |
304 $self->{'strand'} = $value; | |
305 # propagate to all sublocs | |
306 foreach my $loc ($self->sub_Location(0)) { | |
307 $loc->strand($value) if ! $loc->is_remote(); | |
308 } | |
309 } else { | |
310 my ($strand, $lstrand); | |
311 foreach my $loc ($self->sub_Location(0)) { | |
312 # we give up upon any location that's remote or doesn't have | |
313 # the strand specified, or has a differing one set than | |
314 # previously seen. | |
315 # calling strand() is potentially expensive if the subloc is also | |
316 # a split location, so we cache it | |
317 $lstrand = $loc->strand(); | |
318 if((! $lstrand) || | |
319 ($strand && ($strand != $lstrand)) || | |
320 $loc->is_remote()) { | |
321 $strand = undef; | |
322 last; | |
323 } elsif(! $strand) { | |
324 $strand = $lstrand; | |
325 } | |
326 } | |
327 return $strand; | |
328 } | |
329 } | |
330 | |
331 =head2 start | |
332 | |
333 Title : start | |
334 Usage : $start = $location->start(); | |
335 Function: get the starting point of the first (sorted) sublocation | |
336 Returns : integer | |
337 Args : none | |
338 | |
339 =cut | |
340 | |
341 sub start { | |
342 my ($self,$value) = @_; | |
343 if( defined $value ) { | |
344 $self->throw("Trying to set the starting point of a split location, that is not possible, try manipulating the sub Locations"); | |
345 } | |
346 return $self->SUPER::start(); | |
347 } | |
348 | |
349 =head2 end | |
350 | |
351 Title : end | |
352 Usage : $end = $location->end(); | |
353 Function: get the ending point of the last (sorted) sublocation | |
354 Returns : integer | |
355 Args : none | |
356 | |
357 =cut | |
358 | |
359 sub end { | |
360 my ($self,$value) = @_; | |
361 if( defined $value ) { | |
362 $self->throw("Trying to set the ending point of a split location, that is not possible, try manipulating the sub Locations"); | |
363 } | |
364 return $self->SUPER::end(); | |
365 } | |
366 | |
367 =head2 min_start | |
368 | |
369 Title : min_start | |
370 Usage : $min_start = $location->min_start(); | |
371 Function: get the minimum starting point | |
372 Returns : the minimum starting point from the contained sublocations | |
373 Args : none | |
374 | |
375 =cut | |
376 | |
377 sub min_start { | |
378 my ($self, $value) = @_; | |
379 | |
380 if( defined $value ) { | |
381 $self->throw("Trying to set the minimum starting point of a split location, that is not possible, try manipulating the sub Locations"); | |
382 } | |
383 my @locs = $self->sub_Location(1); | |
384 return $locs[0]->min_start() if @locs; | |
385 return undef; | |
386 } | |
387 | |
388 =head2 max_start | |
389 | |
390 Title : max_start | |
391 Usage : my $maxstart = $location->max_start(); | |
392 Function: Get maximum starting location of feature startpoint | |
393 Returns : integer or undef if no maximum starting point. | |
394 Args : none | |
395 | |
396 =cut | |
397 | |
398 sub max_start { | |
399 my ($self,$value) = @_; | |
400 | |
401 if( defined $value ) { | |
402 $self->throw("Trying to set the maximum starting point of a split location, that is not possible, try manipulating the sub Locations"); | |
403 } | |
404 my @locs = $self->sub_Location(1); | |
405 return $locs[0]->max_start() if @locs; | |
406 return undef; | |
407 } | |
408 | |
409 =head2 start_pos_type | |
410 | |
411 Title : start_pos_type | |
412 Usage : my $start_pos_type = $location->start_pos_type(); | |
413 Function: Get start position type (ie <,>, ^) | |
414 Returns : type of position coded as text | |
415 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') | |
416 Args : none | |
417 | |
418 =cut | |
419 | |
420 sub start_pos_type { | |
421 my ($self,$value) = @_; | |
422 | |
423 if( defined $value ) { | |
424 $self->throw("Trying to set the start_pos_type of a split location, that is not possible, try manipulating the sub Locations"); | |
425 } | |
426 my @locs = $self->sub_Location(); | |
427 return ( @locs ) ? $locs[0]->start_pos_type() : undef; | |
428 } | |
429 | |
430 =head2 min_end | |
431 | |
432 Title : min_end | |
433 Usage : my $minend = $location->min_end(); | |
434 Function: Get minimum ending location of feature endpoint | |
435 Returns : integer or undef if no minimum ending point. | |
436 Args : none | |
437 | |
438 =cut | |
439 | |
440 sub min_end { | |
441 my ($self,$value) = @_; | |
442 | |
443 if( defined $value ) { | |
444 $self->throw("Trying to set the minimum end point of a split location, that is not possible, try manipulating the sub Locations"); | |
445 } | |
446 # reverse sort locations by largest ending to smallest ending | |
447 my @locs = $self->sub_Location(-1); | |
448 return $locs[0]->min_end() if @locs; | |
449 return undef; | |
450 } | |
451 | |
452 =head2 max_end | |
453 | |
454 Title : max_end | |
455 Usage : my $maxend = $location->max_end(); | |
456 Function: Get maximum ending location of feature endpoint | |
457 Returns : integer or undef if no maximum ending point. | |
458 Args : none | |
459 | |
460 =cut | |
461 | |
462 sub max_end { | |
463 my ($self,$value) = @_; | |
464 | |
465 if( defined $value ) { | |
466 $self->throw("Trying to set the maximum end point of a split location, that is not possible, try manipulating the sub Locations"); | |
467 } | |
468 # reverse sort locations by largest ending to smallest ending | |
469 my @locs = $self->sub_Location(-1); | |
470 return $locs[0]->max_end() if @locs; | |
471 return undef; | |
472 } | |
473 | |
474 =head2 end_pos_type | |
475 | |
476 Title : end_pos_type | |
477 Usage : my $end_pos_type = $location->end_pos_type(); | |
478 Function: Get end position type (ie <,>, ^) | |
479 Returns : type of position coded as text | |
480 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') | |
481 Args : none | |
482 | |
483 =cut | |
484 | |
485 sub end_pos_type { | |
486 my ($self,$value) = @_; | |
487 | |
488 if( defined $value ) { | |
489 $self->throw("Trying to set end_pos_type of a split location, that is not possible, try manipulating the sub Locations"); | |
490 } | |
491 my @locs = $self->sub_Location(); | |
492 return ( @locs ) ? $locs[0]->end_pos_type() : undef; | |
493 } | |
494 | |
495 | |
496 =head2 seq_id | |
497 | |
498 Title : seq_id | |
499 Usage : my $seqid = $location->seq_id(); | |
500 Function: Get/Set seq_id that location refers to | |
501 | |
502 We override this here in order to propagate to all sublocations | |
503 which are not remote (provided this root is not remote either) | |
504 Returns : seq_id | |
505 Args : [optional] seq_id value to set | |
506 | |
507 | |
508 =cut | |
509 | |
510 sub seq_id { | |
511 my ($self, $seqid) = @_; | |
512 | |
513 if(! $self->is_remote()) { | |
514 foreach my $subloc ($self->sub_Location(0)) { | |
515 $subloc->seq_id($seqid) if ! $subloc->is_remote(); | |
516 } | |
517 } | |
518 return $self->SUPER::seq_id($seqid); | |
519 } | |
520 | |
521 =head2 coordinate_policy | |
522 | |
523 Title : coordinate_policy | |
524 Usage : $policy = $location->coordinate_policy(); | |
525 $location->coordinate_policy($mypolicy); # set may not be possible | |
526 Function: Get the coordinate computing policy employed by this object. | |
527 | |
528 See Bio::Location::CoordinatePolicyI for documentation about | |
529 the policy object and its use. | |
530 | |
531 The interface *does not* require implementing classes to accept | |
532 setting of a different policy. The implementation provided here | |
533 does, however, allow to do so. | |
534 | |
535 Implementors of this interface are expected to initialize every | |
536 new instance with a CoordinatePolicyI object. The implementation | |
537 provided here will return a default policy object if none has | |
538 been set yet. To change this default policy object call this | |
539 method as a class method with an appropriate argument. Note that | |
540 in this case only subsequently created Location objects will be | |
541 affected. | |
542 | |
543 Returns : A Bio::Location::CoordinatePolicyI implementing object. | |
544 Args : On set, a Bio::Location::CoordinatePolicyI implementing object. | |
545 | |
546 =head2 to_FTstring | |
547 | |
548 Title : to_FTstring | |
549 Usage : my $locstr = $location->to_FTstring() | |
550 Function: returns the FeatureTable string of this location | |
551 Returns : string | |
552 Args : none | |
553 | |
554 =cut | |
555 | |
556 sub to_FTstring { | |
557 my ($self) = @_; | |
558 my @strs; | |
559 foreach my $loc ( $self->sub_Location() ) { | |
560 my $str = $loc->to_FTstring(); | |
561 # we only append the remote seq_id if it hasn't been done already | |
562 # by the sub-location (which it should if it knows it's remote) | |
563 # (and of course only if it's necessary) | |
564 if( (! $loc->is_remote) && | |
565 defined($self->seq_id) && defined($loc->seq_id) && | |
566 ($loc->seq_id ne $self->seq_id) ) { | |
567 $str = sprintf("%s:%s", $loc->seq_id, $str); | |
568 } | |
569 push @strs, $str; | |
570 } | |
571 | |
572 my $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs)); | |
573 return $str; | |
574 } | |
575 | |
576 # we'll probably need to override the RangeI methods since our locations will | |
577 # not be contiguous. | |
578 | |
579 1; |