0
|
1 # $Id: Split.pm,v 1.35 2002/12/28 03:26:32 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Location::SplitLocation
|
|
4 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
5 #
|
|
6 # Copyright Jason Stajich
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 # POD documentation - main docs before the code
|
|
10
|
|
11 =head1 NAME
|
|
12
|
|
13 Bio::Location::Split - Implementation of a Location on a Sequence
|
|
14 which has multiple locations (start/end points)
|
|
15
|
|
16 =head1 SYNOPSIS
|
|
17
|
|
18 use Bio::Location::Split;
|
|
19
|
|
20 my $splitlocation = new Bio::Location::Split();
|
|
21 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>1,
|
|
22 -end=>30,
|
|
23 -strand=>1));
|
|
24 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>50,
|
|
25 -end=>61,
|
|
26 -strand=>1));
|
|
27 my @sublocs = $splitlocation->sub_Location();
|
|
28
|
|
29 my $count = 1;
|
|
30 # print the start/end points of the sub locations
|
|
31 foreach my $location ( sort { $a->start <=> $b->start }
|
|
32 @sublocs ) {
|
|
33 printf "sub feature %d [%d..%d]\n",
|
|
34 $count, $location->start,$location->end, "\n";
|
|
35 $count++;
|
|
36 }
|
|
37
|
|
38 =head1 DESCRIPTION
|
|
39
|
|
40 This implementation handles locations which span more than one
|
|
41 start/end location, or and/or lie on different sequences.
|
|
42
|
|
43 =head1 FEEDBACK
|
|
44
|
|
45 User feedback is an integral part of the evolution of this and other
|
|
46 Bioperl modules. Send your comments and suggestions preferably to one
|
|
47 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
48
|
|
49 bioperl-l@bioperl.org - General discussion
|
|
50 http://bio.perl.org/MailList.html - About the mailing lists
|
|
51
|
|
52 =head2 Reporting Bugs
|
|
53
|
|
54 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
55 the bugs and their resolution. Bug reports can be submitted via email
|
|
56 or the web:
|
|
57
|
|
58 bioperl-bugs@bio.perl.org
|
|
59 http://bugzilla.bioperl.org/
|
|
60
|
|
61 =head1 AUTHOR - Jason Stajich
|
|
62
|
|
63 Email jason@bioperl.org
|
|
64
|
|
65 =head1 APPENDIX
|
|
66
|
|
67 The rest of the documentation details each of the object
|
|
68 methods. Internal methods are usually preceded with a _
|
|
69
|
|
70 =cut
|
|
71
|
|
72 # Let the code begin...
|
|
73
|
|
74
|
|
75 package Bio::Location::Split;
|
|
76 use vars qw(@ISA @CORBALOCATIONOPERATOR);
|
|
77 use strict;
|
|
78
|
|
79 use Bio::Root::Root;
|
|
80 use Bio::Location::SplitLocationI;
|
|
81 use Bio::Location::Atomic;
|
|
82
|
|
83 @ISA = qw(Bio::Location::Atomic Bio::Location::SplitLocationI );
|
|
84
|
|
85 BEGIN {
|
|
86 # as defined by BSANE 0.03
|
|
87 @CORBALOCATIONOPERATOR= ('NONE','JOIN', undef, 'ORDER');
|
|
88 }
|
|
89
|
|
90 sub new {
|
|
91 my ($class, @args) = @_;
|
|
92 my $self = $class->SUPER::new(@args);
|
|
93 # initialize
|
|
94 $self->{'_sublocations'} = [];
|
|
95 my ( $type, $seqid, $locations ) =
|
|
96 $self->_rearrange([qw(SPLITTYPE
|
|
97 SEQ_ID
|
|
98 LOCATIONS
|
|
99 )], @args);
|
|
100 if( defined $locations && ref($locations) =~ /array/i ) {
|
|
101 $self->add_sub_Location(@$locations);
|
|
102 }
|
|
103 $seqid && $self->seq_id($seqid);
|
|
104 $type = lc ($type);
|
|
105 $self->splittype($type || 'JOIN');
|
|
106 return $self;
|
|
107 }
|
|
108
|
|
109 =head2 each_Location
|
|
110
|
|
111 Title : each_Location
|
|
112 Usage : @locations = $locObject->each_Location($order);
|
|
113 Function: Conserved function call across Location:: modules - will
|
|
114 return an array containing the component Location(s) in
|
|
115 that object, regardless if the calling object is itself a
|
|
116 single location or one containing sublocations.
|
|
117 Returns : an array of Bio::LocationI implementing objects
|
|
118 Args : Optional sort order to be passed to sub_Location()
|
|
119
|
|
120 =cut
|
|
121
|
|
122 sub each_Location {
|
|
123 my ($self, $order) = @_;
|
|
124 my @locs = ();
|
|
125 foreach my $subloc ($self->sub_Location($order)) {
|
|
126 # Recursively check to get hierarchical split locations:
|
|
127 push @locs, $subloc->each_Location($order);
|
|
128 }
|
|
129 return @locs;
|
|
130 }
|
|
131
|
|
132 =head2 sub_Location
|
|
133
|
|
134 Title : sub_Location
|
|
135 Usage : @sublocs = $splitloc->sub_Location();
|
|
136 Function: Returns the array of sublocations making up this compound (split)
|
|
137 location. Those sublocations referring to the same sequence as
|
|
138 the root split location will be sorted by start position (forward
|
|
139 sort) or end position (reverse sort) and come first (before
|
|
140 those on other sequences).
|
|
141
|
|
142 The sort order can be optionally specified or suppressed by the
|
|
143 value of the first argument. The default is no sort.
|
|
144
|
|
145 Returns : an array of Bio::LocationI implementing objects
|
|
146 Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
|
|
147 sort order
|
|
148
|
|
149 =cut
|
|
150
|
|
151 sub sub_Location {
|
|
152 my ($self, $order) = @_;
|
|
153 $order = 0 unless defined $order;
|
|
154 if( defined($order) && ($order !~ /^-?\d+$/) ) {
|
|
155 $self->throw("value $order passed in to sub_Location is $order, an invalid value");
|
|
156 }
|
|
157 $order = 1 if($order > 1);
|
|
158 $order = -1 if($order < -1);
|
|
159
|
|
160 my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : ();
|
|
161
|
|
162 # return the array if no ordering requested
|
|
163 return @sublocs if( ($order == 0) || (! @sublocs) );
|
|
164
|
|
165 # sort those locations that are on the same sequence as the top (`master')
|
|
166 # if the top seq is undefined, we take the first defined in a sublocation
|
|
167 my $seqid = $self->seq_id();
|
|
168 my $i = 0;
|
|
169 while((! defined($seqid)) && ($i <= $#sublocs)) {
|
|
170 $seqid = $sublocs[$i++]->seq_id();
|
|
171 }
|
|
172 if((! $self->seq_id()) && $seqid) {
|
|
173 $self->warn("sorted sublocation array requested but ".
|
|
174 "root location doesn't define seq_id ".
|
|
175 "(at least one sublocation does!)");
|
|
176 }
|
|
177 my @locs = ($seqid ?
|
|
178 grep { $_->seq_id() eq $seqid; } @sublocs :
|
|
179 @sublocs);
|
|
180 if(@locs) {
|
|
181 if($order == 1) {
|
|
182 # Schwartzian transforms for performance boost
|
|
183 @locs = map { $_->[0] }
|
|
184 sort { (defined $a && defined $b) ?
|
|
185 $a->[1] <=> $b->[1] : $a ? -1 : 1 }
|
|
186 map { [$_, $_->start] } @locs;
|
|
187
|
|
188 } else { # $order == -1
|
|
189 @locs = map {$_->[0]}
|
|
190 sort {
|
|
191 (defined $a && defined $b) ?
|
|
192 $b->[1] <=> $a->[1] : $a ? -1 : 1 }
|
|
193 map { [$_, $_->end] } @locs;
|
|
194 }
|
|
195 }
|
|
196 # push the rest unsorted
|
|
197 if($seqid) {
|
|
198 push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
|
|
199 }
|
|
200 # done!
|
|
201 return @locs;
|
|
202 }
|
|
203
|
|
204 =head2 add_sub_Location
|
|
205
|
|
206 Title : add_sub_Location
|
|
207 Usage : $splitloc->add_sub_Location(@locationIobjs);
|
|
208 Function: add an additional sublocation
|
|
209 Returns : number of current sub locations
|
|
210 Args : list of Bio::LocationI implementing object(s) to add
|
|
211
|
|
212 =cut
|
|
213
|
|
214 sub add_sub_Location {
|
|
215 my ($self,@args) = @_;
|
|
216 my @locs;
|
|
217 foreach my $loc ( @args ) {
|
|
218 if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
|
|
219 $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
|
|
220 next;
|
|
221 }
|
|
222 push @{$self->{'_sublocations'}}, $loc;
|
|
223 }
|
|
224
|
|
225 return scalar @{$self->{'_sublocations'}};
|
|
226 }
|
|
227
|
|
228 =head2 splittype
|
|
229
|
|
230 Title : splittype
|
|
231 Usage : $splittype = $fuzzy->splittype();
|
|
232 Function: get/set the split splittype
|
|
233 Returns : the splittype of split feature (join, order)
|
|
234 Args : splittype to set
|
|
235
|
|
236 =cut
|
|
237
|
|
238 sub splittype {
|
|
239 my ($self, $value) = @_;
|
|
240 if( defined $value || ! defined $self->{'_splittype'} ) {
|
|
241 $value = 'JOIN' unless( defined $value );
|
|
242 $self->{'_splittype'} = uc ($value);
|
|
243 }
|
|
244 return $self->{'_splittype'};
|
|
245 }
|
|
246
|
|
247 =head2 is_single_sequence
|
|
248
|
|
249 Title : is_single_sequence
|
|
250 Usage : if($splitloc->is_single_sequence()) {
|
|
251 print "Location object $splitloc is split ".
|
|
252 "but only across a single sequence\n";
|
|
253 }
|
|
254 Function: Determine whether this location is split across a single or
|
|
255 multiple sequences.
|
|
256
|
|
257 This implementation ignores (sub-)locations that do not define
|
|
258 seq_id(). The same holds true for the root location.
|
|
259
|
|
260 Returns : TRUE if all sublocations lie on the same sequence as the root
|
|
261 location (feature), and FALSE otherwise.
|
|
262 Args : none
|
|
263
|
|
264 =cut
|
|
265
|
|
266 sub is_single_sequence {
|
|
267 my ($self) = @_;
|
|
268
|
|
269 my $seqid = $self->seq_id();
|
|
270 foreach my $loc ($self->sub_Location(0)) {
|
|
271 $seqid = $loc->seq_id() if(! $seqid);
|
|
272 if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) {
|
|
273 return 0;
|
|
274 }
|
|
275 }
|
|
276 return 1;
|
|
277 }
|
|
278
|
|
279 =head1 LocationI methods
|
|
280
|
|
281 =head2 strand
|
|
282
|
|
283 Title : strand
|
|
284 Usage : $obj->strand($newval)
|
|
285 Function: For SplitLocations, setting the strand of the container
|
|
286 (this object) is a short-cut for setting the strand of all
|
|
287 sublocations.
|
|
288
|
|
289 In get-mode, checks if no sub-location is remote, and if
|
|
290 all have the same strand. If so, it returns that shared
|
|
291 strand value. Otherwise it returns undef.
|
|
292
|
|
293 Example :
|
|
294 Returns : on get, value of strand if identical between sublocations
|
|
295 (-1, 1, or undef)
|
|
296 Args : new value (-1 or 1, optional)
|
|
297
|
|
298
|
|
299 =cut
|
|
300
|
|
301 sub strand{
|
|
302 my ($self,$value) = @_;
|
|
303 if( defined $value) {
|
|
304 $self->{'strand'} = $value;
|
|
305 # propagate to all sublocs
|
|
306 foreach my $loc ($self->sub_Location(0)) {
|
|
307 $loc->strand($value) if ! $loc->is_remote();
|
|
308 }
|
|
309 } else {
|
|
310 my ($strand, $lstrand);
|
|
311 foreach my $loc ($self->sub_Location(0)) {
|
|
312 # we give up upon any location that's remote or doesn't have
|
|
313 # the strand specified, or has a differing one set than
|
|
314 # previously seen.
|
|
315 # calling strand() is potentially expensive if the subloc is also
|
|
316 # a split location, so we cache it
|
|
317 $lstrand = $loc->strand();
|
|
318 if((! $lstrand) ||
|
|
319 ($strand && ($strand != $lstrand)) ||
|
|
320 $loc->is_remote()) {
|
|
321 $strand = undef;
|
|
322 last;
|
|
323 } elsif(! $strand) {
|
|
324 $strand = $lstrand;
|
|
325 }
|
|
326 }
|
|
327 return $strand;
|
|
328 }
|
|
329 }
|
|
330
|
|
331 =head2 start
|
|
332
|
|
333 Title : start
|
|
334 Usage : $start = $location->start();
|
|
335 Function: get the starting point of the first (sorted) sublocation
|
|
336 Returns : integer
|
|
337 Args : none
|
|
338
|
|
339 =cut
|
|
340
|
|
341 sub start {
|
|
342 my ($self,$value) = @_;
|
|
343 if( defined $value ) {
|
|
344 $self->throw("Trying to set the starting point of a split location, that is not possible, try manipulating the sub Locations");
|
|
345 }
|
|
346 return $self->SUPER::start();
|
|
347 }
|
|
348
|
|
349 =head2 end
|
|
350
|
|
351 Title : end
|
|
352 Usage : $end = $location->end();
|
|
353 Function: get the ending point of the last (sorted) sublocation
|
|
354 Returns : integer
|
|
355 Args : none
|
|
356
|
|
357 =cut
|
|
358
|
|
359 sub end {
|
|
360 my ($self,$value) = @_;
|
|
361 if( defined $value ) {
|
|
362 $self->throw("Trying to set the ending point of a split location, that is not possible, try manipulating the sub Locations");
|
|
363 }
|
|
364 return $self->SUPER::end();
|
|
365 }
|
|
366
|
|
367 =head2 min_start
|
|
368
|
|
369 Title : min_start
|
|
370 Usage : $min_start = $location->min_start();
|
|
371 Function: get the minimum starting point
|
|
372 Returns : the minimum starting point from the contained sublocations
|
|
373 Args : none
|
|
374
|
|
375 =cut
|
|
376
|
|
377 sub min_start {
|
|
378 my ($self, $value) = @_;
|
|
379
|
|
380 if( defined $value ) {
|
|
381 $self->throw("Trying to set the minimum starting point of a split location, that is not possible, try manipulating the sub Locations");
|
|
382 }
|
|
383 my @locs = $self->sub_Location(1);
|
|
384 return $locs[0]->min_start() if @locs;
|
|
385 return undef;
|
|
386 }
|
|
387
|
|
388 =head2 max_start
|
|
389
|
|
390 Title : max_start
|
|
391 Usage : my $maxstart = $location->max_start();
|
|
392 Function: Get maximum starting location of feature startpoint
|
|
393 Returns : integer or undef if no maximum starting point.
|
|
394 Args : none
|
|
395
|
|
396 =cut
|
|
397
|
|
398 sub max_start {
|
|
399 my ($self,$value) = @_;
|
|
400
|
|
401 if( defined $value ) {
|
|
402 $self->throw("Trying to set the maximum starting point of a split location, that is not possible, try manipulating the sub Locations");
|
|
403 }
|
|
404 my @locs = $self->sub_Location(1);
|
|
405 return $locs[0]->max_start() if @locs;
|
|
406 return undef;
|
|
407 }
|
|
408
|
|
409 =head2 start_pos_type
|
|
410
|
|
411 Title : start_pos_type
|
|
412 Usage : my $start_pos_type = $location->start_pos_type();
|
|
413 Function: Get start position type (ie <,>, ^)
|
|
414 Returns : type of position coded as text
|
|
415 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
|
|
416 Args : none
|
|
417
|
|
418 =cut
|
|
419
|
|
420 sub start_pos_type {
|
|
421 my ($self,$value) = @_;
|
|
422
|
|
423 if( defined $value ) {
|
|
424 $self->throw("Trying to set the start_pos_type of a split location, that is not possible, try manipulating the sub Locations");
|
|
425 }
|
|
426 my @locs = $self->sub_Location();
|
|
427 return ( @locs ) ? $locs[0]->start_pos_type() : undef;
|
|
428 }
|
|
429
|
|
430 =head2 min_end
|
|
431
|
|
432 Title : min_end
|
|
433 Usage : my $minend = $location->min_end();
|
|
434 Function: Get minimum ending location of feature endpoint
|
|
435 Returns : integer or undef if no minimum ending point.
|
|
436 Args : none
|
|
437
|
|
438 =cut
|
|
439
|
|
440 sub min_end {
|
|
441 my ($self,$value) = @_;
|
|
442
|
|
443 if( defined $value ) {
|
|
444 $self->throw("Trying to set the minimum end point of a split location, that is not possible, try manipulating the sub Locations");
|
|
445 }
|
|
446 # reverse sort locations by largest ending to smallest ending
|
|
447 my @locs = $self->sub_Location(-1);
|
|
448 return $locs[0]->min_end() if @locs;
|
|
449 return undef;
|
|
450 }
|
|
451
|
|
452 =head2 max_end
|
|
453
|
|
454 Title : max_end
|
|
455 Usage : my $maxend = $location->max_end();
|
|
456 Function: Get maximum ending location of feature endpoint
|
|
457 Returns : integer or undef if no maximum ending point.
|
|
458 Args : none
|
|
459
|
|
460 =cut
|
|
461
|
|
462 sub max_end {
|
|
463 my ($self,$value) = @_;
|
|
464
|
|
465 if( defined $value ) {
|
|
466 $self->throw("Trying to set the maximum end point of a split location, that is not possible, try manipulating the sub Locations");
|
|
467 }
|
|
468 # reverse sort locations by largest ending to smallest ending
|
|
469 my @locs = $self->sub_Location(-1);
|
|
470 return $locs[0]->max_end() if @locs;
|
|
471 return undef;
|
|
472 }
|
|
473
|
|
474 =head2 end_pos_type
|
|
475
|
|
476 Title : end_pos_type
|
|
477 Usage : my $end_pos_type = $location->end_pos_type();
|
|
478 Function: Get end position type (ie <,>, ^)
|
|
479 Returns : type of position coded as text
|
|
480 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
|
|
481 Args : none
|
|
482
|
|
483 =cut
|
|
484
|
|
485 sub end_pos_type {
|
|
486 my ($self,$value) = @_;
|
|
487
|
|
488 if( defined $value ) {
|
|
489 $self->throw("Trying to set end_pos_type of a split location, that is not possible, try manipulating the sub Locations");
|
|
490 }
|
|
491 my @locs = $self->sub_Location();
|
|
492 return ( @locs ) ? $locs[0]->end_pos_type() : undef;
|
|
493 }
|
|
494
|
|
495
|
|
496 =head2 seq_id
|
|
497
|
|
498 Title : seq_id
|
|
499 Usage : my $seqid = $location->seq_id();
|
|
500 Function: Get/Set seq_id that location refers to
|
|
501
|
|
502 We override this here in order to propagate to all sublocations
|
|
503 which are not remote (provided this root is not remote either)
|
|
504 Returns : seq_id
|
|
505 Args : [optional] seq_id value to set
|
|
506
|
|
507
|
|
508 =cut
|
|
509
|
|
510 sub seq_id {
|
|
511 my ($self, $seqid) = @_;
|
|
512
|
|
513 if(! $self->is_remote()) {
|
|
514 foreach my $subloc ($self->sub_Location(0)) {
|
|
515 $subloc->seq_id($seqid) if ! $subloc->is_remote();
|
|
516 }
|
|
517 }
|
|
518 return $self->SUPER::seq_id($seqid);
|
|
519 }
|
|
520
|
|
521 =head2 coordinate_policy
|
|
522
|
|
523 Title : coordinate_policy
|
|
524 Usage : $policy = $location->coordinate_policy();
|
|
525 $location->coordinate_policy($mypolicy); # set may not be possible
|
|
526 Function: Get the coordinate computing policy employed by this object.
|
|
527
|
|
528 See Bio::Location::CoordinatePolicyI for documentation about
|
|
529 the policy object and its use.
|
|
530
|
|
531 The interface *does not* require implementing classes to accept
|
|
532 setting of a different policy. The implementation provided here
|
|
533 does, however, allow to do so.
|
|
534
|
|
535 Implementors of this interface are expected to initialize every
|
|
536 new instance with a CoordinatePolicyI object. The implementation
|
|
537 provided here will return a default policy object if none has
|
|
538 been set yet. To change this default policy object call this
|
|
539 method as a class method with an appropriate argument. Note that
|
|
540 in this case only subsequently created Location objects will be
|
|
541 affected.
|
|
542
|
|
543 Returns : A Bio::Location::CoordinatePolicyI implementing object.
|
|
544 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
|
|
545
|
|
546 =head2 to_FTstring
|
|
547
|
|
548 Title : to_FTstring
|
|
549 Usage : my $locstr = $location->to_FTstring()
|
|
550 Function: returns the FeatureTable string of this location
|
|
551 Returns : string
|
|
552 Args : none
|
|
553
|
|
554 =cut
|
|
555
|
|
556 sub to_FTstring {
|
|
557 my ($self) = @_;
|
|
558 my @strs;
|
|
559 foreach my $loc ( $self->sub_Location() ) {
|
|
560 my $str = $loc->to_FTstring();
|
|
561 # we only append the remote seq_id if it hasn't been done already
|
|
562 # by the sub-location (which it should if it knows it's remote)
|
|
563 # (and of course only if it's necessary)
|
|
564 if( (! $loc->is_remote) &&
|
|
565 defined($self->seq_id) && defined($loc->seq_id) &&
|
|
566 ($loc->seq_id ne $self->seq_id) ) {
|
|
567 $str = sprintf("%s:%s", $loc->seq_id, $str);
|
|
568 }
|
|
569 push @strs, $str;
|
|
570 }
|
|
571
|
|
572 my $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
|
|
573 return $str;
|
|
574 }
|
|
575
|
|
576 # we'll probably need to override the RangeI methods since our locations will
|
|
577 # not be contiguous.
|
|
578
|
|
579 1;
|