comparison variant_effect_predictor/Bio/Location/Split.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Split.pm,v 1.35 2002/12/28 03:26:32 lapp Exp $
2 #
3 # BioPerl module for Bio::Location::SplitLocation
4 # Cared for by Jason Stajich <jason@bioperl.org>
5 #
6 # Copyright Jason Stajich
7 #
8 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Location::Split - Implementation of a Location on a Sequence
14 which has multiple locations (start/end points)
15
16 =head1 SYNOPSIS
17
18 use Bio::Location::Split;
19
20 my $splitlocation = new Bio::Location::Split();
21 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>1,
22 -end=>30,
23 -strand=>1));
24 $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>50,
25 -end=>61,
26 -strand=>1));
27 my @sublocs = $splitlocation->sub_Location();
28
29 my $count = 1;
30 # print the start/end points of the sub locations
31 foreach my $location ( sort { $a->start <=> $b->start }
32 @sublocs ) {
33 printf "sub feature %d [%d..%d]\n",
34 $count, $location->start,$location->end, "\n";
35 $count++;
36 }
37
38 =head1 DESCRIPTION
39
40 This implementation handles locations which span more than one
41 start/end location, or and/or lie on different sequences.
42
43 =head1 FEEDBACK
44
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to one
47 of the Bioperl mailing lists. Your participation is much appreciated.
48
49 bioperl-l@bioperl.org - General discussion
50 http://bio.perl.org/MailList.html - About the mailing lists
51
52 =head2 Reporting Bugs
53
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 the bugs and their resolution. Bug reports can be submitted via email
56 or the web:
57
58 bioperl-bugs@bio.perl.org
59 http://bugzilla.bioperl.org/
60
61 =head1 AUTHOR - Jason Stajich
62
63 Email jason@bioperl.org
64
65 =head1 APPENDIX
66
67 The rest of the documentation details each of the object
68 methods. Internal methods are usually preceded with a _
69
70 =cut
71
72 # Let the code begin...
73
74
75 package Bio::Location::Split;
76 use vars qw(@ISA @CORBALOCATIONOPERATOR);
77 use strict;
78
79 use Bio::Root::Root;
80 use Bio::Location::SplitLocationI;
81 use Bio::Location::Atomic;
82
83 @ISA = qw(Bio::Location::Atomic Bio::Location::SplitLocationI );
84
85 BEGIN {
86 # as defined by BSANE 0.03
87 @CORBALOCATIONOPERATOR= ('NONE','JOIN', undef, 'ORDER');
88 }
89
90 sub new {
91 my ($class, @args) = @_;
92 my $self = $class->SUPER::new(@args);
93 # initialize
94 $self->{'_sublocations'} = [];
95 my ( $type, $seqid, $locations ) =
96 $self->_rearrange([qw(SPLITTYPE
97 SEQ_ID
98 LOCATIONS
99 )], @args);
100 if( defined $locations && ref($locations) =~ /array/i ) {
101 $self->add_sub_Location(@$locations);
102 }
103 $seqid && $self->seq_id($seqid);
104 $type = lc ($type);
105 $self->splittype($type || 'JOIN');
106 return $self;
107 }
108
109 =head2 each_Location
110
111 Title : each_Location
112 Usage : @locations = $locObject->each_Location($order);
113 Function: Conserved function call across Location:: modules - will
114 return an array containing the component Location(s) in
115 that object, regardless if the calling object is itself a
116 single location or one containing sublocations.
117 Returns : an array of Bio::LocationI implementing objects
118 Args : Optional sort order to be passed to sub_Location()
119
120 =cut
121
122 sub each_Location {
123 my ($self, $order) = @_;
124 my @locs = ();
125 foreach my $subloc ($self->sub_Location($order)) {
126 # Recursively check to get hierarchical split locations:
127 push @locs, $subloc->each_Location($order);
128 }
129 return @locs;
130 }
131
132 =head2 sub_Location
133
134 Title : sub_Location
135 Usage : @sublocs = $splitloc->sub_Location();
136 Function: Returns the array of sublocations making up this compound (split)
137 location. Those sublocations referring to the same sequence as
138 the root split location will be sorted by start position (forward
139 sort) or end position (reverse sort) and come first (before
140 those on other sequences).
141
142 The sort order can be optionally specified or suppressed by the
143 value of the first argument. The default is no sort.
144
145 Returns : an array of Bio::LocationI implementing objects
146 Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
147 sort order
148
149 =cut
150
151 sub sub_Location {
152 my ($self, $order) = @_;
153 $order = 0 unless defined $order;
154 if( defined($order) && ($order !~ /^-?\d+$/) ) {
155 $self->throw("value $order passed in to sub_Location is $order, an invalid value");
156 }
157 $order = 1 if($order > 1);
158 $order = -1 if($order < -1);
159
160 my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : ();
161
162 # return the array if no ordering requested
163 return @sublocs if( ($order == 0) || (! @sublocs) );
164
165 # sort those locations that are on the same sequence as the top (`master')
166 # if the top seq is undefined, we take the first defined in a sublocation
167 my $seqid = $self->seq_id();
168 my $i = 0;
169 while((! defined($seqid)) && ($i <= $#sublocs)) {
170 $seqid = $sublocs[$i++]->seq_id();
171 }
172 if((! $self->seq_id()) && $seqid) {
173 $self->warn("sorted sublocation array requested but ".
174 "root location doesn't define seq_id ".
175 "(at least one sublocation does!)");
176 }
177 my @locs = ($seqid ?
178 grep { $_->seq_id() eq $seqid; } @sublocs :
179 @sublocs);
180 if(@locs) {
181 if($order == 1) {
182 # Schwartzian transforms for performance boost
183 @locs = map { $_->[0] }
184 sort { (defined $a && defined $b) ?
185 $a->[1] <=> $b->[1] : $a ? -1 : 1 }
186 map { [$_, $_->start] } @locs;
187
188 } else { # $order == -1
189 @locs = map {$_->[0]}
190 sort {
191 (defined $a && defined $b) ?
192 $b->[1] <=> $a->[1] : $a ? -1 : 1 }
193 map { [$_, $_->end] } @locs;
194 }
195 }
196 # push the rest unsorted
197 if($seqid) {
198 push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
199 }
200 # done!
201 return @locs;
202 }
203
204 =head2 add_sub_Location
205
206 Title : add_sub_Location
207 Usage : $splitloc->add_sub_Location(@locationIobjs);
208 Function: add an additional sublocation
209 Returns : number of current sub locations
210 Args : list of Bio::LocationI implementing object(s) to add
211
212 =cut
213
214 sub add_sub_Location {
215 my ($self,@args) = @_;
216 my @locs;
217 foreach my $loc ( @args ) {
218 if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
219 $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
220 next;
221 }
222 push @{$self->{'_sublocations'}}, $loc;
223 }
224
225 return scalar @{$self->{'_sublocations'}};
226 }
227
228 =head2 splittype
229
230 Title : splittype
231 Usage : $splittype = $fuzzy->splittype();
232 Function: get/set the split splittype
233 Returns : the splittype of split feature (join, order)
234 Args : splittype to set
235
236 =cut
237
238 sub splittype {
239 my ($self, $value) = @_;
240 if( defined $value || ! defined $self->{'_splittype'} ) {
241 $value = 'JOIN' unless( defined $value );
242 $self->{'_splittype'} = uc ($value);
243 }
244 return $self->{'_splittype'};
245 }
246
247 =head2 is_single_sequence
248
249 Title : is_single_sequence
250 Usage : if($splitloc->is_single_sequence()) {
251 print "Location object $splitloc is split ".
252 "but only across a single sequence\n";
253 }
254 Function: Determine whether this location is split across a single or
255 multiple sequences.
256
257 This implementation ignores (sub-)locations that do not define
258 seq_id(). The same holds true for the root location.
259
260 Returns : TRUE if all sublocations lie on the same sequence as the root
261 location (feature), and FALSE otherwise.
262 Args : none
263
264 =cut
265
266 sub is_single_sequence {
267 my ($self) = @_;
268
269 my $seqid = $self->seq_id();
270 foreach my $loc ($self->sub_Location(0)) {
271 $seqid = $loc->seq_id() if(! $seqid);
272 if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) {
273 return 0;
274 }
275 }
276 return 1;
277 }
278
279 =head1 LocationI methods
280
281 =head2 strand
282
283 Title : strand
284 Usage : $obj->strand($newval)
285 Function: For SplitLocations, setting the strand of the container
286 (this object) is a short-cut for setting the strand of all
287 sublocations.
288
289 In get-mode, checks if no sub-location is remote, and if
290 all have the same strand. If so, it returns that shared
291 strand value. Otherwise it returns undef.
292
293 Example :
294 Returns : on get, value of strand if identical between sublocations
295 (-1, 1, or undef)
296 Args : new value (-1 or 1, optional)
297
298
299 =cut
300
301 sub strand{
302 my ($self,$value) = @_;
303 if( defined $value) {
304 $self->{'strand'} = $value;
305 # propagate to all sublocs
306 foreach my $loc ($self->sub_Location(0)) {
307 $loc->strand($value) if ! $loc->is_remote();
308 }
309 } else {
310 my ($strand, $lstrand);
311 foreach my $loc ($self->sub_Location(0)) {
312 # we give up upon any location that's remote or doesn't have
313 # the strand specified, or has a differing one set than
314 # previously seen.
315 # calling strand() is potentially expensive if the subloc is also
316 # a split location, so we cache it
317 $lstrand = $loc->strand();
318 if((! $lstrand) ||
319 ($strand && ($strand != $lstrand)) ||
320 $loc->is_remote()) {
321 $strand = undef;
322 last;
323 } elsif(! $strand) {
324 $strand = $lstrand;
325 }
326 }
327 return $strand;
328 }
329 }
330
331 =head2 start
332
333 Title : start
334 Usage : $start = $location->start();
335 Function: get the starting point of the first (sorted) sublocation
336 Returns : integer
337 Args : none
338
339 =cut
340
341 sub start {
342 my ($self,$value) = @_;
343 if( defined $value ) {
344 $self->throw("Trying to set the starting point of a split location, that is not possible, try manipulating the sub Locations");
345 }
346 return $self->SUPER::start();
347 }
348
349 =head2 end
350
351 Title : end
352 Usage : $end = $location->end();
353 Function: get the ending point of the last (sorted) sublocation
354 Returns : integer
355 Args : none
356
357 =cut
358
359 sub end {
360 my ($self,$value) = @_;
361 if( defined $value ) {
362 $self->throw("Trying to set the ending point of a split location, that is not possible, try manipulating the sub Locations");
363 }
364 return $self->SUPER::end();
365 }
366
367 =head2 min_start
368
369 Title : min_start
370 Usage : $min_start = $location->min_start();
371 Function: get the minimum starting point
372 Returns : the minimum starting point from the contained sublocations
373 Args : none
374
375 =cut
376
377 sub min_start {
378 my ($self, $value) = @_;
379
380 if( defined $value ) {
381 $self->throw("Trying to set the minimum starting point of a split location, that is not possible, try manipulating the sub Locations");
382 }
383 my @locs = $self->sub_Location(1);
384 return $locs[0]->min_start() if @locs;
385 return undef;
386 }
387
388 =head2 max_start
389
390 Title : max_start
391 Usage : my $maxstart = $location->max_start();
392 Function: Get maximum starting location of feature startpoint
393 Returns : integer or undef if no maximum starting point.
394 Args : none
395
396 =cut
397
398 sub max_start {
399 my ($self,$value) = @_;
400
401 if( defined $value ) {
402 $self->throw("Trying to set the maximum starting point of a split location, that is not possible, try manipulating the sub Locations");
403 }
404 my @locs = $self->sub_Location(1);
405 return $locs[0]->max_start() if @locs;
406 return undef;
407 }
408
409 =head2 start_pos_type
410
411 Title : start_pos_type
412 Usage : my $start_pos_type = $location->start_pos_type();
413 Function: Get start position type (ie <,>, ^)
414 Returns : type of position coded as text
415 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
416 Args : none
417
418 =cut
419
420 sub start_pos_type {
421 my ($self,$value) = @_;
422
423 if( defined $value ) {
424 $self->throw("Trying to set the start_pos_type of a split location, that is not possible, try manipulating the sub Locations");
425 }
426 my @locs = $self->sub_Location();
427 return ( @locs ) ? $locs[0]->start_pos_type() : undef;
428 }
429
430 =head2 min_end
431
432 Title : min_end
433 Usage : my $minend = $location->min_end();
434 Function: Get minimum ending location of feature endpoint
435 Returns : integer or undef if no minimum ending point.
436 Args : none
437
438 =cut
439
440 sub min_end {
441 my ($self,$value) = @_;
442
443 if( defined $value ) {
444 $self->throw("Trying to set the minimum end point of a split location, that is not possible, try manipulating the sub Locations");
445 }
446 # reverse sort locations by largest ending to smallest ending
447 my @locs = $self->sub_Location(-1);
448 return $locs[0]->min_end() if @locs;
449 return undef;
450 }
451
452 =head2 max_end
453
454 Title : max_end
455 Usage : my $maxend = $location->max_end();
456 Function: Get maximum ending location of feature endpoint
457 Returns : integer or undef if no maximum ending point.
458 Args : none
459
460 =cut
461
462 sub max_end {
463 my ($self,$value) = @_;
464
465 if( defined $value ) {
466 $self->throw("Trying to set the maximum end point of a split location, that is not possible, try manipulating the sub Locations");
467 }
468 # reverse sort locations by largest ending to smallest ending
469 my @locs = $self->sub_Location(-1);
470 return $locs[0]->max_end() if @locs;
471 return undef;
472 }
473
474 =head2 end_pos_type
475
476 Title : end_pos_type
477 Usage : my $end_pos_type = $location->end_pos_type();
478 Function: Get end position type (ie <,>, ^)
479 Returns : type of position coded as text
480 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
481 Args : none
482
483 =cut
484
485 sub end_pos_type {
486 my ($self,$value) = @_;
487
488 if( defined $value ) {
489 $self->throw("Trying to set end_pos_type of a split location, that is not possible, try manipulating the sub Locations");
490 }
491 my @locs = $self->sub_Location();
492 return ( @locs ) ? $locs[0]->end_pos_type() : undef;
493 }
494
495
496 =head2 seq_id
497
498 Title : seq_id
499 Usage : my $seqid = $location->seq_id();
500 Function: Get/Set seq_id that location refers to
501
502 We override this here in order to propagate to all sublocations
503 which are not remote (provided this root is not remote either)
504 Returns : seq_id
505 Args : [optional] seq_id value to set
506
507
508 =cut
509
510 sub seq_id {
511 my ($self, $seqid) = @_;
512
513 if(! $self->is_remote()) {
514 foreach my $subloc ($self->sub_Location(0)) {
515 $subloc->seq_id($seqid) if ! $subloc->is_remote();
516 }
517 }
518 return $self->SUPER::seq_id($seqid);
519 }
520
521 =head2 coordinate_policy
522
523 Title : coordinate_policy
524 Usage : $policy = $location->coordinate_policy();
525 $location->coordinate_policy($mypolicy); # set may not be possible
526 Function: Get the coordinate computing policy employed by this object.
527
528 See Bio::Location::CoordinatePolicyI for documentation about
529 the policy object and its use.
530
531 The interface *does not* require implementing classes to accept
532 setting of a different policy. The implementation provided here
533 does, however, allow to do so.
534
535 Implementors of this interface are expected to initialize every
536 new instance with a CoordinatePolicyI object. The implementation
537 provided here will return a default policy object if none has
538 been set yet. To change this default policy object call this
539 method as a class method with an appropriate argument. Note that
540 in this case only subsequently created Location objects will be
541 affected.
542
543 Returns : A Bio::Location::CoordinatePolicyI implementing object.
544 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
545
546 =head2 to_FTstring
547
548 Title : to_FTstring
549 Usage : my $locstr = $location->to_FTstring()
550 Function: returns the FeatureTable string of this location
551 Returns : string
552 Args : none
553
554 =cut
555
556 sub to_FTstring {
557 my ($self) = @_;
558 my @strs;
559 foreach my $loc ( $self->sub_Location() ) {
560 my $str = $loc->to_FTstring();
561 # we only append the remote seq_id if it hasn't been done already
562 # by the sub-location (which it should if it knows it's remote)
563 # (and of course only if it's necessary)
564 if( (! $loc->is_remote) &&
565 defined($self->seq_id) && defined($loc->seq_id) &&
566 ($loc->seq_id ne $self->seq_id) ) {
567 $str = sprintf("%s:%s", $loc->seq_id, $str);
568 }
569 push @strs, $str;
570 }
571
572 my $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
573 return $str;
574 }
575
576 # we'll probably need to override the RangeI methods since our locations will
577 # not be contiguous.
578
579 1;