0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Feature - Ensembl specific sequence feature.
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 my $feat = new Bio::EnsEMBL::Feature(
|
|
28 -start => 100,
|
|
29 -end => 220,
|
|
30 -strand => -1,
|
|
31 -slice => $slice -analysis => $analysis
|
|
32 );
|
|
33
|
|
34 my $start = $feat->start();
|
|
35 my $end = $feat->end();
|
|
36 my $strand = $feat->strand();
|
|
37
|
|
38 # Move the feature to the chromosomal coordinate system
|
|
39 $feature = $feature->transform('chromosome');
|
|
40
|
|
41 # Move the feature to a different slice (possibly on another coord
|
|
42 # system)
|
|
43 $feature = $feature->transfer($new_slice);
|
|
44
|
|
45 # Project the feature onto another coordinate system possibly across
|
|
46 # boundaries:
|
|
47 @projection = @{ $feature->project('contig') };
|
|
48
|
|
49 # Change the start, end, and strand of the feature in place
|
|
50 $feature->move( $new_start, $new_end, $new_strand );
|
|
51
|
|
52 =head1 DESCRIPTION
|
|
53
|
|
54 This is the Base feature class from which all Ensembl features inherit.
|
|
55 It provides a bare minimum functionality that all features require. It
|
|
56 basically describes a location on a sequence in an arbitrary coordinate
|
|
57 system.
|
|
58
|
|
59 =head1 METHODS
|
|
60
|
|
61 =cut
|
|
62
|
|
63
|
|
64 package Bio::EnsEMBL::Feature;
|
|
65
|
|
66 use strict;
|
|
67 use warnings;
|
|
68
|
|
69 use Bio::EnsEMBL::Storable;
|
|
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
71 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
|
|
72 use Bio::EnsEMBL::Utils::Scalar qw(check_ref assert_ref);
|
|
73 use Bio::EnsEMBL::Slice;
|
|
74 use Bio::EnsEMBL::StrainSlice;
|
|
75 use vars qw(@ISA);
|
|
76
|
|
77 use Scalar::Util qw(weaken isweak);
|
|
78
|
|
79 @ISA = qw(Bio::EnsEMBL::Storable);
|
|
80
|
|
81
|
|
82 =head2 new
|
|
83
|
|
84 Arg [-SLICE]: Bio::EnsEMBL::SLice - Represents the sequence that this
|
|
85 feature is on. The coordinates of the created feature are
|
|
86 relative to the start of the slice.
|
|
87 Arg [-START]: The start coordinate of this feature relative to the start
|
|
88 of the slice it is sitting on. Coordinates start at 1 and
|
|
89 are inclusive.
|
|
90 Arg [-END] : The end coordinate of this feature relative to the start of
|
|
91 the slice it is sitting on. Coordinates start at 1 and are
|
|
92 inclusive.
|
|
93 Arg [-STRAND]: The orientation of this feature. Valid values are 1,-1,0.
|
|
94 Arg [-SEQNAME] : A seqname to be used instead of the default name of the
|
|
95 of the slice. Useful for features that do not have an
|
|
96 attached slice such as protein features.
|
|
97 Arg [-dbID] : (optional) internal database id
|
|
98 Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor
|
|
99 Example : $feature = Bio::EnsEMBL::Feature->new(-start => 1,
|
|
100 -end => 100,
|
|
101 -strand => 1,
|
|
102 -slice => $slice,
|
|
103 -analysis => $analysis);
|
|
104 Description: Constructs a new Bio::EnsEMBL::Feature. Generally subclasses
|
|
105 of this method are instantiated, rather than this class itself.
|
|
106 Returntype : Bio::EnsEMBL::Feature
|
|
107 Exceptions : Thrown on invalid -SLICE, -ANALYSIS, -STRAND ,-ADAPTOR arguments
|
|
108 Caller : general, subclass constructors
|
|
109 Status : Stable
|
|
110
|
|
111 =cut
|
|
112
|
|
113
|
|
114 sub new {
|
|
115 my $caller = shift;
|
|
116
|
|
117 my $class = ref($caller) || $caller;
|
|
118 my ( $start, $end, $strand, $slice, $analysis,$seqname, $dbID, $adaptor ) =
|
|
119 rearrange(['START','END','STRAND','SLICE','ANALYSIS', 'SEQNAME',
|
|
120 'DBID', 'ADAPTOR'], @_);
|
|
121 if($slice) {
|
|
122 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
|
|
123 throw('-SLICE argument must be a Bio::EnsEMBL::Slice not '.$slice);
|
|
124 }
|
|
125 }
|
|
126
|
|
127 if($analysis) {
|
|
128 if(!ref($analysis) || !$analysis->isa('Bio::EnsEMBL::Analysis')) {
|
|
129 throw('-ANALYSIS argument must be a Bio::EnsEMBL::Analysis not '.
|
|
130 $analysis);
|
|
131 }
|
|
132 }
|
|
133
|
|
134 if(defined($strand)) {
|
|
135 if(!($strand == 1) && !($strand == -1) && !($strand == 0)) {
|
|
136 throw('-STRAND argument must be 1, -1, or 0');
|
|
137 }
|
|
138 }
|
|
139
|
|
140 if(defined($start) && defined($end)) {
|
|
141 if (($start =~ /\d+/) && ($end =~ /\d+/)) {
|
|
142 if($end+1 < $start) {
|
|
143 throw(sprintf('Start (%d) must be less than or equal to end+1 (%d)', $start, ($end+1)));
|
|
144 }
|
|
145 } else {
|
|
146 throw('Start and end must be integers');
|
|
147 }
|
|
148 }
|
|
149
|
|
150 my $self = bless({'start' => $start,
|
|
151 'end' => $end,
|
|
152 'strand' => $strand,
|
|
153 'slice' => $slice,
|
|
154 'analysis' => $analysis,
|
|
155 'seqname' => $seqname,
|
|
156 'dbID' => $dbID}, $class);
|
|
157
|
|
158 $self->adaptor($adaptor);
|
|
159 return $self;
|
|
160 }
|
|
161
|
|
162
|
|
163 =head2 new_fast
|
|
164
|
|
165 Arg [1] : hashref to be blessed
|
|
166 Description: Construct a new Bio::EnsEMBL::Feature using the hashref.
|
|
167 Exceptions : none
|
|
168 Returntype : Bio::EnsEMBL::Feature
|
|
169 Caller : general, subclass constructors
|
|
170 Status : Stable
|
|
171
|
|
172 =cut
|
|
173
|
|
174
|
|
175 sub new_fast {
|
|
176 my $class = shift;
|
|
177 my $hashref = shift;
|
|
178 my $self = bless $hashref, $class;
|
|
179 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
|
|
180 return $self;
|
|
181 }
|
|
182
|
|
183 =head2 start
|
|
184
|
|
185 Arg [1] : (optional) int $start
|
|
186 The start of this feature relative to the start of the slice
|
|
187 that it is on.
|
|
188 Example : $start = $feat->start()
|
|
189 Description: Getter/Setter for the start of this feature relative to the
|
|
190 start of the slice it is on. Note that negative values, or
|
|
191 values exceeding the length of the slice are permitted.
|
|
192 Start must be less than or equal to the end regardless of the
|
|
193 strand. Coordinate values start at 1 and are inclusive.
|
|
194 Returntype : int
|
|
195 Exceptions : none
|
|
196 Caller : general
|
|
197 Status : Stable
|
|
198
|
|
199 =cut
|
|
200
|
|
201 sub start {
|
|
202 my ( $self, $value ) = @_;
|
|
203
|
|
204 if ( defined($value) ) {
|
|
205 $self->{'start'} = $value;
|
|
206 }
|
|
207
|
|
208 return $self->{'start'};
|
|
209 }
|
|
210
|
|
211
|
|
212
|
|
213 =head2 end
|
|
214
|
|
215 Arg [1] : (optional) int $end
|
|
216 Example : $end = $feat->end();
|
|
217 Description: Getter/Setter for the end of this feature relative to the
|
|
218 start of the slice that it is on. Note that negative values,
|
|
219 of values exceeding the length of the slice are permitted. End
|
|
220 must be greater than or equal to start regardless of the strand.
|
|
221 Coordinate values start at 1 and are inclusive.
|
|
222 Returntype : int
|
|
223 Exceptions : none
|
|
224 Caller : general
|
|
225 Status : Stable
|
|
226
|
|
227 =cut
|
|
228
|
|
229 sub end {
|
|
230 my ( $self, $value ) = @_;
|
|
231
|
|
232 if ( defined($value) ) {
|
|
233 $self->{'end'} = $value;
|
|
234 }
|
|
235
|
|
236 return $self->{'end'};
|
|
237 }
|
|
238
|
|
239
|
|
240
|
|
241
|
|
242 =head2 strand
|
|
243
|
|
244 Arg [1] : (optional) int $strand
|
|
245 Example : $feat->strand(-1);
|
|
246 Description: Getter/Setter for the strand of this feature relative to the
|
|
247 slice it is on. 0 is an unknown or non-applicable strand.
|
|
248 -1 is the reverse (negative) strand and 1 is the forward
|
|
249 (positive) strand. No other values are permitted.
|
|
250 Returntype : int
|
|
251 Exceptions : thrown if an invalid strand argument is passed
|
|
252 Caller : general
|
|
253 Status : Stable
|
|
254
|
|
255 =cut
|
|
256
|
|
257 sub strand {
|
|
258 my ( $self, $strand ) = @_;
|
|
259
|
|
260 if ( defined($strand) ) {
|
|
261 if ( $strand != 0 && $strand != 1 && $strand != -1 ) {
|
|
262 throw('strand argument must be 0, -1 or 1');
|
|
263 }
|
|
264
|
|
265 $self->{'strand'} = $strand;
|
|
266 }
|
|
267
|
|
268 return $self->{'strand'};
|
|
269 }
|
|
270
|
|
271 =head2 move
|
|
272
|
|
273 Arg [1] : int start
|
|
274 Arg [2] : int end
|
|
275 Arg [3] : (optional) int strand
|
|
276 Example : None
|
|
277 Description: Sets the start, end and strand in one call rather than in
|
|
278 3 seperate calls to the start(), end() and strand() methods.
|
|
279 This is for convenience and for speed when this needs to be
|
|
280 done within a tight loop.
|
|
281 Returntype : none
|
|
282 Exceptions : Thrown is invalid arguments are provided
|
|
283 Caller : general
|
|
284 Status : Stable
|
|
285
|
|
286 =cut
|
|
287
|
|
288 sub move {
|
|
289 my $self = shift;
|
|
290
|
|
291 throw('start and end arguments are required') if(@_ < 2);
|
|
292
|
|
293 my $start = shift;
|
|
294 my $end = shift;
|
|
295 my $strand = shift;
|
|
296
|
|
297 if(defined($start) && defined($end) && $end < $start) {
|
|
298 throw('start must be less than or equal to end');
|
|
299 }
|
|
300 if(defined($strand) && $strand != 0 && $strand != -1 && $strand != 1) {
|
|
301 throw('strand must be 0, -1 or 1');
|
|
302 }
|
|
303
|
|
304 $self->{'start'} = $start;
|
|
305 $self->{'end'} = $end;
|
|
306 $self->{'strand'} = $strand if(defined($strand));
|
|
307 }
|
|
308
|
|
309
|
|
310
|
|
311 =head2 length
|
|
312
|
|
313 Arg [1] : none
|
|
314 Example : $length = $feat->length();
|
|
315 Description: Returns the length of this feature
|
|
316 Returntype : Integer
|
|
317 Exceptions : Throws if end < start and the feature is not on a
|
|
318 circular slice
|
|
319 Caller : general
|
|
320 Status : Stable
|
|
321
|
|
322 =cut
|
|
323
|
|
324 sub length {
|
|
325 my ($self) = @_;
|
|
326
|
|
327 if ( $self->{'end'} < $self->{'start'} ) {
|
|
328 # if circular, we can work out the length of an origin-spanning
|
|
329 # feature using the size of the underlying region.
|
|
330 if ( $self->slice() && $self->slice()->is_circular() ) {
|
|
331 my $len =
|
|
332 $self->slice()->seq_region_length() -
|
|
333 ( $self->{'start'} - $self->{'end'} ) + 1;
|
|
334 return $len;
|
|
335 } else {
|
|
336 throw( "Cannot determine length of non-circular feature "
|
|
337 . "where start > end" );
|
|
338 }
|
|
339 }
|
|
340
|
|
341 return $self->{'end'} - $self->{'start'} + 1;
|
|
342 }
|
|
343
|
|
344 =head2 analysis
|
|
345
|
|
346 Arg [1] : (optional) Bio::EnsEMBL::Analysis $analysis
|
|
347 Example : $feature->analysis(new Bio::EnsEMBL::Analysis(...))
|
|
348 Description: Getter/Setter for the analysis that is associated with
|
|
349 this feature. The analysis describes how this feature
|
|
350 was derived.
|
|
351 Returntype : Bio::EnsEMBL::Analysis
|
|
352 Exceptions : thrown if an invalid argument is passed
|
|
353 Caller : general
|
|
354 Status : Stable
|
|
355
|
|
356 =cut
|
|
357
|
|
358 sub analysis {
|
|
359 my $self = shift;
|
|
360
|
|
361 if(@_) {
|
|
362 my $an = shift;
|
|
363 if(defined($an) && (!ref($an) || !$an->isa('Bio::EnsEMBL::Analysis'))) {
|
|
364 throw('analysis argument must be a Bio::EnsEMBL::Analysis');
|
|
365 }
|
|
366 $self->{'analysis'} = $an;
|
|
367 }
|
|
368
|
|
369 return $self->{'analysis'};
|
|
370 }
|
|
371
|
|
372
|
|
373
|
|
374 =head2 slice
|
|
375
|
|
376 Arg [1] : (optional) Bio::EnsEMBL::Slice $slice
|
|
377 Example : $seqname = $feature->slice()->name();
|
|
378 Description: Getter/Setter for the Slice that is associated with this
|
|
379 feature. The slice represents the underlying sequence that this
|
|
380 feature is on. Note that this method call is analagous to the
|
|
381 old SeqFeature methods contig(), entire_seq(), attach_seq(),
|
|
382 etc.
|
|
383 Returntype : Bio::EnsEMBL::Slice
|
|
384 Exceptions : thrown if an invalid argument is passed
|
|
385 Caller : general
|
|
386 Status : Stable
|
|
387
|
|
388 =cut
|
|
389
|
|
390 sub slice {
|
|
391 my ( $self, $slice ) = @_;
|
|
392
|
|
393 if ( defined($slice) ) {
|
|
394 if ( !check_ref( $slice, 'Bio::EnsEMBL::Slice' )
|
|
395 && !check_ref( $slice, 'Bio::EnsEMBL::LRGSlice' ) )
|
|
396 {
|
|
397 throw('slice argument must be a Bio::EnsEMBL::Slice');
|
|
398 }
|
|
399
|
|
400 $self->{'slice'} = $slice;
|
|
401 } elsif ( @_ > 1 ) {
|
|
402 delete($self->{'slice'});
|
|
403 }
|
|
404
|
|
405 return $self->{'slice'};
|
|
406 }
|
|
407
|
|
408 =head2 equals
|
|
409
|
|
410 Arg [1] : Bio::EnsEMBL::Feature object
|
|
411 Example : if ($featureA->equals($featureB)) { ... }
|
|
412 Description : Compares two features using various criteria. The
|
|
413 test for eqality goes through the following list and
|
|
414 terminates at the first true match:
|
|
415
|
|
416 1. If the two features are the same object, they are
|
|
417 equal.
|
|
418 2. If they are of different types (e.g., transcript
|
|
419 and gene), they are *not* equal.
|
|
420 3. If they both have dbIDs: if these are the same,
|
|
421 then they are equal, otherwise not.
|
|
422 4. If they both have slices and analysis objects:
|
|
423 if the analysis dbIDs are the same and the
|
|
424 seq_region_id are the same, along with
|
|
425 seq_region_start and seq_region_end, then they are
|
|
426 equal, otherwise not.
|
|
427
|
|
428 If none of the above is able to determine equality,
|
|
429 undef is returned.
|
|
430
|
|
431 Return type : tri-Boolean (0, 1, undef = "unknown")
|
|
432
|
|
433 Exceptions : Thrown if a non-feature is passed as the argument.
|
|
434
|
|
435 =cut
|
|
436
|
|
437 sub equals {
|
|
438 my ( $self, $feature ) = @_;
|
|
439
|
|
440 # If the features are the same object, they are equal.
|
|
441 if ( !defined($feature) ) { return 0 }
|
|
442 if ( $self eq $feature ) { return 1 }
|
|
443
|
|
444 assert_ref( $feature, 'Bio::EnsEMBL::Feature' );
|
|
445
|
|
446 # If the features have different types, they are *not* equal.
|
|
447 if ( ref($self) ne ref($feature) ) {
|
|
448 return 0;
|
|
449 }
|
|
450
|
|
451 # If the features has the same dbID, they are equal.
|
|
452 if ( defined( $self->dbID() ) && defined( $feature->dbID() ) ) {
|
|
453 if ( $self->dbID() == $feature->dbID() ) { return 1 }
|
|
454 else { return 0 }
|
|
455 }
|
|
456
|
|
457 # We now know that one of the features do not have a dbID.
|
|
458
|
|
459 # If the features have the same start, end, strand and seq_region_id,
|
|
460 # and analysis_id, they are equal.
|
|
461 if (
|
|
462 ( defined( $self->analysis() ) && defined( $feature->analysis() ) )
|
|
463 && ( defined( $self->slice() ) && defined( $feature->slice() ) ) )
|
|
464 {
|
|
465 if ( ( $self->start() == $feature->start() ) &&
|
|
466 ( $self->end() == $feature->end() ) &&
|
|
467 ( $self->strand() == $feature->strand() ) &&
|
|
468 ( $self->slice()->get_seq_region_id() ==
|
|
469 $feature->slice()->get_seq_region_id() ) &&
|
|
470 ( $self->analysis()->dbID() == $feature->analysis()->dbID() ) )
|
|
471 {
|
|
472 return 1;
|
|
473 }
|
|
474 else { return 0 }
|
|
475 }
|
|
476
|
|
477 # We now know that one of the features does not have either analysis
|
|
478 # or slice.
|
|
479
|
|
480 # We don't know if the features are equal. This happens if they are
|
|
481 # not the same object but are of the same type, and one of them lacks
|
|
482 # dbID, and if there aren't slice and analysis objects attached to
|
|
483 # them both.
|
|
484 return undef;
|
|
485 } ## end sub equals
|
|
486
|
|
487
|
|
488 =head2 transform
|
|
489
|
|
490 Arg [1] : string $coord_system
|
|
491 The coord system to transform this feature to.
|
|
492 Arg [2] : string $version (optional)
|
|
493 The version of the coord system to transform this feature to.
|
|
494 Example : $feature = $feature->transform('contig');
|
|
495 next if(!defined($feature));
|
|
496 Description: Returns a copy of this feature, but converted to a different
|
|
497 coordinate system. The converted feature will be placed on a
|
|
498 slice which spans an entire sequence region of the new
|
|
499 coordinate system. If the requested coordinate system is the
|
|
500 same coordinate system it is simply placed on a slice which
|
|
501 spans the entire seq_region (as opposed to the original slice
|
|
502 which may have only partially covered the seq_region).
|
|
503
|
|
504 If a feature spans a boundary in the new coordinate system,
|
|
505 undef is returned instead.
|
|
506
|
|
507 For example, transforming an exon in contig coordinates to one
|
|
508 in chromosomal coodinates will place the exon on a slice of an
|
|
509 entire chromosome.
|
|
510 Returntype : Bio::EnsEMBL::Feature (or undef)
|
|
511 Exceptions : thrown if an invalid coordinate system is provided
|
|
512 warning if Feature is not attached to a slice
|
|
513 Caller : general, transfer()
|
|
514 Status : Stable
|
|
515
|
|
516 =cut
|
|
517
|
|
518 sub transform {
|
|
519 my $self = shift;
|
|
520 my $cs_name = shift;
|
|
521 my $cs_version = shift;
|
|
522 my $to_slice = shift;
|
|
523
|
|
524 #
|
|
525 # For backwards compatibility check if the arguments are old style args
|
|
526 #
|
|
527 if(!$cs_name || ref($cs_name)) {
|
|
528 deprecate('Calling transform without a coord system name is deprecated.');
|
|
529 return $self->_deprecated_transform($cs_name);
|
|
530 }
|
|
531
|
|
532 my $slice = $self->{'slice'};
|
|
533
|
|
534 if(!$slice) {
|
|
535 warning("Feature cannot be transformed without attached slice.");
|
|
536 return undef;
|
|
537 }
|
|
538
|
|
539 if(!$slice->adaptor()) {
|
|
540 warning("Feature cannot be transformed without adaptor on" .
|
|
541 " attached slice.");
|
|
542 return undef;
|
|
543 }
|
|
544
|
|
545 #use db from slice since this feature may not yet be stored in a database
|
|
546 my $db = $slice->adaptor->db();
|
|
547 my $cs = $db->get_CoordSystemAdaptor->fetch_by_name($cs_name, $cs_version);
|
|
548 my $current_cs = $slice->coord_system();
|
|
549
|
|
550 if(!$current_cs) {
|
|
551 warning("Feature cannot be transformed without CoordSystem on " .
|
|
552 "attached slice.");
|
|
553 return undef;
|
|
554 }
|
|
555
|
|
556 if(!$cs) {
|
|
557 throw("Cannot transform to unknown coordinate system " .
|
|
558 "[$cs_name $cs_version]\n");
|
|
559 }
|
|
560
|
|
561 # if feature is already in the requested coordinate system, we can just
|
|
562 # return a copy
|
|
563 if( $cs->equals( $current_cs ) && $slice->start() == 1 &&
|
|
564 $slice->strand() == 1 ) {
|
|
565 my $new_feature;
|
|
566 %$new_feature = %$self;
|
|
567 bless $new_feature, ref $self;
|
|
568 return $new_feature;
|
|
569 }
|
|
570 my $projection;
|
|
571 if(defined($to_slice)){
|
|
572 $projection = $self->project_to_slice( $to_slice ); }
|
|
573 else{
|
|
574 $projection = $self->project( $cs_name, $cs_version );
|
|
575 }
|
|
576
|
|
577 if(@$projection == 0){
|
|
578 return undef;
|
|
579 }
|
|
580 if( @$projection != 1 and !defined($to_slice)) {
|
|
581 # warn "MORE than one projection and NO slice specified ";
|
|
582 # warn "from ".$self->slice->name." to $cs_name, $cs_version\n";
|
|
583 return undef;
|
|
584 }
|
|
585 my $index = 0;
|
|
586 if(defined($to_slice)){
|
|
587 my $found = 0;
|
|
588 my $i = 0;
|
|
589 foreach my $proj (@{$projection}) {
|
|
590 my $slice = $proj->[2];
|
|
591 if($to_slice->get_seq_region_id eq $slice->get_seq_region_id){
|
|
592 $found =1;
|
|
593 $index = $i;
|
|
594 }
|
|
595 $i++;
|
|
596 }
|
|
597 if(!$found){
|
|
598 if(@$projection != 1){
|
|
599 if(@$projection == 0){
|
|
600 warn "number of mappings is ".@$projection."\n";
|
|
601 warn "could not project feature ".ref($self)." from ".$self->slice->seq_region_name." to ".$to_slice->seq_region_name."\n";
|
|
602 warn "In the region of ".$self->slice->start." <-> ".$self->slice->end."\n";
|
|
603 warn "feat start=".($self->slice->start+$self->start)."\tend=".($self->slice->start+$self->end)."\n";
|
|
604 }
|
|
605 else{
|
|
606 foreach my $proj (@{$projection}) {
|
|
607 my $slice = $proj->[2];
|
|
608 warn "available slice ".$slice->seq_regon_name."\n";
|
|
609 }
|
|
610 warn "MORE than one projection and none to slice specified (".$to_slice->seq_region_name.")\n";
|
|
611 }
|
|
612 }
|
|
613 else {
|
|
614 foreach my $proj (@{$projection}) {
|
|
615 warn "Mapping is to ".$proj->[2]->seq_region_name."\n";
|
|
616 }
|
|
617 warn "One projection but none to slice specified\n";
|
|
618 }
|
|
619 return undef;
|
|
620 }
|
|
621 }
|
|
622
|
|
623 my $p_slice = $projection->[$index]->[2];
|
|
624 my $slice_adaptor = $db->get_SliceAdaptor;
|
|
625 $slice = $slice_adaptor->fetch_by_region($p_slice->coord_system()->name(),
|
|
626 $p_slice->seq_region_name(),
|
|
627 undef, #start
|
|
628 undef, #end
|
|
629 1, #strand
|
|
630 $p_slice->coord_system()->version);
|
|
631
|
|
632 my $new_feature;
|
|
633 %$new_feature = %$self;
|
|
634 bless $new_feature, ref $self;
|
|
635 $new_feature->{'start'} = $p_slice->start();
|
|
636 $new_feature->{'end'} = $p_slice->end();
|
|
637 $new_feature->{'strand'} =
|
|
638 ($self->{'strand'} == 0) ? 0 : $p_slice->strand();
|
|
639 $new_feature->{'slice'} = $slice;
|
|
640 return $new_feature;
|
|
641
|
|
642 }
|
|
643
|
|
644
|
|
645
|
|
646 =head2 transfer
|
|
647
|
|
648 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
649 The slice to transfer this feature to
|
|
650 Example : $feature = $feature->transfer($slice);
|
|
651 next if(!defined($feature));
|
|
652 Description: Returns a copy of this feature which has been shifted onto
|
|
653 another slice.
|
|
654
|
|
655 If the new slice is in a different coordinate system the
|
|
656 feature is transformed first and then placed on the slice.
|
|
657 If the feature would be split across a coordinate system
|
|
658 boundary or mapped to a gap undef is returned instead.
|
|
659
|
|
660 If the feature cannot be placed on the provided slice because
|
|
661 it maps to an entirely different location, undef is returned
|
|
662 instead.
|
|
663
|
|
664 Returntype : Bio::EnsEMBL::Feature (or undef)
|
|
665 Exceptions : throw on incorrect argument
|
|
666 throw if feature does not have attached slice
|
|
667 Caller : general, transform()
|
|
668 Status : Stable
|
|
669
|
|
670 =cut
|
|
671
|
|
672
|
|
673 sub transfer {
|
|
674 my $self = shift;
|
|
675 my $slice = shift;
|
|
676
|
|
677 if(!$slice || !ref($slice) || (!$slice->isa('Bio::EnsEMBL::Slice') && !$slice->isa('Bio::EnsEMBL::LRGSlice'))) {
|
|
678 throw('Slice argument is required');
|
|
679 }
|
|
680
|
|
681 #make a shallow copy of the feature to be transfered
|
|
682 my $feature;
|
|
683 %{$feature} = %{$self};
|
|
684 bless $feature, ref($self);
|
|
685 weaken $feature->{adaptor};
|
|
686
|
|
687 my $current_slice = $self->{'slice'};
|
|
688
|
|
689 if(!$current_slice) {
|
|
690 warning("Feature cannot be transfered without attached slice.");
|
|
691 return undef;
|
|
692 }
|
|
693
|
|
694 my $cur_cs = $current_slice->coord_system();
|
|
695 my $dest_cs = $slice->coord_system();
|
|
696
|
|
697 #if we are not in the same coord system a transformation step is needed first
|
|
698 if(!$dest_cs->equals($cur_cs)) {
|
|
699 $feature = $feature->transform($dest_cs->name, $dest_cs->version, $slice);
|
|
700 return undef if(!defined($feature));
|
|
701 $current_slice = $feature->{'slice'};
|
|
702 }
|
|
703
|
|
704 # feature went to entirely different seq_region
|
|
705 if($current_slice->seq_region_name() ne $slice->seq_region_name()) {
|
|
706 return undef;
|
|
707 }
|
|
708
|
|
709 #if the current feature positions are not relative to the start of the
|
|
710 #seq region, convert them so they are
|
|
711 my $cur_slice_start = $current_slice->start();
|
|
712 my $cur_slice_strand = $current_slice->strand();
|
|
713 if($cur_slice_start != 1 || $cur_slice_strand != 1) {
|
|
714 my $fstart = $feature->{'start'};
|
|
715 my $fend = $feature->{'end'};
|
|
716
|
|
717 if($cur_slice_strand == 1) {
|
|
718 $feature->{'start'} = $fstart + $cur_slice_start - 1;
|
|
719 $feature->{'end'} = $fend + $cur_slice_start - 1;
|
|
720 } else {
|
|
721 my $cur_slice_end = $current_slice->end();
|
|
722 $feature->{'start'} = $cur_slice_end - $fend + 1;
|
|
723 $feature->{'end'} = $cur_slice_end - $fstart + 1;
|
|
724 $feature->{'strand'} *= -1;
|
|
725 }
|
|
726 }
|
|
727
|
|
728 my $fstart = $feature->{'start'};
|
|
729 my $fend = $feature->{'end'};
|
|
730
|
|
731 #convert to destination slice coords
|
|
732 if($slice->strand == 1) {
|
|
733 $feature->{'start'} = $fstart - $slice->start() + 1;
|
|
734 $feature->{'end'} = $fend - $slice->start() + 1;
|
|
735 } else {
|
|
736 $feature->{'start'} = $slice->end() - $fend + 1;
|
|
737 $feature->{'end'} = $slice->end() - $fstart + 1;
|
|
738 $feature->{'strand'} *= -1;
|
|
739 }
|
|
740
|
|
741 $feature->{'slice'} = $slice;
|
|
742
|
|
743 return $feature;
|
|
744 }
|
|
745
|
|
746 =head2 project_to_slice
|
|
747
|
|
748 Arg [1] : slice to project to
|
|
749
|
|
750
|
|
751 Example :
|
|
752 my $clone_projection = $feature->project_to_slice($slice);
|
|
753
|
|
754 foreach my $seg (@$clone_projection) {
|
|
755 my $clone = $seg->to_Slice();
|
|
756 print "Features current coords ", $seg->from_start, '-',
|
|
757 $seg->from_end, " project onto clone coords " .
|
|
758 $clone->seq_region_name, ':', $clone->start, '-', $clone->end,
|
|
759 $clone->strand, "\n";
|
|
760 }
|
|
761 Description: Returns the results of 'projecting' this feature onto another
|
|
762 slice . This is useful to see where a feature
|
|
763 would lie in a coordinate system in which it
|
|
764 crosses a boundary.
|
|
765
|
|
766 This method returns a reference to a list of
|
|
767 Bio::EnsEMBL::ProjectionSegment objects.
|
|
768 ProjectionSegments are blessed arrays and can also be used as
|
|
769 triplets [from_start,from_end,to_Slice]. The from_start and
|
|
770 from_end are the coordinates relative to the feature start.
|
|
771 For example, if a feature is current 100-200bp on a slice
|
|
772 then the triplets returned might be:
|
|
773 [1,50,$slice1],
|
|
774 [51,101,$slice2]
|
|
775
|
|
776 The to_Slice is a slice spanning the region on the requested
|
|
777 coordinate system that this feature projected to.
|
|
778
|
|
779 If the feature projects entirely into a gap then a reference to
|
|
780 an empty list is returned.
|
|
781
|
|
782 Returntype : listref of Bio::EnsEMBL::ProjectionSegments
|
|
783 which can also be used as [$start,$end,$slice] triplets
|
|
784 Exceptions : slice does not have an adaptor
|
|
785 Caller : general
|
|
786 Status : At Risk
|
|
787
|
|
788 =cut
|
|
789
|
|
790 sub project_to_slice {
|
|
791 my $self = shift;
|
|
792 my $to_slice = shift;
|
|
793 my $slice = $self->{'slice'};
|
|
794
|
|
795 if(!$slice) {
|
|
796 warning("Feature cannot be projected without attached slice.");
|
|
797 return [];
|
|
798 }
|
|
799
|
|
800
|
|
801 #get an adaptor from the attached slice because this feature may not yet
|
|
802 #be stored and may not have its own adaptor
|
|
803 my $slice_adaptor = $slice->adaptor();
|
|
804
|
|
805 if(!$slice_adaptor) {
|
|
806 throw("Cannot project feature because associated slice does not have an " .
|
|
807 " adaptor");
|
|
808 }
|
|
809
|
|
810 my $strand = $self->strand() * $slice->strand();
|
|
811 #fetch by feature always gives back forward strand slice:
|
|
812 $slice = $slice_adaptor->fetch_by_Feature($self);
|
|
813 $slice = $slice->invert if($strand == -1);
|
|
814 return $slice->project_to_slice($to_slice);
|
|
815 }
|
|
816
|
|
817
|
|
818 =head2 project
|
|
819
|
|
820 Arg [1] : string $name
|
|
821 The name of the coordinate system to project this feature onto
|
|
822 Arg [2] : string $version (optional)
|
|
823 The version of the coordinate system (such as 'NCBI34') to
|
|
824 project this feature onto
|
|
825 Example :
|
|
826 my $clone_projection = $feature->project('clone');
|
|
827
|
|
828 foreach my $seg (@$clone_projection) {
|
|
829 my $clone = $seg->to_Slice();
|
|
830 print "Features current coords ", $seg->from_start, '-',
|
|
831 $seg->from_end, " project onto clone coords " .
|
|
832 $clone->seq_region_name, ':', $clone->start, '-', $clone->end,
|
|
833 $clone->strand, "\n";
|
|
834 }
|
|
835 Description: Returns the results of 'projecting' this feature onto another
|
|
836 coordinate system. This is useful to see where a feature
|
|
837 would lie in a coordinate system in which it
|
|
838 crosses a boundary.
|
|
839
|
|
840 This method returns a reference to a list of
|
|
841 Bio::EnsEMBL::ProjectionSegment objects.
|
|
842 ProjectionSegments are blessed arrays and can also be used as
|
|
843 triplets [from_start,from_end,to_Slice]. The from_start and
|
|
844 from_end are the coordinates relative to the feature start.
|
|
845 For example, if a feature is current 100-200bp on a slice
|
|
846 then the triplets returned might be:
|
|
847 [1,50,$slice1],
|
|
848 [51,101,$slice2]
|
|
849
|
|
850 The to_Slice is a slice spanning the region on the requested
|
|
851 coordinate system that this feature projected to.
|
|
852
|
|
853 If the feature projects entirely into a gap then a reference to
|
|
854 an empty list is returned.
|
|
855
|
|
856 Returntype : listref of Bio::EnsEMBL::ProjectionSegments
|
|
857 which can also be used as [$start,$end,$slice] triplets
|
|
858 Exceptions : slice does not have an adaptor
|
|
859 Caller : general
|
|
860 Status : Stable
|
|
861
|
|
862 =cut
|
|
863
|
|
864 sub project {
|
|
865 my $self = shift;
|
|
866 my $cs_name = shift;
|
|
867 my $cs_version = shift;
|
|
868
|
|
869 my $slice = $self->{'slice'};
|
|
870
|
|
871 if(!$slice) {
|
|
872 warning("Feature cannot be projected without attached slice.");
|
|
873 return [];
|
|
874 }
|
|
875
|
|
876
|
|
877 #get an adaptor from the attached slice because this feature may not yet
|
|
878 #be stored and may not have its own adaptor
|
|
879 my $slice_adaptor = $slice->adaptor();
|
|
880
|
|
881 if(!$slice_adaptor) {
|
|
882 throw("Cannot project feature because associated slice does not have an " .
|
|
883 " adaptor");
|
|
884 }
|
|
885
|
|
886 my $strand = $self->strand() * $slice->strand();
|
|
887 #fetch by feature always gives back forward strand slice:
|
|
888 $slice = $slice_adaptor->fetch_by_Feature($self);
|
|
889 $slice = $slice->invert if($strand == -1);
|
|
890 return $slice->project($cs_name, $cs_version);
|
|
891 }
|
|
892
|
|
893
|
|
894
|
|
895 =head2 seqname
|
|
896
|
|
897 Arg [1] : (optional) $seqname
|
|
898 Example : $seqname = $feat->seqname();
|
|
899 Description: Getter/Setter for the name of the sequence that this feature
|
|
900 is on. Normally you can get away with not setting this value
|
|
901 and it will default to the name of the slice on which this
|
|
902 feature is on. It is useful to set this value on features which
|
|
903 do not ordinarily sit on features such as ProteinFeatures which
|
|
904 sit on peptides.
|
|
905 Returntype : string
|
|
906 Exceptions : none
|
|
907 Caller : general
|
|
908 Status : Stable
|
|
909
|
|
910 =cut
|
|
911
|
|
912 sub seqname {
|
|
913 my $self = shift;
|
|
914
|
|
915 if(@_) {
|
|
916 $self->{'seqname'} = shift;
|
|
917 }
|
|
918
|
|
919 if(!$self->{'seqname'} && $self->slice()) {
|
|
920 return $self->slice->name();
|
|
921 }
|
|
922
|
|
923 return $self->{'seqname'};
|
|
924 }
|
|
925
|
|
926
|
|
927
|
|
928
|
|
929 =head2 display_id
|
|
930
|
|
931 Arg [1] : none
|
|
932 Example : print $f->display_id();
|
|
933 Description: This method returns a string that is considered to be
|
|
934 the 'display' identifier. It is overridden by subclasses to
|
|
935 return an appropriate value for objects of that particular
|
|
936 class. If no appropriate display id is available an empty
|
|
937 string is returned instead.
|
|
938 Returntype : string
|
|
939 Exceptions : none
|
|
940 Caller : web drawing code
|
|
941 Status : Stable
|
|
942
|
|
943 =cut
|
|
944
|
|
945 sub display_id {
|
|
946 my $self = shift;
|
|
947 return '';
|
|
948 }
|
|
949
|
|
950
|
|
951 =head2 feature_Slice
|
|
952
|
|
953 Args : none
|
|
954 Example : $slice = $feature->feature_Slice()
|
|
955 Description: This is a convenience method to return a slice that covers the
|
|
956 Area of this feature. The feature start will be at 1 on it, and
|
|
957 it will have the length of this feature.
|
|
958 Returntype : Bio::EnsEMBL::Slice or undef if this feature has no attached
|
|
959 Slice.
|
|
960 Exceptions : warning if Feature does not have attached slice.
|
|
961 Caller : web drawing code
|
|
962 Status : Stable
|
|
963
|
|
964 =cut
|
|
965
|
|
966 sub feature_Slice {
|
|
967 my $self = shift;
|
|
968
|
|
969 my $slice = $self->slice();
|
|
970
|
|
971 if(!$slice) {
|
|
972 warning('Cannot obtain Feature_Slice for feature without attached slice');
|
|
973 return undef;
|
|
974 }
|
|
975
|
|
976 if($slice->isa("Bio::EnsEMBL::StrainSlice")){
|
|
977 return Bio::EnsEMBL::StrainSlice->new
|
|
978 (-seq_region_name => $slice->seq_region_name,
|
|
979 -seq_region_length => $slice->seq_region_length,
|
|
980 -coord_system => $slice->coord_system,
|
|
981 -start => $self->seq_region_start(),
|
|
982 -end => $self->seq_region_end(),
|
|
983 -strand => $self->seq_region_strand(),
|
|
984 -adaptor => $slice->adaptor(),
|
|
985 -strain_name => $slice->strain_name());
|
|
986 }
|
|
987 else{
|
|
988 return Bio::EnsEMBL::Slice->new
|
|
989 (-seq_region_name => $slice->seq_region_name,
|
|
990 -seq_region_length => $slice->seq_region_length,
|
|
991 -coord_system => $slice->coord_system,
|
|
992 -start => $self->seq_region_start(),
|
|
993 -end => $self->seq_region_end(),
|
|
994 -strand => $self->seq_region_strand(),
|
|
995 -adaptor => $slice->adaptor());
|
|
996 }
|
|
997 }
|
|
998
|
|
999
|
|
1000 =head2 seq_region_name
|
|
1001
|
|
1002 Arg [1] : none
|
|
1003 Example : print $feature->seq_region_name();
|
|
1004 Description: Gets the name of the seq_region which this feature is on.
|
|
1005 Returns undef if this Feature is not on a slice.
|
|
1006 Returntype : string or undef
|
|
1007 Exceptions : none
|
|
1008 Caller : general
|
|
1009 Status : Stable
|
|
1010
|
|
1011 =cut
|
|
1012
|
|
1013 sub seq_region_name {
|
|
1014 my $self = shift;
|
|
1015 my $slice = $self->{'slice'};
|
|
1016
|
|
1017 return ($slice) ? $slice->seq_region_name() : undef;
|
|
1018 }
|
|
1019
|
|
1020
|
|
1021 =head2 seq_region_length
|
|
1022
|
|
1023 Arg [1] : none
|
|
1024 Example : print $feature->seq_region_length();
|
|
1025 Description: Returns the length of the seq_region which this feature is on
|
|
1026 Returns undef if this Feature is not on a slice.
|
|
1027 Returntype : int (unsigned) or undef
|
|
1028 Exceptions : none
|
|
1029 Caller : general
|
|
1030 Status : Stable
|
|
1031
|
|
1032 =cut
|
|
1033
|
|
1034
|
|
1035 sub seq_region_length {
|
|
1036 my $self = shift;
|
|
1037 my $slice = $self->{'slice'};
|
|
1038
|
|
1039 return ($slice) ? $slice->seq_region_length() : undef;
|
|
1040 }
|
|
1041
|
|
1042
|
|
1043 =head2 seq_region_strand
|
|
1044
|
|
1045 Arg [1] : none
|
|
1046 Example : print $feature->seq_region_strand();
|
|
1047 Description: Returns the strand of the seq_region which this feature is on
|
|
1048 (i.e. feature_strand * slice_strand)
|
|
1049 Returns undef if this Feature is not on a slice.
|
|
1050 Returntype : 1,0,-1 or undef
|
|
1051 Exceptions : none
|
|
1052 Caller : general
|
|
1053 Status : Stable
|
|
1054
|
|
1055 =cut
|
|
1056
|
|
1057
|
|
1058 sub seq_region_strand {
|
|
1059 my $self = shift;
|
|
1060 my $slice = $self->{'slice'};
|
|
1061
|
|
1062 return ($slice) ? $slice->strand() * $self->{'strand'} : undef;
|
|
1063 }
|
|
1064
|
|
1065
|
|
1066 =head2 seq_region_start
|
|
1067
|
|
1068 Arg [1] : none
|
|
1069 Example : print $feature->seq_region_start();
|
|
1070 Description: Convenience method which returns the absolute start of this
|
|
1071 feature on the seq_region, as opposed to the relative (slice)
|
|
1072 position.
|
|
1073
|
|
1074 Returns undef if this feature is not on a slice.
|
|
1075 Returntype : int or undef
|
|
1076 Exceptions : none
|
|
1077 Caller : general
|
|
1078 Status : Stable
|
|
1079
|
|
1080 =cut
|
|
1081
|
|
1082 sub seq_region_start {
|
|
1083 my ($self) = @_;
|
|
1084
|
|
1085 my $slice = $self->slice();
|
|
1086
|
|
1087 if ( defined($slice) ) {
|
|
1088 my $start;
|
|
1089
|
|
1090 if ( $slice->strand() == 1 ) {
|
|
1091 if ( defined( $self->start() ) ) {
|
|
1092 if ($self->start < 0 && $slice->is_circular) {
|
|
1093 $start = $slice->seq_region_length + $self->start;
|
|
1094 } else {
|
|
1095 $start = $slice->start() + $self->start() - 1;
|
|
1096 }
|
|
1097 }
|
|
1098 } else {
|
|
1099 if ( defined( $self->end() ) ) {
|
|
1100 $start = $slice->end() - $self->end() + 1;
|
|
1101 }
|
|
1102 }
|
|
1103
|
|
1104 if ( defined($start)
|
|
1105 && $slice->is_circular()
|
|
1106 && $start > $slice->seq_region_length() )
|
|
1107 {
|
|
1108 $start -= $slice->seq_region_length();
|
|
1109 }
|
|
1110
|
|
1111 return $start;
|
|
1112 }
|
|
1113
|
|
1114 return undef;
|
|
1115 } ## end sub seq_region_start
|
|
1116
|
|
1117
|
|
1118 =head2 seq_region_end
|
|
1119
|
|
1120 Arg [1] : none
|
|
1121 Example : print $feature->seq_region_end();
|
|
1122 Description: Convenience method which returns the absolute end of this
|
|
1123 feature on the seq_region, as opposed to the relative (slice)
|
|
1124 position.
|
|
1125
|
|
1126 Returns undef if this feature is not on a slice.
|
|
1127 Returntype : int or undef
|
|
1128 Exceptions : none
|
|
1129 Caller : general
|
|
1130 Status : Stable
|
|
1131
|
|
1132 =cut
|
|
1133
|
|
1134 sub seq_region_end {
|
|
1135 my ($self) = @_;
|
|
1136
|
|
1137 my $slice = $self->slice();
|
|
1138
|
|
1139 if ( defined($slice) ) {
|
|
1140 my $end;
|
|
1141
|
|
1142 if ( $slice->strand() == 1 ) {
|
|
1143 if ( defined( $self->end() ) ) {
|
|
1144 $end = $slice->start() + $self->end() - 1;
|
|
1145 }
|
|
1146 } else {
|
|
1147 if ( defined( $self->start() ) ) {
|
|
1148 $end = $slice->end() - $self->start() + 1;
|
|
1149 }
|
|
1150 }
|
|
1151
|
|
1152 if ( defined($end)
|
|
1153 && $slice->is_circular()
|
|
1154 && $end > $slice->seq_region_length() )
|
|
1155 {
|
|
1156 $end -= $slice->seq_region_length();
|
|
1157 }
|
|
1158
|
|
1159 return $end;
|
|
1160 }
|
|
1161
|
|
1162 return undef;
|
|
1163 } ## end sub seq_region_end
|
|
1164
|
|
1165
|
|
1166 =head2 coord_system_name
|
|
1167
|
|
1168 Arg [1] : none
|
|
1169 Example : print $feature->coord_system_name()
|
|
1170 Description: Gets the name of the coord_system which this feature is on.
|
|
1171 Returns undef if this Feature is not on a slice.
|
|
1172 Returntype : string or undef
|
|
1173 Exceptions : none
|
|
1174 Caller : general
|
|
1175 Status : Stable
|
|
1176
|
|
1177 =cut
|
|
1178
|
|
1179 sub coord_system_name {
|
|
1180 my $self = shift;
|
|
1181 my $slice = $self->{'slice'};
|
|
1182 return ($slice) ? $slice->coord_system_name() : undef;
|
|
1183 }
|
|
1184
|
|
1185
|
|
1186 =head2 seq
|
|
1187
|
|
1188 Args : none
|
|
1189 Example : my $dna_sequence = $simple_feature->seq();
|
|
1190 Description: Returns the dna sequence from the attached slice and
|
|
1191 attached database that overlaps with this feature.
|
|
1192 Returns undef if there is no slice or no database.
|
|
1193 Returns undef if this feature is unstranded (i.e. strand=0).
|
|
1194 Returntype : undef or string
|
|
1195 Exceptions : warning if this feature is not stranded
|
|
1196 Caller : general
|
|
1197 Status : Stable
|
|
1198
|
|
1199 =cut
|
|
1200
|
|
1201
|
|
1202 sub seq {
|
|
1203 my $self = shift;
|
|
1204
|
|
1205 if( ! defined $self->{'slice'} ) {
|
|
1206 return undef;
|
|
1207 }
|
|
1208
|
|
1209 if(!$self->strand()) {
|
|
1210 warning("Cannot retrieve sequence for unstranded feature.");
|
|
1211 return undef;
|
|
1212 }
|
|
1213
|
|
1214 return $self->{'slice'}->subseq($self->start(), $self->end(),
|
|
1215 $self->strand());
|
|
1216
|
|
1217 }
|
|
1218
|
|
1219
|
|
1220
|
|
1221
|
|
1222 =head2 get_all_alt_locations
|
|
1223
|
|
1224 Arg [1] : none
|
|
1225 Example : @features = @{$feature->get_all_alt_locations()};
|
|
1226 foreach $f (@features) {
|
|
1227 print $f->slice->seq_region_name,' ',$f->start, $f->end,"\n";
|
|
1228 }
|
|
1229
|
|
1230 Description: Retrieves shallow copies of this feature in its alternate
|
|
1231 locations. A feature can be considered to have multiple
|
|
1232 locations when it sits on a alternative structural haplotype
|
|
1233 or when it is on a pseudo autosomal region. Most features will
|
|
1234 just return a reference to an empty list though.
|
|
1235 The features returned by this method will be on a slice which
|
|
1236 covers the entire alternate region.
|
|
1237
|
|
1238 Currently this method does not take into account alternate
|
|
1239 locations on the alternate locations (e.g. a reference
|
|
1240 sequence may have multiple alternate haplotypes. Asking
|
|
1241 for alternate locations of a feature on one of the alternate
|
|
1242 haplotypes will give you back the reference location, but not
|
|
1243 locations on the other alternate haplotypes).
|
|
1244
|
|
1245 Returntype : listref of features of the same type of this feature.
|
|
1246 Exceptions : none
|
|
1247 Caller : general
|
|
1248 Status : Stable
|
|
1249
|
|
1250 =cut
|
|
1251
|
|
1252 sub get_all_alt_locations {
|
|
1253 my $self = shift;
|
|
1254 my $return_all = shift || 0;
|
|
1255
|
|
1256 my $slice = $self->{'slice'} or return [];
|
|
1257 my $sa = $slice->adaptor() or return [];
|
|
1258
|
|
1259 # get slice of entire region
|
|
1260 $slice = $sa->fetch_by_seq_region_id($slice->get_seq_region_id);
|
|
1261
|
|
1262 my $axfa = $sa->db->get_AssemblyExceptionFeatureAdaptor();
|
|
1263 my $axfs = $axfa->fetch_all_by_Slice($slice);
|
|
1264
|
|
1265 my (@haps, @alt);
|
|
1266
|
|
1267 foreach my $axf (@$axfs) {
|
|
1268 if(uc($axf->type()) eq 'HAP') {
|
|
1269 push @haps, $axf;
|
|
1270 } elsif(uc($axf->type()) =~ 'PAR') {
|
|
1271 push @alt, $axf;
|
|
1272 } elsif( $axf->type() eq "PATCH_FIX"){
|
|
1273 push @haps, $axf;
|
|
1274 } elsif( $axf->type() eq "PATCH_FIX REF"){
|
|
1275 push @haps, $axf if $return_all > 0 ;
|
|
1276 } elsif( $axf->type() eq "HAP REF" ) {
|
|
1277 push @haps, $axf if $return_all > 0 ;
|
|
1278 # do nothing when you are on REF
|
|
1279 } elsif( $axf->type() eq "PATCH_NOVEL"){
|
|
1280 push @haps, $axf;
|
|
1281 }elsif( $axf->type() eq "PATCH_NOVEL REF"){
|
|
1282 push @haps, $axf if $return_all > 0 ;
|
|
1283 } else {
|
|
1284 warning("Unknown exception feature type ". $axf->type()."- ignoring.");
|
|
1285 }
|
|
1286 }
|
|
1287
|
|
1288 # regions surrounding hap are those of interest, not hap itself
|
|
1289 # convert hap alt. exc. features to regions around haps instead
|
|
1290 foreach my $h (@haps) {
|
|
1291 my $haslice = $h->alternate_slice();
|
|
1292 my $hacs = $haslice->coord_system();
|
|
1293
|
|
1294 if($h->start() > 1 && $haslice->start() > 1) {
|
|
1295 my $aslice = $sa->fetch_by_region($hacs->name(),
|
|
1296 $haslice->seq_region_name(),
|
|
1297 1,
|
|
1298 $haslice->start()-1,
|
|
1299 $haslice->strand(),
|
|
1300 $hacs->version());
|
|
1301
|
|
1302 push @alt, Bio::EnsEMBL::AssemblyExceptionFeature->new
|
|
1303 (-start => 1,
|
|
1304 -end => $h->start()-1,
|
|
1305 -alternate_slice => $aslice);
|
|
1306 }
|
|
1307
|
|
1308 if($h->end() < $slice->seq_region_length() &&
|
|
1309 $haslice->end < $haslice->seq_region_length()) {
|
|
1310 my $aslice = $sa->fetch_by_region($hacs->name(),
|
|
1311 $haslice->seq_region_name(),
|
|
1312 $haslice->end()+1,
|
|
1313 $haslice->seq_region_length(),
|
|
1314 $haslice->strand(),
|
|
1315 $hacs->version());
|
|
1316
|
|
1317 push @alt, Bio::EnsEMBL::AssemblyExceptionFeature->new
|
|
1318 (-start => $h->end() + 1,
|
|
1319 -end => $slice->seq_region_length(),
|
|
1320 -alternate_slice => $aslice);
|
|
1321 }
|
|
1322 }
|
|
1323
|
|
1324
|
|
1325 # check if exception regions contain our feature
|
|
1326
|
|
1327 my @features;
|
|
1328
|
|
1329 foreach my $axf (@alt) {
|
|
1330 # ignore other region if feature is not entirely on it
|
|
1331 next if($self->seq_region_start() < $axf->start() ||
|
|
1332 $self->seq_region_end() > $axf->end());
|
|
1333
|
|
1334 # quick shallow copy of the feature
|
|
1335 my $f;
|
|
1336 %$f = %$self;
|
|
1337 bless $f, ref($self);
|
|
1338
|
|
1339 my $aslice = $axf->alternate_slice();
|
|
1340
|
|
1341 # position feature on entire slice of other region
|
|
1342 $f->{'start'} = $f->seq_region_start() - $axf->start() + $aslice->start();
|
|
1343 $f->{'end'} = $f->seq_region_end() - $axf->start() + $aslice->start();
|
|
1344 $f->{'strand'} *= $aslice->strand();
|
|
1345
|
|
1346 $f->{'slice'} = $sa->fetch_by_seq_region_id($aslice->get_seq_region_id());
|
|
1347
|
|
1348 push @features, $f;
|
|
1349 }
|
|
1350
|
|
1351 return \@features;
|
|
1352 }
|
|
1353
|
|
1354
|
|
1355 =head2 overlaps
|
|
1356
|
|
1357 Arg [1] : Bio::EnsEMBL::Feature $f
|
|
1358 The other feature you want to check overlap with this feature
|
|
1359 for.
|
|
1360 Description: This method does a range comparison of this features start and
|
|
1361 end and compares it with another features start and end. It will
|
|
1362 return true if these ranges overlap and the features are on the
|
|
1363 same seq_region.
|
|
1364 Returntype : TRUE if features overlap, FALSE if they don't
|
|
1365 Exceptions : warning if features are on different seq_regions
|
|
1366 Caller : general
|
|
1367 Status : Stable
|
|
1368
|
|
1369 =cut
|
|
1370
|
|
1371 sub overlaps {
|
|
1372 my $self = shift;
|
|
1373 my $f = shift;
|
|
1374
|
|
1375 my $sr1_name = $self->seq_region_name;
|
|
1376 my $sr2_name = $f->seq_region_name;
|
|
1377
|
|
1378 if ($sr1_name and $sr2_name and ($sr1_name ne $sr2_name)) {
|
|
1379 warning("Bio::EnsEMBL::Feature->overlaps(): features are on different seq regions.");
|
|
1380 return undef;
|
|
1381 }
|
|
1382
|
|
1383 return ($self->seq_region_end >= $f->seq_region_start and $self->seq_region_start <= $f->seq_region_end);
|
|
1384 }
|
|
1385
|
|
1386
|
|
1387 =head2 get_overlapping_Genes
|
|
1388
|
|
1389 Description: Get all the genes that overlap this feature.
|
|
1390 Returntype : list ref of Bio::EnsEMBL::Gene
|
|
1391 Caller : general
|
|
1392 Status : UnStable
|
|
1393
|
|
1394 =cut
|
|
1395
|
|
1396 sub get_overlapping_Genes{
|
|
1397 my $self = shift;
|
|
1398
|
|
1399 my $slice = $self->feature_Slice;
|
|
1400 return $slice->get_all_Genes();
|
|
1401 }
|
|
1402
|
|
1403 # query for absolute nearest.
|
|
1404 # select x.display_label, g.gene_id, g.seq_region_start, ABS(cast((32921638 - g.seq_region_end) as signed)) as 'dist' from gene g, xref x where g.display_xref_id = x.xref_id and seq_region_id = 27513 order by ABS(cast((32921638 - g.seq_region_end) as signed)) limit 10;
|
|
1405
|
|
1406 =head2 get_nearest_Gene
|
|
1407
|
|
1408 Description: Get all the nearest gene to the feature
|
|
1409 Returntype : Bio::EnsEMBL::Gene
|
|
1410 Caller : general
|
|
1411 Status : UnStable
|
|
1412
|
|
1413 =cut
|
|
1414
|
|
1415 sub get_nearest_Gene {
|
|
1416 my $self = shift;
|
|
1417 my $stranded = shift;
|
|
1418 my $stream = shift;
|
|
1419
|
|
1420 my $ga = Bio::EnsEMBL::Registry->get_adaptor($self->adaptor->db->species,"core","Gene");
|
|
1421
|
|
1422 return $ga->fetch_nearest_Gene_by_Feature($self, $stranded, $stream);
|
|
1423
|
|
1424 }
|
|
1425
|
|
1426 =head2 summary_as_hash
|
|
1427
|
|
1428 Example : $feature_summary = $feature->summary_as_hash();
|
|
1429 Description : Retrieves a textual summary of this Feature.
|
|
1430 Should be overidden by subclasses for specific tweaking
|
|
1431 Returns : hashref of arrays of descriptive strings
|
|
1432 Status : Intended for internal use
|
|
1433 =cut
|
|
1434
|
|
1435 sub summary_as_hash {
|
|
1436 my $self = shift;
|
|
1437 my %summary;
|
|
1438 $summary{'ID'} = $self->display_id;
|
|
1439 $summary{'start'} = $self->seq_region_start;
|
|
1440 $summary{'end'} = $self->seq_region_end;
|
|
1441 $summary{'strand'} = $self->strand;
|
|
1442 $summary{'seq_region_name'} = $self->seq_region_name;
|
|
1443 return \%summary;
|
|
1444 }
|
|
1445
|
|
1446 =head2 species
|
|
1447
|
|
1448 Example : $feature->species();
|
|
1449 Description : Shortcut to the feature's DBAdaptor and returns its species name
|
|
1450 Returntype : String the species name
|
|
1451 Exceptions : Thrown if there is no attached adaptor
|
|
1452 Caller : Webcode
|
|
1453
|
|
1454 =cut
|
|
1455
|
|
1456 sub species {
|
|
1457 my ($self) = @_;
|
|
1458 throw "Can only call this method if you have attached an adaptor" if ! $self->adaptor();
|
|
1459 return $self->adaptor()->db()->species();
|
|
1460 }
|
|
1461
|
|
1462
|
|
1463 ##############################################
|
|
1464 # Methods included for backwards compatibility
|
|
1465 ##############################################
|
|
1466
|
|
1467
|
|
1468 =head2 contig
|
|
1469
|
|
1470 This method is deprecated and included for backwards compatibility only.
|
|
1471 Use slice() instead
|
|
1472 =cut
|
|
1473 sub contig {
|
|
1474 deprecate('Use slice() instead');
|
|
1475 slice(@_);
|
|
1476 }
|
|
1477
|
|
1478
|
|
1479
|
|
1480 =head2 sub_SeqFeature
|
|
1481
|
|
1482 This method is deprecated and only for genebuild backwards compatibility.
|
|
1483 Avoid using it if possible
|
|
1484 =cut
|
|
1485 sub sub_SeqFeature{
|
|
1486 my ($self) = @_;
|
|
1487 return @{$self->{'_gsf_sub_array'}} if($self->{'_gsf_sub_array'});
|
|
1488 }
|
|
1489
|
|
1490 =head2 add_sub_SeqFeature
|
|
1491
|
|
1492 This method is deprecated and only for genebuild backwards compatibility.
|
|
1493 Avoid using it if possible
|
|
1494 =cut
|
|
1495 sub add_sub_SeqFeature{
|
|
1496 my ($self,$feat,$expand) = @_;
|
|
1497 my ($p, $f, $l) = caller;
|
|
1498 if( $expand eq 'EXPAND' ) {
|
|
1499 # if this doesn't have start/end set - forget it!
|
|
1500 if( ! $self->start && ! $self->end ) {
|
|
1501
|
|
1502 $self->start($feat->start());
|
|
1503 $self->end($feat->end());
|
|
1504 $self->strand($feat->strand);
|
|
1505 } else {
|
|
1506 if( $feat->start < $self->start ) {
|
|
1507 $self->start($feat->start);
|
|
1508 }
|
|
1509
|
|
1510 if( $feat->end > $self->end ) {
|
|
1511 $self->end($feat->end);
|
|
1512 }
|
|
1513 }
|
|
1514 } else {
|
|
1515 if($self->start > $feat->start || $self->end < $feat->end) {
|
|
1516 throw("$feat is not contained within parent feature, " .
|
|
1517 "and expansion is not valid");
|
|
1518 }
|
|
1519 }
|
|
1520
|
|
1521 push(@{$self->{'_gsf_sub_array'}},$feat);
|
|
1522 }
|
|
1523
|
|
1524 =head2 flush_sub_SeqFeature
|
|
1525
|
|
1526 This method is deprecated and only for genebuild backwards compatibility.
|
|
1527 Avoid using it isf possible
|
|
1528 =cut
|
|
1529 sub flush_sub_SeqFeature {
|
|
1530 my ($self) = @_;
|
|
1531 $self->{'_gsf_sub_array'} = [];
|
|
1532 }
|
|
1533
|
|
1534
|
|
1535 sub _deprecated_transform {
|
|
1536 my $self = shift;
|
|
1537 my $arg = shift;
|
|
1538
|
|
1539 if(!$arg) {
|
|
1540 warning("Calling transform() with no arguments is deprecated.\n".
|
|
1541 "A coordinate system name argument should be used instead.\n".
|
|
1542 "You probably wanted transform('seqlevel') or transform('contig').");
|
|
1543 return $self->transform('seqlevel');
|
|
1544 }
|
|
1545
|
|
1546 if(ref($arg) eq 'Bio::EnsEMBL::Slice') {
|
|
1547 if($arg->{'empty'}) {
|
|
1548 warning("Calling transform with an empty slice is deprecated.\n" .
|
|
1549 "A coordinate system name argument should be used instead.\n".
|
|
1550 "You probably wanted transform('chromosome') or " .
|
|
1551 "transform('toplevel')");
|
|
1552 return $self->transform('toplevel');
|
|
1553 }
|
|
1554 warning("Calling transform with a slice is deprecated.\n" .
|
|
1555 "Use the transfer method instead");
|
|
1556 return $self->transfer($arg);
|
|
1557 }
|
|
1558
|
|
1559 warning("Calling transform with a [".ref($arg)."] arg is no longer " .
|
|
1560 "(or never was) supported. Doing nothing instead.");
|
|
1561
|
|
1562 return $self;
|
|
1563 }
|
|
1564
|
|
1565
|
|
1566 =head2 id
|
|
1567
|
|
1568 This method is deprecated and only included for backwards compatibility.
|
|
1569 Use display_id, hseqname, dbID or stable_id instead
|
|
1570
|
|
1571 =cut
|
|
1572
|
|
1573 sub id {
|
|
1574 my $self = shift;
|
|
1575 deprecate("id method is not used - use display_id instead");
|
|
1576 return $self->{'stable_id'} if($self->{'stable_id'});
|
|
1577 return $self->{'hseqname'} if($self->{'hseqname'});
|
|
1578 return $self->{'seqname'} if($self->{'seqname'});
|
|
1579 return $self->{'dbID'};
|
|
1580 }
|
|
1581
|
|
1582 1;
|