0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Exon - A class representing an Exon
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 $exon = Bio::EnsEMBL::Exon->new(
|
|
28 -START => 100,
|
|
29 -END => 200,
|
|
30 -STRAND => 1,
|
|
31 -SLICE => $slice,
|
|
32 -DBID => $dbID,
|
|
33 -ANALYSIS => $analysis,
|
|
34 -STABLE_ID => 'ENSE000000123',
|
|
35 -VERSION => 2
|
|
36 );
|
|
37
|
|
38 # seq() returns a Bio::Seq
|
|
39 my $seq = $exon->seq->seq();
|
|
40
|
|
41 # Peptide only makes sense within transcript context
|
|
42 my $pep = $exon->peptide($transcript)->seq();
|
|
43
|
|
44 # Normal feature operations can be performed:
|
|
45 $exon = $exon->transform('clone');
|
|
46 $exon->move( $new_start, $new_end, $new_strand );
|
|
47 print $exon->slice->seq_region_name();
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 This is a class which represents an exon which is part of a transcript.
|
|
52 See Bio::EnsEMBL::Transcript
|
|
53
|
|
54 =head1 METHODS
|
|
55
|
|
56 =cut
|
|
57
|
|
58 package Bio::EnsEMBL::Exon;
|
|
59
|
|
60 use strict;
|
|
61
|
|
62 use Bio::EnsEMBL::Feature;
|
|
63 use Bio::Seq; # exons have to have sequences...
|
|
64
|
|
65 use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate );
|
|
66 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
67 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
|
|
68 use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor;
|
|
69
|
|
70 use vars qw(@ISA);
|
|
71 @ISA = qw(Bio::EnsEMBL::Feature);
|
|
72
|
|
73
|
|
74 =head2 new
|
|
75
|
|
76 Arg [-SLICE]: Bio::EnsEMBL::Slice - Represents the sequence that this
|
|
77 feature is on. The coordinates of the created feature are
|
|
78 relative to the start of the slice.
|
|
79 Arg [-START]: The start coordinate of this feature relative to the start
|
|
80 of the slice it is sitting on. Coordinates start at 1 and
|
|
81 are inclusive.
|
|
82 Arg [-END] : The end coordinate of this feature relative to the start of
|
|
83 the slice it is sitting on. Coordinates start at 1 and are
|
|
84 inclusive.
|
|
85 Arg [-STRAND]: The orientation of this feature. Valid values are 1,-1,0.
|
|
86 Arg [-SEQNAME] : (optional) A seqname to be used instead of the default name
|
|
87 of the slice. Useful for features that do not have an
|
|
88 attached slice such as protein features.
|
|
89 Arg [-dbID] : (optional) internal database id
|
|
90 Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor
|
|
91 Arg [-PHASE] : the phase.
|
|
92 Arg [-END_PHASE]: the end phase
|
|
93 Arg [-STABLE_ID]: (optional) the stable id of the exon
|
|
94 Arg [-VERSION] : (optional) the version
|
|
95 Arg [-CREATED_DATE] : (optional) the created date
|
|
96 Arg [-MODIFIED_DATE]: (optional) the last modified date
|
|
97
|
|
98 Example : none
|
|
99 Description: create an Exon object
|
|
100 Returntype : Bio::EnsEMBL::Exon
|
|
101 Exceptions : throws if phase is not valid (i.e. not one of 0, 1, 2 or -1)
|
|
102 Caller : general
|
|
103 Status : Stable
|
|
104
|
|
105 =cut
|
|
106
|
|
# Constructor.  The generic feature arguments are handled by the superclass
# constructor; this method then stores the exon-specific named arguments
# (-PHASE, -END_PHASE, -STABLE_ID, -VERSION, -CREATED_DATE, -MODIFIED_DATE,
# -IS_CURRENT, -IS_CONSTITUTIVE) on the new object.
#
# -IS_CURRENT defaults to 1 and -IS_CONSTITUTIVE defaults to 0 when they are
# not supplied.  A supplied -PHASE is routed through phase(), which throws on
# an invalid value; -END_PHASE is stored exactly as given.
sub new {
  my ( $caller, @args ) = @_;

  # Support both Class->new(...) and $object->new(...).
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@args);

  my ( $phase,        $end_phase,     $stable_id,  $version,
       $created_date, $modified_date, $is_current, $is_constitutive )
    = rearrange(
      [ "PHASE",        "END_PHASE",     "STABLE_ID",  "VERSION",
        "CREATED_DATE", "MODIFIED_DATE", "IS_CURRENT", "IS_CONSTITUTIVE" ],
      @args );

  # Use the accessor rather than the hash slot so the phase is validated.
  $self->phase($phase) if defined($phase);

  @{$self}{ 'end_phase', 'stable_id', 'version', 'created_date',
            'modified_date' }
    = ( $end_phase, $stable_id, $version, $created_date, $modified_date );

  # Apply the documented defaults for the two flags.
  $self->{'is_current'}      = defined($is_current)      ? $is_current      : 1;
  $self->{'is_constitutive'} = defined($is_constitutive) ? $is_constitutive : 0;

  return $self;
}
|
|
145
|
|
146
|
|
147 # =head2 new_fast
|
|
148
|
|
149 # Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
150 # Arg [2] : int $start
|
|
151 # Arg [3] : int $end
|
|
152 # Arg [4] : int $strand (1 or -1)
|
|
153 # Example : none
|
|
154 # Description: create an Exon object
|
|
155 # Returntype : Bio::EnsEMBL::Exon
|
|
156 # Exceptions : throws if end < start
|
|
157 # Caller : general
|
|
158 # Status : Stable
|
|
159
|
|
160 # =cut
|
|
161
|
|
162 # sub new_fast {
|
|
163 # my ($class, $slice, $start, $end, $strand) = @_;
|
|
164
|
|
165 # my $self = bless {}, $class;
|
|
166
|
|
167 # # Swap start and end if they're in the wrong order
|
|
168 # # We assume that the strand is correct and keep the input value.
|
|
169
|
|
170 # if ($start > $end) {
|
|
171 # throw( "End smaller than start not allowed" );
|
|
172 # }
|
|
173
|
|
174 # $self->start ($start);
|
|
175 # $self->end ($end);
|
|
176 # $self->strand($strand);
|
|
177 # $self->slice($slice);
|
|
178
|
|
179 # return $self;
|
|
180 # }
|
|
181
|
|
182
|
|
183 =head2 end_phase
|
|
184
|
|
185 Arg [1] : (optional) int $end_phase
|
|
186 Example : $end_phase = $feat->end_phase;
|
|
187 Description: Gets/Sets the end phase of the exon.
|
|
188 end_phase = number of bases from the last incomplete codon of
|
|
189 this exon.
|
|
190 Usually, end_phase = (phase + exon_length)%3
|
|
191 but end_phase could be -1 if the exon is half-coding and its 3
|
|
192 prime end is UTR.
|
|
193 Returntype : int
|
|
194 Exceptions : warning if end_phase is called without an argument and the
|
|
195 value is not set.
|
|
196 Caller : general
|
|
197 Status : Stable
|
|
198
|
|
199 =cut
|
|
200
|
|
# Getter/setter for the end phase of the exon.
#
# With an argument (even an explicit undef) the value is stored as given,
# without validation.  Without an argument the stored value is returned, and
# a warning is emitted if no end phase has been set yet.
sub end_phase {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'end_phase'} = $new_value[0];
  }
  elsif ( !defined( $self->{'end_phase'} ) ) {
    warning("No end phase set in Exon. You must set it explicitly.");
  }

  return $self->{'end_phase'};
}
|
|
213
|
|
214
|
|
215 =head2 phase
|
|
216
|
|
217 Arg [1] : (optional) int $phase
|
|
218 Example : my $phase = $exon->phase;
|
|
219 $exon->phase(2);
|
|
220 Description: Gets/Sets the phase of the exon.
|
|
221 Returntype : int
|
|
222 Exceptions : throws if phase is not 0, 1, 2 or -1.
|
|
223 Caller : general
|
|
224 Status : Stable
|
|
225
|
|
226
|
|
227 Get or set the phase of the Exon, which tells the
|
|
228 translation machinery, which makes a peptide from
|
|
229 the DNA, where to start.
|
|
230
|
|
231 The Ensembl phase convention can be thought of as
|
|
232 "the number of bases of the first codon which are
|
|
233 on the previous exon". It is therefore 0, 1 or 2
|
|
234 (or -1 if the exon is non-coding). In ascii art,
|
|
235 with alternate codons represented by B<###> and
|
|
236 B<+++>:
|
|
237
|
|
238 Previous Exon Intron This Exon
|
|
239 ...------------- -------------...
|
|
240
|
|
241 5' Phase 3'
|
|
242 ...#+++###+++### 0 +++###+++###+...
|
|
243 ...+++###+++###+ 1 ++###+++###++...
|
|
244 ...++###+++###++ 2 +###+++###+++...
|
|
245
|
|
246 Here is another explanation from Ewan:
|
|
247
|
|
248 Phase means the place where the intron lands
|
|
249 inside the codon - 0 between codons, 1 between
|
|
250 the 1st and second base, 2 between the second and
|
|
251 3rd base. Exons therefore have a start phase and
|
|
252 a end phase, but introns have just one phase.
|
|
253
|
|
254 =cut
|
|
255
|
|
# Getter/setter for the phase of the exon.
#
# Arg [1]    : (optional) int $value - one of -1, 0, 1 or 2
#              (-1 is used for non-coding exons).
# Returntype : the stored phase (undef if none has been set).
# Exceptions : throws if a defined $value is not exactly -1, 0, 1 or 2.
#
# The pattern is anchored with \A and \z rather than ^ and $, because $ also
# matches before a trailing newline and would let a value such as "1\n"
# through validation.
sub phase {
  my ( $self, $value ) = @_;

  if ( defined($value) ) {
    if ( $value !~ /\A(?:-1|0|1|2)\z/ ) {
      throw("Bad value ($value) for exon phase. Should only be" .
            " -1,0,1,2\n");
    }
    $self->{'phase'} = $value;
  }

  return $self->{'phase'};
}
|
|
271
|
|
272
|
|
273 =head2 frame
|
|
274
|
|
275 Arg [1] : none
|
|
276 Example : $frame = $exon->frame
|
|
277 Description: Gets the frame of this exon
|
|
278 Returntype : int
|
|
279 Exceptions : thrown if an arg is passed
|
|
280 thrown if frame cannot be calculated due to a bad phase value
|
|
281 Caller : general
|
|
282 Status : Stable
|
|
283
|
|
284 =cut
|
|
285
|
|
# Read-only accessor for the frame of the exon, derived from its start
# coordinate and its phase.
#
# Returns '.' (the GFF convention for "no frame information") when the phase
# is -1; otherwise returns the start, shifted according to the phase, modulo 3.
# Throws if an argument is passed, or if the phase is none of -1, 0, 1, 2.
sub frame {
  my ( $self, $value ) = @_;

  if ( defined $value ) {
    throw("Cannot set frame. Deduced from seq_start and phase");
  }

  my $phase = $self->phase();

  # gff convention for no frame info
  return '.' if $phase == -1;

  # frame is mod 3 of the translation point
  return $self->start() % 3         if $phase == 0;
  return ( $self->start() + 2 ) % 3 if $phase == 1;
  return ( $self->start() + 1 ) % 3 if $phase == 2;

  throw( "bad phase in exon " . $phase );
}
|
|
313
|
|
314
|
|
315 =head2 start
|
|
316
|
|
317 Arg [1] : int $start (optional)
|
|
318 Example : $start = $exon->start();
|
|
319 Description: Getter/Setter for the start of this exon. The superclass
|
|
320 implementation is overridden to flush the internal sequence
|
|
321 cache if this value is altered
|
|
322 Returntype : int
|
|
323 Exceptions : none
|
|
324 Caller : general
|
|
325 Status : Stable
|
|
326
|
|
327 =cut
|
|
328
|
|
# Getter/setter for the start of this exon.  Delegates to the superclass,
# but first drops the cached sequence whenever an argument is supplied.
sub start {
  my ( $self, @args ) = @_;

  if (@args) {
    # The coordinate is about to change, so the cached sequence is stale.
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::start(@args);
}
|
|
335
|
|
336
|
|
337 =head2 end
|
|
338
|
|
339 Arg [1] : int $end (optional)
|
|
340 Example : $end = $exon->end();
|
|
341 Description: Getter/Setter for the end of this exon. The superclass
|
|
342 implementation is overridden to flush the internal sequence
|
|
343 cache if this value is altered
|
|
344 Returntype : int
|
|
345 Exceptions : none
|
|
346 Caller : general
|
|
347 Status : Stable
|
|
348
|
|
349 =cut
|
|
350
|
|
# Getter/setter for the end of this exon.  Delegates to the superclass,
# but first drops the cached sequence whenever an argument is supplied.
sub end {
  my ( $self, @args ) = @_;

  if (@args) {
    # The coordinate is about to change, so the cached sequence is stale.
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::end(@args);
}
|
|
357
|
|
358
|
|
359 =head2 strand
|
|
360
|
|
361 Arg [1] : int $strand (optional)
|
|
362 Example : $strand = $exon->strand();
|
|
363 Description: Getter/Setter for the strand of this exon. The superclass
|
|
364 implementation is overridden to flush the internal sequence
|
|
365 cache if this value is altered
|
|
366 Returntype : int
|
|
367 Exceptions : none
|
|
368 Caller : general
|
|
369 Status : Stable
|
|
370
|
|
371 =cut
|
|
372
|
|
# Getter/setter for the strand of this exon.  Delegates to the superclass,
# but first drops the cached sequence whenever an argument is supplied.
sub strand {
  my ( $self, @args ) = @_;

  if (@args) {
    # The orientation is about to change, so the cached sequence is stale.
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::strand(@args);
}
|
|
379
|
|
380 =head2 cdna_start
|
|
381
|
|
382 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
383 The transcript for which cDNA coordinates should be
|
|
384 relative to.
|
|
385 Example : $cdna_start = $exon->cdna_start($transcript);
|
|
386 Description : Returns the start position of the exon in cDNA
|
|
387 coordinates.
|
|
388 Since an exon may be part of one or more transcripts,
|
|
389 the relevant transcript must be given as argument to
|
|
390 this method.
|
|
391 Return type : Integer
|
|
392 Exceptions : Throws if the given argument is not a transcript.
|
|
393 Throws if the first part of the exon maps into a gap.
|
|
394 Throws if the exon can not be mapped at all.
|
|
395 Caller : General
|
|
396 Status : Stable
|
|
397
|
|
398 =cut
|
|
399
|
|
# Returns the start of this exon in the cDNA coordinates of $transcript.
#
# The exon's start/end/strand are mapped with $transcript->genomic2cdna() and
# the start of the first mapped piece is returned.  The result is cached on
# the exon per transcript dbID; nothing is cached when the transcript has no
# dbID.
#
# Throws if $transcript is not a Bio::EnsEMBL::Transcript, if the mapping
# yields nothing, or if the first mapped piece is a Bio::EnsEMBL::Mapper::Gap.
sub cdna_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();
  my $cacheable     = defined $transcript_id;

  if ( $cacheable && exists $self->{cdna_start}->{$transcript_id} ) {
    return $self->{cdna_start}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@mapped ) {
    throw("Can not map exon");
  }
  if ( $mapped[0]->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("First part of exon maps into gap");
  }

  my $cdna_start = $mapped[0]->start();

  if ($cacheable) {
    $self->{cdna_start}->{$transcript_id} = $cdna_start;
  }

  return $cdna_start;
} ## end sub cdna_start
|
|
428
|
|
429 =head2 cdna_end
|
|
430
|
|
431 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
432 The transcript for which cDNA coordinates should be
|
|
433 relative to.
|
|
434 Example : $cdna_end = $exon->cdna_end($transcript);
|
|
435 Description : Returns the end position of the exon in cDNA
|
|
436 coordinates.
|
|
437 Since an exon may be part of one or more transcripts,
|
|
438 the relevant transcript must be given as argument to
|
|
439 this method.
|
|
440 Return type : Integer
|
|
441 Exceptions : Throws if the given argument is not a transcript.
|
|
442 Throws if the last part of the exon maps into a gap.
|
|
443 Throws if the exon can not be mapped at all.
|
|
444 Caller : General
|
|
445 Status : Stable
|
|
446
|
|
447 =cut
|
|
448
|
|
# Returns the end of this exon in the cDNA coordinates of $transcript.
#
# The exon's start/end/strand are mapped with $transcript->genomic2cdna() and
# the end of the last mapped piece is returned.  The result is cached on the
# exon per transcript dbID; nothing is cached when the transcript has no dbID.
#
# Throws if $transcript is not a Bio::EnsEMBL::Transcript, if the mapping
# yields nothing, or if the last mapped piece is a Bio::EnsEMBL::Mapper::Gap.
sub cdna_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();
  my $cacheable     = defined $transcript_id;

  if ( $cacheable && exists $self->{cdna_end}->{$transcript_id} ) {
    return $self->{cdna_end}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  if ( !@mapped ) {
    throw("Can not map exon");
  }
  if ( $mapped[-1]->isa('Bio::EnsEMBL::Mapper::Gap') ) {
    throw("Last part of exon maps into gap");
  }

  my $cdna_end = $mapped[-1]->end();

  if ($cacheable) {
    $self->{cdna_end}->{$transcript_id} = $cdna_end;
  }

  return $cdna_end;
} ## end sub cdna_end
|
|
477
|
|
478 =head2 cdna_coding_start
|
|
479
|
|
480 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
481 The transcript for which cDNA coordinates should be
|
|
482 relative to.
|
|
483 Example : $cdna_coding_start = $exon->cdna_coding_start($transcript);
|
|
484 Description : Returns the start position of the coding region of the
|
|
485 exon in cDNA coordinates. Returns undef if the whole
|
|
486 exon is non-coding.
|
|
487 Since an exon may be part of one or more transcripts,
|
|
488 the relevant transcript must be given as argument to
|
|
489 this method.
|
|
490 Return type : Integer or undef
|
|
491 Exceptions : Throws if the given argument is not a transcript.
|
|
492 Caller : General
|
|
493 Status : Stable
|
|
494
|
|
495 =cut
|
|
496
|
|
# Returns the cDNA coordinate (relative to $transcript) at which the coding
# region begins inside this exon, or undef when no part of the exon is coding.
#
# The result is cached on the exon per transcript dbID.  When the result is
# undef, the cached cdna_coding_end for the same transcript is set to undef
# as well.  Throws if $transcript is not a Bio::EnsEMBL::Transcript.
sub cdna_coding_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined $transcript_id
       && exists $self->{cdna_coding_start}->{$transcript_id} )
  {
    return $self->{cdna_coding_start}->{$transcript_id};
  }

  # Stays undef for a non-coding transcript or a fully non-coding exon.
  my $coding_start;
  my $cds_start = $transcript->cdna_coding_start();

  if ( defined $cds_start ) {
    my $exon_start = $self->cdna_start($transcript);

    if ( $cds_start >= $exon_start ) {
      # Coding region starts either within or downstream of this exon;
      # only the "within" case gives a coding start here.
      if ( $cds_start <= $self->cdna_end($transcript) ) {
        $coding_start = $cds_start;
      }
    }
    elsif ( $transcript->cdna_coding_end() >= $exon_start ) {
      # Coding region starts upstream of this exon and does not end
      # upstream of it, so the exon is coding from its first base.
      $coding_start = $exon_start;
    }
  }

  if ( defined $transcript_id ) {
    $self->{cdna_coding_start}->{$transcript_id} = $coding_start;
    if ( !defined $coding_start ) {
      $self->{cdna_coding_end}->{$transcript_id} = undef;
    }
  }

  return $coding_start;
} ## end sub cdna_coding_start
|
|
548
|
|
549 =head2 cdna_coding_end
|
|
550
|
|
551 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
552 The transcript for which cDNA coordinates should be
|
|
553 relative to.
|
|
554 Example : $cdna_coding_end = $exon->cdna_coding_end($transcript);
|
|
555 Description : Returns the end position of the coding region of the
|
|
556 exon in cDNA coordinates. Returns undef if the whole
|
|
557 exon is non-coding.
|
|
558 Since an exon may be part of one or more transcripts,
|
|
559 the relevant transcript must be given as argument to
|
|
560 this method.
|
|
561 Return type : Integer or undef
|
|
562 Exceptions : Throws if the given argument is not a transcript.
|
|
563 Caller : General
|
|
564 Status : Stable
|
|
565
|
|
566 =cut
|
|
567
|
|
# Returns the cDNA coordinate (relative to $transcript) at which the coding
# region ends inside this exon, or undef when no part of the exon is coding.
#
# The result is cached on the exon per transcript dbID.  When the result is
# undef, the cached cdna_coding_start for the same transcript is set to undef
# as well.  Throws if $transcript is not a Bio::EnsEMBL::Transcript.
sub cdna_coding_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined $transcript_id
       && exists $self->{cdna_coding_end}->{$transcript_id} )
  {
    return $self->{cdna_coding_end}->{$transcript_id};
  }

  # Stays undef for a non-coding transcript or a fully non-coding exon.
  my $coding_end;
  my $cds_end = $transcript->cdna_coding_end();

  if ( defined $cds_end ) {
    my $exon_end = $self->cdna_end($transcript);

    if ( $cds_end <= $exon_end ) {
      # Coding region ends either within or upstream of this exon;
      # only the "within" case gives a coding end here.
      if ( $cds_end >= $self->cdna_start($transcript) ) {
        $coding_end = $cds_end;
      }
    }
    elsif ( $transcript->cdna_coding_start() <= $exon_end ) {
      # Coding region ends downstream of this exon and does not start
      # downstream of it, so the exon is coding up to its last base.
      $coding_end = $exon_end;
    }
  }

  if ( defined $transcript_id ) {
    $self->{cdna_coding_end}->{$transcript_id} = $coding_end;
    if ( !defined $coding_end ) {
      $self->{cdna_coding_start}->{$transcript_id} = undef;
    }
  }

  return $coding_end;
} ## end sub cdna_coding_end
|
|
620
|
|
621 =head2 coding_region_start
|
|
622
|
|
623 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
624 Example : $coding_region_start =
|
|
625 $exon->coding_region_start($transcript);
|
|
626 Description : Returns the start position of the coding region
|
|
627 of the exon in slice-relative coordinates on the
|
|
628 forward strand. Returns undef if the whole exon is
|
|
629 non-coding.
|
|
630 Since an exon may be part of one or more transcripts,
|
|
631 the relevant transcript must be given as argument to
|
|
632 this method.
|
|
633 Return type : Integer or undef
|
|
634 Exceptions : Throws if the given argument is not a transcript.
|
|
635 Caller : General
|
|
636 Status : Stable
|
|
637
|
|
638 =cut
|
|
639
|
|
640 # The implementation of this method is analogous to the implementation
|
|
641 # of cdna_coding_start().
|
|
642
|
|
# Returns the position at which the coding region begins inside this exon,
# compared against the exon's own start()/end() coordinates, or undef when no
# part of the exon is coding.
#
# The result is cached on the exon per transcript dbID.  When the result is
# undef, the cached coding_region_end for the same transcript is set to undef
# as well.  Throws if $transcript is not a Bio::EnsEMBL::Transcript.
sub coding_region_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined $transcript_id
       && exists $self->{coding_region_start}->{$transcript_id} )
  {
    return $self->{coding_region_start}->{$transcript_id};
  }

  # Stays undef for a non-coding transcript or a fully non-coding exon.
  my $region_start;
  my $cds_start = $transcript->coding_region_start();

  if ( defined $cds_start ) {
    my $exon_start = $self->start();

    if ( $cds_start >= $exon_start ) {
      # Coding region starts either within or downstream of this exon;
      # only the "within" case gives a coding start here.
      if ( $cds_start <= $self->end() ) {
        $region_start = $cds_start;
      }
    }
    elsif ( $transcript->coding_region_end() >= $exon_start ) {
      # Coding region starts upstream of this exon and does not end
      # upstream of it.
      $region_start = $exon_start;
    }
  }

  if ( defined $transcript_id ) {
    $self->{coding_region_start}->{$transcript_id} = $region_start;
    if ( !defined $region_start ) {
      $self->{coding_region_end}->{$transcript_id} = undef;
    }
  }

  return $region_start;
} ## end sub coding_region_start
|
|
695
|
|
696 =head2 coding_region_end
|
|
697
|
|
698 Arg [1] : Bio::EnsEMBL::Transcript $transcript
|
|
699 Example : $coding_region_end =
|
|
700 $exon->coding_region_end($transcript);
|
|
701 Description : Returns the end position of the coding region of
|
|
702 the exon in slice-relative coordinates on the
|
|
703 forward strand. Returns undef if the whole exon is
|
|
704 non-coding.
|
|
705 Since an exon may be part of one or more transcripts,
|
|
706 the relevant transcript must be given as argument to
|
|
707 this method.
|
|
708 Return type : Integer or undef
|
|
709 Exceptions : Throws if the given argument is not a transcript.
|
|
710 Caller : General
|
|
711 Status : Stable
|
|
712
|
|
713 =cut
|
|
714
|
|
715 # The implementation of this method is analogous to the implementation
|
|
716 # of cdna_coding_end().
|
|
717
|
|
# Returns the position at which the coding region ends inside this exon,
# compared against the exon's own start()/end() coordinates, or undef when no
# part of the exon is coding.
#
# The result is cached on the exon per transcript dbID.  When the result is
# undef, the cached coding_region_start for the same transcript is set to
# undef as well.  Throws if $transcript is not a Bio::EnsEMBL::Transcript.
sub coding_region_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined $transcript_id
       && exists $self->{coding_region_end}->{$transcript_id} )
  {
    return $self->{coding_region_end}->{$transcript_id};
  }

  # Stays undef for a non-coding transcript or a fully non-coding exon.
  my $region_end;
  my $cds_end = $transcript->coding_region_end();

  if ( defined $cds_end ) {
    my $exon_end = $self->end();

    if ( $cds_end <= $exon_end ) {
      # Coding region ends either within or upstream of this exon;
      # only the "within" case gives a coding end here.
      if ( $cds_end >= $self->start() ) {
        $region_end = $cds_end;
      }
    }
    elsif ( $transcript->coding_region_start() <= $exon_end ) {
      # Coding region ends downstream of this exon and does not start
      # downstream of it.
      $region_end = $exon_end;
    }
  }

  if ( defined $transcript_id ) {
    $self->{coding_region_end}->{$transcript_id} = $region_end;
    if ( !defined $region_end ) {
      $self->{coding_region_start}->{$transcript_id} = undef;
    }
  }

  return $region_end;
} ## end sub coding_region_end
|
|
768
|
|
769 =head2 slice
|
|
770
|
|
771 Arg [1] : Bio::EnsEMBL::Slice
|
|
772 Example : $slice = $exon->slice();
|
|
773 Description: Getter/Setter for the slice this exon is on. The superclass
|
|
774 implementation is overridden to flush the internal sequence
|
|
775 cache if this value is altered
|
|
776 Returntype : Bio::EnsEMBL::Slice
|
|
777 Exceptions : none
|
|
778 Caller : general
|
|
779 Status : Stable
|
|
780
|
|
781 =cut
|
|
782
|
|
# Getter/setter for the slice this exon is on.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Slice $slice
# Returntype : whatever the superclass slice() accessor returns.
#
# When a defined slice is supplied, the cached sequence is dropped and every
# attached supporting feature that itself has a slice is transferred to the
# new slice; features without a slice are kept unchanged.  A supporting
# feature whose transfer() yields undef is dropped rather than stored: an
# undef entry in the evidence list would make the next call to this setter
# die when it invokes ->slice() on that entry.  This matches how transform()
# treats supporting features that cannot be moved.
#
# An explicit undef argument is passed on to the superclass setter; with no
# argument the method acts as a plain getter.
sub slice {
  my ( $self, $slice ) = @_;

  if ( defined($slice) ) {
    # If a new slice was provided, flush the internal sequence cache and
    # transfer all supporting evidence to the new slice.

    delete $self->{'_seq_cache'};

    if ( exists( $self->{'_supporting_evidence'} ) ) {
      my @new_features;

      for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {

        # If the old feature does not have a slice, assume transfer is
        # not necessary.
        my $new_feature =
          defined( $old_feature->slice() )
          ? $old_feature->transfer($slice)
          : $old_feature;

        # Skip evidence that could not be placed on the new slice.
        next unless defined $new_feature;

        push( @new_features, $new_feature );
      }

      $self->{'_supporting_evidence'} = \@new_features;
    }

    return $self->SUPER::slice($slice);
  } elsif ( @_ > 1 ) {
    # Caller explicitly passed undef: forward it so the slice is unset.
    return $self->SUPER::slice(undef);
  } else {
    return $self->SUPER::slice();
  }
} ## end sub slice
|
|
820
|
|
821 =head2 equals
|
|
822
|
|
823 Arg [1] : Bio::EnsEMBL::Exon exon
|
|
824 Example : if ($exonA->equals($exonB)) { ... }
|
|
825 Description : Compares two exons for equality.
|
|
826 The test for equality goes through the following list
|
|
827 and terminates at the first true match:
|
|
828
|
|
829 1. If Bio::EnsEMBL::Feature::equals() returns false,
|
|
830 then the exons are *not* equal.
|
|
831 2. If both exons have stable IDs: if these are the
|
|
832 same, the exons are equal, otherwise not.
|
|
833 3. If the exons have the same start, end, strand, phase,
|
|
834 and end phase, then they are equal, otherwise not.
|
|
835
|
|
836 Return type : Boolean (0, 1)
|
|
837
|
|
838 Exceptions : Thrown if a non-exon is passed as the argument.
|
|
839
|
|
840 =cut
|
|
841
|
|
# Compares this exon with another one and returns 1 (equal) or 0 (not equal).
#
# The checks run in this order and stop at the first decisive one:
#   - an undefined argument is never equal; the very same object always is;
#   - the argument must be a Bio::EnsEMBL::Exon (assert_ref throws otherwise);
#   - if the superclass equals() returns a defined 0, the exons differ;
#   - if both exons have a stable ID, the stable IDs decide;
#   - otherwise start, end, strand, phase and end phase must all match.
sub equals {
  my ( $self, $other ) = @_;

  return 0 if !defined($other);
  return 1 if $self eq $other;

  assert_ref( $other, 'Bio::EnsEMBL::Exon' );

  my $super_verdict = $self->SUPER::equals($other);
  return 0 if defined($super_verdict) && $super_verdict == 0;

  if ( defined( $self->stable_id() ) && defined( $other->stable_id() ) ) {
    return ( $self->stable_id() eq $other->stable_id() ) ? 1 : 0;
  }

  my $same_position_and_phase =
       $self->start()     == $other->start()
    && $self->end()       == $other->end()
    && $self->strand()    == $other->strand()
    && $self->phase()     == $other->phase()
    && $self->end_phase() == $other->end_phase();

  return $same_position_and_phase ? 1 : 0;
} ## end sub equals
|
|
876
|
|
877 =head2 move
|
|
878
|
|
879 Arg [1] : int start
|
|
880 Arg [2] : int end
|
|
881 Arg [3] : (optional) int strand
|
|
882 Example : None
|
|
883 Description: Sets the start, end and strand in one call rather than in
|
|
884 3 separate calls to the start(), end() and strand() methods.
|
|
885 This is for convenience and for speed when this needs to be
|
|
886 done within a tight loop. This overrides the superclass
|
|
887 move() method so that the internal sequence cache can be
|
|
888 flushed if the exon is moved.
|
|
889 Returntype : none
|
|
890 Exceptions : Thrown if invalid arguments are provided
|
|
891 Caller : general
|
|
892 Status : Stable
|
|
893
|
|
894 =cut
|
|
895
|
|
# Sets start, end and (optionally) strand in a single call by delegating to
# the superclass move().  The cached sequence is always dropped first, since
# the exon's location is about to change.
sub move {
  my ( $self, @location ) = @_;

  # flush the internal sequence cache
  delete $self->{'_seq_cache'};

  return $self->SUPER::move(@location);
}
|
|
902
|
|
903
|
|
904 =head2 transform
|
|
905
|
|
906 Arg 1 : String $coordinate_system_name
|
|
907 Arg [2] : String $coordinate_system_version
|
|
908 Description: moves this exon to the given coordinate system. If this exon has
|
|
909 attached supporting evidence, they move as well.
|
|
910 Returntype : Bio::EnsEMBL::Exon
|
|
911 Exceptions : wrong parameters
|
|
912 Caller : general
|
|
913 Status : Stable
|
|
914
|
|
915 =cut
|
|
916
|
|
# Returns a copy of this exon in the requested coordinate system, together
# with those supporting features that could be transformed as well.
#
# Calls with no argument, or with a Bio::EnsEMBL::Slice / LRGSlice object as
# first argument, are treated as old-style: a deprecation notice is issued
# and the call is forwarded to _deprecated_transform().
#
# Returns undef when the superclass transform() fails or yields an exon of a
# different length.  The returned exon never carries a sequence cache.
sub transform {
  my ( $self, @target ) = @_;

  # catch for old style transform calls
  my $old_style = !@target;
  if ( !$old_style && ref $target[0] ) {
    $old_style = $target[0]->isa("Bio::EnsEMBL::Slice")
      || $target[0]->isa("Bio::EnsEMBL::LRGSlice");
  }

  if ($old_style) {
    deprecate('Calling transform without a coord system name is deprecated.');
    return $self->_deprecated_transform(@target);
  }

  my $new_exon = $self->SUPER::transform(@target);

  # Explicit undef (not a bare return) is kept for list-context callers.
  if ( !defined $new_exon || $new_exon->length() != $self->length() ) {
    return undef;
  }

  if ( exists $self->{'_supporting_evidence'} ) {
    my @new_features;

    for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {
      my $new_feature = $old_feature->transform(@target);

      # Evidence that cannot be transformed is left behind.
      next unless defined $new_feature;

      push( @new_features, $new_feature );
    }

    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  #dont want to share the same sequence cache
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}
|
|
950
|
|
951
|
|
952 =head2 transfer
|
|
953
|
|
954 Arg [1] : Bio::EnsEMBL::Slice $destination_slice
|
|
955 Example : none
|
|
956 Description: Moves this Exon to given target slice coordinates. If Features
|
|
957 are attached they are moved as well. Returns a new exon.
|
|
958 Returntype : Bio::EnsEMBL::Exon
|
|
959 Exceptions : none
|
|
960 Caller : general
|
|
961 Status : Stable
|
|
962
|
|
963 =cut
|
|
964
|
|
# Returns a copy of this exon moved onto the given destination slice,
# together with its supporting features transferred to the same slice.
#
# Arg [1]    : Bio::EnsEMBL::Slice $destination_slice (forwarded unchanged
#              to the superclass and to each supporting feature)
# Returntype : the new exon, or undef if the superclass transfer() fails.
#
# Supporting features whose own transfer() yields undef are left out of the
# new exon's evidence list instead of being stored as undef entries.  This
# mirrors transform(), and keeps later code that calls methods on every
# evidence entry (such as the slice() setter) from dying on an undef.
# The returned exon never carries a sequence cache.
sub transfer {
  my $self = shift;

  my $new_exon = $self->SUPER::transfer(@_);

  # Explicit undef (not a bare return) is kept for list-context callers.
  return undef unless $new_exon;

  if ( exists $self->{'_supporting_evidence'} ) {
    my @new_features;

    for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {
      my $new_feature = $old_feature->transfer(@_);

      # Evidence that cannot be transferred is left behind.
      next unless defined $new_feature;

      push( @new_features, $new_feature );
    }

    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  #dont want to share the same sequence cache
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}
|
|
985
|
|
986
|
|
987 =head2 add_supporting_features
|
|
988
|
|
989 Arg [1] : Bio::EnsEMBL::Feature $feature
|
|
990 Example : $exon->add_supporting_features(@features);
|
|
991 Description: Adds a list of supporting features to this exon.
|
|
992 Duplicate features are not added.
|
|
993 If supporting features are added manually in this
|
|
994 way, prior to calling get_all_supporting_features then the
|
|
995 get_all_supporting_features call will not retrieve supporting
|
|
996 features from the database.
|
|
997 Returntype : none
|
|
998 Exceptions : throw if any of the features are not Feature
|
|
999 throw if any of the features are not in the same coordinate
|
|
1000 system as the exon
|
|
1001 Caller : general
|
|
1002 Status : Stable
|
|
1003
|
|
1004 =cut
|
|
1005
|
|
# Appends each given feature to this exon's supporting-evidence list.
# A feature that is already in the list (same object) is skipped.
# Throws if an argument is not a Bio::EnsEMBL::Feature, or if both the exon
# and the feature have a slice and the slice names differ.
sub add_supporting_features {
  my $self     = shift;
  my @features = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];
  my $evidence = $self->{_supporting_evidence};

  foreach my $candidate (@features) {
    if ( !( $candidate && $candidate->isa("Bio::EnsEMBL::Feature") ) ) {
      throw("Supporting feat [$candidate] not a " .
            "Bio::EnsEMBL::Feature");
    }

    if ( defined $self->slice() && defined $candidate->slice() ) {
      if ( $self->slice()->name() ne $candidate->slice()->name() ) {
        throw("Supporting feat not in same coord system as exon\n" .
              "exon is attached to [".$self->slice()->name()."]\n" .
              "feat is attached to [".$candidate->slice()->name()."]");
      }
    }

    # object identity comparison: skip features that were added before
    next if grep { $candidate == $_ } @{$evidence};

    push( @{$evidence}, $candidate );
  }
}
|
|
1039
|
|
1040
|
|
1041 =head2 flush_supporting_features
|
|
1042
|
|
1043 Example : $exon->flush_supporting_features;
|
|
1044 Description : Removes all supporting evidence from the exon.
|
|
1045 Return type : (Empty) listref
|
|
1046 Exceptions : none
|
|
1047 Caller : general
|
|
1048 Status : Stable
|
|
1049
|
|
1050 =cut
|
|
1051
|
|
# Replaces the supporting-evidence list with a fresh empty array reference
# and returns that reference.
sub flush_supporting_features {
  my ($self) = @_;
  return $self->{'_supporting_evidence'} = [];
}
|
|
1056
|
|
1057
|
|
1058 =head2 get_all_supporting_features
|
|
1059
|
|
1060 Arg [1] : none
|
|
1061 Example : @evidence = @{$exon->get_all_supporting_features()};
|
|
1062 Description: Retrieves any supporting features added manually by
|
|
1063 calls to add_supporting_features. If no features have been
|
|
1064 added manually and this exon is in a database (i.e. it has an adaptor), the supporting features are retrieved from the database.
|
|
1065 Returntype : listreference of Bio::EnsEMBL::BaseAlignFeature objects
|
|
1066 Exceptions : none
|
|
1067 Caller : general
|
|
1068 Status : Stable
|
|
1069
|
|
1070 =cut
|
|
1071
|
|
# Returns the supporting-evidence list as an array reference.
# If no list has been stored yet and the exon has an adaptor, the list is
# fetched once through the SupportingFeatureAdaptor and cached.
# Falls back to an empty array reference when nothing is available.
sub get_all_supporting_features {
  my ($self) = @_;

  if ( !exists $self->{_supporting_evidence} && $self->adaptor ) {
    my $sf_adaptor = $self->adaptor->db->get_SupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $sf_adaptor->fetch_all_by_Exon($self);
  }

  return $self->{_supporting_evidence} || [];
}
|
|
1084
|
|
1085
|
|
1086 =head2 find_supporting_evidence
|
|
1087
|
|
1088 # This method is only for genebuild backwards compatibility.
|
|
1089 # Avoid using it if possible
|
|
1090
|
|
1091 Arg [1] : listref of Bio::EnsEMBL::Feature $features
|
|
1092 The list of features to search for supporting (i.e. overlapping)
|
|
1093 evidence.
|
|
1094 Arg [2] : (optional) boolean $sorted
|
|
1095 Used to speed up the calculation of overlapping features.
|
|
1096 Should be set to true if the list of features is sorted in
|
|
1097 ascending order on their start coordinates.
|
|
1098 Example : $exon->find_supporting_evidence(\@features);
|
|
1099 Description: Looks through all the similarity features and
|
|
1100 stores as supporting features any feature
|
|
1101 that overlaps with an exon.
|
|
1102 Returntype : none
|
|
1103 Exceptions : none
|
|
1104 Caller : general
|
|
1105 Status : Medium Risk
|
|
1106
|
|
1107 =cut
|
|
1108
|
|
# Scans a list of features and adds, as supporting evidence, every feature
# that lies on the same sequence (by name), overlaps this exon and is on
# the same strand.  Features with sub-features are searched recursively.
#   $features - array reference of features to examine
#   $sorted   - optional boolean; when true the scan stops at the first
#               feature starting beyond the exon end
sub find_supporting_evidence {
  my ($self,$features,$sorted) = @_;

  foreach my $f (@$features) {
    # return if we have a sorted feature array
    # (any true value counts, as documented; previously only the exact
    # value 1 was honoured)
    if ($sorted && $f->start > $self->end) {
      return;
    }
    if ($f->sub_SeqFeature) {
      my @subf = $f->sub_SeqFeature;

      # the sorted flag is deliberately not passed on to sub-features
      $self->find_supporting_evidence(\@subf);
    }
    else {
      if ($f->entire_seq()->name eq $self->slice()->name) {
        if ($f->end >= $self->start && $f->start <= $self->end && $f->strand == $self->strand) {
          $self->add_supporting_features($f);
        }
      }
    }
  }
}
|
|
1131
|
|
1132
|
|
1133 =head2 stable_id
|
|
1134
|
|
1135 Arg [1] : string $stable_id
|
|
1136 Example : none
|
|
1137 Description: get/set for attribute stable_id
|
|
1138 Returntype : string
|
|
1139 Exceptions : none
|
|
1140 Caller : general
|
|
1141 Status : Stable
|
|
1142
|
|
1143 =cut
|
|
1144
|
|
# Getter/setter for the 'stable_id' attribute.  When called with an
# argument (even undef) the attribute is overwritten; the current value is
# always returned.
sub stable_id {
  my ( $self, @args ) = @_;
  if (@args) {
    $self->{'stable_id'} = $args[0];
  }
  return $self->{'stable_id'};
}
|
|
1150
|
|
1151
|
|
1152 =head2 created_date
|
|
1153
|
|
1154 Arg [1] : string $created_date
|
|
1155 Example : none
|
|
1156 Description: get/set for attribute created_date
|
|
1157 Returntype : string
|
|
1158 Exceptions : none
|
|
1159 Caller : general
|
|
1160 Status : Stable
|
|
1161
|
|
1162 =cut
|
|
1163
|
|
# Getter/setter for the 'created_date' attribute.  When called with an
# argument (even undef) the attribute is overwritten; the current value is
# always returned.
sub created_date {
  my ( $self, @args ) = @_;
  if (@args) {
    $self->{'created_date'} = $args[0];
  }
  return $self->{'created_date'};
}
|
|
1169
|
|
1170
|
|
1171 =head2 modified_date
|
|
1172
|
|
1173 Arg [1] : string $modified_date
|
|
1174 Example : none
|
|
1175 Description: get/set for attribute modified_date
|
|
1176 Returntype : string
|
|
1177 Exceptions : none
|
|
1178 Caller : general
|
|
1179 Status : Stable
|
|
1180
|
|
1181 =cut
|
|
1182
|
|
# Getter/setter for the 'modified_date' attribute.  When called with an
# argument (even undef) the attribute is overwritten; the current value is
# always returned.
sub modified_date {
  my ( $self, @args ) = @_;
  if (@args) {
    $self->{'modified_date'} = $args[0];
  }
  return $self->{'modified_date'};
}
|
|
1188
|
|
1189
|
|
1190 =head2 version
|
|
1191
|
|
1192 Arg [1] : string $version
|
|
1193 Example : none
|
|
1194 Description: get/set for attribute version
|
|
1195 Returntype : string
|
|
1196 Exceptions : none
|
|
1197 Caller : general
|
|
1198 Status : Stable
|
|
1199
|
|
1200 =cut
|
|
1201
|
|
# Getter/setter for the 'version' attribute.  When called with an argument
# (even undef) the attribute is overwritten; the current value is always
# returned.
sub version {
  my ( $self, @args ) = @_;
  if (@args) {
    $self->{'version'} = $args[0];
  }
  return $self->{'version'};
}
|
|
1207
|
|
1208
|
|
1209 =head2 is_current
|
|
1210
|
|
1211 Arg [1] : Boolean $is_current
|
|
1212 Example : $exon->is_current(1)
|
|
1213 Description: Getter/setter for is_current state of this exon.
|
|
1214 Returntype : Int
|
|
1215 Exceptions : none
|
|
1216 Caller : general
|
|
1217 Status : Stable
|
|
1218
|
|
1219 =cut
|
|
1220
|
|
# Getter/setter for the 'is_current' flag.  The flag is only changed when
# a defined value is passed; the stored value is always returned.
sub is_current {
  my $self  = shift;
  my $value = shift;

  $self->{'is_current'} = $value if defined $value;

  return $self->{'is_current'};
}
|
|
1229
|
|
1230 =head2 is_constitutive
|
|
1231
|
|
1232 Arg [1] : Boolean $is_constitutive
|
|
1233 Example : $exon->is_constitutive(0)
|
|
1234 Description: Getter/setter for is_constitutive state of this exon.
|
|
1235 Returntype : Int
|
|
1236 Exceptions : none
|
|
1237 Caller : general
|
|
1238 Status : Stable
|
|
1239
|
|
1240 =cut
|
|
1241
|
|
# Getter/setter for the 'is_constitutive' flag.  The flag is only changed
# when a defined value is passed; the stored value is always returned.
sub is_constitutive {
  my $self  = shift;
  my $value = shift;

  $self->{'is_constitutive'} = $value if defined $value;

  return $self->{'is_constitutive'};
}
|
|
1250
|
|
1251
|
|
1252 =head2 adjust_start_end
|
|
1253
|
|
1254 Arg 1 : int $start_adjustment
|
|
1255 Arg 2 : int $end_adjustment
|
|
1256 Example : none
|
|
1257 Description: returns a new Exon with this much shifted coordinates
|
|
1258 Returntype : Bio::EnsEMBL::Exon
|
|
1259 Exceptions : none
|
|
1260 Caller : Transcript->get_all_translateable_Exons()
|
|
1261 Status : Stable
|
|
1262
|
|
1263 =cut
|
|
1264
|
|
# Builds a new exon that is a shallow copy of this one (minus the sequence
# cache) with shifted coordinates.  On strand 1 the start moves by
# $start_adjust and the end by $end_adjust; otherwise the start moves by
# -$end_adjust and the end by -$start_adjust.  Returns the new exon.
sub adjust_start_end {
  my $self = shift;
  my ( $start_adjust, $end_adjust ) = @_;

  my $copy = Bio::EnsEMBL::Exon->new();
  %{$copy} = %{$self};

  #invalidate the sequence cache
  delete $copy->{'_seq_cache'};

  my $forward = ( $self->strand() == 1 );

  $copy->start( $forward ? $self->start() + $start_adjust
                         : $self->start() - $end_adjust );
  $copy->end( $forward ? $self->end() + $end_adjust
                       : $self->end() - $start_adjust );

  return $copy;
}
|
|
1284
|
|
1285
|
|
1286 =head2 peptide
|
|
1287
|
|
1288 Arg [1] : Bio::EnsEMBL::Transcript $tr
|
|
1289 Example : my $pep_str = $exon->peptide($transcript)->seq;
|
|
1290 Description: Retrieves the portion of the transcripts peptide
|
|
1291 encoded by this exon. The transcript argument is necessary
|
|
1292 because outside of the context of a transcript it is not
|
|
1293 possible to correctly determine the translation. Note that
|
|
1294 an entire amino acid will be present at the exon boundaries
|
|
1295 even if only a partial codon is present. Therefore the
|
|
1296 concatenation of all of the peptides of a transcripts exons
|
|
1297 is not the same as a transcripts translation because the
|
|
1298 summation may contain duplicated amino acids at splice sites.
|
|
1299 In the case that this exon is entirely UTR, a Bio::Seq object
|
|
1300 with an empty sequence string is returned.
|
|
1301 Returntype : Bio::Seq
|
|
1302 Exceptions : thrown if transcript argument is not provided
|
|
1303 Caller : general
|
|
1304 Status : Stable
|
|
1305
|
|
1306 =cut
|
|
1307
|
|
# Returns, as a Bio::Seq, the part of the transcript's translation that
# this exon maps to.  The sequence string is empty when the exon maps to no
# peptide coordinates.
#   $tr - the Bio::EnsEMBL::Transcript providing the translation context
# Throws if $tr is not a transcript, if the exon cannot be transferred to
# the transcript's slice, or if the exon maps to several peptide locations
# that cannot be merged into one.
sub peptide {
  my $self = shift;
  my $tr = shift;

  unless($tr && ref($tr) && $tr->isa('Bio::EnsEMBL::Transcript')) {
    throw("transcript arg must be Bio::EnsEMBL::Transcript not [$tr]");
  }

  #convert exons coordinates to peptide coordinates
  my $tmp_exon = $self->transfer($tr->slice);
  if (!$tmp_exon) {
    throw("Couldn't transfer exon to transcript's slice");
  }

  my @coords =
    $tr->genomic2pep($tmp_exon->start, $tmp_exon->end, $tmp_exon->strand);

  #filter out gaps
  @coords = grep {$_->isa('Bio::EnsEMBL::Mapper::Coordinate')} @coords;

  if(scalar(@coords) > 1) {
    # a copy is passed so the merge helper cannot alter @coords
    my $coord = $self->_merge_ajoining_coords([@coords]);
    if(!$coord) {
      my ($e_id, $tr_id) = ($self->stable_id(), $tr->stable_id());
      throw("Error. Exon maps to multiple locations in peptide and those".
            " locations are not continuous." .
            " Is this exon [$e_id] a member of this transcript [$tr_id]?");
    }
    @coords = ($coord);
  }

  #if this is UTR then the peptide will be empty string
  my $pep_str = '';

  # A separate 'if' (not 'elsif') so that a successfully merged coordinate
  # is also used to extract the peptide.
  if(scalar(@coords) == 1) {
    my $c = $coords[0];
    my $pep = $tr->translate;

    #bioperl doesn't give back residues for incomplete codons
    #make sure we don't subseq too far...
    my ($start, $end);
    $end = ($c->end > $pep->length) ? $pep->length : $c->end;
    $start = ($c->start < $end) ? $c->start : $end;
    $pep_str = $pep->subseq($start, $end);
  }

  return
    Bio::Seq->new( -seq      => $pep_str,
                   -moltype  => 'protein',
                   -alphabet => 'protein',
                   -id       => $self->display_id );
}
|
|
1362
|
|
1363 =head2 _merge_ajoining_coords
|
|
1364
|
|
1365 Arg [1] : ArrayRef of Bio::EnsEMBL::Mapper::Coordinate objects
|
|
1366 Example :
|
|
1367 Description : Merges coords which are ajoining or overlapping
|
|
1368 Returntype : Bio::EnsEMBL::Mapper::Coordinate or undef if it cannot happen
|
|
1369 Exceptions : None; undef is returned if the coords cannot be condensed into one location
|
|
1370 Caller : internal
|
|
1371 Status : Development
|
|
1372
|
|
1373 =cut
|
|
1374
|
|
# Merges a list of coordinates into a single coordinate spanning them all.
#   $coords - array reference of coordinate objects, in order
# Each coordinate must start no later than one past the end reached so
# far; otherwise nothing is returned.  The merged coordinate takes id,
# strand and rank from the first element.  The input array is not modified,
# and an empty list yields nothing.
sub _merge_ajoining_coords {
  my ($self, $coords) = @_;

  # copy the list so the caller's array is left untouched
  my ($coord, @others) = @{$coords};
  return unless $coord;

  my $start    = $coord->start();
  my $last_end = $coord->end();
  foreach my $other_coord (@others) {
    # a gap between the coords means they cannot be merged
    return if ($last_end + 1) < $other_coord->start();

    # an overlapping coord may end before the current end; never shrink
    my $other_end = $other_coord->end();
    $last_end = $other_end if $other_end > $last_end;
  }

  my $new_coord = Bio::EnsEMBL::Mapper::Coordinate->new(
    $coord->id(), $start, $last_end, $coord->strand(), $coord->rank());
  return $new_coord;
}
|
|
1400
|
|
1401
|
|
1402
|
|
1403
|
|
1404 =head2 seq
|
|
1405
|
|
1406 Arg [1] : none
|
|
1407 Example : my $seq_str = $exon->seq->seq;
|
|
1408 Description: Retrieves the dna sequence of this Exon.
|
|
1409 Returned in a Bio::Seq object. Note that the sequence may
|
|
1410 include UTRs (or even be entirely UTR).
|
|
1411 Returntype : Bio::Seq or undef
|
|
1412 Exceptions : warning if argument passed,
|
|
1413 warning if exon does not have attached slice
|
|
1414 warning if exon strand is not defined (or 0)
|
|
1415 Caller : general
|
|
1416 Status : Stable
|
|
1417
|
|
1418 =cut
|
|
1419
|
|
# Returns the exon's DNA sequence wrapped in a new Bio::Seq.  The sequence
# string is read from the slice on first use and cached in '_seq_cache'.
# Passing an argument emits a warning and stores $arg->seq() in the cache.
# Returns undef (after a warning) when the exon has no slice or no strand.
sub seq {
  my ( $self, $arg ) = @_;

  if ( defined $arg ) {
    warning("seq setting on Exon not supported currently");
    $self->{'_seq_cache'} = $arg->seq();
  }

  if ( !defined( $self->{'_seq_cache'} ) ) {
    my $slice = $self->slice();
    if ( !defined $slice ) {
      warning("Cannot retrieve seq for exon without slice\n");
      return undef;
    }

    my $strand = $self->strand();
    if ( !$strand ) {
      warning("Cannot retrieve seq for unstranded exon\n");
      return undef;
    }

    my $start = $self->start();
    my $end   = $self->end();
    my $seq;

    if ( $slice->is_circular() ) {
      if ( $slice->start() > $slice->end() ) {
        # Normally exons overlapping chromosome origin will have negative feature start, but slice will be from 1 .. length
        # But in case you got an exon attached to a sub slice try this
        my $mid_point = $slice->seq_region_length() - $slice->start() + 1;
        my $seq1 = $slice->subseq( $start, $mid_point, $strand );
        my $seq2 = $slice->subseq( $mid_point + 1, $end, $strand );

        $seq = $strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
      } elsif ( $start < 0 || $start > $end ) {
        # Normally exons overlapping chromosome origin will be 0 based, and can have negative start
        # But if you go via sub_Slice it gives you chromosome based coordinates, i.e it will have start greater then end
        my $mid_point = $slice->seq_region_length();
        my $seq1 = $slice->subseq( $start, $mid_point, $strand );
        my $seq2 = $slice->subseq( 1, $end, $strand );

        $seq = $strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
      } else {
        # End this is the case for genes not overlapping the origin
        $seq = $slice->subseq( $start, $end, $strand );
      }
    } else {
      $seq = $slice->subseq( $start, $end, $strand );
    }

    $self->{'_seq_cache'} = $seq;
  } ## end if ( !defined( $self->...))

  return
    Bio::Seq->new( -seq      => $self->{'_seq_cache'},
                   -id       => $self->display_id,
                   -moltype  => 'dna',
                   -alphabet => 'dna' );
} ## end sub seq
|
|
1476
|
|
1477
|
|
1478 =head2 hashkey
|
|
1479
|
|
1480 Arg [1] : none
|
|
1481 Example : if(exists $hash{$exon->hashkey}) { do_something(); }
|
|
1482 Description: Returns a unique hashkey that can be used to uniquely identify
|
|
1483 this exon. Exons are considered to be identical if they share
|
|
1484 the same seq_region, start, end, strand, phase, end_phase.
|
|
1485 Note that this will consider two exons on different slices
|
|
1486 to be different, even if they actually are not.
|
|
1487 Returntype : string formatted as slice_name-start-end-strand-phase-end_phase
|
|
1488 Exceptions : thrown if not all the necessary attributes needed to generate
|
|
1489 a unique hash value are set
|
|
1490 set
|
|
1491 Caller : general
|
|
1492 Status : Stable
|
|
1493
|
|
1494 =cut
|
|
1495
|
|
# Builds the string "slice_name-start-end-strand-phase-end_phase" from the
# exon's attributes.  Throws if the slice or any of the five attributes is
# undefined.
sub hashkey {
  my $self = shift;

  my $slice      = $self->{'slice'};
  my $slice_name = ($slice) ? $slice->name() : undef;

  if(!defined($slice_name)) {
    throw('Slice must be set to generate correct hashkey.');
  }

  # Every component is required.  An undefined start used to produce only
  # a warning and a malformed key; it now throws like the other attributes.
  my @required = qw(start end strand phase end_phase);
  foreach my $attr (@required) {
    if(!defined($self->{$attr})) {
      throw("$attr attribute must be defined to generate correct hashkey.");
    }
  }

  return join('-', $slice_name, @{$self}{@required});
}
|
|
1533
|
|
1534
|
|
1535 =head2 display_id
|
|
1536
|
|
1537 Arg [1] : none
|
|
1538 Example : print $exons->display_id();
|
|
1539 Description: This method returns a string that is considered to be
|
|
1540 the 'display' identifier. For exons this is (depending on
|
|
1541 availability and in this order) the stable Id, the dbID or an
|
|
1542 empty string.
|
|
1543 Returntype : string
|
|
1544 Exceptions : none
|
|
1545 Caller : web drawing code
|
|
1546 Status : Stable
|
|
1547
|
|
1548 =cut
|
|
1549
|
|
# Returns the display identifier: the stored stable id if it is true,
# otherwise the dbID if that is true, otherwise an empty string.
sub display_id {
  my ($self) = @_;

  my $stable_id = $self->{'stable_id'};
  return $stable_id if $stable_id;

  return $self->dbID || '';
}
|
|
1554
|
|
1555
|
|
1556 =head2 load
|
|
1557
|
|
1558 Args : None
|
|
1559 Example : $exon->load();
|
|
1560 Description : The Ensembl API makes extensive use of
|
|
1561 lazy-loading. Under some circumstances (e.g.,
|
|
1562 when copying genes between databases), all data of
|
|
1563 an object needs to be fully loaded. This method
|
|
1564 loads the parts of the object that are usually
|
|
1565 lazy-loaded.
|
|
1566 Returns : Nothing.
|
|
1567
|
|
1568 =cut
|
|
1569
|
|
# Calls the accessors analysis(), stable_id() and
# get_all_supporting_features() in that order, so that anything they load
# on demand is fetched now.
sub load {
  my $self = shift;

  $self->$_() for qw(analysis stable_id);

  return $self->get_all_supporting_features();
}
|
|
1577
|
|
1578 =head1 DEPRECATED METHODS
|
|
1579
|
|
1580 =cut
|
|
1581
|
|
1582
|
|
1583 =head2 _get_stable_entry_info
|
|
1584
|
|
1585 Description: DEPRECATED.
|
|
1586
|
|
1587 =cut
|
|
1588
|
|
# DEPRECATED.  Emits a deprecation notice, then delegates to the adaptor's
# get_stable_entry_info(); returns undef when the exon has no adaptor.
sub _get_stable_entry_info {
  my ($self) = @_;

  deprecate( "This function shouldnt be called any more" );

  return undef if !defined $self->adaptor;

  return $self->adaptor->get_stable_entry_info($self);
}
|
|
1597
|
|
1598
|
|
1599 =head2 temporary_id
|
|
1600
|
|
1601 Description: DEPRECATED. This should not be necessary.
|
|
1602
|
|
1603 =cut
|
|
1604
|
|
# DEPRECATED.  Emits a deprecation notice, then acts as getter/setter for
# the 'tempID' attribute (any passed argument, even undef, overwrites it).
sub temporary_id {
  my ( $self, @args ) = @_;

  deprecate('It should not be necessary to use this method.');

  if (@args) {
    $self->{'tempID'} = $args[0];
  }
  return $self->{'tempID'};
}
|
|
1611
|
|
1612
|
|
1613 =head2 created
|
|
1614
|
|
1615 Description: DEPRECATED. Do not use.
|
|
1616
|
|
1617 =cut
|
|
1618
|
|
# DEPRECATED.  Emits a deprecation notice, then acts as getter/setter for
# the '_created' attribute (only a defined value overwrites it).
sub created {
  my $self  = shift;
  my $value = shift;

  deprecate( "Created attribute not supported any more." );

  $self->{'_created'} = $value if defined $value;

  return $self->{'_created'};
}
|
|
1627
|
|
1628 =head2 modified
|
|
1629
|
|
1630 Description: DEPRECATED. Do not use.
|
|
1631
|
|
1632 =cut
|
|
1633
|
|
1634
|
|
# DEPRECATED.  Emits a deprecation notice, then acts as getter/setter for
# the '_modified' attribute (only a defined value overwrites it).
sub modified {
  my $self  = shift;
  my $value = shift;

  deprecate( "Modified attribute not supported any more." );

  $self->{'_modified'} = $value if defined $value;

  return $self->{'_modified'};
}
|
|
1643
|
|
1644
|
|
1645 =head2 type
|
|
1646
|
|
1647 Description: DEPRECATED. Do not use.
|
|
1648
|
|
1649 =cut
|
|
1650
|
|
# DEPRECATED.  Emits a deprecation notice, then acts as getter/setter for
# the 'type' attribute (only a defined value overwrites it).
sub type {
  my $self  = shift;
  my $value = shift;

  deprecate("Type attribute not supported anymore.");

  $self->{'type'} = $value if defined($value);

  return $self->{'type'};
}
|
|
1659
|
|
1660
|
|
1661 1;
|