Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/EnsEMBL/Exon.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Exon - A class representing an Exon | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 $ex = new Bio::EnsEMBL::Exon( | |
28 -START => 100, | |
29 -END => 200, | |
30 -STRAND => 1, | |
31 -SLICE => $slice, | |
32 -DBID => $dbID, | |
33 -ANALYSIS => $analysis, | |
34 -STABLE_ID => 'ENSE000000123', | |
35 -VERSION => 2 | |
36 ); | |
37 | |
38 # seq() returns a Bio::Seq | |
39 my $seq = $exon->seq->seq(); | |
40 | |
41 # Peptide only makes sense within transcript context | |
42 my $pep = $exon->peptide($transcript)->seq(); | |
43 | |
44 # Normal feature operations can be performed: | |
45 $exon = $exon->transform('clone'); | |
46 $exon->move( $new_start, $new_end, $new_strand ); | |
47 print $exon->slice->seq_region_name(); | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 This is a class which represents an exon which is part of a transcript. | |
52 See Bio::EnsEMBL::Transcript | |
53 | |
54 =head1 METHODS | |
55 | |
56 =cut | |
57 | |
58 package Bio::EnsEMBL::Exon; | |
59 | |
60 use strict; | |
61 | |
62 use Bio::EnsEMBL::Feature; | |
63 use Bio::Seq; # exons have to have sequences... | |
64 | |
65 use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate ); | |
66 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
67 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); | |
68 use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor; | |
69 | |
70 use vars qw(@ISA); | |
71 @ISA = qw(Bio::EnsEMBL::Feature); | |
72 | |
73 | |
74 =head2 new | |
75 | |
76 Arg [-SLICE]: Bio::EnsEMBL::Slice - Represents the sequence that this | |
77 feature is on. The coordinates of the created feature are | |
78 relative to the start of the slice. | |
79 Arg [-START]: The start coordinate of this feature relative to the start | |
80 of the slice it is sitting on. Coordinates start at 1 and | |
81 are inclusive. | |
82 Arg [-END] : The end coordinate of this feature relative to the start of | |
83 the slice it is sitting on. Coordinates start at 1 and are | |
84 inclusive. | |
85 Arg [-STRAND]: The orientation of this feature. Valid values are 1,-1,0. | |
86 Arg [-SEQNAME] : (optional) A seqname to be used instead of the default name | |
87 of the slice. Useful for features that do not have an | |
88 attached slice such as protein features. | |
89 Arg [-dbID] : (optional) internal database id | |
90 Arg [-ADAPTOR]: (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor | |
91 Arg [-PHASE] : the phase. | |
92 Arg [-END_PHASE]: the end phase | |
93 Arg [-STABLE_ID]: (optional) the stable id of the exon | |
94 Arg [-VERSION] : (optional) the version | |
95 Arg [-CREATED_DATE] : (optional) the created date | |
96 Arg [-MODIFIED_DATE]: (optional) the last modified date | |
97 | |
98 Example : none | |
99 Description: create an Exon object | |
100 Returntype : Bio::EnsEMBL::Exon | |
101 Exceptions : if phase is not valid (i.e. not one of 0, 1, 2 or -1) | |
102 Caller : general | |
103 Status : Stable | |
104 | |
105 =cut | |
106 | |
# Constructor. Generic feature arguments are handled by the superclass
# constructor; the exon-specific named arguments are extracted here.
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);

  my @exon_arg_names = (
    "PHASE",        "END_PHASE",
    "STABLE_ID",    "VERSION",
    "CREATED_DATE", "MODIFIED_DATE",
    "IS_CURRENT",   "IS_CONSTITUTIVE"
  );

  my ( $phase, $end_phase, $stable_id, $version, $created_date,
       $modified_date, $is_current, $is_constitutive )
    = rearrange( \@exon_arg_names, @_ );

  # Go through the accessor so that the phase value is validated.
  $self->phase($phase) if defined($phase);

  $self->{'end_phase'}     = $end_phase;
  $self->{'stable_id'}     = $stable_id;
  $self->{'version'}       = $version;
  $self->{'created_date'}  = $created_date;
  $self->{'modified_date'} = $modified_date;

  # is_current defaults to 1, is_constitutive defaults to 0.
  $self->{'is_current'} = defined($is_current) ? $is_current : 1;
  $self->{'is_constitutive'} =
    defined($is_constitutive) ? $is_constitutive : 0;

  return $self;
}
145 | |
146 | |
147 # =head2 new_fast | |
148 | |
149 # Arg [1] : Bio::EnsEMBL::Slice $slice | |
150 # Arg [2] : int $start | |
151 # Arg [3] : int $end | |
152 # Arg [4] : int $strand (1 or -1) | |
153 # Example : none | |
154 # Description: create an Exon object | |
155 # Returntype : Bio::EnsEMBL::Exon | |
156 # Exceptions : throws if end < start | |
157 # Caller : general | |
158 # Status : Stable | |
159 | |
160 # =cut | |
161 | |
162 # sub new_fast { | |
163 # my ($class, $slice, $start, $end, $strand) = @_; | |
164 | |
165 # my $self = bless {}, $class; | |
166 | |
167 # # Swap start and end if they're in the wrong order | |
168 # # We assume that the strand is correct and keep the input value. | |
169 | |
170 # if ($start > $end) { | |
171 # throw( "End smaller than start not allowed" ); | |
172 # } | |
173 | |
174 # $self->start ($start); | |
175 # $self->end ($end); | |
176 # $self->strand($strand); | |
177 # $self->slice($slice); | |
178 | |
179 # return $self; | |
180 # } | |
181 | |
182 | |
183 =head2 end_phase | |
184 | |
185 Arg [1] : (optional) int $end_phase | |
186 Example : $end_phase = $feat->end_phase; | |
187 Description: Gets/Sets the end phase of the exon. | |
188 end_phase = number of bases from the last incomplete codon of | |
189 this exon. | |
190 Usually, end_phase = (phase + exon_length)%3 | |
191 but end_phase could be -1 if the exon is half-coding and its 3 | |
192 prime end is UTR. | |
193 Returntype : int | |
194 Exceptions : warning if end_phase is called without an argument and the | |
195 value is not set. | |
196 Caller : general | |
197 Status : Stable | |
198 | |
199 =cut | |
200 | |
# Getter/setter for the end phase. Any argument (even undef) is stored;
# a plain read of an unset end phase emits a warning.
sub end_phase {
  my $self = shift;

  if (@_) {
    $self->{'end_phase'} = shift;
    return $self->{'end_phase'};
  }

  warning("No end phase set in Exon. You must set it explicitly.")
    unless defined( $self->{'end_phase'} );

  return $self->{'end_phase'};
}
213 | |
214 | |
215 =head2 phase | |
216 | |
217 Arg [1] : (optional) int $phase | |
218 Example : my $phase = $exon->phase; | |
219 $exon->phase(2); | |
220 Description: Gets/Sets the phase of the exon. | |
221 Returntype : int | |
222 Exceptions : throws if phase is not (0, 1 2 or -1). | |
223 Caller : general | |
224 Status : Stable | |
225 | |
226 | |
227 Get or set the phase of the Exon, which tells the | |
228 translation machinery, which makes a peptide from | |
229 the DNA, where to start. | |
230 | |
231 The Ensembl phase convention can be thought of as | |
232 "the number of bases of the first codon which are | |
233 on the previous exon". It is therefore 0, 1 or 2 | |
234 (or -1 if the exon is non-coding). In ascii art, | |
235 with alternate codons represented by B<###> and | |
236 B<+++>: | |
237 | |
238 Previous Exon Intron This Exon | |
239 ...------------- -------------... | |
240 | |
241 5' Phase 3' | |
242 ...#+++###+++### 0 +++###+++###+... | |
243 ...+++###+++###+ 1 ++###+++###++... | |
244 ...++###+++###++ 2 +###+++###+++... | |
245 | |
246 Here is another explanation from Ewan: | |
247 | |
248 Phase means the place where the intron lands | |
249 inside the codon - 0 between codons, 1 between | |
250 the 1st and second base, 2 between the second and | |
251 3rd base. Exons therefore have a start phase and | |
252 a end phase, but introns have just one phase. | |
253 | |
254 =cut | |
255 | |
# Getter/setter for the exon phase. A defined argument must be one of
# -1 (non-coding), 0, 1 or 2; anything else is rejected via throw().
sub phase {
  my ( $self, $value ) = @_;

  # No defined value supplied: behave as a plain getter.
  return $self->{'phase'} unless defined($value);

  if ( $value !~ /^(-1|0|1|2)$/ ) {
    throw("Bad value ($value) for exon phase. Should only be" .
          " -1,0,1,2\n");
  }
  else {
    $self->{'phase'} = $value;
  }

  return $self->{'phase'};
}
271 | |
272 | |
273 =head2 frame | |
274 | |
275 Arg [1] : none | |
276 Example : $frame = $exon->frame | |
277 Description: Gets the frame of this exon | |
278 Returntype : int | |
279 Exceptions : thrown if an arg is passed | |
280 thrown if frame cannot be calculated due to a bad phase value | |
281 Caller : general | |
282 Status : Stable | |
283 | |
284 =cut | |
285 | |
# Read-only accessor deriving the frame from the exon start and phase.
# Returns '.' (the GFF "no frame" marker) for phase -1.
sub frame {
  my ( $self, $value ) = @_;

  throw("Cannot set frame. Deduced from seq_start and phase")
    if defined $value;

  my $phase = $self->phase;

  return '.' if $phase == -1;

  # Offset added to the start before taking it modulo 3, per phase.
  my $offset =
      $phase == 0 ? 0
    : $phase == 1 ? 2
    : $phase == 2 ? 1
    :               undef;

  return ( $self->start + $offset ) % 3 if defined $offset;

  throw("bad phase in exon ".$self->phase);
}
313 | |
314 | |
315 =head2 start | |
316 | |
317 Arg [1] : int $start (optional) | |
318 Example : $start = $exon->start(); | |
319 Description: Getter/Setter for the start of this exon. The superclass | |
320 implementation is overridden to flush the internal sequence | |
321 cache if this value is altered | |
322 Returntype : int | |
323 Exceptions : none | |
324 Caller : general | |
325 Status : Stable | |
326 | |
327 =cut | |
328 | |
# Start accessor. Supplying a value drops the cached sequence before
# the superclass accessor is invoked.
sub start {
  my ( $self, @args ) = @_;

  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::start(@args);
}
335 | |
336 | |
337 =head2 end | |
338 | |
339 Arg [1] : int $end (optional) | |
340 Example : $end = $exon->end(); | |
341 Description: Getter/Setter for the end of this exon. The superclass | |
342 implementation is overridden to flush the internal sequence | |
343 cache if this value is altered | |
344 Returntype : int | |
345 Exceptions : none | |
346 Caller : general | |
347 Status : Stable | |
348 | |
349 =cut | |
350 | |
# End accessor. Supplying a value drops the cached sequence before
# the superclass accessor is invoked.
sub end {
  my ( $self, @args ) = @_;

  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::end(@args);
}
357 | |
358 | |
359 =head2 strand | |
360 | |
361 Arg [1] : int $strand (optional) | |
362 Example : $strand = $exon->strand(); | |
363 Description: Getter/Setter for the strand of this exon. The superclass | |
364 implementation is overridden to flush the internal sequence | |
365 cache if this value is altered | |
366 Returntype : int | |
367 Exceptions : none | |
368 Caller : general | |
369 Status : Stable | |
370 | |
371 =cut | |
372 | |
# Strand accessor. Supplying a value drops the cached sequence before
# the superclass accessor is invoked.
sub strand {
  my ( $self, @args ) = @_;

  if (@args) {
    delete $self->{'_seq_cache'};
  }

  return $self->SUPER::strand(@args);
}
379 | |
380 =head2 cdna_start | |
381 | |
382 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
383 The transcript for which cDNA coordinates should be | |
384 relative to. | |
385 Example : $cdna_start = $exon->cdna_start($transcript); | |
386 Description : Returns the start position of the exon in cDNA | |
387 coordinates. | |
388 Since an exon may be part of one or more transcripts, | |
389 the relevant transcript must be given as argument to | |
390 this method. | |
391 Return type : Integer | |
392 Exceptions : Throws if the given argument is not a transcript. | |
393 Throws if the first part of the exon maps into a gap. | |
394 Throws if the exon can not be mapped at all. | |
395 Caller : General | |
396 Status : Stable | |
397 | |
398 =cut | |
399 | |
# cDNA start of this exon relative to the given transcript. The result
# is cached per transcript dbID; transcripts without a dbID are always
# recomputed.
sub cdna_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{cdna_start}->{$transcript_id} )
  {
    return $self->{cdna_start}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  # Nothing came back from the mapping at all.
  throw("Can not map exon") unless @mapped;

  # The first mapped piece must be a real coordinate, not a gap.
  throw("First part of exon maps into gap")
    if $mapped[0]->isa('Bio::EnsEMBL::Mapper::Gap');

  my $first_base = $mapped[0]->start();

  if ( defined($transcript_id) ) {
    $self->{cdna_start}->{$transcript_id} = $first_base;
  }

  return $first_base;
} ## end sub cdna_start
428 | |
429 =head2 cdna_end | |
430 | |
431 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
432 The transcript for which cDNA coordinates should be | |
433 relative to. | |
434 Example : $cdna_end = $exon->cdna_end($transcript); | |
435 Description : Returns the end position of the exon in cDNA | |
436 coordinates. | |
437 Since an exon may be part of one or more transcripts, | |
438 the relevant transcript must be given as argument to | |
439 this method. | |
440 Return type : Integer | |
441 Exceptions : Throws if the given argument is not a transcript. | |
442 Throws if the last part of the exon maps into a gap. | |
443 Throws if the exon can not be mapped at all. | |
444 Caller : General | |
445 Status : Stable | |
446 | |
447 =cut | |
448 | |
# cDNA end of this exon relative to the given transcript. The result
# is cached per transcript dbID; transcripts without a dbID are always
# recomputed.
sub cdna_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{cdna_end}->{$transcript_id} )
  {
    return $self->{cdna_end}->{$transcript_id};
  }

  my @mapped =
    $transcript->genomic2cdna( $self->start(), $self->end(),
                               $self->strand() );

  # Nothing came back from the mapping at all.
  throw("Can not map exon") unless @mapped;

  # The last mapped piece must be a real coordinate, not a gap.
  throw("Last part of exon maps into gap")
    if $mapped[-1]->isa('Bio::EnsEMBL::Mapper::Gap');

  my $last_base = $mapped[-1]->end();

  if ( defined($transcript_id) ) {
    $self->{cdna_end}->{$transcript_id} = $last_base;
  }

  return $last_base;
} ## end sub cdna_end
477 | |
478 =head2 cdna_coding_start | |
479 | |
480 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
481 The transcript for which cDNA coordinates should be | |
482 relative to. | |
483 Example : $cdna_coding_start = $exon->cdna_coding_start($transcript); | |
484 Description : Returns the start position of the coding region of the | |
485 exon in cDNA coordinates. Returns undef if the whole | |
486 exon is non-coding. | |
487 Since an exon may be part of one or more transcripts, | |
488 the relevant transcript must be given as argument to | |
489 this method. | |
490 Return type : Integer or undef | |
491 Exceptions : Throws if the given argument is not a transcript. | |
492 Caller : General | |
493 Status : Stable | |
494 | |
495 =cut | |
496 | |
# Start of the coding part of this exon in cDNA coordinates of the
# given transcript, or undef when no coding sequence falls inside the
# exon. Cached per transcript dbID.
sub cdna_coding_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{cdna_coding_start}->{$transcript_id} )
  {
    return $self->{cdna_coding_start}->{$transcript_id};
  }

  # Remains undef unless one of the branches below finds an overlap.
  my $exon_coding_start;

  my $cds_start = $transcript->cdna_coding_start();

  if ( defined($cds_start) ) {
    my $exon_start = $self->cdna_start($transcript);

    if ( $cds_start < $exon_start ) {
      # The coding region begins upstream of the exon; it overlaps the
      # exon unless it also ends upstream of it.
      $exon_coding_start = $exon_start
        unless $transcript->cdna_coding_end() < $exon_start;
    }
    elsif ( $cds_start <= $self->cdna_end($transcript) ) {
      # The coding region begins inside the exon.
      $exon_coding_start = $cds_start;
    }
    # Otherwise the coding region begins downstream of the exon.
  }

  if ( defined($transcript_id) ) {
    $self->{cdna_coding_start}->{$transcript_id} = $exon_coding_start;

    # A non-coding exon has no coding end either; cache that as well.
    $self->{cdna_coding_end}->{$transcript_id} = undef
      unless defined($exon_coding_start);
  }

  return $exon_coding_start;
} ## end sub cdna_coding_start
548 | |
549 =head2 cdna_coding_end | |
550 | |
551 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
552 The transcript for which cDNA coordinates should be | |
553 relative to. | |
554 Example : $cdna_coding_end = $exon->cdna_coding_end($transcript); | |
555 Description : Returns the end position of the coding region of the | |
556 exon in cDNA coordinates. Returns undef if the whole | |
557 exon is non-coding. | |
558 Since an exon may be part of one or more transcripts, | |
559 the relevant transcript must be given as argument to | |
560 this method. | |
561 Return type : Integer or undef | |
562 Exceptions : Throws if the given argument is not a transcript. | |
563 Caller : General | |
564 Status : Stable | |
565 | |
566 =cut | |
567 | |
# End of the coding part of this exon in cDNA coordinates of the given
# transcript, or undef when no coding sequence falls inside the exon.
# Cached per transcript dbID.
sub cdna_coding_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{cdna_coding_end}->{$transcript_id} )
  {
    return $self->{cdna_coding_end}->{$transcript_id};
  }

  # Remains undef unless one of the branches below finds an overlap.
  my $exon_coding_end;

  my $cds_end = $transcript->cdna_coding_end();

  if ( defined($cds_end) ) {
    my $exon_end = $self->cdna_end($transcript);

    if ( $cds_end > $exon_end ) {
      # The coding region ends downstream of the exon; it overlaps the
      # exon unless it also starts downstream of it.
      $exon_coding_end = $exon_end
        unless $transcript->cdna_coding_start() > $exon_end;
    }
    elsif ( $cds_end >= $self->cdna_start($transcript) ) {
      # The coding region ends inside the exon.
      $exon_coding_end = $cds_end;
    }
    # Otherwise the coding region ends upstream of the exon.
  }

  if ( defined($transcript_id) ) {
    $self->{cdna_coding_end}->{$transcript_id} = $exon_coding_end;

    # A non-coding exon has no coding start either; cache that as well.
    $self->{cdna_coding_start}->{$transcript_id} = undef
      unless defined($exon_coding_end);
  }

  return $exon_coding_end;
} ## end sub cdna_coding_end
620 | |
621 =head2 coding_region_start | |
622 | |
623 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
624 Example : $coding_region_start = | |
625 $exon->coding_region_start($transcript); | |
626 Description : Returns the start position of the coding region | |
627 of the exon in slice-relative coordinates on the | |
628 forward strand. Returns undef if the whole exon is | |
629 non-coding. | |
630 Since an exon may be part of one or more transcripts, | |
631 the relevant transcript must be given as argument to | |
632 this method. | |
633 Return type : Integer or undef | |
634 Exceptions : Throws if the given argument is not a transcript. | |
635 Caller : General | |
636 Status : Stable | |
637 | |
638 =cut | |
639 | |
640 # The implementation of this method is analogous to the implementation | |
641 # of cdna_coding_start(). | |
642 | |
# Start of the coding part of this exon in the exon's own start/end
# coordinates, or undef when no coding sequence falls inside the exon.
# Cached per transcript dbID.
sub coding_region_start {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{coding_region_start}->{$transcript_id} )
  {
    return $self->{coding_region_start}->{$transcript_id};
  }

  # Remains undef unless one of the branches below finds an overlap.
  my $exon_coding_start;

  my $cds_start = $transcript->coding_region_start();

  if ( defined($cds_start) ) {
    my $exon_start = $self->start();

    if ( $cds_start < $exon_start ) {
      # The coding region begins before the exon; it overlaps the exon
      # unless it also ends before it.
      $exon_coding_start = $exon_start
        unless $transcript->coding_region_end() < $exon_start;
    }
    elsif ( $cds_start <= $self->end() ) {
      # The coding region begins inside the exon.
      $exon_coding_start = $cds_start;
    }
    # Otherwise the coding region begins after the exon.
  }

  if ( defined($transcript_id) ) {
    $self->{coding_region_start}->{$transcript_id} = $exon_coding_start;

    # A non-coding exon has no coding end either; cache that as well.
    $self->{coding_region_end}->{$transcript_id} = undef
      unless defined($exon_coding_start);
  }

  return $exon_coding_start;
} ## end sub coding_region_start
695 | |
696 =head2 coding_region_end | |
697 | |
698 Arg [1] : Bio::EnsEMBL::Transcript $transcript | |
699 Example : $coding_region_end = | |
700 $exon->coding_region_end($transcript); | |
701 Description : Returns the end position of the coding region of | |
702 the exon in slice-relative coordinates on the | |
703 forward strand. Returns undef if the whole exon is | |
704 non-coding. | |
705 Since an exon may be part of one or more transcripts, | |
706 the relevant transcript must be given as argument to | |
707 this method. | |
708 Return type : Integer or undef | |
709 Exceptions : Throws if the given argument is not a transcript. | |
710 Caller : General | |
711 Status : Stable | |
712 | |
713 =cut | |
714 | |
715 # The implementation of this method is analogous to the implementation | |
716 # of cdna_coding_end(). | |
717 | |
# End of the coding part of this exon in the exon's own start/end
# coordinates, or undef when no coding sequence falls inside the exon.
# Cached per transcript dbID.
sub coding_region_end {
  my ( $self, $transcript ) = @_;
  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript', 'transcript' );

  my $transcript_id = $transcript->dbID();

  if ( defined($transcript_id)
       && exists $self->{coding_region_end}->{$transcript_id} )
  {
    return $self->{coding_region_end}->{$transcript_id};
  }

  # Remains undef unless one of the branches below finds an overlap
  # (this also covers non-coding transcripts).
  my $exon_coding_end;

  my $cds_end = $transcript->coding_region_end();

  if ( defined($cds_end) ) {
    my $exon_end = $self->end();

    if ( $cds_end > $exon_end ) {
      # The coding region ends after the exon; it overlaps the exon
      # unless it also starts after it.
      $exon_coding_end = $exon_end
        unless $transcript->coding_region_start() > $exon_end;
    }
    elsif ( $cds_end >= $self->start() ) {
      # The coding region ends inside the exon.
      $exon_coding_end = $cds_end;
    }
    # Otherwise the coding region ends before the exon.
  }

  if ( defined($transcript_id) ) {
    $self->{coding_region_end}->{$transcript_id} = $exon_coding_end;

    # A non-coding exon has no coding start either; cache that as well.
    $self->{coding_region_start}->{$transcript_id} = undef
      unless defined($exon_coding_end);
  }

  return $exon_coding_end;
} ## end sub coding_region_end
768 | |
769 =head2 slice | |
770 | |
771 Arg [1] : Bio::EnsEMBL::Slice | |
772 Example : $slice = $exon->slice(); | |
773 Description: Getter/Setter for the slice this exon is on. The superclass | |
774 implementation is overridden to flush the internal sequence | |
775 cache if this value is altered | |
776 Returntype : Bio::EnsEMBL::Slice | |
777 Exceptions : none | |
778 Caller : general | |
779 Status : Stable | |
780 | |
781 =cut | |
782 | |
# Getter/setter for the slice this exon sits on.
#
# With a defined slice argument the cached sequence is dropped, every
# supporting feature that has a slice of its own is transferred to the
# new slice, and the superclass setter is called. Supporting features
# without a slice are kept unchanged.
#
# A supporting feature whose transfer() yields undef is dropped from the
# evidence list instead of being stored as undef: an undef entry would
# make the next call of this method die when it invokes ->slice() on it.
# Undef entries already present in the list are skipped for the same
# reason.
#
# An explicit undef argument is forwarded to the superclass as undef;
# with no argument the superclass getter is called.
sub slice {
  my ( $self, $slice ) = @_;

  if ( defined($slice) ) {
    # A new slice invalidates the cached sequence.
    delete $self->{'_seq_cache'};

    if ( exists( $self->{'_supporting_evidence'} ) ) {
      my @new_features;

      for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {
        next unless defined($old_feature);

        if ( !defined( $old_feature->slice() ) ) {
          # If the old feature does not have a slice, assume transfer is
          # not necessary.
          push( @new_features, $old_feature );
          next;
        }

        my $new_feature = $old_feature->transfer($slice);

        # Keep only evidence that could actually be placed on the slice.
        push( @new_features, $new_feature ) if defined($new_feature);
      }

      $self->{'_supporting_evidence'} = \@new_features;
    }

    return $self->SUPER::slice($slice);
  }
  elsif ( @_ > 1 ) {
    return $self->SUPER::slice(undef);
  }
  else {
    return $self->SUPER::slice();
  }
} ## end sub slice
820 | |
821 =head2 equals | |
822 | |
823 Arg [1] : Bio::EnsEMBL::Exon exon | |
824 Example : if ($exonA->equals($exonB)) { ... } | |
825 Description : Compares two exons for equality. | |
826 The test for equality goes through the following list | |
827 and terminates at the first true match: | |
828 | |
829 1. If Bio::EnsEMBL::Feature::equals() returns false, | |
830 then the exons are *not* equal. | |
831 2. If both exons have stable IDs: if these are the | |
832 same, the exons are equal, otherwise not. | |
833 3. If the exons have the same start, end, strand, | |
834 phase and end phase, then they are equal, otherwise not. | |
835 | |
836 Return type : Boolean (0, 1) | |
837 | |
838 Exceptions : Thrown if a non-exon is passed as the argument. | |
839 | |
840 =cut | |
841 | |
# Equality test between two exons. An undefined argument is never
# equal; the very same object is always equal. Otherwise the superclass
# test may veto, then stable IDs decide when both exons have one, and
# finally start, end, strand, phase and end phase are compared.
sub equals {
  my ( $self, $exon ) = @_;

  return 0 unless defined($exon);
  return 1 if $self eq $exon;

  assert_ref( $exon, 'Bio::EnsEMBL::Exon' );

  # Only an explicit 0 from the superclass rules equality out.
  my $feature_equals = $self->SUPER::equals($exon);
  return 0 if defined($feature_equals) && $feature_equals == 0;

  if ( defined( $self->stable_id() ) && defined( $exon->stable_id() ) ) {
    return ( $self->stable_id() eq $exon->stable_id() ) ? 1 : 0;
  }

  # No usable stable IDs: compare position and phases, stopping at the
  # first difference.
  return 0 unless $self->start()     == $exon->start();
  return 0 unless $self->end()       == $exon->end();
  return 0 unless $self->strand()    == $exon->strand();
  return 0 unless $self->phase()     == $exon->phase();
  return 0 unless $self->end_phase() == $exon->end_phase();

  return 1;
} ## end sub equals
876 | |
877 =head2 move | |
878 | |
879 Arg [1] : int start | |
880 Arg [2] : int end | |
881 Arg [3] : (optional) int strand | |
882 Example : None | |
883 Description: Sets the start, end and strand in one call rather than in | |
884 3 separate calls to the start(), end() and strand() methods. | |
885 This is for convenience and for speed when this needs to be | |
886 done within a tight loop. This overrides the superclass | |
887 move() method so that the internal sequence cache can be | |
888 flushed if the exon is moved. | |
889 Returntype : none | |
890 Exceptions : Thrown if invalid arguments are provided | |
891 Caller : general | |
892 Status : Stable | |
893 | |
894 =cut | |
895 | |
# Repositions the exon through the superclass move(), always dropping
# the cached sequence first.
sub move {
  my ( $self, @new_position ) = @_;

  delete $self->{'_seq_cache'};

  return $self->SUPER::move(@new_position);
}
902 | |
903 | |
904 =head2 transform | |
905 | |
906 Arg 1 : String $coordinate_system_name | |
907 Arg [2] : String $coordinate_system_version | |
908 Description: moves this exon to the given coordinate system. If this exon has | |
909 attached supporting evidence, they move as well. | |
910 Returntype : Bio::EnsEMBL::Exon | |
911 Exceptions : wrong parameters | |
912 Caller : general | |
913 Status : Stable | |
914 | |
915 =cut | |
916 | |
# Moves the exon (and any attached supporting evidence) to another
# coordinate system via the superclass transform(). Returns undef when
# the superclass yields nothing or the transformed exon has a different
# length. Calls with no argument or with a slice object are routed to
# the deprecated implementation.
sub transform {
  my ( $self, @args ) = @_;

  # Detect old-style calls.
  my $old_style_call = !@args
    || ( ref $args[0]
         && (    $args[0]->isa("Bio::EnsEMBL::Slice")
              or $args[0]->isa("Bio::EnsEMBL::LRGSlice") ) );

  if ($old_style_call) {
    deprecate('Calling transform without a coord system name is deprecated.');
    return $self->_deprecated_transform(@args);
  }

  my $new_exon = $self->SUPER::transform(@args);

  return undef
    if !defined($new_exon) || $new_exon->length != $self->length;

  if ( exists $self->{'_supporting_evidence'} ) {
    my @moved_evidence;

    for my $evidence ( @{ $self->{'_supporting_evidence'} } ) {
      my $moved = $evidence->transform(@args);

      # Evidence that cannot be transformed is left out.
      next unless defined $moved;
      push( @moved_evidence, $moved );
    }

    $new_exon->{'_supporting_evidence'} = \@moved_evidence;
  }

  # The new exon must not share the sequence cache.
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}
950 | |
951 | |
952 =head2 transfer | |
953 | |
954 Arg [1] : Bio::EnsEMBL::Slice $destination_slice | |
955 Example : none | |
956 Description: Moves this Exon to given target slice coordinates. If Features | |
957 are attached they are moved as well. Returns a new exon. | |
958 Returntype : Bio::EnsEMBL::Exon | |
959 Exceptions : none | |
960 Caller : general | |
961 Status : Stable | |
962 | |
963 =cut | |
964 | |
# Returns a new exon placed on the destination slice by the superclass
# transfer(), or undef when the superclass returns a false value.
#
# Attached supporting evidence is transferred with the same arguments.
# Evidence whose transfer() yields undef is left out of the new exon's
# evidence list rather than stored as undef, matching what transform()
# does; undef entries would otherwise break code that calls methods on
# every entry of the list. The evidence list of the original exon is
# not modified.
sub transfer {
  my $self = shift;

  my $new_exon = $self->SUPER::transfer(@_);
  return undef unless $new_exon;

  if ( exists $self->{'_supporting_evidence'} ) {
    my @new_features;

    for my $old_feature ( @{ $self->{'_supporting_evidence'} } ) {
      my $new_feature = $old_feature->transfer(@_);

      # Keep only evidence that could be placed on the destination.
      if ( defined $new_feature ) {
        push( @new_features, $new_feature );
      }
    }

    $new_exon->{'_supporting_evidence'} = \@new_features;
  }

  # The new exon must not share the sequence cache.
  delete $new_exon->{'_seq_cache'};

  return $new_exon;
}
985 | |
986 | |
987 =head2 add_supporting_features | |
988 | |
989 Arg [1] : Bio::EnsEMBL::Feature $feature | |
990 Example : $exon->add_supporting_features(@features); | |
991 Description: Adds a list of supporting features to this exon. | |
992 Duplicate features are not added. | |
993 If supporting features are added manually in this | |
994 way, prior to calling get_all_supporting_features then the | |
995 get_all_supporting_features call will not retrieve supporting | |
996 features from the database. | |
997 Returntype : none | |
998 Exceptions : throw if any of the features are not Feature | |
999 throw if any of the features are not in the same coordinate | |
1000 system as the exon | |
1001 Caller : general | |
1002 Status : Stable | |
1003 | |
1004 =cut | |
1005 | |
# Appends each given feature to this exon's supporting evidence list.
# A feature is rejected (throw) when it is false or not a
# Bio::EnsEMBL::Feature, or when both it and the exon have a slice and the
# slice names differ. A feature that is already in the list (same object)
# is silently skipped.
sub add_supporting_features {
  my $self     = shift;
  my @features = @_;

  return if !@features;

  $self->{_supporting_evidence} ||= [];
  my $evidence = $self->{_supporting_evidence};

  foreach my $candidate (@features) {
    if ( !( $candidate && $candidate->isa("Bio::EnsEMBL::Feature") ) ) {
      throw("Supporting feat [$candidate] not a " .
            "Bio::EnsEMBL::Feature");
    }

    if ( defined $self->slice() && defined $candidate->slice() ) {
      my $exon_slice_name = $self->slice()->name();
      my $feat_slice_name = $candidate->slice()->name();

      if ( $exon_slice_name ne $feat_slice_name ) {
        throw("Supporting feat not in same coord system as exon\n" .
              "exon is attached to [".$exon_slice_name."]\n" .
              "feat is attached to [".$feat_slice_name."]");
      }
    }

    # Object identity comparison: the same feature object is stored once.
    my $already_added = grep { $candidate == $_ } @{$evidence};
    push( @{$evidence}, $candidate ) if !$already_added;
  }
}
1039 | |
1040 | |
1041 =head2 flush_supporting_features | |
1042 | |
1043 Example : $exon->flush_supporting_features; | |
1044 Description : Removes all supporting evidence from the exon. | |
1045 Return type : (Empty) listref | |
1046 Exceptions : none | |
1047 Caller : general | |
1048 Status : Stable | |
1049 | |
1050 =cut | |
1051 | |
# Replaces the exon's supporting evidence with a fresh empty list and
# returns that (empty) list reference.
sub flush_supporting_features {
  my ($self) = @_;

  my $empty = [];
  $self->{'_supporting_evidence'} = $empty;

  return $empty;
}
1056 | |
1057 | |
1058 =head2 get_all_supporting_features | |
1059 | |
1060 Arg [1] : none | |
1061 Example : @evidence = @{$exon->get_all_supporting_features()}; | |
1062 Description: Retrieves any supporting features added manually by | |
1063 calls to add_supporting_features. If no features have been | |
1064 added manually and this exon is in a database (i.e. it has an adaptor), the supporting features are fetched from the database instead. | |
1065 Returntype : listreference of Bio::EnsEMBL::BaseAlignFeature objects | |
1066 Exceptions : none | |
1067 Caller : general | |
1068 Status : Stable | |
1069 | |
1070 =cut | |
1071 | |
# Returns the list reference of supporting features for this exon.
# When no list has been stored on the exon yet and the exon has an adaptor,
# the list is fetched through the SupportingFeatureAdaptor and cached.
# An empty list reference is returned when nothing is available.
sub get_all_supporting_features {
  my ($self) = @_;

  unless ( exists $self->{_supporting_evidence} ) {
    my $adaptor = $self->adaptor;
    if ($adaptor) {
      my $sfa = $adaptor->db->get_SupportingFeatureAdaptor();
      $self->{_supporting_evidence} = $sfa->fetch_all_by_Exon($self);
    }
  }

  my $evidence = $self->{_supporting_evidence};
  return $evidence ? $evidence : [];
}
1084 | |
1085 | |
1086 =head2 find_supporting_evidence | |
1087 | |
1088 # This method is only for genebuild backwards compatibility. | |
1089 # Avoid using it if possible | |
1090 | |
1091 Arg [1] : Bio::EnsEMBL::Feature $features | |
1092 The list of features to search for supporting (i.e. overlapping) | |
1093 evidence. | |
1094 Arg [2] : (optional) boolean $sorted | |
1095 Used to speed up the calculation of overlapping features. | |
1096 Should be set to true if the list of features is sorted in | |
1097 ascending order on their start coordinates. | |
1098 Example : $exon->find_supporting_evidence(\@features); | |
1099 Description: Looks through all the similarity features and | |
1100 stores as supporting features any feature | |
1101 that overlaps with an exon. | |
1102 Returntype : none | |
1103 Exceptions : none | |
1104 Caller : general | |
1105 Status : Medium Risk | |
1106 | |
1107 =cut | |
1108 | |
# Genebuild backwards-compatibility helper: walks the given features and
# adds as supporting features those that sit on a sequence with the same
# name as this exon's slice, overlap the exon and share its strand.
# Features exposing sub_SeqFeature are searched recursively instead.
# With $sorted == 1 the scan stops at the first feature starting beyond
# the exon end.
sub find_supporting_evidence {
  my ( $self, $features, $sorted ) = @_;

  foreach my $feat ( @{$features} ) {
    # Sorted input: nothing further along can overlap this exon.
    return if ( $sorted == 1 && $feat->start > $self->end );

    if ( $feat->sub_SeqFeature ) {
      my @children = $feat->sub_SeqFeature;
      # Note: the $sorted flag is not forwarded to the recursive call.
      $self->find_supporting_evidence( \@children );
      next;
    }

    next if $feat->entire_seq()->name ne $self->slice()->name;

    my $overlaps = $feat->end >= $self->start
                && $feat->start <= $self->end
                && $feat->strand == $self->strand;

    $self->add_supporting_features($feat) if $overlaps;
  }
}
1131 | |
1132 | |
1133 =head2 stable_id | |
1134 | |
1135 Arg [1] : string $stable_id | |
1136 Example : none | |
1137 Description: get/set for attribute stable_id | |
1138 Returntype : string | |
1139 Exceptions : none | |
1140 Caller : general | |
1141 Status : Stable | |
1142 | |
1143 =cut | |
1144 | |
# Getter/setter for the stable_id attribute. Any argument (including an
# explicit undef) replaces the stored value; the current value is returned.
sub stable_id {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'stable_id'} = $new_value[0];
  }

  return $self->{'stable_id'};
}
1150 | |
1151 | |
1152 =head2 created_date | |
1153 | |
1154 Arg [1] : string $created_date | |
1155 Example : none | |
1156 Description: get/set for attribute created_date | |
1157 Returntype : string | |
1158 Exceptions : none | |
1159 Caller : general | |
1160 Status : Stable | |
1161 | |
1162 =cut | |
1163 | |
# Getter/setter for the created_date attribute. Any argument (including an
# explicit undef) replaces the stored value; the current value is returned.
sub created_date {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'created_date'} = $new_value[0];
  }

  return $self->{'created_date'};
}
1169 | |
1170 | |
1171 =head2 modified_date | |
1172 | |
1173 Arg [1] : string $modified_date | |
1174 Example : none | |
1175 Description: get/set for attribute modified_date | |
1176 Returntype : string | |
1177 Exceptions : none | |
1178 Caller : general | |
1179 Status : Stable | |
1180 | |
1181 =cut | |
1182 | |
# Getter/setter for the modified_date attribute. Any argument (including an
# explicit undef) replaces the stored value; the current value is returned.
sub modified_date {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'modified_date'} = $new_value[0];
  }

  return $self->{'modified_date'};
}
1188 | |
1189 | |
1190 =head2 version | |
1191 | |
1192 Arg [1] : string $version | |
1193 Example : none | |
1194 Description: get/set for attribute version | |
1195 Returntype : string | |
1196 Exceptions : none | |
1197 Caller : general | |
1198 Status : Stable | |
1199 | |
1200 =cut | |
1201 | |
# Getter/setter for the version attribute. Any argument (including an
# explicit undef) replaces the stored value; the current value is returned.
sub version {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'version'} = $new_value[0];
  }

  return $self->{'version'};
}
1207 | |
1208 | |
1209 =head2 is_current | |
1210 | |
1211 Arg [1] : Boolean $is_current | |
1212 Example : $exon->is_current(1) | |
1213 Description: Getter/setter for is_current state of this exon. | |
1214 Returntype : Int | |
1215 Exceptions : none | |
1216 Caller : general | |
1217 Status : Stable | |
1218 | |
1219 =cut | |
1220 | |
# Getter/setter for the is_current flag. Only a defined argument updates
# the stored value, so passing undef leaves it untouched.
sub is_current {
  my $self  = shift;
  my $value = shift;

  $self->{'is_current'} = $value if defined $value;

  return $self->{'is_current'};
}
1229 | |
1230 =head2 is_constitutive | |
1231 | |
1232 Arg [1] : Boolean $is_constitutive | |
1233 Example : $exon->is_constitutive(0) | |
1234 Description: Getter/setter for is_constitutive state of this exon. | |
1235 Returntype : Int | |
1236 Exceptions : none | |
1237 Caller : general | |
1238 Status : Stable | |
1239 | |
1240 =cut | |
1241 | |
# Getter/setter for the is_constitutive flag. Only a defined argument
# updates the stored value, so passing undef leaves it untouched.
sub is_constitutive {
  my $self  = shift;
  my $value = shift;

  $self->{'is_constitutive'} = $value if defined $value;

  return $self->{'is_constitutive'};
}
1250 | |
1251 | |
1252 =head2 adjust_start_end | |
1253 | |
1254 Arg 1 : int $start_adjustment | |
1255 Arg 2 : int $end_adjustment | |
1256 Example : none | |
1257 Description: returns a new Exon with this much shifted coordinates | |
1258 Returntype : Bio::EnsEMBL::Exon | |
1259 Exceptions : none | |
1260 Caller : Transcript->get_all_translateable_Exons() | |
1261 Status : Stable | |
1262 | |
1263 =cut | |
1264 | |
# Returns a new Bio::EnsEMBL::Exon that is a shallow copy of this exon with
# shifted coordinates and no cached sequence. When strand is 1 the start and
# end adjustments are added to start and end respectively; for any other
# strand value the end adjustment is subtracted from start and the start
# adjustment is subtracted from end.
sub adjust_start_end {
  my ( $self, $start_adjust, $end_adjust ) = @_;

  # Shallow copy: attribute values that are references stay shared.
  my $new_exon = Bio::EnsEMBL::Exon->new();
  %{$new_exon} = %{$self};

  # The cached sequence belongs to the old coordinates.
  delete $new_exon->{'_seq_cache'};

  my ( $new_start, $new_end ) =
    ( $self->strand() == 1 )
    ? ( $self->start() + $start_adjust, $self->end() + $end_adjust )
    : ( $self->start() - $end_adjust,   $self->end() - $start_adjust );

  $new_exon->start($new_start);
  $new_exon->end($new_end);

  return $new_exon;
}
1284 | |
1285 | |
1286 =head2 peptide | |
1287 | |
1288 Arg [1] : Bio::EnsEMBL::Transcript $tr | |
1289 Example : my $pep_str = $exon->peptide($transcript)->seq; | |
1290 Description: Retrieves the portion of the transcripts peptide | |
1291 encoded by this exon. The transcript argument is necessary | |
1292 because outside of the context of a transcript it is not | |
1293 possible to correctly determine the translation. Note that | |
1294 an entire amino acid will be present at the exon boundaries | |
1295 even if only a partial codon is present. Therefore the | |
1296 concatenation of all of the peptides of a transcripts exons | |
1297 is not the same as a transcripts translation because the | |
1298 summation may contain duplicated amino acids at splice sites. | |
1299 In the case that this exon is entirely UTR, a Bio::Seq object | |
1300 with an empty sequence string is returned. | |
1301 Returntype : Bio::Seq | |
1302 Exceptions : thrown if transcript argument is not provided | |
1303 Caller : general | |
1304 Status : Stable | |
1305 | |
1306 =cut | |
1307 | |
# Returns, as a Bio::Seq, the part of the transcript's peptide encoded by
# this exon. An empty sequence is returned when the exon maps to no peptide
# coordinates (for example an exon that is entirely UTR).
#
# Arg [1] : Bio::EnsEMBL::Transcript $tr - transcript providing the context
# Throws  : if $tr is not a Bio::EnsEMBL::Transcript,
#           if the exon cannot be transferred to the transcript's slice,
#           if the exon maps to several non-contiguous peptide locations
sub peptide {
  my $self = shift;
  my $tr   = shift;

  unless($tr && ref($tr) && $tr->isa('Bio::EnsEMBL::Transcript')) {
    throw("transcript arg must be Bio::EnsEMBL::Transcript not [$tr]");
  }

  # Map in the transcript's coordinate frame.
  my $tmp_exon = $self->transfer($tr->slice);
  if (!$tmp_exon) {
    throw("Couldn't transfer exon to transcript's slice");
  }

  # Convert the exon's genomic span to peptide coordinates, dropping gaps.
  my @coords =
    grep { $_->isa('Bio::EnsEMBL::Mapper::Coordinate') }
    $tr->genomic2pep($tmp_exon->start, $tmp_exon->end, $tmp_exon->strand);

  if(scalar(@coords) > 1) {
    # A copy is handed over so the merge helper cannot alter @coords.
    my $merged = $self->_merge_ajoining_coords([@coords]);
    if(!$merged) {
      my ($e_id, $tr_id) = ($self->stable_id(), $tr->stable_id());
      throw("Error. Exon maps to multiple locations in peptide and those".
            " locations are not continuous." .
            " Is this exon [$e_id] a member of this transcript [$tr_id]?");
    }
    @coords = ($merged);
  }

  # With no coordinates left the peptide stays the empty string.
  my $pep_str = '';

  # Deliberately a separate "if" rather than "elsif": coordinates that were
  # just merged into one must be extracted as well, otherwise such exons
  # would always yield an empty peptide.
  if(scalar(@coords) == 1) {
    my $c   = $coords[0];
    my $pep = $tr->translate;

    # bioperl doesn't give back residues for incomplete codons,
    # so clamp the range to the length of the translation.
    my $end   = ($c->end > $pep->length) ? $pep->length : $c->end;
    my $start = ($c->start < $end)       ? $c->start    : $end;
    $pep_str = $pep->subseq($start, $end);
  }

  return
    Bio::Seq->new( -seq      => $pep_str,
                   -moltype  => 'protein',
                   -alphabet => 'protein',
                   -id       => $self->display_id );
}
1362 | |
1363 =head2 _merge_ajoining_coords | |
1364 | |
1365 Arg [1] : ArrayRef of Bio::EnsEMBL::Mapper::Coordinate objects | |
1366 Example : | |
1367 Description : Merges coords which are adjoining or overlapping | |
1368 Returntype : Bio::EnsEMBL::Mapper::Coordinate or undef if it cannot happen | |
1369 Exceptions : none (undef is returned if the coords cannot be condensed into one location) | |
1370 Caller : internal | |
1371 Status : Development | |
1372 | |
1373 =cut | |
1374 | |
# Condenses a list of coordinates into a single coordinate.
#
# Arg [1] : array ref of Bio::EnsEMBL::Mapper::Coordinate objects; each one
#           must start no later than one position past the end reached so
#           far (i.e. adjoin or overlap what came before).
# Returns : a new Bio::EnsEMBL::Mapper::Coordinate spanning the whole run,
#           carrying id, strand and rank of the first coordinate; nothing
#           (undef in scalar context) if the list is empty or contains a gap.
#
# The caller's array is left untouched.
sub _merge_ajoining_coords {
  my ($self, $coords) = @_;

  return unless $coords && @{$coords};

  # Work on a copy of the list so the caller's array is not shifted.
  my ($first, @rest) = @{$coords};
  my $start    = $first->start();
  my $last_end = $first->end();

  foreach my $other_coord (@rest) {
    # A gap between the covered range and the next coordinate: no merge.
    return if ($last_end + 1) < $other_coord->start();

    # Only ever extend the covered range; a coordinate contained in the
    # range seen so far must not pull the end backwards.
    my $other_end = $other_coord->end();
    $last_end = $other_end if $other_end > $last_end;
  }

  return Bio::EnsEMBL::Mapper::Coordinate->new(
    $first->id(), $start, $last_end, $first->strand(), $first->rank());
}
1400 | |
1401 | |
1402 | |
1403 | |
1404 =head2 seq | |
1405 | |
1406 Arg [1] : none | |
1407 Example : my $seq_str = $exon->seq->seq; | |
1408 Description: Retrieves the dna sequence of this Exon. | |
1409 Returned in a Bio::Seq object. Note that the sequence may | |
1410 include UTRs (or even be entirely UTR). | |
1411 Returntype : Bio::Seq or undef | |
1412 Exceptions : warning if argument passed, | |
1413 warning if exon does not have attached slice | |
1414 warning if exon strand is not defined (or 0) | |
1415 Caller : general | |
1416 Status : Stable | |
1417 | |
1418 =cut | |
1419 | |
# Returns the DNA sequence of this exon wrapped in a Bio::Seq object.
# The raw sequence string is cached in $self->{'_seq_cache'}.
#
# Arg [1] : (discouraged) object with a seq() method; a warning is issued
#           and its seq() value replaces the cached sequence.
# Returns : Bio::Seq, or undef (after a warning) when the exon has no slice
#           or no non-zero strand.
sub seq {
  my ( $self, $arg ) = @_;

  if ( defined $arg ) {
    warning("seq setting on Exon not supported currently");
    $self->{'_seq_cache'} = $arg->seq();
  }

  if ( !defined( $self->{'_seq_cache'} ) ) {
    my $slice = $self->slice();
    if ( !defined $slice ) {
      warning("Cannot retrieve seq for exon without slice\n");
      return undef;
    }

    my $strand = $self->strand();
    if ( !$strand ) {
      warning("Cannot retrieve seq for unstranded exon\n");
      return undef;
    }

    my $seq;

    if ( $slice->is_circular() ) {
      if ( $slice->start > $slice->end ) {
        # Normally exons overlapping the chromosome origin have a negative
        # feature start while the slice runs 1 .. length; this branch is
        # for an exon attached to a sub slice that itself spans the origin.
        # The exon is read in two pieces split at $mid_point.
        my $mid_point = $slice->seq_region_length() - $slice->start() + 1;
        my $seq1 = $slice->subseq( $self->start(), $mid_point,   $strand );
        my $seq2 = $slice->subseq( $mid_point + 1, $self->end(), $strand );

        # On the reverse strand the two pieces are joined in swapped order.
        $seq = $strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
      }
      elsif ( $self->start < 0 || $self->start > $self->end ) {
        # Exon overlapping the origin: either a negative start, or
        # chromosome-based coordinates (as produced via sub_Slice) where
        # start is greater than end. Read up to the end of the seq region,
        # then continue from position 1.
        my $mid_point = $slice->seq_region_length;
        my $seq1 = $slice->subseq( $self->start, $mid_point,  $strand );
        my $seq2 = $slice->subseq( 1,            $self->end,  $strand );

        $seq = $strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
      }
      else {
        # Exon on a circular region that does not overlap the origin.
        $seq = $slice->subseq( $self->start(), $self->end(), $strand );
      }
    }
    else {
      $seq = $slice->subseq( $self->start(), $self->end(), $strand );
    }

    $self->{'_seq_cache'} = $seq;
  }

  return
    Bio::Seq->new( -seq      => $self->{'_seq_cache'},
                   -id       => $self->display_id,
                   -moltype  => 'dna',
                   -alphabet => 'dna' );
}
1476 | |
1477 | |
1478 =head2 hashkey | |
1479 | |
1480 Arg [1] : none | |
1481 Example : if(exists $hash{$exon->hashkey}) { do_something(); } | |
1482 Description: Returns a unique hashkey that can be used to uniquely identify | |
1483 this exon. Exons are considered to be identical if they share | |
1484 the same seq_region, start, end, strand, phase, end_phase. | |
1485 Note that this will consider two exons on different slices | |
1486 to be different, even if they actually are not. | |
1487 Returntype : string formatted as slice_name-start-end-strand-phase-end_phase | |
1488 Exceptions : thrown if not all the necessary attributes needed to generate | |
1489 a unique hash value are set | |
1490 (a missing start attribute only produces a warning) | |
1491 Caller : general | |
1492 Status : Stable | |
1493 | |
1494 =cut | |
1495 | |
# Builds the string "slice_name-start-end-strand-phase-end_phase" from the
# exon's attributes. Throws when the slice name, end, strand, phase or
# end_phase is undefined; an undefined start only triggers a warning.
sub hashkey {
  my ($self) = @_;

  my $slice      = $self->{'slice'};
  my $slice_name = $slice ? $slice->name() : undef;

  throw('Slice must be set to generate correct hashkey.')
    unless defined $slice_name;

  # Unlike the other attributes, a missing start is reported via warning()
  # rather than throw().
  warning("start attribute must be defined to generate correct hashkey.")
    unless defined $self->{'start'};

  foreach my $attribute (qw(end strand phase end_phase)) {
    next if defined $self->{$attribute};
    throw("$attribute attribute must be defined to generate correct hashkey.");
  }

  return join( '-', $slice_name,
               @{$self}{qw(start end strand phase end_phase)} );
}
1533 | |
1534 | |
1535 =head2 display_id | |
1536 | |
1537 Arg [1] : none | |
1538 Example : print $exons->display_id(); | |
1539 Description: This method returns a string that is considered to be | |
1540 the 'display' identifier. For exons this is (depending on | |
1541 availability and in this order) the stable Id, the dbID or an | |
1542 empty string. | |
1543 Returntype : string | |
1544 Exceptions : none | |
1545 Caller : web drawing code | |
1546 Status : Stable | |
1547 | |
1548 =cut | |
1549 | |
# Returns the identifier used for display: the stable id when it is true,
# otherwise the dbID when that is true, otherwise an empty string.
sub display_id {
  my ($self) = @_;

  return $self->{'stable_id'} if $self->{'stable_id'};

  return $self->dbID || '';
}
1554 | |
1555 | |
1556 =head2 load | |
1557 | |
1558 Args : None | |
1559 Example : $exon->load(); | |
1560 Description : The Ensembl API makes extensive use of | |
1561 lazy-loading. Under some circumstances (e.g., | |
1562 when copying genes between databases), all data of | |
1563 an object needs to be fully loaded. This method | |
1564 loads the parts of the object that are usually | |
1565 lazy-loaded. | |
1566 Returns : Nothing. | |
1567 | |
1568 =cut | |
1569 | |
# Forces the lazy-loaded parts of the exon to be fetched by calling the
# analysis, stable_id and get_all_supporting_features accessors in turn.
# The value of the last call is what the sub evaluates to.
sub load {
  my $self = shift;

  $self->analysis();
  $self->stable_id();

  return $self->get_all_supporting_features();
}
1577 | |
1578 =head1 DEPRECATED METHODS | |
1579 | |
1580 =cut | |
1581 | |
1582 | |
1583 =head2 _get_stable_entry_info | |
1584 | |
1585 Description: DEPRECATED. | |
1586 | |
1587 =cut | |
1588 | |
# DEPRECATED. Emits a deprecation notice, then delegates to the adaptor's
# get_stable_entry_info(); returns undef when the exon has no adaptor.
sub _get_stable_entry_info {
  my ($self) = @_;

  deprecate( "This function shouldnt be called any more" );

  my $adaptor = $self->adaptor;
  return undef unless defined $adaptor;

  return $adaptor->get_stable_entry_info($self);
}
1597 | |
1598 | |
1599 =head2 temporary_id | |
1600 | |
1601 Description: DEPRECATED. This should not be necessary. | |
1602 | |
1603 =cut | |
1604 | |
# DEPRECATED getter/setter for the 'tempID' attribute. Emits a deprecation
# notice on every call; any argument (including undef) replaces the value.
sub temporary_id {
  my ( $self, @new_value ) = @_;

  deprecate('It should not be necessary to use this method.');

  if (@new_value) {
    $self->{'tempID'} = $new_value[0];
  }

  return $self->{'tempID'};
}
1611 | |
1612 | |
1613 =head2 created | |
1614 | |
1615 Description: DEPRECATED. Do not use. | |
1616 | |
1617 =cut | |
1618 | |
# DEPRECATED getter/setter for the '_created' attribute. Emits a deprecation
# notice on every call; only a defined argument updates the stored value.
sub created {
  my $self  = shift;
  my $value = shift;

  deprecate( "Created attribute not supported any more." );

  $self->{'_created'} = $value if defined $value;

  return $self->{'_created'};
}
1627 | |
1628 =head2 modified | |
1629 | |
1630 Description: DEPRECATED. Do not use. | |
1631 | |
1632 =cut | |
1633 | |
1634 | |
# DEPRECATED getter/setter for the '_modified' attribute. Emits a
# deprecation notice on every call; only a defined argument updates the
# stored value.
sub modified {
  my $self  = shift;
  my $value = shift;

  deprecate( "Modified attribute not supported any more." );

  $self->{'_modified'} = $value if defined $value;

  return $self->{'_modified'};
}
1643 | |
1644 | |
1645 =head2 type | |
1646 | |
1647 Description: DEPRECATED. Do not use. | |
1648 | |
1649 =cut | |
1650 | |
# DEPRECATED getter/setter for the 'type' attribute. Emits a deprecation
# notice on every call; only a defined argument updates the stored value.
sub type {
  my $self  = shift;
  my $value = shift;

  deprecate("Type attribute not supported anymore.");

  $self->{'type'} = $value if defined $value;

  return $self->{'type'};
}
1659 | |
1660 | |
1661 1; |