comparison variant_effect_predictor/Bio/EnsEMBL/Transcript.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
24
25 =head1 SYNOPSIS
26
27 Creation:
28
29 my $tran = new Bio::EnsEMBL::Transcript();
30 my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
31
32 Manipulation:
33
34 # Returns an array of Exon objects
35 my @exons = @{ $tran->get_all_Exons() };
36
37 # Returns the peptide translation of the exons as a Bio::Seq
38 if ( $tran->translation() ) {
39 my $pep = $tran->translate();
40 } else {
41 print "Transcript ", $tran->stable_id(), " is non-coding\n";
42 }
43
44 =head1 DESCRIPTION
45
46 A representation of a transcript within the Ensembl system. A transcript
47 consists of a set of Exons and (possibly) a Translation which defines the
48 coding and non-coding regions of the exons.
49
50 =cut
51
52 package Bio::EnsEMBL::Transcript;
53
54 use strict;
55
56 use Bio::EnsEMBL::Feature;
57 use Bio::EnsEMBL::Intron;
58 use Bio::EnsEMBL::TranscriptMapper;
59 use Bio::EnsEMBL::Utils::TranscriptSNPs;
60 use Bio::EnsEMBL::SeqEdit;
61
62 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
63 use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
64 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
65
66 use vars qw(@ISA);
67 @ISA = qw(Bio::EnsEMBL::Feature);
68
69
70 =head2 new
71
72 Arg [-EXONS] :
73 reference to list of Bio::EnsEMBL::Exon objects - exons which make up
74 this transcript
75 Arg [-STABLE_ID] :
76 string - the stable identifier of this transcript
77 Arg [-VERSION] :
78 int - the version of the stable identifier of this transcript
79 Arg [-EXTERNAL_NAME] :
80 string - the external database name associated with this transcript
81 Arg [-EXTERNAL_DB] :
82 string - the name of the database the external name is from
83 Arg [-EXTERNAL_STATUS]:
84 string - the status of the external identifier
85 Arg [-DISPLAY_XREF]:
86 Bio::EnsEMBL::DBEntry - The external database entry that is used
87 to label this transcript when it is displayed.
88 Arg [-CREATED_DATE]:
89 string - the date the transcript was created
90 Arg [-MODIFIED_DATE]:
91 string - the date the transcript was last modified
92 Arg [-DESCRIPTION]:
93 string - the transcipts description
94 Arg [-BIOTYPE]:
95 string - the biotype e.g. "protein_coding"
96 Arg [-STATUS]:
97 string - the transcripts status i.e. "KNOWN","NOVEL"
98 Arg [-IS_CURRENT]:
99 Boolean - specifies if this is the current version of the transcript
100 Example : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
101 Description: Constructor. Instantiates a Transcript object.
102 Returntype : Bio::EnsEMBL::Transcript
103 Exceptions : throw on bad arguments
104 Caller : general
105 Status : Stable
106
107 =cut
108
109 sub new {
110 my $proto = shift;
111
112 my $class = ref($proto) || $proto;
113
114 my $self = $class->SUPER::new(@_);
115
116 my (
117 $exons, $stable_id, $version,
118 $external_name, $external_db, $external_status,
119 $display_xref, $created_date, $modified_date,
120 $description, $biotype, $confidence,
121 $external_db_name, $status, $is_current
122 );
123
124 # Catch for old style constructor calling:
125 if ( ( @_ > 0 ) && ref( $_[0] ) ) {
126 $exons = [@_];
127 deprecate( "Transcript constructor should use named arguments.\n"
128 . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
129 . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
130 } else {
131 (
132 $exons, $stable_id, $version,
133 $external_name, $external_db, $external_status,
134 $display_xref, $created_date, $modified_date,
135 $description, $biotype, $confidence,
136 $external_db_name, $status, $is_current
137 )
138 = rearrange( [
139 'EXONS', 'STABLE_ID',
140 'VERSION', 'EXTERNAL_NAME',
141 'EXTERNAL_DB', 'EXTERNAL_STATUS',
142 'DISPLAY_XREF', 'CREATED_DATE',
143 'MODIFIED_DATE', 'DESCRIPTION',
144 'BIOTYPE', 'CONFIDENCE',
145 'EXTERNAL_DB_NAME', 'STATUS',
146 'IS_CURRENT'
147 ],
148 @_
149 );
150 }
151
152 if ($exons) {
153 $self->{'_trans_exon_array'} = $exons;
154 $self->recalculate_coordinates();
155 }
156
157 $self->stable_id($stable_id);
158 $self->version($version);
159 $self->{'created_date'} = $created_date;
160 $self->{'modified_date'} = $modified_date;
161 $self->external_name($external_name) if ( defined $external_name );
162 $self->external_db($external_db) if ( defined $external_db );
163 $self->external_status($external_status)
164 if ( defined $external_status );
165 $self->display_xref($display_xref) if ( defined $display_xref );
166 $self->edits_enabled(1);
167
168 $self->description($description);
169 $self->status($confidence); # old style name
170 $self->status($status); # new style name
171 $self->biotype($biotype);
172
173 # default is_current
174 $is_current = 1 unless ( defined($is_current) );
175 $self->{'is_current'} = $is_current;
176
177 return $self;
178 } ## end sub new
179
180 =head2 get_all_DBLinks
181
182 Arg [1] : String database name (optional)
183 SQL wildcard characters (_ and %) can be used to
184 specify patterns.
185
186 Example : my @dblinks = @{ $transcript->get_all_DBLinks() };
187 my @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
188
189 Description: Retrieves *all* related DBEntries for this
190 transcript. This includes all DBEntries that are
191 associated with the corresponding translation.
192
193 If you only want to retrieve the DBEntries associated
194 with the transcript (and not the translation) then
195 you should use the get_all_DBEntries() call instead.
196
197 Note: Each entry may be listed more than once. No
198 uniqueness checks are done. Also if you put in an
199 incorrect external database name no checks are done
200 to see if this exists, you will just get an empty
201 list.
202
203 Return type: Listref of Bio::EnsEMBL::DBEntry objects
204 Exceptions : none
205 Caller : general
206 Status : Stable
207
208 =cut
209
210 sub get_all_DBLinks {
211 my ( $self, $db_name_exp, $ex_db_type ) = @_;
212
213 my @links =
214 @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
215
216 # Add all of the transcript and translation xrefs to the return list.
217 my $translation = $self->translation();
218 if ( defined($translation) ) {
219 push( @links,
220 @{$translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) }
221 );
222 }
223
224 @links = sort { _compare_xrefs() } @links;
225
226 return \@links;
227 }
228
229 =head2 get_all_xrefs
230
231 Arg [1] : String database name (optional)
232 SQL wildcard characters (_ and %) can be used to
233 specify patterns.
234
235 Example : @xrefs = @{ $transcript->get_all_xrefs() };
236 @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };
237
238 Description: Retrieves *all* related xrefs for this transcript.
239 This includes all xrefs that are associated with the
240 corresponding translation of this transcript.
241
242 If you want to retrieve the xrefs associated with
243 only the transcript (and not the translation) then
244 you should use the get_all_object_xrefs() method
245 instead.
246
247 Note: Each entry may be listed more than once. No
248 uniqueness checks are done. Also if you put in an
249 incorrect external database name no checks are done
250 to see if this exists, you will just get an empty
251 list.
252
253 NB: This method is an alias for the
254 get_all_DBLinks() method.
255
256 Return type: Listref of Bio::EnsEMBL::DBEntry objects
257
258 Status : Stable
259
260 =cut
261
262 sub get_all_xrefs {
263 my $self = shift;
264 return $self->get_all_DBLinks(@_);
265 }
266
267 =head2 get_all_DBEntries
268
269 Arg [1] : (optional) String, external database name
270
271 Arg [2] : (optional) String, external database type
272
273 Example : my @dbentries = @{ $transcript->get_all_DBEntries() };
274
275 Description: Retrieves DBEntries (xrefs) for this transcript.
276 This does *not* include the corresponding
277 translations DBEntries (see get_all_DBLinks()).
278
279 This method will attempt to lazy-load DBEntries
280 from a database if an adaptor is available and no
281 DBEntries are present on the transcript (i.e. they
282 have not already been added or loaded).
283
284 Returntype : Listref of Bio::EnsEMBL::DBEntry objects
285 Exceptions : none
286 Caller : get_all_DBLinks, TranscriptAdaptor::store
287 Status : Stable
288
289 =cut
290
291 sub get_all_DBEntries {
292 my ( $self, $ex_db_exp, $ex_db_type ) = @_;
293
294 my $cache_name = 'dbentries';
295
296 if ( defined($ex_db_exp) ) {
297 $cache_name .= $ex_db_exp;
298 }
299
300 if ( defined($ex_db_type) ) {
301 $cache_name .= $ex_db_type;
302 }
303
304 # if not cached, retrieve all of the xrefs for this transcript
305 if ( !defined( $self->{$cache_name} ) && defined( $self->adaptor() ) )
306 {
307 $self->{$cache_name} =
308 $self->adaptor()->db()->get_DBEntryAdaptor()
309 ->fetch_all_by_Transcript( $self, $ex_db_exp, $ex_db_type );
310 }
311
312 $self->{$cache_name} ||= [];
313
314 return $self->{$cache_name};
315 } ## end sub get_all_DBEntries
316
317 =head2 get_all_object_xrefs
318
319 Arg [1] : (optional) String, external database name
320
321 Arg [2] : (optional) String, external_db type
322
323 Example : @oxrefs = @{ $transcript->get_all_object_xrefs() };
324
325 Description: Retrieves xrefs for this transcript. This does
326 *not* include xrefs that are associated with the
327 corresponding translations of this transcript (see
328 get_all_xrefs()).
329
330 This method will attempt to lazy-load xrefs from a
331 database if an adaptor is available and no xrefs are
332 present on the transcript (i.e. they have not already
333 been added or loaded).
334
335 NB: This method is an alias for the
336 get_all_DBentries() method.
337
338 Return type: Listref of Bio::EnsEMBL::DBEntry objects
339
340 Status : Stable
341
342 =cut
343
344 sub get_all_object_xrefs {
345 my $self = shift;
346 return $self->get_all_DBEntries(@_);
347 }
348
349 =head2 add_DBEntry
350
351 Arg [1] : Bio::EnsEMBL::DBEntry $dbe
352 The dbEntry to be added
353 Example : my $dbe = Bio::EnsEMBL::DBEntery->new(...);
354 $transcript->add_DBEntry($dbe);
355 Description: Associates a DBEntry with this transcript. Note that adding
356 DBEntries will prevent future lazy-loading of DBEntries for this
357 gene (see get_all_DBEntries).
358 Returntype : none
359 Exceptions : thrown on incorrect argument type
360 Caller : general
361 Status : Stable
362
363 =cut
364
365 sub add_DBEntry {
366 my $self = shift;
367 my $dbe = shift;
368
369 unless($dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry')) {
370 throw('Expected DBEntry argument');
371 }
372
373 $self->{'dbentries'} ||= [];
374 push @{$self->{'dbentries'}}, $dbe;
375 }
376
377
378 =head2 get_all_supporting_features
379
380 Example : my @evidence = @{ $transcript->get_all_supporting_features };
381 Description: Retreives any supporting features added manually by
382 calls to add_supporting_features.
383 Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
384 Exceptions : none
385 Caller : general
386 Status : Stable
387
388 =cut
389
390 sub get_all_supporting_features {
391 my $self = shift;
392
393 if( !exists $self->{_supporting_evidence} ) {
394 if($self->adaptor) {
395 my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
396 $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
397 }
398 }
399
400 return $self->{_supporting_evidence} || [];
401 }
402
403
404 =head2 add_supporting_features
405
406 Arg [1-N] : Bio::EnsEMBL::FeaturePair $feature
407 The supporting features to add
408 Example : $transcript->add_supporting_features(@features);
409 Description: Adds a list of supporting features to this Transcript.
410 The added features can be retieved by
411 get_all_supporting_features().
412 Returntype : none
413 Exceptions : throw if any of the features are not FeaturePairs
414 throw if any of the features are not in the same coordinate
415 system as the Transcript
416 Caller : general
417 Status : Stable
418
419 =cut
420
421 sub add_supporting_features {
422 my ($self, @features) = @_;
423
424 return unless @features;
425
426 $self->{_supporting_evidence} ||= [];
427
428 # check whether this feature object has been added already
429 FEATURE: foreach my $feature (@features) {
430
431 if (!defined($feature) || ref($feature) eq "ARRAY") {
432 throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
433 }
434 if (!$feature || !$feature->isa("Bio::EnsEMBL::FeaturePair")) {
435 print "feature = " . $feature . "\n";
436 throw("Supporting feat [$feature] not a " .
437 "Bio::EnsEMBL::FeaturePair");
438 }
439
440 if ((defined $self->slice() && defined $feature->slice())&&
441 ( $self->slice()->name() ne $feature->slice()->name())){
442 throw("Supporting feat not in same coord system as exon\n" .
443 "exon is attached to [".$self->slice()->name()."]\n" .
444 "feat is attached to [".$feature->slice()->name()."]");
445 }
446
447 foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
448 # compare objects
449 if ( $feature == $added_feature ){
450 #this feature has already been added
451 next FEATURE;
452 }
453 }
454
455 #no duplicate was found, add the feature
456 push(@{$self->{_supporting_evidence}}, $feature);
457 }
458 }
459
460
461 =head2 flush_supporting_features
462
463 Example : $transcript->flush_supporting_features;
464 Description : Removes all supporting evidence from the transcript.
465 Return type : (Empty) listref
466 Exceptions : none
467 Caller : general
468 Status : Stable
469
470 =cut
471
472 sub flush_supporting_features {
473 my $self = shift;
474 $self->{'_supporting_evidence'} = [];
475 }
476
477
478 =head2 external_db
479
480 Arg [1] : (optional) String - name of external db to set
481 Example : $transcript->external_db('HGNC');
482 Description: Getter/setter for attribute external_db. The db is the one that
483 belongs to the external_name.
484 Returntype : String
485 Exceptions : none
486 Caller : general
487 Status : Stable
488
489 =cut
490
491 sub external_db {
492 my ( $self, $ext_dbname ) = @_;
493
494 if(defined $ext_dbname) {
495 return ( $self->{'external_db'} = $ext_dbname );
496 }
497
498 if( exists $self->{'external_db'} ) {
499 return $self->{'external_db'};
500 }
501
502 my $display_xref = $self->display_xref();
503
504 if( defined $display_xref ) {
505 return $display_xref->dbname()
506 } else {
507 return undef;
508 }
509 }
510
511
512 =head2 external_status
513
514 Arg [1] : (optional) String - status of the external db
515 Example : $transcript->external_status('KNOWNXREF');
516 Description: Getter/setter for attribute external_status. The status of
517 the external db of the one that belongs to the external_name.
518 Returntype : String
519 Exceptions : none
520 Caller : general
521 Status : Stable
522
523 =cut
524
525 sub external_status {
526 my ( $self, $ext_status ) = @_;
527
528 if(defined $ext_status) {
529 return ( $self->{'external_status'} = $ext_status );
530 }
531
532 if( exists $self->{'external_status'} ) {
533 return $self->{'external_status'};
534 }
535
536 my $display_xref = $self->display_xref();
537
538 if( defined $display_xref ) {
539 return $display_xref->status()
540 } else {
541 return undef;
542 }
543 }
544
545
546 =head2 external_name
547
548 Arg [1] : (optional) String - the external name to set
549 Example : $transcript->external_name('BRCA2-001');
550 Description: Getter/setter for attribute external_name.
551 Returntype : String or undef
552 Exceptions : none
553 Caller : general
554 Status : Stable
555
556 =cut
557
558 sub external_name {
559 my ($self, $ext_name) = @_;
560
561 if(defined $ext_name) {
562 return ( $self->{'external_name'} = $ext_name );
563 }
564
565 if( exists $self->{'external_name'} ) {
566 return $self->{'external_name'};
567 }
568
569 my $display_xref = $self->display_xref();
570
571 if( defined $display_xref ) {
572 return $display_xref->display_id()
573 } else {
574 return undef;
575 }
576 }
577
578
579 =head2 is_known
580
581 Example : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
582 $transcript->is_known;
583 Description: Returns TRUE if this gene has a status of 'KNOWN'
584 Returntype : TRUE if known, FALSE otherwise
585 Exceptions : none
586 Caller : general
587 Status : Stable
588
589 =cut
590
591 sub is_known {
592 my $self = shift;
593 return ( $self->{'status'} eq "KNOWN" || $self->{'status'} eq "KNOWN_BY_PROJECTION" );
594 }
595
596
597 =head2 status
598
599 Arg [1] : string $status
600 Example : none
601 Description: get/set for attribute status
602 Returntype : string
603 Exceptions : none
604 Caller : general
605 Status : Medium Risk
606
607 =cut
608
609 sub status {
610 my $self = shift;
611 $self->{'status'} = shift if( @_ );
612 return $self->{'status'};
613 }
614
615 =head2 biotype
616
617 Arg [1] : string $biotype
618 Example : none
619 Description: get/set for attribute biotype
620 Returntype : string
621 Exceptions : none
622 Caller : general
623 Status : Stable
624
625 =cut
626
627 sub biotype {
628 my $self = shift;
629 $self->{'biotype'} = shift if( @_ );
630 return ( $self->{'biotype'} || "protein_coding" );
631 }
632
633
634 =head2 display_xref
635
636 Arg [1] : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
637 Example : $transcript->display_xref($db_entry);
638 Description: Getter/setter for display_xref for this transcript.
639 Returntype : Bio::EnsEMBL::DBEntry
640 Exceptions : none
641 Caller : general
642 Status : Stable
643
644 =cut
645
646 sub display_xref {
647 my $self = shift;
648 $self->{'display_xref'} = shift if(@_);
649 return $self->{'display_xref'};
650 }
651
652 =head2 is_canonical
653
654 Args [1] : (optional) Boolean is_canonical
655
656 Example : if ($transcript->is_canonical()) { ... }
657
658 Description : Returns true (non-zero) if the transcript is the
659 canonical transcript of its gene, false (0) if not. If the code
660 returns an undefined it is because its state is not currently
661 known. Internally the code will consult the database for this
662 value if it is unknown and the transcript has a dbID and an
663 attached adaptor
664
665 Return type : Boolean
666
667 Status : Stable
668
669 =cut
670
671 sub is_canonical {
672 my ( $self, $value ) = @_;
673
674 #Shortcut call
675 return $self->{is_canonical} if defined $self->{is_canonical};
676
677 if ( defined($value) ) {
678 $self->{is_canonical} = ( $value ? 1 : 0 );
679 }
680 else {
681 if(! defined $self->{is_canonical} && $self->dbID() && $self->adaptor()) {
682 $self->{is_canonical} = $self->adaptor()->is_Transcript_canonical($self);
683 }
684 }
685
686 return $self->{is_canonical};
687 }
688
689 =head2 translation
690
691 Args : None
692 Example : if ( $transcript->translation() ) {
693 print( $transcript->translation()->stable_id(), "\n" );
694 } else {
695 print("Pseudogene\n");
696 }
697 Description: Getter/setter for the Translation object which
698 defines the CDS (and as a result the peptide encoded
699 by) this transcript. This function will return
700 undef if this transcript is a pseudogene, i.e. a
701 non-translating transcript such as an ncRNA. This
702 is the accepted method of determining whether a
703 transcript is a pseudogene or not.
704 Returntype : Bio::EnsEMBL::Translation
705 Exceptions : none
706 Caller : general
707 Status : Stable
708
709 =cut
710
711 sub translation {
712 my ( $self, $translation ) = @_;
713
714 if ( defined($translation) ) {
715 assert_ref( $translation, 'Bio::EnsEMBL::Translation' );
716
717 $self->{'translation'} = $translation;
718 $translation->transcript($self);
719
720 $self->{'cdna_coding_start'} = undef;
721 $self->{'cdna_coding_end'} = undef;
722
723 $self->{'coding_region_start'} = undef;
724 $self->{'coding_region_end'} = undef;
725
726 $self->{'transcript_mapper'} = undef;
727
728 } elsif ( @_ > 1 ) {
729 if ( defined( $self->{'translation'} ) ) {
730 # Removing existing translation
731
732 $self->{'translation'}->transcript(undef);
733 delete( $self->{'translation'} );
734
735 $self->{'cdna_coding_start'} = undef;
736 $self->{'cdna_coding_end'} = undef;
737
738 $self->{'coding_region_start'} = undef;
739 $self->{'coding_region_end'} = undef;
740
741 $self->{'transcript_mapper'} = undef;
742 }
743 } elsif ( !exists( $self->{'translation'} )
744 && defined( $self->adaptor() ) )
745 {
746 $self->{'translation'} =
747 $self->adaptor()->db()->get_TranslationAdaptor()
748 ->fetch_by_Transcript($self);
749 }
750
751 return $self->{'translation'};
752 } ## end sub translation
753
754 =head2 get_all_alternative_translations
755
756 Args : None
757 Example :
758
759 my @alt_translations =
760 @{ $transcript->get_all_alternative_translations() };
761
762 Description: Fetches all alternative translations defined for this
763 transcript. The canonical translation is not returned.
764
765 Returntype : Arrayref to Bio::EnsEMBL::Translation
766 Exceptions : None
767 Caller : General
768 Status : Stable
769
770 =cut
771
772 sub get_all_alternative_translations {
773 my ($self) = @_;
774
775 if ( !exists( $self->{'alternative_translations'} )
776 && defined( $self->adaptor() ) )
777 {
778 my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
779 my @translations =
780 @{ $pa->fetch_all_alternative_by_Transcript($self) };
781
782 $self->{'alternative_translations'} = \@translations;
783 }
784
785 return $self->{'alternative_translations'};
786 }
787
788 =head2 add_alternative_translation
789
790 Args : Bio::EnsEMBL::Translation $translation
791 Example :
792
793 $transcript->add_alternative_translation($translation);
794
795 Description: Adds an alternative translation to this transcript.
796 Returntype : None
797 Exceptions : None
798 Caller : General
799 Status : Stable
800
801 =cut
802
803 sub add_alternative_translation {
804 my ( $self, $translation ) = @_;
805
806 if ( !( defined($translation)
807 && ref($translation)
808 && $translation->isa('Bio::EnsEMBL::Translation') ) )
809 {
810 throw("Bio::EnsEMBL::Translation argument expected.");
811 }
812
813 # Load the existsing alternative translations from the database if
814 # they haven't already been loaded.
815 $self->get_all_alternative_translations();
816
817 push( @{ $self->{'alternative_translations'} }, $translation );
818 }
819
820 =head2 spliced_seq
821
822 Args : none
823 Example : none
824 Description: Retrieves all Exon sequences and concats them together.
825 No phase padding magic is done, even if phases do not align.
826 Returntype : Text
827 Exceptions : none
828 Caller : general
829 Status : Stable
830
831 =cut
832
833 sub spliced_seq {
834 my ( $self ) = @_;
835
836 my $seq_string = "";
837 for my $ex ( @{$self->get_all_Exons()} ) {
838 my $seq = $ex->seq();
839
840 if(!$seq) {
841 warning("Could not obtain seq for exon. Transcript sequence may not " .
842 "be correct.");
843 $seq_string .= 'N' x $ex->length();
844 } else {
845 $seq_string .= $seq->seq();
846 }
847 }
848
849 # apply post transcriptional edits
850 if($self->edits_enabled()) {
851 my @seqeds = @{$self->get_all_SeqEdits()};
852
853 # sort edits in reverse order to remove complication of
854 # adjusting downstream edits
855 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
856
857 foreach my $se (@seqeds) {
858 $se->apply_edit(\$seq_string);
859 }
860 }
861
862 return $seq_string;
863 }
864
865
866 =head2 translateable_seq
867
868 Args : none
869 Example : print $transcript->translateable_seq(), "\n";
870 Description: Returns a sequence string which is the the translateable part
871 of the transcripts sequence. This is formed by splicing all
872 Exon sequences together and apply all defined RNA edits.
873 Then the coding part of the sequence is extracted and returned.
874 The code will not support monkey exons any more. If you want to
875 have non phase matching exons, defined appropriate _rna_edit
876 attributes!
877
878 An empty string is returned if this transcript is a pseudogene
879 (i.e. is non-translateable).
880 Returntype : Text
881 Exceptions : none
882 Caller : general
883 Status : Stable
884
885 =cut
886
887 sub translateable_seq {
888 my ( $self ) = @_;
889
890 if ( !$self->translation() ) {
891 return '';
892 }
893
894 my $mrna = $self->spliced_seq();
895
896 my $start = $self->cdna_coding_start();
897 my $end = $self->cdna_coding_end();
898
899 $mrna = substr( $mrna, $start - 1, $end - $start + 1 );
900
901 my $start_phase = $self->translation->start_Exon->phase();
902 if( $start_phase > 0 ) {
903 $mrna = "N"x$start_phase . $mrna;
904 }
905 if( ! $start || ! $end ) {
906 return "";
907 }
908
909 return $mrna;
910 }
911
912
913 =head2 cdna_coding_start
914
915 Arg [1] : (optional) $value
916 Example : $relative_coding_start = $transcript->cdna_coding_start;
917 Description: Retrieves the position of the coding start of this transcript
918 in cdna coordinates (relative to the start of the 5prime end of
919 the transcript, excluding introns, including utrs).
920
921 This will return undef if this is a pseudogene (i.e. a
922 transcript with no translation).
923 Returntype : int
924 Exceptions : none
925 Caller : five_prime_utr, get_all_snps, general
926 Status : Stable
927
928 =cut
929
930 sub cdna_coding_start {
931 my $self = shift;
932
933 if( @_ ) {
934 $self->{'cdna_coding_start'} = shift;
935 }
936
937 if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
938 # calc coding start relative from the start of translation (in cdna coords)
939 my $start = 0;
940
941 my @exons = @{$self->get_all_Exons};
942 my $exon;
943
944 while($exon = shift @exons) {
945 if($exon == $self->translation->start_Exon) {
946 #add the utr portion of the start exon
947 $start += $self->translation->start;
948 last;
949 } else {
950 #add the entire length of this non-coding exon
951 $start += $exon->length;
952 }
953 }
954
955 # adjust cdna coords if sequence edits are enabled
956 if($self->edits_enabled()) {
957 my @seqeds = @{$self->get_all_SeqEdits()};
958 # sort in reverse order to avoid adjustment of downstream edits
959 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
960
961 foreach my $se (@seqeds) {
962 # use less than start so that start of CDS can be extended
963 if($se->start() < $start) {
964 $start += $se->length_diff();
965 }
966 }
967 }
968
969 $self->{'cdna_coding_start'} = $start;
970 }
971
972 return $self->{'cdna_coding_start'};
973 }
974
975
976 =head2 cdna_coding_end
977
978 Arg [1] : (optional) $value
979 Example : $cdna_coding_end = $transcript->cdna_coding_end;
980 Description: Retrieves the end of the coding region of this transcript in
981 cdna coordinates (relative to the five prime end of the
982 transcript, excluding introns, including utrs).
983
984 This will return undef if this transcript is a pseudogene
985 (i.e. a transcript with no translation and therefor no CDS).
986 Returntype : int
987 Exceptions : none
988 Caller : general
989 Status : Stable
990
991 =cut
992
993 sub cdna_coding_end {
994 my $self = shift;
995
996 if( @_ ) {
997 $self->{'cdna_coding_end'} = shift;
998 }
999
1000 if(!defined $self->{'cdna_coding_end'} && defined $self->translation) {
1001 my @exons = @{$self->get_all_Exons};
1002
1003 my $end = 0;
1004 while(my $exon = shift @exons) {
1005 if($exon == $self->translation->end_Exon) {
1006 # add coding portion of the final coding exon
1007 $end += $self->translation->end;
1008 last;
1009 } else {
1010 # add entire exon
1011 $end += $exon->length;
1012 }
1013 }
1014
1015 # adjust cdna coords if sequence edits are enabled
1016 if($self->edits_enabled()) {
1017 my @seqeds = @{$self->get_all_SeqEdits()};
1018 # sort in reverse order to avoid adjustment of downstream edits
1019 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
1020
1021 foreach my $se (@seqeds) {
1022 # use less than or equal to end+1 so end of the CDS can be extended
1023 if($se->start() <= $end + 1) {
1024 $end += $se->length_diff();
1025 }
1026 }
1027 }
1028
1029 $self->{'cdna_coding_end'} = $end;
1030 }
1031
1032 return $self->{'cdna_coding_end'};
1033 }
1034
1035
1036 =head2 coding_region_start
1037
1038 Arg [1] : (optional) $value
1039 Example : $coding_region_start = $transcript->coding_region_start
1040 Description: Retrieves the start of the coding region of this transcript
1041 in genomic coordinates (i.e. in either slice or contig coords).
1042 By convention, the coding_region_start is always lower than
1043 the value returned by the coding_end method.
1044 The value returned by this function is NOT the biological
1045 coding start since on the reverse strand the biological coding
1046 start would be the higher genomic value.
1047
1048 This function will return undef if this is a pseudogene
1049 (a non-translated transcript).
1050 Returntype : int
1051 Exceptions : none
1052 Caller : general
1053 Status : Stable
1054
1055 =cut
1056
1057 sub coding_region_start {
1058 my ($self, $value) = @_;
1059
1060 if( defined $value ) {
1061 $self->{'coding_region_start'} = $value;
1062 } elsif(!defined $self->{'coding_region_start'} &&
1063 defined $self->translation) {
1064 #calculate the coding start from the translation
1065 my $start;
1066 my $strand = $self->translation()->start_Exon->strand();
1067 if( $strand == 1 ) {
1068 $start = $self->translation()->start_Exon->start();
1069 $start += ( $self->translation()->start() - 1 );
1070 } else {
1071 $start = $self->translation()->end_Exon->end();
1072 $start -= ( $self->translation()->end() - 1 );
1073 }
1074 $self->{'coding_region_start'} = $start;
1075 }
1076
1077 return $self->{'coding_region_start'};
1078 }
1079
1080
1081 =head2 coding_region_end
1082
1083 Arg [1] : (optional) $value
1084 Example : $coding_region_end = $transcript->coding_region_end
1085 Description: Retrieves the end of the coding region of this transcript
1086 in genomic coordinates (i.e. in either slice or contig coords).
1087 By convention, the coding_region_end is always higher than the
1088 value returned by the coding_region_start method.
1089 The value returned by this function is NOT the biological
1090 coding end since on the reverse strand the biological coding
1091 end would be the lower genomic value.
1092
1093 This function will return undef if this is a pseudogene
1094 (a non-translated transcript).
1095 Returntype : int
1096 Exceptions : none
1097 Caller : general
1098 Status : Stable
1099
1100 =cut
1101
1102 sub coding_region_end {
1103 my ($self, $value ) = @_;
1104
1105 my $strand;
1106 my $end;
1107
1108 if( defined $value ) {
1109 $self->{'coding_region_end'} = $value;
1110 } elsif( ! defined $self->{'coding_region_end'}
1111 && defined $self->translation() ) {
1112 $strand = $self->translation()->start_Exon->strand();
1113 if( $strand == 1 ) {
1114 $end = $self->translation()->end_Exon->start();
1115 $end += ( $self->translation()->end() - 1 );
1116 } else {
1117 $end = $self->translation()->start_Exon->end();
1118 $end -= ( $self->translation()->start() - 1 );
1119 }
1120 $self->{'coding_region_end'} = $end;
1121 }
1122
1123 return $self->{'coding_region_end'};
1124 }
1125
1126
1127 =head2 edits_enabled
1128
1129 Arg [1] : (optional) boolean $newval
1130 Example : $transcript->edits_enabled(1);
1131 Description: Enables/Disables the application of SeqEdits to this transcript.
1132 Edits are enabled by default, and affect the cdna/mrna
1133 sequences coordinates and the resultant translation.
1134 Returntype : boolean - the current value of the edits
1135 Exceptions : none
1136 Caller : general, cdna_coding_start, cdna_coding_end, length
1137 Status : Stable
1138
1139 =cut
1140
1141 sub edits_enabled {
1142 my ( $self, $boolean ) = @_;
1143
1144 if ( defined($boolean) ) {
1145 $self->{'edits_enabled'} = $boolean;
1146
1147 # flush cached values that will be different with/without edits
1148 $self->{'cdna_coding_start'} = undef;
1149 $self->{'cdna_coding_end'} = undef;
1150 $self->{'transcript_mapper'} = undef;
1151 }
1152
1153 return $self->{'edits_enabled'};
1154 }
1155
1156
1157 =head2 get_all_SeqEdits
1158
1159 Arg [1] : none
1160 Example : my @seqeds = @{$transcript->get_all_SeqEdits()};
1161 Description: Retrieves all post transcriptional sequence modifications for
1162 this transcript.
1163 Returntype : Bio::EnsEMBL::SeqEdit
1164 Exceptions : none
1165 Caller : spliced_seq()
1166 Status : Stable
1167
1168 =cut
1169
1170 sub get_all_SeqEdits {
1171 my $self = shift;
1172
1173 my @seqeds;
1174
1175 my $attribs = $self->get_all_Attributes('_rna_edit');
1176
1177 # convert attributes to SeqEdit objects
1178 foreach my $a (@$attribs) {
1179 push @seqeds, Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $a);
1180 }
1181
1182 return \@seqeds;
1183 }
1184
1185
1186 =head2 get_all_Attributes
1187
1188 Arg [1] : optional string $attrib_code
1189 The code of the attribute type to retrieve values for.
1190 Example : ($rna_edits) = @{$transcript->get_all_Attributes('_rna_edit')};
1191 @transc_attributes = @{$transcript->get_all_Attributes()};
1192 Description: Gets a list of Attributes of this transcript.
1193 Optionally just get Attrubutes for given code.
1194 Returntype : listref Bio::EnsEMBL::Attribute
1195 Exceptions : warning if transcript does not have attached adaptor and
1196 attempts lazy load.
1197 Caller : general
1198 Status : Stable
1199
1200 =cut
1201
1202 sub get_all_Attributes {
1203 my $self = shift;
1204 my $attrib_code = shift;
1205
1206 if( ! exists $self->{'attributes' } ) {
1207 if(!$self->adaptor() ) {
1208 return [];
1209 }
1210
1211 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
1212 $self->{'attributes'} = $attribute_adaptor->fetch_all_by_Transcript($self);
1213 }
1214
1215 if( defined $attrib_code) {
1216 my @results = grep { uc($_->code()) eq uc($attrib_code) }
1217 @{$self->{'attributes'}};
1218 return \@results;
1219 } else {
1220 return $self->{'attributes'};
1221 }
1222 }
1223
1224
1225 =head2 add_Attributes
1226
1227 Arg [1...] : Bio::EnsEMBL::Attribute $attribute
1228 You can have more Attributes as arguments, all will be added.
1229 Example : $transcript->add_Attributes($rna_edit_attribute);
1230 Description: Adds an Attribute to the Transcript. Usefull to do _rna_edits.
1231 If you add an attribute before you retrieve any from database,
1232 lazy load will be disabled.
1233 Returntype : none
1234 Exceptions : throw on incorrect arguments
1235 Caller : general
1236 Status : Stable
1237
1238 =cut
1239
1240 sub add_Attributes {
1241 my ( $self, @attribs ) = @_;
1242
1243 if ( !exists( $self->{'attributes'} ) ) {
1244 $self->{'attributes'} = [];
1245 }
1246
1247 my $seq_change = 0;
1248 foreach my $attrib (@attribs) {
1249 assert_ref( $attrib, 'Bio::EnsEMBL::Attribute' );
1250
1251 push( @{ $self->{'attributes'} }, $attrib );
1252
1253 if ( $attrib->code() eq "_rna_edit" ) {
1254 $seq_change = 1;
1255 }
1256 }
1257
1258 if ($seq_change) {
1259 my $translation = $self->translation();
1260 if ( defined($translation) ) {
1261 delete( $translation->{'seq'} );
1262 }
1263 }
1264
1265 # flush cdna coord cache b/c we may have added a SeqEdit
1266 delete( $self->{'cdna_coding_start'} );
1267 delete( $self->{'cdna_coding_end'} );
1268 delete( $self->{'transcript_mapper'} );
1269 } ## end sub add_Attributes
1270
1271
1272 =head2 add_Exon
1273
1274 Title : add_Exon
1275 Usage : $trans->add_Exon($exon)
1276 Returns : None
1277 Args [1]: Bio::EnsEMBL::Exon object to add
1278 Args [2]: rank
1279 Exceptions: throws if not a valid Bio::EnsEMBL::Exon
1280 : or exon clashes with another one
1281 Status : Stable
1282
1283 =cut
1284
1285 sub add_Exon {
1286 my ( $self, $exon, $rank ) = @_;
1287
1288 assert_ref( $exon, 'Bio::EnsEMBL::Exon' );
1289
1290 $self->{'_trans_exon_array'} ||= [];
1291
1292 if ( defined($rank) ) {
1293 $self->{'_trans_exon_array'}->[ $rank - 1 ] = $exon;
1294 return;
1295 }
1296
1297 my $was_added = 0;
1298
1299 my $ea = $self->{'_trans_exon_array'};
1300
1301 if ( @{$ea} ) {
1302 if ( $exon->strand() == 1 ) {
1303
1304 my $exon_start = $exon->start();
1305
1306 if ( $exon_start > $ea->[-1]->end() ) {
1307 push( @{$ea}, $exon );
1308 $was_added = 1;
1309 } else {
1310 # Insert it at correct place
1311
1312 my $i = 0;
1313 foreach my $e ( @{$ea} ) {
1314 if ( $exon_start < $e->start() ) {
1315 if ( $exon->end() >= $e->start() ) {
1316 # Overlap
1317 last;
1318 }
1319 if ( $i and $exon_start <= $ea->[$i-1]->end() ) {
1320 # Overlap
1321 last;
1322 }
1323 splice( @{$ea}, $i, 0, $exon );
1324 $was_added = 1;
1325 last;
1326 }
1327 ++$i;
1328 }
1329
1330 }
1331
1332 } else {
1333
1334 my $exon_end = $exon->end();
1335
1336 if ( $exon_end < $ea->[-1]->start() ) {
1337 push( @{$ea}, $exon );
1338 $was_added = 1;
1339 } else {
1340 # Insert it at correct place
1341
1342 my $i = 0;
1343 foreach my $e ( @{$ea} ) {
1344 if ( $exon_end > $e->end() ) {
1345 if ( $exon->start() <= $e->end() ) {
1346 # Overlap
1347 last;
1348 }
1349 if ( $i and $exon_end >= $ea->[$i-1]->start() ) {
1350 # Overlap
1351 last;
1352 }
1353 splice( @{$ea}, $i, 0, $exon );
1354 $was_added = 1;
1355 last;
1356 }
1357 ++$i;
1358 }
1359
1360 }
1361
1362 } ## end else [ if ( $exon->strand() ==...)]
1363 } else {
1364 push( @{$ea}, $exon );
1365 $was_added = 1;
1366 }
1367
1368 # sanity check:
1369 if ( !$was_added ) {
1370 # The exon was not added because it was overloapping with an
1371 # existing exon.
1372 my $all_str = '';
1373
1374 foreach my $e ( @{$ea} ) {
1375 $all_str .= ' '
1376 . $e->start() . '-'
1377 . $e->end() . ' ('
1378 . $e->strand() . ') '
1379 . ( $e->stable_id() || '' ) . "\n";
1380 }
1381
1382 my $cur_str = ' '
1383 . $exon->start() . '-'
1384 . $exon->end() . ' ('
1385 . $exon->strand() . ') '
1386 . ( $exon->stable_id() || '' ) . "\n";
1387
1388 throw( "Exon overlaps with other exon in same transcript.\n"
1389 . "Transcript Exons:\n$all_str\n"
1390 . "This Exon:\n$cur_str" );
1391 }
1392
1393 # recalculate start, end, slice, strand
1394 $self->recalculate_coordinates();
1395 } ## end sub add_Exon
1396
1397
1398 =head2 get_all_Exons
1399
1400 Arg [CONSTITUTIVE] : Boolean
1401 Only return constitutive exons if true (non-zero)
1402
1403 Examples : my @exons = @{ $transcript->get_all_Exons() };
1404
1405 my @exons =
1406 @{ $transcript->get_all_Exons( -constitutive => 1 ) };
1407
1408 Description: Returns an listref of the exons in this transcript
1409 in order, i.e. the first exon in the listref is the
1410 5prime most exon in the transcript. Only returns
1411 constitutive exons if the CONSTITUTIVE argument is
1412 true.
1413
1414 Returntype : listref to Bio::EnsEMBL::Exon objects
1415 Exceptions : none
1416 Caller : general
1417 Status : Stable
1418
1419 =cut
1420
1421 sub get_all_Exons {
1422 my ( $self, @args ) = @_;
1423
1424 my $constitutive;
1425 if (@args) {
1426 $constitutive = rearrange( ['CONSTITUTIVE'], @args );
1427 }
1428
1429 if (!defined( $self->{'_trans_exon_array'} )
1430 && defined( $self->adaptor() ) )
1431 {
1432 $self->{'_trans_exon_array'} =
1433 $self->adaptor()->db()->get_ExonAdaptor()
1434 ->fetch_all_by_Transcript($self);
1435 }
1436
1437 my @result;
1438 if ( defined($constitutive) && $constitutive != 0 ) {
1439 foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
1440 if ( $exon->is_constitutive() ) {
1441 push( @result, $exon );
1442 }
1443 }
1444 } else {
1445 @result = @{ $self->{'_trans_exon_array'} };
1446 }
1447
1448 return \@result;
1449 } ## end sub get_all_Exons
1450
1451 =head2 get_all_constitutive_Exons
1452
1453 Arg : None
1454
1455 Examples : my @exons = @{ $transcript->get_all_constitutive_Exons() };
1456
1457 Description: Returns an listref of the constitutive exons in this
1458 transcript in order, i.e. the first exon in the
1459 listref is the 5prime most exon in the transcript.
1460
1461 Returntype : listref to Bio::EnsEMBL::Exon objects
1462 Exceptions : none
1463 Caller : general
1464 Status : Stable
1465
1466 =cut
1467
1468 sub get_all_constitutive_Exons {
1469 my ($self) = @_;
1470 return $self->get_all_Exons( '-constitutive' => 1 );
1471 }
1472
1473 =head2 get_all_IntronSupportingEvidence
1474
1475 Example : $ise->get_all_IntronSupportingEvidence();
1476 Description : Fetches all ISE instances linked to this Transript
1477 Returntype : ArrayRef[Bio::EnsEMBL::IntronSupportEvidence] retrieved from
1478 the DB or from those added via C<add_IntronSupportingEvidence>
1479 Exceptions : None
1480
1481 =cut
1482
1483 sub get_all_IntronSupportingEvidence {
1484 my ($self) = @_;
1485 if(! defined $self->{_ise_array} && defined $self->adaptor()) {
1486 my $isea = $self->adaptor()->db()->get_IntronSupportingEvidenceAdaptor();
1487 $self->{_ise_array} = $isea->fetch_all_by_Transcript($self);
1488 }
1489 return $self->{_ise_array};
1490 }
1491
1492
1493 =head2 add_IntronSupportingEvidence
1494
1495 Arg [1] : Bio::EnsEMBL::IntronSupportEvidence Object to add
1496 Example : $ise->add_IntronSupportingEvidence($ise);
1497 Description : Adds the IntronSupportEvidence instance to this Transcript. The
1498 code checks to see if it is a unique ISE instance
1499 Returntype : Boolean; true means it was added. False means it was not
1500 as this ISE was already attached
1501 Exceptions : None
1502
1503 =cut
1504
1505 sub add_IntronSupportingEvidence {
1506 my ($self, $ise) = @_;
1507 assert_ref($ise, 'Bio::EnsEMBL::IntronSupportingEvidence', 'IntronSupportingEvidence');
1508 my $unique = 1;
1509 foreach my $other_ise (@{$self->{_ise_array}}) {
1510 if($ise->equals($other_ise)) {
1511 $unique = 0;
1512 last;
1513 }
1514 }
1515 if($unique) {
1516 push(@{$self->{_ise_array}}, $ise);
1517 return 1;
1518 }
1519 return 0;
1520 }
1521
1522 =head2 get_all_Introns
1523
1524 Arg [1] : none
1525 Example : my @introns = @{$transcript->get_all_Introns()};
1526 Description: Returns an listref of the introns in this transcript in order.
1527 i.e. the first intron in the listref is the 5prime most exon in
1528 the transcript.
1529 Returntype : listref to Bio::EnsEMBL::Intron objects
1530 Exceptions : none
1531 Caller : general
1532 Status : Stable
1533
1534 =cut
1535
1536 sub get_all_Introns {
1537 my ($self) = @_;
1538 if( ! defined $self->{'_trans_exon_array'} && defined $self->adaptor() ) {
1539 $self->{'_trans_exon_array'} = $self->adaptor()->db()->
1540 get_ExonAdaptor()->fetch_all_by_Transcript( $self );
1541 }
1542
1543 my @introns=();
1544 my @exons = @{$self->{'_trans_exon_array'}};
1545 for(my $i=0; $i < scalar(@exons)-1; $i++){
1546 my $intron = new Bio::EnsEMBL::Intron($exons[$i],$exons[$i+1]);
1547 push(@introns, $intron)
1548 }
1549 return \@introns;
1550 }
1551
1552
1553 =head2 length
1554
1555 Args : none
1556 Example : my $t_length = $transcript->length
1557 Description: Returns the sum of the length of all the exons in the transcript.
1558 Returntype : int
1559 Exceptions : none
1560 Caller : general
1561 Status : Stable
1562
1563 =cut
1564
1565 sub length {
1566 my( $self ) = @_;
1567
1568 my $length = 0;
1569 foreach my $ex (@{$self->get_all_Exons}) {
1570 $length += $ex->length;
1571 }
1572
1573 # adjust the length if post transcriptional edits are enabled
1574 if($self->edits_enabled()) {
1575 foreach my $se (@{$self->get_all_SeqEdits()}) {
1576 $length += $se->length_diff();
1577 }
1578 }
1579
1580 return $length;
1581 }
1582
1583
1584 =head2 flush_Exons
1585
1586 Arg [1] : none
1587 Example : $transcript->flush_Exons();
1588 Description: Removes all Exons from this transcript and flushes related
1589 internal caches.
1590 Returntype : none
1591 Exceptions : none
1592 Caller : general
1593 Status : Stable
1594
1595 =cut
1596
1597 sub flush_Exons {
1598 my ($self) = @_;
1599
1600 $self->{'transcript_mapper'} = undef;
1601 $self->{'coding_region_start'} = undef;
1602 $self->{'coding_region_end'} = undef;
1603 $self->{'cdna_coding_start'} = undef;
1604 $self->{'cdna_coding_end'} = undef;
1605 $self->{'start'} = undef;
1606 $self->{'end'} = undef;
1607 $self->{'strand'} = undef;
1608
1609 $self->{'_trans_exon_array'} = [];
1610 }
1611
1612 =head2 flush_IntronSupportingEvidence
1613
1614 Example : $transcript->flush_IntronSupportingEvidence();
1615 Description: Removes all IntronSupportingEvidence from this transcript
1616 Returntype : none
1617 Exceptions : none
1618 Caller : general
1619 Status : Stable
1620
1621 =cut
1622
1623 sub flush_IntronSupportingEvidence {
1624 my ($self) = @_;
1625 $self->{_ise_array} = [];
1626 return;
1627 }
1628
1629 =head2 five_prime_utr
1630
1631 Arg [1] : none
1632 Example : my $five_prime = $transcrpt->five_prime_utr
1633 or warn "No five prime UTR";
1634 Description: Obtains a Bio::Seq object of the five prime UTR of this
1635 transcript. If this transcript is a pseudogene
1636 (i.e. non-translating) or has no five prime UTR undef is
1637 returned instead.
1638 Returntype : Bio::Seq or undef
1639 Exceptions : none
1640 Caller : general
1641 Status : Stable
1642
1643 =cut
1644
1645 sub five_prime_utr {
1646 my $self = shift;
1647
1648 my $cdna_coding_start = $self->cdna_coding_start();
1649
1650 return undef if(!$cdna_coding_start);
1651
1652 my $seq = substr($self->spliced_seq, 0, $cdna_coding_start - 1);
1653
1654 return undef if(!$seq);
1655
1656 return
1657 Bio::Seq->new( -id => $self->display_id,
1658 -moltype => 'dna',
1659 -alphabet => 'dna',
1660 -seq => $seq );
1661 }
1662
1663
1664 =head2 three_prime_utr
1665
1666 Arg [1] : none
1667 Example : my $three_prime = $transcrpt->three_prime_utr
1668 or warn "No three prime UTR";
1669 Description: Obtains a Bio::Seq object of the three prime UTR of this
1670 transcript. If this transcript is a pseudogene
1671 (i.e. non-translating) or has no three prime UTR,
1672 undef is returned instead.
1673 Returntype : Bio::Seq or undef
1674 Exceptions : none
1675 Caller : general
1676 Status : Stable
1677
1678 =cut
1679
1680 sub three_prime_utr {
1681 my $self = shift;
1682
1683 my $cdna_coding_end = $self->cdna_coding_end();
1684
1685 return undef if(!$cdna_coding_end);
1686
1687 my $seq = substr($self->spliced_seq, $cdna_coding_end);
1688
1689 return undef if(!$seq);
1690
1691 return
1692 Bio::Seq->new( -id => $self->display_id,
1693 -moltype => 'dna',
1694 -alphabet => 'dna',
1695 -seq => $seq );
1696 }
1697
1698 =head2 five_prime_utr_Feature
1699
1700 Example : my $five_prime = $transcrpt->five_prime_utr_Feature
1701 or warn "No five prime UTR";
1702 Description: Returns the genomic coordinates of the start and end of the
1703 5' UTR of this transcript. Note that if you want the sequence
1704 of the 5' UTR use C<five_prime_utr> as this will return the
1705 sequence from the spliced transcript.
1706 Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
1707 Exceptions : none
1708
1709 =cut
1710
1711 sub five_prime_utr_Feature {
1712 my ($self) = @_;
1713 my ($start, $end);
1714 my $cdna_coding = $self->cdna_coding_start();
1715 my ($genomic_pos) = $self->cdna2genomic($cdna_coding, $cdna_coding);
1716 if($self->strand() == 1) {
1717 $start = $self->seq_region_start();
1718 if($start == $genomic_pos->start()) {
1719 return; # just return as we have no UTR
1720 }
1721 $end = $genomic_pos->start() - 1;
1722 }
1723 else {
1724 $end = $self->seq_region_end();
1725 if($end == $genomic_pos->start()) {
1726 return; # just return as we have no UTR
1727 }
1728 $start = $genomic_pos->start() + 1;
1729 }
1730
1731 my $feature = Bio::EnsEMBL::Feature->new(
1732 -START => $start,
1733 -END => $end,
1734 -STRAND => $self->strand(),
1735 -SLICE => $self->slice(),
1736 );
1737 return $feature;
1738 }
1739
1740 =head2 three_prime_utr_Feature
1741
1742 Example : my $five_prime = $transcrpt->three_prime_utr_Feature
1743 or warn "No three prime UTR";
1744 Description: Returns the genomic coordinates of the start and end of the
1745 3' UTR of this transcript. Note that if you want the sequence
1746 of the 3' UTR use C<three_prime_utr> as this will return the
1747 sequence from the spliced transcript.
1748 Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
1749 Exceptions : none
1750
1751 =cut
1752
1753 sub three_prime_utr_Feature {
1754 my ($self) = @_;
1755 my ($start, $end);
1756 my $cdna_coding = $self->cdna_coding_end();
1757 my ($genomic_pos) = $self->cdna2genomic($cdna_coding, $cdna_coding);
1758 if($self->strand() == 1) {
1759 $end = $self->seq_region_end();
1760 if($end == $genomic_pos->start()) {
1761 return; # just return as we have no UTR
1762 }
1763 $start = $genomic_pos->start() + 1;
1764 }
1765 else {
1766 $start = $self->seq_region_start();
1767 if($start == $genomic_pos->start()) {
1768 return; # just return as we have no UTR
1769 }
1770 $end = $genomic_pos->start() - 1;
1771 }
1772 my $feature = Bio::EnsEMBL::Feature->new(
1773 -START => $start,
1774 -END => $end,
1775 -STRAND => $self->strand(),
1776 -SLICE => $self->slice(),
1777 );
1778 return $feature;
1779 }
1780
1781
1782 =head2 get_all_translateable_Exons
1783
1784 Args : none
1785 Example : none
1786 Description: Returns a list of exons that translate with the
1787 start and end exons truncated to the CDS regions.
1788 This function does not take into account any SeqEdits
1789 (post transcriptional RNA modifictions) when constructing the
1790 the 'translateable' exons, and it does not update the phase
1791 information of the created 'translateable' exons.
1792
1793 If this transcript is a pseudogene (i.e. non-translateable)
1794 a reference to an empty list is returned.
1795
1796 Returntype : listref Bio::EnsEMBL::Exon
1797 Exceptions : throw if translation has invalid information
1798 Caller : Genebuild
1799 Status : Stable
1800
1801 =cut
1802
1803
1804 sub get_all_translateable_Exons {
1805 my ( $self ) = @_;
1806
1807 #return an empty list if there is no translation (i.e. pseudogene)
1808 my $translation = $self->translation or return [];
1809 my $start_exon = $translation->start_Exon;
1810 my $end_exon = $translation->end_Exon;
1811 my $t_start = $translation->start;
1812 my $t_end = $translation->end;
1813
1814 my( @translateable );
1815
1816 foreach my $ex (@{$self->get_all_Exons}) {
1817
1818 if ($ex ne $start_exon and ! @translateable) {
1819 next; # Not yet in translated region
1820 }
1821
1822 my $length = $ex->length;
1823
1824 my $adjust_start = 0;
1825 my $adjust_end = 0;
1826 # Adjust to translation start if this is the start exon
1827 if ($ex == $start_exon ) {
1828 if ($t_start < 1 or $t_start > $length) {
1829 warning("WARN: Translation start '$t_start' is outside exon $ex length=$length");
1830 return [];
1831 }
1832 $adjust_start = $t_start - 1;
1833 }
1834
1835 # Adjust to translation end if this is the end exon
1836 if ($ex == $end_exon) {
1837 # if ($t_end < 1 or $t_end > $length) {
1838 # throw("Translation end '$t_end' is outside exon $ex length=$length");
1839 # }
1840 $adjust_end = $t_end - $length;
1841 }
1842
1843 # Make a truncated exon if the translation start or
1844 # end causes the coordinates to be altered.
1845 if ($adjust_end || $adjust_start) {
1846 my $newex = $ex->adjust_start_end( $adjust_start, $adjust_end );
1847
1848 push( @translateable, $newex );
1849 } else {
1850 push(@translateable, $ex);
1851 }
1852
1853 # Exit the loop when we've found the last exon
1854 last if $ex eq $end_exon;
1855 }
1856 return \@translateable;
1857 }
1858
1859
1860 =head2 translate
1861
1862 Args : none
1863 Example : none
1864 Description: Return the peptide (plus eventual stop codon) for
1865 this transcript. Does N-padding of non-phase
1866 matching exons. It uses translateable_seq
1867 internally. Returns undef if this Transcript does
1868 not have a translation (i.e. pseudogene).
1869 Returntype : Bio::Seq or undef
1870 Exceptions : none
1871 Caller : general
1872 Status : Stable
1873
1874 =cut
1875
1876 sub translate {
1877 my ($self) = @_;
1878
1879 if ( !defined( $self->translation() ) ) { return undef }
1880
1881 my $mrna = $self->translateable_seq();
1882
1883 # Alternative codon tables (such as the mitochondrial codon table)
1884 # can be specified for a sequence region via the seq_region_attrib
1885 # table. A list of codon tables and their codes is at:
1886 # http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
1887
1888 my $codon_table_id;
1889 my ( $complete5, $complete3 );
1890 if ( defined( $self->slice() ) ) {
1891 my $attrib;
1892
1893 ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
1894 if ( defined($attrib) ) {
1895 $codon_table_id = $attrib->value();
1896 }
1897
1898 ($attrib) = @{ $self->slice()->get_all_Attributes('complete5') };
1899 if ( defined($attrib) ) {
1900 $complete5 = $attrib->value();
1901 }
1902
1903 ($attrib) = @{ $self->slice()->get_all_Attributes('complete3') };
1904 if ( defined($attrib) ) {
1905 $complete3 = $attrib->value();
1906 }
1907 }
1908 $codon_table_id ||= 1; # default vertebrate codon table
1909
1910 # Remove final stop codon from the mrna if it is present. Produced
1911 # peptides will not have '*' at end. If terminal stop codon is
1912 # desired call translatable_seq directly and produce a translation
1913 # from it.
1914
1915 if ( CORE::length($mrna) % 3 == 0 ) {
1916 my $codon_table =
1917 Bio::Tools::CodonTable->new( -id => $codon_table_id );
1918
1919 if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) {
1920 substr( $mrna, -3, 3, '' );
1921 }
1922 }
1923
1924 if ( CORE::length($mrna) < 1 ) { return undef }
1925
1926 my $display_id = $self->translation->display_id()
1927 || scalar( $self->translation() );
1928
1929 my $peptide = Bio::Seq->new( -seq => $mrna,
1930 -moltype => 'dna',
1931 -alphabet => 'dna',
1932 -id => $display_id );
1933
1934 my $translation =
1935 $peptide->translate( undef, undef, undef, $codon_table_id, undef,
1936 undef, $complete5, $complete3 );
1937
1938 if ( $self->edits_enabled() ) {
1939 $self->translation()->modify_translation($translation);
1940 }
1941
1942 return $translation;
1943 } ## end sub translate
1944
1945
1946 =head2 seq
1947
1948 Description: Returns a Bio::Seq object which consists of just
1949 : the sequence of the exons concatenated together,
1950 : without messing about with padding with N\'s from
1951 : Exon phases like B<dna_seq> does.
1952 Args : none
1953 Example : none
1954 Returntype : Bio::Seq
1955 Exceptions : none
1956 Caller : general
1957 Status : Stable
1958
1959 =cut
1960
1961 sub seq {
1962 my ($self) = @_;
1963
1964 return
1965 Bio::Seq->new( -id => $self->display_id,
1966 -moltype => 'dna',
1967 -alphabet => 'dna',
1968 -seq => $self->spliced_seq );
1969 }
1970
1971
1972 =head2 pep2genomic
1973
1974 Description: See Bio::EnsEMBL::TranscriptMapper::pep2genomic
1975
1976 =cut
1977
1978 sub pep2genomic {
1979 my $self = shift;
1980 return $self->get_TranscriptMapper()->pep2genomic(@_);
1981 }
1982
1983
1984 =head2 genomic2pep
1985
1986 Description: See Bio::EnsEMBL::TranscriptMapper::genomic2pep
1987
1988 =cut
1989
1990 sub genomic2pep {
1991 my $self = shift;
1992 return $self->get_TranscriptMapper()->genomic2pep(@_);
1993 }
1994
1995
1996 =head2 cdna2genomic
1997
1998 Description: See Bio::EnsEMBL::TranscriptMapper::cdna2genomic
1999
2000 =cut
2001
2002 sub cdna2genomic {
2003 my $self = shift;
2004 return $self->get_TranscriptMapper()->cdna2genomic(@_);
2005 }
2006
2007
2008 =head2 genomic2cdna
2009
2010 Description: See Bio::EnsEMBL::TranscriptMapper::genomic2cdna
2011
2012 =cut
2013
2014 sub genomic2cdna {
2015 my $self = shift;
2016 return $self->get_TranscriptMapper->genomic2cdna(@_);
2017 }
2018
2019
2020 =head2 get_TranscriptMapper
2021
2022 Args : none
2023 Example : my $trans_mapper = $transcript->get_TranscriptMapper();
2024 Description: Gets a TranscriptMapper object which can be used to perform
2025 a variety of coordinate conversions relating this transcript,
2026 genomic sequence and peptide resulting from this transcripts
2027 translation.
2028 Returntype : Bio::EnsEMBL::TranscriptMapper
2029 Exceptions : none
2030 Caller : cdna2genomic, pep2genomic, genomic2cdna, cdna2genomic
2031 Status : Stable
2032
2033 =cut
2034
2035 sub get_TranscriptMapper {
2036 my ( $self ) = @_;
2037 return $self->{'transcript_mapper'} ||=
2038 Bio::EnsEMBL::TranscriptMapper->new($self);
2039 }
2040
2041
2042 =head2 start_Exon
2043
2044 Title : start_Exon
2045 Usage : $start_exon = $transcript->start_Exon;
2046 Returntype : Bio::EnsEMBL::Exon
2047 Description : The first exon in the transcript.
2048 Args : NONE
2049 Status : Stable
2050
2051 =cut
2052
2053 sub start_Exon {
2054 my $self = shift;
2055 return $self->get_all_Exons()->[0];
2056 }
2057
2058
2059 =head2 end_Exon
2060
2061 Title : end_exon
2062 Usage : $end_exon = $transcript->end_Exon;
2063 Description : The last exon in the transcript.
2064 Returntype : Bio::EnsEMBL::Exon
2065 Args : NONE
2066 Status : Stable
2067
2068 =cut
2069
2070 sub end_Exon {
2071 my $self = shift;
2072 return $self->get_all_Exons()->[-1];
2073 }
2074
2075
2076 =head2 description
2077
2078 Title : description
2079 Usage : $obj->description($newval)
2080 Function:
2081 Returns : String
2082 Args : newvalue (optional)
2083 Status : Stable
2084
2085 =cut
2086
2087 sub description {
2088 my $self = shift;
2089 $self->{'description'} = shift if( @_ );
2090 return $self->{'description'};
2091 }
2092
2093
2094 =head2 version
2095
2096 Title : version
2097 Usage : $obj->version()
2098 Function:
2099 Returns : String
2100 Args :
2101 Status : Stable
2102
2103 =cut
2104
2105 sub version {
2106 my $self = shift;
2107 $self->{'version'} = shift if( @_ );
2108 return $self->{'version'};
2109 }
2110
2111
2112 =head2 stable_id
2113
2114 Title : stable_id
2115 Usage : $obj->stable_id
2116 Function:
2117 Returns : String
2118 Args :
2119 Status : Stable
2120
2121 =cut
2122
2123 sub stable_id {
2124 my $self = shift;
2125 $self->{'stable_id'} = shift if( @_ );
2126 return $self->{'stable_id'};
2127 }
2128
2129
2130 =head2 is_current
2131
2132 Arg [1] : Boolean $is_current
2133 Example : $transcript->is_current(1)
2134 Description: Getter/setter for is_current state of this transcript.
2135 Returntype : Int
2136 Exceptions : none
2137 Caller : general
2138 Status : Stable
2139
2140 =cut
2141
2142 sub is_current {
2143 my $self = shift;
2144 $self->{'is_current'} = shift if (@_);
2145 return $self->{'is_current'};
2146 }
2147
2148
2149 =head2 created_date
2150
2151 Arg [1] : (optional) string to be used for the created date
2152 Example : none
2153 Description: get/set for attribute created date
2154 Returntype : string
2155 Exceptions : none
2156 Caller : general
2157 Status : Stable
2158
2159 =cut
2160
2161 sub created_date {
2162 my $self = shift;
2163 $self->{'created_date'} = shift if ( @_ );
2164 return $self->{'created_date'};
2165 }
2166
2167
2168 =head2 modified_date
2169
2170 Arg [1] : (optional) string to be used for the modified date
2171 Example : none
2172 Description: get/set for attribute modified date
2173 Returntype : string
2174 Exceptions : none
2175 Caller : general
2176 Status : Stable
2177
2178 =cut
2179
2180 sub modified_date {
2181 my $self = shift;
2182 $self->{'modified_date'} = shift if ( @_ );
2183 return $self->{'modified_date'};
2184 }
2185
2186
2187 =head2 swap_exons
2188
2189 Arg [1] : Bio::EnsEMBL::Exon $old_Exon
2190 An exon that should be replaced
2191 Arg [2] : Bio::EnsEMBL::Exon $new_Exon
2192 The replacement Exon
2193 Example : none
2194 Description: exchange an exon in the current Exon list with a given one.
2195 Usually done before storing of Gene, so the Exons can
2196 be shared between Transcripts.
2197 Returntype : none
2198 Exceptions : none
2199 Caller : GeneAdaptor->store()
2200 Status : Stable
2201
2202 =cut
2203
2204 sub swap_exons {
2205 my ( $self, $old_exon, $new_exon ) = @_;
2206
2207 my $arref = $self->{'_trans_exon_array'};
2208 for(my $i = 0; $i < @$arref; $i++) {
2209 if($arref->[$i] == $old_exon) {
2210 $arref->[$i] = $new_exon;
2211 last;
2212 }
2213 }
2214
2215 if( defined $self->{'translation'} ) {
2216 if( $self->translation()->start_Exon() == $old_exon ) {
2217 $self->translation()->start_Exon( $new_exon );
2218 }
2219 if( $self->translation()->end_Exon() == $old_exon ) {
2220 $self->translation()->end_Exon( $new_exon );
2221 }
2222 }
2223 }
2224
2225
2226 =head2 equals
2227
2228 Arg [1] : Bio::EnsEMBL::Transcript transcript
2229 Example : if ($transcriptA->equals($transcriptB)) { ... }
2230 Description : Compares two transcripts for equality.
2231 The test for eqality goes through the following list
2232 and terminates at the first true match:
2233
2234 1. If Bio::EnsEMBL::Feature::equals() returns false,
2235 then the transcripts are *not* equal.
2236 2. If the biotypes differ, then the transcripts are
2237 *not* equal.
2238 3. If both transcripts have stable IDs: if these are
2239 the same, the transcripts are equal, otherwise not.
2240 4. If both transcripts have the same number of exons
2241 and if these are (when compared pair-wise sorted by
2242 start-position and length) the same, then they are
2243 equal, otherwise not.
2244
2245 Return type : Boolean (0, 1)
2246
2247 Exceptions : Thrown if a non-transcript is passed as the argument.
2248
2249 =cut
2250
2251 sub equals {
2252 my ( $self, $transcript ) = @_;
2253
2254 if ( !defined($transcript) ) { return 0 }
2255 if ( $self eq $transcript ) { return 1 }
2256
2257 assert_ref( $transcript, 'Bio::EnsEMBL::Transcript' );
2258
2259 my $feature_equals = $self->SUPER::equals($transcript);
2260 if ( defined($feature_equals) && $feature_equals == 0 ) {
2261 return 0;
2262 }
2263
2264 if ( $self->biotype() ne $transcript->biotype() ) {
2265 return 0;
2266 }
2267
2268 if ( defined( $self->stable_id() ) &&
2269 defined( $transcript->stable_id() ) )
2270 {
2271 if ( $self->stable_id() eq $transcript->stable_id() ) {
2272 return 1;
2273 }
2274 else {
2275 return 0;
2276 }
2277 }
2278
2279 my @self_exons = sort {
2280 $a->start() <=> $b->start() ||
2281 $a->length() <=> $b->length()
2282 } @{ $self->get_all_Exons() };
2283 my @transcript_exons = sort {
2284 $a->start() <=> $b->start() ||
2285 $a->length() <=> $b->length()
2286 } @{ $transcript->get_all_Exons() };
2287
2288 if ( scalar(@self_exons) != scalar(@transcript_exons) ) {
2289 return 0;
2290 }
2291
2292 while (@self_exons) {
2293 my $self_exon = shift(@self_exons);
2294 my $transcript_exon = shift(@transcript_exons);
2295
2296 if ( !$self_exon->equals($transcript_exon) ) {
2297 return 0;
2298 }
2299 }
2300
2301 return 1;
2302 } ## end sub equals
2303
2304 =head2 transform
2305
2306 Arg 1 : String $coordinate_system_name
2307 Arg [2] : String $coordinate_system_version
2308 Example : $transcript = $transcript->transform('contig');
2309 $transcript = $transcript->transform('chromosome', 'NCBI33');
2310 Description: Moves this Transcript to the given coordinate system.
2311 If this Transcript has Exons attached, they move as well.
2312 A new Transcript is returned. If the transcript cannot be
2313 transformed to the destination coordinate system undef is
2314 returned instead.
2315 Returntype : Bio::EnsEMBL::Transcript
2316 Exceptions : wrong parameters
2317 Caller : general
2318 Status : Medium Risk
2319 : deprecation needs to be removed at some time
2320
2321 =cut
2322
2323
2324 sub transform {
2325 my $self = shift;
2326
2327 # catch for old style transform calls
2328 if( ref $_[0] eq 'HASH') {
2329 deprecate("Calling transform with a hashref is deprecate.\n" .
2330 'Use $trans->transfer($slice) or ' .
2331 '$trans->transform("coordsysname") instead.');
2332 my (undef, $new_ex) = each(%{$_[0]});
2333 return $self->transfer($new_ex->slice);
2334 }
2335
2336 my $new_transcript = $self->SUPER::transform(@_);
2337 if ( !defined($new_transcript) ) {
2338 my @segments = @{ $self->project(@_) };
2339 # if it projects, maybe the exons transform well?
2340 # lazy load them here
2341 if ( !@segments ) {
2342 return undef;
2343 }
2344 $self->get_all_Exons();
2345 }
2346
2347
2348 if( exists $self->{'_trans_exon_array'} ) {
2349 my @new_exons;
2350 my ( $low_start, $hi_end, $slice );
2351 # we want to check whether the transform preserved 5prime 3prime
2352 # ordering. This assumes 5->3 order. No complaints on transsplicing.
2353
2354 my ( $last_new_start, $last_old_strand,
2355 $last_new_strand, $start_exon, $end_exon,
2356 $last_seq_region_name );
2357 my $first = 1;
2358 my $ignore_order = 0;
2359 my $order_broken = 0;
2360
2361 for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
2362 my $new_exon = $old_exon->transform( @_ );
2363 return undef if( !defined $new_exon );
2364 if( ! defined $new_transcript ) {
2365 if( !$first ) {
2366 if( $old_exon->strand() != $last_old_strand ) {
2367 # transsplicing, ignore ordering
2368 $ignore_order = 1;
2369 }
2370
2371 if( $new_exon->slice()->seq_region_name() ne
2372 $last_seq_region_name ) {
2373 return undef;
2374 }
2375
2376 if( $last_new_strand == 1 and
2377 $new_exon->start() < $last_new_start ) {
2378 $order_broken = 1;
2379 }
2380
2381 if( $last_new_strand == -1 and
2382 $new_exon->start() > $last_new_start ) {
2383 $order_broken = 1;
2384 }
2385
2386 #additional check that if exons were on same strand previously, they should be again
2387 if(($last_old_strand == $old_exon->strand()) and !($last_new_strand == $new_exon->strand())){
2388 return undef;
2389 }
2390
2391 if( $new_exon->start() < $low_start ) {
2392 $low_start = $new_exon->start();
2393 }
2394 if( $new_exon->end() > $hi_end ) {
2395 $hi_end = $new_exon->end();
2396 }
2397 } else {
2398 $first = 0;
2399 $low_start = $new_exon->start();
2400 $hi_end = $new_exon->end();
2401 }
2402
2403 $last_seq_region_name = $new_exon->slice()->seq_region_name();
2404 $last_old_strand = $old_exon->strand();
2405 $last_new_start = $new_exon->start();
2406 $last_new_strand = $new_exon->strand();
2407 }
2408
2409 if( defined $self->{'translation'} ) {
2410 if( $self->translation()->start_Exon() == $old_exon ) {
2411 $start_exon = $new_exon;
2412 }
2413 if( $self->translation()->end_Exon() == $old_exon ) {
2414 $end_exon = $new_exon;
2415 }
2416 }
2417 push( @new_exons, $new_exon );
2418 }
2419
2420 if( $order_broken && !$ignore_order ) {
2421 warning( "Order of exons broken in transform of ".$self->dbID() );
2422 return undef;
2423 }
2424
2425 if( !defined $new_transcript ) {
2426 %$new_transcript = %$self;
2427 bless $new_transcript, ref( $self );
2428 $new_transcript->start( $low_start );
2429 $new_transcript->end( $hi_end );
2430 $new_transcript->slice( $new_exons[0]->slice() );
2431 $new_transcript->strand( $new_exons[0]->strand() );
2432 }
2433
2434 $new_transcript->{'_trans_exon_array'} = \@new_exons;
2435
2436 # should be ok to do inside exon array loop
2437 # translations only exist together with the exons ...
2438
2439 if( defined $self->{'translation'} ) {
2440 my $new_translation;
2441 %$new_translation = %{$self->{'translation'}};;
2442 bless $new_translation, ref( $self->{'translation'} );
2443 $new_transcript->{'translation'} = $new_translation;
2444 $new_translation->start_Exon( $start_exon );
2445 $new_translation->end_Exon( $end_exon );
2446 }
2447 }
2448
2449 if( exists $self->{'_supporting_evidence'} ) {
2450 my @new_features;
2451 for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
2452 my $new_feature = $old_feature->transform( @_ );
2453 if (defined $new_feature) {
2454 push @new_features, $new_feature;
2455 }
2456 }
2457 $new_transcript->{'_supporting_evidence'} = \@new_features;
2458 }
2459
2460 if(exists $self->{_ise_array}) {
2461 my @new_features;
2462 foreach my $old_feature ( @{$self->{_ise_array}} ) {
2463 my $new_feature = $old_feature->transform(@_);
2464 push( @new_features, $new_feature );
2465 }
2466 $new_transcript->{_ise_array} = \@new_features;
2467 }
2468
2469
2470 # flush cached internal values that depend on the exon coords
2471 $new_transcript->{'transcript_mapper'} = undef;
2472 $new_transcript->{'coding_region_start'} = undef;
2473 $new_transcript->{'coding_region_end'} = undef;
2474 $new_transcript->{'cdna_coding_start'} = undef;
2475 $new_transcript->{'cdna_coding_end'} = undef;
2476
2477 return $new_transcript;
2478 }
2479
2480
2481 =head2 transfer
2482
2483 Arg 1 : Bio::EnsEMBL::Slice $destination_slice
2484 Example : $transcript = $transcript->transfer($slice);
2485 Description: Moves this transcript to the given slice.
2486 If this Transcripts has Exons attached, they move as well.
2487 Returntype : Bio::EnsEMBL::Transcript
2488 Exceptions : none
2489 Caller : general
2490 Status : Stable
2491
2492 =cut
2493
2494
2495 sub transfer {
2496 my $self = shift;
2497
2498 my $new_transcript = $self->SUPER::transfer( @_ );
2499 return undef unless $new_transcript;
2500
2501 if( defined $self->{'translation'} ) {
2502 my $new_translation;
2503 %$new_translation = %{$self->{'translation'}};;
2504 bless $new_translation, ref( $self->{'translation'} );
2505 $new_transcript->{'translation'} = $new_translation;
2506 }
2507
2508 if( exists $self->{'_trans_exon_array'} ) {
2509 my @new_exons;
2510 for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
2511 my $new_exon = $old_exon->transfer( @_ );
2512 if( defined $new_transcript->{'translation'} ) {
2513 if( $new_transcript->translation()->start_Exon() == $old_exon ) {
2514 $new_transcript->translation()->start_Exon( $new_exon );
2515 }
2516 if( $new_transcript->translation()->end_Exon() == $old_exon ) {
2517 $new_transcript->translation()->end_Exon( $new_exon );
2518 }
2519 }
2520 push( @new_exons, $new_exon );
2521 }
2522
2523 $new_transcript->{'_trans_exon_array'} = \@new_exons;
2524 }
2525
2526 if( exists $self->{'_supporting_evidence'} ) {
2527 my @new_features;
2528 for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
2529 my $new_feature = $old_feature->transfer( @_ );
2530 push( @new_features, $new_feature );
2531 }
2532 $new_transcript->{'_supporting_evidence'} = \@new_features;
2533 }
2534
2535 if(exists $self->{_ise_array}) {
2536 my @new_features;
2537 foreach my $old_feature ( @{$self->{_ise_array}} ) {
2538 my $new_feature = $old_feature->transfer(@_);
2539 push( @new_features, $new_feature );
2540 }
2541 $new_transcript->{_ise_array} = \@new_features;
2542 }
2543
2544
2545 # flush cached internal values that depend on the exon coords
2546 $new_transcript->{'transcript_mapper'} = undef;
2547 $new_transcript->{'coding_region_start'} = undef;
2548 $new_transcript->{'coding_region_end'} = undef;
2549 $new_transcript->{'cdna_coding_start'} = undef;
2550 $new_transcript->{'cdna_coding_end'} = undef;
2551
2552 return $new_transcript;
2553 }
2554
2555
2556 =head recalculate_coordinates
2557
2558 Args : none
2559 Example : none
2560 Description: called when exon coordinate change happened to recalculate the
2561 coords of the transcript. This method should be called if one
2562 of the exons has been changed.
2563 Returntype : none
2564 Exceptions : none
2565 Caller : internal
2566 Status : Stable
2567
2568 =cut
2569
2570 sub recalculate_coordinates {
2571 my ($self) = @_;
2572
2573 my $exons = $self->get_all_Exons();
2574
2575 if ( !$exons || !@{$exons} ) { return }
2576
2577 my ( $slice, $start, $end, $strand );
2578
2579 my $e_index;
2580 for ( $e_index = 0; $e_index < @{$exons}; $e_index++ ) {
2581 my $e = $exons->[$e_index];
2582
2583 # Skip missing or unmapped exons!
2584 if ( defined($e) && defined( $e->start() ) ) {
2585 $slice = $e->slice();
2586 $strand = $e->strand();
2587 $start = $e->start();
2588 $end = $e->end();
2589
2590 last;
2591 }
2592 }
2593
2594 my $transsplicing = 0;
2595
2596 # Start loop after first exon with coordinates
2597 for ( ; $e_index < @{$exons}; $e_index++ ) {
2598 my $e = $exons->[$e_index];
2599
2600 # Skip missing or unmapped exons!
2601 if ( !defined($e) || !defined( $e->start() ) ) { next }
2602
2603 if ( $e->start() < $start ) {
2604 $start = $e->start();
2605 }
2606
2607 if ( $e->end() > $end ) {
2608 $end = $e->end();
2609 }
2610
2611 if ( defined($slice)
2612 && $e->slice()
2613 && $e->slice()->name() ne $slice->name() )
2614 {
2615 throw( "Exons with different slices "
2616 . "are not allowed on one Transcript" );
2617 }
2618
2619 if ( $e->strand() != $strand ) {
2620 $transsplicing = 1;
2621 }
2622 } ## end for ( ; $e_index < @{$exons...})
2623 if ($transsplicing) {
2624 warning("Transcript contained trans splicing event");
2625 }
2626
2627 $self->start($start);
2628 $self->end($end);
2629 $self->strand($strand);
2630 $self->slice($slice);
2631
2632 # flush cached internal values that depend on the exon coords
2633 $self->{'transcript_mapper'} = undef;
2634 $self->{'coding_region_start'} = undef;
2635 $self->{'coding_region_end'} = undef;
2636 $self->{'cdna_coding_start'} = undef;
2637 $self->{'cdna_coding_end'} = undef;
2638 } ## end sub recalculate_coordinates
2639
2640
2641 =head2 display_id
2642
2643 Arg [1] : none
2644 Example : print $transcript->display_id();
2645 Description: This method returns a string that is considered to be
2646 the 'display' identifier. For transcripts this is (depending on
2647 availability and in this order) the stable Id, the dbID or an
2648 empty string.
2649 Returntype : string
2650 Exceptions : none
2651 Caller : web drawing code
2652 Status : Stable
2653
2654 =cut
2655
2656 sub display_id {
2657 my $self = shift;
2658 return $self->{'stable_id'} || $self->dbID || '';
2659 }
2660
2661
2662 =head2 get_all_peptide_variations
2663
2664 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations
2665 Status : At Risk
2666 : Will be replaced with modules from the ensembl-variation package
2667
2668
2669 =cut
2670
2671 sub get_all_peptide_variations {
2672 my ($self, $source, $snps) = @_;
2673
2674 if(!$snps) {
2675 my $shash = Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs($self, $source);
2676 $snps = $shash->{'coding'};
2677 }
2678
2679 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations($self,
2680 $snps);
2681 }
2682
2683
2684 =head2 get_all_SNPs
2685
2686 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs
2687
2688 Status : At Risk
2689 : Will be replaced with modules from the ensembl-variation package
2690
2691 =cut
2692
2693 sub get_all_SNPs {
2694 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs(@_);
2695 }
2696
2697
2698 =head2 get_all_cdna_SNPs
2699
2700 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs
2701
2702 Status : At Risk
2703 : Will be replaced with modules from the ensembl-variation package
2704
2705 =cut
2706
2707 sub get_all_cdna_SNPs {
2708 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs(@_);
2709 }
2710
2711
2712 =head2 get_all_DASFactories
2713
2714 Arg [1] : none
2715 Function : Retrieves a listref of registered DAS objects
2716 Returntype: [ DAS_objects ]
2717 Exceptions:
2718 Caller :
2719 Example : $dasref = $prot->get_all_DASFactories
2720 Status : Stable
2721
2722 =cut
2723
2724 sub get_all_DASFactories {
2725 my $self = shift;
2726 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2727 }
2728
2729
2730 =head2 get_all_DAS_Features
2731
2732 Arg [1] : none
2733 Example : $features = $prot->get_all_DAS_Features;
2734 Description: Retreives a hash reference to a hash of DAS feature
2735 sets, keyed by the DNS, NOTE the values of this hash
2736 are an anonymous array containing:
2737 (1) a pointer to an array of features;
2738 (2) a pointer to the DAS stylesheet
2739 Returntype : hashref of Bio::SeqFeatures
2740 Exceptions : ?
2741 Caller : webcode
2742 Status : Stable
2743
2744
2745 =cut
2746
2747 sub get_all_DAS_Features {
2748 my ($self,@args) = @_;
2749
2750 my $db = $self->adaptor->db;
2751 my $GeneAdaptor = $db->get_GeneAdaptor;
2752 my $Gene = $GeneAdaptor->fetch_by_transcript_stable_id($self->stable_id);
2753 my $slice = $Gene->feature_Slice;
2754 return $self->SUPER::get_all_DAS_Features($slice);
2755 }
2756
2757
2758
2759 =head2 _compare_xrefs
2760
2761 Description: compare xrefs based on priority (descending), then
2762 name (ascending), then display_label (ascending)
2763
2764 =cut
2765
2766 sub _compare_xrefs {
2767 # compare on priority first (descending)
2768 if ( $a->priority() != $b->priority() ) {
2769 return $b->priority() <=> $a->priority();
2770 } else {
2771 # equal priorities, compare on external_db name
2772 if ( $a->dbname() ne $b->dbname() ) {
2773 return $a->dbname() cmp $b->dbname();
2774 } else {
2775 # equal priorities and names, compare on display_label
2776 return $a->display_id() cmp $b->display_id();
2777 }
2778 }
2779 }
2780
2781
2782 =head2 load
2783
2784 Arg [1] : Boolean $load_xrefs
2785 Load (or don't load) xrefs. Default is to load xrefs.
2786 Example : $transcript->load();
2787 Description : The Ensembl API makes extensive use of
2788 lazy-loading. Under some circumstances (e.g.,
2789 when copying genes between databases), all data of
2790 an object needs to be fully loaded. This method
2791 loads the parts of the object that are usually
2792 lazy-loaded. It will also call the equivalent
2793 method on any translation and on all exons of the
2794 transcript.
2795 Returntype : None
2796
2797 =cut
2798
2799 sub load {
2800 my ( $self, $load_xrefs ) = @_;
2801
2802 if ( !defined($load_xrefs) ) { $load_xrefs = 1 }
2803
2804 my $translation = $self->translation();
2805 if ( defined($translation) ) {
2806 $translation->load($load_xrefs);
2807
2808 my $alt_translations = $self->get_all_alternative_translations();
2809
2810 if ( defined($alt_translations) ) {
2811 foreach my $alt_translation ( @{$alt_translations} ) {
2812 $alt_translation->load($load_xrefs);
2813 }
2814 }
2815 }
2816
2817 foreach my $exon ( @{ $self->get_all_Exons() } ) {
2818 $exon->load();
2819 }
2820
2821 $self->stable_id();
2822 $self->analysis();
2823 $self->get_all_Attributes();
2824 $self->get_all_supporting_features();
2825
2826 if ($load_xrefs) {
2827 $self->get_all_DBEntries();
2828 }
2829
2830 } ## end sub load
2831
2832 =head2 summary_as_hash
2833
2834 Example : $transcript_summary = $transcript->summary_as_hash();
2835 Description : Extends Feature::summary_as_hash
2836 Retrieves a summary of this Transcript.
2837 Returns : hashref of descriptive strings
2838 Status : Intended for internal use
2839 =cut
2840
2841 sub summary_as_hash {
2842 my $self = shift;
2843 my $summary_ref = $self->SUPER::summary_as_hash;
2844 $summary_ref->{'description'} = $self->description;
2845 $summary_ref->{'biotype'} = $self->biotype;
2846 my $parent_gene = $self->get_Gene();
2847 $summary_ref->{'Parent'} = $parent_gene->display_id;
2848 return $summary_ref;
2849 }
2850
2851 =head2 get_Gene
2852
2853 Example : $gene = $transcript->get_Gene;
2854 Description : Locates the parent Gene using a transcript dbID
2855 Returns : Bio::EnsEMBL::Gene
2856
2857 =cut
2858
2859 sub get_Gene {
2860 my $self = shift;
2861 my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor();
2862 my $parent_gene = $gene_adaptor->fetch_by_transcript_id($self->dbID);
2863 return $parent_gene;
2864 }
2865
2866 ###########################
2867 # DEPRECATED METHODS FOLLOW
2868 ###########################
2869
2870 =head2 sort
2871
2872 Description: DEPRECATED. This method is no longer needed. Exons are sorted
2873 automatically when added to the transcript.
2874
2875 =cut
2876
2877 sub sort {
2878 my $self = shift;
2879
2880 deprecate( "Exons are kept sorted, you dont have to call sort any more" );
2881 # Fetch all the features
2882 my @exons = @{$self->get_all_Exons()};
2883
2884 # Empty the feature table
2885 $self->flush_Exons();
2886
2887 # Now sort the exons and put back in the feature table
2888 my $strand = $exons[0]->strand;
2889
2890 if ($strand == 1) {
2891 @exons = sort { $a->start <=> $b->start } @exons;
2892 } elsif ($strand == -1) {
2893 @exons = sort { $b->start <=> $a->start } @exons;
2894 }
2895
2896 foreach my $e (@exons) {
2897 $self->add_Exon($e);
2898 }
2899 }
2900
2901
2902 # _translation_id
2903 # Usage : DEPRECATED - not needed anymore
2904
2905 sub _translation_id {
2906 my $self = shift;
2907 deprecate( "This method shouldnt be necessary any more" );
2908 if( @_ ) {
2909 my $value = shift;
2910 $self->{'_translation_id'} = $value;
2911 }
2912 return $self->{'_translation_id'};
2913
2914 }
2915
2916
2917 =head2 created
2918
2919 Description: DEPRECATED - this attribute is not part of transcript anymore
2920
2921 =cut
2922
2923 sub created{
2924 my $obj = shift;
2925 deprecate( "This attribute is no longer supported" );
2926 if( @_ ) {
2927 my $value = shift;
2928 $obj->{'created'} = $value;
2929 }
2930 return $obj->{'created'};
2931 }
2932
2933
2934 =head2 modified
2935
2936 Description: DEPRECATED - this attribute is not part of transcript anymore
2937
2938 =cut
2939
2940 sub modified{
2941 my $obj = shift;
2942 deprecate( "This attribute is no longer supported" );
2943 if( @_ ) {
2944 my $value = shift;
2945 $obj->{'modified'} = $value;
2946 }
2947 return $obj->{'modified'};
2948 }
2949
2950
2951 =head2 temporary_id
2952
2953 Function: DEPRECATED: Use dbID or stable_id or something else instead
2954
2955 =cut
2956
2957 sub temporary_id{
2958 my ($obj,$value) = @_;
2959 deprecate( "I cant see what a temporary_id is good for, please use dbID" .
2960 "or stableID or\ntry without an id." );
2961 if( defined $value) {
2962 $obj->{'temporary_id'} = $value;
2963 }
2964 return $obj->{'temporary_id'};
2965 }
2966
2967
2968 =head2 type
2969
2970 Description: DEPRECATED. Use biotype() instead.
2971
2972 =cut
2973
2974 sub type {
2975 deprecate("Use biotype() instead");
2976 biotype(@_);
2977 }
2978
2979
2980 =head2 confidence
2981
2982 Description: DEPRECATED. Use status() instead.
2983
2984 =cut
2985
2986 sub confidence {
2987 deprecate("Use status() instead");
2988 status(@_);
2989 }
2990
2991
2992 1;
2993