0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 Creation:
|
|
28
|
|
29 my $tran = new Bio::EnsEMBL::Transcript();
|
|
30 my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
|
|
31
|
|
32 Manipulation:
|
|
33
|
|
34 # Returns an array of Exon objects
|
|
35 my @exons = @{ $tran->get_all_Exons() };
|
|
36
|
|
37 # Returns the peptide translation of the exons as a Bio::Seq
|
|
38 if ( $tran->translation() ) {
|
|
39 my $pep = $tran->translate();
|
|
40 } else {
|
|
41 print "Transcript ", $tran->stable_id(), " is non-coding\n";
|
|
42 }
|
|
43
|
|
44 =head1 DESCRIPTION
|
|
45
|
|
46 A representation of a transcript within the Ensembl system. A transcript
|
|
47 consists of a set of Exons and (possibly) a Translation which defines the
|
|
48 coding and non-coding regions of the exons.
|
|
49
|
|
50 =cut
|
|
51
|
|
52 package Bio::EnsEMBL::Transcript;
|
|
53
|
|
54 use strict;
|
|
55
|
|
56 use Bio::EnsEMBL::Feature;
|
|
57 use Bio::EnsEMBL::Intron;
|
|
58 use Bio::EnsEMBL::TranscriptMapper;
|
|
59 use Bio::EnsEMBL::Utils::TranscriptSNPs;
|
|
60 use Bio::EnsEMBL::SeqEdit;
|
|
61
|
|
62 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
63 use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw );
|
|
64 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
|
|
65
|
|
66 use vars qw(@ISA);
|
|
67 @ISA = qw(Bio::EnsEMBL::Feature);
|
|
68
|
|
69
|
|
70 =head2 new
|
|
71
|
|
72 Arg [-EXONS] :
|
|
73 reference to list of Bio::EnsEMBL::Exon objects - exons which make up
|
|
74 this transcript
|
|
75 Arg [-STABLE_ID] :
|
|
76 string - the stable identifier of this transcript
|
|
77 Arg [-VERSION] :
|
|
78 int - the version of the stable identifier of this transcript
|
|
79 Arg [-EXTERNAL_NAME] :
|
|
80 string - the external database name associated with this transcript
|
|
81 Arg [-EXTERNAL_DB] :
|
|
82 string - the name of the database the external name is from
|
|
83 Arg [-EXTERNAL_STATUS]:
|
|
84 string - the status of the external identifier
|
|
85 Arg [-DISPLAY_XREF]:
|
|
86 Bio::EnsEMBL::DBEntry - The external database entry that is used
|
|
87 to label this transcript when it is displayed.
|
|
88 Arg [-CREATED_DATE]:
|
|
89 string - the date the transcript was created
|
|
90 Arg [-MODIFIED_DATE]:
|
|
91 string - the date the transcript was last modified
|
|
92 Arg [-DESCRIPTION]:
|
|
93 string - the transcipts description
|
|
94 Arg [-BIOTYPE]:
|
|
95 string - the biotype e.g. "protein_coding"
|
|
96 Arg [-STATUS]:
|
|
97 string - the transcripts status i.e. "KNOWN","NOVEL"
|
|
98 Arg [-IS_CURRENT]:
|
|
99 Boolean - specifies if this is the current version of the transcript
|
|
100 Example : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
|
|
101 Description: Constructor. Instantiates a Transcript object.
|
|
102 Returntype : Bio::EnsEMBL::Transcript
|
|
103 Exceptions : throw on bad arguments
|
|
104 Caller : general
|
|
105 Status : Stable
|
|
106
|
|
107 =cut
|
|
108
|
|
109 sub new {
|
|
110 my $proto = shift;
|
|
111
|
|
112 my $class = ref($proto) || $proto;
|
|
113
|
|
114 my $self = $class->SUPER::new(@_);
|
|
115
|
|
116 my (
|
|
117 $exons, $stable_id, $version,
|
|
118 $external_name, $external_db, $external_status,
|
|
119 $display_xref, $created_date, $modified_date,
|
|
120 $description, $biotype, $confidence,
|
|
121 $external_db_name, $status, $is_current
|
|
122 );
|
|
123
|
|
124 # Catch for old style constructor calling:
|
|
125 if ( ( @_ > 0 ) && ref( $_[0] ) ) {
|
|
126 $exons = [@_];
|
|
127 deprecate( "Transcript constructor should use named arguments.\n"
|
|
128 . "Use Bio::EnsEMBL::Transcript->new(-EXONS => \@exons);\n"
|
|
129 . "instead of Bio::EnsEMBL::Transcript->new(\@exons);" );
|
|
130 } else {
|
|
131 (
|
|
132 $exons, $stable_id, $version,
|
|
133 $external_name, $external_db, $external_status,
|
|
134 $display_xref, $created_date, $modified_date,
|
|
135 $description, $biotype, $confidence,
|
|
136 $external_db_name, $status, $is_current
|
|
137 )
|
|
138 = rearrange( [
|
|
139 'EXONS', 'STABLE_ID',
|
|
140 'VERSION', 'EXTERNAL_NAME',
|
|
141 'EXTERNAL_DB', 'EXTERNAL_STATUS',
|
|
142 'DISPLAY_XREF', 'CREATED_DATE',
|
|
143 'MODIFIED_DATE', 'DESCRIPTION',
|
|
144 'BIOTYPE', 'CONFIDENCE',
|
|
145 'EXTERNAL_DB_NAME', 'STATUS',
|
|
146 'IS_CURRENT'
|
|
147 ],
|
|
148 @_
|
|
149 );
|
|
150 }
|
|
151
|
|
152 if ($exons) {
|
|
153 $self->{'_trans_exon_array'} = $exons;
|
|
154 $self->recalculate_coordinates();
|
|
155 }
|
|
156
|
|
157 $self->stable_id($stable_id);
|
|
158 $self->version($version);
|
|
159 $self->{'created_date'} = $created_date;
|
|
160 $self->{'modified_date'} = $modified_date;
|
|
161 $self->external_name($external_name) if ( defined $external_name );
|
|
162 $self->external_db($external_db) if ( defined $external_db );
|
|
163 $self->external_status($external_status)
|
|
164 if ( defined $external_status );
|
|
165 $self->display_xref($display_xref) if ( defined $display_xref );
|
|
166 $self->edits_enabled(1);
|
|
167
|
|
168 $self->description($description);
|
|
169 $self->status($confidence); # old style name
|
|
170 $self->status($status); # new style name
|
|
171 $self->biotype($biotype);
|
|
172
|
|
173 # default is_current
|
|
174 $is_current = 1 unless ( defined($is_current) );
|
|
175 $self->{'is_current'} = $is_current;
|
|
176
|
|
177 return $self;
|
|
178 } ## end sub new
|
|
179
|
|
180 =head2 get_all_DBLinks
|
|
181
|
|
182 Arg [1] : String database name (optional)
|
|
183 SQL wildcard characters (_ and %) can be used to
|
|
184 specify patterns.
|
|
185
|
|
186 Example : my @dblinks = @{ $transcript->get_all_DBLinks() };
|
|
187 my @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };
|
|
188
|
|
189 Description: Retrieves *all* related DBEntries for this
|
|
190 transcript. This includes all DBEntries that are
|
|
191 associated with the corresponding translation.
|
|
192
|
|
193 If you only want to retrieve the DBEntries associated
|
|
194 with the transcript (and not the translation) then
|
|
195 you should use the get_all_DBEntries() call instead.
|
|
196
|
|
197 Note: Each entry may be listed more than once. No
|
|
198 uniqueness checks are done. Also if you put in an
|
|
199 incorrect external database name no checks are done
|
|
200 to see if this exists, you will just get an empty
|
|
201 list.
|
|
202
|
|
203 Return type: Listref of Bio::EnsEMBL::DBEntry objects
|
|
204 Exceptions : none
|
|
205 Caller : general
|
|
206 Status : Stable
|
|
207
|
|
208 =cut
|
|
209
|
|
210 sub get_all_DBLinks {
|
|
211 my ( $self, $db_name_exp, $ex_db_type ) = @_;
|
|
212
|
|
213 my @links =
|
|
214 @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
|
|
215
|
|
216 # Add all of the transcript and translation xrefs to the return list.
|
|
217 my $translation = $self->translation();
|
|
218 if ( defined($translation) ) {
|
|
219 push( @links,
|
|
220 @{$translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) }
|
|
221 );
|
|
222 }
|
|
223
|
|
224 @links = sort { _compare_xrefs() } @links;
|
|
225
|
|
226 return \@links;
|
|
227 }
|
|
228
|
|
229 =head2 get_all_xrefs
|
|
230
|
|
231 Arg [1] : String database name (optional)
|
|
232 SQL wildcard characters (_ and %) can be used to
|
|
233 specify patterns.
|
|
234
|
|
235 Example : @xrefs = @{ $transcript->get_all_xrefs() };
|
|
236 @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };
|
|
237
|
|
238 Description: Retrieves *all* related xrefs for this transcript.
|
|
239 This includes all xrefs that are associated with the
|
|
240 corresponding translation of this transcript.
|
|
241
|
|
242 If you want to retrieve the xrefs associated with
|
|
243 only the transcript (and not the translation) then
|
|
244 you should use the get_all_object_xrefs() method
|
|
245 instead.
|
|
246
|
|
247 Note: Each entry may be listed more than once. No
|
|
248 uniqueness checks are done. Also if you put in an
|
|
249 incorrect external database name no checks are done
|
|
250 to see if this exists, you will just get an empty
|
|
251 list.
|
|
252
|
|
253 NB: This method is an alias for the
|
|
254 get_all_DBLinks() method.
|
|
255
|
|
256 Return type: Listref of Bio::EnsEMBL::DBEntry objects
|
|
257
|
|
258 Status : Stable
|
|
259
|
|
260 =cut
|
|
261
|
|
262 sub get_all_xrefs {
|
|
263 my $self = shift;
|
|
264 return $self->get_all_DBLinks(@_);
|
|
265 }
|
|
266
|
|
267 =head2 get_all_DBEntries
|
|
268
|
|
269 Arg [1] : (optional) String, external database name
|
|
270
|
|
271 Arg [2] : (optional) String, external database type
|
|
272
|
|
273 Example : my @dbentries = @{ $transcript->get_all_DBEntries() };
|
|
274
|
|
275 Description: Retrieves DBEntries (xrefs) for this transcript.
|
|
276 This does *not* include the corresponding
|
|
277 translations DBEntries (see get_all_DBLinks()).
|
|
278
|
|
279 This method will attempt to lazy-load DBEntries
|
|
280 from a database if an adaptor is available and no
|
|
281 DBEntries are present on the transcript (i.e. they
|
|
282 have not already been added or loaded).
|
|
283
|
|
284 Returntype : Listref of Bio::EnsEMBL::DBEntry objects
|
|
285 Exceptions : none
|
|
286 Caller : get_all_DBLinks, TranscriptAdaptor::store
|
|
287 Status : Stable
|
|
288
|
|
289 =cut
|
|
290
|
|
291 sub get_all_DBEntries {
|
|
292 my ( $self, $ex_db_exp, $ex_db_type ) = @_;
|
|
293
|
|
294 my $cache_name = 'dbentries';
|
|
295
|
|
296 if ( defined($ex_db_exp) ) {
|
|
297 $cache_name .= $ex_db_exp;
|
|
298 }
|
|
299
|
|
300 if ( defined($ex_db_type) ) {
|
|
301 $cache_name .= $ex_db_type;
|
|
302 }
|
|
303
|
|
304 # if not cached, retrieve all of the xrefs for this transcript
|
|
305 if ( !defined( $self->{$cache_name} ) && defined( $self->adaptor() ) )
|
|
306 {
|
|
307 $self->{$cache_name} =
|
|
308 $self->adaptor()->db()->get_DBEntryAdaptor()
|
|
309 ->fetch_all_by_Transcript( $self, $ex_db_exp, $ex_db_type );
|
|
310 }
|
|
311
|
|
312 $self->{$cache_name} ||= [];
|
|
313
|
|
314 return $self->{$cache_name};
|
|
315 } ## end sub get_all_DBEntries
|
|
316
|
|
317 =head2 get_all_object_xrefs
|
|
318
|
|
319 Arg [1] : (optional) String, external database name
|
|
320
|
|
321 Arg [2] : (optional) String, external_db type
|
|
322
|
|
323 Example : @oxrefs = @{ $transcript->get_all_object_xrefs() };
|
|
324
|
|
325 Description: Retrieves xrefs for this transcript. This does
|
|
326 *not* include xrefs that are associated with the
|
|
327 corresponding translations of this transcript (see
|
|
328 get_all_xrefs()).
|
|
329
|
|
330 This method will attempt to lazy-load xrefs from a
|
|
331 database if an adaptor is available and no xrefs are
|
|
332 present on the transcript (i.e. they have not already
|
|
333 been added or loaded).
|
|
334
|
|
335 NB: This method is an alias for the
|
|
336 get_all_DBentries() method.
|
|
337
|
|
338 Return type: Listref of Bio::EnsEMBL::DBEntry objects
|
|
339
|
|
340 Status : Stable
|
|
341
|
|
342 =cut
|
|
343
|
|
344 sub get_all_object_xrefs {
|
|
345 my $self = shift;
|
|
346 return $self->get_all_DBEntries(@_);
|
|
347 }
|
|
348
|
|
349 =head2 add_DBEntry
|
|
350
|
|
351 Arg [1] : Bio::EnsEMBL::DBEntry $dbe
|
|
352 The dbEntry to be added
|
|
353 Example : my $dbe = Bio::EnsEMBL::DBEntery->new(...);
|
|
354 $transcript->add_DBEntry($dbe);
|
|
355 Description: Associates a DBEntry with this transcript. Note that adding
|
|
356 DBEntries will prevent future lazy-loading of DBEntries for this
|
|
357 gene (see get_all_DBEntries).
|
|
358 Returntype : none
|
|
359 Exceptions : thrown on incorrect argument type
|
|
360 Caller : general
|
|
361 Status : Stable
|
|
362
|
|
363 =cut
|
|
364
|
|
365 sub add_DBEntry {
|
|
366 my $self = shift;
|
|
367 my $dbe = shift;
|
|
368
|
|
369 unless($dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry')) {
|
|
370 throw('Expected DBEntry argument');
|
|
371 }
|
|
372
|
|
373 $self->{'dbentries'} ||= [];
|
|
374 push @{$self->{'dbentries'}}, $dbe;
|
|
375 }
|
|
376
|
|
377
|
|
378 =head2 get_all_supporting_features
|
|
379
|
|
380 Example : my @evidence = @{ $transcript->get_all_supporting_features };
|
|
381 Description: Retreives any supporting features added manually by
|
|
382 calls to add_supporting_features.
|
|
383 Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
|
|
384 Exceptions : none
|
|
385 Caller : general
|
|
386 Status : Stable
|
|
387
|
|
388 =cut
|
|
389
|
|
390 sub get_all_supporting_features {
|
|
391 my $self = shift;
|
|
392
|
|
393 if( !exists $self->{_supporting_evidence} ) {
|
|
394 if($self->adaptor) {
|
|
395 my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
|
|
396 $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
|
|
397 }
|
|
398 }
|
|
399
|
|
400 return $self->{_supporting_evidence} || [];
|
|
401 }
|
|
402
|
|
403
|
|
404 =head2 add_supporting_features
|
|
405
|
|
406 Arg [1-N] : Bio::EnsEMBL::FeaturePair $feature
|
|
407 The supporting features to add
|
|
408 Example : $transcript->add_supporting_features(@features);
|
|
409 Description: Adds a list of supporting features to this Transcript.
|
|
410 The added features can be retieved by
|
|
411 get_all_supporting_features().
|
|
412 Returntype : none
|
|
413 Exceptions : throw if any of the features are not FeaturePairs
|
|
414 throw if any of the features are not in the same coordinate
|
|
415 system as the Transcript
|
|
416 Caller : general
|
|
417 Status : Stable
|
|
418
|
|
419 =cut
|
|
420
|
|
421 sub add_supporting_features {
|
|
422 my ($self, @features) = @_;
|
|
423
|
|
424 return unless @features;
|
|
425
|
|
426 $self->{_supporting_evidence} ||= [];
|
|
427
|
|
428 # check whether this feature object has been added already
|
|
429 FEATURE: foreach my $feature (@features) {
|
|
430
|
|
431 if (!defined($feature) || ref($feature) eq "ARRAY") {
|
|
432 throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
|
|
433 }
|
|
434 if (!$feature || !$feature->isa("Bio::EnsEMBL::FeaturePair")) {
|
|
435 print "feature = " . $feature . "\n";
|
|
436 throw("Supporting feat [$feature] not a " .
|
|
437 "Bio::EnsEMBL::FeaturePair");
|
|
438 }
|
|
439
|
|
440 if ((defined $self->slice() && defined $feature->slice())&&
|
|
441 ( $self->slice()->name() ne $feature->slice()->name())){
|
|
442 throw("Supporting feat not in same coord system as exon\n" .
|
|
443 "exon is attached to [".$self->slice()->name()."]\n" .
|
|
444 "feat is attached to [".$feature->slice()->name()."]");
|
|
445 }
|
|
446
|
|
447 foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
|
|
448 # compare objects
|
|
449 if ( $feature == $added_feature ){
|
|
450 #this feature has already been added
|
|
451 next FEATURE;
|
|
452 }
|
|
453 }
|
|
454
|
|
455 #no duplicate was found, add the feature
|
|
456 push(@{$self->{_supporting_evidence}}, $feature);
|
|
457 }
|
|
458 }
|
|
459
|
|
460
|
|
461 =head2 flush_supporting_features
|
|
462
|
|
463 Example : $transcript->flush_supporting_features;
|
|
464 Description : Removes all supporting evidence from the transcript.
|
|
465 Return type : (Empty) listref
|
|
466 Exceptions : none
|
|
467 Caller : general
|
|
468 Status : Stable
|
|
469
|
|
470 =cut
|
|
471
|
|
472 sub flush_supporting_features {
|
|
473 my $self = shift;
|
|
474 $self->{'_supporting_evidence'} = [];
|
|
475 }
|
|
476
|
|
477
|
|
478 =head2 external_db
|
|
479
|
|
480 Arg [1] : (optional) String - name of external db to set
|
|
481 Example : $transcript->external_db('HGNC');
|
|
482 Description: Getter/setter for attribute external_db. The db is the one that
|
|
483 belongs to the external_name.
|
|
484 Returntype : String
|
|
485 Exceptions : none
|
|
486 Caller : general
|
|
487 Status : Stable
|
|
488
|
|
489 =cut
|
|
490
|
|
491 sub external_db {
|
|
492 my ( $self, $ext_dbname ) = @_;
|
|
493
|
|
494 if(defined $ext_dbname) {
|
|
495 return ( $self->{'external_db'} = $ext_dbname );
|
|
496 }
|
|
497
|
|
498 if( exists $self->{'external_db'} ) {
|
|
499 return $self->{'external_db'};
|
|
500 }
|
|
501
|
|
502 my $display_xref = $self->display_xref();
|
|
503
|
|
504 if( defined $display_xref ) {
|
|
505 return $display_xref->dbname()
|
|
506 } else {
|
|
507 return undef;
|
|
508 }
|
|
509 }
|
|
510
|
|
511
|
|
512 =head2 external_status
|
|
513
|
|
514 Arg [1] : (optional) String - status of the external db
|
|
515 Example : $transcript->external_status('KNOWNXREF');
|
|
516 Description: Getter/setter for attribute external_status. The status of
|
|
517 the external db of the one that belongs to the external_name.
|
|
518 Returntype : String
|
|
519 Exceptions : none
|
|
520 Caller : general
|
|
521 Status : Stable
|
|
522
|
|
523 =cut
|
|
524
|
|
525 sub external_status {
|
|
526 my ( $self, $ext_status ) = @_;
|
|
527
|
|
528 if(defined $ext_status) {
|
|
529 return ( $self->{'external_status'} = $ext_status );
|
|
530 }
|
|
531
|
|
532 if( exists $self->{'external_status'} ) {
|
|
533 return $self->{'external_status'};
|
|
534 }
|
|
535
|
|
536 my $display_xref = $self->display_xref();
|
|
537
|
|
538 if( defined $display_xref ) {
|
|
539 return $display_xref->status()
|
|
540 } else {
|
|
541 return undef;
|
|
542 }
|
|
543 }
|
|
544
|
|
545
|
|
546 =head2 external_name
|
|
547
|
|
548 Arg [1] : (optional) String - the external name to set
|
|
549 Example : $transcript->external_name('BRCA2-001');
|
|
550 Description: Getter/setter for attribute external_name.
|
|
551 Returntype : String or undef
|
|
552 Exceptions : none
|
|
553 Caller : general
|
|
554 Status : Stable
|
|
555
|
|
556 =cut
|
|
557
|
|
558 sub external_name {
|
|
559 my ($self, $ext_name) = @_;
|
|
560
|
|
561 if(defined $ext_name) {
|
|
562 return ( $self->{'external_name'} = $ext_name );
|
|
563 }
|
|
564
|
|
565 if( exists $self->{'external_name'} ) {
|
|
566 return $self->{'external_name'};
|
|
567 }
|
|
568
|
|
569 my $display_xref = $self->display_xref();
|
|
570
|
|
571 if( defined $display_xref ) {
|
|
572 return $display_xref->display_id()
|
|
573 } else {
|
|
574 return undef;
|
|
575 }
|
|
576 }
|
|
577
|
|
578
|
|
579 =head2 is_known
|
|
580
|
|
581 Example : print "Transcript ".$transcript->stable_id." is KNOWN\n" if
|
|
582 $transcript->is_known;
|
|
583 Description: Returns TRUE if this gene has a status of 'KNOWN'
|
|
584 Returntype : TRUE if known, FALSE otherwise
|
|
585 Exceptions : none
|
|
586 Caller : general
|
|
587 Status : Stable
|
|
588
|
|
589 =cut
|
|
590
|
|
591 sub is_known {
|
|
592 my $self = shift;
|
|
593 return ( $self->{'status'} eq "KNOWN" || $self->{'status'} eq "KNOWN_BY_PROJECTION" );
|
|
594 }
|
|
595
|
|
596
|
|
597 =head2 status
|
|
598
|
|
599 Arg [1] : string $status
|
|
600 Example : none
|
|
601 Description: get/set for attribute status
|
|
602 Returntype : string
|
|
603 Exceptions : none
|
|
604 Caller : general
|
|
605 Status : Medium Risk
|
|
606
|
|
607 =cut
|
|
608
|
|
609 sub status {
|
|
610 my $self = shift;
|
|
611 $self->{'status'} = shift if( @_ );
|
|
612 return $self->{'status'};
|
|
613 }
|
|
614
|
|
615 =head2 biotype
|
|
616
|
|
617 Arg [1] : string $biotype
|
|
618 Example : none
|
|
619 Description: get/set for attribute biotype
|
|
620 Returntype : string
|
|
621 Exceptions : none
|
|
622 Caller : general
|
|
623 Status : Stable
|
|
624
|
|
625 =cut
|
|
626
|
|
627 sub biotype {
|
|
628 my $self = shift;
|
|
629 $self->{'biotype'} = shift if( @_ );
|
|
630 return ( $self->{'biotype'} || "protein_coding" );
|
|
631 }
|
|
632
|
|
633
|
|
634 =head2 display_xref
|
|
635
|
|
636 Arg [1] : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
|
|
637 Example : $transcript->display_xref($db_entry);
|
|
638 Description: Getter/setter for display_xref for this transcript.
|
|
639 Returntype : Bio::EnsEMBL::DBEntry
|
|
640 Exceptions : none
|
|
641 Caller : general
|
|
642 Status : Stable
|
|
643
|
|
644 =cut
|
|
645
|
|
646 sub display_xref {
|
|
647 my $self = shift;
|
|
648 $self->{'display_xref'} = shift if(@_);
|
|
649 return $self->{'display_xref'};
|
|
650 }
|
|
651
|
|
652 =head2 is_canonical
|
|
653
|
|
654 Args [1] : (optional) Boolean is_canonical
|
|
655
|
|
656 Example : if ($transcript->is_canonical()) { ... }
|
|
657
|
|
658 Description : Returns true (non-zero) if the transcript is the
|
|
659 canonical transcript of its gene, false (0) if not. If the code
|
|
660 returns an undefined it is because its state is not currently
|
|
661 known. Internally the code will consult the database for this
|
|
662 value if it is unknown and the transcript has a dbID and an
|
|
663 attached adaptor
|
|
664
|
|
665 Return type : Boolean
|
|
666
|
|
667 Status : Stable
|
|
668
|
|
669 =cut
|
|
670
|
|
671 sub is_canonical {
|
|
672 my ( $self, $value ) = @_;
|
|
673
|
|
674 #Shortcut call
|
|
675 return $self->{is_canonical} if defined $self->{is_canonical};
|
|
676
|
|
677 if ( defined($value) ) {
|
|
678 $self->{is_canonical} = ( $value ? 1 : 0 );
|
|
679 }
|
|
680 else {
|
|
681 if(! defined $self->{is_canonical} && $self->dbID() && $self->adaptor()) {
|
|
682 $self->{is_canonical} = $self->adaptor()->is_Transcript_canonical($self);
|
|
683 }
|
|
684 }
|
|
685
|
|
686 return $self->{is_canonical};
|
|
687 }
|
|
688
|
|
689 =head2 translation
|
|
690
|
|
691 Args : None
|
|
692 Example : if ( $transcript->translation() ) {
|
|
693 print( $transcript->translation()->stable_id(), "\n" );
|
|
694 } else {
|
|
695 print("Pseudogene\n");
|
|
696 }
|
|
697 Description: Getter/setter for the Translation object which
|
|
698 defines the CDS (and as a result the peptide encoded
|
|
699 by) this transcript. This function will return
|
|
700 undef if this transcript is a pseudogene, i.e. a
|
|
701 non-translating transcript such as an ncRNA. This
|
|
702 is the accepted method of determining whether a
|
|
703 transcript is a pseudogene or not.
|
|
704 Returntype : Bio::EnsEMBL::Translation
|
|
705 Exceptions : none
|
|
706 Caller : general
|
|
707 Status : Stable
|
|
708
|
|
709 =cut
|
|
710
|
|
711 sub translation {
|
|
712 my ( $self, $translation ) = @_;
|
|
713
|
|
714 if ( defined($translation) ) {
|
|
715 assert_ref( $translation, 'Bio::EnsEMBL::Translation' );
|
|
716
|
|
717 $self->{'translation'} = $translation;
|
|
718 $translation->transcript($self);
|
|
719
|
|
720 $self->{'cdna_coding_start'} = undef;
|
|
721 $self->{'cdna_coding_end'} = undef;
|
|
722
|
|
723 $self->{'coding_region_start'} = undef;
|
|
724 $self->{'coding_region_end'} = undef;
|
|
725
|
|
726 $self->{'transcript_mapper'} = undef;
|
|
727
|
|
728 } elsif ( @_ > 1 ) {
|
|
729 if ( defined( $self->{'translation'} ) ) {
|
|
730 # Removing existing translation
|
|
731
|
|
732 $self->{'translation'}->transcript(undef);
|
|
733 delete( $self->{'translation'} );
|
|
734
|
|
735 $self->{'cdna_coding_start'} = undef;
|
|
736 $self->{'cdna_coding_end'} = undef;
|
|
737
|
|
738 $self->{'coding_region_start'} = undef;
|
|
739 $self->{'coding_region_end'} = undef;
|
|
740
|
|
741 $self->{'transcript_mapper'} = undef;
|
|
742 }
|
|
743 } elsif ( !exists( $self->{'translation'} )
|
|
744 && defined( $self->adaptor() ) )
|
|
745 {
|
|
746 $self->{'translation'} =
|
|
747 $self->adaptor()->db()->get_TranslationAdaptor()
|
|
748 ->fetch_by_Transcript($self);
|
|
749 }
|
|
750
|
|
751 return $self->{'translation'};
|
|
752 } ## end sub translation
|
|
753
|
|
754 =head2 get_all_alternative_translations
|
|
755
|
|
756 Args : None
|
|
757 Example :
|
|
758
|
|
759 my @alt_translations =
|
|
760 @{ $transcript->get_all_alternative_translations() };
|
|
761
|
|
762 Description: Fetches all alternative translations defined for this
|
|
763 transcript. The canonical translation is not returned.
|
|
764
|
|
765 Returntype : Arrayref to Bio::EnsEMBL::Translation
|
|
766 Exceptions : None
|
|
767 Caller : General
|
|
768 Status : Stable
|
|
769
|
|
770 =cut
|
|
771
|
|
772 sub get_all_alternative_translations {
|
|
773 my ($self) = @_;
|
|
774
|
|
775 if ( !exists( $self->{'alternative_translations'} )
|
|
776 && defined( $self->adaptor() ) )
|
|
777 {
|
|
778 my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
|
|
779 my @translations =
|
|
780 @{ $pa->fetch_all_alternative_by_Transcript($self) };
|
|
781
|
|
782 $self->{'alternative_translations'} = \@translations;
|
|
783 }
|
|
784
|
|
785 return $self->{'alternative_translations'};
|
|
786 }
|
|
787
|
|
788 =head2 add_alternative_translation
|
|
789
|
|
790 Args : Bio::EnsEMBL::Translation $translation
|
|
791 Example :
|
|
792
|
|
793 $transcript->add_alternative_translation($translation);
|
|
794
|
|
795 Description: Adds an alternative translation to this transcript.
|
|
796 Returntype : None
|
|
797 Exceptions : None
|
|
798 Caller : General
|
|
799 Status : Stable
|
|
800
|
|
801 =cut
|
|
802
|
|
803 sub add_alternative_translation {
|
|
804 my ( $self, $translation ) = @_;
|
|
805
|
|
806 if ( !( defined($translation)
|
|
807 && ref($translation)
|
|
808 && $translation->isa('Bio::EnsEMBL::Translation') ) )
|
|
809 {
|
|
810 throw("Bio::EnsEMBL::Translation argument expected.");
|
|
811 }
|
|
812
|
|
813 # Load the existsing alternative translations from the database if
|
|
814 # they haven't already been loaded.
|
|
815 $self->get_all_alternative_translations();
|
|
816
|
|
817 push( @{ $self->{'alternative_translations'} }, $translation );
|
|
818 }
|
|
819
|
|
820 =head2 spliced_seq
|
|
821
|
|
822 Args : none
|
|
823 Example : none
|
|
824 Description: Retrieves all Exon sequences and concats them together.
|
|
825 No phase padding magic is done, even if phases do not align.
|
|
826 Returntype : Text
|
|
827 Exceptions : none
|
|
828 Caller : general
|
|
829 Status : Stable
|
|
830
|
|
831 =cut
|
|
832
|
|
833 sub spliced_seq {
|
|
834 my ( $self ) = @_;
|
|
835
|
|
836 my $seq_string = "";
|
|
837 for my $ex ( @{$self->get_all_Exons()} ) {
|
|
838 my $seq = $ex->seq();
|
|
839
|
|
840 if(!$seq) {
|
|
841 warning("Could not obtain seq for exon. Transcript sequence may not " .
|
|
842 "be correct.");
|
|
843 $seq_string .= 'N' x $ex->length();
|
|
844 } else {
|
|
845 $seq_string .= $seq->seq();
|
|
846 }
|
|
847 }
|
|
848
|
|
849 # apply post transcriptional edits
|
|
850 if($self->edits_enabled()) {
|
|
851 my @seqeds = @{$self->get_all_SeqEdits()};
|
|
852
|
|
853 # sort edits in reverse order to remove complication of
|
|
854 # adjusting downstream edits
|
|
855 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
|
|
856
|
|
857 foreach my $se (@seqeds) {
|
|
858 $se->apply_edit(\$seq_string);
|
|
859 }
|
|
860 }
|
|
861
|
|
862 return $seq_string;
|
|
863 }
|
|
864
|
|
865
|
|
866 =head2 translateable_seq
|
|
867
|
|
868 Args : none
|
|
869 Example : print $transcript->translateable_seq(), "\n";
|
|
870 Description: Returns a sequence string which is the the translateable part
|
|
871 of the transcripts sequence. This is formed by splicing all
|
|
872 Exon sequences together and apply all defined RNA edits.
|
|
873 Then the coding part of the sequence is extracted and returned.
|
|
874 The code will not support monkey exons any more. If you want to
|
|
875 have non phase matching exons, defined appropriate _rna_edit
|
|
876 attributes!
|
|
877
|
|
878 An empty string is returned if this transcript is a pseudogene
|
|
879 (i.e. is non-translateable).
|
|
880 Returntype : Text
|
|
881 Exceptions : none
|
|
882 Caller : general
|
|
883 Status : Stable
|
|
884
|
|
885 =cut
|
|
886
|
|
887 sub translateable_seq {
|
|
888 my ( $self ) = @_;
|
|
889
|
|
890 if ( !$self->translation() ) {
|
|
891 return '';
|
|
892 }
|
|
893
|
|
894 my $mrna = $self->spliced_seq();
|
|
895
|
|
896 my $start = $self->cdna_coding_start();
|
|
897 my $end = $self->cdna_coding_end();
|
|
898
|
|
899 $mrna = substr( $mrna, $start - 1, $end - $start + 1 );
|
|
900
|
|
901 my $start_phase = $self->translation->start_Exon->phase();
|
|
902 if( $start_phase > 0 ) {
|
|
903 $mrna = "N"x$start_phase . $mrna;
|
|
904 }
|
|
905 if( ! $start || ! $end ) {
|
|
906 return "";
|
|
907 }
|
|
908
|
|
909 return $mrna;
|
|
910 }
|
|
911
|
|
912
|
|
913 =head2 cdna_coding_start
|
|
914
|
|
915 Arg [1] : (optional) $value
|
|
916 Example : $relative_coding_start = $transcript->cdna_coding_start;
|
|
917 Description: Retrieves the position of the coding start of this transcript
|
|
918 in cdna coordinates (relative to the start of the 5prime end of
|
|
919 the transcript, excluding introns, including utrs).
|
|
920
|
|
921 This will return undef if this is a pseudogene (i.e. a
|
|
922 transcript with no translation).
|
|
923 Returntype : int
|
|
924 Exceptions : none
|
|
925 Caller : five_prime_utr, get_all_snps, general
|
|
926 Status : Stable
|
|
927
|
|
928 =cut
|
|
929
|
|
930 sub cdna_coding_start {
|
|
931 my $self = shift;
|
|
932
|
|
933 if( @_ ) {
|
|
934 $self->{'cdna_coding_start'} = shift;
|
|
935 }
|
|
936
|
|
937 if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
|
|
938 # calc coding start relative from the start of translation (in cdna coords)
|
|
939 my $start = 0;
|
|
940
|
|
941 my @exons = @{$self->get_all_Exons};
|
|
942 my $exon;
|
|
943
|
|
944 while($exon = shift @exons) {
|
|
945 if($exon == $self->translation->start_Exon) {
|
|
946 #add the utr portion of the start exon
|
|
947 $start += $self->translation->start;
|
|
948 last;
|
|
949 } else {
|
|
950 #add the entire length of this non-coding exon
|
|
951 $start += $exon->length;
|
|
952 }
|
|
953 }
|
|
954
|
|
955 # adjust cdna coords if sequence edits are enabled
|
|
956 if($self->edits_enabled()) {
|
|
957 my @seqeds = @{$self->get_all_SeqEdits()};
|
|
958 # sort in reverse order to avoid adjustment of downstream edits
|
|
959 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
|
|
960
|
|
961 foreach my $se (@seqeds) {
|
|
962 # use less than start so that start of CDS can be extended
|
|
963 if($se->start() < $start) {
|
|
964 $start += $se->length_diff();
|
|
965 }
|
|
966 }
|
|
967 }
|
|
968
|
|
969 $self->{'cdna_coding_start'} = $start;
|
|
970 }
|
|
971
|
|
972 return $self->{'cdna_coding_start'};
|
|
973 }
|
|
974
|
|
975
|
|
976 =head2 cdna_coding_end
|
|
977
|
|
978 Arg [1] : (optional) $value
|
|
979 Example : $cdna_coding_end = $transcript->cdna_coding_end;
|
|
980 Description: Retrieves the end of the coding region of this transcript in
|
|
981 cdna coordinates (relative to the five prime end of the
|
|
982 transcript, excluding introns, including utrs).
|
|
983
|
|
984 This will return undef if this transcript is a pseudogene
|
|
985 (i.e. a transcript with no translation and therefor no CDS).
|
|
986 Returntype : int
|
|
987 Exceptions : none
|
|
988 Caller : general
|
|
989 Status : Stable
|
|
990
|
|
991 =cut
|
|
992
|
|
993 sub cdna_coding_end {
|
|
994 my $self = shift;
|
|
995
|
|
996 if( @_ ) {
|
|
997 $self->{'cdna_coding_end'} = shift;
|
|
998 }
|
|
999
|
|
1000 if(!defined $self->{'cdna_coding_end'} && defined $self->translation) {
|
|
1001 my @exons = @{$self->get_all_Exons};
|
|
1002
|
|
1003 my $end = 0;
|
|
1004 while(my $exon = shift @exons) {
|
|
1005 if($exon == $self->translation->end_Exon) {
|
|
1006 # add coding portion of the final coding exon
|
|
1007 $end += $self->translation->end;
|
|
1008 last;
|
|
1009 } else {
|
|
1010 # add entire exon
|
|
1011 $end += $exon->length;
|
|
1012 }
|
|
1013 }
|
|
1014
|
|
1015 # adjust cdna coords if sequence edits are enabled
|
|
1016 if($self->edits_enabled()) {
|
|
1017 my @seqeds = @{$self->get_all_SeqEdits()};
|
|
1018 # sort in reverse order to avoid adjustment of downstream edits
|
|
1019 @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
|
|
1020
|
|
1021 foreach my $se (@seqeds) {
|
|
1022 # use less than or equal to end+1 so end of the CDS can be extended
|
|
1023 if($se->start() <= $end + 1) {
|
|
1024 $end += $se->length_diff();
|
|
1025 }
|
|
1026 }
|
|
1027 }
|
|
1028
|
|
1029 $self->{'cdna_coding_end'} = $end;
|
|
1030 }
|
|
1031
|
|
1032 return $self->{'cdna_coding_end'};
|
|
1033 }
|
|
1034
|
|
1035
|
|
1036 =head2 coding_region_start
|
|
1037
|
|
1038 Arg [1] : (optional) $value
|
|
1039 Example : $coding_region_start = $transcript->coding_region_start
|
|
1040 Description: Retrieves the start of the coding region of this transcript
|
|
1041 in genomic coordinates (i.e. in either slice or contig coords).
|
|
1042 By convention, the coding_region_start is always lower than
|
|
1043 the value returned by the coding_end method.
|
|
1044 The value returned by this function is NOT the biological
|
|
1045 coding start since on the reverse strand the biological coding
|
|
1046 start would be the higher genomic value.
|
|
1047
|
|
1048 This function will return undef if this is a pseudogene
|
|
1049 (a non-translated transcript).
|
|
1050 Returntype : int
|
|
1051 Exceptions : none
|
|
1052 Caller : general
|
|
1053 Status : Stable
|
|
1054
|
|
1055 =cut
|
|
1056
|
|
1057 sub coding_region_start {
|
|
1058 my ($self, $value) = @_;
|
|
1059
|
|
1060 if( defined $value ) {
|
|
1061 $self->{'coding_region_start'} = $value;
|
|
1062 } elsif(!defined $self->{'coding_region_start'} &&
|
|
1063 defined $self->translation) {
|
|
1064 #calculate the coding start from the translation
|
|
1065 my $start;
|
|
1066 my $strand = $self->translation()->start_Exon->strand();
|
|
1067 if( $strand == 1 ) {
|
|
1068 $start = $self->translation()->start_Exon->start();
|
|
1069 $start += ( $self->translation()->start() - 1 );
|
|
1070 } else {
|
|
1071 $start = $self->translation()->end_Exon->end();
|
|
1072 $start -= ( $self->translation()->end() - 1 );
|
|
1073 }
|
|
1074 $self->{'coding_region_start'} = $start;
|
|
1075 }
|
|
1076
|
|
1077 return $self->{'coding_region_start'};
|
|
1078 }
|
|
1079
|
|
1080
|
|
1081 =head2 coding_region_end
|
|
1082
|
|
1083 Arg [1] : (optional) $value
|
|
1084 Example : $coding_region_end = $transcript->coding_region_end
|
|
1085 Description: Retrieves the end of the coding region of this transcript
|
|
1086 in genomic coordinates (i.e. in either slice or contig coords).
|
|
1087 By convention, the coding_region_end is always higher than the
|
|
1088 value returned by the coding_region_start method.
|
|
1089 The value returned by this function is NOT the biological
|
|
1090 coding end since on the reverse strand the biological coding
|
|
1091 end would be the lower genomic value.
|
|
1092
|
|
1093 This function will return undef if this is a pseudogene
|
|
1094 (a non-translated transcript).
|
|
1095 Returntype : int
|
|
1096 Exceptions : none
|
|
1097 Caller : general
|
|
1098 Status : Stable
|
|
1099
|
|
1100 =cut
|
|
1101
|
|
1102 sub coding_region_end {
|
|
1103 my ($self, $value ) = @_;
|
|
1104
|
|
1105 my $strand;
|
|
1106 my $end;
|
|
1107
|
|
1108 if( defined $value ) {
|
|
1109 $self->{'coding_region_end'} = $value;
|
|
1110 } elsif( ! defined $self->{'coding_region_end'}
|
|
1111 && defined $self->translation() ) {
|
|
1112 $strand = $self->translation()->start_Exon->strand();
|
|
1113 if( $strand == 1 ) {
|
|
1114 $end = $self->translation()->end_Exon->start();
|
|
1115 $end += ( $self->translation()->end() - 1 );
|
|
1116 } else {
|
|
1117 $end = $self->translation()->start_Exon->end();
|
|
1118 $end -= ( $self->translation()->start() - 1 );
|
|
1119 }
|
|
1120 $self->{'coding_region_end'} = $end;
|
|
1121 }
|
|
1122
|
|
1123 return $self->{'coding_region_end'};
|
|
1124 }
|
|
1125
|
|
1126
|
|
1127 =head2 edits_enabled
|
|
1128
|
|
1129 Arg [1] : (optional) boolean $newval
|
|
1130 Example : $transcript->edits_enabled(1);
|
|
1131 Description: Enables/Disables the application of SeqEdits to this transcript.
|
|
1132 Edits are enabled by default, and affect the cdna/mrna
|
|
1133 sequences coordinates and the resultant translation.
|
|
1134 Returntype : boolean - the current value of the edits
|
|
1135 Exceptions : none
|
|
1136 Caller : general, cdna_coding_start, cdna_coding_end, length
|
|
1137 Status : Stable
|
|
1138
|
|
1139 =cut
|
|
1140
|
|
1141 sub edits_enabled {
|
|
1142 my ( $self, $boolean ) = @_;
|
|
1143
|
|
1144 if ( defined($boolean) ) {
|
|
1145 $self->{'edits_enabled'} = $boolean;
|
|
1146
|
|
1147 # flush cached values that will be different with/without edits
|
|
1148 $self->{'cdna_coding_start'} = undef;
|
|
1149 $self->{'cdna_coding_end'} = undef;
|
|
1150 $self->{'transcript_mapper'} = undef;
|
|
1151 }
|
|
1152
|
|
1153 return $self->{'edits_enabled'};
|
|
1154 }
|
|
1155
|
|
1156
|
|
1157 =head2 get_all_SeqEdits
|
|
1158
|
|
1159 Arg [1] : none
|
|
1160 Example : my @seqeds = @{$transcript->get_all_SeqEdits()};
|
|
1161 Description: Retrieves all post transcriptional sequence modifications for
|
|
1162 this transcript.
|
|
1163 Returntype : Bio::EnsEMBL::SeqEdit
|
|
1164 Exceptions : none
|
|
1165 Caller : spliced_seq()
|
|
1166 Status : Stable
|
|
1167
|
|
1168 =cut
|
|
1169
|
|
1170 sub get_all_SeqEdits {
|
|
1171 my $self = shift;
|
|
1172
|
|
1173 my @seqeds;
|
|
1174
|
|
1175 my $attribs = $self->get_all_Attributes('_rna_edit');
|
|
1176
|
|
1177 # convert attributes to SeqEdit objects
|
|
1178 foreach my $a (@$attribs) {
|
|
1179 push @seqeds, Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $a);
|
|
1180 }
|
|
1181
|
|
1182 return \@seqeds;
|
|
1183 }
|
|
1184
|
|
1185
|
|
1186 =head2 get_all_Attributes
|
|
1187
|
|
1188 Arg [1] : optional string $attrib_code
|
|
1189 The code of the attribute type to retrieve values for.
|
|
1190 Example : ($rna_edits) = @{$transcript->get_all_Attributes('_rna_edit')};
|
|
1191 @transc_attributes = @{$transcript->get_all_Attributes()};
|
|
1192 Description: Gets a list of Attributes of this transcript.
|
|
1193 Optionally just get Attrubutes for given code.
|
|
1194 Returntype : listref Bio::EnsEMBL::Attribute
|
|
1195 Exceptions : warning if transcript does not have attached adaptor and
|
|
1196 attempts lazy load.
|
|
1197 Caller : general
|
|
1198 Status : Stable
|
|
1199
|
|
1200 =cut
|
|
1201
|
|
1202 sub get_all_Attributes {
|
|
1203 my $self = shift;
|
|
1204 my $attrib_code = shift;
|
|
1205
|
|
1206 if( ! exists $self->{'attributes' } ) {
|
|
1207 if(!$self->adaptor() ) {
|
|
1208 return [];
|
|
1209 }
|
|
1210
|
|
1211 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
|
|
1212 $self->{'attributes'} = $attribute_adaptor->fetch_all_by_Transcript($self);
|
|
1213 }
|
|
1214
|
|
1215 if( defined $attrib_code) {
|
|
1216 my @results = grep { uc($_->code()) eq uc($attrib_code) }
|
|
1217 @{$self->{'attributes'}};
|
|
1218 return \@results;
|
|
1219 } else {
|
|
1220 return $self->{'attributes'};
|
|
1221 }
|
|
1222 }
|
|
1223
|
|
1224
|
|
1225 =head2 add_Attributes
|
|
1226
|
|
1227 Arg [1...] : Bio::EnsEMBL::Attribute $attribute
|
|
1228 You can have more Attributes as arguments, all will be added.
|
|
1229 Example : $transcript->add_Attributes($rna_edit_attribute);
|
|
1230 Description: Adds an Attribute to the Transcript. Usefull to do _rna_edits.
|
|
1231 If you add an attribute before you retrieve any from database,
|
|
1232 lazy load will be disabled.
|
|
1233 Returntype : none
|
|
1234 Exceptions : throw on incorrect arguments
|
|
1235 Caller : general
|
|
1236 Status : Stable
|
|
1237
|
|
1238 =cut
|
|
1239
|
|
1240 sub add_Attributes {
|
|
1241 my ( $self, @attribs ) = @_;
|
|
1242
|
|
1243 if ( !exists( $self->{'attributes'} ) ) {
|
|
1244 $self->{'attributes'} = [];
|
|
1245 }
|
|
1246
|
|
1247 my $seq_change = 0;
|
|
1248 foreach my $attrib (@attribs) {
|
|
1249 assert_ref( $attrib, 'Bio::EnsEMBL::Attribute' );
|
|
1250
|
|
1251 push( @{ $self->{'attributes'} }, $attrib );
|
|
1252
|
|
1253 if ( $attrib->code() eq "_rna_edit" ) {
|
|
1254 $seq_change = 1;
|
|
1255 }
|
|
1256 }
|
|
1257
|
|
1258 if ($seq_change) {
|
|
1259 my $translation = $self->translation();
|
|
1260 if ( defined($translation) ) {
|
|
1261 delete( $translation->{'seq'} );
|
|
1262 }
|
|
1263 }
|
|
1264
|
|
1265 # flush cdna coord cache b/c we may have added a SeqEdit
|
|
1266 delete( $self->{'cdna_coding_start'} );
|
|
1267 delete( $self->{'cdna_coding_end'} );
|
|
1268 delete( $self->{'transcript_mapper'} );
|
|
1269 } ## end sub add_Attributes
|
|
1270
|
|
1271
|
|
1272 =head2 add_Exon
|
|
1273
|
|
1274 Title : add_Exon
|
|
1275 Usage : $trans->add_Exon($exon)
|
|
1276 Returns : None
|
|
1277 Args [1]: Bio::EnsEMBL::Exon object to add
|
|
1278 Args [2]: rank
|
|
1279 Exceptions: throws if not a valid Bio::EnsEMBL::Exon
|
|
1280 : or exon clashes with another one
|
|
1281 Status : Stable
|
|
1282
|
|
1283 =cut
|
|
1284
|
|
1285 sub add_Exon {
|
|
1286 my ( $self, $exon, $rank ) = @_;
|
|
1287
|
|
1288 assert_ref( $exon, 'Bio::EnsEMBL::Exon' );
|
|
1289
|
|
1290 $self->{'_trans_exon_array'} ||= [];
|
|
1291
|
|
1292 if ( defined($rank) ) {
|
|
1293 $self->{'_trans_exon_array'}->[ $rank - 1 ] = $exon;
|
|
1294 return;
|
|
1295 }
|
|
1296
|
|
1297 my $was_added = 0;
|
|
1298
|
|
1299 my $ea = $self->{'_trans_exon_array'};
|
|
1300
|
|
1301 if ( @{$ea} ) {
|
|
1302 if ( $exon->strand() == 1 ) {
|
|
1303
|
|
1304 my $exon_start = $exon->start();
|
|
1305
|
|
1306 if ( $exon_start > $ea->[-1]->end() ) {
|
|
1307 push( @{$ea}, $exon );
|
|
1308 $was_added = 1;
|
|
1309 } else {
|
|
1310 # Insert it at correct place
|
|
1311
|
|
1312 my $i = 0;
|
|
1313 foreach my $e ( @{$ea} ) {
|
|
1314 if ( $exon_start < $e->start() ) {
|
|
1315 if ( $exon->end() >= $e->start() ) {
|
|
1316 # Overlap
|
|
1317 last;
|
|
1318 }
|
|
1319 if ( $i and $exon_start <= $ea->[$i-1]->end() ) {
|
|
1320 # Overlap
|
|
1321 last;
|
|
1322 }
|
|
1323 splice( @{$ea}, $i, 0, $exon );
|
|
1324 $was_added = 1;
|
|
1325 last;
|
|
1326 }
|
|
1327 ++$i;
|
|
1328 }
|
|
1329
|
|
1330 }
|
|
1331
|
|
1332 } else {
|
|
1333
|
|
1334 my $exon_end = $exon->end();
|
|
1335
|
|
1336 if ( $exon_end < $ea->[-1]->start() ) {
|
|
1337 push( @{$ea}, $exon );
|
|
1338 $was_added = 1;
|
|
1339 } else {
|
|
1340 # Insert it at correct place
|
|
1341
|
|
1342 my $i = 0;
|
|
1343 foreach my $e ( @{$ea} ) {
|
|
1344 if ( $exon_end > $e->end() ) {
|
|
1345 if ( $exon->start() <= $e->end() ) {
|
|
1346 # Overlap
|
|
1347 last;
|
|
1348 }
|
|
1349 if ( $i and $exon_end >= $ea->[$i-1]->start() ) {
|
|
1350 # Overlap
|
|
1351 last;
|
|
1352 }
|
|
1353 splice( @{$ea}, $i, 0, $exon );
|
|
1354 $was_added = 1;
|
|
1355 last;
|
|
1356 }
|
|
1357 ++$i;
|
|
1358 }
|
|
1359
|
|
1360 }
|
|
1361
|
|
1362 } ## end else [ if ( $exon->strand() ==...)]
|
|
1363 } else {
|
|
1364 push( @{$ea}, $exon );
|
|
1365 $was_added = 1;
|
|
1366 }
|
|
1367
|
|
1368 # sanity check:
|
|
1369 if ( !$was_added ) {
|
|
1370 # The exon was not added because it was overloapping with an
|
|
1371 # existing exon.
|
|
1372 my $all_str = '';
|
|
1373
|
|
1374 foreach my $e ( @{$ea} ) {
|
|
1375 $all_str .= ' '
|
|
1376 . $e->start() . '-'
|
|
1377 . $e->end() . ' ('
|
|
1378 . $e->strand() . ') '
|
|
1379 . ( $e->stable_id() || '' ) . "\n";
|
|
1380 }
|
|
1381
|
|
1382 my $cur_str = ' '
|
|
1383 . $exon->start() . '-'
|
|
1384 . $exon->end() . ' ('
|
|
1385 . $exon->strand() . ') '
|
|
1386 . ( $exon->stable_id() || '' ) . "\n";
|
|
1387
|
|
1388 throw( "Exon overlaps with other exon in same transcript.\n"
|
|
1389 . "Transcript Exons:\n$all_str\n"
|
|
1390 . "This Exon:\n$cur_str" );
|
|
1391 }
|
|
1392
|
|
1393 # recalculate start, end, slice, strand
|
|
1394 $self->recalculate_coordinates();
|
|
1395 } ## end sub add_Exon
|
|
1396
|
|
1397
|
|
1398 =head2 get_all_Exons
|
|
1399
|
|
1400 Arg [CONSTITUTIVE] : Boolean
|
|
1401 Only return constitutive exons if true (non-zero)
|
|
1402
|
|
1403 Examples : my @exons = @{ $transcript->get_all_Exons() };
|
|
1404
|
|
1405 my @exons =
|
|
1406 @{ $transcript->get_all_Exons( -constitutive => 1 ) };
|
|
1407
|
|
1408 Description: Returns an listref of the exons in this transcript
|
|
1409 in order, i.e. the first exon in the listref is the
|
|
1410 5prime most exon in the transcript. Only returns
|
|
1411 constitutive exons if the CONSTITUTIVE argument is
|
|
1412 true.
|
|
1413
|
|
1414 Returntype : listref to Bio::EnsEMBL::Exon objects
|
|
1415 Exceptions : none
|
|
1416 Caller : general
|
|
1417 Status : Stable
|
|
1418
|
|
1419 =cut
|
|
1420
|
|
1421 sub get_all_Exons {
|
|
1422 my ( $self, @args ) = @_;
|
|
1423
|
|
1424 my $constitutive;
|
|
1425 if (@args) {
|
|
1426 $constitutive = rearrange( ['CONSTITUTIVE'], @args );
|
|
1427 }
|
|
1428
|
|
1429 if (!defined( $self->{'_trans_exon_array'} )
|
|
1430 && defined( $self->adaptor() ) )
|
|
1431 {
|
|
1432 $self->{'_trans_exon_array'} =
|
|
1433 $self->adaptor()->db()->get_ExonAdaptor()
|
|
1434 ->fetch_all_by_Transcript($self);
|
|
1435 }
|
|
1436
|
|
1437 my @result;
|
|
1438 if ( defined($constitutive) && $constitutive != 0 ) {
|
|
1439 foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
|
|
1440 if ( $exon->is_constitutive() ) {
|
|
1441 push( @result, $exon );
|
|
1442 }
|
|
1443 }
|
|
1444 } else {
|
|
1445 @result = @{ $self->{'_trans_exon_array'} };
|
|
1446 }
|
|
1447
|
|
1448 return \@result;
|
|
1449 } ## end sub get_all_Exons
|
|
1450
|
|
1451 =head2 get_all_constitutive_Exons
|
|
1452
|
|
1453 Arg : None
|
|
1454
|
|
1455 Examples : my @exons = @{ $transcript->get_all_constitutive_Exons() };
|
|
1456
|
|
1457 Description: Returns an listref of the constitutive exons in this
|
|
1458 transcript in order, i.e. the first exon in the
|
|
1459 listref is the 5prime most exon in the transcript.
|
|
1460
|
|
1461 Returntype : listref to Bio::EnsEMBL::Exon objects
|
|
1462 Exceptions : none
|
|
1463 Caller : general
|
|
1464 Status : Stable
|
|
1465
|
|
1466 =cut
|
|
1467
|
|
1468 sub get_all_constitutive_Exons {
|
|
1469 my ($self) = @_;
|
|
1470 return $self->get_all_Exons( '-constitutive' => 1 );
|
|
1471 }
|
|
1472
|
|
1473 =head2 get_all_IntronSupportingEvidence
|
|
1474
|
|
1475 Example : $ise->get_all_IntronSupportingEvidence();
|
|
1476 Description : Fetches all ISE instances linked to this Transript
|
|
1477 Returntype : ArrayRef[Bio::EnsEMBL::IntronSupportEvidence] retrieved from
|
|
1478 the DB or from those added via C<add_IntronSupportingEvidence>
|
|
1479 Exceptions : None
|
|
1480
|
|
1481 =cut
|
|
1482
|
|
1483 sub get_all_IntronSupportingEvidence {
|
|
1484 my ($self) = @_;
|
|
1485 if(! defined $self->{_ise_array} && defined $self->adaptor()) {
|
|
1486 my $isea = $self->adaptor()->db()->get_IntronSupportingEvidenceAdaptor();
|
|
1487 $self->{_ise_array} = $isea->fetch_all_by_Transcript($self);
|
|
1488 }
|
|
1489 return $self->{_ise_array};
|
|
1490 }
|
|
1491
|
|
1492
|
|
1493 =head2 add_IntronSupportingEvidence
|
|
1494
|
|
1495 Arg [1] : Bio::EnsEMBL::IntronSupportEvidence Object to add
|
|
1496 Example : $ise->add_IntronSupportingEvidence($ise);
|
|
1497 Description : Adds the IntronSupportEvidence instance to this Transcript. The
|
|
1498 code checks to see if it is a unique ISE instance
|
|
1499 Returntype : Boolean; true means it was added. False means it was not
|
|
1500 as this ISE was already attached
|
|
1501 Exceptions : None
|
|
1502
|
|
1503 =cut
|
|
1504
|
|
1505 sub add_IntronSupportingEvidence {
|
|
1506 my ($self, $ise) = @_;
|
|
1507 assert_ref($ise, 'Bio::EnsEMBL::IntronSupportingEvidence', 'IntronSupportingEvidence');
|
|
1508 my $unique = 1;
|
|
1509 foreach my $other_ise (@{$self->{_ise_array}}) {
|
|
1510 if($ise->equals($other_ise)) {
|
|
1511 $unique = 0;
|
|
1512 last;
|
|
1513 }
|
|
1514 }
|
|
1515 if($unique) {
|
|
1516 push(@{$self->{_ise_array}}, $ise);
|
|
1517 return 1;
|
|
1518 }
|
|
1519 return 0;
|
|
1520 }
|
|
1521
|
|
1522 =head2 get_all_Introns
|
|
1523
|
|
1524 Arg [1] : none
|
|
1525 Example : my @introns = @{$transcript->get_all_Introns()};
|
|
1526 Description: Returns an listref of the introns in this transcript in order.
|
|
1527 i.e. the first intron in the listref is the 5prime most exon in
|
|
1528 the transcript.
|
|
1529 Returntype : listref to Bio::EnsEMBL::Intron objects
|
|
1530 Exceptions : none
|
|
1531 Caller : general
|
|
1532 Status : Stable
|
|
1533
|
|
1534 =cut
|
|
1535
|
|
1536 sub get_all_Introns {
|
|
1537 my ($self) = @_;
|
|
1538 if( ! defined $self->{'_trans_exon_array'} && defined $self->adaptor() ) {
|
|
1539 $self->{'_trans_exon_array'} = $self->adaptor()->db()->
|
|
1540 get_ExonAdaptor()->fetch_all_by_Transcript( $self );
|
|
1541 }
|
|
1542
|
|
1543 my @introns=();
|
|
1544 my @exons = @{$self->{'_trans_exon_array'}};
|
|
1545 for(my $i=0; $i < scalar(@exons)-1; $i++){
|
|
1546 my $intron = new Bio::EnsEMBL::Intron($exons[$i],$exons[$i+1]);
|
|
1547 push(@introns, $intron)
|
|
1548 }
|
|
1549 return \@introns;
|
|
1550 }
|
|
1551
|
|
1552
|
|
1553 =head2 length
|
|
1554
|
|
1555 Args : none
|
|
1556 Example : my $t_length = $transcript->length
|
|
1557 Description: Returns the sum of the length of all the exons in the transcript.
|
|
1558 Returntype : int
|
|
1559 Exceptions : none
|
|
1560 Caller : general
|
|
1561 Status : Stable
|
|
1562
|
|
1563 =cut
|
|
1564
|
|
1565 sub length {
|
|
1566 my( $self ) = @_;
|
|
1567
|
|
1568 my $length = 0;
|
|
1569 foreach my $ex (@{$self->get_all_Exons}) {
|
|
1570 $length += $ex->length;
|
|
1571 }
|
|
1572
|
|
1573 # adjust the length if post transcriptional edits are enabled
|
|
1574 if($self->edits_enabled()) {
|
|
1575 foreach my $se (@{$self->get_all_SeqEdits()}) {
|
|
1576 $length += $se->length_diff();
|
|
1577 }
|
|
1578 }
|
|
1579
|
|
1580 return $length;
|
|
1581 }
|
|
1582
|
|
1583
|
|
1584 =head2 flush_Exons
|
|
1585
|
|
1586 Arg [1] : none
|
|
1587 Example : $transcript->flush_Exons();
|
|
1588 Description: Removes all Exons from this transcript and flushes related
|
|
1589 internal caches.
|
|
1590 Returntype : none
|
|
1591 Exceptions : none
|
|
1592 Caller : general
|
|
1593 Status : Stable
|
|
1594
|
|
1595 =cut
|
|
1596
|
|
1597 sub flush_Exons {
|
|
1598 my ($self) = @_;
|
|
1599
|
|
1600 $self->{'transcript_mapper'} = undef;
|
|
1601 $self->{'coding_region_start'} = undef;
|
|
1602 $self->{'coding_region_end'} = undef;
|
|
1603 $self->{'cdna_coding_start'} = undef;
|
|
1604 $self->{'cdna_coding_end'} = undef;
|
|
1605 $self->{'start'} = undef;
|
|
1606 $self->{'end'} = undef;
|
|
1607 $self->{'strand'} = undef;
|
|
1608
|
|
1609 $self->{'_trans_exon_array'} = [];
|
|
1610 }
|
|
1611
|
|
1612 =head2 flush_IntronSupportingEvidence
|
|
1613
|
|
1614 Example : $transcript->flush_IntronSupportingEvidence();
|
|
1615 Description: Removes all IntronSupportingEvidence from this transcript
|
|
1616 Returntype : none
|
|
1617 Exceptions : none
|
|
1618 Caller : general
|
|
1619 Status : Stable
|
|
1620
|
|
1621 =cut
|
|
1622
|
|
1623 sub flush_IntronSupportingEvidence {
|
|
1624 my ($self) = @_;
|
|
1625 $self->{_ise_array} = [];
|
|
1626 return;
|
|
1627 }
|
|
1628
|
|
1629 =head2 five_prime_utr
|
|
1630
|
|
1631 Arg [1] : none
|
|
1632 Example : my $five_prime = $transcrpt->five_prime_utr
|
|
1633 or warn "No five prime UTR";
|
|
1634 Description: Obtains a Bio::Seq object of the five prime UTR of this
|
|
1635 transcript. If this transcript is a pseudogene
|
|
1636 (i.e. non-translating) or has no five prime UTR undef is
|
|
1637 returned instead.
|
|
1638 Returntype : Bio::Seq or undef
|
|
1639 Exceptions : none
|
|
1640 Caller : general
|
|
1641 Status : Stable
|
|
1642
|
|
1643 =cut
|
|
1644
|
|
1645 sub five_prime_utr {
|
|
1646 my $self = shift;
|
|
1647
|
|
1648 my $cdna_coding_start = $self->cdna_coding_start();
|
|
1649
|
|
1650 return undef if(!$cdna_coding_start);
|
|
1651
|
|
1652 my $seq = substr($self->spliced_seq, 0, $cdna_coding_start - 1);
|
|
1653
|
|
1654 return undef if(!$seq);
|
|
1655
|
|
1656 return
|
|
1657 Bio::Seq->new( -id => $self->display_id,
|
|
1658 -moltype => 'dna',
|
|
1659 -alphabet => 'dna',
|
|
1660 -seq => $seq );
|
|
1661 }
|
|
1662
|
|
1663
|
|
1664 =head2 three_prime_utr
|
|
1665
|
|
1666 Arg [1] : none
|
|
1667 Example : my $three_prime = $transcrpt->three_prime_utr
|
|
1668 or warn "No three prime UTR";
|
|
1669 Description: Obtains a Bio::Seq object of the three prime UTR of this
|
|
1670 transcript. If this transcript is a pseudogene
|
|
1671 (i.e. non-translating) or has no three prime UTR,
|
|
1672 undef is returned instead.
|
|
1673 Returntype : Bio::Seq or undef
|
|
1674 Exceptions : none
|
|
1675 Caller : general
|
|
1676 Status : Stable
|
|
1677
|
|
1678 =cut
|
|
1679
|
|
1680 sub three_prime_utr {
|
|
1681 my $self = shift;
|
|
1682
|
|
1683 my $cdna_coding_end = $self->cdna_coding_end();
|
|
1684
|
|
1685 return undef if(!$cdna_coding_end);
|
|
1686
|
|
1687 my $seq = substr($self->spliced_seq, $cdna_coding_end);
|
|
1688
|
|
1689 return undef if(!$seq);
|
|
1690
|
|
1691 return
|
|
1692 Bio::Seq->new( -id => $self->display_id,
|
|
1693 -moltype => 'dna',
|
|
1694 -alphabet => 'dna',
|
|
1695 -seq => $seq );
|
|
1696 }
|
|
1697
|
|
1698 =head2 five_prime_utr_Feature
|
|
1699
|
|
1700 Example : my $five_prime = $transcrpt->five_prime_utr_Feature
|
|
1701 or warn "No five prime UTR";
|
|
1702 Description: Returns the genomic coordinates of the start and end of the
|
|
1703 5' UTR of this transcript. Note that if you want the sequence
|
|
1704 of the 5' UTR use C<five_prime_utr> as this will return the
|
|
1705 sequence from the spliced transcript.
|
|
1706 Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
|
|
1707 Exceptions : none
|
|
1708
|
|
1709 =cut
|
|
1710
|
|
1711 sub five_prime_utr_Feature {
|
|
1712 my ($self) = @_;
|
|
1713 my ($start, $end);
|
|
1714 my $cdna_coding = $self->cdna_coding_start();
|
|
1715 my ($genomic_pos) = $self->cdna2genomic($cdna_coding, $cdna_coding);
|
|
1716 if($self->strand() == 1) {
|
|
1717 $start = $self->seq_region_start();
|
|
1718 if($start == $genomic_pos->start()) {
|
|
1719 return; # just return as we have no UTR
|
|
1720 }
|
|
1721 $end = $genomic_pos->start() - 1;
|
|
1722 }
|
|
1723 else {
|
|
1724 $end = $self->seq_region_end();
|
|
1725 if($end == $genomic_pos->start()) {
|
|
1726 return; # just return as we have no UTR
|
|
1727 }
|
|
1728 $start = $genomic_pos->start() + 1;
|
|
1729 }
|
|
1730
|
|
1731 my $feature = Bio::EnsEMBL::Feature->new(
|
|
1732 -START => $start,
|
|
1733 -END => $end,
|
|
1734 -STRAND => $self->strand(),
|
|
1735 -SLICE => $self->slice(),
|
|
1736 );
|
|
1737 return $feature;
|
|
1738 }
|
|
1739
|
|
1740 =head2 three_prime_utr_Feature
|
|
1741
|
|
1742 Example : my $five_prime = $transcrpt->three_prime_utr_Feature
|
|
1743 or warn "No three prime UTR";
|
|
1744 Description: Returns the genomic coordinates of the start and end of the
|
|
1745 3' UTR of this transcript. Note that if you want the sequence
|
|
1746 of the 3' UTR use C<three_prime_utr> as this will return the
|
|
1747 sequence from the spliced transcript.
|
|
1748 Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
|
|
1749 Exceptions : none
|
|
1750
|
|
1751 =cut
|
|
1752
|
|
1753 sub three_prime_utr_Feature {
|
|
1754 my ($self) = @_;
|
|
1755 my ($start, $end);
|
|
1756 my $cdna_coding = $self->cdna_coding_end();
|
|
1757 my ($genomic_pos) = $self->cdna2genomic($cdna_coding, $cdna_coding);
|
|
1758 if($self->strand() == 1) {
|
|
1759 $end = $self->seq_region_end();
|
|
1760 if($end == $genomic_pos->start()) {
|
|
1761 return; # just return as we have no UTR
|
|
1762 }
|
|
1763 $start = $genomic_pos->start() + 1;
|
|
1764 }
|
|
1765 else {
|
|
1766 $start = $self->seq_region_start();
|
|
1767 if($start == $genomic_pos->start()) {
|
|
1768 return; # just return as we have no UTR
|
|
1769 }
|
|
1770 $end = $genomic_pos->start() - 1;
|
|
1771 }
|
|
1772 my $feature = Bio::EnsEMBL::Feature->new(
|
|
1773 -START => $start,
|
|
1774 -END => $end,
|
|
1775 -STRAND => $self->strand(),
|
|
1776 -SLICE => $self->slice(),
|
|
1777 );
|
|
1778 return $feature;
|
|
1779 }
|
|
1780
|
|
1781
|
|
1782 =head2 get_all_translateable_Exons
|
|
1783
|
|
1784 Args : none
|
|
1785 Example : none
|
|
1786 Description: Returns a list of exons that translate with the
|
|
1787 start and end exons truncated to the CDS regions.
|
|
1788 This function does not take into account any SeqEdits
|
|
1789 (post transcriptional RNA modifictions) when constructing the
|
|
1790 the 'translateable' exons, and it does not update the phase
|
|
1791 information of the created 'translateable' exons.
|
|
1792
|
|
1793 If this transcript is a pseudogene (i.e. non-translateable)
|
|
1794 a reference to an empty list is returned.
|
|
1795
|
|
1796 Returntype : listref Bio::EnsEMBL::Exon
|
|
1797 Exceptions : throw if translation has invalid information
|
|
1798 Caller : Genebuild
|
|
1799 Status : Stable
|
|
1800
|
|
1801 =cut
|
|
1802
|
|
1803
|
|
1804 sub get_all_translateable_Exons {
|
|
1805 my ( $self ) = @_;
|
|
1806
|
|
1807 #return an empty list if there is no translation (i.e. pseudogene)
|
|
1808 my $translation = $self->translation or return [];
|
|
1809 my $start_exon = $translation->start_Exon;
|
|
1810 my $end_exon = $translation->end_Exon;
|
|
1811 my $t_start = $translation->start;
|
|
1812 my $t_end = $translation->end;
|
|
1813
|
|
1814 my( @translateable );
|
|
1815
|
|
1816 foreach my $ex (@{$self->get_all_Exons}) {
|
|
1817
|
|
1818 if ($ex ne $start_exon and ! @translateable) {
|
|
1819 next; # Not yet in translated region
|
|
1820 }
|
|
1821
|
|
1822 my $length = $ex->length;
|
|
1823
|
|
1824 my $adjust_start = 0;
|
|
1825 my $adjust_end = 0;
|
|
1826 # Adjust to translation start if this is the start exon
|
|
1827 if ($ex == $start_exon ) {
|
|
1828 if ($t_start < 1 or $t_start > $length) {
|
|
1829 warning("WARN: Translation start '$t_start' is outside exon $ex length=$length");
|
|
1830 return [];
|
|
1831 }
|
|
1832 $adjust_start = $t_start - 1;
|
|
1833 }
|
|
1834
|
|
1835 # Adjust to translation end if this is the end exon
|
|
1836 if ($ex == $end_exon) {
|
|
1837 # if ($t_end < 1 or $t_end > $length) {
|
|
1838 # throw("Translation end '$t_end' is outside exon $ex length=$length");
|
|
1839 # }
|
|
1840 $adjust_end = $t_end - $length;
|
|
1841 }
|
|
1842
|
|
1843 # Make a truncated exon if the translation start or
|
|
1844 # end causes the coordinates to be altered.
|
|
1845 if ($adjust_end || $adjust_start) {
|
|
1846 my $newex = $ex->adjust_start_end( $adjust_start, $adjust_end );
|
|
1847
|
|
1848 push( @translateable, $newex );
|
|
1849 } else {
|
|
1850 push(@translateable, $ex);
|
|
1851 }
|
|
1852
|
|
1853 # Exit the loop when we've found the last exon
|
|
1854 last if $ex eq $end_exon;
|
|
1855 }
|
|
1856 return \@translateable;
|
|
1857 }
|
|
1858
|
|
1859
|
|
1860 =head2 translate
|
|
1861
|
|
1862 Args : none
|
|
1863 Example : none
|
|
1864 Description: Return the peptide (plus eventual stop codon) for
|
|
1865 this transcript. Does N-padding of non-phase
|
|
1866 matching exons. It uses translateable_seq
|
|
1867 internally. Returns undef if this Transcript does
|
|
1868 not have a translation (i.e. pseudogene).
|
|
1869 Returntype : Bio::Seq or undef
|
|
1870 Exceptions : none
|
|
1871 Caller : general
|
|
1872 Status : Stable
|
|
1873
|
|
1874 =cut
|
|
1875
|
|
1876 sub translate {
|
|
1877 my ($self) = @_;
|
|
1878
|
|
1879 if ( !defined( $self->translation() ) ) { return undef }
|
|
1880
|
|
1881 my $mrna = $self->translateable_seq();
|
|
1882
|
|
1883 # Alternative codon tables (such as the mitochondrial codon table)
|
|
1884 # can be specified for a sequence region via the seq_region_attrib
|
|
1885 # table. A list of codon tables and their codes is at:
|
|
1886 # http://www.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c
|
|
1887
|
|
1888 my $codon_table_id;
|
|
1889 my ( $complete5, $complete3 );
|
|
1890 if ( defined( $self->slice() ) ) {
|
|
1891 my $attrib;
|
|
1892
|
|
1893 ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
|
|
1894 if ( defined($attrib) ) {
|
|
1895 $codon_table_id = $attrib->value();
|
|
1896 }
|
|
1897
|
|
1898 ($attrib) = @{ $self->slice()->get_all_Attributes('complete5') };
|
|
1899 if ( defined($attrib) ) {
|
|
1900 $complete5 = $attrib->value();
|
|
1901 }
|
|
1902
|
|
1903 ($attrib) = @{ $self->slice()->get_all_Attributes('complete3') };
|
|
1904 if ( defined($attrib) ) {
|
|
1905 $complete3 = $attrib->value();
|
|
1906 }
|
|
1907 }
|
|
1908 $codon_table_id ||= 1; # default vertebrate codon table
|
|
1909
|
|
1910 # Remove final stop codon from the mrna if it is present. Produced
|
|
1911 # peptides will not have '*' at end. If terminal stop codon is
|
|
1912 # desired call translatable_seq directly and produce a translation
|
|
1913 # from it.
|
|
1914
|
|
1915 if ( CORE::length($mrna) % 3 == 0 ) {
|
|
1916 my $codon_table =
|
|
1917 Bio::Tools::CodonTable->new( -id => $codon_table_id );
|
|
1918
|
|
1919 if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) {
|
|
1920 substr( $mrna, -3, 3, '' );
|
|
1921 }
|
|
1922 }
|
|
1923
|
|
1924 if ( CORE::length($mrna) < 1 ) { return undef }
|
|
1925
|
|
1926 my $display_id = $self->translation->display_id()
|
|
1927 || scalar( $self->translation() );
|
|
1928
|
|
1929 my $peptide = Bio::Seq->new( -seq => $mrna,
|
|
1930 -moltype => 'dna',
|
|
1931 -alphabet => 'dna',
|
|
1932 -id => $display_id );
|
|
1933
|
|
1934 my $translation =
|
|
1935 $peptide->translate( undef, undef, undef, $codon_table_id, undef,
|
|
1936 undef, $complete5, $complete3 );
|
|
1937
|
|
1938 if ( $self->edits_enabled() ) {
|
|
1939 $self->translation()->modify_translation($translation);
|
|
1940 }
|
|
1941
|
|
1942 return $translation;
|
|
1943 } ## end sub translate
|
|
1944
|
|
1945
|
|
1946 =head2 seq
|
|
1947
|
|
1948 Description: Returns a Bio::Seq object which consists of just
|
|
1949 : the sequence of the exons concatenated together,
|
|
1950 : without messing about with padding with N\'s from
|
|
1951 : Exon phases like B<dna_seq> does.
|
|
1952 Args : none
|
|
1953 Example : none
|
|
1954 Returntype : Bio::Seq
|
|
1955 Exceptions : none
|
|
1956 Caller : general
|
|
1957 Status : Stable
|
|
1958
|
|
1959 =cut
|
|
1960
|
|
1961 sub seq {
|
|
1962 my ($self) = @_;
|
|
1963
|
|
1964 return
|
|
1965 Bio::Seq->new( -id => $self->display_id,
|
|
1966 -moltype => 'dna',
|
|
1967 -alphabet => 'dna',
|
|
1968 -seq => $self->spliced_seq );
|
|
1969 }
|
|
1970
|
|
1971
|
|
1972 =head2 pep2genomic
|
|
1973
|
|
1974 Description: See Bio::EnsEMBL::TranscriptMapper::pep2genomic
|
|
1975
|
|
1976 =cut
|
|
1977
|
|
1978 sub pep2genomic {
|
|
1979 my $self = shift;
|
|
1980 return $self->get_TranscriptMapper()->pep2genomic(@_);
|
|
1981 }
|
|
1982
|
|
1983
|
|
1984 =head2 genomic2pep
|
|
1985
|
|
1986 Description: See Bio::EnsEMBL::TranscriptMapper::genomic2pep
|
|
1987
|
|
1988 =cut
|
|
1989
|
|
1990 sub genomic2pep {
|
|
1991 my $self = shift;
|
|
1992 return $self->get_TranscriptMapper()->genomic2pep(@_);
|
|
1993 }
|
|
1994
|
|
1995
|
|
1996 =head2 cdna2genomic
|
|
1997
|
|
1998 Description: See Bio::EnsEMBL::TranscriptMapper::cdna2genomic
|
|
1999
|
|
2000 =cut
|
|
2001
|
|
2002 sub cdna2genomic {
|
|
2003 my $self = shift;
|
|
2004 return $self->get_TranscriptMapper()->cdna2genomic(@_);
|
|
2005 }
|
|
2006
|
|
2007
|
|
2008 =head2 genomic2cdna
|
|
2009
|
|
2010 Description: See Bio::EnsEMBL::TranscriptMapper::genomic2cdna
|
|
2011
|
|
2012 =cut
|
|
2013
|
|
2014 sub genomic2cdna {
|
|
2015 my $self = shift;
|
|
2016 return $self->get_TranscriptMapper->genomic2cdna(@_);
|
|
2017 }
|
|
2018
|
|
2019
|
|
2020 =head2 get_TranscriptMapper
|
|
2021
|
|
2022 Args : none
|
|
2023 Example : my $trans_mapper = $transcript->get_TranscriptMapper();
|
|
2024 Description: Gets a TranscriptMapper object which can be used to perform
|
|
2025 a variety of coordinate conversions relating this transcript,
|
|
2026 genomic sequence and peptide resulting from this transcripts
|
|
2027 translation.
|
|
2028 Returntype : Bio::EnsEMBL::TranscriptMapper
|
|
2029 Exceptions : none
|
|
2030 Caller : cdna2genomic, pep2genomic, genomic2cdna, cdna2genomic
|
|
2031 Status : Stable
|
|
2032
|
|
2033 =cut
|
|
2034
|
|
2035 sub get_TranscriptMapper {
|
|
2036 my ( $self ) = @_;
|
|
2037 return $self->{'transcript_mapper'} ||=
|
|
2038 Bio::EnsEMBL::TranscriptMapper->new($self);
|
|
2039 }
|
|
2040
|
|
2041
|
|
2042 =head2 start_Exon
|
|
2043
|
|
2044 Title : start_Exon
|
|
2045 Usage : $start_exon = $transcript->start_Exon;
|
|
2046 Returntype : Bio::EnsEMBL::Exon
|
|
2047 Description : The first exon in the transcript.
|
|
2048 Args : NONE
|
|
2049 Status : Stable
|
|
2050
|
|
2051 =cut
|
|
2052
|
|
2053 sub start_Exon {
|
|
2054 my $self = shift;
|
|
2055 return $self->get_all_Exons()->[0];
|
|
2056 }
|
|
2057
|
|
2058
|
|
2059 =head2 end_Exon
|
|
2060
|
|
2061 Title : end_exon
|
|
2062 Usage : $end_exon = $transcript->end_Exon;
|
|
2063 Description : The last exon in the transcript.
|
|
2064 Returntype : Bio::EnsEMBL::Exon
|
|
2065 Args : NONE
|
|
2066 Status : Stable
|
|
2067
|
|
2068 =cut
|
|
2069
|
|
2070 sub end_Exon {
|
|
2071 my $self = shift;
|
|
2072 return $self->get_all_Exons()->[-1];
|
|
2073 }
|
|
2074
|
|
2075
|
|
2076 =head2 description
|
|
2077
|
|
2078 Title : description
|
|
2079 Usage : $obj->description($newval)
|
|
2080 Function:
|
|
2081 Returns : String
|
|
2082 Args : newvalue (optional)
|
|
2083 Status : Stable
|
|
2084
|
|
2085 =cut
|
|
2086
|
|
2087 sub description {
|
|
2088 my $self = shift;
|
|
2089 $self->{'description'} = shift if( @_ );
|
|
2090 return $self->{'description'};
|
|
2091 }
|
|
2092
|
|
2093
|
|
2094 =head2 version
|
|
2095
|
|
2096 Title : version
|
|
2097 Usage : $obj->version()
|
|
2098 Function:
|
|
2099 Returns : String
|
|
2100 Args :
|
|
2101 Status : Stable
|
|
2102
|
|
2103 =cut
|
|
2104
|
|
2105 sub version {
|
|
2106 my $self = shift;
|
|
2107 $self->{'version'} = shift if( @_ );
|
|
2108 return $self->{'version'};
|
|
2109 }
|
|
2110
|
|
2111
|
|
2112 =head2 stable_id
|
|
2113
|
|
2114 Title : stable_id
|
|
2115 Usage : $obj->stable_id
|
|
2116 Function:
|
|
2117 Returns : String
|
|
2118 Args :
|
|
2119 Status : Stable
|
|
2120
|
|
2121 =cut
|
|
2122
|
|
2123 sub stable_id {
|
|
2124 my $self = shift;
|
|
2125 $self->{'stable_id'} = shift if( @_ );
|
|
2126 return $self->{'stable_id'};
|
|
2127 }
|
|
2128
|
|
2129
|
|
2130 =head2 is_current
|
|
2131
|
|
2132 Arg [1] : Boolean $is_current
|
|
2133 Example : $transcript->is_current(1)
|
|
2134 Description: Getter/setter for is_current state of this transcript.
|
|
2135 Returntype : Int
|
|
2136 Exceptions : none
|
|
2137 Caller : general
|
|
2138 Status : Stable
|
|
2139
|
|
2140 =cut
|
|
2141
|
|
2142 sub is_current {
|
|
2143 my $self = shift;
|
|
2144 $self->{'is_current'} = shift if (@_);
|
|
2145 return $self->{'is_current'};
|
|
2146 }
|
|
2147
|
|
2148
|
|
2149 =head2 created_date
|
|
2150
|
|
2151 Arg [1] : (optional) string to be used for the created date
|
|
2152 Example : none
|
|
2153 Description: get/set for attribute created date
|
|
2154 Returntype : string
|
|
2155 Exceptions : none
|
|
2156 Caller : general
|
|
2157 Status : Stable
|
|
2158
|
|
2159 =cut
|
|
2160
|
|
2161 sub created_date {
|
|
2162 my $self = shift;
|
|
2163 $self->{'created_date'} = shift if ( @_ );
|
|
2164 return $self->{'created_date'};
|
|
2165 }
|
|
2166
|
|
2167
|
|
2168 =head2 modified_date
|
|
2169
|
|
2170 Arg [1] : (optional) string to be used for the modified date
|
|
2171 Example : none
|
|
2172 Description: get/set for attribute modified date
|
|
2173 Returntype : string
|
|
2174 Exceptions : none
|
|
2175 Caller : general
|
|
2176 Status : Stable
|
|
2177
|
|
2178 =cut
|
|
2179
|
|
2180 sub modified_date {
|
|
2181 my $self = shift;
|
|
2182 $self->{'modified_date'} = shift if ( @_ );
|
|
2183 return $self->{'modified_date'};
|
|
2184 }
|
|
2185
|
|
2186
|
|
2187 =head2 swap_exons
|
|
2188
|
|
2189 Arg [1] : Bio::EnsEMBL::Exon $old_Exon
|
|
2190 An exon that should be replaced
|
|
2191 Arg [2] : Bio::EnsEMBL::Exon $new_Exon
|
|
2192 The replacement Exon
|
|
2193 Example : none
|
|
2194 Description: exchange an exon in the current Exon list with a given one.
|
|
2195 Usually done before storing of Gene, so the Exons can
|
|
2196 be shared between Transcripts.
|
|
2197 Returntype : none
|
|
2198 Exceptions : none
|
|
2199 Caller : GeneAdaptor->store()
|
|
2200 Status : Stable
|
|
2201
|
|
2202 =cut
|
|
2203
|
|
2204 sub swap_exons {
|
|
2205 my ( $self, $old_exon, $new_exon ) = @_;
|
|
2206
|
|
2207 my $arref = $self->{'_trans_exon_array'};
|
|
2208 for(my $i = 0; $i < @$arref; $i++) {
|
|
2209 if($arref->[$i] == $old_exon) {
|
|
2210 $arref->[$i] = $new_exon;
|
|
2211 last;
|
|
2212 }
|
|
2213 }
|
|
2214
|
|
2215 if( defined $self->{'translation'} ) {
|
|
2216 if( $self->translation()->start_Exon() == $old_exon ) {
|
|
2217 $self->translation()->start_Exon( $new_exon );
|
|
2218 }
|
|
2219 if( $self->translation()->end_Exon() == $old_exon ) {
|
|
2220 $self->translation()->end_Exon( $new_exon );
|
|
2221 }
|
|
2222 }
|
|
2223 }
|
|
2224
|
|
2225
|
|
2226 =head2 equals
|
|
2227
|
|
2228 Arg [1] : Bio::EnsEMBL::Transcript transcript
|
|
2229 Example : if ($transcriptA->equals($transcriptB)) { ... }
|
|
2230 Description : Compares two transcripts for equality.
|
|
2231 The test for eqality goes through the following list
|
|
2232 and terminates at the first true match:
|
|
2233
|
|
2234 1. If Bio::EnsEMBL::Feature::equals() returns false,
|
|
2235 then the transcripts are *not* equal.
|
|
2236 2. If the biotypes differ, then the transcripts are
|
|
2237 *not* equal.
|
|
2238 3. If both transcripts have stable IDs: if these are
|
|
2239 the same, the transcripts are equal, otherwise not.
|
|
2240 4. If both transcripts have the same number of exons
|
|
2241 and if these are (when compared pair-wise sorted by
|
|
2242 start-position and length) the same, then they are
|
|
2243 equal, otherwise not.
|
|
2244
|
|
2245 Return type : Boolean (0, 1)
|
|
2246
|
|
2247 Exceptions : Thrown if a non-transcript is passed as the argument.
|
|
2248
|
|
2249 =cut
|
|
2250
|
|
2251 sub equals {
|
|
2252 my ( $self, $transcript ) = @_;
|
|
2253
|
|
2254 if ( !defined($transcript) ) { return 0 }
|
|
2255 if ( $self eq $transcript ) { return 1 }
|
|
2256
|
|
2257 assert_ref( $transcript, 'Bio::EnsEMBL::Transcript' );
|
|
2258
|
|
2259 my $feature_equals = $self->SUPER::equals($transcript);
|
|
2260 if ( defined($feature_equals) && $feature_equals == 0 ) {
|
|
2261 return 0;
|
|
2262 }
|
|
2263
|
|
2264 if ( $self->biotype() ne $transcript->biotype() ) {
|
|
2265 return 0;
|
|
2266 }
|
|
2267
|
|
2268 if ( defined( $self->stable_id() ) &&
|
|
2269 defined( $transcript->stable_id() ) )
|
|
2270 {
|
|
2271 if ( $self->stable_id() eq $transcript->stable_id() ) {
|
|
2272 return 1;
|
|
2273 }
|
|
2274 else {
|
|
2275 return 0;
|
|
2276 }
|
|
2277 }
|
|
2278
|
|
2279 my @self_exons = sort {
|
|
2280 $a->start() <=> $b->start() ||
|
|
2281 $a->length() <=> $b->length()
|
|
2282 } @{ $self->get_all_Exons() };
|
|
2283 my @transcript_exons = sort {
|
|
2284 $a->start() <=> $b->start() ||
|
|
2285 $a->length() <=> $b->length()
|
|
2286 } @{ $transcript->get_all_Exons() };
|
|
2287
|
|
2288 if ( scalar(@self_exons) != scalar(@transcript_exons) ) {
|
|
2289 return 0;
|
|
2290 }
|
|
2291
|
|
2292 while (@self_exons) {
|
|
2293 my $self_exon = shift(@self_exons);
|
|
2294 my $transcript_exon = shift(@transcript_exons);
|
|
2295
|
|
2296 if ( !$self_exon->equals($transcript_exon) ) {
|
|
2297 return 0;
|
|
2298 }
|
|
2299 }
|
|
2300
|
|
2301 return 1;
|
|
2302 } ## end sub equals
|
|
2303
|
|
2304 =head2 transform
|
|
2305
|
|
2306 Arg 1 : String $coordinate_system_name
|
|
2307 Arg [2] : String $coordinate_system_version
|
|
2308 Example : $transcript = $transcript->transform('contig');
|
|
2309 $transcript = $transcript->transform('chromosome', 'NCBI33');
|
|
2310 Description: Moves this Transcript to the given coordinate system.
|
|
2311 If this Transcript has Exons attached, they move as well.
|
|
2312 A new Transcript is returned. If the transcript cannot be
|
|
2313 transformed to the destination coordinate system undef is
|
|
2314 returned instead.
|
|
2315 Returntype : Bio::EnsEMBL::Transcript
|
|
2316 Exceptions : wrong parameters
|
|
2317 Caller : general
|
|
2318 Status : Medium Risk
|
|
2319 : deprecation needs to be removed at some time
|
|
2320
|
|
2321 =cut
|
|
2322
|
|
2323
|
|
2324 sub transform {
|
|
2325 my $self = shift;
|
|
2326
|
|
2327 # catch for old style transform calls
|
|
2328 if( ref $_[0] eq 'HASH') {
|
|
2329 deprecate("Calling transform with a hashref is deprecate.\n" .
|
|
2330 'Use $trans->transfer($slice) or ' .
|
|
2331 '$trans->transform("coordsysname") instead.');
|
|
2332 my (undef, $new_ex) = each(%{$_[0]});
|
|
2333 return $self->transfer($new_ex->slice);
|
|
2334 }
|
|
2335
|
|
2336 my $new_transcript = $self->SUPER::transform(@_);
|
|
2337 if ( !defined($new_transcript) ) {
|
|
2338 my @segments = @{ $self->project(@_) };
|
|
2339 # if it projects, maybe the exons transform well?
|
|
2340 # lazy load them here
|
|
2341 if ( !@segments ) {
|
|
2342 return undef;
|
|
2343 }
|
|
2344 $self->get_all_Exons();
|
|
2345 }
|
|
2346
|
|
2347
|
|
2348 if( exists $self->{'_trans_exon_array'} ) {
|
|
2349 my @new_exons;
|
|
2350 my ( $low_start, $hi_end, $slice );
|
|
2351 # we want to check whether the transform preserved 5prime 3prime
|
|
2352 # ordering. This assumes 5->3 order. No complaints on transsplicing.
|
|
2353
|
|
2354 my ( $last_new_start, $last_old_strand,
|
|
2355 $last_new_strand, $start_exon, $end_exon,
|
|
2356 $last_seq_region_name );
|
|
2357 my $first = 1;
|
|
2358 my $ignore_order = 0;
|
|
2359 my $order_broken = 0;
|
|
2360
|
|
2361 for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
|
|
2362 my $new_exon = $old_exon->transform( @_ );
|
|
2363 return undef if( !defined $new_exon );
|
|
2364 if( ! defined $new_transcript ) {
|
|
2365 if( !$first ) {
|
|
2366 if( $old_exon->strand() != $last_old_strand ) {
|
|
2367 # transsplicing, ignore ordering
|
|
2368 $ignore_order = 1;
|
|
2369 }
|
|
2370
|
|
2371 if( $new_exon->slice()->seq_region_name() ne
|
|
2372 $last_seq_region_name ) {
|
|
2373 return undef;
|
|
2374 }
|
|
2375
|
|
2376 if( $last_new_strand == 1 and
|
|
2377 $new_exon->start() < $last_new_start ) {
|
|
2378 $order_broken = 1;
|
|
2379 }
|
|
2380
|
|
2381 if( $last_new_strand == -1 and
|
|
2382 $new_exon->start() > $last_new_start ) {
|
|
2383 $order_broken = 1;
|
|
2384 }
|
|
2385
|
|
2386 #additional check that if exons were on same strand previously, they should be again
|
|
2387 if(($last_old_strand == $old_exon->strand()) and !($last_new_strand == $new_exon->strand())){
|
|
2388 return undef;
|
|
2389 }
|
|
2390
|
|
2391 if( $new_exon->start() < $low_start ) {
|
|
2392 $low_start = $new_exon->start();
|
|
2393 }
|
|
2394 if( $new_exon->end() > $hi_end ) {
|
|
2395 $hi_end = $new_exon->end();
|
|
2396 }
|
|
2397 } else {
|
|
2398 $first = 0;
|
|
2399 $low_start = $new_exon->start();
|
|
2400 $hi_end = $new_exon->end();
|
|
2401 }
|
|
2402
|
|
2403 $last_seq_region_name = $new_exon->slice()->seq_region_name();
|
|
2404 $last_old_strand = $old_exon->strand();
|
|
2405 $last_new_start = $new_exon->start();
|
|
2406 $last_new_strand = $new_exon->strand();
|
|
2407 }
|
|
2408
|
|
2409 if( defined $self->{'translation'} ) {
|
|
2410 if( $self->translation()->start_Exon() == $old_exon ) {
|
|
2411 $start_exon = $new_exon;
|
|
2412 }
|
|
2413 if( $self->translation()->end_Exon() == $old_exon ) {
|
|
2414 $end_exon = $new_exon;
|
|
2415 }
|
|
2416 }
|
|
2417 push( @new_exons, $new_exon );
|
|
2418 }
|
|
2419
|
|
2420 if( $order_broken && !$ignore_order ) {
|
|
2421 warning( "Order of exons broken in transform of ".$self->dbID() );
|
|
2422 return undef;
|
|
2423 }
|
|
2424
|
|
2425 if( !defined $new_transcript ) {
|
|
2426 %$new_transcript = %$self;
|
|
2427 bless $new_transcript, ref( $self );
|
|
2428 $new_transcript->start( $low_start );
|
|
2429 $new_transcript->end( $hi_end );
|
|
2430 $new_transcript->slice( $new_exons[0]->slice() );
|
|
2431 $new_transcript->strand( $new_exons[0]->strand() );
|
|
2432 }
|
|
2433
|
|
2434 $new_transcript->{'_trans_exon_array'} = \@new_exons;
|
|
2435
|
|
2436 # should be ok to do inside exon array loop
|
|
2437 # translations only exist together with the exons ...
|
|
2438
|
|
2439 if( defined $self->{'translation'} ) {
|
|
2440 my $new_translation;
|
|
2441 %$new_translation = %{$self->{'translation'}};;
|
|
2442 bless $new_translation, ref( $self->{'translation'} );
|
|
2443 $new_transcript->{'translation'} = $new_translation;
|
|
2444 $new_translation->start_Exon( $start_exon );
|
|
2445 $new_translation->end_Exon( $end_exon );
|
|
2446 }
|
|
2447 }
|
|
2448
|
|
2449 if( exists $self->{'_supporting_evidence'} ) {
|
|
2450 my @new_features;
|
|
2451 for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
|
|
2452 my $new_feature = $old_feature->transform( @_ );
|
|
2453 if (defined $new_feature) {
|
|
2454 push @new_features, $new_feature;
|
|
2455 }
|
|
2456 }
|
|
2457 $new_transcript->{'_supporting_evidence'} = \@new_features;
|
|
2458 }
|
|
2459
|
|
2460 if(exists $self->{_ise_array}) {
|
|
2461 my @new_features;
|
|
2462 foreach my $old_feature ( @{$self->{_ise_array}} ) {
|
|
2463 my $new_feature = $old_feature->transform(@_);
|
|
2464 push( @new_features, $new_feature );
|
|
2465 }
|
|
2466 $new_transcript->{_ise_array} = \@new_features;
|
|
2467 }
|
|
2468
|
|
2469
|
|
2470 # flush cached internal values that depend on the exon coords
|
|
2471 $new_transcript->{'transcript_mapper'} = undef;
|
|
2472 $new_transcript->{'coding_region_start'} = undef;
|
|
2473 $new_transcript->{'coding_region_end'} = undef;
|
|
2474 $new_transcript->{'cdna_coding_start'} = undef;
|
|
2475 $new_transcript->{'cdna_coding_end'} = undef;
|
|
2476
|
|
2477 return $new_transcript;
|
|
2478 }
|
|
2479
|
|
2480
|
|
2481 =head2 transfer
|
|
2482
|
|
2483 Arg 1 : Bio::EnsEMBL::Slice $destination_slice
|
|
2484 Example : $transcript = $transcript->transfer($slice);
|
|
2485 Description: Moves this transcript to the given slice.
|
|
2486 If this Transcripts has Exons attached, they move as well.
|
|
2487 Returntype : Bio::EnsEMBL::Transcript
|
|
2488 Exceptions : none
|
|
2489 Caller : general
|
|
2490 Status : Stable
|
|
2491
|
|
2492 =cut
|
|
2493
|
|
2494
|
|
2495 sub transfer {
|
|
2496 my $self = shift;
|
|
2497
|
|
2498 my $new_transcript = $self->SUPER::transfer( @_ );
|
|
2499 return undef unless $new_transcript;
|
|
2500
|
|
2501 if( defined $self->{'translation'} ) {
|
|
2502 my $new_translation;
|
|
2503 %$new_translation = %{$self->{'translation'}};;
|
|
2504 bless $new_translation, ref( $self->{'translation'} );
|
|
2505 $new_transcript->{'translation'} = $new_translation;
|
|
2506 }
|
|
2507
|
|
2508 if( exists $self->{'_trans_exon_array'} ) {
|
|
2509 my @new_exons;
|
|
2510 for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
|
|
2511 my $new_exon = $old_exon->transfer( @_ );
|
|
2512 if( defined $new_transcript->{'translation'} ) {
|
|
2513 if( $new_transcript->translation()->start_Exon() == $old_exon ) {
|
|
2514 $new_transcript->translation()->start_Exon( $new_exon );
|
|
2515 }
|
|
2516 if( $new_transcript->translation()->end_Exon() == $old_exon ) {
|
|
2517 $new_transcript->translation()->end_Exon( $new_exon );
|
|
2518 }
|
|
2519 }
|
|
2520 push( @new_exons, $new_exon );
|
|
2521 }
|
|
2522
|
|
2523 $new_transcript->{'_trans_exon_array'} = \@new_exons;
|
|
2524 }
|
|
2525
|
|
2526 if( exists $self->{'_supporting_evidence'} ) {
|
|
2527 my @new_features;
|
|
2528 for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
|
|
2529 my $new_feature = $old_feature->transfer( @_ );
|
|
2530 push( @new_features, $new_feature );
|
|
2531 }
|
|
2532 $new_transcript->{'_supporting_evidence'} = \@new_features;
|
|
2533 }
|
|
2534
|
|
2535 if(exists $self->{_ise_array}) {
|
|
2536 my @new_features;
|
|
2537 foreach my $old_feature ( @{$self->{_ise_array}} ) {
|
|
2538 my $new_feature = $old_feature->transfer(@_);
|
|
2539 push( @new_features, $new_feature );
|
|
2540 }
|
|
2541 $new_transcript->{_ise_array} = \@new_features;
|
|
2542 }
|
|
2543
|
|
2544
|
|
2545 # flush cached internal values that depend on the exon coords
|
|
2546 $new_transcript->{'transcript_mapper'} = undef;
|
|
2547 $new_transcript->{'coding_region_start'} = undef;
|
|
2548 $new_transcript->{'coding_region_end'} = undef;
|
|
2549 $new_transcript->{'cdna_coding_start'} = undef;
|
|
2550 $new_transcript->{'cdna_coding_end'} = undef;
|
|
2551
|
|
2552 return $new_transcript;
|
|
2553 }
|
|
2554
|
|
2555
|
|
2556 =head recalculate_coordinates
|
|
2557
|
|
2558 Args : none
|
|
2559 Example : none
|
|
2560 Description: called when exon coordinate change happened to recalculate the
|
|
2561 coords of the transcript. This method should be called if one
|
|
2562 of the exons has been changed.
|
|
2563 Returntype : none
|
|
2564 Exceptions : none
|
|
2565 Caller : internal
|
|
2566 Status : Stable
|
|
2567
|
|
2568 =cut
|
|
2569
|
|
2570 sub recalculate_coordinates {
|
|
2571 my ($self) = @_;
|
|
2572
|
|
2573 my $exons = $self->get_all_Exons();
|
|
2574
|
|
2575 if ( !$exons || !@{$exons} ) { return }
|
|
2576
|
|
2577 my ( $slice, $start, $end, $strand );
|
|
2578
|
|
2579 my $e_index;
|
|
2580 for ( $e_index = 0; $e_index < @{$exons}; $e_index++ ) {
|
|
2581 my $e = $exons->[$e_index];
|
|
2582
|
|
2583 # Skip missing or unmapped exons!
|
|
2584 if ( defined($e) && defined( $e->start() ) ) {
|
|
2585 $slice = $e->slice();
|
|
2586 $strand = $e->strand();
|
|
2587 $start = $e->start();
|
|
2588 $end = $e->end();
|
|
2589
|
|
2590 last;
|
|
2591 }
|
|
2592 }
|
|
2593
|
|
2594 my $transsplicing = 0;
|
|
2595
|
|
2596 # Start loop after first exon with coordinates
|
|
2597 for ( ; $e_index < @{$exons}; $e_index++ ) {
|
|
2598 my $e = $exons->[$e_index];
|
|
2599
|
|
2600 # Skip missing or unmapped exons!
|
|
2601 if ( !defined($e) || !defined( $e->start() ) ) { next }
|
|
2602
|
|
2603 if ( $e->start() < $start ) {
|
|
2604 $start = $e->start();
|
|
2605 }
|
|
2606
|
|
2607 if ( $e->end() > $end ) {
|
|
2608 $end = $e->end();
|
|
2609 }
|
|
2610
|
|
2611 if ( defined($slice)
|
|
2612 && $e->slice()
|
|
2613 && $e->slice()->name() ne $slice->name() )
|
|
2614 {
|
|
2615 throw( "Exons with different slices "
|
|
2616 . "are not allowed on one Transcript" );
|
|
2617 }
|
|
2618
|
|
2619 if ( $e->strand() != $strand ) {
|
|
2620 $transsplicing = 1;
|
|
2621 }
|
|
2622 } ## end for ( ; $e_index < @{$exons...})
|
|
2623 if ($transsplicing) {
|
|
2624 warning("Transcript contained trans splicing event");
|
|
2625 }
|
|
2626
|
|
2627 $self->start($start);
|
|
2628 $self->end($end);
|
|
2629 $self->strand($strand);
|
|
2630 $self->slice($slice);
|
|
2631
|
|
2632 # flush cached internal values that depend on the exon coords
|
|
2633 $self->{'transcript_mapper'} = undef;
|
|
2634 $self->{'coding_region_start'} = undef;
|
|
2635 $self->{'coding_region_end'} = undef;
|
|
2636 $self->{'cdna_coding_start'} = undef;
|
|
2637 $self->{'cdna_coding_end'} = undef;
|
|
2638 } ## end sub recalculate_coordinates
|
|
2639
|
|
2640
|
|
2641 =head2 display_id
|
|
2642
|
|
2643 Arg [1] : none
|
|
2644 Example : print $transcript->display_id();
|
|
2645 Description: This method returns a string that is considered to be
|
|
2646 the 'display' identifier. For transcripts this is (depending on
|
|
2647 availability and in this order) the stable Id, the dbID or an
|
|
2648 empty string.
|
|
2649 Returntype : string
|
|
2650 Exceptions : none
|
|
2651 Caller : web drawing code
|
|
2652 Status : Stable
|
|
2653
|
|
2654 =cut
|
|
2655
|
|
2656 sub display_id {
|
|
2657 my $self = shift;
|
|
2658 return $self->{'stable_id'} || $self->dbID || '';
|
|
2659 }
|
|
2660
|
|
2661
|
|
2662 =head2 get_all_peptide_variations
|
|
2663
|
|
2664 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations
|
|
2665 Status : At Risk
|
|
2666 : Will be replaced with modules from the ensembl-variation package
|
|
2667
|
|
2668
|
|
2669 =cut
|
|
2670
|
|
2671 sub get_all_peptide_variations {
|
|
2672 my ($self, $source, $snps) = @_;
|
|
2673
|
|
2674 if(!$snps) {
|
|
2675 my $shash = Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs($self, $source);
|
|
2676 $snps = $shash->{'coding'};
|
|
2677 }
|
|
2678
|
|
2679 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations($self,
|
|
2680 $snps);
|
|
2681 }
|
|
2682
|
|
2683
|
|
2684 =head2 get_all_SNPs
|
|
2685
|
|
2686 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs
|
|
2687
|
|
2688 Status : At Risk
|
|
2689 : Will be replaced with modules from the ensembl-variation package
|
|
2690
|
|
2691 =cut
|
|
2692
|
|
2693 sub get_all_SNPs {
|
|
2694 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs(@_);
|
|
2695 }
|
|
2696
|
|
2697
|
|
2698 =head2 get_all_cdna_SNPs
|
|
2699
|
|
2700 Description: See Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs
|
|
2701
|
|
2702 Status : At Risk
|
|
2703 : Will be replaced with modules from the ensembl-variation package
|
|
2704
|
|
2705 =cut
|
|
2706
|
|
2707 sub get_all_cdna_SNPs {
|
|
2708 return Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs(@_);
|
|
2709 }
|
|
2710
|
|
2711
|
|
2712 =head2 get_all_DASFactories
|
|
2713
|
|
2714 Arg [1] : none
|
|
2715 Function : Retrieves a listref of registered DAS objects
|
|
2716 Returntype: [ DAS_objects ]
|
|
2717 Exceptions:
|
|
2718 Caller :
|
|
2719 Example : $dasref = $prot->get_all_DASFactories
|
|
2720 Status : Stable
|
|
2721
|
|
2722 =cut
|
|
2723
|
|
2724 sub get_all_DASFactories {
|
|
2725 my $self = shift;
|
|
2726 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
|
|
2727 }
|
|
2728
|
|
2729
|
|
2730 =head2 get_all_DAS_Features
|
|
2731
|
|
2732 Arg [1] : none
|
|
2733 Example : $features = $prot->get_all_DAS_Features;
|
|
2734 Description: Retreives a hash reference to a hash of DAS feature
|
|
2735 sets, keyed by the DNS, NOTE the values of this hash
|
|
2736 are an anonymous array containing:
|
|
2737 (1) a pointer to an array of features;
|
|
2738 (2) a pointer to the DAS stylesheet
|
|
2739 Returntype : hashref of Bio::SeqFeatures
|
|
2740 Exceptions : ?
|
|
2741 Caller : webcode
|
|
2742 Status : Stable
|
|
2743
|
|
2744
|
|
2745 =cut
|
|
2746
|
|
2747 sub get_all_DAS_Features {
|
|
2748 my ($self,@args) = @_;
|
|
2749
|
|
2750 my $db = $self->adaptor->db;
|
|
2751 my $GeneAdaptor = $db->get_GeneAdaptor;
|
|
2752 my $Gene = $GeneAdaptor->fetch_by_transcript_stable_id($self->stable_id);
|
|
2753 my $slice = $Gene->feature_Slice;
|
|
2754 return $self->SUPER::get_all_DAS_Features($slice);
|
|
2755 }
|
|
2756
|
|
2757
|
|
2758
|
|
2759 =head2 _compare_xrefs
|
|
2760
|
|
2761 Description: compare xrefs based on priority (descending), then
|
|
2762 name (ascending), then display_label (ascending)
|
|
2763
|
|
2764 =cut
|
|
2765
|
|
2766 sub _compare_xrefs {
|
|
2767 # compare on priority first (descending)
|
|
2768 if ( $a->priority() != $b->priority() ) {
|
|
2769 return $b->priority() <=> $a->priority();
|
|
2770 } else {
|
|
2771 # equal priorities, compare on external_db name
|
|
2772 if ( $a->dbname() ne $b->dbname() ) {
|
|
2773 return $a->dbname() cmp $b->dbname();
|
|
2774 } else {
|
|
2775 # equal priorities and names, compare on display_label
|
|
2776 return $a->display_id() cmp $b->display_id();
|
|
2777 }
|
|
2778 }
|
|
2779 }
|
|
2780
|
|
2781
|
|
2782 =head2 load
|
|
2783
|
|
2784 Arg [1] : Boolean $load_xrefs
|
|
2785 Load (or don't load) xrefs. Default is to load xrefs.
|
|
2786 Example : $transcript->load();
|
|
2787 Description : The Ensembl API makes extensive use of
|
|
2788 lazy-loading. Under some circumstances (e.g.,
|
|
2789 when copying genes between databases), all data of
|
|
2790 an object needs to be fully loaded. This method
|
|
2791 loads the parts of the object that are usually
|
|
2792 lazy-loaded. It will also call the equivalent
|
|
2793 method on any translation and on all exons of the
|
|
2794 transcript.
|
|
2795 Returntype : None
|
|
2796
|
|
2797 =cut
|
|
2798
|
|
2799 sub load {
|
|
2800 my ( $self, $load_xrefs ) = @_;
|
|
2801
|
|
2802 if ( !defined($load_xrefs) ) { $load_xrefs = 1 }
|
|
2803
|
|
2804 my $translation = $self->translation();
|
|
2805 if ( defined($translation) ) {
|
|
2806 $translation->load($load_xrefs);
|
|
2807
|
|
2808 my $alt_translations = $self->get_all_alternative_translations();
|
|
2809
|
|
2810 if ( defined($alt_translations) ) {
|
|
2811 foreach my $alt_translation ( @{$alt_translations} ) {
|
|
2812 $alt_translation->load($load_xrefs);
|
|
2813 }
|
|
2814 }
|
|
2815 }
|
|
2816
|
|
2817 foreach my $exon ( @{ $self->get_all_Exons() } ) {
|
|
2818 $exon->load();
|
|
2819 }
|
|
2820
|
|
2821 $self->stable_id();
|
|
2822 $self->analysis();
|
|
2823 $self->get_all_Attributes();
|
|
2824 $self->get_all_supporting_features();
|
|
2825
|
|
2826 if ($load_xrefs) {
|
|
2827 $self->get_all_DBEntries();
|
|
2828 }
|
|
2829
|
|
2830 } ## end sub load
|
|
2831
|
|
2832 =head2 summary_as_hash
|
|
2833
|
|
2834 Example : $transcript_summary = $transcript->summary_as_hash();
|
|
2835 Description : Extends Feature::summary_as_hash
|
|
2836 Retrieves a summary of this Transcript.
|
|
2837 Returns : hashref of descriptive strings
|
|
2838 Status : Intended for internal use
|
|
2839 =cut
|
|
2840
|
|
2841 sub summary_as_hash {
|
|
2842 my $self = shift;
|
|
2843 my $summary_ref = $self->SUPER::summary_as_hash;
|
|
2844 $summary_ref->{'description'} = $self->description;
|
|
2845 $summary_ref->{'biotype'} = $self->biotype;
|
|
2846 my $parent_gene = $self->get_Gene();
|
|
2847 $summary_ref->{'Parent'} = $parent_gene->display_id;
|
|
2848 return $summary_ref;
|
|
2849 }
|
|
2850
|
|
2851 =head2 get_Gene
|
|
2852
|
|
2853 Example : $gene = $transcript->get_Gene;
|
|
2854 Description : Locates the parent Gene using a transcript dbID
|
|
2855 Returns : Bio::EnsEMBL::Gene
|
|
2856
|
|
2857 =cut
|
|
2858
|
|
2859 sub get_Gene {
|
|
2860 my $self = shift;
|
|
2861 my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor();
|
|
2862 my $parent_gene = $gene_adaptor->fetch_by_transcript_id($self->dbID);
|
|
2863 return $parent_gene;
|
|
2864 }
|
|
2865
|
|
2866 ###########################
|
|
2867 # DEPRECATED METHODS FOLLOW
|
|
2868 ###########################
|
|
2869
|
|
2870 =head2 sort
|
|
2871
|
|
2872 Description: DEPRECATED. This method is no longer needed. Exons are sorted
|
|
2873 automatically when added to the transcript.
|
|
2874
|
|
2875 =cut
|
|
2876
|
|
2877 sub sort {
|
|
2878 my $self = shift;
|
|
2879
|
|
2880 deprecate( "Exons are kept sorted, you dont have to call sort any more" );
|
|
2881 # Fetch all the features
|
|
2882 my @exons = @{$self->get_all_Exons()};
|
|
2883
|
|
2884 # Empty the feature table
|
|
2885 $self->flush_Exons();
|
|
2886
|
|
2887 # Now sort the exons and put back in the feature table
|
|
2888 my $strand = $exons[0]->strand;
|
|
2889
|
|
2890 if ($strand == 1) {
|
|
2891 @exons = sort { $a->start <=> $b->start } @exons;
|
|
2892 } elsif ($strand == -1) {
|
|
2893 @exons = sort { $b->start <=> $a->start } @exons;
|
|
2894 }
|
|
2895
|
|
2896 foreach my $e (@exons) {
|
|
2897 $self->add_Exon($e);
|
|
2898 }
|
|
2899 }
|
|
2900
|
|
2901
|
|
2902 # _translation_id
|
|
2903 # Usage : DEPRECATED - not needed anymore
|
|
2904
|
|
2905 sub _translation_id {
|
|
2906 my $self = shift;
|
|
2907 deprecate( "This method shouldnt be necessary any more" );
|
|
2908 if( @_ ) {
|
|
2909 my $value = shift;
|
|
2910 $self->{'_translation_id'} = $value;
|
|
2911 }
|
|
2912 return $self->{'_translation_id'};
|
|
2913
|
|
2914 }
|
|
2915
|
|
2916
|
|
2917 =head2 created
|
|
2918
|
|
2919 Description: DEPRECATED - this attribute is not part of transcript anymore
|
|
2920
|
|
2921 =cut
|
|
2922
|
|
2923 sub created{
|
|
2924 my $obj = shift;
|
|
2925 deprecate( "This attribute is no longer supported" );
|
|
2926 if( @_ ) {
|
|
2927 my $value = shift;
|
|
2928 $obj->{'created'} = $value;
|
|
2929 }
|
|
2930 return $obj->{'created'};
|
|
2931 }
|
|
2932
|
|
2933
|
|
2934 =head2 modified
|
|
2935
|
|
2936 Description: DEPRECATED - this attribute is not part of transcript anymore
|
|
2937
|
|
2938 =cut
|
|
2939
|
|
2940 sub modified{
|
|
2941 my $obj = shift;
|
|
2942 deprecate( "This attribute is no longer supported" );
|
|
2943 if( @_ ) {
|
|
2944 my $value = shift;
|
|
2945 $obj->{'modified'} = $value;
|
|
2946 }
|
|
2947 return $obj->{'modified'};
|
|
2948 }
|
|
2949
|
|
2950
|
|
2951 =head2 temporary_id
|
|
2952
|
|
2953 Function: DEPRECATED: Use dbID or stable_id or something else instead
|
|
2954
|
|
2955 =cut
|
|
2956
|
|
2957 sub temporary_id{
|
|
2958 my ($obj,$value) = @_;
|
|
2959 deprecate( "I cant see what a temporary_id is good for, please use dbID" .
|
|
2960 "or stableID or\ntry without an id." );
|
|
2961 if( defined $value) {
|
|
2962 $obj->{'temporary_id'} = $value;
|
|
2963 }
|
|
2964 return $obj->{'temporary_id'};
|
|
2965 }
|
|
2966
|
|
2967
|
|
2968 =head2 type
|
|
2969
|
|
2970 Description: DEPRECATED. Use biotype() instead.
|
|
2971
|
|
2972 =cut
|
|
2973
|
|
2974 sub type {
|
|
2975 deprecate("Use biotype() instead");
|
|
2976 biotype(@_);
|
|
2977 }
|
|
2978
|
|
2979
|
|
2980 =head2 confidence
|
|
2981
|
|
2982 Description: DEPRECATED. Use status() instead.
|
|
2983
|
|
2984 =cut
|
|
2985
|
|
2986 sub confidence {
|
|
2987 deprecate("Use status() instead");
|
|
2988 status(@_);
|
|
2989 }
|
|
2990
|
|
2991
|
|
2992 1;
|
|
2993
|