0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 PredictionTranscript
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 =head1 DESCRIPTION
|
|
28
|
|
29 Container for single transcript ab initio gene prediction such as
|
|
30 GenScan or SNAP. Is directly storable/retrievable in Ensembl using
|
|
31 PredictionTranscriptAdaptor.
|
|
32
|
|
33 Creation:
|
|
34
|
|
35 my $tran = new Bio::EnsEMBL::PredictionTranscript();
|
|
36 $tran->add_Exon($pred_exon);
|
|
37
|
|
  my $tran =
    Bio::EnsEMBL::PredictionTranscript->new( -EXONS => \@pred_exons );
|
|
40
|
|
41 Manipulation:
|
|
42
|
|
43 # Returns an array of PredictionExon objects
|
|
44 my @pred_exons = @{ $tran->get_all_Exons };
|
|
45
|
|
46 # Returns the peptide translation as string
|
|
47 my $pep = $tran->translate()->seq();
|
|
48
|
|
49 # Get the exon cdna sequence.
|
|
  my $cdna = $tran->spliced_seq();
|
|
51
|
|
52 =head1 METHODS
|
|
53
|
|
54 =cut
|
|
55
|
|
56 package Bio::EnsEMBL::PredictionTranscript;
|
|
57
|
|
58 use vars qw(@ISA);
|
|
59 use strict;
|
|
60
|
|
61 use Bio::Seq;
|
|
62 use Bio::EnsEMBL::Feature;
|
|
63 use Bio::EnsEMBL::Transcript;
|
|
64 use Bio::EnsEMBL::Translation;
|
|
65
|
|
66 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
|
|
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
68
|
|
69 @ISA = qw(Bio::EnsEMBL::Transcript);
|
|
70
|
|
71
|
|
=head2 new

  Arg [-DISPLAY_LABEL]
               string - a displayable identifier for this prediction
  Arg [...]  : See Bio::EnsEMBL::Transcript superclass constructor
  Example    : $pt = Bio::EnsEMBL::PredictionTranscript->new
                 ( '-start'         => $seq_region_start,
                   '-end'           => $seq_region_end,
                   '-strand'        => $seq_region_strand,
                   '-adaptor'       => $self,
                   '-slice'         => $slice,
                   '-analysis'      => $analysis,
                   '-dbID'          => $prediction_transcript_id,
                   '-display_label' => $display_label );
  Description: Constructor. Delegates object creation to the superclass
               constructor (all arguments are forwarded unchanged) and then
               stores the optional display label on the new object.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  # The display label is the only argument handled at this level; when it is
  # not supplied the attribute is still created, holding undef.
  ( $self->{'display_label'} ) = rearrange( ['DISPLAY_LABEL'], @_ );

  return $self;
}
|
|
106
|
|
107
|
|
=head2 coding_region_start

  Arg [1]    : none
  Example    : $coding_region_start = $pt->coding_region_start
  Description: Retrieves the start of the coding region of this transcript in
               slice coordinates. For prediction transcripts this is always
               the start of the transcript (i.e. there is no UTR).
               By convention, the coding_region_start is always lower than
               the value returned by the coding_region_end method.
               The value returned by this function is NOT the biological
               coding start since on the reverse strand the biological coding
               start would be the higher genomic value.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub coding_region_start {
  my ($self) = @_;

  # No UTRs are stored, so the coding region begins where the feature does.
  return $self->start;
}
|
|
131
|
|
132
|
|
=head2 coding_region_end

  Arg [1]    : none
  Example    : $coding_region_end = $pt->coding_region_end
  Description: Retrieves the end of the coding region of this prediction
               transcript. For prediction transcripts this is always the same
               as the end of the transcript since no UTRs are stored.
               By convention, the coding_region_end is always higher than the
               value returned by the coding_region_start method.
               The value returned by this function is NOT the biological
               coding end since on the reverse strand the biological coding
               end would be the lower genomic value.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub coding_region_end {
  my ($self) = @_;

  # No UTRs are stored, so the coding region stops where the feature does.
  return $self->end;
}
|
|
156
|
|
157
|
|
158
|
|
=head2 get_all_translateable_Exons

  Arg [1]    : none
  Example    : $exons = $self->get_all_translateable_Exons
  Description: Retrieves the translateable portion of all exons in this
               transcript. For prediction transcripts this means all exons
               since no UTRs are stored for them; the call is forwarded to
               get_all_Exons.
  Returntype : listref of Bio::EnsEMBL::PredictionExons
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_translateable_Exons {
  my ($self) = @_;

  # Every exon is entirely coding, so the full exon list is the answer.
  return $self->get_all_Exons;
}
|
|
177
|
|
178
|
|
=head2 display_label

  Arg [1]    : string $newval (optional)
               The new value to set the display_label attribute to
  Example    : $display_label = $pt->display_label()
  Description: Getter/Setter for a displayable identifier for this
               prediction transcript. When an argument is supplied (even an
               undefined one) it replaces the stored value.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub display_label {
  my ( $self, @new_value ) = @_;

  # Test the argument count rather than definedness so that the label can
  # be reset to undef explicitly.
  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
|
|
198
|
|
199
|
|
200
|
|
=head2 stable_id

  Arg [...]  : none required; any arguments are passed through unchanged to
               display_label
  Example    : print $pt->stable_id();
  Description: Gets a 'stable' identifier for this prediction transcript. Note
               that prediction transcripts do not have true *stable*
               identifiers (i.e. identifiers maintained between releases).
               This method chains to the display_label method and is intended
               to provide polymorphism with the Transcript class.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub stable_id {
  my $self = shift;

  # Dispatch as a method (not a plain function call) so that a subclass
  # overriding display_label is honoured here as well.
  return $self->display_label(@_);
}
|
|
218
|
|
# The subs below stub out the external-reference part of the Transcript
# interface: each one ignores its arguments and returns a fixed "nothing"
# value.

# Returns a reference to a new, empty list.
sub get_all_DBEntries {
  return [];
}

# Returns a reference to a new, empty list.
sub get_all_DBLinks {
  return [];
}

# Accepts and discards its arguments; returns nothing.
sub add_DBEntry {
  return;
}

# Always undef (an explicit undef, i.e. a one-element list in list context).
sub external_db {
  return undef;
}

# Always undef (an explicit undef, i.e. a one-element list in list context).
sub external_status {
  return undef;
}

# Always undef (an explicit undef, i.e. a one-element list in list context).
sub external_name {
  return undef;
}

# Always 0.
sub is_known {
  return 0;
}
|
|
232
|
|
233
|
|
=head2 translation

  Arg [1]    : none
  Example    : $translation = $pt->translation();
  Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction
               transcript. Note that this translation is generated on the fly
               and is not stored in the database. The translation always
               spans the entire transcript (no UTRs; all CDS): it starts at
               position 1 of the first exon and ends at the last base of the
               last exon. No dbID or stable_id is set on it; its adaptor is
               the TranslationAdaptor of this transcript's database when the
               transcript has an adaptor, and undef otherwise.
               If the first exon has a phase greater than 0 the spliced
               sequence is left-padded with that many 'N's before it is
               translated.
  Returntype : Bio::EnsEMBL::Translation, or undef if the transcript has no
               exons
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub translation {
  my $self = shift;

  # get_all_Exons() hands back undef when no exons have been attached and
  # there is no adaptor to load them from; treat that as "no exons" instead
  # of dying on the dereference.
  my @exons = @{ $self->get_all_Exons() || [] };

  # An explicit undef is returned (rather than a bare return) so that
  # list-context callers keep seeing the value they always did.
  return undef if !@exons;

  my $start_exon = $exons[0];
  my $end_exon   = $exons[-1];

  # The translation adaptor is optional: without a transcript adaptor the
  # translation is simply created unattached.
  my $pta;
  if ( $self->adaptor() ) {
    $pta = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # Pad with Ns so that translation starts in the frame given by the phase
  # of the first exon.
  my $Xseq        = $self->spliced_seq();
  my $start_phase = $start_exon->phase;
  if ( $start_phase > 0 ) {
    $Xseq = ( 'N' x $start_phase ) . $Xseq;
  }

  # NOTE(review): unlike translate(), this uses Bio::Seq's default codon
  # table and ignores any 'codon_table' slice attribute - confirm whether
  # that is intended.
  my $tmpSeq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $Xseq,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  return Bio::EnsEMBL::Translation->new
    ( -ADAPTOR    => $pta,
      -START_EXON => $start_exon,
      -END_EXON   => $end_exon,
      -SEQ_START  => 1,
      -SEQ_END    => $end_exon->length(),
      -SEQ        => $tmpSeq->translate()->seq() );
}
|
|
294
|
|
295
|
|
296
|
|
=head2 translate

  Args      : none
  Function  : Give a peptide translation of all exons currently in
              the PT. The codon table is taken from the 'codon_table'
              attribute of the transcript's slice when there is one and
              defaults to table 1 otherwise. If the coding sequence is a
              whole number of codons and ends in a stop codon of that
              table, the stop codon is removed before translating so that
              the peptide has no terminal stop.
  Returntype: a Bio::Seq as in transcript->translate()
  Exceptions: none
  Caller    : general
  Status    : Stable

=cut

sub translate {
  my ($self) = @_;

  my $dna = $self->translateable_seq();

  my $codon_table_id;
  if ( defined( $self->slice() ) ) {
    my ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
    if ( defined($attrib) ) {
      $codon_table_id = $attrib->value();
    }
  }
  $codon_table_id ||= 1;    # default will be vertebrates

  # Strip a terminal stop codon so that the peptide produced does not end
  # in a stop. Only whole-codon sequences are inspected, and sequences
  # shorter than one codon (including the empty sequence) are skipped so
  # that substr() is never asked for a range outside the string.
  # If you want to keep the terminal stop codon, call translateable_seq()
  # directly and produce a translation from it.
  my $dna_length = defined($dna) ? CORE::length($dna) : 0;
  if ( $dna_length >= 3 && $dna_length % 3 == 0 ) {
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );

    if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
      substr( $dna, -3, 3, '' );
    }
  }

  my $bioseq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $dna,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  # Positional arguments: only the codon table id (4th) is supplied.
  my $translation = $bioseq->translate( undef, undef, undef, $codon_table_id );

  return $translation;
}
|
|
349
|
|
350
|
|
=head2 cdna_coding_start

  Arg [1]    : none
  Example    : $relative_coding_start = $transcript->cdna_coding_start();
  Description: Retrieves the position of the coding start of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs). This is
               always 1 for prediction transcripts because they have no UTRs.
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
  Status     : Stable

=cut

sub cdna_coding_start {
  # Constant: with no 5' UTR the CDS begins at the first cDNA base.
  return 1;
}
|
|
367
|
|
368
|
|
369
|
|
=head2 cdna_coding_end

  Arg [1]    : none
  Example    : $relative_coding_end = $transcript->cdna_coding_end();
  Description: Retrieves the position of the coding end of this transcript
               in cdna coordinates (relative to the start of the 5prime end of
               the transcript, excluding introns, including utrs). This is
               always the length of the cdna for prediction transcripts
               because they have no UTRs.
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
  Status     : Stable

=cut

sub cdna_coding_end {
  my $self = shift;

  # With no 3' UTR the CDS runs to the last base of the spliced sequence.
  my $cdna = $self->spliced_seq();

  return length $cdna;
}
|
|
390
|
|
391
|
|
=head2 transform

  Arg  1     : String $coordinate_system_name
  Arg [2]    : String $coordinate_system_version
  Example    : $ptrans = $ptrans->transform('chromosome', 'NCBI33');
               $ptrans = $ptrans->transform('clone');
  Description: Moves this PredictionTranscript to the given coordinate system.
               If this Transcript has Exons attached, they move as well.
               A new Transcript is returned, or undef if this PT - or any of
               its already loaded exons - is not defined in the new
               coordinate system.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : thrown if called old-style with a Slice (use transfer for
               that)
  Caller     : general
  Status     : Stable

=cut

sub transform {
  my $self = shift;

  # catch for old style transform calls
  if ( ref $_[0]
       && (    $_[0]->isa("Bio::EnsEMBL::Slice")
            or $_[0]->isa("Bio::EnsEMBL::LRGSlice") ) )
  {
    throw("transform needs coordinate systems details now," .
          "please use transfer");
  }

  # Call the Feature implementation directly instead of going through SUPER.
  my $new_transcript = Bio::EnsEMBL::Feature::transform( $self, @_ );
  return undef unless $new_transcript;

  # go through the _trans_exon_array so as not to prompt lazy-loading
  if ( exists( $self->{'_trans_exon_array'} ) ) {
    my @new_exons;
    foreach my $old_exon ( @{ $self->{'_trans_exon_array'} } ) {
      my $new_exon = $old_exon->transform(@_);

      # An exon that cannot be placed in the target coordinate system means
      # the transcript as a whole cannot either; returning undef avoids
      # handing back a transcript with holes (undef) in its exon list.
      return undef unless $new_exon;

      push( @new_exons, $new_exon );
    }
    $new_transcript->{'_trans_exon_array'} = \@new_exons;
  }

  return $new_transcript;
}
|
|
433
|
|
434
|
|
435
|
|
=head2 transfer

  Arg  1     : Bio::EnsEMBL::Slice $destination_slice
  Example    : $ptrans = $ptrans->transfer($slice);
  Description: Moves this PredictionTranscript to the given slice.
               If this Transcript has Exons attached, they move as well.
               If this transcript, or any of its already loaded exons,
               cannot be moved then undef is returned instead.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub transfer {
  my $self = shift;

  my $new_transcript = $self->SUPER::transfer(@_);
  return undef unless $new_transcript;

  # Only exons that are already loaded are moved; this avoids prompting a
  # lazy load from the database.
  if ( exists $self->{'_trans_exon_array'} ) {
    my @new_exons;
    for my $old_exon ( @{ $self->{'_trans_exon_array'} } ) {
      my $new_exon = $old_exon->transfer(@_);

      # An exon that cannot be moved means the transcript cannot be moved
      # either; returning undef avoids handing back a transcript with
      # holes (undef) in its exon list.
      return undef unless $new_exon;

      push( @new_exons, $new_exon );
    }

    $new_transcript->{'_trans_exon_array'} = \@new_exons;
  }

  return $new_transcript;
}
|
|
469
|
|
=head2 get_all_Exons

  Arg [1]    : none
  Example    : my @exons = @{$transcript->get_all_Exons()};
  Description: Returns a listref of the exons in this transcript in order,
               i.e. the first exon in the listref is the 5prime most exon in
               the transcript. If no exons are attached yet and the
               transcript has an adaptor, they are fetched through the
               PredictionExonAdaptor and cached on the object. Without
               exons and without an adaptor the return value is undef.
  Returntype : a list reference to Bio::EnsEMBL::Exon objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_Exons {
  my $self = shift;

  # Exons already attached (or loaded earlier) are returned as they are.
  return $self->{'_trans_exon_array'}
    if defined $self->{'_trans_exon_array'};

  # Lazy load: only possible when the transcript came from a database.
  if ( defined $self->adaptor() ) {
    my $exon_adaptor = $self->adaptor()->db()->get_PredictionExonAdaptor();
    $self->{'_trans_exon_array'} =
      $exon_adaptor->fetch_all_by_PredictionTranscript($self);
  }

  return $self->{'_trans_exon_array'};
}
|
|
492
|
|
=head2 display_id

  Arg [1]    : none
  Example    : print $pt->display_id();
  Description: This method returns a string that is considered to be
               the 'display' identifier. For prediction transcripts this is
               (depending on availability and in this order) the stable Id,
               the dbID or an empty string.
  Returntype : string
  Exceptions : none
  Caller     : web drawing code
  Status     : Stable

=cut

sub display_id {
  my ($self) = @_;

  # First true value wins; dbID is only consulted when there is no usable
  # stable id.
  my $id = $self->stable_id;
  $id = $self->dbID unless $id;

  return $id ? $id : '';
}
|
|
512
|
|
=head2 get_all_Attributes

  Arg [1]    : none
  Example    : $attribs = $pt->get_all_Attributes();
  Description: DOES NOTHING, Returns empty listref. Provided here to prevent
               Transcript attributes being returned for PredictionTranscripts.
  Returntype : EMPTY listref Bio::EnsEMBL::Attribute
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub get_all_Attributes {
  # Arguments are ignored: the answer is always a fresh, empty list.
  return [];
}
|
|
531
|
|
532
|
|
533
|
|
=head2 get_exon_count

  Description: DEPRECATED - use get_all_Exons instead. Issues a deprecation
               notice and returns the number of exons reported by
               get_all_Exons (0 when there are none).

=cut

sub get_exon_count {
  my $self = shift;

  deprecate('Use scalar(@{$transcript->get_all_Exons()}) instead');

  # get_all_Exons() may hand back undef when nothing is attached and there
  # is no adaptor; count that as zero exons rather than dying.
  return scalar( @{ $self->get_all_Exons() || [] } );
}
|
|
545
|
|
546
|
|
=head2 set_exon_count

  Description: DEPRECATED - this method does nothing now apart from issuing
               a deprecation notice.

=cut

sub set_exon_count {
  # Arguments are ignored; whatever deprecate() returns is passed back.
  return deprecate('This method no longer does anything.');
}
|
|
556
|
|
557
|
|
558
|
|
=head2 get_cdna

  Description: DEPRECATED - use spliced_seq() or translateable_seq instead.
               Issues a deprecation notice and returns the result of
               spliced_seq().

=cut

sub get_cdna {
  my ($self) = @_;

  deprecate('use spliced_seq instead');

  return $self->spliced_seq;
}
|
|
570
|
|
571 1;
|