comparison variant_effect_predictor/Bio/EnsEMBL/PredictionTranscript.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 PredictionTranscript
24
25 =head1 SYNOPSIS
26
27 =head1 DESCRIPTION
28
29 Container for single transcript ab initio gene prediction such as
30 GenScan or SNAP. Is directly storable/retrievable in Ensembl using
31 PredictionTranscriptAdaptor.
32
33 Creation:
34
35 my $tran = new Bio::EnsEMBL::PredictionTranscript();
36 $tran->add_Exon($pred_exon);
37
38 my $tran =
39 new Bio::EnsEMBL::PredictionTranscript( -EXONS => \@pred_exons );
40
41 Manipulation:
42
43 # Returns an array of PredictionExon objects
44 my @pred_exons = @{ $tran->get_all_Exons };
45
46 # Returns the peptide translation as string
47 my $pep = $tran->translate()->seq();
48
49 # Get the exon cdna sequence.
50 my $cdna = $tran->spliced_seq();
51
52 =head1 METHODS
53
54 =cut
55
56 package Bio::EnsEMBL::PredictionTranscript;
57
58 use vars qw(@ISA);
59 use strict;
60
61 use Bio::Seq;
62 use Bio::EnsEMBL::Feature;
63 use Bio::EnsEMBL::Transcript;
64 use Bio::EnsEMBL::Translation;
65
66 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
68
69 @ISA = qw(Bio::EnsEMBL::Transcript);
70
71
72 =head2 new
73
74 Arg [-DISPLAY_LABEL]
75 string - a displayable identifier for this prediction
76 Arg [...] : See Bio::EnsEMBL::Transcript superclass constructor
77 Example : $pt = Bio::EnsEMBL::PredictionTranscript->new
78 ( '-start' => $seq_region_start,
79 '-end' => $seq_region_end,
80 '-strand' => $seq_region_strand,
81 '-adaptor' => $self,
82 '-slice' => $slice,
83 '-analysis' => $analysis,
84 '-dbID' => $prediction_transcript_id,
85 '-display_label' => $display_label);
86 Description: Constructor. Creates a new Bio::EnsEMBL::PredictionTranscript
87 object
88 Returntype : Bio::EnsEMBL::PredictionTranscript
89 Exceptions : none
90 Caller : general
91 Status : Stable
92
93 =cut
94
sub new {
  my ( $class, @ctor_args ) = @_;

  # The superclass constructor builds the object from the full argument
  # list; this class only adds the display label on top of it.
  my $prediction = $class->SUPER::new(@ctor_args);

  # Pull the -DISPLAY_LABEL named argument out of the same argument list
  # (undef when the caller did not supply one).
  ( $prediction->{'display_label'} ) =
    rearrange( ['DISPLAY_LABEL'], @ctor_args );

  return $prediction;
}
106
107
108 =head2 coding_region_start
109
110 Arg [1] : none
111 Example : $coding_region_start = $pt->coding_region_start
112 Description: Retrieves the start of the coding region of this transcript in
113 slice coordinates. For prediction transcripts this
114 is always the start of the transcript (i.e. there is no UTR).
115 By convention, the coding_region_start is always lower than
116 the value returned by the coding_region_end method.
117 The value returned by this function is NOT the biological
118 coding start since on the reverse strand the biological coding
119 start would be the higher genomic value.
120 Returntype : int
121 Exceptions : none
122 Caller : general
123 Status : Stable
124
125 =cut
126
sub coding_region_start {
  my ($self) = @_;

  # The coding region start is simply the feature start of this object.
  return $self->start();
}
131
132
133 =head2 coding_region_end
134
135 Arg [1] : none
136 Example : $coding_region_end = $transcript->coding_region_end
137 Description: Retrieves the end of the coding region of this prediction
138 transcript. For prediction transcripts this is always the same
139 as the end since no UTRs are stored.
140 By convention, the coding_region_end is always higher than the
141 value returned by the coding_region_start method.
142 The value returned by this function is NOT the biological
143 coding end since on the reverse strand the biological coding
144 end would be the lower genomic value.
145 Returntype : int
146 Exceptions : none
147 Caller : general
148 Status : Stable
149
150 =cut
151
sub coding_region_end {
  my ($self) = @_;

  # The coding region end is simply the feature end of this object.
  return $self->end();
}
156
157
158
159 =head2 get_all_translateable_Exons
160
161 Arg [1] : none
162 Example : $exons = $self->get_all_translateable_Exons
163 Description: Retrieves the translateable portion of all exons in this
164 transcript. For prediction transcripts this means all exons
165 since no UTRs are stored for them.
166 Returntype : listref of Bio::EnsEMBL::PredictionExons
167 Exceptions : none
168 Caller : general
169 Status : Stable
170
171 =cut
172
sub get_all_translateable_Exons {
  my ($self) = @_;

  # Every exon is treated as translateable, so this is a straight
  # pass-through to get_all_Exons().
  return $self->get_all_Exons();
}
177
178
179 =head2 display_label
180
181 Arg [1] : string $newval (optional)
182 The new value to set the display_label attribute to
183 Example : $display_label = $pt->display_label()
184 Description: Getter/Setter for a displayable identifier for this
185 prediction transcript.
186 Returntype : string
187 Exceptions : none
188 Caller : general
189 Status : Stable
190
191 =cut
192
sub display_label {
  my ( $self, @new_value ) = @_;

  # Any supplied argument (even undef) overwrites the stored label.
  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
198
199
200
201 =head2 stable_id
202
203 Arg [1] : none
204 Example : print $pt->stable_id();
205 Description: Gets a 'stable' identifier for this prediction transcript. Note
206 that prediction transcripts do not have true *stable*
207 identifiers (i.e. identifiers maintained between releases).
208 This method chains to the display_label method and is intended
209 to provide polymorphism with the Transcript class.
210 Returntype : string
211 Exceptions : none
212 Caller : general
213 Status : Stable
214
215 =cut
216
# Polymorphic stand-in for Transcript::stable_id: forwards to the
# display_label getter/setter. Dispatches as a method (rather than a plain
# function call) so that a subclass overriding display_label is honoured.
sub stable_id {
  my $self = shift;
  return $self->display_label(@_);
}
218
# The methods below are fixed-value stubs; none of them reads its arguments.

# Always returns a new empty listref.
sub get_all_DBEntries {
  return [];
}

# Always returns a new empty listref.
sub get_all_DBLinks {
  return [];
}

# Does nothing and returns nothing.
sub add_DBEntry {
  return;
}

# Always undef.
sub external_db {
  return undef;
}

# Always undef.
sub external_status {
  return undef;
}

# Always undef.
sub external_name {
  return undef;
}

# Always 0.
sub is_known {
  return 0;
}
232
233
234 =head2 translation
235
236 Arg [1] : none
237 Example : $translation = $pt->translation();
238 Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction
239 transcript. Note that this translation is generated on the fly
240 and is not stored in the database. The translation always
241 spans the entire transcript (no UTRs; all CDS) and does not
242 have an associated dbID, stable_id or adaptor.
243 Returntype : Bio::EnsEMBL::Translation (undef if the transcript has no exons)
244 Exceptions : none
245 Caller : general
246 Status : Stable
247
248 =cut
249
# Builds a Bio::EnsEMBL::Translation for this prediction transcript on the
# fly. The translation starts at position 1 of the first exon and ends at
# the last base of the last exon.
#
# Returns undef when the transcript has no exons.
sub translation {
  my $self = shift;

  # get_all_Exons() can hand back undef instead of a listref, so test the
  # reference before dereferencing it; dereferencing undef is fatal under
  # strict refs.
  my $exons = $self->get_all_Exons();
  return undef if !defined($exons) || !@{$exons};

  my $start_exon = $exons->[0];
  my $end_exon   = $exons->[-1];

  # Without an adaptor the Translation is created with an undef adaptor.
  my $translation_adaptor;
  if ( $self->adaptor() ) {
    $translation_adaptor = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # Pad with N so that a first exon with a non-zero phase is translated
  # in the right reading frame.
  my $coding_seq  = $self->spliced_seq();
  my $start_phase = $start_exon->phase();
  if ( $start_phase > 0 ) {
    $coding_seq = ( 'N' x $start_phase ) . $coding_seq;
  }

  my $dna_seq = Bio::Seq->new(
    -id       => $self->display_id(),
    -seq      => $coding_seq,
    -moltype  => 'dna',
    -alphabet => 'dna'
  );

  return Bio::EnsEMBL::Translation->new(
    -ADAPTOR    => $translation_adaptor,
    -START_EXON => $start_exon,
    -END_EXON   => $end_exon,
    -SEQ_START  => 1,
    -SEQ_END    => $end_exon->length(),
    -SEQ        => $dna_seq->translate()->seq()
  );
}
294
295
296
297 =head2 translate
298
299 Args : none
300 Function : Give a peptide translation of all exons currently in
301 the PT. Gives empty string when none is in.
302 Returntype: a Bio::Seq as in transcript->translate()
303 Exceptions: none
304 Caller : general
305 Status : Stable
306
307 =cut
308
309
# Translates the translateable sequence of this transcript into a peptide.
#
# The codon table is taken from the slice's 'codon_table' attribute when
# the transcript has a slice carrying one; otherwise table 1 is used.
# A terminal stop codon is stripped before translating so the peptide does
# not end in a stop. Callers who want the stop should translate the result
# of translateable_seq() themselves.
#
# Returns whatever Bio::Seq::translate returns for the DNA sequence.
#
# NOTE(review): Bio::Tools::CodonTable is not loaded by a `use` line visible
# in this file; presumably it is pulled in by Bio::Seq - confirm.
sub translate {
  my ($self) = @_;

  my $dna = $self->translateable_seq();

  my $codon_table_id;
  my $slice = $self->slice();
  if ( defined $slice ) {
    my ($attrib) = @{ $slice->get_all_Attributes('codon_table') };
    if ( defined $attrib ) {
      $codon_table_id = $attrib->value();
    }
  }
  $codon_table_id ||= 1;    # default will be vertebrates

  # Only a sequence made of whole codons can end in a stop codon. The
  # explicit length >= 3 test keeps substr() from reaching outside an
  # empty sequence.
  my $dna_length = CORE::length($dna);
  if ( $dna_length >= 3 && $dna_length % 3 == 0 ) {
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );

    if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
      substr( $dna, -3, 3, '' );
    }
  }

  my $bioseq = Bio::Seq->new(
    -id       => $self->display_id(),
    -seq      => $dna,
    -moltype  => 'dna',
    -alphabet => 'dna'
  );

  return $bioseq->translate( undef, undef, undef, $codon_table_id );
}
349
350
351 =head2 cdna_coding_start
352
353 Arg [1] : none
354 Example : $relative_coding_start = $transcript->cdna_coding_start();
355 Description: Retrieves the position of the coding start of this transcript
356 in cdna coordinates (relative to the start of the 5prime end of
357 the transcript, excluding introns, including utrs). This is
358 always 1 for prediction transcripts because they have no UTRs.
359 Returntype : int
360 Exceptions : none
361 Caller : five_prime_utr, get_all_snps, general
362 Status : Stable
363
364 =cut
365
# Constant: always returns 1, regardless of arguments.
sub cdna_coding_start {
  return 1;
}
367
368
369
370 =head2 cdna_coding_end
371
372 Arg [1] : none
373 Example : $relative_coding_end = $transcript->cdna_coding_end();
374 Description: Retrieves the position of the coding end of this transcript
375 in cdna coordinates (relative to the start of the 5prime end of
376 the transcript, excluding introns, including utrs). This is
377 always the length of the cdna for prediction transcripts because
378 they have no UTRs.
379 Returntype : int
380 Exceptions : none
381 Caller : five_prime_utr, get_all_snps, general
382 Status : Stable
383
384 =cut
385
sub cdna_coding_end {
  my $self = shift;

  # The coding end is the length of the full spliced sequence.
  my $cdna = $self->spliced_seq();
  return CORE::length($cdna);
}
390
391
392 =head2 transform
393
394 Arg 1 : String $coordinate_system_name
395 Arg [2] : String $coordinate_system_version
396 Example : $ptrans = $ptrans->transform('chromosome', 'NCBI33');
397 $ptrans = $ptrans->transform('clone');
398 Description: Moves this PredictionTranscript to the given coordinate system.
399 If this Transcript has Exons attached, they move as well.
400 A new Transcript is returned or undefined if this PT is not
401 defined in the new coordinate system.
402 Returntype : Bio::EnsEMBL::PredictionTranscript
403 Exceptions : wrong parameters
404 Caller : general
405 Status : Stable
406
407 =cut
408
# Moves this prediction transcript, and any exons already attached to it,
# to another coordinate system. The arguments are passed unchanged to
# Bio::EnsEMBL::Feature::transform and to each exon's transform method.
#
# Returns the new transcript, or undef when Feature::transform returns a
# false value. Throws when called old-style with a Slice object.
sub transform {
  my $self = shift;

  # Catch old-style calls that passed a slice instead of coordinate
  # system details.
  if ( ref $_[0]
    && ( $_[0]->isa('Bio::EnsEMBL::Slice')
      || $_[0]->isa('Bio::EnsEMBL::LRGSlice') ) )
  {
    throw( 'transform needs coordinate systems details now, '
        . 'please use transfer' );
  }

  # Call the Feature implementation directly, as a plain function.
  my $new_transcript = Bio::EnsEMBL::Feature::transform( $self, @_ );
  return undef unless $new_transcript;

  # Go through the _trans_exon_array slot directly so as not to prompt
  # lazy-loading of exons that were never fetched.
  if ( exists $self->{'_trans_exon_array'} ) {
    my @new_exons;
    foreach my $old_exon ( @{ $self->{'_trans_exon_array'} } ) {
      # NOTE(review): the result is stored even if the exon's transform
      # returns undef - confirm callers can cope with undef entries.
      my $new_exon = $old_exon->transform(@_);
      push @new_exons, $new_exon;
    }
    $new_transcript->{'_trans_exon_array'} = \@new_exons;
  }

  return $new_transcript;
}
433
434
435
436 =head2 transfer
437
438 Arg 1 : Bio::EnsEMBL::Slice $destination_slice
439 Example : $ptrans = $ptrans->transfer($slice);
440 Description: Moves this PredictionTranscript to the given slice.
441 If this Transcript has Exons attached, they move as well.
442 If this transcript cannot be moved then undef is returned
443 instead.
444 Returntype : Bio::EnsEMBL::PredictionTranscript
445 Exceptions : none
446 Caller : general
447 Status : Stable
448
449 =cut
450
sub transfer {
  my $self = shift;

  # Let the superclass move the transcript itself; a false result means
  # it could not be moved.
  my $moved_transcript = $self->SUPER::transfer(@_);
  if ( !$moved_transcript ) {
    return undef;
  }

  # Only exons that are already attached are moved; the slot is read
  # directly rather than through get_all_Exons().
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      my $moved_exon = $exon->transfer(@_);
      push @moved_exons, $moved_exon;
    }
    $moved_transcript->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved_transcript;
}
469
470 =head2 get_all_Exons
471
472 Arg [1] : none
473 Example : my @exons = @{$transcript->get_all_Exons()};
474 Description: Returns a listref of the exons in this transcript in order.
475 i.e. the first exon in the listref is the 5prime most exon in
476 the transcript.
477 Returntype : a list reference to Bio::EnsEMBL::Exon objects
478 Exceptions : none
479 Caller : general
480 Status : Stable
481
482 =cut
483
sub get_all_Exons {
  my $self = shift;

  # Exons already attached (or previously fetched) are returned as-is.
  return $self->{'_trans_exon_array'}
    if defined $self->{'_trans_exon_array'};

  # Otherwise lazy-load them through the adaptor, when there is one, and
  # cache the result on the object.
  if ( defined $self->adaptor() ) {
    my $exon_adaptor = $self->adaptor()->db()->get_PredictionExonAdaptor();
    $self->{'_trans_exon_array'} =
      $exon_adaptor->fetch_all_by_PredictionTranscript($self);
  }

  # Still undef here when there is neither a cached list nor an adaptor.
  return $self->{'_trans_exon_array'};
}
492
493 =head2 display_id
494
495 Arg [1] : none
496 Example : print $rf->display_id();
497 Description: This method returns a string that is considered to be
498 the 'display' identifier. For prediction transcripts this is
499 (depending on availability and in this order) the stable Id, the
500 dbID or an empty string.
501 Returntype : string
502 Exceptions : none
503 Caller : web drawing code
504 Status : Stable
505
506 =cut
507
sub display_id {
  my ($self) = @_;

  # Preference order: stable id, then dbID, then the empty string.
  my $stable_id = $self->stable_id();
  return $stable_id if $stable_id;

  my $db_id = $self->dbID();
  return $db_id if $db_id;

  return '';
}
512
513 =head2 get_all_Attributes
514
515 Arg [1] : none
516 Example :
517 Description: DOES NOTHING, Returns empty listref. Provided here to prevent
518 Transcript attributes being returned for PredictionTranscripts.
519 Returntype : EMPTY listref Bio::EnsEMBL::Attribute
520 Exceptions : none
521 Caller : general
522 Status : At risk
523
524 =cut
525
sub get_all_Attributes {
  my ($self) = @_;

  # Always an empty listref; nothing is looked up.
  my @no_attributes;
  return \@no_attributes;
}
531
532
533
534 =head2 get_exon_count
535
536 Description: DEPRECATED - use get_all_Exons instead
537
538 =cut
539
# DEPRECATED: returns the number of exons in this transcript.
# Emits a deprecation notice, then counts the list from get_all_Exons().
sub get_exon_count {
  my $self = shift;

  deprecate('Use scalar(@{$transcript->get_all_Exons()}) instead');

  # get_all_Exons() can return undef rather than a listref; report 0
  # instead of dying on the dereference.
  my $exons = $self->get_all_Exons();
  return defined $exons ? scalar @{$exons} : 0;
}
545
546
547 =head2 set_exon_count
548
549 Description: DEPRECATED - this method does nothing now
550
551 =cut
552
# DEPRECATED no-op: only emits the deprecation notice. The return value is
# whatever deprecate() returns, since that call is the sub's last statement.
sub set_exon_count {
  return deprecate('This method no longer does anything.');
}
556
557
558
559 =head2 get_cdna
560
561 Description : DEPRECATED - use spliced_seq() or translateable_seq instead
562
563 =cut
564
# DEPRECATED alias for spliced_seq(): emits a deprecation notice and then
# returns whatever spliced_seq() returns.
sub get_cdna {
  my ($self) = @_;

  deprecate('use spliced_seq instead');

  return $self->spliced_seq();
}
570
571 1;