Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/PredictionTranscript.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 PredictionTranscript | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 =head1 DESCRIPTION | |
28 | |
29 Container for single transcript ab initio gene prediction such as | |
30 GenScan or SNAP. Is directly storable/retrievable in Ensembl using | |
31 PredictionTranscriptAdaptor. | |
32 | |
33 Creation: | |
34 | |
35 my $tran = new Bio::EnsEMBL::PredictionTranscript(); | |
36 $tran->add_Exon($pred_exon); | |
37 | |
38 my $tran = | |
39 new Bio::EnsEMBL::PredictionTranscript( -EXONS => \@pred_exons ); | |
40 | |
41 Manipulation: | |
42 | |
43 # Returns an array of PredictionExon objects | |
44 my @pred_exons = @{ $tran->get_all_Exons }; | |
45 | |
46 # Returns the peptide translation as string | |
47 my $pep = $tran->translate()->seq(); | |
48 | |
49 # Get the exon cdna sequence. | |
50 my $cdna = $tran->spliced_seq(); | |
51 | |
52 =head1 METHODS | |
53 | |
54 =cut | |
55 | |
56 package Bio::EnsEMBL::PredictionTranscript; | |
57 | |
58 use vars qw(@ISA); | |
59 use strict; | |
60 | |
61 use Bio::Seq; | |
62 use Bio::EnsEMBL::Feature; | |
63 use Bio::EnsEMBL::Transcript; | |
64 use Bio::EnsEMBL::Translation; | |
65 | |
66 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); | |
67 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
68 | |
69 @ISA = qw(Bio::EnsEMBL::Transcript); | |
70 | |
71 | |
72 =head2 new | |
73 | |
74 Arg [-DISPLAY_LABEL] | |
75 string - a displayable identifier for this prediction | |
76 Arg [...] : See Bio::EnsEMBL::Transcript superclass constructor | |
77 Example : $pt = Bio::EnsEMBL::PredictionTranscript->new | |
78 ( '-start' => $seq_region_start, | |
79 '-end' => $seq_region_end, | |
80 '-strand' => $seq_region_strand, | |
81 '-adaptor' => $self, | |
82 '-slice' => $slice, | |
83 '-analysis' => $analysis, | |
84 '-dbID' => $prediction_transcript_id, | |
85 '-display_label' => $display_label); | |
86 Description: Constructor. Creates a new Bio::EnsEMBL::PredictionTranscript | |
87 object | |
88 Returntype : Bio::EnsEMBL::PredictionTranscript | |
89 Exceptions : none | |
90 Caller : general | |
91 Status : Stable | |
92 | |
93 =cut | |
94 | |
sub new {
  my ( $class, @args ) = @_;

  # The superclass constructor handles every argument it recognises.
  my $self = $class->SUPER::new(@args);

  # -DISPLAY_LABEL is the one argument handled here.  The slot is assigned
  # unconditionally, so it holds undef when no label was supplied.
  ( $self->{'display_label'} ) = rearrange( ['DISPLAY_LABEL'], @args );

  return $self;
}
106 | |
107 | |
108 =head2 coding_region_start | |
109 | |
110 Arg [1] : none | |
111 Example : $coding_region_start = $pt->coding_region_start | |
112 Description: Retrieves the start of the coding region of this transcript in | |
113 slice coordinates. For prediction transcripts this | |
114 is always the start of the transcript (i.e. there is no UTR). | |
115 By convention, the coding_region_start is always lower than | |
116 the value returned by the coding_region_end method. | |
117 The value returned by this function is NOT the biological | |
118 coding start since on the reverse strand the biological coding | |
119 start would be the higher genomic value. | |
120 Returntype : int | |
121 Exceptions : none | |
122 Caller : general | |
123 Status : Stable | |
124 | |
125 =cut | |
126 | |
sub coding_region_start {
  my ($self) = @_;

  # The coding region is reported as starting exactly where the feature
  # starts: this simply delegates to start().
  return $self->start();
}
131 | |
132 | |
133 =head2 coding_region_end | |
134 | |
135 Arg [1] : none | |
136 Example : $coding_region_end = $transcript->coding_region_end | |
137 Description: Retrieves the end of the coding region of this prediction | |
138 transcript. For prediction transcripts this is always the same | |
139 as the end since no UTRs are stored. | |
140 By convention, the coding_region_end is always higher than the | |
141 value returned by the coding_region_start method. | |
142 The value returned by this function is NOT the biological | |
143 coding end since on the reverse strand the biological coding | |
144 end would be the lower genomic value. | |
145 Returntype : int | |
146 Exceptions : none | |
147 Caller : general | |
148 Status : Stable | |
149 | |
150 =cut | |
151 | |
sub coding_region_end {
  my ($self) = @_;

  # The coding region is reported as ending exactly where the feature ends:
  # this simply delegates to end().
  return $self->end();
}
156 | |
157 | |
158 | |
159 =head2 get_all_translateable_Exons | |
160 | |
161 Arg [1] : none | |
162 Example : $exons = $self->get_all_translateable_Exons | |
163 Description: Retrieves the translateable portion of all exons in this | |
164 transcript. For prediction transcripts this means all exons | |
165 since no UTRs are stored for them. | |
166 Returntype : listref of Bio::EnsEMBL::PredictionExons | |
167 Exceptions : none | |
168 Caller : general | |
169 Status : Stable | |
170 | |
171 =cut | |
172 | |
sub get_all_translateable_Exons {
  my ($self) = @_;

  # Every exon counts as translateable here, so the full exon list is
  # returned as-is (the very same reference get_all_Exons() hands back).
  return $self->get_all_Exons();
}
177 | |
178 | |
179 =head2 display_label | |
180 | |
181 Arg [1] : string $newval (optional) | |
182 The new value to set the display_label attribute to | |
183 Example : $display_label = $pt->display_label() | |
184 Description: Getter/Setter for a displayable identifier for this | |
185 prediction transcript. | |
186 Returntype : string | |
187 Exceptions : none | |
188 Caller : general | |
189 Status : Stable | |
190 | |
191 =cut | |
192 | |
sub display_label {
  my ( $self, @new_value ) = @_;

  # Setter form: the presence of an argument (even undef) overwrites the
  # stored label; the getter form leaves it untouched.
  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
198 | |
199 | |
200 | |
201 =head2 stable_id | |
202 | |
203 Arg [1] : none | |
204 Example : print $pt->stable_id(); | |
205 Description: Gets a 'stable' identifier for this prediction transcript. Note | |
206 that prediction transcripts do not have true *stable* | |
207 identifiers (i.e. identifiers maintained between releases). | |
208 This method chains to the display_label method and is intended | |
209 to provide polymorphism with the Transcript class. | |
210 Returntype : string | |
211 Exceptions : none | |
212 Caller : general | |
213 Status : Stable | |
214 | |
215 =cut | |
216 | |
sub stable_id {
  my @args = @_;

  # Plain function call (not a method call) into this package's
  # display_label, forwarding the invocant and any further arguments.
  return display_label(@args);
}
218 | |
# The methods below override the external-reference interface with fixed
# answers: empty lists, undef, or 0, regardless of arguments.

# Always a fresh, empty list reference.
sub get_all_DBEntries {
  return [];
}

# Always a fresh, empty list reference.
sub get_all_DBLinks {
  return [];
}

# Accepts and ignores its arguments; nothing is stored.
sub add_DBEntry {
  return;
}

sub external_db {
  return undef;
}

sub external_status {
  return undef;
}

sub external_name {
  return undef;
}

sub is_known {
  return 0;
}
232 | |
233 | |
234 =head2 translation | |
235 | |
236 Arg [1] : none | |
237 Example : $translation = $pt->translation(); | |
238 Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction | |
239 transcript. Note that this translation is generated on the fly | |
240 and is not stored in the database. The translation always | |
241 spans the entire transcript (no UTRs; all CDS) and does not | |
242 have an associated dbID, stable_id or adaptor. | |
243 Returntype : Bio::EnsEMBL::Translation, or undef if there are no exons | |
244 Exceptions : none | |
245 Caller : general | |
246 Status : Stable | |
247 | |
248 =cut | |
249 | |
sub translation {
  my $self = shift;

  # The translation is calculated on the fly from the exon list.
  # get_all_Exons() yields undef when no exons were attached and there is
  # no adaptor to load them from; treat that the same as an empty list
  # rather than dying on the dereference.
  my @exons = @{ $self->get_all_Exons() || [] };

  # 'return undef' (not a bare return) is kept on purpose so that callers
  # in list context keep receiving a single undef element.
  return undef if ( !@exons );

  my $start_exon = $exons[0];
  my $end_exon   = $exons[-1];

  # The TranslationAdaptor is only available when this transcript has an
  # adaptor of its own; otherwise the Translation is built with no adaptor.
  my $pta;
  if ( $self->adaptor() ) {
    $pta = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # Left-pad with one 'N' per phase unit of the first exon so that the
  # sequence handed to the translator starts on a codon boundary.
  my $Xseq        = $self->spliced_seq();
  my $start_phase = $start_exon->phase;
  if ( $start_phase > 0 ) {
    $Xseq = "N" x $start_phase . $Xseq;
  }

  # Direct method-call syntax: this package defines its own new(), so the
  # indirect-object form ("new Bio::Seq ...") relies on parser heuristics.
  my $tmpSeq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $Xseq,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  # The translation spans from position 1 of the first exon to the last
  # base of the last exon.
  return Bio::EnsEMBL::Translation->new
    ( -ADAPTOR    => $pta,
      -START_EXON => $start_exon,
      -END_EXON   => $end_exon,
      -SEQ_START  => 1,
      -SEQ_END    => $end_exon->length(),
      -SEQ        => $tmpSeq->translate()->seq() );
}
294 | |
295 | |
296 | |
297 =head2 translate | |
298 | |
299 Args : none | |
300 Function : Give a peptide translation of all exons currently in | |
301 the PT. Gives empty string when none is in. | |
302 Returntype: a Bio::Seq as in transcript->translate() | |
303 Exceptions: none | |
304 Caller : general | |
305 Status : Stable | |
306 | |
307 =cut | |
308 | |
309 | |
sub translate {
  my ($self) = @_;

  my $dna = $self->translateable_seq();

  # Use the codon table declared on the slice (attribute 'codon_table')
  # when there is one.
  my $codon_table_id;
  if ( defined( $self->slice() ) ) {
    my ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
    if ( defined($attrib) ) {
      $codon_table_id = $attrib->value();
    }
  }
  $codon_table_id ||= 1;    #default will be vertebrates

  # Remove the final stop codon from the sequence if it is present, so
  # that the peptide produced does not end in a terminal stop.  Callers
  # who want the terminal stop can translate translateable_seq() directly.
  # Only whole-codon, non-empty sequences are examined: an empty or
  # undefined sequence has no last codon to inspect.
  my $dna_length = CORE::length($dna);
  if ( $dna_length && $dna_length % 3 == 0 ) {
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );

    if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
      substr( $dna, -3, 3, '' );
    }
  }

  # Direct method-call syntax: this package defines its own new(), so the
  # indirect-object form ("new Bio::Seq ...") relies on parser heuristics.
  my $bioseq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $dna,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  # Positional arguments: only the fourth (codon table id) is supplied.
  my $translation = $bioseq->translate( undef, undef, undef, $codon_table_id );

  return $translation;
}
349 | |
350 | |
351 =head2 cdna_coding_start | |
352 | |
353 Arg [1] : none | |
354 Example : $relative_coding_start = $transcript->cdna_coding_start(); | |
355 Description: Retrieves the position of the coding start of this transcript | |
356 in cdna coordinates (relative to the start of the 5prime end of | |
357 the transcript, excluding introns, including utrs). This is | |
358 always 1 for prediction transcripts because they have no UTRs. | |
359 Returntype : int | |
360 Exceptions : none | |
361 Caller : five_prime_utr, get_all_snps, general | |
362 Status : Stable | |
363 | |
364 =cut | |
365 | |
sub cdna_coding_start {
  # Constant: coding always begins at the first cdna base.
  return 1;
}
367 | |
368 | |
369 | |
370 =head2 cdna_coding_end | |
371 | |
372 Arg [1] : none | |
373 Example : $relative_coding_end = $transcript->cdna_coding_end(); | |
374 Description: Retrieves the position of the coding end of this transcript | |
375 in cdna coordinates (relative to the start of the 5prime end of | |
376 the transcript, excluding introns, including utrs). This is | |
377 always the length of the cdna for prediction transcripts because | |
378 they have no UTRs. | |
379 Returntype : int | |
380 Exceptions : none | |
381 Caller : five_prime_utr, get_all_snps, general | |
382 Status : Stable | |
383 | |
384 =cut | |
385 | |
sub cdna_coding_end {
  my $self = shift;

  # The coding end is the length of the spliced sequence.
  my $cdna = $self->spliced_seq();

  return length $cdna;
}
390 | |
391 | |
392 =head2 transform | |
393 | |
394 Arg 1 : String $coordinate_system_name | |
395 Arg [2] : String $coordinate_system_version | |
396 Example : $ptrans = $ptrans->transform('chromosome', 'NCBI33'); | |
397 $ptrans = $ptrans->transform('clone'); | |
398 Description: Moves this PredictionTranscript to the given coordinate system. | |
399 If this Transcript has Exons attached, they move as well. | |
400 A new Transcript is returned or undefined if this PT is not | |
401 defined in the new coordinate system. | |
402 Returntype : Bio::EnsEMBL::PredictionTranscript | |
403 Exceptions : wrong parameters | |
404 Caller : general | |
405 Status : Stable | |
406 | |
407 =cut | |
408 | |
sub transform {
  my ( $self, @cs_args ) = @_;

  # Old-style calls passed a slice object; reject those up front.
  my $first_arg = $cs_args[0];
  if ( ref $first_arg
       && (    $first_arg->isa("Bio::EnsEMBL::Slice")
            || $first_arg->isa("Bio::EnsEMBL::LRGSlice") ) )
  {
    throw( "transform needs coordinate systems details now,"
           . "please use transfer" );
  }

  # Calls Feature's implementation directly as a function rather than
  # dispatching through the inheritance chain.
  my $moved = Bio::EnsEMBL::Feature::transform( $self, @cs_args );
  return undef unless $moved;

  # Read the exon slot directly (not via get_all_Exons) so that exons are
  # not lazy-loaded just to be transformed.
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    for my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      # scalar(): each exon contributes exactly one slot, even if undef.
      push @moved_exons, scalar( $exon->transform(@cs_args) );
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
433 | |
434 | |
435 | |
436 =head2 transfer | |
437 | |
438 Arg 1 : Bio::EnsEMBL::Slice $destination_slice | |
439 Example : $ptrans = $ptrans->transfer($slice); | |
440 Description: Moves this PredictionTranscript to the given slice. | |
441 If this Transcript has Exons attached, they move as well. | |
442 If this transcript cannot be moved then undef is returned | |
443 instead. | |
444 Returntype : Bio::EnsEMBL::PredictionTranscript | |
445 Exceptions : none | |
446 Caller : general | |
447 Status : Stable | |
448 | |
449 =cut | |
450 | |
sub transfer {
  my ( $self, @slice_args ) = @_;

  my $moved = $self->SUPER::transfer(@slice_args);
  return undef unless $moved;

  # Only exons that are already attached are moved; the slot is read
  # directly, so nothing is loaded through get_all_Exons().
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    for my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      # scalar(): each exon contributes exactly one slot, even if undef.
      push @moved_exons, scalar( $exon->transfer(@slice_args) );
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
469 | |
470 =head2 get_all_Exons | |
471 | |
472 Arg [1] : none | |
473 Example : my @exons = @{$transcript->get_all_Exons()}; | |
474 Description: Returns a listref of the exons in this transcript in order. | |
475 i.e. the first exon in the listref is the 5prime most exon in | |
476 the transcript. | |
477 Returntype : a list reference to Bio::EnsEMBL::Exon objects | |
478 Exceptions : none | |
479 Caller : general | |
480 Status : Stable | |
481 | |
482 =cut | |
483 | |
sub get_all_Exons {
  my $self = shift;

  # Already populated (attached by hand or loaded earlier): hand it back.
  return $self->{'_trans_exon_array'}
    if defined $self->{'_trans_exon_array'};

  # Otherwise lazy-load through the adaptor, when there is one.  Without an
  # adaptor the slot stays unset and undef is returned.
  my $adaptor = $self->adaptor();
  if ( defined $adaptor ) {
    my $exon_adaptor = $adaptor->db()->get_PredictionExonAdaptor();
    $self->{'_trans_exon_array'} =
      $exon_adaptor->fetch_all_by_PredictionTranscript($self);
  }

  return $self->{'_trans_exon_array'};
}
492 | |
493 =head2 display_id | |
494 | |
495 Arg [1] : none | |
496 Example : print $rf->display_id(); | |
497 Description: This method returns a string that is considered to be | |
498 the 'display' identifier. For prediction transcripts this is | |
499 (depending on availability and in this order) the stable Id, the | |
500 dbID or an empty string. | |
501 Returntype : string | |
502 Exceptions : none | |
503 Caller : web drawing code | |
504 Status : Stable | |
505 | |
506 =cut | |
507 | |
sub display_id {
  my ($self) = @_;

  # First true value wins: stable id, then dbID, then the empty string.
  my $id = $self->stable_id;
  $id = $self->dbID unless $id;

  return $id || '';
}
512 | |
513 =head2 get_all_Attributes | |
514 | |
515 Arg [1] : none | |
516 Example : | |
517 Description: DOES NOTHING, Returns empty listref. Provided here to prevent | |
518 Transcript attributes being returned for PredictionTranscripts. | |
519 Returntype : EMPTY listref Bio::EnsEMBL::Attribute | |
520 Exceptions : none | |
521 Caller : general | |
522 Status : At risk | |
523 | |
524 =cut | |
525 | |
sub get_all_Attributes {
  # Arguments (including the invocant) are ignored: the answer is always a
  # fresh, empty list reference.
  return [];
}
531 | |
532 | |
533 | |
534 =head2 get_exon_count | |
535 | |
536 Description: DEPRECATED - use get_all_Exons instead | |
537 | |
538 =cut | |
539 | |
sub get_exon_count {
  my $self = shift;

  # Deprecated: callers should count the exon list themselves.
  deprecate('Use scalar(@{$transcript->get_all_Exons()}) instead');

  # get_all_Exons() yields undef when no exons are attached and there is no
  # adaptor to load them from; count that as zero instead of dying on the
  # dereference.
  return scalar( @{ $self->get_all_Exons() || [] } );
}
545 | |
546 | |
547 =head2 set_exon_count | |
548 | |
549 Description: DEPRECATED - this method does nothing now | |
550 | |
551 =cut | |
552 | |
sub set_exon_count {
  # Nothing is stored; the only effect is the deprecation notice, whose
  # return value is passed through to the caller.
  return deprecate('This method no longer does anything.');
}
556 | |
557 | |
558 | |
559 =head2 get_cdna | |
560 | |
561 Description : DEPRECATED - use spliced_seq() or translateable_seq instead | |
562 | |
563 =cut | |
564 | |
sub get_cdna {
  my ($self) = @_;

  # Deprecated alias: emit the notice, then hand over to spliced_seq().
  deprecate('use spliced_seq instead');

  return $self->spliced_seq();
}
570 | |
571 1; |