Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/PredictionTranscript.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 PredictionTranscript | |
| 24 | |
| 25 =head1 SYNOPSIS | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 Container for single transcript ab initio gene prediction such as | |
| 30 GenScan or SNAP. Is directly storable/retrievable in Ensembl using | |
| 31 PredictionTranscriptAdaptor. | |
| 32 | |
| 33 Creation: | |
| 34 | |
| 35 my $tran = new Bio::EnsEMBL::PredictionTranscript(); | |
| 36 $tran->add_Exon($pred_exon); | |
| 37 | |
| 38 my $tran = | |
| 39 new Bio::EnsEMBL::PredictionTranscript( -EXONS => \@pred_exons ); | |
| 40 | |
| 41 Manipulation: | |
| 42 | |
| 43 # Returns an array of PredictionExon objects | |
| 44 my @pred_exons = @{ $tran->get_all_Exons }; | |
| 45 | |
| 46 # Returns the peptide translation as string | |
| 47 my $pep = $tran->translate()->seq(); | |
| 48 | |
| 49 # Get the exon cdna sequence. | |
| 50 my $cdna = $tran->spliced_seq(); | |
| 51 | |
| 52 =head1 METHODS | |
| 53 | |
| 54 =cut | |
| 55 | |
| 56 package Bio::EnsEMBL::PredictionTranscript; | |
| 57 | |
| 58 use vars qw(@ISA); | |
| 59 use strict; | |
| 60 | |
| 61 use Bio::Seq; | |
| 62 use Bio::EnsEMBL::Feature; | |
| 63 use Bio::EnsEMBL::Transcript; | |
| 64 use Bio::EnsEMBL::Translation; | |
| 65 | |
| 66 use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); | |
| 67 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
| 68 | |
| 69 @ISA = qw(Bio::EnsEMBL::Transcript); | |
| 70 | |
| 71 | |
=head2 new

  Arg [-DISPLAY_LABEL]
              : string - a displayable identifier for this prediction
  Arg [...]   : All arguments are also handed, unchanged, to the
                superclass (Bio::EnsEMBL::Transcript) constructor
  Example     : $pt = Bio::EnsEMBL::PredictionTranscript->new
                  ( '-start'         => $seq_region_start,
                    '-end'           => $seq_region_end,
                    '-strand'        => $seq_region_strand,
                    '-adaptor'       => $adaptor,
                    '-slice'         => $slice,
                    '-analysis'      => $analysis,
                    '-dbID'          => $prediction_transcript_id,
                    '-display_label' => $display_label );
  Description: Constructor. Builds the object through the superclass
               constructor and then records the display label on it.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
  my ( $class, @args ) = @_;

  my $self = $class->SUPER::new(@args);

  # rearrange() picks the named argument out of the same argument list
  # the superclass saw; the label is undef when it was not supplied.
  ( $self->{'display_label'} ) = rearrange( ['DISPLAY_LABEL'], @args );

  return $self;
}
| 106 | |
| 107 | |
=head2 coding_region_start

  Arg [1]    : none
  Example    : $coding_region_start = $pt->coding_region_start
  Description: Retrieves the start of the coding region of this transcript
               in slice coordinates. For prediction transcripts this is
               always the start of the transcript (i.e. there is no UTR).
               By convention, the coding_region_start is always lower than
               the value returned by the coding_region_end method.
               The value returned by this function is NOT the biological
               coding start, since on the reverse strand the biological
               coding start would be the higher genomic value.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub coding_region_start {
  my ($self) = @_;

  # No UTRs: the coding region begins where the transcript begins.
  return $self->start();
}
| 131 | |
| 132 | |
=head2 coding_region_end

  Arg [1]    : none
  Example    : $coding_region_end = $transcript->coding_region_end
  Description: Retrieves the end of the coding region of this prediction
               transcript. For prediction transcripts this is always the
               same as the end of the transcript since no UTRs are stored.
               By convention, the coding_region_end is always higher than
               the value returned by the coding_region_start method.
               The value returned by this function is NOT the biological
               coding end, since on the reverse strand the biological
               coding end would be the lower genomic value.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub coding_region_end {
  my ($self) = @_;

  # No UTRs: the coding region finishes where the transcript finishes.
  return $self->end();
}
| 156 | |
| 157 | |
| 158 | |
=head2 get_all_translateable_Exons

  Arg [1]    : none
  Example    : $exons = $self->get_all_translateable_Exons
  Description: Retrieves the translateable portion of all exons in this
               transcript. For prediction transcripts this is simply
               whatever get_all_Exons() returns, since no UTRs are stored
               for them.
  Returntype : listref of Bio::EnsEMBL::PredictionExons
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_translateable_Exons {
  my ($self) = @_;

  # Every exon is fully coding, so delegate straight to get_all_Exons().
  return $self->get_all_Exons();
}
| 177 | |
| 178 | |
=head2 display_label

  Arg [1]    : string $newval (optional)
               The new value to set the display_label attribute to
  Example    : $display_label = $pt->display_label()
  Description: Getter/Setter for a displayable identifier for this
               prediction transcript. When an argument is passed it is
               stored (even if it is undef) and returned.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub display_label {
  my ( $self, @new_value ) = @_;

  # Presence of an argument, not its truthiness, selects setter mode.
  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
| 198 | |
| 199 | |
| 200 | |
=head2 stable_id

  Arg [1]    : none
  Example    : print $pt->stable_id();
  Description: Gets a 'stable' identifier for this prediction transcript.
               Note that prediction transcripts do not have true *stable*
               identifiers (i.e. identifiers maintained between releases).
               This method chains to the display_label method and is
               intended to provide polymorphism with the Transcript class.
               Any arguments are forwarded to display_label.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub stable_id {
  my $self = shift;

  # Dispatch as a method (not a plain function call) so that a subclass
  # overriding display_label is honoured.
  return $self->display_label(@_);
}

# The methods below override their Bio::EnsEMBL::Transcript counterparts
# with fixed answers.

# Always an empty listref.
sub get_all_DBEntries { return []; }

# Always an empty listref.
sub get_all_DBLinks { return []; }

# Accepts and ignores its arguments.
sub add_DBEntry {}

# Always undef.
sub external_db { return undef; }

# Always undef.
sub external_status { return undef; }

# Always undef.
sub external_name { return undef; }

# Always false (0).
sub is_known { return 0;}
| 232 | |
| 233 | |
=head2 translation

  Arg [1]    : none
  Example    : $translation = $pt->translation();
  Description: Retrieves a Bio::EnsEMBL::Translation object for this
               prediction transcript. Note that this translation is
               generated on the fly and is not stored in the database.
               It runs from the first exon to the last exon returned by
               get_all_Exons() (no UTRs; all CDS). The peptide is produced
               by Bio::Seq->translate() with its default arguments, after
               left-padding the spliced sequence with one 'N' per unit of
               the first exon's phase. No dbID or stable_id is passed to
               the Translation constructor; a TranslationAdaptor is passed
               only when this transcript has an adaptor.
               Returns undef when the transcript has no exons.
  Returntype : Bio::EnsEMBL::Translation or undef
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub translation {
  my $self = shift;

  # get_all_Exons() yields undef when no exons were attached and there is
  # no adaptor to lazy-load them from; treat that like an empty list
  # instead of dereferencing undef.
  my @exons = @{ $self->get_all_Exons() || [] };

  return undef if(!@exons);

  my $start_exon = $exons[0];
  my $end_exon   = $exons[-1];

  # Only available when the transcript is attached to a database.
  my $pta;
  if($self->adaptor()) {
    $pta = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # Pad the 5' end so that the reading frame honours the first exon's
  # phase before translating.
  my $Xseq = $self->spliced_seq();
  my $start_phase = $start_exon->phase;
  if( $start_phase > 0 ) {
    $Xseq = "N"x$start_phase . $Xseq;
  }

  my $tmpSeq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $Xseq,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  return Bio::EnsEMBL::Translation->new
    (-ADAPTOR    => $pta,
     -START_EXON => $start_exon,
     -END_EXON   => $end_exon,
     -SEQ_START  => 1,
     -SEQ_END    => $end_exon->length(),
     -SEQ        => $tmpSeq->translate()->seq());
}
| 294 | |
| 295 | |
| 296 | |
=head2 translate

  Args      : none
  Function  : Give a peptide translation of all exons currently in
              the PT. The codon table is taken from the 'codon_table'
              attribute of the slice when one is present, otherwise
              table 1 is used. A terminal stop codon is removed from the
              DNA before translating, but only when the sequence length
              is a whole number of codons.
  Returntype: a Bio::Seq as in transcript->translate()
  Exceptions: none
  Caller    : general
  Status    : Stable

=cut


sub translate {
  my ($self) = @_;

  my $dna = $self->translateable_seq();

  my $codon_table_id;
  if ( defined( $self->slice() ) ) {
    my ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
    if ( defined($attrib) ) {
      $codon_table_id = $attrib->value();
    }
  }
  $codon_table_id ||= 1; #default will be vertebrates

  # Strip the final stop codon (if any) so the peptide produced does not
  # end in a terminator. To keep the terminal stop, call translateable_seq
  # directly and translate that instead.
  # The ">= 3" guard keeps an empty sequence away from substr(), which
  # would otherwise warn about reading outside the string.
  my $dna_length = defined($dna) ? CORE::length($dna) : 0;
  if ( $dna_length >= 3 && $dna_length % 3 == 0 ) {
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );

    if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
      substr( $dna, -3, 3, '' );
    }
  }

  my $bioseq = Bio::Seq->new( -id       => $self->display_id,
                              -seq      => $dna,
                              -moltype  => 'dna',
                              -alphabet => 'dna' );

  my $translation = $bioseq->translate(undef,undef,undef,$codon_table_id);

  return $translation;
}
| 349 | |
| 350 | |
=head2 cdna_coding_start

  Arg [1]    : none
  Example    : $relative_coding_start = $transcript->cdna_coding_start();
  Description: Retrieves the position of the coding start of this
               transcript in cdna coordinates (relative to the start of
               the 5prime end of the transcript, excluding introns,
               including utrs). This is always 1 for prediction
               transcripts because they have no UTRs.
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
  Status     : Stable

=cut

sub cdna_coding_start {
  my ($self) = @_;

  # Constant: coding starts at the first cdna base.
  return 1;
}
| 367 | |
| 368 | |
| 369 | |
=head2 cdna_coding_end

  Arg [1]    : none
  Example    : $relative_coding_end = $transcript->cdna_coding_end();
  Description: Retrieves the position of the coding end of this transcript
               in cdna coordinates (relative to the start of the 5prime
               end of the transcript, excluding introns, including utrs).
               This is always the length of the cdna (the spliced
               sequence) for prediction transcripts because they have no
               UTRs.
  Returntype : int
  Exceptions : none
  Caller     : five_prime_utr, get_all_snps, general
  Status     : Stable

=cut

sub cdna_coding_end {
  my ($self) = @_;

  my $cdna = $self->spliced_seq();

  # CORE:: makes explicit that this is the string builtin.
  return CORE::length($cdna);
}
| 390 | |
| 391 | |
=head2 transform

  Arg  1     : String $coordinate_system_name
  Arg [2]    : String $coordinate_system_version
  Example    : $ptrans = $ptrans->transform('chromosome', 'NCBI33');
               $ptrans = $ptrans->transform('clone');
  Description: Moves this PredictionTranscript to the given coordinate
               system. If exons are already attached to this transcript
               they are transformed with the same arguments and attached
               to the result; exons are not lazy-loaded for the purpose.
               A new transcript is returned, or undef if the underlying
               Feature transform yields nothing.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : thrown when called old-style with a Slice as first argument
  Caller     : general
  Status     : Stable

=cut

sub transform {
  my ( $self, @cs_args ) = @_;

  # catch for old style transform calls
  my $first_arg = $cs_args[0];
  if ( ref $first_arg ) {
    if (    $first_arg->isa("Bio::EnsEMBL::Slice")
         or $first_arg->isa("Bio::EnsEMBL::LRGSlice") )
    {
      throw("transform needs coordinate systems details now," .
            "please use transfer");
    }
  }

  my $new_transcript = Bio::EnsEMBL::Feature::transform( $self, @cs_args );
  return undef unless $new_transcript;

  # Read the exon array straight from the hash so as not to prompt
  # lazy-loading. Each exon is transformed in scalar context so that a
  # failed transform still occupies its slot (as undef).
  if ( exists $self->{'_trans_exon_array'} ) {
    $new_transcript->{'_trans_exon_array'} =
      [ map { scalar $_->transform(@cs_args) }
          @{ $self->{'_trans_exon_array'} } ];
  }

  return $new_transcript;
}
| 433 | |
| 434 | |
| 435 | |
=head2 transfer

  Arg  1     : Bio::EnsEMBL::Slice $destination_slice
  Example    : $ptrans = $ptrans->transfer($slice);
  Description: Moves this PredictionTranscript to the given slice.
               If exons are already attached to this transcript they are
               transferred with the same arguments and attached to the
               result. If the superclass transfer yields nothing then
               undef is returned instead.
  Returntype : Bio::EnsEMBL::PredictionTranscript
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub transfer {
  my ( $self, @slice_args ) = @_;

  my $new_transcript = $self->SUPER::transfer(@slice_args);
  return undef unless $new_transcript;

  # Only exons that are already loaded are carried over. Each transfer is
  # evaluated in scalar context so a failed one keeps its slot (as undef).
  if ( exists $self->{'_trans_exon_array'} ) {
    $new_transcript->{'_trans_exon_array'} =
      [ map { scalar $_->transfer(@slice_args) }
          @{ $self->{'_trans_exon_array'} } ];
  }

  return $new_transcript;
}
| 469 | |
=head2 get_all_Exons

  Arg [1]    : none
  Example    : my @exons = @{$transcript->get_all_Exons()};
  Description: Returns a listref of the exons in this transcript in order,
               i.e. the first exon in the listref is the 5prime most exon
               in the transcript. When no exons are held yet and an
               adaptor is available, they are fetched through the
               PredictionExonAdaptor and cached on the object.
  Returntype : a list reference to Bio::EnsEMBL::Exon objects
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_all_Exons {
  my ($self) = @_;

  # Already attached or previously loaded: hand back the cached listref.
  return $self->{'_trans_exon_array'}
    if defined $self->{'_trans_exon_array'};

  # Lazy-load from the database when this transcript has an adaptor.
  if ( defined $self->adaptor() ) {
    my $exon_adaptor = $self->adaptor()->db()->get_PredictionExonAdaptor();
    $self->{'_trans_exon_array'} =
      $exon_adaptor->fetch_all_by_PredictionTranscript($self);
  }

  return $self->{'_trans_exon_array'};
}
| 492 | |
=head2 display_id

  Arg [1]    : none
  Example    : print $pt->display_id();
  Description: This method returns a string that is considered to be
               the 'display' identifier. For prediction transcripts this
               is the first true value among the stable_id and the dbID,
               in that order, or an empty string when neither is set.
  Returntype : string
  Exceptions : none
  Caller     : web drawing code
  Status     : Stable

=cut

sub display_id {
  my ($self) = @_;

  my $id = $self->stable_id;

  # Fall back to the dbID only when there is no usable stable_id.
  $id = $self->dbID unless $id;

  return $id || '';
}
| 512 | |
=head2 get_all_Attributes

  Arg [1]    : none
  Example    : my $attribs = $pt->get_all_Attributes();  # always []
  Description: DOES NOTHING, returns an empty listref. Provided here to
               prevent Transcript attributes being returned for
               PredictionTranscripts.
  Returntype : EMPTY listref Bio::EnsEMBL::Attribute
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub get_all_Attributes {
  my ($self) = @_;

  # A new, empty listref on every call.
  my @no_attributes;
  return \@no_attributes;
}
| 531 | |
| 532 | |
| 533 | |
=head2 get_exon_count

  Description: DEPRECATED - use scalar(@{$transcript->get_all_Exons()})
               instead. Returns the number of exons, or 0 when
               get_all_Exons() has nothing to return.

=cut

sub get_exon_count {
  my $self = shift;
  deprecate('Use scalar(@{$transcript->get_all_Exons()}) instead');

  # get_all_Exons() can return undef (no exons attached, no adaptor);
  # count that as zero rather than dereferencing undef.
  return scalar( @{ $self->get_all_Exons() || [] } );
}
| 545 | |
| 546 | |
=head2 set_exon_count

  Description: DEPRECATED - this method does nothing now apart from
               issuing the deprecation notice.

=cut

sub set_exon_count {
  my ($self) = @_;

  # Nothing is stored; the result of deprecate() is what falls out.
  return deprecate('This method no longer does anything.');
}
| 556 | |
| 557 | |
| 558 | |
=head2 get_cdna

  Description: DEPRECATED - use spliced_seq() or translateable_seq
               instead. Issues a deprecation notice and returns
               whatever spliced_seq() returns.

=cut

sub get_cdna {
  my ($self) = @_;

  deprecate('use spliced_seq instead');

  return $self->spliced_seq();
}
| 570 | |
| 571 1; |
