Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqFeature/Gene/Transcript.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Transcript.pm,v 1.25 2002/12/29 09:37:51 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SeqFeature::Gene::Transcript | |
| 4 # | |
| 5 # Cared for by Hilmar Lapp <hlapp@gmx.net> | |
| 6 # | |
| 7 # Copyright Hilmar Lapp | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SeqFeature::Gene::Transcript - A feature representing a transcript | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 See documentation of methods. | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 A feature representing a transcript. | |
| 24 | |
| 25 | |
| 26 =head1 FEEDBACK | |
| 27 | |
| 28 =head2 Mailing Lists | |
| 29 | |
| 30 User feedback is an integral part of the evolution of this | |
| 31 and other Bioperl modules. Send your comments and suggestions preferably | |
| 32 to one of the Bioperl mailing lists. | |
| 33 Your participation is much appreciated. | |
| 34 | |
| 35 bioperl-l@bioperl.org - General discussion | |
| 36 http://bio.perl.org/MailList.html - About the mailing lists | |
| 37 | |
| 38 =head2 Reporting Bugs | |
| 39 | |
| 40 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 41 the bugs and their resolution. | |
| 42 Bug reports can be submitted via email or the web: | |
| 43 | |
| 44 bioperl-bugs@bio.perl.org | |
| 45 http://bugzilla.bioperl.org/ | |
| 46 | |
| 47 =head1 AUTHOR - Hilmar Lapp | |
| 48 | |
| 49 Email hlapp@gmx.net | |
| 50 | |
| 51 Describe contact details here | |
| 52 | |
| 53 =head1 APPENDIX | |
| 54 | |
| 55 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 56 | |
| 57 =cut | |
| 58 | |
| 59 | |
| 60 # Let the code begin... | |
| 61 | |
| 62 package Bio::SeqFeature::Gene::Transcript; | |
| 63 use vars qw(@ISA); | |
| 64 use strict; | |
| 65 | |
| 66 # Object preamble - inherits from Bio::Root::Object | |
| 67 | |
| 68 use Bio::SeqFeature::Gene::TranscriptI; | |
| 69 use Bio::SeqFeature::Generic; | |
| 70 use Bio::PrimarySeq; | |
| 71 | |
| 72 @ISA = qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI); | |
| 73 | |
| 74 sub new { | |
| 75 my ($caller, @args) = @_; | |
| 76 my $self = $caller->SUPER::new(@args); | |
| 77 my ($primary) = $self->_rearrange([qw(PRIMARY)],@args); | |
| 78 | |
| 79 $primary = 'transcript' unless $primary; | |
| 80 $self->primary_tag($primary); | |
| 81 $self->strand(0) if(! defined($self->strand())); | |
| 82 return $self; | |
| 83 } | |
| 84 | |
| 85 | |
| 86 =head2 promoters | |
| 87 | |
| 88 Title : promoters() | |
| 89 Usage : @proms = $transcript->promoters(); | |
| 90 Function: Get the promoter features/sites of this transcript. | |
| 91 | |
| 92 Note that OO-modeling of regulatory elements is not stable yet. | |
| 93 This means that this method might change or even disappear in a | |
| 94 future release. Be aware of this if you use it. | |
| 95 | |
| 96 Returns : An array of Bio::SeqFeatureI implementing objects representing the | |
| 97 promoter regions or sites. | |
| 98 Args : | |
| 99 | |
| 100 | |
| 101 =cut | |
| 102 | |
| 103 sub promoters { | |
| 104 my ($self) = @_; | |
| 105 return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter'); | |
| 106 } | |
| 107 | |
| 108 =head2 add_promoter | |
| 109 | |
| 110 Title : add_promoter() | |
| 111 Usage : $transcript->add_promoter($feature); | |
| 112 Function: Add a promoter feature/site to this transcript. | |
| 113 | |
| 114 | |
| 115 Note that OO-modeling of regulatory elements is not stable yet. | |
| 116 This means that this method might change or even disappear in a | |
| 117 future release. Be aware of this if you use it. | |
| 118 | |
| 119 Returns : | |
| 120 Args : A Bio::SeqFeatureI implementing object. | |
| 121 | |
| 122 | |
| 123 =cut | |
| 124 | |
| 125 sub add_promoter { | |
| 126 my ($self, $fea) = @_; | |
| 127 $self->_add($fea,'Bio::SeqFeature::Gene::Promoter'); | |
| 128 } | |
| 129 | |
| 130 =head2 flush_promoters | |
| 131 | |
| 132 Title : flush_promoters() | |
| 133 Usage : $transcript->flush_promoters(); | |
| 134 Function: Remove all promoter features/sites from this transcript. | |
| 135 | |
| 136 Note that OO-modeling of regulatory elements is not stable yet. | |
| 137 This means that this method might change or even disappear in a | |
| 138 future release. Be aware of this if you use it. | |
| 139 | |
| 140 Returns : the removed features as a list | |
| 141 Args : none | |
| 142 | |
| 143 | |
| 144 =cut | |
| 145 | |
| 146 sub flush_promoters { | |
| 147 my ($self) = @_; | |
| 148 return $self->_flush('Bio::SeqFeature::Gene::Promoter'); | |
| 149 } | |
| 150 | |
| 151 =head2 exons | |
| 152 | |
| 153 Title : exons() | |
| 154 Usage : @exons = $gene->exons(); | |
| 155 ($inital_exon) = $gene->exons('Initial'); | |
| 156 Function: Get all exon features or all exons of specified type of this | |
| 157 transcript. | |
| 158 | |
| 159 Exon type is treated as a case-insensitive regular expression and | |
| 160 is optional. For consistency, use only the following types: | |
| 161 initial, internal, terminal. | |
| 162 | |
| 163 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects. | |
| 164 Args : An optional string specifying the primary_tag of the feature. | |
| 165 | |
| 166 | |
| 167 =cut | |
| 168 | |
| 169 sub exons { | |
| 170 my ($self, $type) = @_; | |
| 171 return $self->get_unordered_feature_type('Bio::SeqFeature::Gene::ExonI', | |
| 172 $type); | |
| 173 } | |
| 174 | |
| 175 =head2 exons_ordered | |
| 176 | |
| 177 Title : exons_ordered | |
| 178 Usage : @exons = $gene->exons_ordered(); | |
| 179 @exons = $gene->exons_ordered("Internal"); | |
| 180 Function: Get an ordered list of all exon features or all exons of specified | |
| 181 type of this transcript. | |
| 182 | |
| 183 Exon type is treated as a case-insensitive regular expression and | |
| 184 is optional. For consistency, use only the following types: | |
| 185 | |
| 186 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects. | |
| 187 Args : An optional string specifying the primary_tag of the feature. | |
| 188 | |
| 189 =cut | |
| 190 | |
| 191 sub exons_ordered { | |
| 192 my ($self,$type) = @_; | |
| 193 return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $type); | |
| 194 } | |
| 195 | |
| 196 =head2 add_exon | |
| 197 | |
| 198 Title : add_exon() | |
| 199 Usage : $transcript->add_exon($exon,'initial'); | |
| 200 Function: Add a exon feature to this transcript. | |
| 201 | |
| 202 The second argument denotes the type of exon. Mixing exons with and | |
| 203 without a type is likely to cause trouble in exons(). Either | |
| 204 leave out the type for all exons or for none. | |
| 205 | |
| 206 Presently, the following types are known: initial, internal, | |
| 207 terminal, utr, utr5prime, and utr3prime (all case-insensitive). | |
| 208 UTR should better be added through utrs()/add_utr(). | |
| 209 | |
| 210 If you wish to use other or additional types, you will almost | |
| 211 certainly have to call exon_type_sortorder() in order to replace | |
| 212 the default sort order, or mrna(), cds(), protein(), and exons() | |
| 213 may yield unexpected results. | |
| 214 | |
| 215 Returns : | |
| 216 Args : A Bio::SeqFeature::Gene::ExonI implementing object. | |
| 217 A string indicating the type of the exon (optional). | |
| 218 | |
| 219 | |
| 220 =cut | |
| 221 | |
| 222 sub add_exon { | |
| 223 my ($self, $fea) = @_; | |
| 224 if(! $fea->isa('Bio::SeqFeature::Gene::ExonI') ) { | |
| 225 $self->throw("$fea does not implement Bio::SeqFeature::Gene::ExonI"); | |
| 226 } | |
| 227 $self->_add($fea,'Bio::SeqFeature::Gene::Exon'); | |
| 228 } | |
| 229 | |
| 230 =head2 flush_exons | |
| 231 | |
| 232 Title : flush_exons() | |
| 233 Usage : $transcript->flush_exons(); | |
| 234 $transcript->flush_exons('terminal'); | |
| 235 Function: Remove all or a certain type of exon features from this transcript. | |
| 236 | |
| 237 See add_exon() for documentation about types. | |
| 238 | |
| 239 Calling without a type will not flush UTRs. Call flush_utrs() for | |
| 240 this purpose. | |
| 241 Returns : the deleted features as a list | |
| 242 Args : A string indicating the type of the exon (optional). | |
| 243 | |
| 244 | |
| 245 =cut | |
| 246 | |
| 247 sub flush_exons { | |
| 248 my ($self, $type) = @_; | |
| 249 return $self->_flush('Bio::SeqFeature::Gene::Exon',$type); | |
| 250 } | |
| 251 | |
| 252 =head2 introns | |
| 253 | |
| 254 Title : introns() | |
| 255 Usage : @introns = $gene->introns(); | |
| 256 Function: Get all intron features this gene structure. | |
| 257 | |
| 258 Note that this implementation generates these features | |
| 259 on-the-fly, that is, it simply treats all regions between | |
| 260 exons as introns, assuming that exons do not overlap. A | |
| 261 consequence is that a consistent correspondence between the | |
| 262 elements in the returned array and the array that exons() | |
| 263 returns will exist only if the exons are properly sorted | |
| 264 within their types (forward for plus- strand and reverse | |
| 265 for minus-strand transcripts). To ensure correctness the | |
| 266 elements in the array returned will always be sorted. | |
| 267 | |
| 268 Returns : An array of Bio::SeqFeature::Gene::Intron objects representing | |
| 269 the intron regions. | |
| 270 Args : | |
| 271 | |
| 272 | |
| 273 =cut | |
| 274 | |
| 275 sub introns { | |
| 276 my ($self) = @_; | |
| 277 my @introns = (); | |
| 278 my @exons = $self->exons(); | |
| 279 my ($strand, $rev_order); | |
| 280 | |
| 281 # if there's 1 or less exons we're done | |
| 282 return () unless($#exons > 0); | |
| 283 # record strand and order (a minus-strand transcript is likely to have | |
| 284 # the exons stacked in reverse order) | |
| 285 foreach my $exon (@exons) { | |
| 286 $strand = $exon->strand(); | |
| 287 last if $strand; # we're done if we've got 1 or -1 | |
| 288 } | |
| 289 $rev_order = ($exons[0]->end() < $exons[1]->start() ? 0 : 1); | |
| 290 | |
| 291 # Make sure exons are sorted. Because we assume they don't overlap, we | |
| 292 # simply sort by start position. | |
| 293 if((! defined($strand)) || ($strand != -1) || (! $rev_order)) { | |
| 294 # always sort forward for plus-strand transcripts, and for negative- | |
| 295 # strand transcripts that appear to be unsorted or forward sorted | |
| 296 @exons = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, $_->start()] } @exons; | |
| 297 } else { | |
| 298 # sort in reverse order for transcripts on the negative strand and | |
| 299 # found to be in reverse order | |
| 300 @exons = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, $_->start()] } @exons; | |
| 301 } | |
| 302 # loop over all intervening gaps | |
| 303 for(my $i = 0; $i < $#exons; $i++) { | |
| 304 my ($start, $end); | |
| 305 my $intron; | |
| 306 | |
| 307 if(defined($exons[$i]->strand()) && | |
| 308 (($exons[$i]->strand() * $strand) < 0)) { | |
| 309 $self->throw("Transcript mixes plus and minus strand exons. ". | |
| 310 "Computing introns makes no sense then."); | |
| 311 } | |
| 312 $start = $exons[$i+$rev_order]->end() + 1; # $i or $i+1 | |
| 313 $end = $exons[$i+1-$rev_order]->start() - 1; # $i+1 or $i | |
| 314 $intron = Bio::SeqFeature::Gene::Intron->new( | |
| 315 '-start' => $start, | |
| 316 '-end' => $end, | |
| 317 '-strand' => $strand, | |
| 318 '-primary' => 'intron', | |
| 319 '-source' => ref($self)); | |
| 320 my $seq = $self->entire_seq(); | |
| 321 $intron->attach_seq($seq) if $seq; | |
| 322 $intron->seq_id($self->seq_id()); | |
| 323 push(@introns, $intron); | |
| 324 } | |
| 325 return @introns; | |
| 326 } | |
| 327 | |
| 328 =head2 poly_A_site | |
| 329 | |
| 330 Title : poly_A_site() | |
| 331 Usage : $polyAsite = $transcript->poly_A_site(); | |
| 332 Function: Get/set the poly-adenylation feature/site of this transcript. | |
| 333 Returns : A Bio::SeqFeatureI implementing object representing the | |
| 334 poly-adenylation region. | |
| 335 Args : A Bio::SeqFeatureI implementing object on set, or FALSE to flush | |
| 336 a previously set object. | |
| 337 | |
| 338 | |
| 339 =cut | |
| 340 | |
| 341 sub poly_A_site { | |
| 342 my ($self, $fea) = @_; | |
| 343 if ($fea) { | |
| 344 $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site'); | |
| 345 } | |
| 346 return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0]; | |
| 347 } | |
| 348 | |
| 349 =head2 utrs | |
| 350 | |
| 351 Title : utrs() | |
| 352 Usage : @utr_sites = $transcript->utrs('utr3prime'); | |
| 353 @utr_sites = $transcript->utrs('utr5prime'); | |
| 354 @utr_sites = $transcript->utrs(); | |
| 355 Function: Get the features representing untranslated regions (UTR) of this | |
| 356 transcript. | |
| 357 | |
| 358 You may provide an argument specifying the type of UTR. Currently | |
| 359 the following types are recognized: utr5prime utr3prime for UTR on the | |
| 360 5' and 3' end of the CDS, respectively. | |
| 361 | |
| 362 Returns : An array of Bio::SeqFeature::Gene::UTR objects | |
| 363 representing the UTR regions or sites. | |
| 364 Args : Optionally, either utr3prime, or utr5prime for the the type of UTR | |
| 365 feature. | |
| 366 | |
| 367 | |
| 368 =cut | |
| 369 | |
| 370 sub utrs { | |
| 371 my ($self, $type) = @_; | |
| 372 return $self->get_feature_type('Bio::SeqFeature::Gene::UTR',$type); | |
| 373 | |
| 374 } | |
| 375 | |
| 376 =head2 add_utr | |
| 377 | |
| 378 Title : add_utr() | |
| 379 Usage : $transcript->add_utr($utrobj, 'utr3prime'); | |
| 380 $transcript->add_utr($utrobj); | |
| 381 Function: Add a UTR feature/site to this transcript. | |
| 382 | |
| 383 The second parameter is optional and denotes the type of the UTR | |
| 384 feature. Presently recognized types include 'utr5prime' and 'utr3prime' | |
| 385 for UTR on the 5' and 3' end of a gene, respectively. | |
| 386 | |
| 387 Calling this method is the same as calling | |
| 388 add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a | |
| 389 special exon object, which is transcribed, not spliced out, but | |
| 390 not translated. | |
| 391 | |
| 392 Note that the object supplied should return FALSE for is_coding(). | |
| 393 Otherwise cds() and friends will become confused. | |
| 394 | |
| 395 Returns : | |
| 396 Args : A Bio::SeqFeature::Gene::UTR implementing object. | |
| 397 | |
| 398 | |
| 399 =cut | |
| 400 | |
| 401 sub add_utr { | |
| 402 my ($self, $fea, $type) = @_; | |
| 403 $self->_add($fea,'Bio::SeqFeature::Gene::UTR',$type); | |
| 404 } | |
| 405 | |
| 406 =head2 flush_utrs | |
| 407 | |
| 408 Title : flush_utrs() | |
| 409 Usage : $transcript->flush_utrs(); | |
| 410 $transcript->flush_utrs('utr3prime'); | |
| 411 Function: Remove all or a specific type of UTR features/sites from this | |
| 412 transcript. | |
| 413 | |
| 414 Cf. add_utr() for documentation about recognized types. | |
| 415 Returns : a list of the removed features | |
| 416 Args : Optionally a string denoting the type of UTR feature. | |
| 417 | |
| 418 | |
| 419 =cut | |
| 420 | |
| 421 sub flush_utrs { | |
| 422 my ($self, $type) = @_; | |
| 423 return $self->_flush('Bio::SeqFeature::Gene::UTR',$type); | |
| 424 } | |
| 425 | |
| 426 =head2 sub_SeqFeature | |
| 427 | |
| 428 Title : sub_SeqFeature | |
| 429 Usage : @feats = $transcript->sub_SeqFeature(); | |
| 430 Function: Returns an array of all subfeatures. | |
| 431 | |
| 432 This method is defined in Bio::SeqFeatureI. We override this here | |
| 433 to include the exon etc features. | |
| 434 | |
| 435 Returns : An array Bio::SeqFeatureI implementing objects. | |
| 436 Args : none | |
| 437 | |
| 438 | |
| 439 =cut | |
| 440 | |
| 441 sub sub_SeqFeature { | |
| 442 my ($self) = @_; | |
| 443 my @feas; | |
| 444 | |
| 445 # get what the parent already has | |
| 446 @feas = $self->SUPER::sub_SeqFeature(); | |
| 447 # add the features we have in addition | |
| 448 push(@feas, $self->exons()); # this includes UTR features | |
| 449 push(@feas, $self->promoters()); | |
| 450 push(@feas, $self->poly_A_site()) if($self->poly_A_site()); | |
| 451 return @feas; | |
| 452 } | |
| 453 | |
| 454 =head2 flush_sub_SeqFeature | |
| 455 | |
| 456 Title : flush_sub_SeqFeature | |
| 457 Usage : $transcript->flush_sub_SeqFeature(); | |
| 458 $transcript->flush_sub_SeqFeature(1); | |
| 459 Function: Removes all subfeatures. | |
| 460 | |
| 461 This method is overridden from Bio::SeqFeature::Generic to flush | |
| 462 all additional subfeatures like exons, promoters, etc., which is | |
| 463 almost certainly not what you want. To remove only features added | |
| 464 through $transcript->add_sub_SeqFeature($feature) pass any | |
| 465 argument evaluating to TRUE. | |
| 466 | |
| 467 Example : | |
| 468 Returns : none | |
| 469 Args : Optionally, an argument evaluating to TRUE will suppress flushing | |
| 470 of all transcript-specific subfeatures (exons etc.). | |
| 471 | |
| 472 | |
| 473 =cut | |
| 474 | |
| 475 sub flush_sub_SeqFeature { | |
| 476 my ($self,$fea_only) = @_; | |
| 477 | |
| 478 $self->SUPER::flush_sub_SeqFeature(); | |
| 479 if(! $fea_only) { | |
| 480 $self->flush_promoters(); | |
| 481 $self->flush_exons(); | |
| 482 $self->flush_utrs(); | |
| 483 $self->poly_A_site(0); | |
| 484 } | |
| 485 } | |
| 486 | |
| 487 | |
| 488 =head2 cds | |
| 489 | |
| 490 Title : cds | |
| 491 Usage : $seq = $transcript->cds(); | |
| 492 Function: Returns the CDS (coding sequence) as defined by the exons | |
| 493 of this transcript and the attached sequence. | |
| 494 | |
| 495 If no sequence is attached this method will return undef. | |
| 496 | |
| 497 Note that the implementation provided here returns a | |
| 498 concatenation of all coding exons, thereby assuming that | |
| 499 exons do not overlap. | |
| 500 | |
| 501 Note also that you cannot set the CDS via this method. Set | |
| 502 a single CDS feature as a single exon, or derive your own | |
| 503 class if you want to store a predicted CDS. | |
| 504 | |
| 505 Example : | |
| 506 Returns : A Bio::PrimarySeqI implementing object. | |
| 507 Args : | |
| 508 | |
| 509 =cut | |
| 510 | |
| 511 sub cds { | |
| 512 my ($self) = @_; | |
| 513 my @exons = $self->exons_ordered(); #this is always sorted properly according to strand | |
| 514 my $strand; | |
| 515 | |
| 516 return undef unless(@exons); | |
| 517 # record strand (a minus-strand transcript must have the exons sorted in | |
| 518 # reverse order) | |
| 519 foreach my $exon (@exons) { | |
| 520 if(defined($exon->strand()) && (! $strand)) { | |
| 521 $strand = $exon->strand(); | |
| 522 } | |
| 523 if($exon->strand() && (($exon->strand() * $strand) < 0)) { | |
| 524 $self->throw("Transcript mixes coding exons on plus and minus ". | |
| 525 "strand. This makes no sense."); | |
| 526 } | |
| 527 } | |
| 528 my $cds = $self->_make_cds(@exons); | |
| 529 return undef unless $cds; | |
| 530 return Bio::PrimarySeq->new('-id' => $self->seq_id(), | |
| 531 '-seq' => $cds, | |
| 532 '-alphabet' => "dna"); | |
| 533 } | |
| 534 | |
| 535 =head2 protein | |
| 536 | |
| 537 Title : protein() | |
| 538 Usage : $protein = $transcript->protein(); | |
| 539 Function: Get the protein encoded by the transcript as a sequence object. | |
| 540 | |
| 541 The implementation provided here simply calls translate() on the | |
| 542 object returned by cds(). | |
| 543 | |
| 544 Returns : A Bio::PrimarySeqI implementing object. | |
| 545 Args : | |
| 546 | |
| 547 | |
| 548 =cut | |
| 549 | |
| 550 sub protein { | |
| 551 my ($self) = @_; | |
| 552 my $seq; | |
| 553 | |
| 554 $seq = $self->cds(); | |
| 555 return $seq->translate() if $seq; | |
| 556 return undef; | |
| 557 } | |
| 558 | |
| 559 =head2 mrna | |
| 560 | |
| 561 Title : mrna() | |
| 562 Usage : $mrna = $transcript->mrna(); | |
| 563 Function: Get the mRNA of the transcript as a sequence object. | |
| 564 | |
| 565 The difference to cds() is that the sequence object returned by | |
| 566 this methods will also include UTR and the poly-adenylation site, | |
| 567 but not promoter sequence (TBD). | |
| 568 | |
| 569 HL: do we really need this method? | |
| 570 | |
| 571 Returns : A Bio::PrimarySeqI implementing object. | |
| 572 Args : | |
| 573 | |
| 574 | |
| 575 =cut | |
| 576 | |
| 577 sub mrna { | |
| 578 my ($self) = @_; | |
| 579 my ($seq, $mrna, $elem); | |
| 580 | |
| 581 # get the coding part | |
| 582 $seq = $self->cds(); | |
| 583 if(! $seq) { | |
| 584 $seq = Bio::PrimarySeq->new('-id' => $self->seq_id(), | |
| 585 '-alphabet' => "rna", | |
| 586 '-seq' => ""); | |
| 587 } | |
| 588 # get and add UTR sequences | |
| 589 $mrna = ""; | |
| 590 foreach $elem ($self->utrs('utr5prime')) { | |
| 591 $mrna .= $elem->seq()->seq(); | |
| 592 } | |
| 593 $seq->seq($mrna . $seq->seq()); | |
| 594 $mrna = ""; | |
| 595 foreach $elem ($self->utrs('utr3prime')) { | |
| 596 $mrna .= $elem->seq()->seq(); | |
| 597 } | |
| 598 $seq->seq($seq->seq() . $mrna); | |
| 599 if($self->poly_A_site()) { | |
| 600 $seq->seq($seq->seq() . $self->poly_A_site()->seq()->seq()); | |
| 601 } | |
| 602 return undef if($seq->length() == 0); | |
| 603 return $seq; | |
| 604 } | |
| 605 | |
| 606 sub _get_typed_keys { | |
| 607 my ($self, $keyprefix, $type) = @_; | |
| 608 my @keys = (); | |
| 609 my @feas = (); | |
| 610 | |
| 611 # make case-insensitive | |
| 612 $type = ($type ? lc($type) : ""); | |
| 613 # pull out all feature types that exist and match | |
| 614 @keys = grep { /^_$keyprefix$type/i; } (keys(%{$self})); | |
| 615 return @keys; | |
| 616 } | |
| 617 | |
| 618 sub _make_cds { | |
| 619 my ($self,@exons) = @_; | |
| 620 my $cds = ""; | |
| 621 | |
| 622 foreach my $exon (@exons) { | |
| 623 next if((! defined($exon->seq())) || (! $exon->is_coding())); | |
| 624 my $phase = length($cds) % 3; | |
| 625 # let's check the simple case | |
| 626 if((! defined($exon->frame())) || ($phase == $exon->frame())) { | |
| 627 # this one fits exactly, or frame of the exon is undefined (should | |
| 628 # we warn about that?); we bypass the $exon->cds() here (hmm, | |
| 629 # not very clean style, but I don't see where this screws up) | |
| 630 $cds .= $exon->seq()->seq(); | |
| 631 } else { | |
| 632 # this one is probably from exon shuffling and needs some work | |
| 633 my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0 | |
| 634 next if(! $seq); | |
| 635 $seq = $seq->seq(); | |
| 636 # adjustment needed? | |
| 637 if($phase > 0) { | |
| 638 # how many Ns can we chop off the piece to be added? | |
| 639 my $n_crop = 0; | |
| 640 if($seq =~ /^(n+)/i) { | |
| 641 $n_crop = length($1); | |
| 642 } | |
| 643 if($n_crop >= $phase) { | |
| 644 # chop off to match the phase | |
| 645 $seq = substr($seq, $phase); | |
| 646 } else { | |
| 647 # fill in Ns | |
| 648 $seq = ("n" x (3-$phase)) . $seq; | |
| 649 } | |
| 650 } | |
| 651 $cds .= $seq; | |
| 652 } | |
| 653 } | |
| 654 return $cds; | |
| 655 } | |
| 656 | |
| 657 =head2 features | |
| 658 | |
| 659 Title : features | |
| 660 Usage : my @features=$transcript->features; | |
| 661 Function: returns all the features associated with this transcript | |
| 662 Returns : a list of SeqFeatureI implementing objects | |
| 663 Args : none | |
| 664 | |
| 665 | |
| 666 =cut | |
| 667 | |
| 668 | |
| 669 sub features { | |
| 670 my ($self) = shift; | |
| 671 $self->{'_features'} = [] unless defined $self->{'_features'}; | |
| 672 return @{$self->{'_features'}}; | |
| 673 } | |
| 674 | |
| 675 =head2 features_ordered | |
| 676 | |
| 677 Title : features_ordered | |
| 678 Usage : my @features=$transcript->features_ordered; | |
| 679 Function: returns all the features associated with this transcript, | |
| 680 in order by feature start, according to strand | |
| 681 Returns : a list of SeqFeatureI implementing objects | |
| 682 Args : none | |
| 683 | |
| 684 | |
| 685 =cut | |
| 686 | |
| 687 sub features_ordered{ | |
| 688 my ($self) = @_; | |
| 689 return $self->_stranded_sort(@{$self->{'_features'}}); | |
| 690 } | |
| 691 | |
| 692 | |
| 693 sub get_unordered_feature_type{ | |
| 694 my ($self, $type, $pri)=@_; | |
| 695 my @list; | |
| 696 foreach ($self->features) { | |
| 697 if ($_->isa($type)) { | |
| 698 if ($pri && $_->primary_tag !~ /$pri/i) { | |
| 699 next; | |
| 700 } | |
| 701 push @list,$_; | |
| 702 } | |
| 703 } | |
| 704 return @list; | |
| 705 | |
| 706 } | |
| 707 | |
| 708 sub get_feature_type { | |
| 709 my ($self)=shift; | |
| 710 return $self->_stranded_sort($self->get_unordered_feature_type(@_)); | |
| 711 } | |
| 712 | |
| 713 #This was fixed by Gene Cutler - the indexing on the list being reversed | |
| 714 #fixed a bad bug. Thanks Gene! | |
| 715 sub _flush { | |
| 716 my ($self, $type, $pri)=@_; | |
| 717 my @list=$self->features; | |
| 718 my @cut; | |
| 719 for (reverse (0..$#list)) { | |
| 720 if ($list[$_]->isa($type)) { | |
| 721 if ($pri && $list[$_]->primary_tag !~ /$pri/i) { | |
| 722 next; | |
| 723 } | |
| 724 push @cut, splice @list, $_, 1; #remove the element of $type from @list | |
| 725 #and return each of them in @cut | |
| 726 } | |
| 727 } | |
| 728 $self->{'_features'}=\@list; | |
| 729 return reverse @cut; | |
| 730 } | |
| 731 | |
| 732 sub _add { | |
| 733 my ($self, $fea, $type)=@_; | |
| 734 require Bio::SeqFeature::Gene::Promoter; | |
| 735 require Bio::SeqFeature::Gene::UTR; | |
| 736 require Bio::SeqFeature::Gene::Exon; | |
| 737 require Bio::SeqFeature::Gene::Intron; | |
| 738 require Bio::SeqFeature::Gene::Poly_A_site; | |
| 739 | |
| 740 if(! $fea->isa('Bio::SeqFeatureI') ) { | |
| 741 $self->throw("$fea does not implement Bio::SeqFeatureI"); | |
| 742 } | |
| 743 if(! $fea->isa($type) ) { | |
| 744 $fea=$self->_new_of_type($fea,$type); | |
| 745 } | |
| 746 if (! $self->strand) { | |
| 747 $self->strand($fea->strand); | |
| 748 } else { | |
| 749 if ($self->strand * $fea->strand == -1) { | |
| 750 $self->throw("$fea is on opposite strand from $self"); | |
| 751 } | |
| 752 } | |
| 753 | |
| 754 $self->_expand_region($fea); | |
| 755 if(defined($self->entire_seq()) && (! defined($fea->entire_seq())) && | |
| 756 $fea->can('attach_seq')) { | |
| 757 $fea->attach_seq($self->entire_seq()); | |
| 758 } | |
| 759 if (defined $self->parent) { | |
| 760 $self->parent->_expand_region($fea); | |
| 761 } | |
| 762 push(@{$self->{'_features'}}, $fea); | |
| 763 1; | |
| 764 } | |
| 765 | |
| 766 sub _stranded_sort { | |
| 767 my ($self,@list)=@_; | |
| 768 my $strand; | |
| 769 foreach my $fea (@list) { | |
| 770 if($fea->strand()) { | |
| 771 # defined and != 0 | |
| 772 $strand = $fea->strand() if(! $strand); | |
| 773 if(($fea->strand() * $strand) < 0) { | |
| 774 $strand = undef; | |
| 775 last; | |
| 776 } | |
| 777 } | |
| 778 } | |
| 779 if (defined $strand && $strand == - 1) { #reverse strand | |
| 780 return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list; | |
| 781 } else { #undef or forward strand | |
| 782 return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list; | |
| 783 } | |
| 784 } | |
| 785 | |
| 786 sub _new_of_type { | |
| 787 my ($self, $fea, $type, $pri)= @_; | |
| 788 my $primary; | |
| 789 if ($pri) { | |
| 790 $primary = $pri; #can set new primary tag if desired | |
| 791 } else { | |
| 792 ($primary) = $type =~ /.*::(.+)/; #or else primary is just end of type string | |
| 793 } | |
| 794 bless $fea,$type; | |
| 795 $fea->primary_tag($primary); | |
| 796 return $fea; | |
| 797 } | |
| 798 | |
| 799 1; |
