Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/SeqFeature/Gene/Transcript.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 # $Id: Transcript.pm,v 1.25 2002/12/29 09:37:51 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::SeqFeature::Gene::Transcript | |
4 # | |
5 # Cared for by Hilmar Lapp <hlapp@gmx.net> | |
6 # | |
7 # Copyright Hilmar Lapp | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::SeqFeature::Gene::Transcript - A feature representing a transcript | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 See documentation of methods. | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 A feature representing a transcript. | |
24 | |
25 | |
26 =head1 FEEDBACK | |
27 | |
28 =head2 Mailing Lists | |
29 | |
30 User feedback is an integral part of the evolution of this | |
31 and other Bioperl modules. Send your comments and suggestions preferably | |
32 to one of the Bioperl mailing lists. | |
33 Your participation is much appreciated. | |
34 | |
35 bioperl-l@bioperl.org - General discussion | |
36 http://bio.perl.org/MailList.html - About the mailing lists | |
37 | |
38 =head2 Reporting Bugs | |
39 | |
40 Report bugs to the Bioperl bug tracking system to help us keep track | |
41 the bugs and their resolution. | |
42 Bug reports can be submitted via email or the web: | |
43 | |
44 bioperl-bugs@bio.perl.org | |
45 http://bugzilla.bioperl.org/ | |
46 | |
47 =head1 AUTHOR - Hilmar Lapp | |
48 | |
49 Email hlapp@gmx.net | |
50 | |
51 Describe contact details here | |
52 | |
53 =head1 APPENDIX | |
54 | |
55 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
56 | |
57 =cut | |
58 | |
59 | |
60 # Let the code begin... | |
61 | |
62 package Bio::SeqFeature::Gene::Transcript; | |
63 use vars qw(@ISA); | |
64 use strict; | |
65 | |
66 # Object preamble - inherits from Bio::Root::Object | |
67 | |
68 use Bio::SeqFeature::Gene::TranscriptI; | |
69 use Bio::SeqFeature::Generic; | |
70 use Bio::PrimarySeq; | |
71 | |
72 @ISA = qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI); | |
73 | |
74 sub new { | |
75 my ($caller, @args) = @_; | |
76 my $self = $caller->SUPER::new(@args); | |
77 my ($primary) = $self->_rearrange([qw(PRIMARY)],@args); | |
78 | |
79 $primary = 'transcript' unless $primary; | |
80 $self->primary_tag($primary); | |
81 $self->strand(0) if(! defined($self->strand())); | |
82 return $self; | |
83 } | |
84 | |
85 | |
86 =head2 promoters | |
87 | |
88 Title : promoters() | |
89 Usage : @proms = $transcript->promoters(); | |
90 Function: Get the promoter features/sites of this transcript. | |
91 | |
92 Note that OO-modeling of regulatory elements is not stable yet. | |
93 This means that this method might change or even disappear in a | |
94 future release. Be aware of this if you use it. | |
95 | |
96 Returns : An array of Bio::SeqFeatureI implementing objects representing the | |
97 promoter regions or sites. | |
98 Args : | |
99 | |
100 | |
101 =cut | |
102 | |
103 sub promoters { | |
104 my ($self) = @_; | |
105 return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter'); | |
106 } | |
107 | |
108 =head2 add_promoter | |
109 | |
110 Title : add_promoter() | |
111 Usage : $transcript->add_promoter($feature); | |
112 Function: Add a promoter feature/site to this transcript. | |
113 | |
114 | |
115 Note that OO-modeling of regulatory elements is not stable yet. | |
116 This means that this method might change or even disappear in a | |
117 future release. Be aware of this if you use it. | |
118 | |
119 Returns : | |
120 Args : A Bio::SeqFeatureI implementing object. | |
121 | |
122 | |
123 =cut | |
124 | |
125 sub add_promoter { | |
126 my ($self, $fea) = @_; | |
127 $self->_add($fea,'Bio::SeqFeature::Gene::Promoter'); | |
128 } | |
129 | |
130 =head2 flush_promoters | |
131 | |
132 Title : flush_promoters() | |
133 Usage : $transcript->flush_promoters(); | |
134 Function: Remove all promoter features/sites from this transcript. | |
135 | |
136 Note that OO-modeling of regulatory elements is not stable yet. | |
137 This means that this method might change or even disappear in a | |
138 future release. Be aware of this if you use it. | |
139 | |
140 Returns : the removed features as a list | |
141 Args : none | |
142 | |
143 | |
144 =cut | |
145 | |
146 sub flush_promoters { | |
147 my ($self) = @_; | |
148 return $self->_flush('Bio::SeqFeature::Gene::Promoter'); | |
149 } | |
150 | |
151 =head2 exons | |
152 | |
153 Title : exons() | |
154 Usage : @exons = $gene->exons(); | |
155 ($inital_exon) = $gene->exons('Initial'); | |
156 Function: Get all exon features or all exons of specified type of this | |
157 transcript. | |
158 | |
159 Exon type is treated as a case-insensitive regular expression and | |
160 is optional. For consistency, use only the following types: | |
161 initial, internal, terminal. | |
162 | |
163 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects. | |
164 Args : An optional string specifying the primary_tag of the feature. | |
165 | |
166 | |
167 =cut | |
168 | |
169 sub exons { | |
170 my ($self, $type) = @_; | |
171 return $self->get_unordered_feature_type('Bio::SeqFeature::Gene::ExonI', | |
172 $type); | |
173 } | |
174 | |
175 =head2 exons_ordered | |
176 | |
177 Title : exons_ordered | |
178 Usage : @exons = $gene->exons_ordered(); | |
179 @exons = $gene->exons_ordered("Internal"); | |
180 Function: Get an ordered list of all exon features or all exons of specified | |
181 type of this transcript. | |
182 | |
183 Exon type is treated as a case-insensitive regular expression and | |
184 is optional. For consistency, use only the following types: | |
185 | |
186 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects. | |
187 Args : An optional string specifying the primary_tag of the feature. | |
188 | |
189 =cut | |
190 | |
191 sub exons_ordered { | |
192 my ($self,$type) = @_; | |
193 return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $type); | |
194 } | |
195 | |
196 =head2 add_exon | |
197 | |
198 Title : add_exon() | |
199 Usage : $transcript->add_exon($exon,'initial'); | |
200 Function: Add a exon feature to this transcript. | |
201 | |
202 The second argument denotes the type of exon. Mixing exons with and | |
203 without a type is likely to cause trouble in exons(). Either | |
204 leave out the type for all exons or for none. | |
205 | |
206 Presently, the following types are known: initial, internal, | |
207 terminal, utr, utr5prime, and utr3prime (all case-insensitive). | |
208 UTR should better be added through utrs()/add_utr(). | |
209 | |
210 If you wish to use other or additional types, you will almost | |
211 certainly have to call exon_type_sortorder() in order to replace | |
212 the default sort order, or mrna(), cds(), protein(), and exons() | |
213 may yield unexpected results. | |
214 | |
215 Returns : | |
216 Args : A Bio::SeqFeature::Gene::ExonI implementing object. | |
217 A string indicating the type of the exon (optional). | |
218 | |
219 | |
220 =cut | |
221 | |
222 sub add_exon { | |
223 my ($self, $fea) = @_; | |
224 if(! $fea->isa('Bio::SeqFeature::Gene::ExonI') ) { | |
225 $self->throw("$fea does not implement Bio::SeqFeature::Gene::ExonI"); | |
226 } | |
227 $self->_add($fea,'Bio::SeqFeature::Gene::Exon'); | |
228 } | |
229 | |
230 =head2 flush_exons | |
231 | |
232 Title : flush_exons() | |
233 Usage : $transcript->flush_exons(); | |
234 $transcript->flush_exons('terminal'); | |
235 Function: Remove all or a certain type of exon features from this transcript. | |
236 | |
237 See add_exon() for documentation about types. | |
238 | |
239 Calling without a type will not flush UTRs. Call flush_utrs() for | |
240 this purpose. | |
241 Returns : the deleted features as a list | |
242 Args : A string indicating the type of the exon (optional). | |
243 | |
244 | |
245 =cut | |
246 | |
247 sub flush_exons { | |
248 my ($self, $type) = @_; | |
249 return $self->_flush('Bio::SeqFeature::Gene::Exon',$type); | |
250 } | |
251 | |
252 =head2 introns | |
253 | |
254 Title : introns() | |
255 Usage : @introns = $gene->introns(); | |
256 Function: Get all intron features this gene structure. | |
257 | |
258 Note that this implementation generates these features | |
259 on-the-fly, that is, it simply treats all regions between | |
260 exons as introns, assuming that exons do not overlap. A | |
261 consequence is that a consistent correspondence between the | |
262 elements in the returned array and the array that exons() | |
263 returns will exist only if the exons are properly sorted | |
264 within their types (forward for plus- strand and reverse | |
265 for minus-strand transcripts). To ensure correctness the | |
266 elements in the array returned will always be sorted. | |
267 | |
268 Returns : An array of Bio::SeqFeature::Gene::Intron objects representing | |
269 the intron regions. | |
270 Args : | |
271 | |
272 | |
273 =cut | |
274 | |
275 sub introns { | |
276 my ($self) = @_; | |
277 my @introns = (); | |
278 my @exons = $self->exons(); | |
279 my ($strand, $rev_order); | |
280 | |
281 # if there's 1 or less exons we're done | |
282 return () unless($#exons > 0); | |
283 # record strand and order (a minus-strand transcript is likely to have | |
284 # the exons stacked in reverse order) | |
285 foreach my $exon (@exons) { | |
286 $strand = $exon->strand(); | |
287 last if $strand; # we're done if we've got 1 or -1 | |
288 } | |
289 $rev_order = ($exons[0]->end() < $exons[1]->start() ? 0 : 1); | |
290 | |
291 # Make sure exons are sorted. Because we assume they don't overlap, we | |
292 # simply sort by start position. | |
293 if((! defined($strand)) || ($strand != -1) || (! $rev_order)) { | |
294 # always sort forward for plus-strand transcripts, and for negative- | |
295 # strand transcripts that appear to be unsorted or forward sorted | |
296 @exons = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, $_->start()] } @exons; | |
297 } else { | |
298 # sort in reverse order for transcripts on the negative strand and | |
299 # found to be in reverse order | |
300 @exons = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, $_->start()] } @exons; | |
301 } | |
302 # loop over all intervening gaps | |
303 for(my $i = 0; $i < $#exons; $i++) { | |
304 my ($start, $end); | |
305 my $intron; | |
306 | |
307 if(defined($exons[$i]->strand()) && | |
308 (($exons[$i]->strand() * $strand) < 0)) { | |
309 $self->throw("Transcript mixes plus and minus strand exons. ". | |
310 "Computing introns makes no sense then."); | |
311 } | |
312 $start = $exons[$i+$rev_order]->end() + 1; # $i or $i+1 | |
313 $end = $exons[$i+1-$rev_order]->start() - 1; # $i+1 or $i | |
314 $intron = Bio::SeqFeature::Gene::Intron->new( | |
315 '-start' => $start, | |
316 '-end' => $end, | |
317 '-strand' => $strand, | |
318 '-primary' => 'intron', | |
319 '-source' => ref($self)); | |
320 my $seq = $self->entire_seq(); | |
321 $intron->attach_seq($seq) if $seq; | |
322 $intron->seq_id($self->seq_id()); | |
323 push(@introns, $intron); | |
324 } | |
325 return @introns; | |
326 } | |
327 | |
328 =head2 poly_A_site | |
329 | |
330 Title : poly_A_site() | |
331 Usage : $polyAsite = $transcript->poly_A_site(); | |
332 Function: Get/set the poly-adenylation feature/site of this transcript. | |
333 Returns : A Bio::SeqFeatureI implementing object representing the | |
334 poly-adenylation region. | |
335 Args : A Bio::SeqFeatureI implementing object on set, or FALSE to flush | |
336 a previously set object. | |
337 | |
338 | |
339 =cut | |
340 | |
341 sub poly_A_site { | |
342 my ($self, $fea) = @_; | |
343 if ($fea) { | |
344 $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site'); | |
345 } | |
346 return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0]; | |
347 } | |
348 | |
349 =head2 utrs | |
350 | |
351 Title : utrs() | |
352 Usage : @utr_sites = $transcript->utrs('utr3prime'); | |
353 @utr_sites = $transcript->utrs('utr5prime'); | |
354 @utr_sites = $transcript->utrs(); | |
355 Function: Get the features representing untranslated regions (UTR) of this | |
356 transcript. | |
357 | |
358 You may provide an argument specifying the type of UTR. Currently | |
359 the following types are recognized: utr5prime utr3prime for UTR on the | |
360 5' and 3' end of the CDS, respectively. | |
361 | |
362 Returns : An array of Bio::SeqFeature::Gene::UTR objects | |
363 representing the UTR regions or sites. | |
364 Args : Optionally, either utr3prime, or utr5prime for the the type of UTR | |
365 feature. | |
366 | |
367 | |
368 =cut | |
369 | |
370 sub utrs { | |
371 my ($self, $type) = @_; | |
372 return $self->get_feature_type('Bio::SeqFeature::Gene::UTR',$type); | |
373 | |
374 } | |
375 | |
376 =head2 add_utr | |
377 | |
378 Title : add_utr() | |
379 Usage : $transcript->add_utr($utrobj, 'utr3prime'); | |
380 $transcript->add_utr($utrobj); | |
381 Function: Add a UTR feature/site to this transcript. | |
382 | |
383 The second parameter is optional and denotes the type of the UTR | |
384 feature. Presently recognized types include 'utr5prime' and 'utr3prime' | |
385 for UTR on the 5' and 3' end of a gene, respectively. | |
386 | |
387 Calling this method is the same as calling | |
388 add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a | |
389 special exon object, which is transcribed, not spliced out, but | |
390 not translated. | |
391 | |
392 Note that the object supplied should return FALSE for is_coding(). | |
393 Otherwise cds() and friends will become confused. | |
394 | |
395 Returns : | |
396 Args : A Bio::SeqFeature::Gene::UTR implementing object. | |
397 | |
398 | |
399 =cut | |
400 | |
401 sub add_utr { | |
402 my ($self, $fea, $type) = @_; | |
403 $self->_add($fea,'Bio::SeqFeature::Gene::UTR',$type); | |
404 } | |
405 | |
406 =head2 flush_utrs | |
407 | |
408 Title : flush_utrs() | |
409 Usage : $transcript->flush_utrs(); | |
410 $transcript->flush_utrs('utr3prime'); | |
411 Function: Remove all or a specific type of UTR features/sites from this | |
412 transcript. | |
413 | |
414 Cf. add_utr() for documentation about recognized types. | |
415 Returns : a list of the removed features | |
416 Args : Optionally a string denoting the type of UTR feature. | |
417 | |
418 | |
419 =cut | |
420 | |
421 sub flush_utrs { | |
422 my ($self, $type) = @_; | |
423 return $self->_flush('Bio::SeqFeature::Gene::UTR',$type); | |
424 } | |
425 | |
426 =head2 sub_SeqFeature | |
427 | |
428 Title : sub_SeqFeature | |
429 Usage : @feats = $transcript->sub_SeqFeature(); | |
430 Function: Returns an array of all subfeatures. | |
431 | |
432 This method is defined in Bio::SeqFeatureI. We override this here | |
433 to include the exon etc features. | |
434 | |
435 Returns : An array Bio::SeqFeatureI implementing objects. | |
436 Args : none | |
437 | |
438 | |
439 =cut | |
440 | |
441 sub sub_SeqFeature { | |
442 my ($self) = @_; | |
443 my @feas; | |
444 | |
445 # get what the parent already has | |
446 @feas = $self->SUPER::sub_SeqFeature(); | |
447 # add the features we have in addition | |
448 push(@feas, $self->exons()); # this includes UTR features | |
449 push(@feas, $self->promoters()); | |
450 push(@feas, $self->poly_A_site()) if($self->poly_A_site()); | |
451 return @feas; | |
452 } | |
453 | |
454 =head2 flush_sub_SeqFeature | |
455 | |
456 Title : flush_sub_SeqFeature | |
457 Usage : $transcript->flush_sub_SeqFeature(); | |
458 $transcript->flush_sub_SeqFeature(1); | |
459 Function: Removes all subfeatures. | |
460 | |
461 This method is overridden from Bio::SeqFeature::Generic to flush | |
462 all additional subfeatures like exons, promoters, etc., which is | |
463 almost certainly not what you want. To remove only features added | |
464 through $transcript->add_sub_SeqFeature($feature) pass any | |
465 argument evaluating to TRUE. | |
466 | |
467 Example : | |
468 Returns : none | |
469 Args : Optionally, an argument evaluating to TRUE will suppress flushing | |
470 of all transcript-specific subfeatures (exons etc.). | |
471 | |
472 | |
473 =cut | |
474 | |
475 sub flush_sub_SeqFeature { | |
476 my ($self,$fea_only) = @_; | |
477 | |
478 $self->SUPER::flush_sub_SeqFeature(); | |
479 if(! $fea_only) { | |
480 $self->flush_promoters(); | |
481 $self->flush_exons(); | |
482 $self->flush_utrs(); | |
483 $self->poly_A_site(0); | |
484 } | |
485 } | |
486 | |
487 | |
488 =head2 cds | |
489 | |
490 Title : cds | |
491 Usage : $seq = $transcript->cds(); | |
492 Function: Returns the CDS (coding sequence) as defined by the exons | |
493 of this transcript and the attached sequence. | |
494 | |
495 If no sequence is attached this method will return undef. | |
496 | |
497 Note that the implementation provided here returns a | |
498 concatenation of all coding exons, thereby assuming that | |
499 exons do not overlap. | |
500 | |
501 Note also that you cannot set the CDS via this method. Set | |
502 a single CDS feature as a single exon, or derive your own | |
503 class if you want to store a predicted CDS. | |
504 | |
505 Example : | |
506 Returns : A Bio::PrimarySeqI implementing object. | |
507 Args : | |
508 | |
509 =cut | |
510 | |
511 sub cds { | |
512 my ($self) = @_; | |
513 my @exons = $self->exons_ordered(); #this is always sorted properly according to strand | |
514 my $strand; | |
515 | |
516 return undef unless(@exons); | |
517 # record strand (a minus-strand transcript must have the exons sorted in | |
518 # reverse order) | |
519 foreach my $exon (@exons) { | |
520 if(defined($exon->strand()) && (! $strand)) { | |
521 $strand = $exon->strand(); | |
522 } | |
523 if($exon->strand() && (($exon->strand() * $strand) < 0)) { | |
524 $self->throw("Transcript mixes coding exons on plus and minus ". | |
525 "strand. This makes no sense."); | |
526 } | |
527 } | |
528 my $cds = $self->_make_cds(@exons); | |
529 return undef unless $cds; | |
530 return Bio::PrimarySeq->new('-id' => $self->seq_id(), | |
531 '-seq' => $cds, | |
532 '-alphabet' => "dna"); | |
533 } | |
534 | |
535 =head2 protein | |
536 | |
537 Title : protein() | |
538 Usage : $protein = $transcript->protein(); | |
539 Function: Get the protein encoded by the transcript as a sequence object. | |
540 | |
541 The implementation provided here simply calls translate() on the | |
542 object returned by cds(). | |
543 | |
544 Returns : A Bio::PrimarySeqI implementing object. | |
545 Args : | |
546 | |
547 | |
548 =cut | |
549 | |
550 sub protein { | |
551 my ($self) = @_; | |
552 my $seq; | |
553 | |
554 $seq = $self->cds(); | |
555 return $seq->translate() if $seq; | |
556 return undef; | |
557 } | |
558 | |
559 =head2 mrna | |
560 | |
561 Title : mrna() | |
562 Usage : $mrna = $transcript->mrna(); | |
563 Function: Get the mRNA of the transcript as a sequence object. | |
564 | |
565 The difference to cds() is that the sequence object returned by | |
566 this methods will also include UTR and the poly-adenylation site, | |
567 but not promoter sequence (TBD). | |
568 | |
569 HL: do we really need this method? | |
570 | |
571 Returns : A Bio::PrimarySeqI implementing object. | |
572 Args : | |
573 | |
574 | |
575 =cut | |
576 | |
577 sub mrna { | |
578 my ($self) = @_; | |
579 my ($seq, $mrna, $elem); | |
580 | |
581 # get the coding part | |
582 $seq = $self->cds(); | |
583 if(! $seq) { | |
584 $seq = Bio::PrimarySeq->new('-id' => $self->seq_id(), | |
585 '-alphabet' => "rna", | |
586 '-seq' => ""); | |
587 } | |
588 # get and add UTR sequences | |
589 $mrna = ""; | |
590 foreach $elem ($self->utrs('utr5prime')) { | |
591 $mrna .= $elem->seq()->seq(); | |
592 } | |
593 $seq->seq($mrna . $seq->seq()); | |
594 $mrna = ""; | |
595 foreach $elem ($self->utrs('utr3prime')) { | |
596 $mrna .= $elem->seq()->seq(); | |
597 } | |
598 $seq->seq($seq->seq() . $mrna); | |
599 if($self->poly_A_site()) { | |
600 $seq->seq($seq->seq() . $self->poly_A_site()->seq()->seq()); | |
601 } | |
602 return undef if($seq->length() == 0); | |
603 return $seq; | |
604 } | |
605 | |
606 sub _get_typed_keys { | |
607 my ($self, $keyprefix, $type) = @_; | |
608 my @keys = (); | |
609 my @feas = (); | |
610 | |
611 # make case-insensitive | |
612 $type = ($type ? lc($type) : ""); | |
613 # pull out all feature types that exist and match | |
614 @keys = grep { /^_$keyprefix$type/i; } (keys(%{$self})); | |
615 return @keys; | |
616 } | |
617 | |
618 sub _make_cds { | |
619 my ($self,@exons) = @_; | |
620 my $cds = ""; | |
621 | |
622 foreach my $exon (@exons) { | |
623 next if((! defined($exon->seq())) || (! $exon->is_coding())); | |
624 my $phase = length($cds) % 3; | |
625 # let's check the simple case | |
626 if((! defined($exon->frame())) || ($phase == $exon->frame())) { | |
627 # this one fits exactly, or frame of the exon is undefined (should | |
628 # we warn about that?); we bypass the $exon->cds() here (hmm, | |
629 # not very clean style, but I don't see where this screws up) | |
630 $cds .= $exon->seq()->seq(); | |
631 } else { | |
632 # this one is probably from exon shuffling and needs some work | |
633 my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0 | |
634 next if(! $seq); | |
635 $seq = $seq->seq(); | |
636 # adjustment needed? | |
637 if($phase > 0) { | |
638 # how many Ns can we chop off the piece to be added? | |
639 my $n_crop = 0; | |
640 if($seq =~ /^(n+)/i) { | |
641 $n_crop = length($1); | |
642 } | |
643 if($n_crop >= $phase) { | |
644 # chop off to match the phase | |
645 $seq = substr($seq, $phase); | |
646 } else { | |
647 # fill in Ns | |
648 $seq = ("n" x (3-$phase)) . $seq; | |
649 } | |
650 } | |
651 $cds .= $seq; | |
652 } | |
653 } | |
654 return $cds; | |
655 } | |
656 | |
657 =head2 features | |
658 | |
659 Title : features | |
660 Usage : my @features=$transcript->features; | |
661 Function: returns all the features associated with this transcript | |
662 Returns : a list of SeqFeatureI implementing objects | |
663 Args : none | |
664 | |
665 | |
666 =cut | |
667 | |
668 | |
669 sub features { | |
670 my ($self) = shift; | |
671 $self->{'_features'} = [] unless defined $self->{'_features'}; | |
672 return @{$self->{'_features'}}; | |
673 } | |
674 | |
675 =head2 features_ordered | |
676 | |
677 Title : features_ordered | |
678 Usage : my @features=$transcript->features_ordered; | |
679 Function: returns all the features associated with this transcript, | |
680 in order by feature start, according to strand | |
681 Returns : a list of SeqFeatureI implementing objects | |
682 Args : none | |
683 | |
684 | |
685 =cut | |
686 | |
687 sub features_ordered{ | |
688 my ($self) = @_; | |
689 return $self->_stranded_sort(@{$self->{'_features'}}); | |
690 } | |
691 | |
692 | |
693 sub get_unordered_feature_type{ | |
694 my ($self, $type, $pri)=@_; | |
695 my @list; | |
696 foreach ($self->features) { | |
697 if ($_->isa($type)) { | |
698 if ($pri && $_->primary_tag !~ /$pri/i) { | |
699 next; | |
700 } | |
701 push @list,$_; | |
702 } | |
703 } | |
704 return @list; | |
705 | |
706 } | |
707 | |
708 sub get_feature_type { | |
709 my ($self)=shift; | |
710 return $self->_stranded_sort($self->get_unordered_feature_type(@_)); | |
711 } | |
712 | |
713 #This was fixed by Gene Cutler - the indexing on the list being reversed | |
714 #fixed a bad bug. Thanks Gene! | |
715 sub _flush { | |
716 my ($self, $type, $pri)=@_; | |
717 my @list=$self->features; | |
718 my @cut; | |
719 for (reverse (0..$#list)) { | |
720 if ($list[$_]->isa($type)) { | |
721 if ($pri && $list[$_]->primary_tag !~ /$pri/i) { | |
722 next; | |
723 } | |
724 push @cut, splice @list, $_, 1; #remove the element of $type from @list | |
725 #and return each of them in @cut | |
726 } | |
727 } | |
728 $self->{'_features'}=\@list; | |
729 return reverse @cut; | |
730 } | |
731 | |
732 sub _add { | |
733 my ($self, $fea, $type)=@_; | |
734 require Bio::SeqFeature::Gene::Promoter; | |
735 require Bio::SeqFeature::Gene::UTR; | |
736 require Bio::SeqFeature::Gene::Exon; | |
737 require Bio::SeqFeature::Gene::Intron; | |
738 require Bio::SeqFeature::Gene::Poly_A_site; | |
739 | |
740 if(! $fea->isa('Bio::SeqFeatureI') ) { | |
741 $self->throw("$fea does not implement Bio::SeqFeatureI"); | |
742 } | |
743 if(! $fea->isa($type) ) { | |
744 $fea=$self->_new_of_type($fea,$type); | |
745 } | |
746 if (! $self->strand) { | |
747 $self->strand($fea->strand); | |
748 } else { | |
749 if ($self->strand * $fea->strand == -1) { | |
750 $self->throw("$fea is on opposite strand from $self"); | |
751 } | |
752 } | |
753 | |
754 $self->_expand_region($fea); | |
755 if(defined($self->entire_seq()) && (! defined($fea->entire_seq())) && | |
756 $fea->can('attach_seq')) { | |
757 $fea->attach_seq($self->entire_seq()); | |
758 } | |
759 if (defined $self->parent) { | |
760 $self->parent->_expand_region($fea); | |
761 } | |
762 push(@{$self->{'_features'}}, $fea); | |
763 1; | |
764 } | |
765 | |
766 sub _stranded_sort { | |
767 my ($self,@list)=@_; | |
768 my $strand; | |
769 foreach my $fea (@list) { | |
770 if($fea->strand()) { | |
771 # defined and != 0 | |
772 $strand = $fea->strand() if(! $strand); | |
773 if(($fea->strand() * $strand) < 0) { | |
774 $strand = undef; | |
775 last; | |
776 } | |
777 } | |
778 } | |
779 if (defined $strand && $strand == - 1) { #reverse strand | |
780 return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list; | |
781 } else { #undef or forward strand | |
782 return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list; | |
783 } | |
784 } | |
785 | |
786 sub _new_of_type { | |
787 my ($self, $fea, $type, $pri)= @_; | |
788 my $primary; | |
789 if ($pri) { | |
790 $primary = $pri; #can set new primary tag if desired | |
791 } else { | |
792 ($primary) = $type =~ /.*::(.+)/; #or else primary is just end of type string | |
793 } | |
794 bless $fea,$type; | |
795 $fea->primary_tag($primary); | |
796 return $fea; | |
797 } | |
798 | |
799 1; |