0
|
1 # $Id: Transcript.pm,v 1.25 2002/12/29 09:37:51 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SeqFeature::Gene::Transcript
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
|
|
6 #
|
|
7 # Copyright Hilmar Lapp
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SeqFeature::Gene::Transcript - A feature representing a transcript
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 See documentation of methods.
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 A feature representing a transcript.
|
|
24
|
|
25
|
|
26 =head1 FEEDBACK
|
|
27
|
|
28 =head2 Mailing Lists
|
|
29
|
|
30 User feedback is an integral part of the evolution of this
|
|
31 and other Bioperl modules. Send your comments and suggestions preferably
|
|
32 to one of the Bioperl mailing lists.
|
|
33 Your participation is much appreciated.
|
|
34
|
|
35 bioperl-l@bioperl.org - General discussion
|
|
36 http://bio.perl.org/MailList.html - About the mailing lists
|
|
37
|
|
38 =head2 Reporting Bugs
|
|
39
|
|
40 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
41 the bugs and their resolution.
|
|
42 Bug reports can be submitted via email or the web:
|
|
43
|
|
44 bioperl-bugs@bio.perl.org
|
|
45 http://bugzilla.bioperl.org/
|
|
46
|
|
47 =head1 AUTHOR - Hilmar Lapp
|
|
48
|
|
49 Email hlapp@gmx.net
|
|
50
|
|
51 Describe contact details here
|
|
52
|
|
53 =head1 APPENDIX
|
|
54
|
|
55 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
56
|
|
57 =cut
|
|
58
|
|
59
|
|
60 # Let the code begin...
|
|
61
|
|
62 package Bio::SeqFeature::Gene::Transcript;
|
|
63 use vars qw(@ISA);
|
|
64 use strict;
|
|
65
|
|
66 # Object preamble - inherits from Bio::Root::Object
|
|
67
|
|
68 use Bio::SeqFeature::Gene::TranscriptI;
|
|
69 use Bio::SeqFeature::Generic;
|
|
70 use Bio::PrimarySeq;
|
|
71
|
|
72 @ISA = qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI);
|
|
73
|
|
74 sub new {
|
|
75 my ($caller, @args) = @_;
|
|
76 my $self = $caller->SUPER::new(@args);
|
|
77 my ($primary) = $self->_rearrange([qw(PRIMARY)],@args);
|
|
78
|
|
79 $primary = 'transcript' unless $primary;
|
|
80 $self->primary_tag($primary);
|
|
81 $self->strand(0) if(! defined($self->strand()));
|
|
82 return $self;
|
|
83 }
|
|
84
|
|
85
|
|
86 =head2 promoters
|
|
87
|
|
88 Title : promoters()
|
|
89 Usage : @proms = $transcript->promoters();
|
|
90 Function: Get the promoter features/sites of this transcript.
|
|
91
|
|
92 Note that OO-modeling of regulatory elements is not stable yet.
|
|
93 This means that this method might change or even disappear in a
|
|
94 future release. Be aware of this if you use it.
|
|
95
|
|
96 Returns : An array of Bio::SeqFeatureI implementing objects representing the
|
|
97 promoter regions or sites.
|
|
98 Args :
|
|
99
|
|
100
|
|
101 =cut
|
|
102
|
|
103 sub promoters {
|
|
104 my ($self) = @_;
|
|
105 return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter');
|
|
106 }
|
|
107
|
|
108 =head2 add_promoter
|
|
109
|
|
110 Title : add_promoter()
|
|
111 Usage : $transcript->add_promoter($feature);
|
|
112 Function: Add a promoter feature/site to this transcript.
|
|
113
|
|
114
|
|
115 Note that OO-modeling of regulatory elements is not stable yet.
|
|
116 This means that this method might change or even disappear in a
|
|
117 future release. Be aware of this if you use it.
|
|
118
|
|
119 Returns :
|
|
120 Args : A Bio::SeqFeatureI implementing object.
|
|
121
|
|
122
|
|
123 =cut
|
|
124
|
|
125 sub add_promoter {
|
|
126 my ($self, $fea) = @_;
|
|
127 $self->_add($fea,'Bio::SeqFeature::Gene::Promoter');
|
|
128 }
|
|
129
|
|
130 =head2 flush_promoters
|
|
131
|
|
132 Title : flush_promoters()
|
|
133 Usage : $transcript->flush_promoters();
|
|
134 Function: Remove all promoter features/sites from this transcript.
|
|
135
|
|
136 Note that OO-modeling of regulatory elements is not stable yet.
|
|
137 This means that this method might change or even disappear in a
|
|
138 future release. Be aware of this if you use it.
|
|
139
|
|
140 Returns : the removed features as a list
|
|
141 Args : none
|
|
142
|
|
143
|
|
144 =cut
|
|
145
|
|
146 sub flush_promoters {
|
|
147 my ($self) = @_;
|
|
148 return $self->_flush('Bio::SeqFeature::Gene::Promoter');
|
|
149 }
|
|
150
|
|
151 =head2 exons
|
|
152
|
|
153 Title : exons()
|
|
154 Usage : @exons = $gene->exons();
|
|
155 ($inital_exon) = $gene->exons('Initial');
|
|
156 Function: Get all exon features or all exons of specified type of this
|
|
157 transcript.
|
|
158
|
|
159 Exon type is treated as a case-insensitive regular expression and
|
|
160 is optional. For consistency, use only the following types:
|
|
161 initial, internal, terminal.
|
|
162
|
|
163 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
|
|
164 Args : An optional string specifying the primary_tag of the feature.
|
|
165
|
|
166
|
|
167 =cut
|
|
168
|
|
169 sub exons {
|
|
170 my ($self, $type) = @_;
|
|
171 return $self->get_unordered_feature_type('Bio::SeqFeature::Gene::ExonI',
|
|
172 $type);
|
|
173 }
|
|
174
|
|
175 =head2 exons_ordered
|
|
176
|
|
177 Title : exons_ordered
|
|
178 Usage : @exons = $gene->exons_ordered();
|
|
179 @exons = $gene->exons_ordered("Internal");
|
|
180 Function: Get an ordered list of all exon features or all exons of specified
|
|
181 type of this transcript.
|
|
182
|
|
183 Exon type is treated as a case-insensitive regular expression and
|
|
184 is optional. For consistency, use only the following types:
|
|
185
|
|
186 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
|
|
187 Args : An optional string specifying the primary_tag of the feature.
|
|
188
|
|
189 =cut
|
|
190
|
|
191 sub exons_ordered {
|
|
192 my ($self,$type) = @_;
|
|
193 return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $type);
|
|
194 }
|
|
195
|
|
196 =head2 add_exon
|
|
197
|
|
198 Title : add_exon()
|
|
199 Usage : $transcript->add_exon($exon,'initial');
|
|
200 Function: Add a exon feature to this transcript.
|
|
201
|
|
202 The second argument denotes the type of exon. Mixing exons with and
|
|
203 without a type is likely to cause trouble in exons(). Either
|
|
204 leave out the type for all exons or for none.
|
|
205
|
|
206 Presently, the following types are known: initial, internal,
|
|
207 terminal, utr, utr5prime, and utr3prime (all case-insensitive).
|
|
208 UTR should better be added through utrs()/add_utr().
|
|
209
|
|
210 If you wish to use other or additional types, you will almost
|
|
211 certainly have to call exon_type_sortorder() in order to replace
|
|
212 the default sort order, or mrna(), cds(), protein(), and exons()
|
|
213 may yield unexpected results.
|
|
214
|
|
215 Returns :
|
|
216 Args : A Bio::SeqFeature::Gene::ExonI implementing object.
|
|
217 A string indicating the type of the exon (optional).
|
|
218
|
|
219
|
|
220 =cut
|
|
221
|
|
222 sub add_exon {
|
|
223 my ($self, $fea) = @_;
|
|
224 if(! $fea->isa('Bio::SeqFeature::Gene::ExonI') ) {
|
|
225 $self->throw("$fea does not implement Bio::SeqFeature::Gene::ExonI");
|
|
226 }
|
|
227 $self->_add($fea,'Bio::SeqFeature::Gene::Exon');
|
|
228 }
|
|
229
|
|
230 =head2 flush_exons
|
|
231
|
|
232 Title : flush_exons()
|
|
233 Usage : $transcript->flush_exons();
|
|
234 $transcript->flush_exons('terminal');
|
|
235 Function: Remove all or a certain type of exon features from this transcript.
|
|
236
|
|
237 See add_exon() for documentation about types.
|
|
238
|
|
239 Calling without a type will not flush UTRs. Call flush_utrs() for
|
|
240 this purpose.
|
|
241 Returns : the deleted features as a list
|
|
242 Args : A string indicating the type of the exon (optional).
|
|
243
|
|
244
|
|
245 =cut
|
|
246
|
|
247 sub flush_exons {
|
|
248 my ($self, $type) = @_;
|
|
249 return $self->_flush('Bio::SeqFeature::Gene::Exon',$type);
|
|
250 }
|
|
251
|
|
252 =head2 introns
|
|
253
|
|
254 Title : introns()
|
|
255 Usage : @introns = $gene->introns();
|
|
256 Function: Get all intron features this gene structure.
|
|
257
|
|
258 Note that this implementation generates these features
|
|
259 on-the-fly, that is, it simply treats all regions between
|
|
260 exons as introns, assuming that exons do not overlap. A
|
|
261 consequence is that a consistent correspondence between the
|
|
262 elements in the returned array and the array that exons()
|
|
263 returns will exist only if the exons are properly sorted
|
|
264 within their types (forward for plus- strand and reverse
|
|
265 for minus-strand transcripts). To ensure correctness the
|
|
266 elements in the array returned will always be sorted.
|
|
267
|
|
268 Returns : An array of Bio::SeqFeature::Gene::Intron objects representing
|
|
269 the intron regions.
|
|
270 Args :
|
|
271
|
|
272
|
|
273 =cut
|
|
274
|
|
275 sub introns {
|
|
276 my ($self) = @_;
|
|
277 my @introns = ();
|
|
278 my @exons = $self->exons();
|
|
279 my ($strand, $rev_order);
|
|
280
|
|
281 # if there's 1 or less exons we're done
|
|
282 return () unless($#exons > 0);
|
|
283 # record strand and order (a minus-strand transcript is likely to have
|
|
284 # the exons stacked in reverse order)
|
|
285 foreach my $exon (@exons) {
|
|
286 $strand = $exon->strand();
|
|
287 last if $strand; # we're done if we've got 1 or -1
|
|
288 }
|
|
289 $rev_order = ($exons[0]->end() < $exons[1]->start() ? 0 : 1);
|
|
290
|
|
291 # Make sure exons are sorted. Because we assume they don't overlap, we
|
|
292 # simply sort by start position.
|
|
293 if((! defined($strand)) || ($strand != -1) || (! $rev_order)) {
|
|
294 # always sort forward for plus-strand transcripts, and for negative-
|
|
295 # strand transcripts that appear to be unsorted or forward sorted
|
|
296 @exons = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, $_->start()] } @exons;
|
|
297 } else {
|
|
298 # sort in reverse order for transcripts on the negative strand and
|
|
299 # found to be in reverse order
|
|
300 @exons = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, $_->start()] } @exons;
|
|
301 }
|
|
302 # loop over all intervening gaps
|
|
303 for(my $i = 0; $i < $#exons; $i++) {
|
|
304 my ($start, $end);
|
|
305 my $intron;
|
|
306
|
|
307 if(defined($exons[$i]->strand()) &&
|
|
308 (($exons[$i]->strand() * $strand) < 0)) {
|
|
309 $self->throw("Transcript mixes plus and minus strand exons. ".
|
|
310 "Computing introns makes no sense then.");
|
|
311 }
|
|
312 $start = $exons[$i+$rev_order]->end() + 1; # $i or $i+1
|
|
313 $end = $exons[$i+1-$rev_order]->start() - 1; # $i+1 or $i
|
|
314 $intron = Bio::SeqFeature::Gene::Intron->new(
|
|
315 '-start' => $start,
|
|
316 '-end' => $end,
|
|
317 '-strand' => $strand,
|
|
318 '-primary' => 'intron',
|
|
319 '-source' => ref($self));
|
|
320 my $seq = $self->entire_seq();
|
|
321 $intron->attach_seq($seq) if $seq;
|
|
322 $intron->seq_id($self->seq_id());
|
|
323 push(@introns, $intron);
|
|
324 }
|
|
325 return @introns;
|
|
326 }
|
|
327
|
|
328 =head2 poly_A_site
|
|
329
|
|
330 Title : poly_A_site()
|
|
331 Usage : $polyAsite = $transcript->poly_A_site();
|
|
332 Function: Get/set the poly-adenylation feature/site of this transcript.
|
|
333 Returns : A Bio::SeqFeatureI implementing object representing the
|
|
334 poly-adenylation region.
|
|
335 Args : A Bio::SeqFeatureI implementing object on set, or FALSE to flush
|
|
336 a previously set object.
|
|
337
|
|
338
|
|
339 =cut
|
|
340
|
|
341 sub poly_A_site {
|
|
342 my ($self, $fea) = @_;
|
|
343 if ($fea) {
|
|
344 $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site');
|
|
345 }
|
|
346 return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0];
|
|
347 }
|
|
348
|
|
349 =head2 utrs
|
|
350
|
|
351 Title : utrs()
|
|
352 Usage : @utr_sites = $transcript->utrs('utr3prime');
|
|
353 @utr_sites = $transcript->utrs('utr5prime');
|
|
354 @utr_sites = $transcript->utrs();
|
|
355 Function: Get the features representing untranslated regions (UTR) of this
|
|
356 transcript.
|
|
357
|
|
358 You may provide an argument specifying the type of UTR. Currently
|
|
359 the following types are recognized: utr5prime utr3prime for UTR on the
|
|
360 5' and 3' end of the CDS, respectively.
|
|
361
|
|
362 Returns : An array of Bio::SeqFeature::Gene::UTR objects
|
|
363 representing the UTR regions or sites.
|
|
364 Args : Optionally, either utr3prime, or utr5prime for the the type of UTR
|
|
365 feature.
|
|
366
|
|
367
|
|
368 =cut
|
|
369
|
|
370 sub utrs {
|
|
371 my ($self, $type) = @_;
|
|
372 return $self->get_feature_type('Bio::SeqFeature::Gene::UTR',$type);
|
|
373
|
|
374 }
|
|
375
|
|
376 =head2 add_utr
|
|
377
|
|
378 Title : add_utr()
|
|
379 Usage : $transcript->add_utr($utrobj, 'utr3prime');
|
|
380 $transcript->add_utr($utrobj);
|
|
381 Function: Add a UTR feature/site to this transcript.
|
|
382
|
|
383 The second parameter is optional and denotes the type of the UTR
|
|
384 feature. Presently recognized types include 'utr5prime' and 'utr3prime'
|
|
385 for UTR on the 5' and 3' end of a gene, respectively.
|
|
386
|
|
387 Calling this method is the same as calling
|
|
388 add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a
|
|
389 special exon object, which is transcribed, not spliced out, but
|
|
390 not translated.
|
|
391
|
|
392 Note that the object supplied should return FALSE for is_coding().
|
|
393 Otherwise cds() and friends will become confused.
|
|
394
|
|
395 Returns :
|
|
396 Args : A Bio::SeqFeature::Gene::UTR implementing object.
|
|
397
|
|
398
|
|
399 =cut
|
|
400
|
|
401 sub add_utr {
|
|
402 my ($self, $fea, $type) = @_;
|
|
403 $self->_add($fea,'Bio::SeqFeature::Gene::UTR',$type);
|
|
404 }
|
|
405
|
|
406 =head2 flush_utrs
|
|
407
|
|
408 Title : flush_utrs()
|
|
409 Usage : $transcript->flush_utrs();
|
|
410 $transcript->flush_utrs('utr3prime');
|
|
411 Function: Remove all or a specific type of UTR features/sites from this
|
|
412 transcript.
|
|
413
|
|
414 Cf. add_utr() for documentation about recognized types.
|
|
415 Returns : a list of the removed features
|
|
416 Args : Optionally a string denoting the type of UTR feature.
|
|
417
|
|
418
|
|
419 =cut
|
|
420
|
|
421 sub flush_utrs {
|
|
422 my ($self, $type) = @_;
|
|
423 return $self->_flush('Bio::SeqFeature::Gene::UTR',$type);
|
|
424 }
|
|
425
|
|
426 =head2 sub_SeqFeature
|
|
427
|
|
428 Title : sub_SeqFeature
|
|
429 Usage : @feats = $transcript->sub_SeqFeature();
|
|
430 Function: Returns an array of all subfeatures.
|
|
431
|
|
432 This method is defined in Bio::SeqFeatureI. We override this here
|
|
433 to include the exon etc features.
|
|
434
|
|
435 Returns : An array Bio::SeqFeatureI implementing objects.
|
|
436 Args : none
|
|
437
|
|
438
|
|
439 =cut
|
|
440
|
|
441 sub sub_SeqFeature {
|
|
442 my ($self) = @_;
|
|
443 my @feas;
|
|
444
|
|
445 # get what the parent already has
|
|
446 @feas = $self->SUPER::sub_SeqFeature();
|
|
447 # add the features we have in addition
|
|
448 push(@feas, $self->exons()); # this includes UTR features
|
|
449 push(@feas, $self->promoters());
|
|
450 push(@feas, $self->poly_A_site()) if($self->poly_A_site());
|
|
451 return @feas;
|
|
452 }
|
|
453
|
|
454 =head2 flush_sub_SeqFeature
|
|
455
|
|
456 Title : flush_sub_SeqFeature
|
|
457 Usage : $transcript->flush_sub_SeqFeature();
|
|
458 $transcript->flush_sub_SeqFeature(1);
|
|
459 Function: Removes all subfeatures.
|
|
460
|
|
461 This method is overridden from Bio::SeqFeature::Generic to flush
|
|
462 all additional subfeatures like exons, promoters, etc., which is
|
|
463 almost certainly not what you want. To remove only features added
|
|
464 through $transcript->add_sub_SeqFeature($feature) pass any
|
|
465 argument evaluating to TRUE.
|
|
466
|
|
467 Example :
|
|
468 Returns : none
|
|
469 Args : Optionally, an argument evaluating to TRUE will suppress flushing
|
|
470 of all transcript-specific subfeatures (exons etc.).
|
|
471
|
|
472
|
|
473 =cut
|
|
474
|
|
475 sub flush_sub_SeqFeature {
|
|
476 my ($self,$fea_only) = @_;
|
|
477
|
|
478 $self->SUPER::flush_sub_SeqFeature();
|
|
479 if(! $fea_only) {
|
|
480 $self->flush_promoters();
|
|
481 $self->flush_exons();
|
|
482 $self->flush_utrs();
|
|
483 $self->poly_A_site(0);
|
|
484 }
|
|
485 }
|
|
486
|
|
487
|
|
488 =head2 cds
|
|
489
|
|
490 Title : cds
|
|
491 Usage : $seq = $transcript->cds();
|
|
492 Function: Returns the CDS (coding sequence) as defined by the exons
|
|
493 of this transcript and the attached sequence.
|
|
494
|
|
495 If no sequence is attached this method will return undef.
|
|
496
|
|
497 Note that the implementation provided here returns a
|
|
498 concatenation of all coding exons, thereby assuming that
|
|
499 exons do not overlap.
|
|
500
|
|
501 Note also that you cannot set the CDS via this method. Set
|
|
502 a single CDS feature as a single exon, or derive your own
|
|
503 class if you want to store a predicted CDS.
|
|
504
|
|
505 Example :
|
|
506 Returns : A Bio::PrimarySeqI implementing object.
|
|
507 Args :
|
|
508
|
|
509 =cut
|
|
510
|
|
511 sub cds {
|
|
512 my ($self) = @_;
|
|
513 my @exons = $self->exons_ordered(); #this is always sorted properly according to strand
|
|
514 my $strand;
|
|
515
|
|
516 return undef unless(@exons);
|
|
517 # record strand (a minus-strand transcript must have the exons sorted in
|
|
518 # reverse order)
|
|
519 foreach my $exon (@exons) {
|
|
520 if(defined($exon->strand()) && (! $strand)) {
|
|
521 $strand = $exon->strand();
|
|
522 }
|
|
523 if($exon->strand() && (($exon->strand() * $strand) < 0)) {
|
|
524 $self->throw("Transcript mixes coding exons on plus and minus ".
|
|
525 "strand. This makes no sense.");
|
|
526 }
|
|
527 }
|
|
528 my $cds = $self->_make_cds(@exons);
|
|
529 return undef unless $cds;
|
|
530 return Bio::PrimarySeq->new('-id' => $self->seq_id(),
|
|
531 '-seq' => $cds,
|
|
532 '-alphabet' => "dna");
|
|
533 }
|
|
534
|
|
535 =head2 protein
|
|
536
|
|
537 Title : protein()
|
|
538 Usage : $protein = $transcript->protein();
|
|
539 Function: Get the protein encoded by the transcript as a sequence object.
|
|
540
|
|
541 The implementation provided here simply calls translate() on the
|
|
542 object returned by cds().
|
|
543
|
|
544 Returns : A Bio::PrimarySeqI implementing object.
|
|
545 Args :
|
|
546
|
|
547
|
|
548 =cut
|
|
549
|
|
550 sub protein {
|
|
551 my ($self) = @_;
|
|
552 my $seq;
|
|
553
|
|
554 $seq = $self->cds();
|
|
555 return $seq->translate() if $seq;
|
|
556 return undef;
|
|
557 }
|
|
558
|
|
559 =head2 mrna
|
|
560
|
|
561 Title : mrna()
|
|
562 Usage : $mrna = $transcript->mrna();
|
|
563 Function: Get the mRNA of the transcript as a sequence object.
|
|
564
|
|
565 The difference to cds() is that the sequence object returned by
|
|
566 this methods will also include UTR and the poly-adenylation site,
|
|
567 but not promoter sequence (TBD).
|
|
568
|
|
569 HL: do we really need this method?
|
|
570
|
|
571 Returns : A Bio::PrimarySeqI implementing object.
|
|
572 Args :
|
|
573
|
|
574
|
|
575 =cut
|
|
576
|
|
577 sub mrna {
|
|
578 my ($self) = @_;
|
|
579 my ($seq, $mrna, $elem);
|
|
580
|
|
581 # get the coding part
|
|
582 $seq = $self->cds();
|
|
583 if(! $seq) {
|
|
584 $seq = Bio::PrimarySeq->new('-id' => $self->seq_id(),
|
|
585 '-alphabet' => "rna",
|
|
586 '-seq' => "");
|
|
587 }
|
|
588 # get and add UTR sequences
|
|
589 $mrna = "";
|
|
590 foreach $elem ($self->utrs('utr5prime')) {
|
|
591 $mrna .= $elem->seq()->seq();
|
|
592 }
|
|
593 $seq->seq($mrna . $seq->seq());
|
|
594 $mrna = "";
|
|
595 foreach $elem ($self->utrs('utr3prime')) {
|
|
596 $mrna .= $elem->seq()->seq();
|
|
597 }
|
|
598 $seq->seq($seq->seq() . $mrna);
|
|
599 if($self->poly_A_site()) {
|
|
600 $seq->seq($seq->seq() . $self->poly_A_site()->seq()->seq());
|
|
601 }
|
|
602 return undef if($seq->length() == 0);
|
|
603 return $seq;
|
|
604 }
|
|
605
|
|
606 sub _get_typed_keys {
|
|
607 my ($self, $keyprefix, $type) = @_;
|
|
608 my @keys = ();
|
|
609 my @feas = ();
|
|
610
|
|
611 # make case-insensitive
|
|
612 $type = ($type ? lc($type) : "");
|
|
613 # pull out all feature types that exist and match
|
|
614 @keys = grep { /^_$keyprefix$type/i; } (keys(%{$self}));
|
|
615 return @keys;
|
|
616 }
|
|
617
|
|
618 sub _make_cds {
|
|
619 my ($self,@exons) = @_;
|
|
620 my $cds = "";
|
|
621
|
|
622 foreach my $exon (@exons) {
|
|
623 next if((! defined($exon->seq())) || (! $exon->is_coding()));
|
|
624 my $phase = length($cds) % 3;
|
|
625 # let's check the simple case
|
|
626 if((! defined($exon->frame())) || ($phase == $exon->frame())) {
|
|
627 # this one fits exactly, or frame of the exon is undefined (should
|
|
628 # we warn about that?); we bypass the $exon->cds() here (hmm,
|
|
629 # not very clean style, but I don't see where this screws up)
|
|
630 $cds .= $exon->seq()->seq();
|
|
631 } else {
|
|
632 # this one is probably from exon shuffling and needs some work
|
|
633 my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0
|
|
634 next if(! $seq);
|
|
635 $seq = $seq->seq();
|
|
636 # adjustment needed?
|
|
637 if($phase > 0) {
|
|
638 # how many Ns can we chop off the piece to be added?
|
|
639 my $n_crop = 0;
|
|
640 if($seq =~ /^(n+)/i) {
|
|
641 $n_crop = length($1);
|
|
642 }
|
|
643 if($n_crop >= $phase) {
|
|
644 # chop off to match the phase
|
|
645 $seq = substr($seq, $phase);
|
|
646 } else {
|
|
647 # fill in Ns
|
|
648 $seq = ("n" x (3-$phase)) . $seq;
|
|
649 }
|
|
650 }
|
|
651 $cds .= $seq;
|
|
652 }
|
|
653 }
|
|
654 return $cds;
|
|
655 }
|
|
656
|
|
657 =head2 features
|
|
658
|
|
659 Title : features
|
|
660 Usage : my @features=$transcript->features;
|
|
661 Function: returns all the features associated with this transcript
|
|
662 Returns : a list of SeqFeatureI implementing objects
|
|
663 Args : none
|
|
664
|
|
665
|
|
666 =cut
|
|
667
|
|
668
|
|
669 sub features {
|
|
670 my ($self) = shift;
|
|
671 $self->{'_features'} = [] unless defined $self->{'_features'};
|
|
672 return @{$self->{'_features'}};
|
|
673 }
|
|
674
|
|
675 =head2 features_ordered
|
|
676
|
|
677 Title : features_ordered
|
|
678 Usage : my @features=$transcript->features_ordered;
|
|
679 Function: returns all the features associated with this transcript,
|
|
680 in order by feature start, according to strand
|
|
681 Returns : a list of SeqFeatureI implementing objects
|
|
682 Args : none
|
|
683
|
|
684
|
|
685 =cut
|
|
686
|
|
687 sub features_ordered{
|
|
688 my ($self) = @_;
|
|
689 return $self->_stranded_sort(@{$self->{'_features'}});
|
|
690 }
|
|
691
|
|
692
|
|
693 sub get_unordered_feature_type{
|
|
694 my ($self, $type, $pri)=@_;
|
|
695 my @list;
|
|
696 foreach ($self->features) {
|
|
697 if ($_->isa($type)) {
|
|
698 if ($pri && $_->primary_tag !~ /$pri/i) {
|
|
699 next;
|
|
700 }
|
|
701 push @list,$_;
|
|
702 }
|
|
703 }
|
|
704 return @list;
|
|
705
|
|
706 }
|
|
707
|
|
708 sub get_feature_type {
|
|
709 my ($self)=shift;
|
|
710 return $self->_stranded_sort($self->get_unordered_feature_type(@_));
|
|
711 }
|
|
712
|
|
713 #This was fixed by Gene Cutler - the indexing on the list being reversed
|
|
714 #fixed a bad bug. Thanks Gene!
|
|
715 sub _flush {
|
|
716 my ($self, $type, $pri)=@_;
|
|
717 my @list=$self->features;
|
|
718 my @cut;
|
|
719 for (reverse (0..$#list)) {
|
|
720 if ($list[$_]->isa($type)) {
|
|
721 if ($pri && $list[$_]->primary_tag !~ /$pri/i) {
|
|
722 next;
|
|
723 }
|
|
724 push @cut, splice @list, $_, 1; #remove the element of $type from @list
|
|
725 #and return each of them in @cut
|
|
726 }
|
|
727 }
|
|
728 $self->{'_features'}=\@list;
|
|
729 return reverse @cut;
|
|
730 }
|
|
731
|
|
732 sub _add {
|
|
733 my ($self, $fea, $type)=@_;
|
|
734 require Bio::SeqFeature::Gene::Promoter;
|
|
735 require Bio::SeqFeature::Gene::UTR;
|
|
736 require Bio::SeqFeature::Gene::Exon;
|
|
737 require Bio::SeqFeature::Gene::Intron;
|
|
738 require Bio::SeqFeature::Gene::Poly_A_site;
|
|
739
|
|
740 if(! $fea->isa('Bio::SeqFeatureI') ) {
|
|
741 $self->throw("$fea does not implement Bio::SeqFeatureI");
|
|
742 }
|
|
743 if(! $fea->isa($type) ) {
|
|
744 $fea=$self->_new_of_type($fea,$type);
|
|
745 }
|
|
746 if (! $self->strand) {
|
|
747 $self->strand($fea->strand);
|
|
748 } else {
|
|
749 if ($self->strand * $fea->strand == -1) {
|
|
750 $self->throw("$fea is on opposite strand from $self");
|
|
751 }
|
|
752 }
|
|
753
|
|
754 $self->_expand_region($fea);
|
|
755 if(defined($self->entire_seq()) && (! defined($fea->entire_seq())) &&
|
|
756 $fea->can('attach_seq')) {
|
|
757 $fea->attach_seq($self->entire_seq());
|
|
758 }
|
|
759 if (defined $self->parent) {
|
|
760 $self->parent->_expand_region($fea);
|
|
761 }
|
|
762 push(@{$self->{'_features'}}, $fea);
|
|
763 1;
|
|
764 }
|
|
765
|
|
766 sub _stranded_sort {
|
|
767 my ($self,@list)=@_;
|
|
768 my $strand;
|
|
769 foreach my $fea (@list) {
|
|
770 if($fea->strand()) {
|
|
771 # defined and != 0
|
|
772 $strand = $fea->strand() if(! $strand);
|
|
773 if(($fea->strand() * $strand) < 0) {
|
|
774 $strand = undef;
|
|
775 last;
|
|
776 }
|
|
777 }
|
|
778 }
|
|
779 if (defined $strand && $strand == - 1) { #reverse strand
|
|
780 return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list;
|
|
781 } else { #undef or forward strand
|
|
782 return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list;
|
|
783 }
|
|
784 }
|
|
785
|
|
786 sub _new_of_type {
|
|
787 my ($self, $fea, $type, $pri)= @_;
|
|
788 my $primary;
|
|
789 if ($pri) {
|
|
790 $primary = $pri; #can set new primary tag if desired
|
|
791 } else {
|
|
792 ($primary) = $type =~ /.*::(.+)/; #or else primary is just end of type string
|
|
793 }
|
|
794 bless $fea,$type;
|
|
795 $fea->primary_tag($primary);
|
|
796 return $fea;
|
|
797 }
|
|
798
|
|
799 1;
|