0
|
1 # $Id: GeneStructure.pm,v 1.14 2002/10/22 07:38:41 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SeqFeature::Gene::GeneStructure
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
|
|
6 #
|
|
7 # Copyright Hilmar Lapp
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SeqFeature::Gene::GeneStructure - A feature representing an arbitrarily
|
|
16 complex structure of a gene
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 See documentation of methods.
|
|
21
|
|
22 =head1 DESCRIPTION
|
|
23
|
|
24 A feature representing a gene structure. As of now, a gene structure
|
|
25 really is only a collection of transcripts. See
|
|
26 Bio::SeqFeature::Gene::TranscriptI (interface) and
|
|
27 Bio::SeqFeature::Gene::Transcript (implementation) for the features of
|
|
28 such objects.
|
|
29
|
|
30 =head1 FEEDBACK
|
|
31
|
|
32 =head2 Mailing Lists
|
|
33
|
|
34 User feedback is an integral part of the evolution of this
|
|
35 and other Bioperl modules. Send your comments and suggestions preferably
|
|
36 to one of the Bioperl mailing lists.
|
|
37 Your participation is much appreciated.
|
|
38
|
|
39 bioperl-l@bioperl.org - General discussion
|
|
40 http://bio.perl.org/MailList.html - About the mailing lists
|
|
41
|
|
42 =head2 Reporting Bugs
|
|
43
|
|
44 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
45 of the bugs and their resolution.
|
|
46 Bug reports can be submitted via email or the web:
|
|
47
|
|
48 bioperl-bugs@bio.perl.org
|
|
49 http://bugzilla.bioperl.org/
|
|
50
|
|
51 =head1 AUTHOR - Hilmar Lapp
|
|
52
|
|
53 Email hlapp@gmx.net
|
|
54
|
|
55 Describe contact details here
|
|
56
|
|
57 =head1 APPENDIX
|
|
58
|
|
59 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
60
|
|
61 =cut
|
|
62
|
|
63
|
|
64 # Let the code begin...
|
|
65
|
|
66
|
|
67 package Bio::SeqFeature::Gene::GeneStructure;
|
|
68 use vars qw(@ISA);
|
|
69 use strict;
|
|
70
|
|
71 use Bio::SeqFeature::Generic;
|
|
72 use Bio::SeqFeature::Gene::GeneStructureI;
|
|
73
|
|
74 @ISA = qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::GeneStructureI);
|
|
75
|
|
76
|
|
# Constructor.
#
# Delegates object creation to the superclass constructor with the full
# argument list, then applies two defaults:
#   * the primary tag is taken from the PRIMARY argument, or set to
#     'genestructure' when that argument is missing or false;
#   * the strand is set to 0 when the superclass left it undefined.
#
# Args    : the argument list is handed unchanged to SUPER::new(); PRIMARY
#           is additionally extracted here via _rearrange().
# Returns : the newly created object.
sub new {
    my $caller = shift;
    my @args   = @_;

    my $self = $caller->SUPER::new(@args);

    # Only the PRIMARY argument is of interest at this level.
    my ($primary) = $self->_rearrange(['PRIMARY'], @args);
    $self->primary_tag($primary || 'genestructure');

    # Make sure the strand is always defined.
    $self->strand(0) unless defined $self->strand();

    return $self;
}
|
|
89
|
|
90 =head2 transcripts
|
|
91
|
|
92 Title : transcripts
|
|
93 Usage : @transcripts = $gene->transcripts();
|
|
94 Function: Get the transcripts of this gene structure. Many gene structures
|
|
95 will have only one transcript.
|
|
96
|
|
97 Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects.
|
|
98 Args :
|
|
99
|
|
100
|
|
101 =cut
|
|
102
|
|
# Return the transcripts stored on this gene structure.
#
# The list lives under the '_transcripts' key of the object hash. When that
# key is absent the empty list is returned and the key is not created.
#
# Returns : the stored transcripts as a list (possibly empty).
sub transcripts {
    my $self = shift;

    if (exists $self->{'_transcripts'}) {
        return @{ $self->{'_transcripts'} };
    }
    return ();
}
|
|
109
|
|
110 =head2 add_transcript
|
|
111
|
|
112 Title : add_transcript()
|
|
113 Usage : $gene->add_transcript($transcript);
|
|
114 Function: Add a transcript to this gene structure.
|
|
115 Returns :
|
|
116 Args : A Bio::SeqFeature::Gene::TranscriptI implementing object.
|
|
117
|
|
118
|
|
119 =cut
|
|
120
|
|
# Add a transcript to this gene structure.
#
# The argument must be a blessed object for which
# isa('Bio::SeqFeature::Gene::TranscriptI') is true; anything else
# (undef, plain scalars, unblessed references, other objects) is rejected
# through $self->throw().
#
# On success the gene's region is expanded via _expand_region(), the
# transcript's parent is set to this gene, and the transcript is appended
# to the '_transcripts' list.
#
# Args    : $fea - the transcript object to add.
# Returns : the value of the final push (the new number of transcripts).
sub add_transcript {
    my ($self, $fea) = @_;

    # Scalar::Util is loaded here so the block does not depend on a
    # file-level import.
    require Scalar::Util;

    # Checking blessed() first keeps ->isa from being invoked on an unblessed
    # reference, which would die with a low-level Perl error instead of the
    # intended exception.
    unless (Scalar::Util::blessed($fea)
            && $fea->isa('Bio::SeqFeature::Gene::TranscriptI')) {
        # Avoid interpolating undef into the message.
        my $shown = defined($fea) ? $fea : 'undef';
        $self->throw("$shown does not implement Bio::SeqFeature::Gene::TranscriptI");
    }

    if (! exists($self->{'_transcripts'})) {
        $self->{'_transcripts'} = [];
    }
    $self->_expand_region($fea);
    $fea->parent($self);
    push(@{$self->{'_transcripts'}}, $fea);
}
|
|
134
|
|
135 =head2 flush_transcripts
|
|
136
|
|
137 Title : flush_transcripts()
|
|
138 Usage : $gene->flush_transcripts();
|
|
139 Function: Remove all transcripts from this gene structure.
|
|
140 Returns :
|
|
141 Args :
|
|
142
|
|
143
|
|
144 =cut
|
|
145
|
|
# Remove all transcripts from this gene structure.
#
# Deletes the '_transcripts' entry from the object hash when it is present;
# otherwise nothing is changed.
sub flush_transcripts {
    my $self = shift;

    delete $self->{'_transcripts'} if exists $self->{'_transcripts'};
}
|
|
153
|
|
154 =head2 add_transcript_as_features
|
|
155
|
|
156 Title : add_transcript_as_features
|
|
157 Usage : $gene->add_transcript_as_features(@featurelist);
|
|
158 Function: take a list of Bio::SeqFeatureI objects and turn them into a
|
|
159 Bio::SeqFeature::Gene::Transcript object. Add that transcript to the gene.
|
|
160 Returns : nothing
|
|
161 Args : a list of Bio::SeqFeatureI compliant objects
|
|
162
|
|
163
|
|
164 =cut
|
|
165
|
|
# Build a transcript from a flat list of features and add it to this gene.
#
# Each feature is routed by matching its primary tag, case-insensitively,
# against an ordered set of patterns; the first match wins:
#   /utr/      -> add_utr        (UTR, utr, 3' utr, utr5, ...)
#   /promot/   -> add_promoter   (tolerates spelling variants)
#   /poly.*A/  -> poly_A_site    (polyA, POLY_A, ...)
#   otherwise  -> add_exon       (everything else is assumed to be an exon)
#
# NOTE(review): Bio::SeqFeature::Gene::Transcript is not loaded by the use
# statements visible in this file - confirm it is loaded elsewhere.
#
# Args    : a list of feature objects providing primary_tag().
# Returns : whatever add_transcript() returns for the new transcript.
sub add_transcript_as_features {
    my ($self, @features) = @_;

    my $transcript = Bio::SeqFeature::Gene::Transcript->new;

    # Order matters: the rules are tried top to bottom.
    my @routing = (
        [ qr/utr/i,     'add_utr'      ],
        [ qr/promot/i,  'add_promoter' ],
        [ qr/poly.*A/i, 'poly_A_site'  ],
    );

    for my $feature (@features) {
        my $tag    = $feature->primary_tag;
        my $method = 'add_exon';
        for my $rule (@routing) {
            if ($tag =~ $rule->[0]) {
                $method = $rule->[1];
                last;
            }
        }
        $transcript->$method($feature);
    }

    return $self->add_transcript($transcript);
}
|
|
184
|
|
185
|
|
186 =head2 promoters
|
|
187
|
|
188 Title : promoters
|
|
189 Usage : @prom_sites = $gene->promoters();
|
|
190 Function: Get the promoter features of this gene structure.
|
|
191
|
|
192 This method basically merges the promoters returned by transcripts.
|
|
193
|
|
194 Note that OO-modeling of regulatory elements is not stable yet.
|
|
195 This means that this method might change or even disappear in a
|
|
196 future release. Be aware of this if you use it.
|
|
197
|
|
198 Returns : An array of Bio::SeqFeatureI implementing objects.
|
|
199 Args :
|
|
200
|
|
201
|
|
202 =cut
|
|
203
|
|
# Collect the promoter features of every transcript of this gene.
#
# Calls promoters() on each transcript returned by transcripts() and
# concatenates the results in transcript order.
#
# Returns : the merged list (empty when there are no transcripts).
sub promoters {
    my $self = shift;

    my @merged = map { $_->promoters() } $self->transcripts();
    return @merged;
}
|
|
214
|
|
215
|
|
216 =head2 exons
|
|
217
|
|
218 Title : exons()
|
|
219 Usage : @exons = $gene->exons();
|
|
220 @initial_exons = $gene->exons('Initial');
|
|
221 Function: Get all exon features or all exons of a specified type of this gene
|
|
222 structure.
|
|
223
|
|
224 Exon type is treated as a case-insensitive regular expression and
|
|
225 optional. For consistency, use only the following types:
|
|
226 initial, internal, terminal, utr, utr5prime, and utr3prime.
|
|
227 A special and virtual type is 'coding', which refers to all types
|
|
228 except utr.
|
|
229
|
|
230 This method basically merges the exons returned by transcripts.
|
|
231
|
|
232 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
|
|
233 Args : An optional string specifying the type of exon.
|
|
234
|
|
235
|
|
236 =cut
|
|
237
|
|
# Collect exon features from every transcript of this gene.
#
# Any arguments (e.g. an exon type) are forwarded unchanged to each
# transcript's exons() method; the per-transcript results are concatenated
# in transcript order.
#
# Args    : optional arguments passed through to the transcripts' exons().
# Returns : the merged list (empty when there are no transcripts).
sub exons {
    my ($self, @args) = @_;

    my @merged = map { $_->exons(@args) } $self->transcripts();
    return @merged;
}
|
|
248
|
|
249 =head2 introns
|
|
250
|
|
251 Title : introns()
|
|
252 Usage : @introns = $gene->introns();
|
|
253 Function: Get all introns of this gene structure.
|
|
254
|
|
255 Note that this class currently generates these features on-the-fly,
|
|
256 that is, it simply treats all regions between exons as introns.
|
|
257 It assumes that the exons in the transcripts do not overlap.
|
|
258
|
|
259 This method basically merges the introns returned by transcripts.
|
|
260
|
|
261 Returns : An array of Bio::SeqFeatureI implementing objects.
|
|
262 Args :
|
|
263
|
|
264
|
|
265 =cut
|
|
266
|
|
# Collect the introns of every transcript of this gene.
#
# Calls introns() on each transcript returned by transcripts() and
# concatenates the results in transcript order.
#
# Returns : the merged list (empty when there are no transcripts).
sub introns {
    my $self = shift;

    my @merged = map { $_->introns() } $self->transcripts();
    return @merged;
}
|
|
277
|
|
278 =head2 poly_A_sites
|
|
279
|
|
280 Title : poly_A_sites()
|
|
281 Usage : @polyAsites = $gene->poly_A_sites();
|
|
282 Function: Get the poly-adenylation sites of this gene structure.
|
|
283
|
|
284 This method basically merges the poly-adenylation sites returned by
|
|
285 transcripts.
|
|
286
|
|
287 Returns : An array of Bio::SeqFeatureI implementing objects.
|
|
288 Args :
|
|
289
|
|
290
|
|
291 =cut
|
|
292
|
|
# Collect the poly-adenylation sites of every transcript of this gene.
#
# Calls the singular poly_A_site() getter on each transcript returned by
# transcripts() and merges the results in transcript order.
#
# Undefined results are dropped: a transcript without a poly-A site may
# yield undef from its getter (that getter is not visible here), and such
# placeholders must not end up in a list that callers expect to contain
# only feature objects.
#
# Returns : the list of defined poly-A site features (possibly empty).
sub poly_A_sites {
    my ($self) = @_;

    my @sites = grep { defined $_ }
                map  { $_->poly_A_site() }
                $self->transcripts();
    return @sites;
}
|
|
303
|
|
304 =head2 utrs
|
|
305
|
|
306 Title : utrs()
|
|
307 Usage : @utr_sites = $gene->utrs('3prime');
|
|
308 @utr_sites = $gene->utrs('5prime');
|
|
309 @utr_sites = $gene->utrs();
|
|
310 Function: Get the features representing untranslated regions (UTR) of this
|
|
311 gene structure.
|
|
312
|
|
313 You may provide an argument specifying the type of UTR. Currently
|
|
314 the following types are recognized: 5prime 3prime for UTR on the
|
|
315 5' and 3' end of the CDS, respectively.
|
|
316
|
|
317 This method basically merges the UTRs returned by transcripts.
|
|
318
|
|
319 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
|
|
320 representing the UTR regions or sites.
|
|
321 Args : Optionally, either 3prime, or 5prime for the type of UTR
|
|
322 feature.
|
|
323
|
|
324
|
|
325 =cut
|
|
326
|
|
# Collect the UTR features of every transcript of this gene.
#
# Any arguments (e.g. the UTR type) are forwarded unchanged to each
# transcript's utrs() method; the per-transcript results are concatenated
# in transcript order.
#
# Args    : optional arguments passed through to the transcripts' utrs().
# Returns : the merged list (empty when there are no transcripts).
sub utrs {
    my ($self, @args) = @_;

    my @merged = map { $_->utrs(@args) } $self->transcripts();
    return @merged;
}
|
|
337
|
|
338 =head2 sub_SeqFeature
|
|
339
|
|
340 Title : sub_SeqFeature
|
|
341 Usage : @feats = $gene->sub_SeqFeature();
|
|
342 Function: Returns an array of all subfeatures.
|
|
343
|
|
344 This method is defined in Bio::SeqFeatureI. We override this here
|
|
345 to include the transcripts.
|
|
346
|
|
347 Returns : An array of Bio::SeqFeatureI implementing objects.
|
|
348 Args : none
|
|
349
|
|
350
|
|
351 =cut
|
|
352
|
|
# Return all subfeatures of this gene structure.
#
# The result is whatever the superclass's sub_SeqFeature() returns,
# followed by this gene's transcripts.
#
# Returns : the combined list of subfeatures.
sub sub_SeqFeature {
    my $self = shift;

    # Inherited subfeatures first, then the transcripts.
    my @subfeatures = (
        $self->SUPER::sub_SeqFeature(),
        $self->transcripts(),
    );
    return @subfeatures;
}
|
|
362
|
|
363 =head2 flush_sub_SeqFeature
|
|
364
|
|
365 Title : flush_sub_SeqFeature
|
|
366 Usage : $gene->flush_sub_SeqFeature();
|
|
367 $gene->flush_sub_SeqFeature(1);
|
|
368 Function: Removes all subfeatures.
|
|
369
|
|
370 This method is overridden from Bio::SeqFeature::Generic to flush
|
|
371 all additional subfeatures, i.e., transcripts, which is
|
|
372 almost certainly not what you want. To remove only features added
|
|
373 through $gene->add_sub_SeqFeature($feature) pass any
|
|
374 argument evaluating to TRUE.
|
|
375
|
|
376 Example :
|
|
377 Returns : none
|
|
378 Args : Optionally, an argument evaluating to TRUE will suppress flushing
|
|
379 of all gene structure-specific subfeatures (transcripts).
|
|
380
|
|
381
|
|
382 =cut
|
|
383
|
|
# Remove the subfeatures of this gene structure.
#
# The superclass's flush_sub_SeqFeature() is always called. The transcripts
# are flushed as well unless a true value is passed as the argument.
#
# Args    : $fea_only - optional; when true, the transcripts are kept.
sub flush_sub_SeqFeature {
    my $self     = shift;
    my $fea_only = shift;

    $self->SUPER::flush_sub_SeqFeature();
    $self->flush_transcripts() if ! $fea_only;
}
|
|
392
|
|
393 1;
|
|
394
|
|
395
|
|
396
|
|
397
|
|
398
|
|
399
|
|
400
|
|
401
|