comparison variant_effect_predictor/Bio/SeqFeature/Gene/GeneStructure.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: GeneStructure.pm,v 1.14 2002/10/22 07:38:41 lapp Exp $
2 #
3 # BioPerl module for Bio::SeqFeature::Gene::GeneStructure
4 #
5 # Cared for by Hilmar Lapp <hlapp@gmx.net>
6 #
7 # Copyright Hilmar Lapp
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SeqFeature::Gene::GeneStructure - A feature representing an arbitrarily
16 complex structure of a gene
17
18 =head1 SYNOPSIS
19
20 See documentation of methods.
21
22 =head1 DESCRIPTION
23
24 A feature representing a gene structure. As of now, a gene structure
25 really is only a collection of transcripts. See
26 Bio::SeqFeature::Gene::TranscriptI (interface) and
27 Bio::SeqFeature::Gene::Transcript (implementation) for the features of
28 such objects.
29
30 =head1 FEEDBACK
31
32 =head2 Mailing Lists
33
34 User feedback is an integral part of the evolution of this
35 and other Bioperl modules. Send your comments and suggestions preferably
36 to one of the Bioperl mailing lists.
37 Your participation is much appreciated.
38
39 bioperl-l@bioperl.org - General discussion
40 http://bio.perl.org/MailList.html - About the mailing lists
41
42 =head2 Reporting Bugs
43
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution.
46 Bug reports can be submitted via email or the web:
47
48 bioperl-bugs@bio.perl.org
49 http://bugzilla.bioperl.org/
50
51 =head1 AUTHOR - Hilmar Lapp
52
53 Email hlapp@gmx.net
54
55 Describe contact details here
56
57 =head1 APPENDIX
58
59 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
60
61 =cut
62
63
64 # Let the code begin...
65
66
67 package Bio::SeqFeature::Gene::GeneStructure;
68 use vars qw(@ISA);
69 use strict;
70
71 use Bio::SeqFeature::Generic;
72 use Bio::SeqFeature::Gene::GeneStructureI;
73
74 @ISA = qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::GeneStructureI);
75
76
# Constructor. Delegates object creation to the parent class and then applies
# the gene-structure defaults for the primary tag and the strand.
#
# Args    : the named arguments accepted by the parent constructor; only
#           PRIMARY is inspected here.
# Returns : the newly created object.
sub new {
    my ($caller, @args) = @_;

    my $self = $caller->SUPER::new(@args);

    # Fall back to 'genestructure' when no (true) PRIMARY value was supplied.
    my ($primary_tag) = $self->_rearrange([qw(PRIMARY)], @args);
    $self->primary_tag($primary_tag || 'genestructure');

    # Keep a strand that is already defined; otherwise default it to 0.
    unless (defined($self->strand())) {
        $self->strand(0);
    }

    return $self;
}
89
90 =head2 transcripts
91
92 Title : transcripts
93 Usage : @transcripts = $gene->transcripts();
94 Function: Get the transcripts of this gene structure. Many gene structures
95 will have only one transcript.
96
97 Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects.
98 Args :
99
100
101 =cut
102
# Returns the transcripts stored under the '_transcripts' slot as a list.
# When that slot has never been created, the empty list is returned.
sub transcripts {
    my ($self) = @_;

    if (exists $self->{'_transcripts'}) {
        return @{ $self->{'_transcripts'} };
    }
    return ();
}
109
110 =head2 add_transcript
111
112 Title : add_transcript()
113 Usage : $gene->add_transcript($transcript);
114 Function: Add a transcript to this gene structure.
115 Returns :
116 Args : A Bio::SeqFeature::Gene::TranscriptI implementing object.
117
118
119 =cut
120
# Adds a transcript to this gene structure.
#
# Args    : $fea - an object implementing Bio::SeqFeature::Gene::TranscriptI.
# Throws  : via $self->throw() when $fea is not such an object; undef, plain
#           scalars and unblessed references are all rejected here.
# Effects : widens this feature through _expand_region($fea), registers this
#           gene structure as the transcript's parent, and appends the
#           transcript to the '_transcripts' list (created on first use).
sub add_transcript {
    my ($self, $fea) = @_;

    # blessed() guards the isa() call: invoking a method on an unblessed
    # reference would die with a generic Perl error before throw() is reached.
    require Scalar::Util;
    unless (Scalar::Util::blessed($fea)
            && $fea->isa('Bio::SeqFeature::Gene::TranscriptI')) {
        # Avoid an "uninitialized value" warning when nothing was passed.
        my $shown = defined($fea) ? $fea : 'undef';
        $self->throw("$shown does not implement Bio::SeqFeature::Gene::TranscriptI");
    }

    if (! exists($self->{'_transcripts'})) {
        $self->{'_transcripts'} = [];
    }
    $self->_expand_region($fea);
    $fea->parent($self);
    push(@{$self->{'_transcripts'}}, $fea);
}
134
135 =head2 flush_transcripts
136
137 Title : flush_transcripts()
138 Usage : $gene->flush_transcripts();
139 Function: Remove all transcripts from this gene structure.
140 Returns :
141 Args :
142
143
144 =cut
145
# Drops every transcript from this gene structure by removing the
# '_transcripts' slot. Nothing happens when the slot does not exist.
sub flush_transcripts {
    my ($self) = @_;

    delete($self->{'_transcripts'}) if exists($self->{'_transcripts'});
}
153
154 =head2 add_transcript_as_features
155
156 Title : add_transcript_as_features
157 Usage : $gene->add_transcript_as_features(@featurelist);
158 Function: take a list of Bio::SeqFeatureI objects and turn them into a
159 Bio::SeqFeature::Gene::Transcript object. Add that transcript to the gene.
160 Returns : nothing
161 Args : a list of Bio::SeqFeatureI compliant objects
162
163
164 =cut
165
# Builds a new Bio::SeqFeature::Gene::Transcript from a list of features and
# adds it to this gene structure.
#
# Args    : a list of feature objects; each one is routed by its primary tag:
#             matches /utr/i      -> add_utr()
#             matches /promot/i   -> add_promoter()
#             matches /poly.*A/i  -> poly_A_site()
#             anything else       -> add_exon()
#           The patterns are tried in that order; the first match wins.
# Effects : the assembled transcript is handed to $self->add_transcript().
sub add_transcript_as_features{
    my ($self,@features) = @_;

    # The use lines visible at the top of this file do not load the transcript
    # class, so load it on demand; this is a no-op when it is already loaded.
    require Bio::SeqFeature::Gene::Transcript;
    my $transcript = Bio::SeqFeature::Gene::Transcript->new;

    foreach my $fea (@features) {
        my $tag = $fea->primary_tag;

        if ($tag =~ /utr/i) {              # UTR / utr / 3' utr / utr5 etc.
            $transcript->add_utr($fea);
        } elsif ($tag =~ /promot/i) {      # allow for spelling differences
            $transcript->add_promoter($fea);
        } elsif ($tag =~ /poly.*A/i) {     # polyA, POLY_A, etc.
            $transcript->poly_A_site($fea);
        } else {                           # assume the rest are exons
            $transcript->add_exon($fea);
        }
    }
    $self->add_transcript($transcript);
}
184
185
186 =head2 promoters
187
188 Title : promoters
189 Usage : @prom_sites = $gene->promoters();
190 Function: Get the promoter features of this gene structure.
191
192 This method basically merges the promoters returned by transcripts.
193
194 Note that OO-modeling of regulatory elements is not stable yet.
195 This means that this method might change or even disappear in a
196 future release. Be aware of this if you use it.
197
198 Returns : An array of Bio::SeqFeatureI implementing objects.
199 Args :
200
201
202 =cut
203
# Gathers the promoters of all transcripts of this gene structure into a
# single flat list, in transcript order.
sub promoters {
    my ($self) = @_;

    my @collected = map { $_->promoters() } $self->transcripts();
    return @collected;
}
214
215
216 =head2 exons
217
218 Title : exons()
219 Usage : @exons = $gene->exons();
220 @initial_exons = $gene->exons('Initial');
221 Function: Get all exon features or all exons of a specified type of this gene
222 structure.
223
224 Exon type is treated as a case-insensitive regular expression and
225 optional. For consistency, use only the following types:
226 initial, internal, terminal, utr, utr5prime, and utr3prime.
227 A special and virtual type is 'coding', which refers to all types
228 except utr.
229
230 This method basically merges the exons returned by transcripts.
231
232 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
233 Args : An optional string specifying the type of exon.
234
235
236 =cut
237
# Gathers the exons of all transcripts of this gene structure into a single
# flat list. Any arguments are passed through unchanged to each transcript's
# exons() method.
sub exons {
    my ($self, @args) = @_;

    my @collected = map { $_->exons(@args) } $self->transcripts();
    return @collected;
}
248
249 =head2 introns
250
251 Title : introns()
252 Usage : @introns = $gene->introns();
253 Function: Get all introns of this gene structure.
254
255 Note that this class currently generates these features on-the-fly,
256 that is, it simply treats all regions between exons as introns.
257 It assumes that the exons in the transcripts do not overlap.
258
259 This method basically merges the introns returned by transcripts.
260
261 Returns : An array of Bio::SeqFeatureI implementing objects.
262 Args :
263
264
265 =cut
266
# Gathers the introns of all transcripts of this gene structure into a single
# flat list, in transcript order.
sub introns {
    my ($self) = @_;

    my @collected = map { $_->introns() } $self->transcripts();
    return @collected;
}
277
278 =head2 poly_A_sites
279
280 Title : poly_A_sites()
281 Usage : @polyAsites = $gene->poly_A_sites();
282 Function: Get the poly-adenylation sites of this gene structure.
283
284 This method basically merges the poly-adenylation sites returned by
285 transcripts.
286
287 Returns : An array of Bio::SeqFeatureI implementing objects.
288 Args :
289
290
291 =cut
292
# Collects the poly-adenylation sites of all transcripts of this gene
# structure into a single flat list, in transcript order.
#
# Returns : a list of the sites reported by the transcripts; a transcript
#           that reports no site contributes nothing to the list.
sub poly_A_sites {
    my ($self) = @_;
    my @feas = ();

    foreach my $tr ($self->transcripts()) {
        # NOTE(review): poly_A_site() looks like a single-value accessor on
        # the transcript and presumably yields undef when no site was set --
        # confirm against the Transcript class. Undefined values are skipped
        # so callers only ever receive feature objects.
        push(@feas, grep { defined } $tr->poly_A_site());
    }
    return @feas;
}
303
304 =head2 utrs
305
306 Title : utrs()
307 Usage : @utr_sites = $gene->utrs('3prime');
308 @utr_sites = $gene->utrs('5prime');
309 @utr_sites = $gene->utrs();
310 Function: Get the features representing untranslated regions (UTR) of this
311 gene structure.
312
313 You may provide an argument specifying the type of UTR. Currently
314 the following types are recognized: 5prime 3prime for UTR on the
315 5' and 3' end of the CDS, respectively.
316
317 This method basically merges the UTRs returned by transcripts.
318
319 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
320 representing the UTR regions or sites.
321 Args : Optionally, either 3prime, or 5prime for the type of UTR
322 feature.
323
324
325 =cut
326
# Gathers the UTR features of all transcripts of this gene structure into a
# single flat list. Any arguments are passed through unchanged to each
# transcript's utrs() method.
sub utrs {
    my ($self, @args) = @_;

    my @collected = map { $_->utrs(@args) } $self->transcripts();
    return @collected;
}
337
338 =head2 sub_SeqFeature
339
340 Title : sub_SeqFeature
341 Usage : @feats = $gene->sub_SeqFeature();
342 Function: Returns an array of all subfeatures.
343
344 This method is defined in Bio::SeqFeatureI. We override this here
345 to include the transcripts.
346
347 Returns : An array of Bio::SeqFeatureI implementing objects.
348 Args : none
349
350
351 =cut
352
# Returns all subfeatures of this gene structure: first whatever the parent
# class reports through its own sub_SeqFeature(), then the transcripts.
sub sub_SeqFeature {
    my ($self) = @_;

    # Built as an array (not a bare list) so that scalar context still
    # yields the element count.
    my @all = ($self->SUPER::sub_SeqFeature(), $self->transcripts());
    return @all;
}
362
363 =head2 flush_sub_SeqFeature
364
365 Title : flush_sub_SeqFeature
366 Usage : $gene->flush_sub_SeqFeature();
367 $gene->flush_sub_SeqFeature(1);
368 Function: Removes all subfeatures.
369
370 This method is overridden from Bio::SeqFeature::Generic to flush
371 all additional subfeatures, i.e., transcripts, which is
372 almost certainly not what you want. To remove only features added
373 through $gene->add_sub_SeqFeature($feature) pass any
374 argument evaluating to TRUE.
375
376 Example :
377 Returns : none
378 Args : Optionally, an argument evaluating to TRUE will suppress flushing
379 of all gene structure-specific subfeatures (transcripts).
380
381
382 =cut
383
# Removes the subfeatures held by the parent class and, unless $fea_only is
# true, the transcripts of this gene structure as well.
#
# Args : $fea_only - optional; a true value keeps the transcripts in place.
sub flush_sub_SeqFeature {
    my ($self, $fea_only) = @_;

    $self->SUPER::flush_sub_SeqFeature();
    $self->flush_transcripts() if (! $fea_only);
}
392
393 1;
394
395
396
397
398
399
400
401