comparison variant_effect_predictor/Bio/LiveSeq/Gene.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Gene.pm,v 1.13 2001/06/18 08:27:53 heikki Exp $
2 #
3 # bioperl module for Bio::LiveSeq::Gene
4 #
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
6 #
7 # Copyright Joseph Insana
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LiveSeq::Gene - Range abstract class for LiveSeq
16
17 =head1 SYNOPSIS
18
19 # documentation needed
20
21 =head1 DESCRIPTION
22
23 This is used as storage for all object references concerning a particular gene.
24
25 =head1 AUTHOR - Joseph A.L. Insana
26
27 Email: Insana@ebi.ac.uk, jinsana@gmx.net
28
29 Address:
30
31 EMBL Outstation, European Bioinformatics Institute
32 Wellcome Trust Genome Campus, Hinxton
33 Cambs. CB10 1SD, United Kingdom
34
35 =head1 APPENDIX
36
37 The rest of the documentation details each of the object
38 methods. Internal methods are usually preceded with a _
39
40 =cut
41
42 # Let the code begin...
43
44 package Bio::LiveSeq::Gene;
45 $VERSION=2.3;
46
47 # Version history:
48 # Tue Apr 4 15:22:41 BST 2000 v 1.0 begun
49 # Tue Apr 4 16:19:27 BST 2000 v 1.1 completed new()
50 # Tue Apr 4 19:15:41 BST 2000 v 1.2 tested. Working. Simple methods created.
51 # Wed Apr 5 01:26:58 BST 2000 v 1.21 multiplicity, featuresnum() created
52 # Wed Apr 5 02:16:01 BST 2000 v 1.22 added upbound and downbound attributes
53 # Fri Apr 7 02:03:39 BST 2000 v 1.3 added printfeaturesnum and _set_Gene_in_all
54 # Fri Apr 7 02:53:05 BST 2000 v 2.0 added maxtranscript object creation
55 # Wed Jun 28 18:38:45 BST 2000 v 2.1 chaged croak to carp + return(-1)
56 # Wed Jul 12 15:19:26 BST 2000 v 2.11 ->strand call protected by if(ref(transcript))
57 # Tue Jan 30 14:15:42 EST 2001 v 2.2 delete_Obj added, to flush circular references
58 # Wed Apr 4 13:29:59 BST 2001 v 2.3 LiveSeq-wide warn and verbose added
59
60 use strict;
61 use Carp;
62 use vars qw($VERSION @ISA);
63 use Bio::LiveSeq::Prim_Transcript 1.0; # needed to create maxtranscript obj
64
65 #use Bio::LiveSeq::SeqI 2.11; # uses SeqI, inherits from it
66 #@ISA=qw(Bio::LiveSeq::SeqI);
67
68 =head2 new
69
70 Title : new
71 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name",
72 -features => $hashref
73 -upbound => $min
74 -downbound => $max);
75
76 Function: generates a new Bio::LiveSeq::Gene
77 Returns : reference to a new object of class Gene
78 Errorcode -1
79 Args : one string and one hashreference containing all features defined
80 for the Gene and the references to the LiveSeq objects for those
81 features.
82 Two labels for defining boundaries of the gene. Usually the
83 boundaries will reflect max span of transcript, exon... features,
84 while the DNA sequence will be created with some flanking regions
85 (e.g. with the EMBL_SRS::gene2liveseq routine).
86 If these two labels are not given, they will default to the start
87 and end of the DNA object.
88 Note : the format of the hash has to be like
89 DNA => reference to LiveSeq::DNA object
90 Transcripts => reference to array of transcripts objrefs
91 Transclations => reference to array of transcripts objrefs
92 Exons => ....
93 Introns => ....
94 Prim_Transcripts => ....
95 Repeat_Units => ....
96 Repeat_Regions => ....
97 Only DNA and Transcripts are mandatory
98
99 =cut
100
101 sub new {
102 my ($thing, %args) = @_;
103 my $class = ref($thing) || $thing;
104 my ($i,$self,%gene);
105
106 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
107
108 unless (ref($inputfeatures) eq "HASH") {
109 carp "$class not initialised because features hash not given";
110 return (-1);
111 }
112
113 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
114 my $features=\%features; # the ones input'ed, that could get destroyed
115
116 my $DNA=$features->{'DNA'};
117 unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
118 carp "$class not initialised because DNA feature not found";
119 return (-1);
120 }
121
122 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
123
124 my ($start,$end);
125
126 my @Transcripts=@{$features->{'Transcripts'}};
127
128 my $strand;
129 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
130 $self->warn("$class not initialised: first Transcript not a LiveSeq object");
131 return (-1);
132 } else {
133 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
134 }
135
136 for $i (@Transcripts) {
137 ($start,$end)=($i->start,$i->end);
138 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
139 $self->warn("$class not initialised because of problems in Transcripts feature");
140 return (-1);
141 } else {
142 }
143 unless($minstart) { $minstart=$start; } # initialize
144 unless($maxend) { $maxend=$end; } # initialize
145 if ($i->strand != $strand) {
146 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
147 return (-1);
148 }
149 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
150 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
151 }
152 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions;
153 my @Prim_Transcripts; my @Exons;
154 if (defined($features->{'Translations'})) {
155 @Translations=@{$features->{'Translations'}}; }
156 if (defined($features->{'Exons'})) {
157 @Exons=@{$features->{'Exons'}}; }
158 if (defined($features->{'Introns'})) {
159 @Introns=@{$features->{'Introns'}}; }
160 if (defined($features->{'Repeat_Units'})) {
161 @Repeat_Units=@{$features->{'Repeat_Units'}}; }
162 if (defined($features->{'Repeat_Regions'})) {
163 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; }
164 if (defined($features->{'Prim_Transcripts'})) {
165 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; }
166
167
168 if (@Translations) {
169 for $i (@Translations) {
170 ($start,$end)=($i->start,$i->end);
171 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) {
172 $self->warn("$class not initialised because of problems in Translations feature");
173 return (-1);
174 }
175 }
176 }
177 if (@Exons) {
178 for $i (@Exons) {
179 ($start,$end)=($i->start,$i->end);
180 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) {
181 $self->warn("$class not initialised because of problems in Exons feature");
182 return (-1);
183 }
184 if ($i->strand != $strand) {
185 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
186 return (-1);
187 }
188 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
189 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
190 }
191 }
192 if (@Introns) {
193 for $i (@Introns) {
194 ($start,$end)=($i->start,$i->end);
195 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) {
196 $self->warn("$class not initialised because of problems in Introns feature");
197 return (-1);
198 }
199 }
200 }
201 if (@Repeat_Units) {
202 for $i (@Repeat_Units) {
203 ($start,$end)=($i->start,$i->end);
204 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) {
205 $self->warn("$class not initialised because of problems in Repeat_Units feature");
206 return (-1);
207 }
208 }
209 }
210 if (@Repeat_Regions) {
211 for $i (@Repeat_Regions) {
212 ($start,$end)=($i->start,$i->end);
213 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) {
214 $self->warn("$class not initialised because of problems in Repeat_Regions feature");
215 return (-1);
216 }
217 }
218 }
219 if (@Prim_Transcripts) {
220 for $i (@Prim_Transcripts) {
221 ($start,$end)=($i->start,$i->end);
222 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
223 $self->warn("$class not initialised because of problems in Prim_Transcripts feature");
224 return (-1);
225 }
226 if ($i->strand != $strand) {
227 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
228 return (-1);
229 }
230 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
231 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
232 }
233 }
234
235 # create an array containing all obj references for all Gene Features
236 # useful for _set_Gene_in_all
237 my @allfeatures;
238 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts);
239
240 # create hash holding numbers for Gene Features
241 my %multiplicity;
242 my $key; my @array;
243 foreach $key (keys(%features)) {
244 unless ($key eq "DNA") {
245 @array=@{$features{$key}};
246 $multiplicity{$key}=scalar(@array);
247 }
248 }
249 $multiplicity{DNA}=1;
250
251 # create maxtranscript object. It's a Prim_Transcript with start as the
252 # minimum start and end as the maximum end.
253 # usually these start and end will be the same as the gene->upbound and
254 # gene->downbound, but maybe there could be cases when this will be false
255 # (e.g. with repeat_units just before the prim_transcript or first exon,
256 # but still labelled with the same /gene qualifier)
257
258 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
259
260
261 # check the upbound downbound parameters
262 if (defined($upbound)) {
263 unless ($DNA->valid($upbound)) {
264 $self->warn("$class not initialised because upbound label not valid");
265 return (-1);
266 }
267 } else {
268 $upbound=$DNA->start;
269 }
270 if (defined($downbound)) {
271 unless ($DNA->valid($downbound)) {
272 $self->warn("$class not initialised because downbound label not valid");
273 return (-1);
274 }
275 } else {
276 $downbound=$DNA->end;
277 }
278
279 %gene = (name => $name, features => $features,multiplicity => \%multiplicity,
280 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript);
281 $self = \%gene;
282 $self = bless $self, $class;
283 _set_Gene_in_all($self,@allfeatures);
284 return $self;
285 }
286
287 # this sets the "gene" objref in all the objects "belonging" to the Gene,
288 # i.e. in all its Features.
289 sub _set_Gene_in_all {
290 my $Gene=shift;
291 my $self;
292 foreach $self (@_) {
293 $self->gene($Gene);
294 }
295 }
296
297 # you can get or set the name of the gene
298 sub name {
299 my ($self,$value) = @_;
300 if (defined $value) {
301 $self->{'name'} = $value;
302 }
303 unless (exists $self->{'name'}) {
304 return "unknown";
305 } else {
306 return $self->{'name'};
307 }
308 }
309
310 # gets the features hash
311 sub features {
312 my $self=shift;
313 return ($self->{'features'});
314 }
315 sub get_DNA {
316 my $self=shift;
317 return ($self->{'features'}->{'DNA'});
318 }
319 sub get_Transcripts {
320 my $self=shift;
321 return ($self->{'features'}->{'Transcripts'});
322 }
323 sub get_Translations {
324 my $self=shift;
325 return ($self->{'features'}->{'Translations'});
326 }
327 sub get_Prim_Transcripts {
328 my $self=shift;
329 return ($self->{'features'}->{'Prim_Transcripts'});
330 }
331 sub get_Repeat_Units {
332 my $self=shift;
333 return ($self->{'features'}->{'Repeat_Units'});
334 }
335 sub get_Repeat_Regions {
336 my $self=shift;
337 return ($self->{'features'}->{'Repeat_Regions'});
338 }
339 sub get_Introns {
340 my $self=shift;
341 return ($self->{'features'}->{'Introns'});
342 }
343 sub get_Exons {
344 my $self=shift;
345 return ($self->{'features'}->{'Exons'});
346 }
347 sub featuresnum {
348 my $self=shift;
349 return ($self->{'multiplicity'});
350 }
351 sub upbound {
352 my $self=shift;
353 return ($self->{'upbound'});
354 }
355 sub downbound {
356 my $self=shift;
357 return ($self->{'downbound'});
358 }
359 sub printfeaturesnum {
360 my $self=shift;
361 my ($key,$value);
362 my %hash=%{$self->featuresnum};
363 foreach $key (keys(%hash)) {
364 $value=$hash{$key};
365 print "\t$key => $value\n";
366 }
367 }
368 sub maxtranscript {
369 my $self=shift;
370 return ($self->{'maxtranscript'});
371 }
372
373 sub delete_Obj {
374 my $self = shift;
375 my @values= values %{$self};
376 my @keys= keys %{$self};
377
378 foreach my $key ( @keys ) {
379 delete $self->{$key};
380 }
381 foreach my $value ( @values ) {
382 if (index(ref($value),"LiveSeq") != -1) { # object case
383 eval {
384 # delete $self->{$value};
385 $value->delete_Obj;
386 };
387 } elsif (index(ref($value),"ARRAY") != -1) { # array case
388 my @array=@{$value};
389 my $element;
390 foreach $element (@array) {
391 eval {
392 $element->delete_Obj;
393 };
394 }
395 } elsif (index(ref($value),"HASH") != -1) { # object case
396 my %hash=%{$value};
397 my $element;
398 foreach $element (%hash) {
399 eval {
400 $element->delete_Obj;
401 };
402 }
403 }
404 }
405 return(1);
406 }
407
408
409 =head2 verbose
410
411 Title : verbose
412 Usage : $self->verbose(0)
413 Function: Sets verbose level for how ->warn behaves
414 -1 = silent: no warning
415 0 = reduced: minimal warnings
416 1 = default: all warnings
417 2 = extended: all warnings + stack trace dump
418 3 = paranoid: a warning becomes a throw and the program dies
419
420 Note: a quick way to set all LiveSeq objects at the same verbosity
421 level is to change the DNA level object, since they all look to
422 that one if their verbosity_level attribute is not set.
423 But the method offers fine tuning possibility by changing the
424 verbose level of each object in a different way.
425
426 So for example, after $loader= and $gene= have been retrieved
427 by a program, the command $gene->verbose(0); would
428 set the default verbosity level to 0 for all objects.
429
430 Returns : the current verbosity level
431 Args : -1,0,1,2 or 3
432
433 =cut
434
435
436 sub verbose {
437 my $self=shift;
438 my $value = shift;
439 return $self->{'features'}->{'DNA'}->verbose($value);
440 }
441
442 sub warn {
443 my $self=shift;
444 my $value = shift;
445 return $self->{'features'}->{'DNA'}->warn($value);
446 }
447
448
449
450 1;