0
|
1 # $Id: Gene.pm,v 1.13 2001/06/18 08:27:53 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::LiveSeq::Gene
|
|
4 #
|
|
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
|
|
6 #
|
|
7 # Copyright Joseph Insana
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::LiveSeq::Gene - Range abstract class for LiveSeq
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # documentation needed
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This is used as storage for all object references concerning a particular gene.
|
|
24
|
|
25 =head1 AUTHOR - Joseph A.L. Insana
|
|
26
|
|
27 Email: Insana@ebi.ac.uk, jinsana@gmx.net
|
|
28
|
|
29 Address:
|
|
30
|
|
31 EMBL Outstation, European Bioinformatics Institute
|
|
32 Wellcome Trust Genome Campus, Hinxton
|
|
33 Cambs. CB10 1SD, United Kingdom
|
|
34
|
|
35 =head1 APPENDIX
|
|
36
|
|
37 The rest of the documentation details each of the object
|
|
38 methods. Internal methods are usually preceded with a _
|
|
39
|
|
40 =cut
|
|
41
|
|
42 # Let the code begin...
|
|
43
|
|
44 package Bio::LiveSeq::Gene;
|
|
45 $VERSION=2.3;
|
|
46
|
|
47 # Version history:
|
|
48 # Tue Apr 4 15:22:41 BST 2000 v 1.0 begun
|
|
49 # Tue Apr 4 16:19:27 BST 2000 v 1.1 completed new()
|
|
50 # Tue Apr 4 19:15:41 BST 2000 v 1.2 tested. Working. Simple methods created.
|
|
51 # Wed Apr 5 01:26:58 BST 2000 v 1.21 multiplicity, featuresnum() created
|
|
52 # Wed Apr 5 02:16:01 BST 2000 v 1.22 added upbound and downbound attributes
|
|
53 # Fri Apr 7 02:03:39 BST 2000 v 1.3 added printfeaturesnum and _set_Gene_in_all
|
|
54 # Fri Apr 7 02:53:05 BST 2000 v 2.0 added maxtranscript object creation
|
|
55 # Wed Jun 28 18:38:45 BST 2000 v 2.1 chaged croak to carp + return(-1)
|
|
56 # Wed Jul 12 15:19:26 BST 2000 v 2.11 ->strand call protected by if(ref(transcript))
|
|
57 # Tue Jan 30 14:15:42 EST 2001 v 2.2 delete_Obj added, to flush circular references
|
|
58 # Wed Apr 4 13:29:59 BST 2001 v 2.3 LiveSeq-wide warn and verbose added
|
|
59
|
|
60 use strict;
|
|
61 use Carp;
|
|
62 use vars qw($VERSION @ISA);
|
|
63 use Bio::LiveSeq::Prim_Transcript 1.0; # needed to create maxtranscript obj
|
|
64
|
|
65 #use Bio::LiveSeq::SeqI 2.11; # uses SeqI, inherits from it
|
|
66 #@ISA=qw(Bio::LiveSeq::SeqI);
|
|
67
|
|
68 =head2 new
|
|
69
|
|
70 Title : new
|
|
71 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name",
|
|
72 -features => $hashref
|
|
73 -upbound => $min
|
|
74 -downbound => $max);
|
|
75
|
|
76 Function: generates a new Bio::LiveSeq::Gene
|
|
77 Returns : reference to a new object of class Gene
|
|
78 Errorcode -1
|
|
79 Args : one string and one hashreference containing all features defined
|
|
80 for the Gene and the references to the LiveSeq objects for those
|
|
81 features.
|
|
82 Two labels for defining boundaries of the gene. Usually the
|
|
83 boundaries will reflect max span of transcript, exon... features,
|
|
84 while the DNA sequence will be created with some flanking regions
|
|
85 (e.g. with the EMBL_SRS::gene2liveseq routine).
|
|
86 If these two labels are not given, they will default to the start
|
|
87 and end of the DNA object.
|
|
88 Note : the format of the hash has to be like
|
|
89 DNA => reference to LiveSeq::DNA object
|
|
90 Transcripts => reference to array of transcripts objrefs
|
|
91 Transclations => reference to array of transcripts objrefs
|
|
92 Exons => ....
|
|
93 Introns => ....
|
|
94 Prim_Transcripts => ....
|
|
95 Repeat_Units => ....
|
|
96 Repeat_Regions => ....
|
|
97 Only DNA and Transcripts are mandatory
|
|
98
|
|
99 =cut
|
|
100
|
|
101 sub new {
|
|
102 my ($thing, %args) = @_;
|
|
103 my $class = ref($thing) || $thing;
|
|
104 my ($i,$self,%gene);
|
|
105
|
|
106 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
|
|
107
|
|
108 unless (ref($inputfeatures) eq "HASH") {
|
|
109 carp "$class not initialised because features hash not given";
|
|
110 return (-1);
|
|
111 }
|
|
112
|
|
113 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
|
|
114 my $features=\%features; # the ones input'ed, that could get destroyed
|
|
115
|
|
116 my $DNA=$features->{'DNA'};
|
|
117 unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
|
|
118 carp "$class not initialised because DNA feature not found";
|
|
119 return (-1);
|
|
120 }
|
|
121
|
|
122 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
|
|
123
|
|
124 my ($start,$end);
|
|
125
|
|
126 my @Transcripts=@{$features->{'Transcripts'}};
|
|
127
|
|
128 my $strand;
|
|
129 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
|
|
130 $self->warn("$class not initialised: first Transcript not a LiveSeq object");
|
|
131 return (-1);
|
|
132 } else {
|
|
133 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
|
|
134 }
|
|
135
|
|
136 for $i (@Transcripts) {
|
|
137 ($start,$end)=($i->start,$i->end);
|
|
138 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
139 $self->warn("$class not initialised because of problems in Transcripts feature");
|
|
140 return (-1);
|
|
141 } else {
|
|
142 }
|
|
143 unless($minstart) { $minstart=$start; } # initialize
|
|
144 unless($maxend) { $maxend=$end; } # initialize
|
|
145 if ($i->strand != $strand) {
|
|
146 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
|
|
147 return (-1);
|
|
148 }
|
|
149 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
|
|
150 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
|
|
151 }
|
|
152 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions;
|
|
153 my @Prim_Transcripts; my @Exons;
|
|
154 if (defined($features->{'Translations'})) {
|
|
155 @Translations=@{$features->{'Translations'}}; }
|
|
156 if (defined($features->{'Exons'})) {
|
|
157 @Exons=@{$features->{'Exons'}}; }
|
|
158 if (defined($features->{'Introns'})) {
|
|
159 @Introns=@{$features->{'Introns'}}; }
|
|
160 if (defined($features->{'Repeat_Units'})) {
|
|
161 @Repeat_Units=@{$features->{'Repeat_Units'}}; }
|
|
162 if (defined($features->{'Repeat_Regions'})) {
|
|
163 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; }
|
|
164 if (defined($features->{'Prim_Transcripts'})) {
|
|
165 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; }
|
|
166
|
|
167
|
|
168 if (@Translations) {
|
|
169 for $i (@Translations) {
|
|
170 ($start,$end)=($i->start,$i->end);
|
|
171 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
172 $self->warn("$class not initialised because of problems in Translations feature");
|
|
173 return (-1);
|
|
174 }
|
|
175 }
|
|
176 }
|
|
177 if (@Exons) {
|
|
178 for $i (@Exons) {
|
|
179 ($start,$end)=($i->start,$i->end);
|
|
180 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
181 $self->warn("$class not initialised because of problems in Exons feature");
|
|
182 return (-1);
|
|
183 }
|
|
184 if ($i->strand != $strand) {
|
|
185 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
|
|
186 return (-1);
|
|
187 }
|
|
188 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
|
|
189 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
|
|
190 }
|
|
191 }
|
|
192 if (@Introns) {
|
|
193 for $i (@Introns) {
|
|
194 ($start,$end)=($i->start,$i->end);
|
|
195 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
196 $self->warn("$class not initialised because of problems in Introns feature");
|
|
197 return (-1);
|
|
198 }
|
|
199 }
|
|
200 }
|
|
201 if (@Repeat_Units) {
|
|
202 for $i (@Repeat_Units) {
|
|
203 ($start,$end)=($i->start,$i->end);
|
|
204 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
205 $self->warn("$class not initialised because of problems in Repeat_Units feature");
|
|
206 return (-1);
|
|
207 }
|
|
208 }
|
|
209 }
|
|
210 if (@Repeat_Regions) {
|
|
211 for $i (@Repeat_Regions) {
|
|
212 ($start,$end)=($i->start,$i->end);
|
|
213 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
214 $self->warn("$class not initialised because of problems in Repeat_Regions feature");
|
|
215 return (-1);
|
|
216 }
|
|
217 }
|
|
218 }
|
|
219 if (@Prim_Transcripts) {
|
|
220 for $i (@Prim_Transcripts) {
|
|
221 ($start,$end)=($i->start,$i->end);
|
|
222 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
|
|
223 $self->warn("$class not initialised because of problems in Prim_Transcripts feature");
|
|
224 return (-1);
|
|
225 }
|
|
226 if ($i->strand != $strand) {
|
|
227 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
|
|
228 return (-1);
|
|
229 }
|
|
230 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
|
|
231 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
|
|
232 }
|
|
233 }
|
|
234
|
|
235 # create an array containing all obj references for all Gene Features
|
|
236 # useful for _set_Gene_in_all
|
|
237 my @allfeatures;
|
|
238 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts);
|
|
239
|
|
240 # create hash holding numbers for Gene Features
|
|
241 my %multiplicity;
|
|
242 my $key; my @array;
|
|
243 foreach $key (keys(%features)) {
|
|
244 unless ($key eq "DNA") {
|
|
245 @array=@{$features{$key}};
|
|
246 $multiplicity{$key}=scalar(@array);
|
|
247 }
|
|
248 }
|
|
249 $multiplicity{DNA}=1;
|
|
250
|
|
251 # create maxtranscript object. It's a Prim_Transcript with start as the
|
|
252 # minimum start and end as the maximum end.
|
|
253 # usually these start and end will be the same as the gene->upbound and
|
|
254 # gene->downbound, but maybe there could be cases when this will be false
|
|
255 # (e.g. with repeat_units just before the prim_transcript or first exon,
|
|
256 # but still labelled with the same /gene qualifier)
|
|
257
|
|
258 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
|
|
259
|
|
260
|
|
261 # check the upbound downbound parameters
|
|
262 if (defined($upbound)) {
|
|
263 unless ($DNA->valid($upbound)) {
|
|
264 $self->warn("$class not initialised because upbound label not valid");
|
|
265 return (-1);
|
|
266 }
|
|
267 } else {
|
|
268 $upbound=$DNA->start;
|
|
269 }
|
|
270 if (defined($downbound)) {
|
|
271 unless ($DNA->valid($downbound)) {
|
|
272 $self->warn("$class not initialised because downbound label not valid");
|
|
273 return (-1);
|
|
274 }
|
|
275 } else {
|
|
276 $downbound=$DNA->end;
|
|
277 }
|
|
278
|
|
279 %gene = (name => $name, features => $features,multiplicity => \%multiplicity,
|
|
280 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript);
|
|
281 $self = \%gene;
|
|
282 $self = bless $self, $class;
|
|
283 _set_Gene_in_all($self,@allfeatures);
|
|
284 return $self;
|
|
285 }
|
|
286
|
|
287 # this sets the "gene" objref in all the objects "belonging" to the Gene,
|
|
288 # i.e. in all its Features.
|
|
289 sub _set_Gene_in_all {
|
|
290 my $Gene=shift;
|
|
291 my $self;
|
|
292 foreach $self (@_) {
|
|
293 $self->gene($Gene);
|
|
294 }
|
|
295 }
|
|
296
|
|
297 # you can get or set the name of the gene
|
|
298 sub name {
|
|
299 my ($self,$value) = @_;
|
|
300 if (defined $value) {
|
|
301 $self->{'name'} = $value;
|
|
302 }
|
|
303 unless (exists $self->{'name'}) {
|
|
304 return "unknown";
|
|
305 } else {
|
|
306 return $self->{'name'};
|
|
307 }
|
|
308 }
|
|
309
|
|
310 # gets the features hash
|
|
311 sub features {
|
|
312 my $self=shift;
|
|
313 return ($self->{'features'});
|
|
314 }
|
|
315 sub get_DNA {
|
|
316 my $self=shift;
|
|
317 return ($self->{'features'}->{'DNA'});
|
|
318 }
|
|
319 sub get_Transcripts {
|
|
320 my $self=shift;
|
|
321 return ($self->{'features'}->{'Transcripts'});
|
|
322 }
|
|
323 sub get_Translations {
|
|
324 my $self=shift;
|
|
325 return ($self->{'features'}->{'Translations'});
|
|
326 }
|
|
327 sub get_Prim_Transcripts {
|
|
328 my $self=shift;
|
|
329 return ($self->{'features'}->{'Prim_Transcripts'});
|
|
330 }
|
|
331 sub get_Repeat_Units {
|
|
332 my $self=shift;
|
|
333 return ($self->{'features'}->{'Repeat_Units'});
|
|
334 }
|
|
335 sub get_Repeat_Regions {
|
|
336 my $self=shift;
|
|
337 return ($self->{'features'}->{'Repeat_Regions'});
|
|
338 }
|
|
339 sub get_Introns {
|
|
340 my $self=shift;
|
|
341 return ($self->{'features'}->{'Introns'});
|
|
342 }
|
|
343 sub get_Exons {
|
|
344 my $self=shift;
|
|
345 return ($self->{'features'}->{'Exons'});
|
|
346 }
|
|
347 sub featuresnum {
|
|
348 my $self=shift;
|
|
349 return ($self->{'multiplicity'});
|
|
350 }
|
|
351 sub upbound {
|
|
352 my $self=shift;
|
|
353 return ($self->{'upbound'});
|
|
354 }
|
|
355 sub downbound {
|
|
356 my $self=shift;
|
|
357 return ($self->{'downbound'});
|
|
358 }
|
|
359 sub printfeaturesnum {
|
|
360 my $self=shift;
|
|
361 my ($key,$value);
|
|
362 my %hash=%{$self->featuresnum};
|
|
363 foreach $key (keys(%hash)) {
|
|
364 $value=$hash{$key};
|
|
365 print "\t$key => $value\n";
|
|
366 }
|
|
367 }
|
|
368 sub maxtranscript {
|
|
369 my $self=shift;
|
|
370 return ($self->{'maxtranscript'});
|
|
371 }
|
|
372
|
|
373 sub delete_Obj {
|
|
374 my $self = shift;
|
|
375 my @values= values %{$self};
|
|
376 my @keys= keys %{$self};
|
|
377
|
|
378 foreach my $key ( @keys ) {
|
|
379 delete $self->{$key};
|
|
380 }
|
|
381 foreach my $value ( @values ) {
|
|
382 if (index(ref($value),"LiveSeq") != -1) { # object case
|
|
383 eval {
|
|
384 # delete $self->{$value};
|
|
385 $value->delete_Obj;
|
|
386 };
|
|
387 } elsif (index(ref($value),"ARRAY") != -1) { # array case
|
|
388 my @array=@{$value};
|
|
389 my $element;
|
|
390 foreach $element (@array) {
|
|
391 eval {
|
|
392 $element->delete_Obj;
|
|
393 };
|
|
394 }
|
|
395 } elsif (index(ref($value),"HASH") != -1) { # object case
|
|
396 my %hash=%{$value};
|
|
397 my $element;
|
|
398 foreach $element (%hash) {
|
|
399 eval {
|
|
400 $element->delete_Obj;
|
|
401 };
|
|
402 }
|
|
403 }
|
|
404 }
|
|
405 return(1);
|
|
406 }
|
|
407
|
|
408
|
|
409 =head2 verbose
|
|
410
|
|
411 Title : verbose
|
|
412 Usage : $self->verbose(0)
|
|
413 Function: Sets verbose level for how ->warn behaves
|
|
414 -1 = silent: no warning
|
|
415 0 = reduced: minimal warnings
|
|
416 1 = default: all warnings
|
|
417 2 = extended: all warnings + stack trace dump
|
|
418 3 = paranoid: a warning becomes a throw and the program dies
|
|
419
|
|
420 Note: a quick way to set all LiveSeq objects at the same verbosity
|
|
421 level is to change the DNA level object, since they all look to
|
|
422 that one if their verbosity_level attribute is not set.
|
|
423 But the method offers fine tuning possibility by changing the
|
|
424 verbose level of each object in a different way.
|
|
425
|
|
426 So for example, after $loader= and $gene= have been retrieved
|
|
427 by a program, the command $gene->verbose(0); would
|
|
428 set the default verbosity level to 0 for all objects.
|
|
429
|
|
430 Returns : the current verbosity level
|
|
431 Args : -1,0,1,2 or 3
|
|
432
|
|
433 =cut
|
|
434
|
|
435
|
|
436 sub verbose {
|
|
437 my $self=shift;
|
|
438 my $value = shift;
|
|
439 return $self->{'features'}->{'DNA'}->verbose($value);
|
|
440 }
|
|
441
|
|
442 sub warn {
|
|
443 my $self=shift;
|
|
444 my $value = shift;
|
|
445 return $self->{'features'}->{'DNA'}->warn($value);
|
|
446 }
|
|
447
|
|
448
|
|
449
|
|
450 1;
|