Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Gene.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Gene.pm,v 1.13 2001/06/18 08:27:53 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::LiveSeq::Gene | |
4 # | |
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
6 # | |
7 # Copyright Joseph Insana | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::LiveSeq::Gene - Range abstract class for LiveSeq | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # documentation needed | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 This is used as storage for all object references concerning a particular gene. | |
24 | |
25 =head1 AUTHOR - Joseph A.L. Insana | |
26 | |
27 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
28 | |
29 Address: | |
30 | |
31 EMBL Outstation, European Bioinformatics Institute | |
32 Wellcome Trust Genome Campus, Hinxton | |
33 Cambs. CB10 1SD, United Kingdom | |
34 | |
35 =head1 APPENDIX | |
36 | |
37 The rest of the documentation details each of the object | |
38 methods. Internal methods are usually preceded with a _ | |
39 | |
40 =cut | |
41 | |
42 # Let the code begin... | |
43 | |
44 package Bio::LiveSeq::Gene; | |
45 $VERSION=2.3; | |
46 | |
47 # Version history: | |
48 # Tue Apr 4 15:22:41 BST 2000 v 1.0 begun | |
49 # Tue Apr 4 16:19:27 BST 2000 v 1.1 completed new() | |
50 # Tue Apr 4 19:15:41 BST 2000 v 1.2 tested. Working. Simple methods created. | |
51 # Wed Apr 5 01:26:58 BST 2000 v 1.21 multiplicity, featuresnum() created | |
52 # Wed Apr 5 02:16:01 BST 2000 v 1.22 added upbound and downbound attributes | |
53 # Fri Apr 7 02:03:39 BST 2000 v 1.3 added printfeaturesnum and _set_Gene_in_all | |
54 # Fri Apr 7 02:53:05 BST 2000 v 2.0 added maxtranscript object creation | |
55 # Wed Jun 28 18:38:45 BST 2000 v 2.1 chaged croak to carp + return(-1) | |
56 # Wed Jul 12 15:19:26 BST 2000 v 2.11 ->strand call protected by if(ref(transcript)) | |
57 # Tue Jan 30 14:15:42 EST 2001 v 2.2 delete_Obj added, to flush circular references | |
58 # Wed Apr 4 13:29:59 BST 2001 v 2.3 LiveSeq-wide warn and verbose added | |
59 | |
60 use strict; | |
61 use Carp; | |
62 use vars qw($VERSION @ISA); | |
63 use Bio::LiveSeq::Prim_Transcript 1.0; # needed to create maxtranscript obj | |
64 | |
65 #use Bio::LiveSeq::SeqI 2.11; # uses SeqI, inherits from it | |
66 #@ISA=qw(Bio::LiveSeq::SeqI); | |
67 | |
68 =head2 new | |
69 | |
70 Title : new | |
71 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name", | |
72 -features => $hashref | |
73 -upbound => $min | |
74 -downbound => $max); | |
75 | |
76 Function: generates a new Bio::LiveSeq::Gene | |
77 Returns : reference to a new object of class Gene | |
78 Errorcode -1 | |
79 Args : one string and one hashreference containing all features defined | |
80 for the Gene and the references to the LiveSeq objects for those | |
81 features. | |
82 Two labels for defining boundaries of the gene. Usually the | |
83 boundaries will reflect max span of transcript, exon... features, | |
84 while the DNA sequence will be created with some flanking regions | |
85 (e.g. with the EMBL_SRS::gene2liveseq routine). | |
86 If these two labels are not given, they will default to the start | |
87 and end of the DNA object. | |
88 Note : the format of the hash has to be like | |
89 DNA => reference to LiveSeq::DNA object | |
90 Transcripts => reference to array of transcripts objrefs | |
91 Transclations => reference to array of transcripts objrefs | |
92 Exons => .... | |
93 Introns => .... | |
94 Prim_Transcripts => .... | |
95 Repeat_Units => .... | |
96 Repeat_Regions => .... | |
97 Only DNA and Transcripts are mandatory | |
98 | |
99 =cut | |
100 | |
101 sub new { | |
102 my ($thing, %args) = @_; | |
103 my $class = ref($thing) || $thing; | |
104 my ($i,$self,%gene); | |
105 | |
106 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound}); | |
107 | |
108 unless (ref($inputfeatures) eq "HASH") { | |
109 carp "$class not initialised because features hash not given"; | |
110 return (-1); | |
111 } | |
112 | |
113 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not | |
114 my $features=\%features; # the ones input'ed, that could get destroyed | |
115 | |
116 my $DNA=$features->{'DNA'}; | |
117 unless (ref($DNA) eq "Bio::LiveSeq::DNA") { | |
118 carp "$class not initialised because DNA feature not found"; | |
119 return (-1); | |
120 } | |
121 | |
122 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features | |
123 | |
124 my ($start,$end); | |
125 | |
126 my @Transcripts=@{$features->{'Transcripts'}}; | |
127 | |
128 my $strand; | |
129 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") { | |
130 $self->warn("$class not initialised: first Transcript not a LiveSeq object"); | |
131 return (-1); | |
132 } else { | |
133 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check | |
134 } | |
135 | |
136 for $i (@Transcripts) { | |
137 ($start,$end)=($i->start,$i->end); | |
138 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
139 $self->warn("$class not initialised because of problems in Transcripts feature"); | |
140 return (-1); | |
141 } else { | |
142 } | |
143 unless($minstart) { $minstart=$start; } # initialize | |
144 unless($maxend) { $maxend=$end; } # initialize | |
145 if ($i->strand != $strand) { | |
146 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
147 return (-1); | |
148 } | |
149 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
150 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
151 } | |
152 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions; | |
153 my @Prim_Transcripts; my @Exons; | |
154 if (defined($features->{'Translations'})) { | |
155 @Translations=@{$features->{'Translations'}}; } | |
156 if (defined($features->{'Exons'})) { | |
157 @Exons=@{$features->{'Exons'}}; } | |
158 if (defined($features->{'Introns'})) { | |
159 @Introns=@{$features->{'Introns'}}; } | |
160 if (defined($features->{'Repeat_Units'})) { | |
161 @Repeat_Units=@{$features->{'Repeat_Units'}}; } | |
162 if (defined($features->{'Repeat_Regions'})) { | |
163 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; } | |
164 if (defined($features->{'Prim_Transcripts'})) { | |
165 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; } | |
166 | |
167 | |
168 if (@Translations) { | |
169 for $i (@Translations) { | |
170 ($start,$end)=($i->start,$i->end); | |
171 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
172 $self->warn("$class not initialised because of problems in Translations feature"); | |
173 return (-1); | |
174 } | |
175 } | |
176 } | |
177 if (@Exons) { | |
178 for $i (@Exons) { | |
179 ($start,$end)=($i->start,$i->end); | |
180 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
181 $self->warn("$class not initialised because of problems in Exons feature"); | |
182 return (-1); | |
183 } | |
184 if ($i->strand != $strand) { | |
185 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
186 return (-1); | |
187 } | |
188 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
189 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
190 } | |
191 } | |
192 if (@Introns) { | |
193 for $i (@Introns) { | |
194 ($start,$end)=($i->start,$i->end); | |
195 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
196 $self->warn("$class not initialised because of problems in Introns feature"); | |
197 return (-1); | |
198 } | |
199 } | |
200 } | |
201 if (@Repeat_Units) { | |
202 for $i (@Repeat_Units) { | |
203 ($start,$end)=($i->start,$i->end); | |
204 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
205 $self->warn("$class not initialised because of problems in Repeat_Units feature"); | |
206 return (-1); | |
207 } | |
208 } | |
209 } | |
210 if (@Repeat_Regions) { | |
211 for $i (@Repeat_Regions) { | |
212 ($start,$end)=($i->start,$i->end); | |
213 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
214 $self->warn("$class not initialised because of problems in Repeat_Regions feature"); | |
215 return (-1); | |
216 } | |
217 } | |
218 } | |
219 if (@Prim_Transcripts) { | |
220 for $i (@Prim_Transcripts) { | |
221 ($start,$end)=($i->start,$i->end); | |
222 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
223 $self->warn("$class not initialised because of problems in Prim_Transcripts feature"); | |
224 return (-1); | |
225 } | |
226 if ($i->strand != $strand) { | |
227 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
228 return (-1); | |
229 } | |
230 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
231 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
232 } | |
233 } | |
234 | |
235 # create an array containing all obj references for all Gene Features | |
236 # useful for _set_Gene_in_all | |
237 my @allfeatures; | |
238 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts); | |
239 | |
240 # create hash holding numbers for Gene Features | |
241 my %multiplicity; | |
242 my $key; my @array; | |
243 foreach $key (keys(%features)) { | |
244 unless ($key eq "DNA") { | |
245 @array=@{$features{$key}}; | |
246 $multiplicity{$key}=scalar(@array); | |
247 } | |
248 } | |
249 $multiplicity{DNA}=1; | |
250 | |
251 # create maxtranscript object. It's a Prim_Transcript with start as the | |
252 # minimum start and end as the maximum end. | |
253 # usually these start and end will be the same as the gene->upbound and | |
254 # gene->downbound, but maybe there could be cases when this will be false | |
255 # (e.g. with repeat_units just before the prim_transcript or first exon, | |
256 # but still labelled with the same /gene qualifier) | |
257 | |
258 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA); | |
259 | |
260 | |
261 # check the upbound downbound parameters | |
262 if (defined($upbound)) { | |
263 unless ($DNA->valid($upbound)) { | |
264 $self->warn("$class not initialised because upbound label not valid"); | |
265 return (-1); | |
266 } | |
267 } else { | |
268 $upbound=$DNA->start; | |
269 } | |
270 if (defined($downbound)) { | |
271 unless ($DNA->valid($downbound)) { | |
272 $self->warn("$class not initialised because downbound label not valid"); | |
273 return (-1); | |
274 } | |
275 } else { | |
276 $downbound=$DNA->end; | |
277 } | |
278 | |
279 %gene = (name => $name, features => $features,multiplicity => \%multiplicity, | |
280 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript); | |
281 $self = \%gene; | |
282 $self = bless $self, $class; | |
283 _set_Gene_in_all($self,@allfeatures); | |
284 return $self; | |
285 } | |
286 | |
287 # this sets the "gene" objref in all the objects "belonging" to the Gene, | |
288 # i.e. in all its Features. | |
289 sub _set_Gene_in_all { | |
290 my $Gene=shift; | |
291 my $self; | |
292 foreach $self (@_) { | |
293 $self->gene($Gene); | |
294 } | |
295 } | |
296 | |
297 # you can get or set the name of the gene | |
298 sub name { | |
299 my ($self,$value) = @_; | |
300 if (defined $value) { | |
301 $self->{'name'} = $value; | |
302 } | |
303 unless (exists $self->{'name'}) { | |
304 return "unknown"; | |
305 } else { | |
306 return $self->{'name'}; | |
307 } | |
308 } | |
309 | |
310 # gets the features hash | |
311 sub features { | |
312 my $self=shift; | |
313 return ($self->{'features'}); | |
314 } | |
315 sub get_DNA { | |
316 my $self=shift; | |
317 return ($self->{'features'}->{'DNA'}); | |
318 } | |
319 sub get_Transcripts { | |
320 my $self=shift; | |
321 return ($self->{'features'}->{'Transcripts'}); | |
322 } | |
323 sub get_Translations { | |
324 my $self=shift; | |
325 return ($self->{'features'}->{'Translations'}); | |
326 } | |
327 sub get_Prim_Transcripts { | |
328 my $self=shift; | |
329 return ($self->{'features'}->{'Prim_Transcripts'}); | |
330 } | |
331 sub get_Repeat_Units { | |
332 my $self=shift; | |
333 return ($self->{'features'}->{'Repeat_Units'}); | |
334 } | |
335 sub get_Repeat_Regions { | |
336 my $self=shift; | |
337 return ($self->{'features'}->{'Repeat_Regions'}); | |
338 } | |
339 sub get_Introns { | |
340 my $self=shift; | |
341 return ($self->{'features'}->{'Introns'}); | |
342 } | |
343 sub get_Exons { | |
344 my $self=shift; | |
345 return ($self->{'features'}->{'Exons'}); | |
346 } | |
347 sub featuresnum { | |
348 my $self=shift; | |
349 return ($self->{'multiplicity'}); | |
350 } | |
351 sub upbound { | |
352 my $self=shift; | |
353 return ($self->{'upbound'}); | |
354 } | |
355 sub downbound { | |
356 my $self=shift; | |
357 return ($self->{'downbound'}); | |
358 } | |
359 sub printfeaturesnum { | |
360 my $self=shift; | |
361 my ($key,$value); | |
362 my %hash=%{$self->featuresnum}; | |
363 foreach $key (keys(%hash)) { | |
364 $value=$hash{$key}; | |
365 print "\t$key => $value\n"; | |
366 } | |
367 } | |
368 sub maxtranscript { | |
369 my $self=shift; | |
370 return ($self->{'maxtranscript'}); | |
371 } | |
372 | |
373 sub delete_Obj { | |
374 my $self = shift; | |
375 my @values= values %{$self}; | |
376 my @keys= keys %{$self}; | |
377 | |
378 foreach my $key ( @keys ) { | |
379 delete $self->{$key}; | |
380 } | |
381 foreach my $value ( @values ) { | |
382 if (index(ref($value),"LiveSeq") != -1) { # object case | |
383 eval { | |
384 # delete $self->{$value}; | |
385 $value->delete_Obj; | |
386 }; | |
387 } elsif (index(ref($value),"ARRAY") != -1) { # array case | |
388 my @array=@{$value}; | |
389 my $element; | |
390 foreach $element (@array) { | |
391 eval { | |
392 $element->delete_Obj; | |
393 }; | |
394 } | |
395 } elsif (index(ref($value),"HASH") != -1) { # object case | |
396 my %hash=%{$value}; | |
397 my $element; | |
398 foreach $element (%hash) { | |
399 eval { | |
400 $element->delete_Obj; | |
401 }; | |
402 } | |
403 } | |
404 } | |
405 return(1); | |
406 } | |
407 | |
408 | |
409 =head2 verbose | |
410 | |
411 Title : verbose | |
412 Usage : $self->verbose(0) | |
413 Function: Sets verbose level for how ->warn behaves | |
414 -1 = silent: no warning | |
415 0 = reduced: minimal warnings | |
416 1 = default: all warnings | |
417 2 = extended: all warnings + stack trace dump | |
418 3 = paranoid: a warning becomes a throw and the program dies | |
419 | |
420 Note: a quick way to set all LiveSeq objects at the same verbosity | |
421 level is to change the DNA level object, since they all look to | |
422 that one if their verbosity_level attribute is not set. | |
423 But the method offers fine tuning possibility by changing the | |
424 verbose level of each object in a different way. | |
425 | |
426 So for example, after $loader= and $gene= have been retrieved | |
427 by a program, the command $gene->verbose(0); would | |
428 set the default verbosity level to 0 for all objects. | |
429 | |
430 Returns : the current verbosity level | |
431 Args : -1,0,1,2 or 3 | |
432 | |
433 =cut | |
434 | |
435 | |
436 sub verbose { | |
437 my $self=shift; | |
438 my $value = shift; | |
439 return $self->{'features'}->{'DNA'}->verbose($value); | |
440 } | |
441 | |
442 sub warn { | |
443 my $self=shift; | |
444 my $value = shift; | |
445 return $self->{'features'}->{'DNA'}->warn($value); | |
446 } | |
447 | |
448 | |
449 | |
450 1; |