Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Gene.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: Gene.pm,v 1.13 2001/06/18 08:27:53 heikki Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::LiveSeq::Gene | |
| 4 # | |
| 5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
| 6 # | |
| 7 # Copyright Joseph Insana | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::LiveSeq::Gene - Range abstract class for LiveSeq | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # documentation needed | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This is used as storage for all object references concerning a particular gene. | |
| 24 | |
| 25 =head1 AUTHOR - Joseph A.L. Insana | |
| 26 | |
| 27 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
| 28 | |
| 29 Address: | |
| 30 | |
| 31 EMBL Outstation, European Bioinformatics Institute | |
| 32 Wellcome Trust Genome Campus, Hinxton | |
| 33 Cambs. CB10 1SD, United Kingdom | |
| 34 | |
| 35 =head1 APPENDIX | |
| 36 | |
| 37 The rest of the documentation details each of the object | |
| 38 methods. Internal methods are usually preceded with a _ | |
| 39 | |
| 40 =cut | |
| 41 | |
| 42 # Let the code begin... | |
| 43 | |
| 44 package Bio::LiveSeq::Gene; | |
| 45 $VERSION=2.3; | |
| 46 | |
| 47 # Version history: | |
| 48 # Tue Apr 4 15:22:41 BST 2000 v 1.0 begun | |
| 49 # Tue Apr 4 16:19:27 BST 2000 v 1.1 completed new() | |
| 50 # Tue Apr 4 19:15:41 BST 2000 v 1.2 tested. Working. Simple methods created. | |
| 51 # Wed Apr 5 01:26:58 BST 2000 v 1.21 multiplicity, featuresnum() created | |
| 52 # Wed Apr 5 02:16:01 BST 2000 v 1.22 added upbound and downbound attributes | |
| 53 # Fri Apr 7 02:03:39 BST 2000 v 1.3 added printfeaturesnum and _set_Gene_in_all | |
| 54 # Fri Apr 7 02:53:05 BST 2000 v 2.0 added maxtranscript object creation | |
| 55 # Wed Jun 28 18:38:45 BST 2000 v 2.1 chaged croak to carp + return(-1) | |
| 56 # Wed Jul 12 15:19:26 BST 2000 v 2.11 ->strand call protected by if(ref(transcript)) | |
| 57 # Tue Jan 30 14:15:42 EST 2001 v 2.2 delete_Obj added, to flush circular references | |
| 58 # Wed Apr 4 13:29:59 BST 2001 v 2.3 LiveSeq-wide warn and verbose added | |
| 59 | |
| 60 use strict; | |
| 61 use Carp; | |
| 62 use vars qw($VERSION @ISA); | |
| 63 use Bio::LiveSeq::Prim_Transcript 1.0; # needed to create maxtranscript obj | |
| 64 | |
| 65 #use Bio::LiveSeq::SeqI 2.11; # uses SeqI, inherits from it | |
| 66 #@ISA=qw(Bio::LiveSeq::SeqI); | |
| 67 | |
| 68 =head2 new | |
| 69 | |
| 70 Title : new | |
| 71 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name", | |
| 72 -features => $hashref | |
| 73 -upbound => $min | |
| 74 -downbound => $max); | |
| 75 | |
| 76 Function: generates a new Bio::LiveSeq::Gene | |
| 77 Returns : reference to a new object of class Gene | |
| 78 Errorcode -1 | |
| 79 Args : one string and one hashreference containing all features defined | |
| 80 for the Gene and the references to the LiveSeq objects for those | |
| 81 features. | |
| 82 Two labels for defining boundaries of the gene. Usually the | |
| 83 boundaries will reflect max span of transcript, exon... features, | |
| 84 while the DNA sequence will be created with some flanking regions | |
| 85 (e.g. with the EMBL_SRS::gene2liveseq routine). | |
| 86 If these two labels are not given, they will default to the start | |
| 87 and end of the DNA object. | |
| 88 Note : the format of the hash has to be like | |
| 89 DNA => reference to LiveSeq::DNA object | |
| 90 Transcripts => reference to array of transcripts objrefs | |
| 91 Transclations => reference to array of transcripts objrefs | |
| 92 Exons => .... | |
| 93 Introns => .... | |
| 94 Prim_Transcripts => .... | |
| 95 Repeat_Units => .... | |
| 96 Repeat_Regions => .... | |
| 97 Only DNA and Transcripts are mandatory | |
| 98 | |
| 99 =cut | |
| 100 | |
| 101 sub new { | |
| 102 my ($thing, %args) = @_; | |
| 103 my $class = ref($thing) || $thing; | |
| 104 my ($i,$self,%gene); | |
| 105 | |
| 106 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound}); | |
| 107 | |
| 108 unless (ref($inputfeatures) eq "HASH") { | |
| 109 carp "$class not initialised because features hash not given"; | |
| 110 return (-1); | |
| 111 } | |
| 112 | |
| 113 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not | |
| 114 my $features=\%features; # the ones input'ed, that could get destroyed | |
| 115 | |
| 116 my $DNA=$features->{'DNA'}; | |
| 117 unless (ref($DNA) eq "Bio::LiveSeq::DNA") { | |
| 118 carp "$class not initialised because DNA feature not found"; | |
| 119 return (-1); | |
| 120 } | |
| 121 | |
| 122 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features | |
| 123 | |
| 124 my ($start,$end); | |
| 125 | |
| 126 my @Transcripts=@{$features->{'Transcripts'}}; | |
| 127 | |
| 128 my $strand; | |
| 129 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") { | |
| 130 $self->warn("$class not initialised: first Transcript not a LiveSeq object"); | |
| 131 return (-1); | |
| 132 } else { | |
| 133 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check | |
| 134 } | |
| 135 | |
| 136 for $i (@Transcripts) { | |
| 137 ($start,$end)=($i->start,$i->end); | |
| 138 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 139 $self->warn("$class not initialised because of problems in Transcripts feature"); | |
| 140 return (-1); | |
| 141 } else { | |
| 142 } | |
| 143 unless($minstart) { $minstart=$start; } # initialize | |
| 144 unless($maxend) { $maxend=$end; } # initialize | |
| 145 if ($i->strand != $strand) { | |
| 146 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
| 147 return (-1); | |
| 148 } | |
| 149 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
| 150 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
| 151 } | |
| 152 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions; | |
| 153 my @Prim_Transcripts; my @Exons; | |
| 154 if (defined($features->{'Translations'})) { | |
| 155 @Translations=@{$features->{'Translations'}}; } | |
| 156 if (defined($features->{'Exons'})) { | |
| 157 @Exons=@{$features->{'Exons'}}; } | |
| 158 if (defined($features->{'Introns'})) { | |
| 159 @Introns=@{$features->{'Introns'}}; } | |
| 160 if (defined($features->{'Repeat_Units'})) { | |
| 161 @Repeat_Units=@{$features->{'Repeat_Units'}}; } | |
| 162 if (defined($features->{'Repeat_Regions'})) { | |
| 163 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; } | |
| 164 if (defined($features->{'Prim_Transcripts'})) { | |
| 165 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; } | |
| 166 | |
| 167 | |
| 168 if (@Translations) { | |
| 169 for $i (@Translations) { | |
| 170 ($start,$end)=($i->start,$i->end); | |
| 171 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 172 $self->warn("$class not initialised because of problems in Translations feature"); | |
| 173 return (-1); | |
| 174 } | |
| 175 } | |
| 176 } | |
| 177 if (@Exons) { | |
| 178 for $i (@Exons) { | |
| 179 ($start,$end)=($i->start,$i->end); | |
| 180 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 181 $self->warn("$class not initialised because of problems in Exons feature"); | |
| 182 return (-1); | |
| 183 } | |
| 184 if ($i->strand != $strand) { | |
| 185 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
| 186 return (-1); | |
| 187 } | |
| 188 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
| 189 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
| 190 } | |
| 191 } | |
| 192 if (@Introns) { | |
| 193 for $i (@Introns) { | |
| 194 ($start,$end)=($i->start,$i->end); | |
| 195 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 196 $self->warn("$class not initialised because of problems in Introns feature"); | |
| 197 return (-1); | |
| 198 } | |
| 199 } | |
| 200 } | |
| 201 if (@Repeat_Units) { | |
| 202 for $i (@Repeat_Units) { | |
| 203 ($start,$end)=($i->start,$i->end); | |
| 204 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 205 $self->warn("$class not initialised because of problems in Repeat_Units feature"); | |
| 206 return (-1); | |
| 207 } | |
| 208 } | |
| 209 } | |
| 210 if (@Repeat_Regions) { | |
| 211 for $i (@Repeat_Regions) { | |
| 212 ($start,$end)=($i->start,$i->end); | |
| 213 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 214 $self->warn("$class not initialised because of problems in Repeat_Regions feature"); | |
| 215 return (-1); | |
| 216 } | |
| 217 } | |
| 218 } | |
| 219 if (@Prim_Transcripts) { | |
| 220 for $i (@Prim_Transcripts) { | |
| 221 ($start,$end)=($i->start,$i->end); | |
| 222 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { | |
| 223 $self->warn("$class not initialised because of problems in Prim_Transcripts feature"); | |
| 224 return (-1); | |
| 225 } | |
| 226 if ($i->strand != $strand) { | |
| 227 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); | |
| 228 return (-1); | |
| 229 } | |
| 230 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } | |
| 231 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } | |
| 232 } | |
| 233 } | |
| 234 | |
| 235 # create an array containing all obj references for all Gene Features | |
| 236 # useful for _set_Gene_in_all | |
| 237 my @allfeatures; | |
| 238 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts); | |
| 239 | |
| 240 # create hash holding numbers for Gene Features | |
| 241 my %multiplicity; | |
| 242 my $key; my @array; | |
| 243 foreach $key (keys(%features)) { | |
| 244 unless ($key eq "DNA") { | |
| 245 @array=@{$features{$key}}; | |
| 246 $multiplicity{$key}=scalar(@array); | |
| 247 } | |
| 248 } | |
| 249 $multiplicity{DNA}=1; | |
| 250 | |
| 251 # create maxtranscript object. It's a Prim_Transcript with start as the | |
| 252 # minimum start and end as the maximum end. | |
| 253 # usually these start and end will be the same as the gene->upbound and | |
| 254 # gene->downbound, but maybe there could be cases when this will be false | |
| 255 # (e.g. with repeat_units just before the prim_transcript or first exon, | |
| 256 # but still labelled with the same /gene qualifier) | |
| 257 | |
| 258 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA); | |
| 259 | |
| 260 | |
| 261 # check the upbound downbound parameters | |
| 262 if (defined($upbound)) { | |
| 263 unless ($DNA->valid($upbound)) { | |
| 264 $self->warn("$class not initialised because upbound label not valid"); | |
| 265 return (-1); | |
| 266 } | |
| 267 } else { | |
| 268 $upbound=$DNA->start; | |
| 269 } | |
| 270 if (defined($downbound)) { | |
| 271 unless ($DNA->valid($downbound)) { | |
| 272 $self->warn("$class not initialised because downbound label not valid"); | |
| 273 return (-1); | |
| 274 } | |
| 275 } else { | |
| 276 $downbound=$DNA->end; | |
| 277 } | |
| 278 | |
| 279 %gene = (name => $name, features => $features,multiplicity => \%multiplicity, | |
| 280 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript); | |
| 281 $self = \%gene; | |
| 282 $self = bless $self, $class; | |
| 283 _set_Gene_in_all($self,@allfeatures); | |
| 284 return $self; | |
| 285 } | |
| 286 | |
| 287 # this sets the "gene" objref in all the objects "belonging" to the Gene, | |
| 288 # i.e. in all its Features. | |
| 289 sub _set_Gene_in_all { | |
| 290 my $Gene=shift; | |
| 291 my $self; | |
| 292 foreach $self (@_) { | |
| 293 $self->gene($Gene); | |
| 294 } | |
| 295 } | |
| 296 | |
| 297 # you can get or set the name of the gene | |
| 298 sub name { | |
| 299 my ($self,$value) = @_; | |
| 300 if (defined $value) { | |
| 301 $self->{'name'} = $value; | |
| 302 } | |
| 303 unless (exists $self->{'name'}) { | |
| 304 return "unknown"; | |
| 305 } else { | |
| 306 return $self->{'name'}; | |
| 307 } | |
| 308 } | |
| 309 | |
| 310 # gets the features hash | |
| 311 sub features { | |
| 312 my $self=shift; | |
| 313 return ($self->{'features'}); | |
| 314 } | |
| 315 sub get_DNA { | |
| 316 my $self=shift; | |
| 317 return ($self->{'features'}->{'DNA'}); | |
| 318 } | |
| 319 sub get_Transcripts { | |
| 320 my $self=shift; | |
| 321 return ($self->{'features'}->{'Transcripts'}); | |
| 322 } | |
| 323 sub get_Translations { | |
| 324 my $self=shift; | |
| 325 return ($self->{'features'}->{'Translations'}); | |
| 326 } | |
| 327 sub get_Prim_Transcripts { | |
| 328 my $self=shift; | |
| 329 return ($self->{'features'}->{'Prim_Transcripts'}); | |
| 330 } | |
| 331 sub get_Repeat_Units { | |
| 332 my $self=shift; | |
| 333 return ($self->{'features'}->{'Repeat_Units'}); | |
| 334 } | |
| 335 sub get_Repeat_Regions { | |
| 336 my $self=shift; | |
| 337 return ($self->{'features'}->{'Repeat_Regions'}); | |
| 338 } | |
| 339 sub get_Introns { | |
| 340 my $self=shift; | |
| 341 return ($self->{'features'}->{'Introns'}); | |
| 342 } | |
| 343 sub get_Exons { | |
| 344 my $self=shift; | |
| 345 return ($self->{'features'}->{'Exons'}); | |
| 346 } | |
| 347 sub featuresnum { | |
| 348 my $self=shift; | |
| 349 return ($self->{'multiplicity'}); | |
| 350 } | |
| 351 sub upbound { | |
| 352 my $self=shift; | |
| 353 return ($self->{'upbound'}); | |
| 354 } | |
| 355 sub downbound { | |
| 356 my $self=shift; | |
| 357 return ($self->{'downbound'}); | |
| 358 } | |
| 359 sub printfeaturesnum { | |
| 360 my $self=shift; | |
| 361 my ($key,$value); | |
| 362 my %hash=%{$self->featuresnum}; | |
| 363 foreach $key (keys(%hash)) { | |
| 364 $value=$hash{$key}; | |
| 365 print "\t$key => $value\n"; | |
| 366 } | |
| 367 } | |
| 368 sub maxtranscript { | |
| 369 my $self=shift; | |
| 370 return ($self->{'maxtranscript'}); | |
| 371 } | |
| 372 | |
| 373 sub delete_Obj { | |
| 374 my $self = shift; | |
| 375 my @values= values %{$self}; | |
| 376 my @keys= keys %{$self}; | |
| 377 | |
| 378 foreach my $key ( @keys ) { | |
| 379 delete $self->{$key}; | |
| 380 } | |
| 381 foreach my $value ( @values ) { | |
| 382 if (index(ref($value),"LiveSeq") != -1) { # object case | |
| 383 eval { | |
| 384 # delete $self->{$value}; | |
| 385 $value->delete_Obj; | |
| 386 }; | |
| 387 } elsif (index(ref($value),"ARRAY") != -1) { # array case | |
| 388 my @array=@{$value}; | |
| 389 my $element; | |
| 390 foreach $element (@array) { | |
| 391 eval { | |
| 392 $element->delete_Obj; | |
| 393 }; | |
| 394 } | |
| 395 } elsif (index(ref($value),"HASH") != -1) { # object case | |
| 396 my %hash=%{$value}; | |
| 397 my $element; | |
| 398 foreach $element (%hash) { | |
| 399 eval { | |
| 400 $element->delete_Obj; | |
| 401 }; | |
| 402 } | |
| 403 } | |
| 404 } | |
| 405 return(1); | |
| 406 } | |
| 407 | |
| 408 | |
| 409 =head2 verbose | |
| 410 | |
| 411 Title : verbose | |
| 412 Usage : $self->verbose(0) | |
| 413 Function: Sets verbose level for how ->warn behaves | |
| 414 -1 = silent: no warning | |
| 415 0 = reduced: minimal warnings | |
| 416 1 = default: all warnings | |
| 417 2 = extended: all warnings + stack trace dump | |
| 418 3 = paranoid: a warning becomes a throw and the program dies | |
| 419 | |
| 420 Note: a quick way to set all LiveSeq objects at the same verbosity | |
| 421 level is to change the DNA level object, since they all look to | |
| 422 that one if their verbosity_level attribute is not set. | |
| 423 But the method offers fine tuning possibility by changing the | |
| 424 verbose level of each object in a different way. | |
| 425 | |
| 426 So for example, after $loader= and $gene= have been retrieved | |
| 427 by a program, the command $gene->verbose(0); would | |
| 428 set the default verbosity level to 0 for all objects. | |
| 429 | |
| 430 Returns : the current verbosity level | |
| 431 Args : -1,0,1,2 or 3 | |
| 432 | |
| 433 =cut | |
| 434 | |
| 435 | |
| 436 sub verbose { | |
| 437 my $self=shift; | |
| 438 my $value = shift; | |
| 439 return $self->{'features'}->{'DNA'}->verbose($value); | |
| 440 } | |
| 441 | |
| 442 sub warn { | |
| 443 my $self=shift; | |
| 444 my $value = shift; | |
| 445 return $self->{'features'}->{'DNA'}->warn($value); | |
| 446 } | |
| 447 | |
| 448 | |
| 449 | |
| 450 1; |
