Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/LiveSeq/Gene.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/LiveSeq/Gene.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,450 @@ +# $Id: Gene.pm,v 1.13 2001/06/18 08:27:53 heikki Exp $ +# +# bioperl module for Bio::LiveSeq::Gene +# +# Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> +# +# Copyright Joseph Insana +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::LiveSeq::Gene - Range abstract class for LiveSeq + +=head1 SYNOPSIS + + # documentation needed + +=head1 DESCRIPTION + +This is used as storage for all object references concerning a particular gene. + +=head1 AUTHOR - Joseph A.L. Insana + +Email: Insana@ebi.ac.uk, jinsana@gmx.net + +Address: + + EMBL Outstation, European Bioinformatics Institute + Wellcome Trust Genome Campus, Hinxton + Cambs. CB10 1SD, United Kingdom + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::LiveSeq::Gene; +$VERSION=2.3; + +# Version history: +# Tue Apr 4 15:22:41 BST 2000 v 1.0 begun +# Tue Apr 4 16:19:27 BST 2000 v 1.1 completed new() +# Tue Apr 4 19:15:41 BST 2000 v 1.2 tested. Working. Simple methods created. +# Wed Apr 5 01:26:58 BST 2000 v 1.21 multiplicity, featuresnum() created +# Wed Apr 5 02:16:01 BST 2000 v 1.22 added upbound and downbound attributes +# Fri Apr 7 02:03:39 BST 2000 v 1.3 added printfeaturesnum and _set_Gene_in_all +# Fri Apr 7 02:53:05 BST 2000 v 2.0 added maxtranscript object creation +# Wed Jun 28 18:38:45 BST 2000 v 2.1 chaged croak to carp + return(-1) +# Wed Jul 12 15:19:26 BST 2000 v 2.11 ->strand call protected by if(ref(transcript)) +# Tue Jan 30 14:15:42 EST 2001 v 2.2 delete_Obj added, to flush circular references +# Wed Apr 4 13:29:59 BST 2001 v 2.3 LiveSeq-wide warn and verbose added + +use strict; +use Carp; +use vars qw($VERSION @ISA); +use Bio::LiveSeq::Prim_Transcript 1.0; # needed to create maxtranscript obj + +#use Bio::LiveSeq::SeqI 2.11; # uses SeqI, inherits from it +#@ISA=qw(Bio::LiveSeq::SeqI); + +=head2 new + + Title : new + Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name", + -features => $hashref + -upbound => $min + -downbound => $max); + + Function: generates a new Bio::LiveSeq::Gene + Returns : reference to a new object of class Gene + Errorcode -1 + Args : one string and one hashreference containing all features defined + for the Gene and the references to the LiveSeq objects for those + features. + Two labels for defining boundaries of the gene. Usually the + boundaries will reflect max span of transcript, exon... features, + while the DNA sequence will be created with some flanking regions + (e.g. with the EMBL_SRS::gene2liveseq routine). + If these two labels are not given, they will default to the start + and end of the DNA object. + Note : the format of the hash has to be like + DNA => reference to LiveSeq::DNA object + Transcripts => reference to array of transcripts objrefs + Transclations => reference to array of transcripts objrefs + Exons => .... + Introns => .... + Prim_Transcripts => .... + Repeat_Units => .... + Repeat_Regions => .... + Only DNA and Transcripts are mandatory + +=cut + +sub new { + my ($thing, %args) = @_; + my $class = ref($thing) || $thing; + my ($i,$self,%gene); + + my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound}); + + unless (ref($inputfeatures) eq "HASH") { + carp "$class not initialised because features hash not given"; + return (-1); + } + + my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not + my $features=\%features; # the ones input'ed, that could get destroyed + + my $DNA=$features->{'DNA'}; + unless (ref($DNA) eq "Bio::LiveSeq::DNA") { + carp "$class not initialised because DNA feature not found"; + return (-1); + } + + my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features + + my ($start,$end); + + my @Transcripts=@{$features->{'Transcripts'}}; + + my $strand; + unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") { + $self->warn("$class not initialised: first Transcript not a LiveSeq object"); + return (-1); + } else { + $strand=$Transcripts[0]->strand; # for maxtranscript consistency check + } + + for $i (@Transcripts) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Transcripts feature"); + return (-1); + } else { + } + unless($minstart) { $minstart=$start; } # initialize + unless($maxend) { $maxend=$end; } # initialize + if ($i->strand != $strand) { + $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); + return (-1); + } + if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } + if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } + } + my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions; + my @Prim_Transcripts; my @Exons; + if (defined($features->{'Translations'})) { + @Translations=@{$features->{'Translations'}}; } + if (defined($features->{'Exons'})) { + @Exons=@{$features->{'Exons'}}; } + if (defined($features->{'Introns'})) { + @Introns=@{$features->{'Introns'}}; } + if (defined($features->{'Repeat_Units'})) { + @Repeat_Units=@{$features->{'Repeat_Units'}}; } + if (defined($features->{'Repeat_Regions'})) { + @Repeat_Regions=@{$features->{'Repeat_Regions'}}; } + if (defined($features->{'Prim_Transcripts'})) { + @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; } + + + if (@Translations) { + for $i (@Translations) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Translations feature"); + return (-1); + } + } + } + if (@Exons) { + for $i (@Exons) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Exons feature"); + return (-1); + } + if ($i->strand != $strand) { + $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); + return (-1); + } + if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } + if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } + } + } + if (@Introns) { + for $i (@Introns) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Introns feature"); + return (-1); + } + } + } + if (@Repeat_Units) { + for $i (@Repeat_Units) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Repeat_Units feature"); + return (-1); + } + } + } + if (@Repeat_Regions) { + for $i (@Repeat_Regions) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Repeat_Regions feature"); + return (-1); + } + } + } + if (@Prim_Transcripts) { + for $i (@Prim_Transcripts) { + ($start,$end)=($i->start,$i->end); + unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) { + $self->warn("$class not initialised because of problems in Prim_Transcripts feature"); + return (-1); + } + if ($i->strand != $strand) { + $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!"); + return (-1); + } + if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; } + if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; } + } + } + + # create an array containing all obj references for all Gene Features + # useful for _set_Gene_in_all + my @allfeatures; + push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts); + + # create hash holding numbers for Gene Features + my %multiplicity; + my $key; my @array; + foreach $key (keys(%features)) { + unless ($key eq "DNA") { + @array=@{$features{$key}}; + $multiplicity{$key}=scalar(@array); + } + } + $multiplicity{DNA}=1; + + # create maxtranscript object. It's a Prim_Transcript with start as the + # minimum start and end as the maximum end. + # usually these start and end will be the same as the gene->upbound and + # gene->downbound, but maybe there could be cases when this will be false + # (e.g. with repeat_units just before the prim_transcript or first exon, + # but still labelled with the same /gene qualifier) + + my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA); + + + # check the upbound downbound parameters + if (defined($upbound)) { + unless ($DNA->valid($upbound)) { + $self->warn("$class not initialised because upbound label not valid"); + return (-1); + } + } else { + $upbound=$DNA->start; + } + if (defined($downbound)) { + unless ($DNA->valid($downbound)) { + $self->warn("$class not initialised because downbound label not valid"); + return (-1); + } + } else { + $downbound=$DNA->end; + } + + %gene = (name => $name, features => $features,multiplicity => \%multiplicity, + upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript); + $self = \%gene; + $self = bless $self, $class; + _set_Gene_in_all($self,@allfeatures); + return $self; +} + +# this sets the "gene" objref in all the objects "belonging" to the Gene, +# i.e. in all its Features. +sub _set_Gene_in_all { + my $Gene=shift; + my $self; + foreach $self (@_) { + $self->gene($Gene); + } +} + +# you can get or set the name of the gene +sub name { + my ($self,$value) = @_; + if (defined $value) { + $self->{'name'} = $value; + } + unless (exists $self->{'name'}) { + return "unknown"; + } else { + return $self->{'name'}; + } +} + +# gets the features hash +sub features { + my $self=shift; + return ($self->{'features'}); +} +sub get_DNA { + my $self=shift; + return ($self->{'features'}->{'DNA'}); +} +sub get_Transcripts { + my $self=shift; + return ($self->{'features'}->{'Transcripts'}); +} +sub get_Translations { + my $self=shift; + return ($self->{'features'}->{'Translations'}); +} +sub get_Prim_Transcripts { + my $self=shift; + return ($self->{'features'}->{'Prim_Transcripts'}); +} +sub get_Repeat_Units { + my $self=shift; + return ($self->{'features'}->{'Repeat_Units'}); +} +sub get_Repeat_Regions { + my $self=shift; + return ($self->{'features'}->{'Repeat_Regions'}); +} +sub get_Introns { + my $self=shift; + return ($self->{'features'}->{'Introns'}); +} +sub get_Exons { + my $self=shift; + return ($self->{'features'}->{'Exons'}); +} +sub featuresnum { + my $self=shift; + return ($self->{'multiplicity'}); +} +sub upbound { + my $self=shift; + return ($self->{'upbound'}); +} +sub downbound { + my $self=shift; + return ($self->{'downbound'}); +} +sub printfeaturesnum { + my $self=shift; + my ($key,$value); + my %hash=%{$self->featuresnum}; + foreach $key (keys(%hash)) { + $value=$hash{$key}; + print "\t$key => $value\n"; + } +} +sub maxtranscript { + my $self=shift; + return ($self->{'maxtranscript'}); +} + +sub delete_Obj { + my $self = shift; + my @values= values %{$self}; + my @keys= keys %{$self}; + + foreach my $key ( @keys ) { + delete $self->{$key}; + } + foreach my $value ( @values ) { + if (index(ref($value),"LiveSeq") != -1) { # object case + eval { + # delete $self->{$value}; + $value->delete_Obj; + }; + } elsif (index(ref($value),"ARRAY") != -1) { # array case + my @array=@{$value}; + my $element; + foreach $element (@array) { + eval { + $element->delete_Obj; + }; + } + } elsif (index(ref($value),"HASH") != -1) { # object case + my %hash=%{$value}; + my $element; + foreach $element (%hash) { + eval { + $element->delete_Obj; + }; + } + } + } + return(1); +} + + +=head2 verbose + + Title : verbose + Usage : $self->verbose(0) + Function: Sets verbose level for how ->warn behaves + -1 = silent: no warning + 0 = reduced: minimal warnings + 1 = default: all warnings + 2 = extended: all warnings + stack trace dump + 3 = paranoid: a warning becomes a throw and the program dies + + Note: a quick way to set all LiveSeq objects at the same verbosity + level is to change the DNA level object, since they all look to + that one if their verbosity_level attribute is not set. + But the method offers fine tuning possibility by changing the + verbose level of each object in a different way. + + So for example, after $loader= and $gene= have been retrieved + by a program, the command $gene->verbose(0); would + set the default verbosity level to 0 for all objects. + + Returns : the current verbosity level + Args : -1,0,1,2 or 3 + +=cut + + +sub verbose { + my $self=shift; + my $value = shift; + return $self->{'features'}->{'DNA'}->verbose($value); +} + +sub warn { + my $self=shift; + my $value = shift; + return $self->{'features'}->{'DNA'}->warn($value); +} + + + +1;