Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/GenomeDB.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/GenomeDB.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,451 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=head1 NAME + +Bio::EnsEMBL::Compara::GenomeDB - Describes a genome (species, assembly, genebuild) and the location of its core database + +=head1 SYNOPSIS + use Bio::EnsEMBL::Compara::GenomeDB; + my $genome_db = new Bio::EnsEMBL::Compara::GenomeDB(); + +SET VALUES + $genome_db->dbID(22); + $genome_db->db_adaptor($dba); + $genome_db->name("Homo sapiens"); + $genome_db->assembly("NCBI36"); + $genome_db->taxon_id(9606); + $genome_db->taxon($taxon); + $genome_db->genebuild("2006-12-Ensembl"); + $genome_db->assembly_default(1); + $genome_db->locator("Bio::EnsEMBL::DBSQL::DBAdaptor/host=???;port=???;user=???;dbname=homo_sapiens_core_51_36m;species=Homo sapiens;disconnect_when_inactive=1"); + +GET VALUES + $dbID = $genome_db->dbID; + $genome_db_adaptor = $genome_db->adaptor; + $name = $genome_db->name; + $assembly = $genome_db->assembly; + $taxon_id = $genome_db->taxon_id; + $taxon = $genome_db->taxon; + $genebuild = $genome_db->genebuild; + $assembly_default = $genome_db->assembly_default; + $locator = $genome_db->locator; + + +=head1 DESCRIPTION + +The GenomeDB object stores information about each species including the taxon_id, species name, assembly, genebuild and the location of the core database. + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... 
package Bio::EnsEMBL::Compara::GenomeDB;

use strict;
use warnings;

use Scalar::Util qw(blessed);

use Bio::EnsEMBL::Utils::Exception qw(warning deprecate throw);
use Bio::EnsEMBL::DBLoader;

=head2 new

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
  Arg [2]    : (optional) string $name
  Arg [3]    : (optional) string $assembly
  Arg [4]    : (optional) int $taxon_id
  Arg [5]    : (optional) int $dbID
  Arg [6]    : (optional) string $genebuild
  Example    :
    my $genome_db = Bio::EnsEMBL::Compara::GenomeDB->new();
    $genome_db->db_adaptor($dba);
    $genome_db->name("Homo sapiens");
    $genome_db->assembly("NCBI36");
    $genome_db->taxon_id(9606);
    $genome_db->dbID(22);
    $genome_db->genebuild("2006-12-Ensembl");

  Description: Creates a new GenomeDB object. All arguments are positional
               and optional; an undefined argument leaves the corresponding
               attribute unset.
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
    my ($caller, $dba, $name, $assembly, $taxon_id, $dbID, $genebuild) = @_;

    my $class = ref($caller) || $caller;
    my $self  = bless {}, $class;

    # db_adaptor() only behaves as a setter for true values; calling it with
    # a false value would run the getter path and attempt a locator
    # connection, so the truth test is kept for this argument.
    $self->db_adaptor($dba) if $dba;

    # Test definedness rather than truth so that defined-but-false values
    # (for instance a taxon_id of 0) are stored instead of silently dropped.
    $self->name($name)           if defined $name;
    $self->assembly($assembly)   if defined $assembly;
    $self->taxon_id($taxon_id)   if defined $taxon_id;
    $self->dbID($dbID)           if defined $dbID;
    $self->genebuild($genebuild) if defined $genebuild;

    return $self;
}

=head2 new_fast

  Arg [1]    : hash reference $hashref
  Example    : my $gdb = Bio::EnsEMBL::Compara::GenomeDB->new_fast({
                   'dbID' => 22,
                   'name' => 'Homo sapiens',
               });
  Description: This is an ultra fast constructor which requires knowledge of
               the objects internals to be used. The hash reference is
               blessed in place (it is not copied), so its keys must match
               the ones used by the accessors of this class.
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : none
  Caller     : Bio::EnsEMBL::Compara::DBSQL::GenomeDBAdaptor
  Status     : Stable

=cut

sub new_fast {
    my ($class, $hashref) = @_;

    return bless $hashref, $class;
}


=head2 db_adaptor

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
               The DBAdaptor containing sequence information for the genome
               represented by this object.
  Example    : $gdb->db_adaptor($dba);
  Description: Getter/Setter for the DBAdaptor containing sequence
               information for the genome represented by this object.
               A true argument that is not a Bio::EnsEMBL::DBSQL::DBAdaptor
               object clears the stored adaptor. Whenever no adaptor is
               stored, a connection is attempted through the locator
               (see connect_to_genome_locator).
  Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef when none is set and
               the locator cannot be used
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub db_adaptor {
    my ($self, $dba) = @_;

    if ($dba) {
        # blessed() guards the ->isa call: invoking a method on an unblessed
        # reference (e.g. a plain hash ref) would die.
        my $is_dba = blessed($dba)
            && $dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor');
        $self->{'_db_adaptor'} = $is_dba ? $dba : undef;
    }

    # Lazily fall back to the locator when no adaptor is stored.
    unless ($self->{'_db_adaptor'}) {
        $self->{'_db_adaptor'} = $self->connect_to_genome_locator;
    }

    return $self->{'_db_adaptor'};
}


=head2 name

  Arg [1]    : (optional) string $value
  Example    : $gdb->name('Homo sapiens');
  Description: Getter setter for the name of this genome database, usually
               just the species name.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name {
    my ($self, $value) = @_;

    if (defined $value) {
        $self->{'name'} = $value;
    }
    return $self->{'name'};
}


=head2 short_name

  Example    : $gdb->short_name;
  Description: The name of this genome in the Gspe ('G'enera
               'spe'cies) format. Can also handle 'G'enera 's'pecies
               's'ub 's'pecies (Gsss). Names with a single word are
               truncated to their first four characters. Returns an empty
               string when no name is set.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub short_name {
    my $self = shift;

    my $name = $self->name;
    return '' unless defined $name;

    # Upper-case the first character of every word, then turn underscores
    # into spaces. The order matters: '_' is a word character, so a word
    # that follows an underscore is not capitalised.
    $name =~ s/\b(\w)/\U$1/g;
    $name =~ s/\_/\ /g;

    # Try the longest form first: four words, then three, then two.
    return $name if $name =~ s/(\S)\S*\s(\S)\S*\s(\S)\S*\s(\S).*/$1$2$3$4/;
    return $name if $name =~ s/(\S)\S*\s(\S)\S*\s(\S{2,2}).*/$1$2$3/;
    return $name if $name =~ s/(\S)\S*\s(\S{3,3}).*/$1$2/;

    # No pattern applied: fall back to the first four characters.
    return substr($name, 0, 4);
}

=head2 get_short_name

  Example    : $gdb->get_short_name;
  Description: Alias for short_name. The name of this genome in the Gspe
               ('G'enera 'spe'cies) format. Can also handle 'G'enera
               's'pecies 's'ub 's'pecies (Gsss)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_short_name {
    my $self = shift;

    return $self->short_name;
}


=head2 dbID

  Arg [1]    : (optional) int $value the new value of this objects database
               identifier
  Example    : $dbID = $genome_db->dbID;
  Description: Getter/Setter for the internal identifier of this GenomeDB
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub dbID {
    my ($self, $value) = @_;

    if (defined $value) {
        $self->{'dbID'} = $value;
    }
    return $self->{'dbID'};
}


=head2 adaptor

  Arg [1]    : (optional) Bio::EnsEMBL::Compara::GenomeDBAdaptor $adaptor
  Example    : $adaptor = $GenomeDB->adaptor();
  Description: Getter/Setter for the GenomeDB object adaptor used
               by this GenomeDB for database interaction.
  Returntype : Bio::EnsEMBL::Compara::GenomeDBAdaptor
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub adaptor {
    my ($self, $value) = @_;

    if (defined $value) {
        $self->{'adaptor'} = $value;
    }
    return $self->{'adaptor'};
}


=head2 assembly

  Arg [1]    : (optional) string
  Example    : $gdb->assembly('NCBI36');
  Description: Getter/Setter for the assembly type of this genome db.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly {
    my ($self, $assembly) = @_;

    # Definedness test, consistent with name(), dbID() and taxon_id().
    if (defined $assembly) {
        $self->{'assembly'} = $assembly;
    }
    return $self->{'assembly'};
}

=head2 assembly_default

  Arg [1]    : (optional) int
  Example    : $gdb->assembly_default(1);
  Description: Getter/Setter for the assembly_default of this genome db.
               The value defaults to '1' when it has never been set.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_default {
    my ($self, $boolean) = @_;

    if (defined $boolean) {
        $self->{'assembly_default'} = $boolean;
    }

    # Lazily initialise the stored value the first time it is read.
    $self->{'assembly_default'} = '1'
        unless defined $self->{'assembly_default'};

    return $self->{'assembly_default'};
}

=head2 genebuild

  Arg [1]    : (optional) string
  Example    : $gdb->genebuild('2006-12-Ensembl');
  Description: Getter/Setter for the genebuild type of this genome db.
               An unset (or undefined) value is normalised to the empty
               string.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub genebuild {
    my $self = shift;

    # Any explicit argument, even undef, overwrites the stored value.
    $self->{'genebuild'} = shift if @_;
    $self->{'genebuild'} = '' unless defined $self->{'genebuild'};

    return $self->{'genebuild'};
}


=head2 taxon_id

  Arg [1]    : (optional) int
  Example    : $gdb->taxon_id(9606);
  Description: Getter/Setter for the taxon id of the contained genome db
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub taxon_id {
    my ($self, $taxon_id) = @_;

    if (defined $taxon_id) {
        $self->{'taxon_id'} = $taxon_id;
    }
    return $self->{'taxon_id'};
}

=head2 taxon

  Description: uses taxon_id to fetch the NCBITaxon object. The fetched
               object is cached on this GenomeDB and returned directly by
               later calls.
  Returntype : Bio::EnsEMBL::Compara::NCBITaxon object
  Exceptions : if taxon_id or adaptor not defined
  Caller     : general
  Status     : Stable

=cut

sub taxon {
    my $self = shift;

    return $self->{'_taxon'} if defined $self->{'_taxon'};

    unless (defined $self->taxon_id and $self->adaptor) {
        throw("can't fetch Taxon without a taxon_id and an adaptor");
    }

    my $ncbi_taxon_adaptor = $self->adaptor->db->get_NCBITaxonAdaptor;
    $self->{'_taxon'} =
        $ncbi_taxon_adaptor->fetch_node_by_taxon_id($self->{'taxon_id'});

    return $self->{'_taxon'};
}


=head2 locator

  Arg [1]    : (optional) string
  Description: Getter/Setter for the string which describes where the
               external genome (ensembl core) database is located.
               An unset (or undefined) value is normalised to the empty
               string. Locator format is:
               "Bio::EnsEMBL::DBSQL::DBAdaptor/host=ecs4;port=3351;user=ensro;dbname=mus_musculus_core_20_32"
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub locator {
    my $self = shift;

    # Any explicit argument, even undef, overwrites the stored value.
    $self->{'locator'} = shift if @_;
    $self->{'locator'} = '' unless defined $self->{'locator'};

    return $self->{'locator'};
}

=head2 connect_to_genome_locator

  Args       : (none)
  Description: uses the locator string to connect to the external genome database
  Returntype : DBConnection/DBAdaptor defined in locator string
               (usually a Bio::EnsEMBL::DBSQL::DBAdaptor)
               return undef if locator undefined or unable to connect
  Exceptions : none (a failure to load the locator is reported with warn)
  Caller     : internal private method
  Status     : Stable

=cut

sub connect_to_genome_locator {
    my $self = shift;

    my $locator = $self->locator;

    # An explicit undef (rather than a bare return) is deliberate: the
    # documented contract is "returns undef", and list-context callers
    # must keep receiving exactly one value.
    return undef if $locator eq '';

    my $genomeDBA;

    # Rely on the eval's own return value instead of inspecting $@ after the
    # fact: $@ can be reset (e.g. by a destructor) before it is examined.
    my $loaded = eval {
        $genomeDBA = Bio::EnsEMBL::DBLoader->new($locator);
        1;
    };

    unless ($loaded) {
        my $reason = (defined $@ && $@ ne '') ? $@ : 'unknown error';
        warn "The locator could not be loaded because: $reason";
        return undef;
    }

    return $genomeDBA;
}


=head2 toString

  Args       : (none)
  Example    : print $genome_db->toString()."\n";
  Description: returns a stringified representation of the object. A false
               dbID is shown as '?'; other unset attributes are shown as
               empty strings.
  Returntype : string

=cut

sub toString {
    my $self = shift;

    # Map undefined attributes to '' so that the concatenation below does not
    # emit "uninitialized value" warnings; the resulting text is unchanged.
    my ($name, $assembly, $taxon_id) =
        map { defined $_ ? $_ : '' }
        ($self->name, $self->assembly, $self->taxon_id);

    return ref($self).": dbID=".($self->dbID || '?')
        .", name='".$name
        ."', assembly='".$assembly
        ."', genebuild='".$self->genebuild
        ."', taxon_id='".$taxon_id
        ."', locator='".$self->locator
        ."'";
}


1;