Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/MethodLinkSpeciesSet.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/MethodLinkSpeciesSet.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,555 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=head1 NAME + +Bio::EnsEMBL::Compara::MethodLinkSpeciesSet - +Relates every method_link with the species_set for which it has been used + +=head1 SYNOPSIS + + use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; + my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( + -adaptor => $method_link_species_set_adaptor, + -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ'), + -species_set_obj => Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]), + -max_alignment_length => 10000, + ); + +SET VALUES + $method_link_species_set->dbID( 12 ); + $method_link_species_set->adaptor( $mlss_adaptor ); + $method_link_species_set->method( Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ') ); + $method_link_species_set->species_set( Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]) ); + $method_link_species_set->max_alignment_length( 10000 ); + +GET VALUES + my $mlss_id = $method_link_species_set->dbID(); + my $mlss_adaptor = $method_link_species_set->adaptor(); + my $method = $method_link_species_set->method(); + my $method_link_id = $method_link_species_set->method->dbID(); + my $method_link_type = $method_link_species_set->method->type(); + my $species_set = $method_link_species_set->species_set_obj(); + my $species_set_id = $method_link_species_set->species_set_obj->dbID(); + my $genome_dbs = $method_link_species_set->species_set_obj->genome_dbs(); + my $max_alignment_length = $method_link_species_set->max_alignment_length(); + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + + + +package Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; + +use strict; + +use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate); +use Bio::EnsEMBL::Utils::Argument qw(rearrange); +use Bio::EnsEMBL::Compara::Method; +use Bio::EnsEMBL::Compara::SpeciesSet; + +use base ( 'Bio::EnsEMBL::Storable', # inherit dbID(), adaptor() and new() methods + 'Bio::EnsEMBL::Compara::Taggable' # inherit everything related to tagability + ); + +my $DEFAULT_MAX_ALIGNMENT = 20000; + + +=head2 new (CONSTRUCTOR) + + Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object) + Arg [-ADAPTOR] : (opt.) Bio::EnsEMBL::Compara::DBSQL::MethodLinkSpeciesSetAdaptor $adaptor + (the adaptor for connecting to the database) + Arg [-METHOD] : Bio::EnsEMBL::Compara::Method $method object + Arg [-SPECIES_SET_OBJ]: Bio::EnsEMBL::Compara::SpeciesSet $species_set object + Arg [-NAME] : (opt.) string $name (the name for this method_link_species_set) + Arg [-SOURCE] : (opt.) string $source (the source of these data) + Arg [-URL] : (opt.) string $url (the original url of these data) + Arg [-MAX_ALGINMENT_LENGTH] + : (opt.) int $max_alignment_length (the length of the largest alignment + for this MethodLinkSpeciesSet (only used for genomic alignments) + Example : my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( + -adaptor => $method_link_species_set_adaptor, + -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ' ), + -species_set => [$gdb1, $gdb2, $gdb3], + -max_alignment_length => 10000, + ); + Description : Creates a new MethodLinkSpeciesSet object + Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object + Exceptions : none + Caller : general + +=cut + +sub new { + my $caller = shift @_; + my $class = ref($caller) || $caller; + + my $self = $class->SUPER::new(@_); # deal with Storable stuff + + my ($method, $method_link_id, $method_link_type, $method_link_class, + $species_set_obj, $species_set, $species_set_id, + $name, $source, $url, $max_alignment_length) = + rearrange([qw( + METHOD METHOD_LINK_ID METHOD_LINK_TYPE METHOD_LINK_CLASS + SPECIES_SET_OBJ SPECIES_SET SPECIES_SET_ID + NAME SOURCE URL MAX_ALIGNMENT_LENGTH)], @_); + + if($method) { + $self->method($method); + } else { + warning("Please consider using -method to set the method instead of older/deprecated ways to do it"); + } + + # the following three should generate a deprecated warning: + $self->method_link_id($method_link_id) if (defined ($method_link_id)); + $self->method_link_type($method_link_type) if (defined ($method_link_type)); + $self->method_link_class($method_link_class) if (defined ($method_link_class)); + + warning("method has not been set in MLSS->new") unless($self->method()); + + $self->species_set_obj($species_set_obj) if (defined ($species_set_obj)); + $self->species_set($species_set) if (defined ($species_set)); + $self->species_set_id($species_set_id) if (defined ($species_set_id)); + + warning("species_set_obj has not been set in MLSS->new") unless($self->species_set_obj()); + + $self->name($name) if (defined ($name)); + $self->source($source) if (defined ($source)); + $self->url($url) if (defined ($url)); + $self->max_alignment_length($max_alignment_length) if (defined ($max_alignment_length)); + + return $self; +} + + +sub new_fast { + my $class = shift; + my $hashref = shift; + + return bless $hashref, $class; +} + + +sub method { + my $self = shift @_; + + if(@_) { + $self->{'method'} = shift @_; + } + + return $self->{'method'}; +} + + +=head2 method_link_id + + Arg [1] : (opt.) integer method_link_id + Example : my $meth_lnk_id = $method_link_species_set->method_link_id(); + Example : $method_link_species_set->method_link_id(23); + Description: get/set for attribute method_link_id + Returntype : integer + Exceptions : none + Caller : general + Status : DEPRECATED, use $mlss->method->dbID instead + +=cut + +sub method_link_id { + my $self = shift @_; + + deprecate("MLSS->method_link_id() is DEPRECATED, please use MLSS->method->dbID()"); + + if(@_) { + if($self->method) { + $self->method->dbID( @_ ); + } else { + $self->method( Bio::EnsEMBL::Compara::Method->new(-dbID => @_) ); + } + } + + # type is known => fetch the method from DB and set all of its attributes + if (!$self->method->dbID and $self->adaptor and my $type = $self->method->type) { + my $method_adaptor = $self->adaptor->db->getMethodAdaptor; + if( my $fetched_method = $method_adaptor->fetch_by_type( $type ) ) { + $self->method( $fetched_method ); + } else { + warning("Could not fetch method by type '$type'"); + } + } + + return $self->method->dbID(); +} + + +=head2 method_link_type + + Arg [1] : (opt.) string method_link_type + Example : my $meth_lnk_type = $method_link_species_set->method_link_type(); + Example : $method_link_species_set->method_link_type("BLASTZ_NET"); + Description: get/set for attribute method_link_type + Returntype : string + Exceptions : none + Caller : general + Status : DEPRECATED, use $mlss->method->type instead + +=cut + +sub method_link_type { + my $self = shift @_; + + deprecate("MLSS->method_link_type() is DEPRECATED, please use MLSS->method->type()"); + + if(@_) { + if($self->method) { + $self->method->type( @_ ); + } else { + $self->method( Bio::EnsEMBL::Compara::Method->new(-type => @_) ); + } + } + + # dbID is known => fetch the method from DB and set all of its attributes + if (!$self->method->type and $self->adaptor and my $dbID = $self->method->dbID) { + my $method_adaptor = $self->adaptor->db->getMethodAdaptor; + if( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) { + $self->method( $fetched_method ); + } else { + warning("Could not fetch method by dbID '$dbID'"); + } + } + + return $self->method->type(); +} + + +=head2 method_link_class + + Arg [1] : (opt.) string method_link_class + Example : my $meth_lnk_class = $method_link_species_set->method_link_class(); + Example : $method_link_species_set->method_link_class("GenomicAlignBlock.multiple_alignment"); + Description: get/set for attribute method_link_class + Returntype : string + Exceptions : none + Caller : general + Status : DEPRECATED, use $mlss->method->class instead + +=cut + +sub method_link_class { + my $self = shift @_; + + deprecate("MLSS->method_link_class() is DEPRECATED, please use MLSS->method->class()"); + + if(@_) { + if($self->method) { + $self->method->class( @_ ); + } else { + $self->method( Bio::EnsEMBL::Compara::Method->new(-class => @_) ); + } + } + + # dbID is known => fetch the method from DB and set all of its attributes + if (!$self->method->class and $self->adaptor and my $dbID = $self->method->dbID) { + my $method_adaptor = $self->adaptor->db->getMethodAdaptor; + if( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) { + $self->method( $fetched_method ); + } else { + warning("Could not fetch method by dbID '$dbID'"); + } + } + + return $self->method->class(); +} + + +=head2 species_set_obj + + Arg [1] : (opt.) Bio::EnsEMBL::Compara::SpeciesSet species_set object + Example : my $species_set_obj = $mlss->species_set_obj(); + Example : $mlss->species_set_obj( $species_set_obj ); + Description: getter/setter for species_set_obj attribute + Returntype : Bio::EnsEMBL::Compara::SpeciesSet + Exceptions : none + Caller : general + +=cut + +sub species_set_obj { + my $self = shift @_; + + if(@_) { + $self->{'species_set'} = shift @_; + } + + return $self->{'species_set'}; +} + + +sub _set_genome_dbs { + my ($self, $arg) = @_; + + my %genome_db_hash = (); + foreach my $gdb (@$arg) { + throw("undefined value used as a Bio::EnsEMBL::Compara::GenomeDB\n") if (!defined($gdb)); + throw("$gdb must be a Bio::EnsEMBL::Compara::GenomeDB\n") unless $gdb->isa("Bio::EnsEMBL::Compara::GenomeDB"); + + if(defined $genome_db_hash{$gdb->dbID}) { + warn("GenomeDB (".$gdb->name."; dbID=".$gdb->dbID .") appears twice in this Bio::EnsEMBL::Compara::MethodLinkSpeciesSet\n"); + } else { + $genome_db_hash{$gdb->dbID} = $gdb; + } + } + my $genome_dbs = [ values %genome_db_hash ] ; + + my $species_set_id = $self->adaptor && $self->adaptor->db->get_SpeciesSetAdaptor->find_species_set_id_by_GenomeDBs_mix( $genome_dbs ); + + my $ss_obj = Bio::EnsEMBL::Compara::SpeciesSet->new( + -genome_dbs => $genome_dbs, + $species_set_id ? (-species_set_id => $species_set_id) : (), + ); + $self->species_set_obj( $ss_obj ); +} + + + +=head2 species_set_id + + Arg [1] : (opt.) integer species_set_id + Example : my $species_set_id = $method_link_species_set->species_set_id(); + Example : $method_link_species_set->species_set_id(23); + Description: get/set for attribute species_set_id + Returntype : integer + Exceptions : none + Caller : general + Status : DEPRECATED, use $mlss->species_set_obj->dbID instead + +=cut + +sub species_set_id { + my $self = shift @_; + + deprecate("MLSS->species_set_id() is DEPRECATED, please use MLSS->species_set_obj->dbID()"); + + if(my $species_set_obj = $self->species_set_obj) { + return $species_set_obj->dbID( @_ ); + } else { + warning("SpeciesSet object has not been set, so cannot deal with its dbID"); + return undef; + } +} + + +=head2 species_set + + Arg [1] : (opt.) listref of Bio::EnsEMBL::Compara::GenomeDB objects + Example : my $meth_lnk_species_set = $method_link_species_set->species_set(); + Example : $method_link_species_set->species_set([$gdb1, $gdb2, $gdb3]); + Description: get/set for attribute species_set + Returntype : listref of Bio::EnsEMBL::Compara::GenomeDB objects + Exceptions : Thrown if any argument is not a Bio::EnsEMBL::Compara::GenomeDB + object or a GenomeDB entry appears several times + Caller : general + Status : DEPRECATED, use $mlss->species_set_obj->genome_dbs instead + +=cut + +sub species_set { + my ($self, $arg) = @_; + + deprecate("MLSS->species_set() is DEPRECATED, please use MLSS->species_set_obj->genome_dbs()"); + + if($arg) { + if(UNIVERSAL::isa($arg, 'Bio::EnsEMBL::Compara::SpeciesSet')) { + + $self->species_set_obj( $arg ); + + } elsif((ref($arg) eq 'ARRAY') and @$arg) { + + $self->_set_genome_dbs( $arg ); + + } else { + die "Wrong type of argument to $self->species_set()"; + } + } + return $self->species_set_obj->genome_dbs; # for compatibility, we shall keep this method until everyone has switched to using species_set_obj() +} + + +=head2 name + + Arg [1] : (opt.) string $name + Example : my $name = $method_link_species_set->name(); + Example : $method_link_species_set->name("families"); + Description: get/set for attribute name + Returntype : string + Exceptions : none + Caller : general + +=cut + +sub name { + my ($self, $arg) = @_; + + if (defined($arg)) { + $self->{'name'} = $arg ; + } + + return $self->{'name'}; +} + + +=head2 source + + Arg [1] : (opt.) string $name + Example : my $name = $method_link_species_set->source(); + Example : $method_link_species_set->source("ensembl"); + Description: get/set for attribute source. The source refers to who + generated the data in a first instance (ensembl, ucsc...) + Returntype : string + Exceptions : none + Caller : general + +=cut + +sub source { + my ($self, $arg) = @_; + + if (defined($arg)) { + $self->{'source'} = $arg ; + } + + return $self->{'source'}; +} + + +=head2 url + + Arg [1] : (opt.) string $url + Example : my $name = $method_link_species_set->source(); + Example : $method_link_species_set->url("http://hgdownload.cse.ucsc.edu/goldenPath/monDom1/vsHg17/"); + Description: get/set for attribute url. Defines where the data come from if they + have been imported + Returntype : string + Exceptions : none + Caller : general + +=cut + +sub url { + my ($self, $arg) = @_; + + if (defined($arg)) { + $self->{'url'} = $arg ; + } + + return $self->{'url'}; +} + + +=head2 get_common_classification + + Arg [1] : -none- + Example : my $common_classification = $method_link_species_set-> + get_common_classification(); + Description: This method fetches the taxonimic classifications for all the + species included in this + Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object and + returns the common part of them. + Returntype : array of strings + Exceptions : + Caller : general + +=cut + +sub get_common_classification { + my ($self) = @_; + my $common_classification; + + my $species_set = $self->species_set(); + + foreach my $this_genome_db (@$species_set) { + my @classification = split(" ", $this_genome_db->taxon->classification); + if (!defined($common_classification)) { + @$common_classification = @classification; + } else { + my $new_common_classification = []; + for (my $i = 0; $i <@classification; $i++) { + for (my $j = 0; $j<@$common_classification; $j++) { + if ($classification[$i] eq $common_classification->[$j]) { + push(@$new_common_classification, splice(@$common_classification, $j, 1)); + last; + } + } + } + $common_classification = $new_common_classification; + } + } + + return $common_classification; +} + + +=head2 max_alignment_length + + Arg [1] : (opt.) int $max_alignment_length + Example : my $max_alignment_length = $method_link_species_set-> + max_alignment_length(); + Example : $method_link_species_set->max_alignment_length(1000); + Description: get/set for attribute max_alignment_length + Returntype : integer + Exceptions : + Caller : general + +=cut + +sub max_alignment_length { + my $self = shift @_; + + if(@_) { + $self->add_tag('max_align', shift @_); + } + + return $self->get_value_for_tag('max_align') || $DEFAULT_MAX_ALIGNMENT; +} + + +=head2 toString + + Args : (none) + Example : print $mlss->toString()."\n"; + Description: returns a stringified representation of the method_link_species_set + Returntype : string + +=cut + +sub toString { + my $self = shift; + + return ref($self).": dbID=".($self->dbID || '?'). + ", name='".$self->name. + "', source='".$self->source. + "', url='".$self->url. + "', max_alignment_length=".($self->max_alignment_length || '?'). + ", {".$self->method->toString."} x {".$self->species_set_obj->toString."}"; +} + + +1;