Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Ontology/SimpleGOEngine.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Ontology/SimpleGOEngine.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,888 @@ +# $Id: SimpleGOEngine.pm,v 1.3.2.6 2003/06/30 05:04:06 lapp Exp $ +# +# BioPerl module for Bio::Ontology::SimpleGOEngine +# +# Cared for by Christian M. Zmasek <czmasek@gnf.org> or <cmzmasek@yahoo.com> +# +# (c) Christian M. Zmasek, czmasek@gnf.org, 2002. +# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. +# +# You may distribute this module under the same terms as perl itself. +# Refer to the Perl Artistic License (see the license accompanying this +# software package, or see http://www.perl.com/language/misc/Artistic.html) +# for the terms under which you may use, modify, and redistribute this module. +# +# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +SimpleGOEngine - a Ontology Engine for GO implementing OntologyEngineI + +=head1 SYNOPSIS + + use Bio::Ontology::SimpleGOEngine; + + my $parser = Bio::Ontology::SimpleGOEngine->new + ( -defs_file => "/home/czmasek/GO/GO.defs", + -files => ["/home/czmasek/GO/component.ontology", + "/home/czmasek/GO/function.ontology", + "/home/czmasek/GO/process.ontology"] ); + + my $engine = $parser->parse(); + + my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" ); + my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" ); + + +=head1 DESCRIPTION + +Needs Graph.pm from CPAN. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to the +Bioperl mailing lists Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +report bugs to the Bioperl bug tracking system to help us keep track + the bugs and their resolution. Bug reports can be submitted via + email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR + +Christian M. Zmasek + +Email: czmasek@gnf.org or cmzmasek@yahoo.com + +WWW: http://www.genetics.wustl.edu/eddy/people/zmasek/ + +Address: + + Genomics Institute of the Novartis Research Foundation + 10675 John Jay Hopkins Drive + San Diego, CA 92121 + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + + +package Bio::Ontology::SimpleGOEngine; + +use Graph::Directed; + +use vars qw( @ISA ); +use strict; +use Bio::Root::Root; +use Bio::Ontology::RelationshipType; +use Bio::Ontology::RelationshipFactory; +use Bio::Ontology::OntologyEngineI; + +use constant TRUE => 1; +use constant FALSE => 0; +use constant IS_A => "IS_A"; +use constant PART_OF => "PART_OF"; +use constant TERM => "TERM"; +use constant TYPE => "TYPE"; +use constant ONTOLOGY => "ONTOLOGY"; + +@ISA = qw( Bio::Root::Root + Bio::Ontology::OntologyEngineI ); + + + +=head2 new + + Title : new + Usage : $engine = Bio::Ontology::SimpleGOEngine->new() + Function: Creates a new SimpleGOEngine + Returns : A new SimpleGOEngine object + Args : + +=cut + +sub new { + my( $class, @args ) = @_; + + my $self = $class->SUPER::new( @args ); + + $self->init(); + + return $self; +} # new + + + +=head2 init + + Title : init() + Usage : $engine->init(); + Function: Initializes this Engine. + Returns : + Args : + +=cut + +sub init { + my ( $self ) = @_; + + $self->{ "_is_a_relationship" } = Bio::Ontology::RelationshipType->get_instance( IS_A ); + $self->{ "_part_of_relationship" } = Bio::Ontology::RelationshipType->get_instance( PART_OF ); + + $self->graph( Graph::Directed->new() ); + + # set defaults for the factories + $self->relationship_factory(Bio::Ontology::RelationshipFactory->new( + -type => "Bio::Ontology::Relationship")); + +} # init + + + +=head2 is_a_relationship + + Title : is_a_relationship() + Usage : $IS_A = $engine->is_a_relationship(); + Function: Returns a Bio::Ontology::RelationshipType object for "is-a" + relationships + Returns : Bio::Ontology::RelationshipType set to "IS_A" + Args : + +=cut + +sub is_a_relationship { + my ( $self, $value ) = @_; + + if ( defined $value ) { + $self->throw( "Attempted to change immutable field" ); + } + + return $self->{ "_is_a_relationship" }; +} # is_a_relationship + + + +=head2 part_of_relationship + + Title : part_of_relationship() + Usage : $PART_OF = $engine->part_of_relationship(); + Function: Returns a Bio::Ontology::RelationshipType object for "part-of" + relationships + Returns : Bio::Ontology::RelationshipType set to "PART_OF" + Args : + +=cut + +sub part_of_relationship { + my ( $self, $value ) = @_; + + if ( defined $value ) { + $self->throw( "Attempted to change immutable field" ); + } + + return $self->{ "_part_of_relationship" }; +} # part_of_relationship + + + + +=head2 add_term + + Title : add_term + Usage : $engine->add_term( $term_obj ); + Function: Adds a Bio::Ontology::TermI to this engine + Returns : true if the term was added and false otherwise (e.g., if the + term already existed in the ontology engine) + Args : Bio::Ontology::TermI + + +=cut + +sub add_term { + my ( $self, $term ) = @_; + + return FALSE if $self->has_term( $term ); + + my $goid = $self->_get_id($term); + + $self->graph()->add_vertex( $goid ); + $self->graph()->set_attribute( TERM, $goid, $term ); + + return TRUE; + +} # add_term + + + +=head2 has_term + + Title : has_term + Usage : $engine->has_term( $term ); + Function: Checks whether this engine contains a particular term + Returns : true or false + Args : Bio::Ontology::TermI + or + erm identifier (e.g. "GO:0012345") + + +=cut + +sub has_term { + my ( $self, $term ) = @_; + $term = $self->_get_id( $term ); + if ( $self->graph()->has_vertex( $term ) ) { + return TRUE; + } + else { + return FALSE; + } + +} # has_term + + + +=head2 add_relationship + + Title : add_relationship + Usage : $engine->add_relationship( $relationship ); + $engine->add_relatioship( $subject_term, $predicate_term, $object_term, $ontology ); + $engine->add_relatioship( $subject_id, $predicate_id, $object_id, $ontology); + Function: Adds a relationship to this engine + Returns : true if successfully added, false otherwise + Args : term id, Bio::Ontology::TermI (rel.type), term id, ontology + or + Bio::Ontology::TermI, Bio::Ontology::TermI (rel.type), Bio::Ontology::TermI, ontology + or + Bio::Ontology::RelationshipI + +=cut + +# term objs or term ids +sub add_relationship { + my ( $self, $child, $type, $parent, $ont ) = @_; + + if ( scalar( @_ ) == 2 ) { + $self->_check_class( $child, "Bio::Ontology::RelationshipI" ); + $type = $child->predicate_term(); + $parent = $child->object_term(); + $ont = $child->ontology(); + $child = $child->subject_term(); + } + + + $self->_check_class( $type, "Bio::Ontology::TermI" ); + + my $parentid = $self->_get_id( $parent ); + my $childid = $self->_get_id( $child ); + + my $g = $self->graph(); + + $self->add_term($child) unless $g->has_vertex( $childid ); + $self->add_term($parent) unless $g->has_vertex( $parentid ); + + # This prevents multi graphs. + if ( $g->has_edge( $parentid, $childid ) ) { + return FALSE; + } + + $g->add_edge( $parentid, $childid ); + $g->set_attribute( TYPE, $parentid, $childid, $type ); + $g->set_attribute( ONTOLOGY, $parentid, $childid, $ont ); + + return TRUE; + +} # add_relationship + + + + +=head2 get_relationships + + + Title : get_relationships + Usage : $engine->get_relationships( $term ); + Function: Returns all relationships of a term, or all relationships in + the graph if no term is specified. + Returns : Relationship[] + Args : term id + or + Bio::Ontology::TermI + +=cut + +sub get_relationships { + my ( $self, $term ) = @_; + + my $g = $self->graph(); + + # obtain the ID if term provided + my $termid; + if($term) { + $termid = $self->_get_id( $term ); + # check for presence in the graph + if ( ! $g->has_vertex( $termid ) ) { + $self->throw( "no term with identifier \"$termid\" in ontology" ); + } + } + + # now build the relationships + my $relfact = $self->relationship_factory(); + # we'll build the relationships from edges + my @rels = (); + my @edges = $g->edges($termid); + while(@edges) { + my $startid = shift(@edges); + my $endid = shift(@edges); + my $rel = $relfact->create_object( + -subject_term => $self->get_terms($endid), + -object_term => $self->get_terms($startid), + -predicate_term => $g->get_attribute(TYPE, + $startid, $endid), + -ontology => $g->get_attribute(ONTOLOGY, + $startid, $endid)); + push( @rels, $rel ); + } + + return @rels; + +} # get_relationships + +=head2 get_all_relationships + + + Title : get_all_relationships + Usage : @rels = $engine->get_all_relationships(); + Function: Returns all relationships in the graph. + Returns : Relationship[] + Args : + +=cut + +sub get_all_relationships { + return shift->get_relationships(@_); +} # get_all_relationships + + + +=head2 get_predicate_terms + + Title : get_predicate_terms + Usage : $engine->get_predicate_terms(); + Function: Returns the types of relationships this engine contains + Returns : Bio::Ontology::RelationshipType[] + Args : + + +=cut + +sub get_predicate_terms { + my ( $self ) = @_; + + my @a = ( $self->is_a_relationship(), + $self->part_of_relationship() ); + + return @a; +} # get_predicate_terms + + + + +=head2 get_child_terms + + Title : get_child_terms + Usage : $engine->get_child_terms( $term_obj, @rel_types ); + $engine->get_child_terms( $term_id, @rel_types ); + Function: Returns the children of this term + Returns : Bio::Ontology::TermI[] + Args : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[] + or + term id, Bio::Ontology::RelationshipType[] + + if NO Bio::Ontology::RelationshipType[] is indicated: children + of ALL types are returned + +=cut + +sub get_child_terms { + my ( $self, $term, @types ) = @_; + + return $self->_get_child_parent_terms_helper( $term, TRUE, @types ); + +} # get_child_terms + + +=head2 get_descendant_terms + + Title : get_descendant_terms + Usage : $engine->get_descendant_terms( $term_obj, @rel_types ); + $engine->get_descendant_terms( $term_id, @rel_types ); + Function: Returns the descendants of this term + Returns : Bio::Ontology::TermI[] + Args : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[] + or + term id, Bio::Ontology::RelationshipType[] + + if NO Bio::Ontology::RelationshipType[] is indicated: descendants + of ALL types are returned + +=cut + +sub get_descendant_terms { + my ( $self, $term, @types ) = @_; + + my %ids = (); + my @ids = (); + + $term = $self->_get_id( $term ); + + if ( ! $self->graph()->has_vertex( $term ) ) { + $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" ); + } + + $self->_get_descendant_terms_helper( $term, \%ids, \@types ); + + while( ( my $id ) = each ( %ids ) ) { + push( @ids, $id ); + } + + return $self->get_terms( @ids ); + +} # get_descendant_terms + + + + +=head2 get_parent_terms + + Title : get_parent_terms + Usage : $engine->get_parent_terms( $term_obj, @rel_types ); + $engine->get_parent_terms( $term_id, @rel_types ); + Function: Returns the parents of this term + Returns : Bio::Ontology::TermI[] + Args : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[] + or + term id, Bio::Ontology::RelationshipType[] + + if NO Bio::Ontology::RelationshipType[] is indicated: parents + of ALL types are returned + +=cut + +sub get_parent_terms { + my ( $self, $term, @types ) = @_; + + return $self->_get_child_parent_terms_helper( $term, FALSE, @types ); + +} # get_parent_terms + + + +=head2 get_ancestor_terms + + Title : get_ancestor_terms + Usage : $engine->get_ancestor_terms( $term_obj, @rel_types ); + $engine->get_ancestor_terms( $term_id, @rel_types ); + Function: Returns the ancestors of this term + Returns : Bio::Ontology::TermI[] + Args : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[] + or + term id, Bio::Ontology::RelationshipType[] + + if NO Bio::Ontology::RelationshipType[] is indicated: ancestors + of ALL types are returned + +=cut + +sub get_ancestor_terms { + my ( $self, $term, @types ) = @_; + + my %ids = (); + my @ids = (); + + $term = $self->_get_id( $term ); + + if ( ! $self->graph()->has_vertex( $term ) ) { + $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" ); + } + + $self->_get_ancestor_terms_helper( $term, \%ids, \@types ); + + while( ( my $id ) = each ( %ids ) ) { + push( @ids, $id ); + } + + return $self->get_terms( @ids ); + +} # get_ancestor_terms + + + + + +=head2 get_leaf_terms + + Title : get_leaf_terms + Usage : $engine->get_leaf_terms(); + Function: Returns the leaf terms + Returns : Bio::Ontology::TermI[] + Args : + +=cut + +sub get_leaf_terms { + my ( $self ) = @_; + + my @a = $self->graph()->sink_vertices(); + + return $self->get_terms( @a ); + +} + + + +=head2 get_root_terms() + + Title : get_root_terms + Usage : $engine->get_root_terms(); + Function: Returns the root terms + Returns : Bio::Ontology::TermI[] + Args : + +=cut + +sub get_root_terms { + my ( $self ) = @_; + + + my @a = $self->graph()->source_vertices(); + + return $self->get_terms( @a ); + +} + + +=head2 get_terms + + Title : get_terms + Usage : @terms = $engine->get_terms( "GO:1234567", "GO:2234567" ); + Function: Returns term objects with given identifiers + Returns : Bio::Ontology::TermI[], or the term corresponding to the + first identifier if called in scalar context + Args : term ids[] + + +=cut + +sub get_terms { + my ( $self, @ids ) = @_; + + my @terms = (); + + foreach my $id ( @ids ) { + if ( $self->graph()->has_vertex( $id ) ) { + push( @terms, $self->graph()->get_attribute( TERM, $id ) ); + } + } + + return wantarray ? @terms : shift(@terms); + +} # get_terms + + +=head2 get_all_terms + + Title : get_all_terms + Usage : $engine->get_all_terms(); + Function: Returns all terms in this engine + Returns : Bio::Ontology::TermI[] + Args : + +=cut + +sub get_all_terms { + my ( $self ) = @_; + + return( $self->get_terms( $self->graph()->vertices() ) ); + +} # get_all_terms + + +=head2 find_terms + + Title : find_terms + Usage : ($term) = $oe->find_terms(-identifier => "SO:0000263"); + Function: Find term instances matching queries for their attributes. + + This implementation can efficiently resolve queries by + identifier. + + Example : + Returns : an array of zero or more Bio::Ontology::TermI objects + Args : Named parameters. The following parameters should be recognized + by any implementations: + + -identifier query by the given identifier + -name query by the given name + + +=cut + +sub find_terms{ + my ($self,@args) = @_; + my @terms; + + my ($id,$name) = $self->_rearrange([qw(IDENTIFIER NAME)],@args); + + if(defined($id)) { + @terms = $self->get_terms($id); + } else { + @terms = $self->get_all_terms(); + } + if(defined($name)) { + @terms = grep { $_->name() eq $name; } @terms; + } + return @terms; +} + +=head2 relationship_factory + + Title : relationship_factory + Usage : $fact = $obj->relationship_factory() + Function: Get/set the object factory to be used when relationship + objects are created by the implementation on-the-fly. + + Example : + Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI + compliant object) + Args : on set, a Bio::Factory::ObjectFactoryI compliant object + + +=cut + +sub relationship_factory{ + my $self = shift; + + return $self->{'relationship_factory'} = shift if @_; + return $self->{'relationship_factory'}; +} + +=head2 term_factory + + Title : term_factory + Usage : $fact = $obj->term_factory() + Function: Get/set the object factory to be used when term objects are + created by the implementation on-the-fly. + + Note that this ontology engine implementation does not + create term objects on the fly, and therefore setting this + attribute is meaningless. + + Example : + Returns : value of term_factory (a Bio::Factory::ObjectFactoryI + compliant object) + Args : on set, a Bio::Factory::ObjectFactoryI compliant object + + +=cut + +sub term_factory{ + my $self = shift; + + if(@_) { + $self->warn("setting term factory, but ".ref($self). + " does not create terms on-the-fly"); + return $self->{'term_factory'} = shift; + } + return $self->{'term_factory'}; +} + +=head2 graph + + Title : graph() + Usage : $engine->graph(); + Function: Returns the Graph this engine is based on + Returns : Graph + Args : + +=cut + +sub graph { + my ( $self, $value ) = @_; + + if ( defined $value ) { + $self->_check_class( $value, "Graph::Directed" ); + $self->{ "_graph" } = $value; + } + + return $self->{ "_graph" }; +} # graph + + + +# Internal methods +# ---------------- + + +# Checks the correct format of a GOBO-formatted id +# Gets the id out of a term or id string +sub _get_id { + my ( $self, $term ) = @_; + + if(ref($term)) { + return $term->GO_id() if $term->isa("Bio::Ontology::GOterm"); + # if not a GOterm, use standard API + $self->throw("Object doesn't implement Bio::Ontology::TermI. ". + "Bummer.") + unless $term->isa("Bio::Ontology::TermI"); + $term = $term->identifier(); + } + # don't fuss if it looks remotely standard + return $term if $term =~ /^[A-Z]{1,8}:\d{3,}$/; + # prefix with something if only numbers + if($term =~ /^\d+$/) { + $self->warn(ref($self).": identifier [$term] is only numbers - ". + "prefixing with 'GO:'"); + return "GO:" . $term; + } + # we shouldn't have gotten here if it's at least a remotely decent ID + $self->warn(ref($self). + ": Are you sure '$term' is a valid identifier? ". + "If you see problems, this may be the cause."); + return $term; +} # _get_id + + +# Helper for getting children and parent terms +sub _get_child_parent_terms_helper { + my ( $self, $term, $do_get_child_terms, @types ) = @_; + + foreach my $type ( @types ) { + $self->_check_class( $type, "Bio::Ontology::TermI" ); + } + + my @relative_terms = (); + + $term = $self->_get_id( $term ); + if ( ! $self->graph()->has_vertex( $term ) ) { + $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" ); + } + + my @all_relative_terms = (); + if ( $do_get_child_terms ) { + @all_relative_terms = $self->graph()->successors( $term ); + } + else { + @all_relative_terms = $self->graph()->predecessors( $term ); + } + + foreach my $relative ( @all_relative_terms ) { + if ( scalar( @types ) > 0 ) { + foreach my $type ( @types ) { + my $relative_type; + if ( $do_get_child_terms ) { + $relative_type = $self->graph()->get_attribute( TYPE, $term, $relative ); + } + else { + $relative_type = $self->graph()->get_attribute( TYPE, $relative, $term ); + } + if ( $relative_type->equals( $type ) ) { + push( @relative_terms, $relative ); + } + } + } + else { + push( @relative_terms, $relative ); + } + } + + return $self->get_terms( @relative_terms ); + +} # get_child_terms + + +# Recursive helper +sub _get_descendant_terms_helper { + my ( $self, $term, $ids_ref, $types_ref ) = @_; + + my @child_terms = $self->get_child_terms( $term, @$types_ref ); + + if ( scalar( @child_terms ) < 1 ) { + return; + } + + foreach my $child_term ( @child_terms ) { + my $child_term_id = $child_term->identifier(); + $ids_ref->{ $child_term_id } = 0; + $self->_get_descendant_terms_helper( $child_term_id, $ids_ref, $types_ref ); + } + +} # _get_descendant_terms_helper + + +# Recursive helper +sub _get_ancestor_terms_helper { + my ( $self, $term, $ids_ref, $types_ref ) = @_; + + my @parent_terms = $self->get_parent_terms( $term, @$types_ref ); + + if ( scalar( @parent_terms ) < 1 ) { + return; + } + + foreach my $parent_term ( @parent_terms ) { + my $parent_term_id = $parent_term->identifier(); + $ids_ref->{ $parent_term_id } = 0; + $self->_get_ancestor_terms_helper( $parent_term_id, $ids_ref, $types_ref ); + } + +} # get_ancestor_terms_helper + + + +sub _check_class { + my ( $self, $value, $expected_class ) = @_; + + if ( ! defined( $value ) ) { + $self->throw( "Found [undef] where [$expected_class] expected" ); + } + elsif ( ! ref( $value ) ) { + $self->throw( "Found [scalar] where [$expected_class] expected" ); + } + elsif ( ! $value->isa( $expected_class ) ) { + $self->throw( "Found [" . ref( $value ) . "] where [$expected_class] expected" ); + } + +} # _check_class + + +################################################################# +# aliases +################################################################# + +*get_relationship_types = \&get_predicate_terms; + + +1;