diff variant_effect_predictor/Bio/OntologyIO/InterProParser.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/OntologyIO/InterProParser.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,248 @@
+# $GNF: projects/gi/symgene/src/perl/seqproc/Bio/OntologyIO/InterProParser.pm,v 1.5 2003/02/07 22:05:58 pdimitro Exp $
+#
+# BioPerl module for InterProParser
+#
+# Cared for by Peter Dimitrov <dimitrov@gnf.org>
+#
+# Copyright Peter Dimitrov
+# (c) Peter Dimitrov, dimitrov@gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+InterProParser - Parser for InterPro xml files.
+
+=head1 SYNOPSIS
+
+    # don't use this module directly - use Bio::OntologyIO with instead
+    my $ipp = Bio::OntologyIO->new( -format  => 'interpro',
+                                    -file    => 't/data/interpro.xml',
+                                    -ontology_engine => 'simple' );
+
+=head1 DESCRIPTION
+
+  Use InterProParser to parse InterPro files in xml format. Typical
+  use is the interpro.xml file published by EBI. The xml records
+  should follow the format described in interpro.dtd, although the dtd
+  file is not needed, and the XML file will not be validated against
+  it.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov@gnf.org
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::OntologyIO::InterProParser;
+use vars qw(@ISA);
+use strict;
+#use Carp;
+use XML::Parser::PerlSAX;
+use Bio::Ontology::SimpleOntologyEngine;
+use Bio::Ontology::TermFactory;
+use Bio::OntologyIO;
+use Bio::OntologyIO::Handlers::InterProHandler;
+
+@ISA = qw( Bio::OntologyIO );
+
+=head2 new
+
+ Title   : new
+ Usage   :
+ Function: Initializes objects needed for parsing.
+ Example : $ipp = Bio::OntologyIO::InterProParser->new( 
+                                  -file => 't/data/interpro.xml',
+				  -ontology_engine => 'simple' )
+
+ Returns : Object of class Bio::OntologyIO::InterProParser.
+ Args    :
+
+  -file            - file name
+  -ontology_engine - type of ontology engine. Should satisfy the
+                     OntologyEngine interface requirements. Currently
+                     the only option is 'simple'. In the future
+                     Graph.pm based engine will be added to the
+                     choices.
+
+
+=cut
+
+# in reality we let OntologyIO handle the first pass initialization
+# and instead override _initialize().
+sub _initialize{
+    my $self = shift;
+
+    $self->SUPER::_initialize(@_);
+
+    my ($eng,$eng_type,$name) =
+	$self->_rearrange([qw(ENGINE
+			      ONTOLOGY_ENGINE
+			      ONTOLOGY_NAME)
+			   ], @_);
+
+    my $ip_h = Bio::OntologyIO::Handlers::InterProHandler->new(
+			                             -ontology_name => $name);
+
+    if(! $eng) {
+	if(lc($eng_type) eq 'simple') {
+	    $eng = Bio::Ontology::SimpleOntologyEngine->new();
+	} else {
+	    $self->throw("ontology engine type '$eng_type' ".
+			 "not implemented yet");
+	}
+    }
+    if($eng->isa("Bio::Ontology::OntologyI")) {
+	$ip_h->ontology($eng);
+	$eng = $eng->engine() if $eng->can('engine');
+    }
+    $self->{_ontology_engine} = $eng;
+    $ip_h->ontology_engine($eng);
+
+    $self->{_parser} = XML::Parser::PerlSAX->new( Handler => $ip_h );
+    $self->{_interpro_handler} = $ip_h;
+
+    # default term object factory
+    $self->term_factory(Bio::Ontology::TermFactory->new(
+				   -type => "Bio::Ontology::InterProTerm"))
+	unless $self->term_factory();
+    $ip_h->term_factory($self->term_factory());
+
+}
+
+=head2 parse
+
+ Title   : parse
+ Usage   :
+ Function: Performs the actual parsing.
+ Example : $ipp->parse();
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub parse{
+   my $self = shift;
+
+   my $ret = $self->{_parser}->parse( Source => {
+       SystemId => $self->file() } );
+   $self->_is_parsed(1);
+   return $ret;
+}
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   : $ipp->next_ontology()
+ Function: Parses the input file and returns the next InterPro ontology
+           available.
+
+           Usually there will be only one ontology returned from an
+           InterPro XML input.
+
+ Example : $ipp->next_ontology();
+ Returns : Returns the ontology as a L<Bio::Ontology::OntologyEngineI>
+           compliant object.
+ Args    : 
+
+
+=cut
+
+sub next_ontology{
+  my $self = shift;
+
+  $self->parse() unless $self->_is_parsed();
+  # there is only one ontology in an InterPro source file
+  if(exists($self->{'_ontology_engine'})) {
+      my $ont = $self->{_interpro_handler}->ontology();
+      delete $self->{_ontology_engine};
+      return $ont;
+  }
+  return undef;
+}
+
+=head2 _is_parsed
+
+ Title   : _is_parsed
+ Usage   : $obj->_is_parsed($newval)
+ Function: 
+ Example : 
+ Returns : value of _is_parsed (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub _is_parsed{
+    my $self = shift;
+
+    return $self->{'_is_parsed'} = shift if @_;
+    return $self->{'_is_parsed'};
+}
+
+=head2 secondary_accessions_map
+
+ Title   : secondary_accessions_map
+ Usage   : $obj->secondary_accessions_map()
+ Function:  This method is merely for convenience, and one should
+ normally use the InterProTerm secondary_ids method to access
+ the secondary accessions.
+ Example : $map = $interpro_parser->secondary_accessions_map;
+ Returns : Reference to a hash that maps InterPro identifier to an
+  array reference of secondary accessions following the InterPro
+ xml schema.
+ Args    : Empty hash reference
+
+=cut
+
+sub secondary_accessions_map{
+  my ($self) = @_;
+
+  return $self->{_interpro_handler}->{secondary_accessions_map};
+}
+
+1;