0
|
1 # $GNF: projects/gi/symgene/src/perl/seqproc/Bio/OntologyIO/InterProParser.pm,v 1.5 2003/02/07 22:05:58 pdimitro Exp $
|
|
2 #
|
|
3 # BioPerl module for InterProParser
|
|
4 #
|
|
5 # Cared for by Peter Dimitrov <dimitrov@gnf.org>
|
|
6 #
|
|
7 # Copyright Peter Dimitrov
|
|
8 # (c) Peter Dimitrov, dimitrov@gnf.org, 2002.
|
|
9 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself.
|
|
12 # Refer to the Perl Artistic License (see the license accompanying this
|
|
13 # software package, or see http://www.perl.com/language/misc/Artistic.html)
|
|
14 # for the terms under which you may use, modify, and redistribute this module.
|
|
15 #
|
|
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
19 #
|
|
20 # POD documentation - main docs before the code
|
|
21
|
|
22 =head1 NAME
|
|
23
|
|
24 InterProParser - Parser for InterPro xml files.
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 # don't use this module directly - use Bio::OntologyIO with -format => 'interpro' instead
|
|
29 my $ipp = Bio::OntologyIO->new( -format => 'interpro',
|
|
30 -file => 't/data/interpro.xml',
|
|
31 -ontology_engine => 'simple' );
|
|
32
|
|
33 =head1 DESCRIPTION
|
|
34
|
|
35 Use InterProParser to parse InterPro files in xml format. Typical
|
|
36 use is the interpro.xml file published by EBI. The xml records
|
|
37 should follow the format described in interpro.dtd, although the dtd
|
|
38 file is not needed, and the XML file will not be validated against
|
|
39 it.
|
|
40
|
|
41 =head1 FEEDBACK
|
|
42
|
|
43 =head2 Mailing Lists
|
|
44
|
|
45 User feedback is an integral part of the evolution of this and other
|
|
46 Bioperl modules. Send your comments and suggestions preferably to
|
|
47 the Bioperl mailing list. Your participation is much appreciated.
|
|
48
|
|
49 bioperl-l@bioperl.org - General discussion
|
|
50 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
51
|
|
52 =head2 Reporting Bugs
|
|
53
|
|
54 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
55 of the bugs and their resolution. Bug reports can be submitted via
|
|
56 email or the web:
|
|
57
|
|
58 bioperl-bugs@bioperl.org
|
|
59 http://bugzilla.bioperl.org/
|
|
60
|
|
61 =head1 AUTHOR - Peter Dimitrov
|
|
62
|
|
63 Email dimitrov@gnf.org
|
|
64
|
|
65 =head1 CONTRIBUTORS
|
|
66
|
|
67 Additional contributors names and emails here
|
|
68
|
|
69 =head1 APPENDIX
|
|
70
|
|
71 The rest of the documentation details each of the object methods.
|
|
72 Internal methods are usually preceded with a _
|
|
73
|
|
74 =cut
|
|
75
|
|
76
|
|
77 # Let the code begin...
|
|
78
|
|
79
|
|
80 package Bio::OntologyIO::InterProParser;
|
|
81 use vars qw(@ISA);
|
|
82 use strict;
|
|
83 #use Carp;
|
|
84 use XML::Parser::PerlSAX;
|
|
85 use Bio::Ontology::SimpleOntologyEngine;
|
|
86 use Bio::Ontology::TermFactory;
|
|
87 use Bio::OntologyIO;
|
|
88 use Bio::OntologyIO::Handlers::InterProHandler;
|
|
89
|
|
90 @ISA = qw( Bio::OntologyIO );
|
|
91
|
|
92 =head2 new
|
|
93
|
|
94 Title : new
|
|
95 Usage :
|
|
96 Function: Initializes objects needed for parsing.
|
|
97 Example : $ipp = Bio::OntologyIO::InterProParser->new(
|
|
98 -file => 't/data/interpro.xml',
|
|
99 -ontology_engine => 'simple' )
|
|
100
|
|
101 Returns : Object of class Bio::OntologyIO::InterProParser.
|
|
102 Args :
|
|
103
|
|
104 -file - file name
|
|
105 -ontology_engine - type of ontology engine. Should satisfy the
|
|
106 OntologyEngine interface requirements. Currently
|
|
107 the only option is 'simple'. In the future
|
|
108 Graph.pm based engine will be added to the
|
|
109 choices.
|
|
110
|
|
111
|
|
112 =cut
|
|
113
|
|
114 # in reality we let OntologyIO handle the first pass initialization
|
|
115 # and instead override _initialize().
|
|
# Second-stage constructor: Bio::OntologyIO performs the generic set-up and
# this override wires together the InterPro SAX handler, the ontology engine,
# the XML parser and the term factory.
#
# Named arguments read here: -engine, -ontology_engine, -ontology_name.
# Throws if no engine object was supplied and the requested engine type is
# anything other than 'simple'.
sub _initialize {
    my $self = shift;

    $self->SUPER::_initialize(@_);

    my ($engine, $eng_type, $ont_name) = $self->_rearrange(
        [ qw(ENGINE ONTOLOGY_ENGINE ONTOLOGY_NAME) ], @_);

    my $handler = Bio::OntologyIO::Handlers::InterProHandler->new(
        -ontology_name => $ont_name);

    # No ready-made engine was passed in: build one from the type name.
    unless ($engine) {
        if (lc($eng_type) ne 'simple') {
            $self->throw("ontology engine type '$eng_type' ".
                         "not implemented yet");
        }
        else {
            $engine = Bio::Ontology::SimpleOntologyEngine->new();
        }
    }

    # A full ontology object may be supplied in place of a bare engine; hand
    # it to the handler and continue with the engine it exposes, if any.
    if ($engine->isa("Bio::Ontology::OntologyI")) {
        $handler->ontology($engine);
        if ($engine->can('engine')) {
            $engine = $engine->engine();
        }
    }
    $self->{_ontology_engine} = $engine;
    $handler->ontology_engine($engine);

    $self->{_interpro_handler} = $handler;
    $self->{_parser} = XML::Parser::PerlSAX->new( Handler => $handler );

    # default term object factory
    if (! $self->term_factory()) {
        $self->term_factory(
            Bio::Ontology::TermFactory->new(
                -type => "Bio::Ontology::InterProTerm"));
    }
    $handler->term_factory($self->term_factory());
}
|
|
155
|
|
156 =head2 parse
|
|
157
|
|
158 Title : parse
|
|
159 Usage :
|
|
160 Function: Performs the actual parsing.
|
|
161 Example : $ipp->parse();
|
|
162 Returns :
|
|
163 Args :
|
|
164
|
|
165
|
|
166 =cut
|
|
167
|
|
# Runs the SAX parser over the file this object was opened on, marks the
# object as parsed, and returns whatever the parser's parse() call returned.
sub parse {
    my ($self) = @_;

    # The input is identified to the SAX parser by its file name.
    my %source = ( SystemId => $self->file() );

    my $result = $self->{_parser}->parse( Source => \%source );
    $self->_is_parsed(1);

    return $result;
}
|
|
176
|
|
177 =head2 next_ontology
|
|
178
|
|
179 Title : next_ontology
|
|
180 Usage : $ipp->next_ontology()
|
|
181 Function: Parses the input file and returns the next InterPro ontology
|
|
182 available.
|
|
183
|
|
184 Usually there will be only one ontology returned from an
|
|
185 InterPro XML input.
|
|
186
|
|
187 Example : $ipp->next_ontology();
|
|
188 Returns : Returns the ontology as a L<Bio::Ontology::OntologyEngineI>
|
|
189 compliant object.
|
|
190 Args :
|
|
191
|
|
192
|
|
193 =cut
|
|
194
|
|
# Returns the ontology built from the InterPro input, parsing the input
# first if that has not happened yet.
#
# The ontology is handed out only once: the '_ontology_engine' slot doubles
# as the "not yet returned" marker and is deleted on the first call. Every
# later call returns nothing - undef in scalar context and an empty list in
# list context - so list-context callers see a properly exhausted stream
# rather than a one-element list holding undef.
sub next_ontology {
    my $self = shift;

    $self->parse() unless $self->_is_parsed();

    # there is only one ontology in an InterPro source file
    if (exists $self->{'_ontology_engine'}) {
        my $ont = $self->{_interpro_handler}->ontology();
        delete $self->{_ontology_engine};
        return $ont;
    }

    # Bare return: undef in scalar context, empty list in list context.
    return;
}
|
|
207
|
|
208 =head2 _is_parsed
|
|
209
|
|
210 Title : _is_parsed
|
|
211 Usage : $obj->_is_parsed($newval)
|
|
212 Function:
|
|
213 Example :
|
|
214 Returns : value of _is_parsed (a scalar)
|
|
215 Args : on set, new value (a scalar or undef, optional)
|
|
216
|
|
217
|
|
218 =cut
|
|
219
|
|
# Combined getter/setter for the flag recording whether the input has been
# parsed. When called with an argument (even undef) the flag is set to that
# value; the current value of the flag is returned in either case.
sub _is_parsed {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_is_parsed'} = $new_value[0];
    }

    return $self->{'_is_parsed'};
}
|
|
226
|
|
227 =head2 secondary_accessions_map
|
|
228
|
|
229 Title : secondary_accessions_map
|
|
230 Usage : $obj->secondary_accessions_map()
|
|
231 Function: This method is merely for convenience, and one should
|
|
232 normally use the InterProTerm secondary_ids method to access
|
|
233 the secondary accessions.
|
|
234 Example : $map = $interpro_parser->secondary_accessions_map;
|
|
235 Returns : Reference to a hash that maps InterPro identifier to an
|
|
236 array reference of secondary accessions following the InterPro
|
|
237 xml schema.
|
|
238 Args : none
|
|
239
|
|
240 =cut
|
|
241
|
|
# Convenience accessor: returns whatever the InterPro handler stores under
# its 'secondary_accessions_map' key. Takes no arguments.
sub secondary_accessions_map {
    my $self = shift;

    return $self->{_interpro_handler}{secondary_accessions_map};
}
|
|
247
|
|
248 1;
|