0
|
1 # $Id: OntologyIO.pm,v 1.3.2.1 2003/03/13 02:09:19 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::OntologyIO
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
|
|
6 #
|
|
7 # Copyright Hilmar Lapp
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 #
|
|
12 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
|
|
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
|
|
14 #
|
|
15 # You may distribute this module under the same terms as perl itself.
|
|
16 # Refer to the Perl Artistic License (see the license accompanying this
|
|
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
|
|
18 # for the terms under which you may use, modify, and redistribute this module.
|
|
19 #
|
|
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
23 #
|
|
24
|
|
25 # POD documentation - main docs before the code
|
|
26
|
|
27 =head1 NAME
|
|
28
|
|
29 Bio::OntologyIO - Parser factory for Ontology formats
|
|
30
|
|
31 =head1 SYNOPSIS
|
|
32
|
|
33 use Bio::OntologyIO;
|
|
34
|
|
35 my $parser = Bio::OntologyIO->new(-format => "go", ...);
|
|
36
|
|
37 while(my $ont = $parser->next_ontology()) {
|
|
38 print "read ontology ",$ont->name()," with ",
|
|
39 scalar($ont->get_root_terms)," root terms, and ",
|
|
40 scalar($ont->get_leaf_terms)," leaf terms\n";
|
|
41 }
|
|
42
|
|
43 =head1 DESCRIPTION
|
|
44
|
|
45 This is the parser factory for different ontology sources and
|
|
46 formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
|
|
47 difference is that the chunk of data returned as an object is an
|
|
48 entire ontology.
|
|
49
|
|
50 =head1 FEEDBACK
|
|
51
|
|
52 =head2 Mailing Lists
|
|
53
|
|
54 User feedback is an integral part of the evolution of this and other
|
|
55 Bioperl modules. Send your comments and suggestions preferably to
|
|
56 the Bioperl mailing list. Your participation is much appreciated.
|
|
57
|
|
58 bioperl-l@bioperl.org - General discussion
|
|
59 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
60
|
|
61 =head2 Reporting Bugs
|
|
62
|
|
63 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
64 of the bugs and their resolution. Bug reports can be submitted via
|
|
65 the web:
|
|
66
|
|
67 http://bugzilla.bioperl.org/
|
|
68
|
|
69 =head1 AUTHOR - Hilmar Lapp
|
|
70
|
|
71 Email hlapp at gmx.net
|
|
72
|
|
73 Describe contact details here
|
|
74
|
|
75 =head1 CONTRIBUTORS
|
|
76
|
|
77 Additional contributors names and emails here
|
|
78
|
|
79 =head1 APPENDIX
|
|
80
|
|
81 The rest of the documentation details each of the object methods.
|
|
82 Internal methods are usually preceded with a _
|
|
83
|
|
84 =cut
|
|
85
|
|
86
|
|
87 # Let the code begin...
|
|
88
|
|
89
|
|
90 package Bio::OntologyIO;
|
|
91 use vars qw(@ISA);
|
|
92 use strict;
|
|
93
|
|
94 # Object preamble - inherits from Bio::Root::Root
|
|
95
|
|
96 use Bio::Root::Root;
|
|
97 use Bio::Root::IO;
|
|
98
|
|
99 @ISA = qw(Bio::Root::Root Bio::Root::IO);
|
|
100
|
|
#
# Lookup table translating a user-facing format name (lower case) into the
# name of the driver module that handles it, i.e. the last component of
# Bio::OntologyIO::<driver>. Format names that have no entry here are
# used as the driver name directly (see _map_format).
#
my %format_driver_map = (
    go       => 'goflat',
    so       => 'soflat',
    interpro => 'InterProParser',
);
|
|
109
|
|
110 =head2 new
|
|
111
|
|
112 Title : new
|
|
113 Usage : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
|
|
114 Function: Returns a stream of ontologies opened on the specified input
|
|
115 for the specified format.
|
|
116 Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
|
|
117 for the specified format.
|
|
118 Args : Named parameters. Common parameters are
|
|
119
|
|
120 -format - the format of the input; supported right now are
|
|
121 'go' (synonymous with goflat), 'so' (synonymous
|
|
122 with soflat), and 'interpro'
|
|
123 -file - the file holding the data
|
|
124 -fh - the stream providing the data (-file and -fh are
|
|
125 mutually exclusive)
|
|
126 -ontology_name - the name of the ontology
|
|
127 -engine - the L<Bio::Ontology::OntologyEngineI> object
|
|
128 to be reused (will be created otherwise); note
|
|
129 that every L<Bio::Ontology::OntologyI> will
|
|
130 qualify as well since that one inherits from the
|
|
131 former.
|
|
132 -term_factory - the ontology term factory to use. Provide a
|
|
133 value only if you know what you are doing.
|
|
134
|
|
135 DAG-Edit flat file parsers will usually also accept the
|
|
136 following parameters.
|
|
137
|
|
138 -defs_file - the name of the file holding the term
|
|
139 definitions
|
|
140 -files - an array ref holding the file names (for GO,
|
|
141 there will usually be 3 files: component.ontology,
|
|
142 function.ontology, process.ontology)
|
|
143
|
|
144 Other parameters are specific to the parsers.
|
|
145
|
|
146 =cut
|
|
147
|
|
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Invoked for a concrete driver class (anything below the
    # Bio::OntologyIO:: namespace): construct through the parent class
    # constructor and let the driver set itself up via _initialize().
    # NOTE: when $caller is an object only its class name is used here;
    # whether SUPER should be called on the object instead is an open
    # question inherited from the original implementation.
    if ( $class =~ /Bio::OntologyIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Invoked on the factory itself: figure out which driver is wanted,
    # load it on demand, and delegate construction to it.
    my %param = @args;
    my @names = keys %param;

    # Add a lower-cased alias for every parameter name so that -format is
    # found regardless of how the caller capitalized it.
    @param{ map { lc $_ } @names } = @param{@names};

    my $format = $class->_map_format( $param{'-format'} );

    # No object can be built if the driver module fails to load.
    return undef unless $class->_load_format_module($format);

    my $driver_class = "Bio::OntologyIO::$format";
    return $driver_class->new(@args);
}
|
|
169
|
|
# Title   : _initialize
# Usage   : $self->_initialize(@args);   # called from new()
# Function: Common set-up shared by all ontology parsers: stores the term
#           factory if one was supplied, then initializes the
#           Bio::Root::IO part of the object.
# Returns : whatever _initialize_io() returns
# Args    : the named parameters given to new(); -term_factory is the only
#           one consumed here, the full list is also handed on to
#           _initialize_io()
sub _initialize {
    my ($self, @args) = @_;

    # _rearrange() returns one value per requested key, in the order the
    # keys are listed. Only TERM_FACTORY is requested, so exactly one
    # variable must receive the result. (Unpacking into several variables,
    # as was done previously, put the factory into the first of them and
    # left the one that was actually tested undefined, so a supplied
    # -term_factory was silently ignored.)
    my ($fact) = $self->_rearrange([qw(TERM_FACTORY)], @args);

    # term object factory: only override when the caller provided one
    $self->term_factory($fact) if $fact;

    # initialize the Bio::Root::IO part
    $self->_initialize_io(@args);
}
|
|
184
|
|
185 =head2 next_ontology
|
|
186
|
|
187 Title : next_ontology
|
|
188 Usage : $ont = $stream->next_ontology()
|
|
189 Function: Reads the next ontology object from the stream and returns it.
|
|
190 Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
|
|
191 end of the stream
|
|
192 Args : none
|
|
193
|
|
194
|
|
195 =cut
|
|
196
|
|
sub next_ontology {
    my ($self) = @_;

    # Abstract in this base class: concrete parsers are expected to
    # provide their own implementation.
    return $self->throw_not_implemented();
}
|
|
200
|
|
201 =head2 term_factory
|
|
202
|
|
203 Title : term_factory
|
|
204 Usage : $obj->term_factory($newval)
|
|
205 Function: Get/set the ontology term factory to use.
|
|
206
|
|
207 As a user of this module it is not necessary to call this
|
|
208 method as there will be a default. In order to change the
|
|
209 default, the easiest way is to instantiate
|
|
210 L<Bio::Ontology::TermFactory> with the proper -type
|
|
211 argument. Most if not all parsers will actually use this
|
|
212 very implementation, so even easier than the aforementioned
|
|
213 way is to simply call
|
|
214 $ontio->term_factory->type("Bio::Ontology::MyTerm").
|
|
215
|
|
216 Example :
|
|
217 Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
|
|
218 Args : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
|
|
219
|
|
220
|
|
221 =cut
|
|
222
|
|
sub term_factory {
    my ($self, @new_value) = @_;

    # Setter: any argument at all (even undef) replaces the stored factory.
    if (@new_value) {
        $self->{'term_factory'} = $new_value[0];
    }

    # Getter and setter alike hand back the currently stored factory.
    return $self->{'term_factory'};
}
|
|
229
|
|
230 =head1 Private Methods
|
|
231
|
|
232 Some of these are actually 'protected' in OO speak, which means you
|
|
233 may or will want to utilize them in a derived ontology parser, but
|
|
234 you should not call them from outside.
|
|
235
|
|
236 =cut
|
|
237
|
|
238 =head2 _load_format_module
|
|
239
|
|
240 Title : _load_format_module
|
|
241 Usage : *INTERNAL OntologyIO stuff*
|
|
242 Function: Loads up (like use) a module at run time on demand
|
|
243 Example :
|
|
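244 Returns : the value returned by _load_module() for the driver module, or undef if loading raised an exception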
|
|
245 Args : the name of the driver module, without the leading Bio::OntologyIO:: prefix
|
|
246
|
|
247 =cut
|
|
248
|
|
sub _load_format_module {
    my ($self, $format) = @_;

    # Driver modules live directly below the Bio::OntologyIO namespace.
    my $module = "Bio::OntologyIO::$format";

    # _load_module() may throw; trap that so the failure can be reported
    # and signalled to the caller through an undefined return value.
    my $loaded = eval { $self->_load_module($module) };

    if ( my $error = $@ ) {
        my $message = <<END;
$self: $format cannot be found
Exception $error
For more information about the OntologyIO system please see the docs.
This includes ways of checking for formats at compile time, not run time
END
        print STDERR $message;
    }

    return $loaded;
}
|
|
267
|
|
sub DESTROY {
    my ($self) = @_;

    # Destructor: call close() on the object before it goes away.
    $self->close();
}
|
|
273
|
|
# Translates the value of -format into the name of the driver module to
# load. Known aliases are resolved through %format_driver_map; any other
# name is used as given, lower-cased. Throws if no format was supplied,
# since there is no way to guess it.
sub _map_format {
    my ($self, $format) = @_;
    my $driver;

    if ( !$format ) {
        $self->throw("unable to guess ontology format, specify -format");
    }
    else {
        my $name = lc($format);
        $driver = $format_driver_map{$name} || $name;
    }

    return $driver;
}
|
|
287
|
|
288 1;
|