0
|
1 # $Id: dagflat.pm,v 1.2.2.6 2003/06/30 05:04:06 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::OntologyIO::dagflat
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp, hlapp at gmx.net
|
|
6 #
|
|
7 # (c) Christian M. Zmasek, czmasek@gnf.org, 2002.
|
|
8 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
|
|
9 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself.
|
|
12 # Refer to the Perl Artistic License (see the license accompanying this
|
|
13 # software package, or see http://www.perl.com/language/misc/Artistic.html)
|
|
14 # for the terms under which you may use, modify, and redistribute this module.
|
|
15 #
|
|
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
19 #
|
|
20 # You may distribute this module under the same terms as perl itself
|
|
21
|
|
22 # POD documentation - main docs before the code
|
|
23
|
|
24 =head1 NAME
|
|
25
|
|
26 dagflat - a base class parser for GO flat-file type formats
|
|
27
|
|
28 =head1 SYNOPSIS
|
|
29
|
|
30 use Bio::OntologyIO;
|
|
31
|
|
32 # do not use directly -- use via Bio::OntologyIO
|
|
33 # e.g., the GO parser is a simple extension of this class
|
|
34 my $parser = Bio::OntologyIO->new
|
|
35 ( -format => "go",
|
|
36 -defs_file => "/home/czmasek/GO/GO.defs",
|
|
37 -files => ["/home/czmasek/GO/component.ontology",
|
|
38 "/home/czmasek/GO/function.ontology",
|
|
39 "/home/czmasek/GO/process.ontology"] );
|
|
40
|
|
41 my $go_ontology = $parser->next_ontology();
|
|
42
|
|
43 my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
|
|
44 my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
|
|
45
|
|
46 =head1 DESCRIPTION
|
|
47
|
|
48 Needs Graph.pm from CPAN.
|
|
49
|
|
50 =head1 FEEDBACK
|
|
51
|
|
52 =head2 Mailing Lists
|
|
53
|
|
54 User feedback is an integral part of the evolution of this and other
|
|
55 Bioperl modules. Send your comments and suggestions preferably to the
|
|
56 Bioperl mailing lists Your participation is much appreciated.
|
|
57
|
|
58 bioperl-l@bioperl.org - General discussion
|
|
59 http://bio.perl.org/MailList.html - About the mailing lists
|
|
60
|
|
61 =head2 Reporting Bugs
|
|
62
|
|
63 report bugs to the Bioperl bug tracking system to help us keep track
|
|
64 the bugs and their resolution. Bug reports can be submitted via
|
|
65 email or the web:
|
|
66
|
|
67 bioperl-bugs@bio.perl.org
|
|
68 http://bugzilla.bioperl.org/
|
|
69
|
|
70 =head1 AUTHOR
|
|
71
|
|
72 Christian M. Zmasek
|
|
73
|
|
74 Email: czmasek@gnf.org or cmzmasek@yahoo.com
|
|
75
|
|
76 WWW: http://www.genetics.wustl.edu/eddy/people/zmasek/
|
|
77
|
|
78 Address:
|
|
79
|
|
80 Genomics Institute of the Novartis Research Foundation
|
|
81 10675 John Jay Hopkins Drive
|
|
82 San Diego, CA 92121
|
|
83
|
|
84 =head2 CONTRIBUTOR
|
|
85
|
|
86 Hilmar Lapp, hlapp at gmx.net
|
|
87
|
|
88 =head1 APPENDIX
|
|
89
|
|
90 The rest of the documentation details each of the object
|
|
91 methods. Internal methods are usually preceded with a _
|
|
92
|
|
93 =cut
|
|
94
|
|
95
|
|
96 # Let the code begin...
|
|
97
|
|
98
|
|
99 package Bio::OntologyIO::dagflat;
|
|
100
|
|
101 use vars qw( @ISA );
|
|
102 use strict;
|
|
103
|
|
104 use Bio::Root::IO;
|
|
105 use Bio::Ontology::SimpleGOEngine;
|
|
106 use Bio::Ontology::Ontology;
|
|
107 use Bio::Ontology::TermFactory;
|
|
108 use Bio::OntologyIO;
|
|
109
|
|
110 use constant TRUE => 1;
|
|
111 use constant FALSE => 0;
|
|
112
|
|
113
|
|
114 @ISA = qw( Bio::OntologyIO );
|
|
115
|
|
116
|
|
117 =head2 new
|
|
118
|
|
119 Title : new
|
|
120 Usage : $parser = Bio::OntologyIO->new(
|
|
121 -format => "go",
|
|
122 -defs_file => "/path/to/GO.defs",
|
|
123 -files => ["/path/to/component.ontology",
|
|
124 "/path/to/function.ontology",
|
|
125 "/path/to/process.ontology"] );
|
|
126 Function: Creates a new dagflat parser.
|
|
127 Returns : A new dagflat parser object, implementing L<Bio::OntologyIO>.
|
|
128 Args : -defs_file => the name of the file holding the term
|
|
129 definitions
|
|
130 -files => a single ontology flat file holding the
|
|
131 term relationships, or an array ref holding
|
|
132 the file names (for GO, there will usually be
|
|
133 3 files: component.ontology, function.ontology,
|
|
134 process.ontology)
|
|
135 -file => if there is only a single flat file, it may
|
|
136 also be specified via the -file parameter
|
|
137 -ontology_name => the name of the ontology; if not specified the
|
|
138 parser will auto-discover it by using the term
|
|
139 that starts with a '$', and converting underscores
|
|
140 to spaces
|
|
141 -engine => the Bio::Ontology::OntologyEngineI object
|
|
142 to be reused (will be created otherwise); note
|
|
143 that every Bio::Ontology::OntologyI will
|
|
144 qualify as well since that one inherits from the
|
|
145 former.
|
|
146
|
|
147 =cut
|
|
148
|
|
149 # in reality, we let OntologyIO::new do the instantiation, and override
|
|
150 # _initialize for all initialization work
|
|
151 sub _initialize {
|
|
152 my ($self, @args) = @_;
|
|
153
|
|
154 $self->SUPER::_initialize( @args );
|
|
155
|
|
156 my ( $defs_file_name,$files,$name,$eng ) =
|
|
157 $self->_rearrange([qw( DEFS_FILE
|
|
158 FILES
|
|
159 ONTOLOGY_NAME
|
|
160 ENGINE)
|
|
161 ],
|
|
162 @args );
|
|
163
|
|
164 $self->_done( FALSE );
|
|
165 $self->_not_first_record( FALSE );
|
|
166 $self->_term( "" );
|
|
167 delete $self->{'_ontologies'};
|
|
168
|
|
169 # ontology engine (and possibly name if it's an OntologyI)
|
|
170 $eng = Bio::Ontology::SimpleGOEngine->new() unless $eng;
|
|
171 if($eng->isa("Bio::Ontology::OntologyI")) {
|
|
172 $self->ontology_name($eng->name());
|
|
173 $eng = $eng->engine() if $eng->can('engine');
|
|
174 }
|
|
175 $self->_ont_engine($eng);
|
|
176
|
|
177 # flat files to parse
|
|
178 $self->defs_file( $defs_file_name );
|
|
179 $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
|
|
180
|
|
181 # ontology name (overrides implicit one through OntologyI engine)
|
|
182 $self->ontology_name($name) if $name;
|
|
183
|
|
184 } # _initialize
|
|
185
|
|
186 =head2 ontology_name
|
|
187
|
|
188 Title : ontology_name
|
|
189 Usage : $obj->ontology_name($newval)
|
|
190 Function: Get/set the name of the ontology parsed by this module.
|
|
191 Example :
|
|
192 Returns : value of ontology_name (a scalar)
|
|
193 Args : on set, new value (a scalar or undef, optional)
|
|
194
|
|
195
|
|
196 =cut
|
|
197
|
|
198 sub ontology_name{
|
|
199 my $self = shift;
|
|
200
|
|
201 return $self->{'ontology_name'} = shift if @_;
|
|
202 return $self->{'ontology_name'};
|
|
203 }
|
|
204
|
|
205
|
|
206 =head2 parse
|
|
207
|
|
208 Title : parse()
|
|
209 Usage : $parser->parse();
|
|
210 Function: Parses the files set with "new" or with methods
|
|
211 defs_file and _flat_files.
|
|
212
|
|
213 Normally you should not need to call this method as it will
|
|
214 be called automatically upon the first call to
|
|
215 next_ontology().
|
|
216
|
|
217 Returns : [Bio::Ontology::OntologyEngineI]
|
|
218 Args :
|
|
219
|
|
220 =cut
|
|
221
|
|
222 sub parse {
|
|
223 my $self = shift;
|
|
224
|
|
225 # setup the default term factory if not done by anyone yet
|
|
226 $self->term_factory(Bio::Ontology::TermFactory->new(
|
|
227 -type => "Bio::Ontology::Term"))
|
|
228 unless $self->term_factory();
|
|
229
|
|
230 # create the ontology object itself
|
|
231 my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
|
|
232 -engine => $self->_ont_engine());
|
|
233
|
|
234 # parse definitions
|
|
235 while( my $term = $self->_next_term() ) {
|
|
236 $self->_add_term( $term, $ont );
|
|
237 }
|
|
238
|
|
239 # set up the ontology of the relationship types
|
|
240 foreach ($self->_part_of_relationship(), $self->_is_a_relationship()) {
|
|
241 $_->ontology($ont);
|
|
242 }
|
|
243
|
|
244 # pre-seed the IO system with the first flat file if -file wasn't provided
|
|
245 if(! $self->_fh) {
|
|
246 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
|
|
247 }
|
|
248
|
|
249 while($self->_fh) {
|
|
250 $self->_parse_flat_file($ont);
|
|
251 # advance to next flat file if more are available
|
|
252 if(@{$self->_flat_files()}) {
|
|
253 $self->close();
|
|
254 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
|
|
255 } else {
|
|
256 last; # nothing else to parse so terminate the loop
|
|
257 }
|
|
258 }
|
|
259 $self->_add_ontology($ont);
|
|
260
|
|
261 # not needed anywhere, only because of backward compatibility
|
|
262 return $self->_ont_engine();
|
|
263
|
|
264 } # parse
|
|
265
|
|
266 =head2 next_ontology
|
|
267
|
|
268 Title : next_ontology
|
|
269 Usage :
|
|
270 Function: Get the next available ontology from the parser. This is the
|
|
271 method prescribed by Bio::OntologyIO.
|
|
272 Example :
|
|
273 Returns : An object implementing Bio::Ontology::OntologyI, and undef if
|
|
274 there is no more ontology in the input.
|
|
275 Args :
|
|
276
|
|
277
|
|
278 =cut
|
|
279
|
|
280 sub next_ontology{
|
|
281 my $self = shift;
|
|
282
|
|
283 # parse if not done already
|
|
284 $self->parse() unless exists($self->{'_ontologies'});
|
|
285 # return next available ontology
|
|
286 return shift(@{$self->{'_ontologies'}}) if exists($self->{'_ontologies'});
|
|
287 return undef;
|
|
288 }
|
|
289
|
|
290
|
|
291 =head2 defs_file
|
|
292
|
|
293 Title : defs_file
|
|
294 Usage : $parser->defs_file( "GO.defs" );
|
|
295 Function: Set/get for the term definitions filename.
|
|
296 Returns : The term definitions file name [string].
|
|
297 Args : On set, the term definitions file name [string] (optional).
|
|
298
|
|
299 =cut
|
|
300
|
|
301 sub defs_file {
|
|
302 my $self = shift;
|
|
303
|
|
304 if ( @_ ) {
|
|
305 my $f = shift;
|
|
306 $self->{ "_defs_file_name" } = $f;
|
|
307 $self->_defs_io->close() if $self->_defs_io();
|
|
308 if(defined($f)) {
|
|
309 $self->_defs_io( Bio::Root::IO->new( -input => $f ) );
|
|
310 }
|
|
311 }
|
|
312 return $self->{ "_defs_file_name" };
|
|
313 } # defs_file
|
|
314
|
|
315
|
|
316 =head2 _flat_files
|
|
317
|
|
318 Title : _flat_files
|
|
319 Usage : $files_to_parse = $parser->_flat_files();
|
|
320 Function: Get the array of ontology flat files that need to be parsed.
|
|
321
|
|
322 Note that this array will decrease in elements over the
|
|
323 parsing process. Therefore, it's value outside of this
|
|
324 module will be limited. Also, be careful not to alter the
|
|
325 array unless you know what you are doing.
|
|
326
|
|
327 Returns : a reference to an array of zero or more strings
|
|
328 Args : none
|
|
329
|
|
330 =cut
|
|
331
|
|
332 sub _flat_files {
|
|
333 my $self = shift;
|
|
334
|
|
335 $self->{_flat_files} = [] unless exists($self->{_flat_files});
|
|
336 return $self->{_flat_files};
|
|
337 }
|
|
338
|
|
339
|
|
340 # INTERNAL METHODS
|
|
341 # ----------------
|
|
342
|
|
343 =head2 _defs_io
|
|
344
|
|
345 Title : _defs_io
|
|
346 Usage : $obj->_defs_io($newval)
|
|
347 Function: Get/set the Bio::Root::IO instance representing the
|
|
348 definition file, if provided (see defs_file()).
|
|
349 Example :
|
|
350 Returns : value of _defs_io (a Bio::Root::IO object)
|
|
351 Args : on set, new value (a Bio::Root::IO object or undef, optional)
|
|
352
|
|
353
|
|
354 =cut
|
|
355
|
|
356 sub _defs_io{
|
|
357 my $self = shift;
|
|
358
|
|
359 return $self->{'_defs_io'} = shift if @_;
|
|
360 return $self->{'_defs_io'};
|
|
361 }
|
|
362
|
|
363 sub _add_ontology {
|
|
364 my $self = shift;
|
|
365 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
|
|
366 foreach my $ont (@_) {
|
|
367 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
|
|
368 unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
|
|
369 # the ontology name may have been auto-discovered while parsing
|
|
370 # the file
|
|
371 $ont->name($self->ontology_name) unless $ont->name();
|
|
372 push(@{$self->{'_ontologies'}}, $ont);
|
|
373 }
|
|
374 }
|
|
375
|
|
376 # This simply delegates. See SimpleGOEngine.
|
|
377 sub _add_term {
|
|
378 my ( $self, $term, $ont ) = @_;
|
|
379
|
|
380 $term->ontology($ont) if $ont && (! $term->ontology);
|
|
381 $self->_ont_engine()->add_term( $term );
|
|
382 } # _add_term
|
|
383
|
|
384
|
|
385
|
|
386 # This simply delegates. See SimpleGOEngine
|
|
387 sub _part_of_relationship {
|
|
388 my $self = shift;
|
|
389
|
|
390 return $self->_ont_engine()->part_of_relationship(@_);
|
|
391 } # _part_of_relationship
|
|
392
|
|
393
|
|
394
|
|
395 # This simply delegates. See SimpleGOEngine
|
|
396 sub _is_a_relationship {
|
|
397 my $self = shift;
|
|
398
|
|
399 return $self->_ont_engine()->is_a_relationship(@_);
|
|
400 } # _is_a_relationship
|
|
401
|
|
402
|
|
403
|
|
404 # This simply delegates. See SimpleGOEngine
|
|
405 sub _add_relationship {
|
|
406 my ( $self, $parent, $child, $type, $ont ) = @_;
|
|
407
|
|
408 # note the triple terminology (subject,predicate,object) corresponds to
|
|
409 # (child,type,parent)
|
|
410 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont );
|
|
411
|
|
412
|
|
413 } # _add_relationship
|
|
414
|
|
415
|
|
416 # This simply delegates. See SimpleGOEngine
|
|
417 sub _has_term {
|
|
418 my $self = shift;
|
|
419
|
|
420 return $self->_ont_engine()->has_term( @_ );
|
|
421 } # _add_term
|
|
422
|
|
423
|
|
424
|
|
425 # This parses the relationships files
|
|
426 sub _parse_flat_file {
|
|
427 my $self = shift;
|
|
428 my $ont = shift;
|
|
429
|
|
430 my @stack = ();
|
|
431 my $prev_spaces = -1;
|
|
432 my $prev_term = "";
|
|
433
|
|
434 while( my $line = $self->_readline() ) {
|
|
435
|
|
436 if ( $line =~ /^!/ ) {
|
|
437 next;
|
|
438 }
|
|
439
|
|
440 my $current_term = $self->_get_first_termid( $line );
|
|
441 my @isa_parents = $self->_get_isa_termids( $line );
|
|
442 my @partof_parents = $self->_get_partof_termids( $line );
|
|
443 my @syns = $self->_get_synonyms( $line );
|
|
444 my @sec_go_ids = $self->_get_secondary_termids( $line );
|
|
445 my @cross_refs = $self->_get_db_cross_refs( $line );
|
|
446
|
|
447
|
|
448 if ( ! $self->_has_term( $current_term ) ) {
|
|
449 my $term =$self->_create_ont_entry($self->_get_name($line,
|
|
450 $current_term),
|
|
451 $current_term );
|
|
452 $self->_add_term( $term, $ont );
|
|
453 }
|
|
454
|
|
455 my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
|
|
456
|
|
457 $current_term_object->add_dblink( @cross_refs );
|
|
458 $current_term_object->add_secondary_id( @sec_go_ids );
|
|
459 $current_term_object->add_synonym( @syns );
|
|
460 unless ( $line =~ /^\$/ ) {
|
|
461 $current_term_object->ontology( $ont );
|
|
462 }
|
|
463 foreach my $parent ( @isa_parents ) {
|
|
464 if ( ! $self->_has_term( $parent ) ) {
|
|
465 my $term = $self->_create_ont_entry($self->_get_name($line,
|
|
466 $parent),
|
|
467 $parent );
|
|
468 $self->_add_term( $term, $ont );
|
|
469 }
|
|
470
|
|
471 $self->_add_relationship( $parent,
|
|
472 $current_term,
|
|
473 $self->_is_a_relationship(),
|
|
474 $ont);
|
|
475
|
|
476 }
|
|
477 foreach my $parent ( @partof_parents ) {
|
|
478 if ( ! $self->_has_term( $parent ) ) {
|
|
479 my $term = $self->_create_ont_entry($self->_get_name($line,
|
|
480 $parent),
|
|
481 $parent );
|
|
482 $self->_add_term( $term, $ont );
|
|
483 }
|
|
484
|
|
485 $self->_add_relationship( $parent,
|
|
486 $current_term,
|
|
487 $self->_part_of_relationship(),
|
|
488 $ont);
|
|
489 }
|
|
490
|
|
491 my $current_spaces = $self->_count_spaces( $line );
|
|
492
|
|
493 if ( $current_spaces != $prev_spaces ) {
|
|
494
|
|
495 if ( $current_spaces == $prev_spaces + 1 ) {
|
|
496 push( @stack, $prev_term );
|
|
497 }
|
|
498 elsif ( $current_spaces < $prev_spaces ) {
|
|
499 my $n = $prev_spaces - $current_spaces;
|
|
500 for ( my $i = 0; $i < $n; ++$i ) {
|
|
501 pop( @stack );
|
|
502 }
|
|
503 }
|
|
504 else {
|
|
505 $self->throw( "format error (file ".$self->file.")" );
|
|
506 }
|
|
507 }
|
|
508
|
|
509 my $parent = $stack[ @stack - 1 ];
|
|
510
|
|
511 # add a relationship if the line isn't the one with the root term
|
|
512 # of the ontology (which is also the name of the ontology)
|
|
513 if ( index($line,'$') != 0 ) {
|
|
514 if ( $line !~ /^\s*[<%]/ ) {
|
|
515 $self->throw( "format error (file ".$self->file.")" );
|
|
516 }
|
|
517 my $reltype = ($line =~ /^\s*</) ?
|
|
518 $self->_part_of_relationship() :
|
|
519 $self->_is_a_relationship();
|
|
520 $self->_add_relationship( $parent, $current_term, $reltype,
|
|
521 $ont);
|
|
522 }
|
|
523
|
|
524 $prev_spaces = $current_spaces;
|
|
525
|
|
526 $prev_term = $current_term;
|
|
527
|
|
528 }
|
|
529 return $ont;
|
|
530 } # _parse_relationships_file
|
|
531
|
|
532
|
|
533
|
|
534 # Parses the 1st term id number out of line.
|
|
535 sub _get_first_termid {
|
|
536 my ( $self, $line ) = @_;
|
|
537
|
|
538 if ( $line =~ /;\s*([A-Z]{1,8}:\d{3,})/ ) {
|
|
539 return $1;
|
|
540 }
|
|
541 else {
|
|
542 $self->throw( "format error: no term id in line \"$line\"" );
|
|
543 }
|
|
544
|
|
545 } # _get_first_termid
|
|
546
|
|
547
|
|
548
|
|
549 # Parses the name out of line.
|
|
550 sub _get_name {
|
|
551 my ( $self, $line, $termid ) = @_;
|
|
552
|
|
553 if ( $line =~ /([^;<%]+);\s*$termid/ ) {
|
|
554 my $name = $1;
|
|
555 # remove trailing and leading whitespace
|
|
556 $name =~ s/\s+$//;
|
|
557 $name =~ s/^\s+//;
|
|
558 # remove leading dollar character; also we default the name of the
|
|
559 # ontology to this name if preset to something else
|
|
560 if(index($name,'$') == 0) {
|
|
561 $name = substr($name,1);
|
|
562 # replace underscores by spaces for setting the ontology name
|
|
563 $self->ontology_name(join(" ",split(/_/,$name)))
|
|
564 unless $self->ontology_name();
|
|
565 }
|
|
566 return $name;
|
|
567 }
|
|
568 else {
|
|
569 return undef;
|
|
570 }
|
|
571 } # _get_name
|
|
572
|
|
573
|
|
574 # Parses the synonyms out of line.
|
|
575 sub _get_synonyms {
|
|
576 my ( $self, $line ) = @_;
|
|
577
|
|
578 my @synonyms = ();
|
|
579
|
|
580 while ( $line =~ /synonym\s*:\s*([^;^<^%]+)/g ) {
|
|
581 my $syn = $1;
|
|
582 $syn =~ s/\s+$//;
|
|
583 $syn =~ s/^\s+//;
|
|
584 push( @synonyms, $syn );
|
|
585 }
|
|
586 return @synonyms;
|
|
587
|
|
588 } # _get_synonyms
|
|
589
|
|
590
|
|
591
|
|
592 # Parses the db cross refs out of line.
|
|
593 sub _get_db_cross_refs {
|
|
594 my ( $self, $line ) = @_;
|
|
595
|
|
596 my @refs = ();
|
|
597
|
|
598 while ( $line =~ /;([^;^<^%^:]+:[^;^<^%^:]+)/g ) {
|
|
599 my $ref = $1;
|
|
600 if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) {
|
|
601 next;
|
|
602 }
|
|
603 $ref =~ s/\s+$//;
|
|
604 $ref =~ s/^\s+//;
|
|
605 push( @refs, $ref );
|
|
606 }
|
|
607 return @refs;
|
|
608
|
|
609 }
|
|
610
|
|
611
|
|
612 # Parses the secondary go ids out of a line
|
|
613 sub _get_secondary_termids {
|
|
614 my ( $self, $line ) = @_;
|
|
615 my @secs = ();
|
|
616
|
|
617 while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) {
|
|
618 my $sec = $1;
|
|
619 push( @secs, $sec );
|
|
620 }
|
|
621 return @secs;
|
|
622
|
|
623 } # _get_secondary_termids
|
|
624
|
|
625
|
|
626
|
|
627 # Parses the is a ids out of a line
|
|
628 sub _get_isa_termids {
|
|
629 my ( $self, $line ) = @_;
|
|
630
|
|
631 my @ids = ();
|
|
632
|
|
633 $line =~ s/[A-Z]{1,8}:\d{3,}//;
|
|
634
|
|
635 while ( $line =~ /%[^<^,]*?([A-Z]{1,8}:\d{3,})/g ) {
|
|
636 push( @ids, $1 );
|
|
637 }
|
|
638 return @ids;
|
|
639 } # _get_isa_termids
|
|
640
|
|
641
|
|
642
|
|
643 # Parses the part of ids out of a line
|
|
644 sub _get_partof_termids {
|
|
645 my ( $self, $line ) = @_;
|
|
646
|
|
647 my @ids = ();
|
|
648
|
|
649 $line =~ s/[A-Z]{1,8}:\d{3,}//;
|
|
650
|
|
651 while ( $line =~ /<[^%^,]*?([A-Z]{1,8}:\d{3,})/g ) {
|
|
652 push( @ids, $1 );
|
|
653 }
|
|
654 return @ids;
|
|
655
|
|
656
|
|
657 } # _get_partof_termids
|
|
658
|
|
659
|
|
660
|
|
661
|
|
662 # Counts the spaces at the beginning of a line in the relationships files
|
|
663 sub _count_spaces {
|
|
664 my ( $self, $line ) = @_;
|
|
665
|
|
666 if ( $line =~ /^(\s+)/ ) {
|
|
667 return length( $1 );
|
|
668 }
|
|
669 else {
|
|
670 return 0;
|
|
671 }
|
|
672 } # _count_spaces
|
|
673
|
|
674
|
|
675
|
|
676
|
|
677 # "next" method for parsing the defintions file
|
|
678 sub _next_term {
|
|
679 my ( $self ) = @_;
|
|
680
|
|
681 if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
|
|
682 return undef;
|
|
683 }
|
|
684
|
|
685 my $line = "";
|
|
686 my $termid = "";
|
|
687 my $next_term = $self->_term();
|
|
688 my $def = "";
|
|
689 my $comment = "";
|
|
690 my @def_refs = ();
|
|
691 my $isobsolete;
|
|
692
|
|
693 while( $line = ( $self->_defs_io->_readline() ) ) {
|
|
694
|
|
695 if ( $line !~ /\S/
|
|
696 || $line =~ /^\s*!/ ) {
|
|
697 next;
|
|
698 }
|
|
699 elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
|
|
700 $self->_term( $1 );
|
|
701 last if $self->_not_first_record();
|
|
702 $next_term = $1;
|
|
703 $self->_not_first_record( TRUE );
|
|
704 }
|
|
705 elsif ( $line =~ /^\s*[a-z]{1,8}id:\s*(.+)/ ) {
|
|
706 $termid = $1;
|
|
707 }
|
|
708 elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
|
|
709 $def = $1;
|
|
710 $isobsolete = 1 if index($def,"OBSOLETE") == 0;
|
|
711 }
|
|
712 elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
|
|
713 push( @def_refs, $1 );
|
|
714 }
|
|
715 elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
|
|
716 $comment = $1;
|
|
717 }
|
|
718 }
|
|
719 $self->_done( TRUE ) unless $line; # we'll come back until done
|
|
720
|
|
721 return $self->_create_ont_entry( $next_term, $termid, $def,
|
|
722 $comment, \@def_refs, $isobsolete);
|
|
723 } # _next_term
|
|
724
|
|
725
|
|
726
|
|
727
|
|
728
|
|
729 # Holds the GO engine to be parsed into
|
|
730 sub _ont_engine {
|
|
731 my ( $self, $value ) = @_;
|
|
732
|
|
733 if ( defined $value ) {
|
|
734 $self->{ "_ont_engine" } = $value;
|
|
735 }
|
|
736
|
|
737 return $self->{ "_ont_engine" };
|
|
738 } # _ont_engine
|
|
739
|
|
740
|
|
741
|
|
742
|
|
743 # Used to create ontology terms.
|
|
744 # Arguments: name, id
|
|
745 sub _create_ont_entry {
|
|
746 my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;
|
|
747
|
|
748 if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) {
|
|
749 $obsolete = 1;
|
|
750 }
|
|
751 my $term = $self->term_factory->create_object(-name => $name,
|
|
752 -identifier => $termid,
|
|
753 -definition => $def,
|
|
754 -comment => $cmt,
|
|
755 -dblinks => $dbxrefs,
|
|
756 -is_obsolete => $obsolete);
|
|
757
|
|
758 return $term;
|
|
759 } # _create_ont_entry
|
|
760
|
|
761
|
|
762
|
|
763 # Holds whether first record or not
|
|
764 sub _not_first_record {
|
|
765 my ( $self, $value ) = @_;
|
|
766
|
|
767 if ( defined $value ) {
|
|
768 $self->{ "_not_first_record" } = $value;
|
|
769 }
|
|
770
|
|
771 return $self->{ "_not_first_record" };
|
|
772 } # _not_first_record
|
|
773
|
|
774
|
|
775
|
|
776 # Holds whether done or not
|
|
777 sub _done {
|
|
778 my ( $self, $value ) = @_;
|
|
779
|
|
780 if ( defined $value ) {
|
|
781 $self->{ "_done" } = $value;
|
|
782 }
|
|
783
|
|
784 return $self->{ "_done" };
|
|
785 } # _done
|
|
786
|
|
787
|
|
788 # Holds a term.
|
|
789 sub _term {
|
|
790 my ( $self, $value ) = @_;
|
|
791
|
|
792 if ( defined $value ) {
|
|
793 $self->{ "_term" } = $value;
|
|
794 }
|
|
795
|
|
796 return $self->{ "_term" };
|
|
797 } # _term
|
|
798
|
|
799
|