Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/OntologyIO/dagflat.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: dagflat.pm,v 1.2.2.6 2003/06/30 05:04:06 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::OntologyIO::dagflat | |
4 # | |
5 # Cared for by Hilmar Lapp, hlapp at gmx.net | |
6 # | |
7 # (c) Christian M. Zmasek, czmasek@gnf.org, 2002. | |
8 # (c) Hilmar Lapp, hlapp at gmx.net, 2003. | |
9 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. | |
10 # | |
11 # You may distribute this module under the same terms as perl itself. | |
12 # Refer to the Perl Artistic License (see the license accompanying this | |
13 # software package, or see http://www.perl.com/language/misc/Artistic.html) | |
14 # for the terms under which you may use, modify, and redistribute this module. | |
15 # | |
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED | |
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | |
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
19 # | |
20 # You may distribute this module under the same terms as perl itself | |
21 | |
22 # POD documentation - main docs before the code | |
23 | |
24 =head1 NAME | |
25 | |
26 dagflat - a base class parser for GO flat-file type formats | |
27 | |
28 =head1 SYNOPSIS | |
29 | |
30 use Bio::OntologyIO; | |
31 | |
32 # do not use directly -- use via Bio::OntologyIO | |
33 # e.g., the GO parser is a simple extension of this class | |
34 my $parser = Bio::OntologyIO->new | |
35 ( -format => "go", | |
36 -defs_file => "/home/czmasek/GO/GO.defs", | |
37 -files => ["/home/czmasek/GO/component.ontology", | |
38 "/home/czmasek/GO/function.ontology", | |
39 "/home/czmasek/GO/process.ontology"] ); | |
40 | |
41 my $go_ontology = $parser->next_ontology(); | |
42 | |
43 my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" ); | |
44 my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" ); | |
45 | |
46 =head1 DESCRIPTION | |
47 | |
48 Needs Graph.pm from CPAN. | |
49 | |
50 =head1 FEEDBACK | |
51 | |
52 =head2 Mailing Lists | |
53 | |
54 User feedback is an integral part of the evolution of this and other | |
55 Bioperl modules. Send your comments and suggestions preferably to the | |
56 Bioperl mailing lists Your participation is much appreciated. | |
57 | |
58 bioperl-l@bioperl.org - General discussion | |
59 http://bio.perl.org/MailList.html - About the mailing lists | |
60 | |
61 =head2 Reporting Bugs | |
62 | |
63 report bugs to the Bioperl bug tracking system to help us keep track | |
64 the bugs and their resolution. Bug reports can be submitted via | |
65 email or the web: | |
66 | |
67 bioperl-bugs@bio.perl.org | |
68 http://bugzilla.bioperl.org/ | |
69 | |
70 =head1 AUTHOR | |
71 | |
72 Christian M. Zmasek | |
73 | |
74 Email: czmasek@gnf.org or cmzmasek@yahoo.com | |
75 | |
76 WWW: http://www.genetics.wustl.edu/eddy/people/zmasek/ | |
77 | |
78 Address: | |
79 | |
80 Genomics Institute of the Novartis Research Foundation | |
81 10675 John Jay Hopkins Drive | |
82 San Diego, CA 92121 | |
83 | |
84 =head2 CONTRIBUTOR | |
85 | |
86 Hilmar Lapp, hlapp at gmx.net | |
87 | |
88 =head1 APPENDIX | |
89 | |
90 The rest of the documentation details each of the object | |
91 methods. Internal methods are usually preceded with a _ | |
92 | |
93 =cut | |
94 | |
95 | |
96 # Let the code begin... | |
97 | |
98 | |
99 package Bio::OntologyIO::dagflat; | |
100 | |
101 use vars qw( @ISA ); | |
102 use strict; | |
103 | |
104 use Bio::Root::IO; | |
105 use Bio::Ontology::SimpleGOEngine; | |
106 use Bio::Ontology::Ontology; | |
107 use Bio::Ontology::TermFactory; | |
108 use Bio::OntologyIO; | |
109 | |
110 use constant TRUE => 1; | |
111 use constant FALSE => 0; | |
112 | |
113 | |
114 @ISA = qw( Bio::OntologyIO ); | |
115 | |
116 | |
117 =head2 new | |
118 | |
119 Title : new | |
120 Usage : $parser = Bio::OntologyIO->new( | |
121 -format => "go", | |
122 -defs_file => "/path/to/GO.defs", | |
123 -files => ["/path/to/component.ontology", | |
124 "/path/to/function.ontology", | |
125 "/path/to/process.ontology"] ); | |
126 Function: Creates a new dagflat parser. | |
127 Returns : A new dagflat parser object, implementing L<Bio::OntologyIO>. | |
128 Args : -defs_file => the name of the file holding the term | |
129 definitions | |
130 -files => a single ontology flat file holding the | |
131 term relationships, or an array ref holding | |
132 the file names (for GO, there will usually be | |
133 3 files: component.ontology, function.ontology, | |
134 process.ontology) | |
135 -file => if there is only a single flat file, it may | |
136 also be specified via the -file parameter | |
137 -ontology_name => the name of the ontology; if not specified the | |
138 parser will auto-discover it by using the term | |
139 that starts with a '$', and converting underscores | |
140 to spaces | |
141 -engine => the Bio::Ontology::OntologyEngineI object | |
142 to be reused (will be created otherwise); note | |
143 that every Bio::Ontology::OntologyI will | |
144 qualify as well since that one inherits from the | |
145 former. | |
146 | |
147 =cut | |
148 | |
149 # in reality, we let OntologyIO::new do the instantiation, and override | |
150 # _initialize for all initialization work | |
151 sub _initialize { | |
152 my ($self, @args) = @_; | |
153 | |
154 $self->SUPER::_initialize( @args ); | |
155 | |
156 my ( $defs_file_name,$files,$name,$eng ) = | |
157 $self->_rearrange([qw( DEFS_FILE | |
158 FILES | |
159 ONTOLOGY_NAME | |
160 ENGINE) | |
161 ], | |
162 @args ); | |
163 | |
164 $self->_done( FALSE ); | |
165 $self->_not_first_record( FALSE ); | |
166 $self->_term( "" ); | |
167 delete $self->{'_ontologies'}; | |
168 | |
169 # ontology engine (and possibly name if it's an OntologyI) | |
170 $eng = Bio::Ontology::SimpleGOEngine->new() unless $eng; | |
171 if($eng->isa("Bio::Ontology::OntologyI")) { | |
172 $self->ontology_name($eng->name()); | |
173 $eng = $eng->engine() if $eng->can('engine'); | |
174 } | |
175 $self->_ont_engine($eng); | |
176 | |
177 # flat files to parse | |
178 $self->defs_file( $defs_file_name ); | |
179 $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : []; | |
180 | |
181 # ontology name (overrides implicit one through OntologyI engine) | |
182 $self->ontology_name($name) if $name; | |
183 | |
184 } # _initialize | |
185 | |
186 =head2 ontology_name | |
187 | |
188 Title : ontology_name | |
189 Usage : $obj->ontology_name($newval) | |
190 Function: Get/set the name of the ontology parsed by this module. | |
191 Example : | |
192 Returns : value of ontology_name (a scalar) | |
193 Args : on set, new value (a scalar or undef, optional) | |
194 | |
195 | |
196 =cut | |
197 | |
198 sub ontology_name{ | |
199 my $self = shift; | |
200 | |
201 return $self->{'ontology_name'} = shift if @_; | |
202 return $self->{'ontology_name'}; | |
203 } | |
204 | |
205 | |
206 =head2 parse | |
207 | |
208 Title : parse() | |
209 Usage : $parser->parse(); | |
210 Function: Parses the files set with "new" or with methods | |
211 defs_file and _flat_files. | |
212 | |
213 Normally you should not need to call this method as it will | |
214 be called automatically upon the first call to | |
215 next_ontology(). | |
216 | |
217 Returns : [Bio::Ontology::OntologyEngineI] | |
218 Args : | |
219 | |
220 =cut | |
221 | |
222 sub parse { | |
223 my $self = shift; | |
224 | |
225 # setup the default term factory if not done by anyone yet | |
226 $self->term_factory(Bio::Ontology::TermFactory->new( | |
227 -type => "Bio::Ontology::Term")) | |
228 unless $self->term_factory(); | |
229 | |
230 # create the ontology object itself | |
231 my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(), | |
232 -engine => $self->_ont_engine()); | |
233 | |
234 # parse definitions | |
235 while( my $term = $self->_next_term() ) { | |
236 $self->_add_term( $term, $ont ); | |
237 } | |
238 | |
239 # set up the ontology of the relationship types | |
240 foreach ($self->_part_of_relationship(), $self->_is_a_relationship()) { | |
241 $_->ontology($ont); | |
242 } | |
243 | |
244 # pre-seed the IO system with the first flat file if -file wasn't provided | |
245 if(! $self->_fh) { | |
246 $self->_initialize_io(-file => shift(@{$self->_flat_files()})); | |
247 } | |
248 | |
249 while($self->_fh) { | |
250 $self->_parse_flat_file($ont); | |
251 # advance to next flat file if more are available | |
252 if(@{$self->_flat_files()}) { | |
253 $self->close(); | |
254 $self->_initialize_io(-file => shift(@{$self->_flat_files()})); | |
255 } else { | |
256 last; # nothing else to parse so terminate the loop | |
257 } | |
258 } | |
259 $self->_add_ontology($ont); | |
260 | |
261 # not needed anywhere, only because of backward compatibility | |
262 return $self->_ont_engine(); | |
263 | |
264 } # parse | |
265 | |
266 =head2 next_ontology | |
267 | |
268 Title : next_ontology | |
269 Usage : | |
270 Function: Get the next available ontology from the parser. This is the | |
271 method prescribed by Bio::OntologyIO. | |
272 Example : | |
273 Returns : An object implementing Bio::Ontology::OntologyI, and undef if | |
274 there is no more ontology in the input. | |
275 Args : | |
276 | |
277 | |
278 =cut | |
279 | |
280 sub next_ontology{ | |
281 my $self = shift; | |
282 | |
283 # parse if not done already | |
284 $self->parse() unless exists($self->{'_ontologies'}); | |
285 # return next available ontology | |
286 return shift(@{$self->{'_ontologies'}}) if exists($self->{'_ontologies'}); | |
287 return undef; | |
288 } | |
289 | |
290 | |
291 =head2 defs_file | |
292 | |
293 Title : defs_file | |
294 Usage : $parser->defs_file( "GO.defs" ); | |
295 Function: Set/get for the term definitions filename. | |
296 Returns : The term definitions file name [string]. | |
297 Args : On set, the term definitions file name [string] (optional). | |
298 | |
299 =cut | |
300 | |
301 sub defs_file { | |
302 my $self = shift; | |
303 | |
304 if ( @_ ) { | |
305 my $f = shift; | |
306 $self->{ "_defs_file_name" } = $f; | |
307 $self->_defs_io->close() if $self->_defs_io(); | |
308 if(defined($f)) { | |
309 $self->_defs_io( Bio::Root::IO->new( -input => $f ) ); | |
310 } | |
311 } | |
312 return $self->{ "_defs_file_name" }; | |
313 } # defs_file | |
314 | |
315 | |
316 =head2 _flat_files | |
317 | |
318 Title : _flat_files | |
319 Usage : $files_to_parse = $parser->_flat_files(); | |
320 Function: Get the array of ontology flat files that need to be parsed. | |
321 | |
322 Note that this array will decrease in elements over the | |
323 parsing process. Therefore, it's value outside of this | |
324 module will be limited. Also, be careful not to alter the | |
325 array unless you know what you are doing. | |
326 | |
327 Returns : a reference to an array of zero or more strings | |
328 Args : none | |
329 | |
330 =cut | |
331 | |
332 sub _flat_files { | |
333 my $self = shift; | |
334 | |
335 $self->{_flat_files} = [] unless exists($self->{_flat_files}); | |
336 return $self->{_flat_files}; | |
337 } | |
338 | |
339 | |
340 # INTERNAL METHODS | |
341 # ---------------- | |
342 | |
343 =head2 _defs_io | |
344 | |
345 Title : _defs_io | |
346 Usage : $obj->_defs_io($newval) | |
347 Function: Get/set the Bio::Root::IO instance representing the | |
348 definition file, if provided (see defs_file()). | |
349 Example : | |
350 Returns : value of _defs_io (a Bio::Root::IO object) | |
351 Args : on set, new value (a Bio::Root::IO object or undef, optional) | |
352 | |
353 | |
354 =cut | |
355 | |
356 sub _defs_io{ | |
357 my $self = shift; | |
358 | |
359 return $self->{'_defs_io'} = shift if @_; | |
360 return $self->{'_defs_io'}; | |
361 } | |
362 | |
363 sub _add_ontology { | |
364 my $self = shift; | |
365 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'}); | |
366 foreach my $ont (@_) { | |
367 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI") | |
368 unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI"); | |
369 # the ontology name may have been auto-discovered while parsing | |
370 # the file | |
371 $ont->name($self->ontology_name) unless $ont->name(); | |
372 push(@{$self->{'_ontologies'}}, $ont); | |
373 } | |
374 } | |
375 | |
376 # This simply delegates. See SimpleGOEngine. | |
377 sub _add_term { | |
378 my ( $self, $term, $ont ) = @_; | |
379 | |
380 $term->ontology($ont) if $ont && (! $term->ontology); | |
381 $self->_ont_engine()->add_term( $term ); | |
382 } # _add_term | |
383 | |
384 | |
385 | |
386 # This simply delegates. See SimpleGOEngine | |
387 sub _part_of_relationship { | |
388 my $self = shift; | |
389 | |
390 return $self->_ont_engine()->part_of_relationship(@_); | |
391 } # _part_of_relationship | |
392 | |
393 | |
394 | |
395 # This simply delegates. See SimpleGOEngine | |
396 sub _is_a_relationship { | |
397 my $self = shift; | |
398 | |
399 return $self->_ont_engine()->is_a_relationship(@_); | |
400 } # _is_a_relationship | |
401 | |
402 | |
403 | |
404 # This simply delegates. See SimpleGOEngine | |
405 sub _add_relationship { | |
406 my ( $self, $parent, $child, $type, $ont ) = @_; | |
407 | |
408 # note the triple terminology (subject,predicate,object) corresponds to | |
409 # (child,type,parent) | |
410 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont ); | |
411 | |
412 | |
413 } # _add_relationship | |
414 | |
415 | |
416 # This simply delegates. See SimpleGOEngine | |
417 sub _has_term { | |
418 my $self = shift; | |
419 | |
420 return $self->_ont_engine()->has_term( @_ ); | |
421 } # _add_term | |
422 | |
423 | |
424 | |
425 # This parses the relationships files | |
426 sub _parse_flat_file { | |
427 my $self = shift; | |
428 my $ont = shift; | |
429 | |
430 my @stack = (); | |
431 my $prev_spaces = -1; | |
432 my $prev_term = ""; | |
433 | |
434 while( my $line = $self->_readline() ) { | |
435 | |
436 if ( $line =~ /^!/ ) { | |
437 next; | |
438 } | |
439 | |
440 my $current_term = $self->_get_first_termid( $line ); | |
441 my @isa_parents = $self->_get_isa_termids( $line ); | |
442 my @partof_parents = $self->_get_partof_termids( $line ); | |
443 my @syns = $self->_get_synonyms( $line ); | |
444 my @sec_go_ids = $self->_get_secondary_termids( $line ); | |
445 my @cross_refs = $self->_get_db_cross_refs( $line ); | |
446 | |
447 | |
448 if ( ! $self->_has_term( $current_term ) ) { | |
449 my $term =$self->_create_ont_entry($self->_get_name($line, | |
450 $current_term), | |
451 $current_term ); | |
452 $self->_add_term( $term, $ont ); | |
453 } | |
454 | |
455 my $current_term_object = $self->_ont_engine()->get_terms( $current_term ); | |
456 | |
457 $current_term_object->add_dblink( @cross_refs ); | |
458 $current_term_object->add_secondary_id( @sec_go_ids ); | |
459 $current_term_object->add_synonym( @syns ); | |
460 unless ( $line =~ /^\$/ ) { | |
461 $current_term_object->ontology( $ont ); | |
462 } | |
463 foreach my $parent ( @isa_parents ) { | |
464 if ( ! $self->_has_term( $parent ) ) { | |
465 my $term = $self->_create_ont_entry($self->_get_name($line, | |
466 $parent), | |
467 $parent ); | |
468 $self->_add_term( $term, $ont ); | |
469 } | |
470 | |
471 $self->_add_relationship( $parent, | |
472 $current_term, | |
473 $self->_is_a_relationship(), | |
474 $ont); | |
475 | |
476 } | |
477 foreach my $parent ( @partof_parents ) { | |
478 if ( ! $self->_has_term( $parent ) ) { | |
479 my $term = $self->_create_ont_entry($self->_get_name($line, | |
480 $parent), | |
481 $parent ); | |
482 $self->_add_term( $term, $ont ); | |
483 } | |
484 | |
485 $self->_add_relationship( $parent, | |
486 $current_term, | |
487 $self->_part_of_relationship(), | |
488 $ont); | |
489 } | |
490 | |
491 my $current_spaces = $self->_count_spaces( $line ); | |
492 | |
493 if ( $current_spaces != $prev_spaces ) { | |
494 | |
495 if ( $current_spaces == $prev_spaces + 1 ) { | |
496 push( @stack, $prev_term ); | |
497 } | |
498 elsif ( $current_spaces < $prev_spaces ) { | |
499 my $n = $prev_spaces - $current_spaces; | |
500 for ( my $i = 0; $i < $n; ++$i ) { | |
501 pop( @stack ); | |
502 } | |
503 } | |
504 else { | |
505 $self->throw( "format error (file ".$self->file.")" ); | |
506 } | |
507 } | |
508 | |
509 my $parent = $stack[ @stack - 1 ]; | |
510 | |
511 # add a relationship if the line isn't the one with the root term | |
512 # of the ontology (which is also the name of the ontology) | |
513 if ( index($line,'$') != 0 ) { | |
514 if ( $line !~ /^\s*[<%]/ ) { | |
515 $self->throw( "format error (file ".$self->file.")" ); | |
516 } | |
517 my $reltype = ($line =~ /^\s*</) ? | |
518 $self->_part_of_relationship() : | |
519 $self->_is_a_relationship(); | |
520 $self->_add_relationship( $parent, $current_term, $reltype, | |
521 $ont); | |
522 } | |
523 | |
524 $prev_spaces = $current_spaces; | |
525 | |
526 $prev_term = $current_term; | |
527 | |
528 } | |
529 return $ont; | |
530 } # _parse_relationships_file | |
531 | |
532 | |
533 | |
534 # Parses the 1st term id number out of line. | |
535 sub _get_first_termid { | |
536 my ( $self, $line ) = @_; | |
537 | |
538 if ( $line =~ /;\s*([A-Z]{1,8}:\d{3,})/ ) { | |
539 return $1; | |
540 } | |
541 else { | |
542 $self->throw( "format error: no term id in line \"$line\"" ); | |
543 } | |
544 | |
545 } # _get_first_termid | |
546 | |
547 | |
548 | |
549 # Parses the name out of line. | |
550 sub _get_name { | |
551 my ( $self, $line, $termid ) = @_; | |
552 | |
553 if ( $line =~ /([^;<%]+);\s*$termid/ ) { | |
554 my $name = $1; | |
555 # remove trailing and leading whitespace | |
556 $name =~ s/\s+$//; | |
557 $name =~ s/^\s+//; | |
558 # remove leading dollar character; also we default the name of the | |
559 # ontology to this name if preset to something else | |
560 if(index($name,'$') == 0) { | |
561 $name = substr($name,1); | |
562 # replace underscores by spaces for setting the ontology name | |
563 $self->ontology_name(join(" ",split(/_/,$name))) | |
564 unless $self->ontology_name(); | |
565 } | |
566 return $name; | |
567 } | |
568 else { | |
569 return undef; | |
570 } | |
571 } # _get_name | |
572 | |
573 | |
574 # Parses the synonyms out of line. | |
575 sub _get_synonyms { | |
576 my ( $self, $line ) = @_; | |
577 | |
578 my @synonyms = (); | |
579 | |
580 while ( $line =~ /synonym\s*:\s*([^;^<^%]+)/g ) { | |
581 my $syn = $1; | |
582 $syn =~ s/\s+$//; | |
583 $syn =~ s/^\s+//; | |
584 push( @synonyms, $syn ); | |
585 } | |
586 return @synonyms; | |
587 | |
588 } # _get_synonyms | |
589 | |
590 | |
591 | |
592 # Parses the db cross refs out of line. | |
593 sub _get_db_cross_refs { | |
594 my ( $self, $line ) = @_; | |
595 | |
596 my @refs = (); | |
597 | |
598 while ( $line =~ /;([^;^<^%^:]+:[^;^<^%^:]+)/g ) { | |
599 my $ref = $1; | |
600 if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) { | |
601 next; | |
602 } | |
603 $ref =~ s/\s+$//; | |
604 $ref =~ s/^\s+//; | |
605 push( @refs, $ref ); | |
606 } | |
607 return @refs; | |
608 | |
609 } | |
610 | |
611 | |
612 # Parses the secondary go ids out of a line | |
613 sub _get_secondary_termids { | |
614 my ( $self, $line ) = @_; | |
615 my @secs = (); | |
616 | |
617 while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) { | |
618 my $sec = $1; | |
619 push( @secs, $sec ); | |
620 } | |
621 return @secs; | |
622 | |
623 } # _get_secondary_termids | |
624 | |
625 | |
626 | |
627 # Parses the is a ids out of a line | |
628 sub _get_isa_termids { | |
629 my ( $self, $line ) = @_; | |
630 | |
631 my @ids = (); | |
632 | |
633 $line =~ s/[A-Z]{1,8}:\d{3,}//; | |
634 | |
635 while ( $line =~ /%[^<^,]*?([A-Z]{1,8}:\d{3,})/g ) { | |
636 push( @ids, $1 ); | |
637 } | |
638 return @ids; | |
639 } # _get_isa_termids | |
640 | |
641 | |
642 | |
643 # Parses the part of ids out of a line | |
644 sub _get_partof_termids { | |
645 my ( $self, $line ) = @_; | |
646 | |
647 my @ids = (); | |
648 | |
649 $line =~ s/[A-Z]{1,8}:\d{3,}//; | |
650 | |
651 while ( $line =~ /<[^%^,]*?([A-Z]{1,8}:\d{3,})/g ) { | |
652 push( @ids, $1 ); | |
653 } | |
654 return @ids; | |
655 | |
656 | |
657 } # _get_partof_termids | |
658 | |
659 | |
660 | |
661 | |
662 # Counts the spaces at the beginning of a line in the relationships files | |
663 sub _count_spaces { | |
664 my ( $self, $line ) = @_; | |
665 | |
666 if ( $line =~ /^(\s+)/ ) { | |
667 return length( $1 ); | |
668 } | |
669 else { | |
670 return 0; | |
671 } | |
672 } # _count_spaces | |
673 | |
674 | |
675 | |
676 | |
677 # "next" method for parsing the defintions file | |
678 sub _next_term { | |
679 my ( $self ) = @_; | |
680 | |
681 if ( ($self->_done() == TRUE) || (! $self->_defs_io())) { | |
682 return undef; | |
683 } | |
684 | |
685 my $line = ""; | |
686 my $termid = ""; | |
687 my $next_term = $self->_term(); | |
688 my $def = ""; | |
689 my $comment = ""; | |
690 my @def_refs = (); | |
691 my $isobsolete; | |
692 | |
693 while( $line = ( $self->_defs_io->_readline() ) ) { | |
694 | |
695 if ( $line !~ /\S/ | |
696 || $line =~ /^\s*!/ ) { | |
697 next; | |
698 } | |
699 elsif ( $line =~ /^\s*term:\s*(.+)/ ) { | |
700 $self->_term( $1 ); | |
701 last if $self->_not_first_record(); | |
702 $next_term = $1; | |
703 $self->_not_first_record( TRUE ); | |
704 } | |
705 elsif ( $line =~ /^\s*[a-z]{1,8}id:\s*(.+)/ ) { | |
706 $termid = $1; | |
707 } | |
708 elsif ( $line =~ /^\s*definition:\s*(.+)/ ) { | |
709 $def = $1; | |
710 $isobsolete = 1 if index($def,"OBSOLETE") == 0; | |
711 } | |
712 elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) { | |
713 push( @def_refs, $1 ); | |
714 } | |
715 elsif ( $line =~ /^\s*comment:\s*(.+)/ ) { | |
716 $comment = $1; | |
717 } | |
718 } | |
719 $self->_done( TRUE ) unless $line; # we'll come back until done | |
720 | |
721 return $self->_create_ont_entry( $next_term, $termid, $def, | |
722 $comment, \@def_refs, $isobsolete); | |
723 } # _next_term | |
724 | |
725 | |
726 | |
727 | |
728 | |
729 # Holds the GO engine to be parsed into | |
730 sub _ont_engine { | |
731 my ( $self, $value ) = @_; | |
732 | |
733 if ( defined $value ) { | |
734 $self->{ "_ont_engine" } = $value; | |
735 } | |
736 | |
737 return $self->{ "_ont_engine" }; | |
738 } # _ont_engine | |
739 | |
740 | |
741 | |
742 | |
743 # Used to create ontology terms. | |
744 # Arguments: name, id | |
745 sub _create_ont_entry { | |
746 my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_; | |
747 | |
748 if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) { | |
749 $obsolete = 1; | |
750 } | |
751 my $term = $self->term_factory->create_object(-name => $name, | |
752 -identifier => $termid, | |
753 -definition => $def, | |
754 -comment => $cmt, | |
755 -dblinks => $dbxrefs, | |
756 -is_obsolete => $obsolete); | |
757 | |
758 return $term; | |
759 } # _create_ont_entry | |
760 | |
761 | |
762 | |
763 # Holds whether first record or not | |
764 sub _not_first_record { | |
765 my ( $self, $value ) = @_; | |
766 | |
767 if ( defined $value ) { | |
768 $self->{ "_not_first_record" } = $value; | |
769 } | |
770 | |
771 return $self->{ "_not_first_record" }; | |
772 } # _not_first_record | |
773 | |
774 | |
775 | |
776 # Holds whether done or not | |
777 sub _done { | |
778 my ( $self, $value ) = @_; | |
779 | |
780 if ( defined $value ) { | |
781 $self->{ "_done" } = $value; | |
782 } | |
783 | |
784 return $self->{ "_done" }; | |
785 } # _done | |
786 | |
787 | |
788 # Holds a term. | |
789 sub _term { | |
790 my ( $self, $value ) = @_; | |
791 | |
792 if ( defined $value ) { | |
793 $self->{ "_term" } = $value; | |
794 } | |
795 | |
796 return $self->{ "_term" }; | |
797 } # _term | |
798 | |
799 |