comparison variant_effect_predictor/Bio/OntologyIO/Handlers/InterProHandler.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: InterProHandler.pm,v 1.7.2.2 2003/03/27 10:07:57 lapp Exp $
2 #
3 # BioPerl module for InterProHandler
4 #
5 # Cared for by Peter Dimitrov <dimitrov@gnf.org>
6 #
7 # Copyright Peter Dimitrov
8 # (c) Peter Dimitrov, dimitrov@gnf.org, 2003.
9 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
10 #
11 # You may distribute this module under the same terms as perl itself.
12 # Refer to the Perl Artistic License (see the license accompanying this
13 # software package, or see http://www.perl.com/language/misc/Artistic.html)
14 # for the terms under which you may use, modify, and redistribute this module.
15 #
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19 #
20 # POD documentation - main docs before the code
21
22 =head1 NAME
23
24 InterProHandler - XML handler class for InterProParser
25
26 =head1 SYNOPSIS
27
28 # do not use directly - used and instantiated by InterProParser
29
30 =head1 DESCRIPTION
31
32 Handles xml events generated by InterProParser when parsing InterPro
33 xml files.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/MailList.shtml - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 of the bugs and their resolution. Bug reports can be submitted via
50 email or the web:
51
52 bioperl-bugs@bioperl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Peter Dimitrov
56
57 Email dimitrov@gnf.org
58
59 =head1 CONTRIBUTORS
60
61 Additional contributors names and emails here
62
63 =head1 APPENDIX
64
65 The rest of the documentation details each of the object methods.
66 Internal methods are usually preceded with a _
67
68 =cut
69
70
71 # Let the code begin...
72
73
74 package Bio::OntologyIO::Handlers::InterProHandler;
75 use vars qw(@ISA);
76 use strict;
77 use Carp;
78 use Bio::Root::Root;
79 use Bio::Ontology::Ontology;
80 use Bio::Ontology::RelationshipType;
81 use Bio::Ontology::SimpleOntologyEngine;
82 use Bio::Annotation::Reference;
83 use Data::Dumper;
84
85 @ISA = qw(Bio::Root::Root);
86
87 my ($record_count, $processed_count, $is_a_rel, $contains_rel, $found_in_rel);
88
89 =head2 new
90
91 Title : new
92 Usage : $h = Bio::OntologyIO::Handlers::InterProHandler->new;
93 Function: Initializes global variables
94 Example :
95 Returns : an InterProHandler object
96 Args :
97
98
99 =cut
100
101 sub new{
102 my ($class, @args) = @_;
103 my $self = $class->SUPER::new(@args);
104
105 my ($eng,$ont,$name,$fact) =
106 $self->_rearrange([qw(ENGINE
107 ONTOLOGY
108 ONTOLOGY_NAME
109 TERM_FACTORY)
110 ],@args);
111
112 if(defined($ont)) {
113 $self->ontology($ont);
114 } else {
115 $name = "InterPro" unless $name;
116 $self->ontology(Bio::Ontology::Ontology->new(-name => $name));
117 }
118 $self->ontology_engine($eng) if $eng;
119
120 $self->term_factory($fact) if $fact;
121
122 $is_a_rel = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
123 $contains_rel = Bio::Ontology::RelationshipType->get_instance( "CONTAINS" );
124 $found_in_rel = Bio::Ontology::RelationshipType->get_instance( "FOUND_IN" );
125 $self->_cite_skip(0);
126 $self->secondary_accessions_map( {} );
127
128 return $self;
129 }
130
131 =head2 ontology_engine
132
133 Title : ontology_engine
134 Usage : $obj->ontology_engine($newval)
135 Function: Get/set ontology engine. Can be initialized only once.
136 Example :
137 Returns : value of ontology_engine (a scalar)
138 Args : new value (a scalar, optional)
139
140
141 =cut
142
143 sub ontology_engine{
144 my ($self, $value) = @_;
145
146 if( defined $value) {
147 if ( defined $self->{'ontology_engine'}) {
148 $self->throw("ontology_engine already defined");
149 } else {
150 $self->throw(ref($value)." does not implement ".
151 "Bio::Ontology::OntologyEngineI. Bummer.")
152 unless $value->isa("Bio::Ontology::OntologyEngineI");
153 $self->{'ontology_engine'} = $value;
154
155 # don't forget to set this as the engine of the ontology, otherwise
156 # those two might not point to the same object
157 my $ont = $self->ontology();
158 if($ont && $ont->can("engine") && (!$ont->engine())) {
159 $ont->engine($value);
160 }
161
162 $self->debug(ref($self) .
163 "::ontology_engine: registering ontology engine (".
164 ref($value)."):\n".
165 $value->to_string."\n");
166 }
167 }
168
169 return $self->{'ontology_engine'};
170 }
171
172 =head2 ontology
173
174 Title : ontology
175 Usage :
176 Function: Get the ontology to add the InterPro terms to.
177
178 The value is determined automatically once ontology_engine
179 has been set and if it hasn't been set before.
180
181 Example :
182 Returns : A L<Bio::Ontology::OntologyI> implementing object.
183 Args : On set, a L<Bio::Ontology::OntologyI> implementing object.
184
185 =cut
186
187 sub ontology{
188 my ($self,$ont) = @_;
189
190 if(defined($ont)) {
191 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI".
192 ". Bummer.")
193 unless $ont->isa("Bio::Ontology::OntologyI");
194 $self->{'_ontology'} = $ont;
195 }
196 return $self->{'_ontology'};
197 }
198
199 =head2 term_factory
200
201 Title : term_factory
202 Usage : $obj->term_factory($newval)
203 Function: Get/set the ontology term object factory
204 Example :
205 Returns : value of term_factory (a Bio::Factory::ObjectFactory instance)
206 Args : on set, new value (a Bio::Factory::ObjectFactory instance
207 or undef, optional)
208
209
210 =cut
211
212 sub term_factory{
213 my $self = shift;
214
215 return $self->{'term_factory'} = shift if @_;
216 return $self->{'term_factory'};
217 }
218
219 =head2 _cite_skip
220
221 Title : _cite_skip
222 Usage : $obj->_cite_skip($newval)
223 Function:
224 Example :
225 Returns : value of _cite_skip (a scalar)
226 Args : new value (a scalar, optional)
227
228
229 =cut
230
231 sub _cite_skip{
232 my ($self, $value) = @_;
233
234 if( defined $value) {
235 $self->{'_cite_skip'} = $value;
236 }
237
238 return $self->{'_cite_skip'};
239 }
240
241 =head2 _hash
242
243 Title : _hash
244 Usage : $obj->_hash($newval)
245 Function:
246 Example :
247 Returns : value of _hash (a scalar)
248 Args : new value (a scalar, optional)
249
250
251 =cut
252
253 sub _hash{
254 my ($self, $value) = @_;
255
256 if( defined $value) {
257 $self->{'_hash'} = $value;
258 }
259
260 return $self->{'_hash'};
261 }
262
263 =head2 _stack
264
265 Title : _stack
266 Usage : $obj->_stack($newval)
267 Function:
268 Example :
269 Returns : value of _stack (a scalar)
270 Args : new value (a scalar, optional)
271
272
273 =cut
274
275 sub _stack{
276 my ($self, $value) = @_;
277
278 if( defined $value) {
279 $self->{'_stack'} = $value;
280 }
281 return $self->{'_stack'};
282 }
283
284 =head2 _top
285
286 Title : _top
287 Usage :
288 Function:
289 Example :
290 Returns :
291 Args :
292
293
294 =cut
295
296 sub _top{
297 my ($self, $_stack) = @_;
298 my @stack = @{$_stack};
299
300 return (@stack >= 1) ? $stack[@stack - 1] : undef;
301 }
302
303 =head2 _term
304
305 Title : _term
306 Usage : $obj->_term($newval)
307 Function: Get/set method for the term currently processed.
308 Example :
309 Returns : value of term (a scalar)
310 Args : new value (a scalar, optional)
311
312
313 =cut
314
315 sub _term{
316 my ($self, $value) = @_;
317
318 if(defined $value) {
319 $self->{'_term'} = $value;
320 }
321
322 return $self->{'_term'};
323 }
324
325 =head2 _clear_term
326
327 Title : _clear_term
328 Usage :
329 Function: Removes the current term from the handler
330 Example :
331 Returns :
332 Args :
333
334
335 =cut
336
337 sub _clear_term{
338 my ($self) = @_;
339
340 delete $self->{'_term'};
341 }
342
343 =head2 _names
344
345 Title : _names
346 Usage : $obj->_names($newval)
347 Function:
348 Example :
349 Returns : value of _names (a scalar)
350 Args : new value (a scalar, optional)
351
352
353 =cut
354
355 sub _names{
356 my ($self, $value) = @_;
357
358 if( defined $value) {
359 $self->{'_names'} = $value;
360 }
361
362 return $self->{'_names'};
363 }
364
365 =head2 _create_relationship
366
367 Title : _create_relationship
368 Usage :
369 Function: Helper function. Adds relationships to one of the relationship stores.
370 Example :
371 Returns :
372 Args :
373
374
375 =cut
376
377 sub _create_relationship{
378 my ($self, $ref_id, $rel_type_term) = @_;
379 my $ont = $self->ontology();
380 my $fact = $self->term_factory();
381 my $term_temp = ($ont->engine->get_term_by_identifier($ref_id))[0];
382
383 my $rel = Bio::Ontology::Relationship->new( -predicate_term => $rel_type_term );
384
385 if (!defined $term_temp) {
386 $term_temp = $ont->engine->add_term( $fact->create_object( -InterPro_id => $ref_id ) );
387 $ont->engine->mark_uninstantiated($term_temp);
388 }
389 my $rel_type_name = $self->_top($self->_names);
390
391 if ($rel_type_name eq 'parent_list' || $rel_type_name eq 'found_in') {
392 $rel->object_term( $term_temp );
393 $rel->subject_term( $self->_term );
394 } else {
395 $rel->object_term( $self->_term );
396 $rel->subject_term( $term_temp );
397 }
398 $rel->ontology($ont);
399 $ont->add_relationship($rel);
400 }
401
402 =head2 start_element
403
404 Title : start_element
405 Usage :
406 Function: This is a method that is derived from XML::SAX::Base and
407 has to be overridden for processing start of xml element
408 events. Used internally only.
409
410 Example :
411 Returns :
412 Args :
413
414
415 =cut
416
417 sub start_element {
418 my ($self, $element) = @_;
419 my $ont = $self->ontology();
420 my $fact = $self->term_factory();
421
422 if ($element->{Name} eq 'interprodb') {
423 $ont->add_term($fact->create_object(-identifier => "Family",
424 -name => "Family") );
425 $ont->add_term($fact->create_object(-identifier => "Domain",
426 -name => "Domain") );
427 $ont->add_term($fact->create_object(-identifier => "Repeat",
428 -name => "Repeat") );
429 $ont->add_term($fact->create_object(-identifier => "PTM",
430 -name => "post-translational modification"));
431 } elsif ($element->{Name} eq 'interpro') {
432 my %record_args = %{$element->{Attributes}};
433 my $id = $record_args{"id"};
434 my $term_temp = ($ont->engine->get_term_by_identifier($id))[0];
435
436 $self->_term(
437 (!defined $term_temp)
438 ? $ont->add_term( $fact->create_object(-InterPro_id => $id) )
439 : $term_temp
440 );
441
442 $self->_term->ontology( $ont );
443 $self->_term->short_name( $record_args{"short_name"} );
444 $self->_term->protein_count( $record_args{"protein_count"} );
445 $self->_increment_record_count();
446 $self->_stack([{ interpro => undef }]);
447 $self->_names(["interpro"]);
448
449 ## Adding a relationship between the newly created InterPro term
450 ## and the term describing its type
451
452 my $rel = Bio::Ontology::Relationship->new( -predicate_term => $is_a_rel );
453 $rel->object_term( ($ont->engine->get_term_by_identifier($record_args{"type"}))[0] );
454 $rel->subject_term( $self->_term );
455 $rel->ontology($ont);
456 $ont->add_relationship($rel);
457 }
458 elsif (defined $self->_stack) {
459 my %hash = ();
460
461 if (keys %{$element->{Attributes}} > 0) {
462 foreach my $key (keys %{$element->{Attributes}}) {
463 $hash{$key} = $element->{Attributes}->{$key};
464 }
465 }
466 push @{$self->_stack}, \%hash;
467 if ($element->{Name} eq 'rel_ref') {
468 my $ref_id = $element->{Attributes}->{"ipr_ref"};
469 my $parent = $self->_top($self->_names);
470
471 if ($parent eq 'parent_list' || $parent eq 'child_list') {
472 $self->_create_relationship($ref_id, $is_a_rel);
473 }
474 if ($parent eq 'contains' ) {
475 $self->_create_relationship($ref_id, $contains_rel);
476 }
477 if ($parent eq 'found_in' ) {
478 $self->_create_relationship($ref_id, $found_in_rel);
479 }
480 }
481 elsif ($element->{Name} eq 'abstract') {
482 $self->_cite_skip(1);
483 }
484 push @{$self->_names}, $element->{Name};
485 }
486
487 }
488
489 =head2 _char_storage
490
491 Title : _char_storage
492 Usage : $obj->_char_storage($newval)
493 Function:
494 Example :
495 Returns : value of _char_storage (a scalar)
496 Args : new value (a scalar, optional)
497
498
499 =cut
500
501 sub _char_storage{
502 my ($self, $value) = @_;
503
504 if( defined $value) {
505 $self->{'_char_storage'} = $value;
506 }
507
508 return $self->{'_char_storage'};
509 }
510
511 =head2 characters
512
513 Title : characters
514 Usage :
515 Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing xml characters events. Used internally only.
516 Example :
517 Returns :
518 Args :
519
520
521 =cut
522
523 sub characters {
524 my ($self, $characters) = @_;
525 my $text = $characters->{Data};
526
527 chomp $text;
528 $text =~ s/^(\s+)//;
529 $self->{_char_storage} .= $text;
530
531 }
532
533 =head2 end_element
534
535 Title : end_element
536 Usage :
537 Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing end of xml element events. Used internally only.
538 Example :
539 Returns :
540 Args :
541
542
543 =cut
544
545 sub end_element {
546 my ($self, $element) = @_;
547
548 if ($element->{Name} eq 'interprodb') {
549 $self->debug("Interpro DB Parser Finished: $record_count read, $processed_count processed\n");
550 }
551 elsif ($element->{Name} eq 'interpro') {
552 $self->_clear_term;
553 $self->_increment_processed_count();
554 }
555 elsif ($element->{Name} ne 'cite') {
556 $self->{_char_storage} =~ s/<\/?p>//g;
557 if ((defined $self->_stack)) {
558 my $current_hash = pop @{$self->_stack};
559 my $parent_hash = $self->_top($self->_stack);
560 my $current_hash_key = pop @{$self->_names};
561
562 if (keys %{$current_hash} > 0 && $self->_char_storage ne "") {
563 $current_hash->{comment} = $self->_char_storage;
564 push @{ $parent_hash->{$current_hash_key} }, $current_hash
565 }
566 elsif ($self->_char_storage ne ""){
567 push @{ $parent_hash->{$current_hash_key} }, { 'accumulated_text_12345' => $self->_char_storage };
568 }
569 elsif (keys %{$current_hash} > 0) {
570 push @{ $parent_hash->{$current_hash_key} }, $current_hash;
571 }
572 if ($element->{Name} eq 'pub_list') {
573 my @refs = ();
574
575 foreach my $pub_record ( @{ $current_hash->{publication} } ) {
576 my $ref = Bio::Annotation::Reference->new;
577 my $loc = $pub_record->{location}->[0];
578
579 $ref->location( $pub_record->{journal}->[0]->{accumulated_text_12345}.", ".$loc->{firstpage}."-".$loc->{lastpage}.", ".$loc->{volume}.", ".$pub_record->{year}->[0]->{accumulated_text_12345});
580 $ref->title( $pub_record->{title}->[0]->{accumulated_text_12345} );
581 my $ttt = $pub_record->{author_list}->[0];
582
583 $ref->authors( $ttt->{accumulated_text_12345} );
584 $ref->medline( scalar($ttt->{dbkey}) )
585 if exists($ttt->{db}) && $ttt->{db} eq "MEDLINE";
586 push @refs, $ref;
587 }
588 $self->_term->add_reference(@refs);
589 }
590 elsif ($element->{Name} eq 'name') {
591 $self->_term->name( $self->_char_storage );
592 }
593 elsif ($element->{Name} eq 'abstract') {
594 $self->_term->definition( $self->_char_storage );
595 $self->_cite_skip(0);
596 }
597 elsif ($element->{Name} eq 'member_list') {
598 my @refs = ();
599
600 foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
601 push @refs, Bio::Annotation::DBLink->new( -database => $db_xref->{db},
602 -primary_id => $db_xref->{dbkey}
603 );
604 }
605 $self->_term->add_member(@refs);
606 }
607 elsif ($element->{Name} eq 'sec_list') {
608 my @refs = ();
609
610 foreach my $sec_ac ( @{ $current_hash->{sec_ac} } ) {
611 push @refs, $sec_ac->{sec_ac};
612 }
613 $self->_term->add_secondary_id(@refs);
614 $self->secondary_accessions_map->{$self->_term->identifier} = \@refs;
615 }
616 elsif ($element->{Name} eq 'example_list') {
617 my @refs = ();
618
619 foreach my $example ( @{ $current_hash->{example} } ) {
620 push @refs, Bio::Annotation::DBLink->new( -database => $example->{db_xref}->[0]->{db},
621 -primary_id => $example->{db_xref}->[0]->{dbkey},
622 -comment => $example->{comment}
623 );
624 }
625 $self->_term->add_example(@refs);
626 }
627 elsif ($element->{Name} eq 'external_doc_list') {
628 my @refs = ();
629
630 foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
631 push @refs, Bio::Annotation::DBLink->new( -database => $db_xref->{db},
632 -primary_id => $db_xref->{dbkey}
633 );
634 }
635 $self->_term->add_external_document(@refs);
636 }
637 elsif ($element->{Name} eq 'class_list') {
638 my @refs = ();
639
640 foreach my $classification ( @{ $current_hash->{classification} } ) {
641 push @refs, Bio::Annotation::DBLink->new( -database => $classification->{class_type},
642 -primary_id => $classification->{id}
643 );
644 }
645 $self->_term->class_list(\@refs);
646 }
647 elsif ($element->{Name} eq 'deleted_entries') {
648 my @refs = ();
649
650 foreach my $del_ref ( @{ $current_hash->{del_ref} } ) {
651 my $term = ($self->ontology_engine->get_term_by_identifier( $del_ref->{id} ))[0];
652
653 $term->is_obsolete(1) if defined $term;
654 }
655 }
656 }
657 $self->_char_storage( '' ) if !$self->_cite_skip;
658 }
659 }
660
661 =head2 secondary_accessions_map
662
663 Title : secondary_accessions_map
664 Usage : $obj->secondary_accessions_map($newval)
665 Function:
666 Example : $map = $interpro_handler->secondary_accessions_map();
667 Returns : Reference to a hash that maps InterPro identifier to an
668 array reference of secondary accessions following the InterPro
669 xml schema.
670 Args : Empty hash reference
671
672
673 =cut
674
675 sub secondary_accessions_map{
676 my ($self, $value) = @_;
677
678 if( defined $value) {
679 $self->{'secondary_accessions_map'} = $value;
680 }
681
682 return $self->{'secondary_accessions_map'};
683 }
684
685 =head2 _increment_record_count
686
687 Title : _increment_record_count
688 Usage :
689 Function:
690 Example :
691 Returns :
692 Args :
693
694
695 =cut
696
697 sub _increment_record_count{
698 $record_count++;
699 }
700
701 =head2 _increment_processed_count
702
703 Title : _increment_processed_count
704 Usage :
705 Function:
706 Example :
707 Returns :
708 Args :
709
710
711 =cut
712
713 sub _increment_processed_count{
714 $processed_count++;
715 print $processed_count."\n" if $processed_count % 100 == 0;
716 }
717
718 1;