Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Biblio/IO/pubmedxml.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: pubmedxml.pm,v 1.4 2002/10/22 07:45:13 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module Bio::Biblio::IO::pubmedxml.pm | |
| 4 # | |
| 5 # Cared for by Martin Senger <senger@ebi.ac.uk> | |
| 6 # For copyright and disclaimer see below. | |
| 7 | |
| 8 # POD documentation - main docs before the code | |
| 9 | |
| 10 =head1 NAME | |
| 11 | |
| 12 Bio::Biblio::IO::pubmedxml - A converter of XML files with PUBMED citations | |
| 13 | |
| 14 =head1 SYNOPSIS | |
| 15 | |
| 16 Do not use this object directly; it is recommended to access and use | |
| 17 it through the I<Bio::Biblio::IO> module: | |
| 18 | |
| 19 use Bio::Biblio::IO; | |
| 20 my $io = new Bio::Biblio::IO (-format => 'pubmedxml'); | |
| 21 | |
| 22 =head1 DESCRIPTION | |
| 23 | |
| 24 This object reads bibliographic citations in XML/MEDLINE format and | |
| 25 converts them into I<Bio::Biblio::RefI> objects. It is an | |
| 26 implementation of methods defined in I<Bio::Biblio::IO>. | |
| 27 | |
| 28 =head1 FEEDBACK | |
| 29 | |
| 30 =head2 Mailing Lists | |
| 31 | |
| 32 User feedback is an integral part of the evolution of this and other | |
| 33 Bioperl modules. Send your comments and suggestions preferably to | |
| 34 the Bioperl mailing list. Your participation is much appreciated. | |
| 35 | |
| 36 bioperl-l@bioperl.org - General discussion | |
| 37 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 38 | |
| 39 =head2 Reporting Bugs | |
| 40 | |
| 41 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 42 of the bugs and their resolution. Bug reports can be submitted via | |
| 43 email or the web: | |
| 44 | |
| 45 bioperl-bugs@bioperl.org | |
| 46 http://bugzilla.bioperl.org/ | |
| 47 | |
| 48 =head1 AUTHOR | |
| 49 | |
| 50 Martin Senger (senger@ebi.ac.uk) | |
| 51 | |
| 52 =head1 COPYRIGHT | |
| 53 | |
| 54 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved. | |
| 55 | |
| 56 This module is free software; you can redistribute it and/or modify | |
| 57 it under the same terms as Perl itself. | |
| 58 | |
| 59 =head1 DISCLAIMER | |
| 60 | |
| 61 This software is provided "as is" without warranty of any kind. | |
| 62 | |
| 63 =head1 APPENDIX | |
| 64 | |
| 65 The main documentation details are to be found in | |
| 66 L<Bio::Biblio::IO>. | |
| 67 | |
| 68 Here is the rest of the object methods. Internal methods are preceded | |
| 69 with an underscore _. | |
| 70 | |
| 71 =cut | |
| 72 | |
| 73 | |
| 74 # Let the code begin... | |
| 75 | |
| 76 | |
| 77 package Bio::Biblio::IO::pubmedxml; | |
| 78 use vars qw(@ISA $VERSION $Revision); | |
| 79 use vars qw(%PCDATA_NAMES %SIMPLE_TREATMENT %POP_DATA_AND_PEEK_OBJ %POP_AND_ADD_DATA_ELEMENT); | |
| 80 | |
| 81 use strict; | |
| 82 | |
| 83 use Bio::Biblio::IO::medlinexml; | |
| 84 | |
| 85 @ISA = qw(Bio::Biblio::IO::medlinexml); | |
| 86 | |
BEGIN {
    # Publish the module version: the digit groups of the CVS "Revision"
    # keyword are extracted and formatted as "major.minor".
    my @revision_parts = (q$Revision: 1.4 $ =~ /\d+/g);
    $VERSION = sprintf "%d.%-02d", @revision_parts;

    # Keep the complete CVS "Id" keyword string available as well.
    $Revision = q$Id: pubmedxml.pm,v 1.4 2002/10/22 07:45:13 lapp Exp $;
}
| 92 | |
# Set up a freshly constructed reader object from its constructor arguments.
#
# Args    : $self - the object being initialized
#           @args - a flat list of name/value pairs; names are matched
#                   case-insensitively and a leading '-' becomes '_' when
#                   the value is stored in $self.  Names used here:
#                     -result     name of the converter module to load from
#                                 the Bio::Biblio::IO namespace (default
#                                 'pubmed2ref'); 'raw' means no converter
#                     -xml_parser an already created parser to reuse
#                     -callback   if true, parsing starts immediately
#
# Side effects: sets the medlinexml package globals used by the event
# handlers (the converter and the callback), creates the XML parser if the
# object does not have one yet, and calls $self->_parse when a callback
# was given.
sub _initialize {
    my ($self, @args) = @_;

    # make a hashtable from @args
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    # copy all @args into this object (overwriting what may already be
    # there) - changing '-key' into '_key', and making keys lowercase
    foreach my $key (keys %param) {
        (my $new_key = $key) =~ s/^-/_/;
        $self->{ lc $new_key } = $param{$key};
    }

    # find the format for output - it ends up in a global $Convert
    # because it will be used by the event handlers, which know nothing
    # about this object
    my $result = $self->{'_result'} || 'pubmed2ref';
    $result = lc $result;    # normalize capitalization to lower case

    # a special case is 'raw' when no converting module is loaded
    # and citations will be returned as a hashtable (the one which
    # is created during parsing XML file/stream)
    unless ($result eq 'raw') {

        # load module with output converter - as defined in $result
        if (defined &Bio::Biblio::IO::_load_format_module ($result)) {
            $Bio::Biblio::IO::medlinexml::Convert =
                "Bio::Biblio::IO::$result"->new (@args);
        }
    }

    # create an instance of the XML parser
    # (unless it is already there...)
    $self->{'_xml_parser'} ||= XML::Parser->new (
        Handlers => {
            Init  => \&Bio::Biblio::IO::medlinexml::handle_doc_start,
            Start => \&handle_start,
            End   => \&handle_end,
            Char  => \&Bio::Biblio::IO::medlinexml::handle_char,
            Final => \&Bio::Biblio::IO::medlinexml::handle_doc_end,
        },
    );

    # if there is an argument '-callback' then start parsing at once -
    # the registered event handlers will use 'callback' to report
    # back after each citation
    #
    # we need to remember this situation also in a global variable
    # because the event handler subroutines know nothing about this
    # object (unfortunately).
    #
    # The global must live in the medlinexml package, next to $Convert and
    # the stacks: 'SUPER::' only affects method lookup, so '$SUPER::Callback'
    # would be an unrelated variable in a package literally named 'SUPER'.
    # NOTE(review): assumes the medlinexml handlers read a package variable
    # named $Callback - confirm against Bio::Biblio::IO::medlinexml.
    if ($Bio::Biblio::IO::medlinexml::Callback = $self->{'_callback'}) {
        $self->_parse;
    }
}
| 145 | |
| 146 # --------------------------------------------------------------------- | |
| 147 # | |
| 148 # Here are the event handlers (they do the real job!) | |
| 149 # | |
| 150 # Note that these methods do not know anything about the object they | |
| 151 # are part of - they are called as subroutines, not as methods. | |
| 152 # It also means that they need to use global variables to store and | |
| 153 # exchange intermediate results. | |
| 154 # | |
| 155 # --------------------------------------------------------------------- | |
| 156 | |
#
# Dispatch tables consulted by handle_start and handle_end below.
#

# Elements for which handle_start opens a fresh (empty) text entry on the
# medlinexml PCDATA stack.
%PCDATA_NAMES = map { $_ => 1 } qw(
    PublicationStatus
    ProviderId
    ArticleId
    URL
);

# Elements for which handle_start pushes a new empty hash onto the
# medlinexml object stack.
%SIMPLE_TREATMENT = map { $_ => 1 } qw(
    History
    PubMedArticle
    PubmedArticle
    PubmedData
);

# Elements which handle_end hands to _data2obj under their own name with
# the first letter lowercased.
%POP_DATA_AND_PEEK_OBJ = map { $_ => 1 } qw(
    Year
    Month
    Day
    Hour
    Minute
    Second
    ProviderId
    PublicationStatus
);

# Elements whose object handle_end pops from the object stack and passes
# to _add_element under the name given here.
%POP_AND_ADD_DATA_ELEMENT = (
    'PubMedPubDate' => 'pubDates',
    'History'       => 'histories',
);
| 193 | |
| 194 | |
| 195 =head2 VERSION and Revision | |
| 196 | |
| 197 Usage : print $Bio::Biblio::IO::pubmedxml::VERSION; | |
| 198 print $Bio::Biblio::IO::pubmedxml::Revision; | |
| 199 | |
| 200 =cut | |
| 201 | |
| 202 | |
# Start-tag handler registered with the XML parser.
#
# Called as a plain subroutine (not a method) with the parser object, the
# element name and the element's attributes as name/value pairs.  Elements
# specific to this module are handled here by pushing entries onto the
# medlinexml stacks; every other element is passed on to the medlinexml
# start handler.
sub handle_start {
    my ($expat, $e, %attrs) = @_;
#    &Bio::Biblio::IO::medlinexml::_debug_object_stack ("START", $e);

    #
    # #PCDATA elements that also carry attributes come first: they get
    # an object holding the attributes on the object stack here, and a
    # text entry on the PCDATA stack further below.
    #
    if ($e eq 'ArticleId') {
        my $id_type = $attrs{'IdType'};
        $id_type = 'pubmed' unless defined $id_type;
        push @Bio::Biblio::IO::medlinexml::ObjectStack, { 'idType' => $id_type };
    }

    if ($e eq 'URL') {
        my %url_obj;
        foreach my $attr_name ('type', 'lang') {
            # only attributes with a true value are recorded
            $url_obj{$attr_name} = $attrs{$attr_name} if $attrs{$attr_name};
        }
        push @Bio::Biblio::IO::medlinexml::ObjectStack, \%url_obj;
    }

    if (exists $PCDATA_NAMES{$e}) {
        # every #PCDATA element starts with an empty text entry
        push @Bio::Biblio::IO::medlinexml::PCDataStack, '';

    } elsif (exists $SIMPLE_TREATMENT{$e}) {
        # plain container elements start as an empty object
        push @Bio::Biblio::IO::medlinexml::ObjectStack, {};

    } elsif ($e eq 'ArticleIdList') {
        # nothing to do for the list wrapper itself
        ;

    } elsif ($e eq 'PubMedPubDate') {
        my %date_obj;
        $date_obj{'pubStatus'} = $attrs{'PubStatus'} if $attrs{'PubStatus'};
        push @Bio::Biblio::IO::medlinexml::ObjectStack, \%date_obj;

    } else {
        # not one of ours - let the medlinexml handler deal with it
        &Bio::Biblio::IO::medlinexml::handle_start ($expat, $e, %attrs);
    }
}
| 250 | |
# End-tag handler registered with the XML parser.
#
# Called as a plain subroutine (not a method) with the parser object and
# the name of the element being closed.  Elements specific to this module
# are finished here using the medlinexml helper subroutines; every other
# element is passed on to the medlinexml end handler.
sub handle_end {
    my ($expat, $e) = @_;

    #
    # Elements that are #PCDATA and also have attributes own an entry on
    # both stacks.  Each maps to the key its text is stored under and the
    # list its finished object is added to.
    #
    my %attributed_pcdata = (
        'ArticleId' => [ 'id',  'pubmedArticleIds' ],
        'URL'       => [ 'URL', 'pubmedURLs' ],
    );

    if (my $spec = $attributed_pcdata{$e}) {
        my ($data_key, $list_key) = @$spec;
        &Bio::Biblio::IO::medlinexml::_data2obj ($data_key);
        my $finished = pop @Bio::Biblio::IO::medlinexml::ObjectStack;
        &Bio::Biblio::IO::medlinexml::_add_element ($list_key, $finished);
#        &Bio::Biblio::IO::medlinexml::_debug_object_stack ("END", $e);
        return;
    }

    #
    # From here on elements of the object stack and of the PCDATA stack
    # are handled side by side.
    #
    if (exists $POP_DATA_AND_PEEK_OBJ{$e}) {
        # stored under the element name with its first letter lowercased
        &Bio::Biblio::IO::medlinexml::_data2obj (lcfirst $e);

    } elsif (exists $POP_AND_ADD_DATA_ELEMENT{$e}) {
        my $list_key = $POP_AND_ADD_DATA_ELEMENT{$e};
        my $finished = pop @Bio::Biblio::IO::medlinexml::ObjectStack;
        &Bio::Biblio::IO::medlinexml::_add_element ($list_key, $finished);

    } elsif ($e eq 'MedlineCitation' || $e eq 'NCBIArticle') {
        &Bio::Biblio::IO::medlinexml::_obj2obj ('Citation');

    } elsif ($e eq 'PubmedData') {
        &Bio::Biblio::IO::medlinexml::_obj2obj ('PubmedData');

    } elsif ($e eq 'PubMedArticle' || $e eq 'PubmedArticle') {
        # the whole citation is complete at this point
        my $citation = pop @Bio::Biblio::IO::medlinexml::ObjectStack;
        &Bio::Biblio::IO::medlinexml::_process_citation ($citation);

    } else {
        # not one of ours - let the medlinexml handler deal with it
        &Bio::Biblio::IO::medlinexml::handle_end ($expat, $e);
    }

#    &Bio::Biblio::IO::medlinexml::_debug_object_stack ("END", $e);

}
| 307 | |
| 308 1; | |
| 309 __END__ |
