comparison variant_effect_predictor/Bio/Biblio/IO/medline2ref.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: medline2ref.pm,v 1.10 2002/10/22 07:45:13 lapp Exp $
2 #
3 # BioPerl module Bio::Biblio::IO::medline2ref.pm
4 #
5 # Cared for by Martin Senger <senger@ebi.ac.uk>
6 # For copyright and disclaimer see below.
7
8 # POD documentation - main docs before the code
9
10 =head1 NAME
11
12 Bio::Biblio::IO::medline2ref - A converter of a raw hash to MEDLINE citations
13
14 =head1 SYNOPSIS
15
16 # to be written
17
18 =head1 DESCRIPTION
19
20 # to be written
21
22 =head1 FEEDBACK
23
24 =head2 Mailing Lists
25
26 User feedback is an integral part of the evolution of this and other
27 Bioperl modules. Send your comments and suggestions preferably to
28 the Bioperl mailing list. Your participation is much appreciated.
29
30 bioperl-l@bioperl.org - General discussion
31 http://bioperl.org/MailList.shtml - About the mailing lists
32
33 =head2 Reporting Bugs
34
35 Report bugs to the Bioperl bug tracking system to help us keep track
36 of the bugs and their resolution. Bug reports can be submitted via
37 email or the web:
38
39 bioperl-bugs@bioperl.org
40 http://bugzilla.bioperl.org/
41
42 =head1 AUTHOR
43
44 Martin Senger (senger@ebi.ac.uk)
45
46 =head1 COPYRIGHT
47
48 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
49
50 This module is free software; you can redistribute it and/or modify
51 it under the same terms as Perl itself.
52
53 =head1 DISCLAIMER
54
55 This software is provided "as is" without warranty of any kind.
56
57 =head1 APPENDIX
58
59 Here is the rest of the object methods. Internal methods are preceded
60 with an underscore _.
61
62 =cut
63
64
65 # Let the code begin...
66
67
68 package Bio::Biblio::IO::medline2ref;
69
70 use strict;
71 use vars qw(@ISA $VERSION $Revision);
72
73 use Bio::Root::Root;
74 use Bio::Biblio::MedlineJournal;
75 use Bio::Biblio::MedlineBook;
76 use Bio::Biblio::Provider;
77 use Bio::Biblio::Person;
78 use Bio::Biblio::Organisation;
79
80 @ISA = qw(Bio::Root::Root);
81
82 BEGIN {
83 # set the version for version checking
84 $VERSION = do { my @r = (q$Revision: 1.10 $ =~ /\d+/g); sprintf "%d.%-02d", @r };
85 $Revision = q$Id: medline2ref.pm,v 1.10 2002/10/22 07:45:13 lapp Exp $;
86 }
87
88 # -----------------------------------------------------------------------------
89 sub new {
90 my ($caller, @args) = @_;
91 my $class = ref ($caller) || $caller;
92
93 # object creation and blessing
94 my ($self) = $class->SUPER::new (@args);
95
96 # make a hashtable from @args
97 my %param = @args;
98 @param { map { lc $_ } keys %param } = values %param; # lowercase keys
99
100 # copy all @args into this object (overwriting what may already be
101 # there) - changing '-key' into '_key', and making keys lowercase
102 my $new_key;
103 foreach my $key (keys %param) {
104 ($new_key = $key) =~ s/^-/_/;
105 $self->{ lc $new_key } = $param { $key };
106 }
107
108 # done
109 return $self;
110 }
111
112 # ---------------------------------------------------------------------
113 #
114 # Here is the core...
115 #
116 # ---------------------------------------------------------------------
117
118 sub _load_instance {
119 my ($self, $source) = @_;
120
121 #
122 # MEDLINE has only JournalArticles and BookArticles
123 # but we may create a general Ref if there is no attribute 'article'
124 #
125 my $result;
126 my $article = $$source{'article'};
127 if (defined $article) {
128 if (defined $$article{'journal'}) {
129 $result = $self->_new_instance ('Bio::Biblio::MedlineJournalArticle');
130 $result->type ('JournalArticle');
131 } elsif (defined $$article{'book'}) {
132 $result = $self->_new_instance ('Bio::Biblio::MedlineBookArticle');
133 $result->type ('BookArticle');
134 } else {
135 $result->type ('MedlineArticle');
136 }
137 }
138 $result = $self->_new_instance ('Bio::Biblio::Ref') unless defined $result;
139 return $result;
140 }
141
142 sub convert {
143 my ($self, $source) = @_;
144 my $result = $self->_load_instance ($source);
145
146 if (defined $result->type) {
147 if ($result->type eq 'JournalArticle') {
148 &_convert_journal_article ($result, $source);
149 } elsif ($result->type eq 'BookArticle') {
150 &_convert_book_article ($result, $source);
151 } elsif ($result->type eq 'Article') {
152 &_convert_article ($result, $source);
153 }
154 }
155
156 #
157 # now do the attributes which are the same for all resource types
158 #
159
160 # ...identification is now by MedlineID but the trend is to replace
161 # it by PMID (I have heard) theefore we keep both also separately
162 # from the 'identifier'
163 if (defined $$source{'medlineID'}) {
164 $result->identifier ($$source{'medlineID'});
165 } else {
166 $result->identifier ($$source{'PMID'});
167 }
168 $result->pmid ($$source{'PMID'}) if defined $$source{'PMID'};
169 $result->medline_id ($$source{'medlineID'}) if defined $$source{'medlineID'};
170
171 # ...few others
172 $result->citation_owner ($$source{'owner'}) if defined $$source{'owner'};
173 $result->status ($$source{'status'}) if defined $$source{'status'};
174 $result->number_of_references ($$source{'numberOfReferences'}) if defined $$source{'numberOfReferences'};
175
176 # ...entry status of the citation in the repository
177 my $date;
178 if (defined $$source{'dateRevised'}) {
179 $result->last_modified_date (&_convert_date ($$source{'dateRevised'}));
180 $date = &_convert_date ($$source{'dateCreated'});
181 $result->date_created ($date) if defined $date;
182 $date = &_convert_date ($$source{'dateCompleted'});
183 $result->date_completed ($date) if defined $date;
184 } elsif (defined $$source{'dateCompleted'}) {
185 $result->last_modified_date (&_convert_date ($$source{'dateCompleted'}));
186 $date = &_convert_date ($$source{'dateCreated'});
187 $result->date_created ($date) if defined $date;
188 } elsif (defined $$source{'dateCreated'}) {
189 $result->last_modified_date (&_convert_date ($$source{'dateCreated'}));
190 }
191
192 # ...put citation subsets in a comma-separated string
193 if (defined $$source{'citationSubsets'}) {
194 $result->repository_subset (join (',', @{ $$source{'citationSubsets'} }));
195 }
196
197 # ...MEDLINE's Comments & Corrections will be arrays of hashes
198 if (defined $$source{'commentsCorrections'}) {
199 my $corr = $$source{'commentsCorrections'};
200 $result->comment_ons ($$corr{'commentOns'}) if defined $$corr{'commentOns'};
201 $result->comment_ins ($$corr{'commentIns'}) if defined $$corr{'commentIns'};
202 $result->erratum_ins ($$corr{'erratumIns'}) if defined $$corr{'erratumIns'};
203 $result->erratum_fors ($$corr{'erratumFors'}) if defined $$corr{'erratumFors'};
204 $result->original_report_ins ($$corr{'originalReportIns'}) if defined $$corr{'originalReportIns'};
205 $result->republished_froms ($$corr{'republishedFroms'}) if defined $$corr{'republishedFroms'};
206 $result->republished_ins ($$corr{'republishedIns'}) if defined $$corr{'republishedIns'};
207 $result->retraction_ofs ($$corr{'retractionOfs'}) if defined $$corr{'retractionOfs'};
208 $result->retraction_ins ($$corr{'retractionIns'}) if defined $$corr{'retractionIns'};
209 $result->summary_for_patients_ins ($$corr{'summaryForPatientsIns'}) if defined $$corr{'summaryForPatientsIns'};
210 $result->update_ins ($$corr{'updateIns'}) if defined $$corr{'updateIns'};
211 $result->update_ofs ($$corr{'updateOfs'}) if defined $$corr{'updateOfs'};
212 }
213
214 # ...MEDLINE's GeneSymbols are put in a comma-separated string
215 if (defined $$source{'geneSymbols'}) {
216 $result->gene_symbols (join (',', @{ $$source{'geneSymbols'} }));
217 }
218
219 # ...MEDLINE's GeneralNotes into an array of hashtables, each one
220 # having keys for the 'owner' and the 'note'
221 $result->general_notes ($$source{'generalNotes'}) if defined $$source{'generalNotes'};
222
223 # ...MEDLINE's PersonalNameSubjects into contributors (TBD: is that correct?)
224 if (defined $$source{'personalNameSubjects'}) {
225 my @contributors;
226 foreach my $person ( @{ $$source{'personalNameSubjects'} } ) {
227 push (@contributors, &_convert_personal_name ($person));
228 }
229 $result->contributors (\@contributors);
230 }
231
232 # ...MEDLINE's OtherAbstract into an array of hashtables, each one
233 # having keys for the 'type', 'AbstractText' and the 'copyright'
234 $result->other_abstracts ($$source{'otherAbstracts'}) if defined $$source{'otherAbstracts'};
235 # if (defined $$source{'otherAbstracts'}) {
236 # my @other_abstracts = ();
237 # foreach my $oa ( @{ $$source{'otherAbstracts'} } ) {
238 # if (defined $$oa{'abstractText'}) {
239 # my $abstract = $$oa{'abstractText'};
240 # delete $$oa{'abstractText'};
241 # $$oa{'abstract'} = $$abstract{'abstractText'};
242 # $$oa{'rights'} = $$abstract{'copyrightInformation'} if defined $$abstract{'copyrightInformation'};
243 # push (@other_abstracts, $oa);
244 # }
245 # }
246 # $result->other_abstracts (\@other_abstracts);
247 # }
248
249 # ...MEDLINE's OtherIDs into an array of hashtables, each one
250 # having keys for the 'id', and 'source'
251 $result->other_ids ($$source{'otherIDs'}) if defined $$source{'otherIDs'};
252
253 # ...MEDLINE's Chemicals - store them as an array of hashtables
254 # (each one for each Chemical)
255 $result->chemicals ($$source{'chemicals'}) if defined $$source{'chemicals'};
256
257 # MeshHeadings are put on two places:
258 # - a complete information in a property called "MeshHeadings", and
259 # - only descriptors in the hashtable "subject_headings", together
260 # with the word "MeSH" in "subject_headings_source"
261 if (defined $$source{'meshHeadings'}) {
262 $result->mesh_headings ($$source{'meshHeadings'});
263 my %subject_headings;
264 foreach my $mesh ( @{ $$source{'meshHeadings'} } ) {
265 $subject_headings{ $$mesh{'descriptorName'} } = 1 if defined $$mesh{'descriptorName'};
266 }
267 if (%subject_headings) {
268 $result->subject_headings (\%subject_headings);
269 $result->subject_headings_source ('Mesh');
270 }
271 }
272
273 # ...MEDLINE's keyword lists are merger all together (this may not
274 # be good idea - but again the keywords are better accessible
275 # -TBD?)
276 if (defined $$source{'keywordLists'}) {
277 my %keywords;
278 foreach my $keywords ( @{ $$source{'keywordLists'} } ) {
279 if ($$keywords{'keywords'}) {
280 foreach my $keyword ( @{ $$keywords{'keywords'} } ) {
281 $keywords{$keyword} = 1;
282 }
283 }
284 }
285 $result->keywords (\%keywords) if %keywords;
286 }
287
288 # Done!
289 return $result;
290 }
291
292 # load a module (given as a real module name, e.g. 'Bio::Biblio::MedlineJournalArticle'),
293 # call new() method on it, and return the instance returned by the new() method
294 sub _new_instance {
295 my ($self, $module) = @_;
296 my ($filename);
297 ($filename = $module . '.pm') =~ s|\:\:|/|g;
298 eval { require $filename; };
299 $self->throw ("Loading error when trying '$filename'. $@\n") if $@;
300 return $module->new;
301 }
302
303 #
304 # see OpenBQS specification (http://industry.ebi.ac.uk/openBQS) how
305 # a date should be coded;
306 # TBD: this can be improved - checking is missing, timezones,
307 # converting to UTC...
308 # Also note that this routine does not convert 'medline_date' - it
309 # is stored in a separate attribute without ant conversion.
310 #
311 sub _convert_date {
312 my ($date) = @_;
313 return undef unless
314 exists $$date{'year'} or
315 exists $$date{'month'} or
316 exists $$date{'day'} or
317 exists $$date{'hour'} or
318 exists $$date{'minute'} or
319 exists $$date{'second'};
320
321
322 my $converted = (exists $$date{'year'} ? $$date{'year'} : '0000');
323
324 if (exists $$date{'month'}) {
325 $converted .= '-' . $$date{'month'};
326 } elsif (exists $$date{'day'}) {
327 $converted .= '-00';
328 }
329
330 if (exists $$date{'day'}) {
331 $converted .= '-' . $$date{'day'};
332 } elsif (exists $$date{'hour'}) {
333 $converted .= '-00';
334 }
335
336 if (exists $$date{'hour'}) {
337 $converted .= 'T' . $$date{'hour'} .
338 ':' . (exists $$date{'minute'} ? $$date{'minute'} : '00') .
339 ':' . (exists $$date{'second'} ? $$date{'second'} : '00') . 'Z';
340 }
341 return $converted;
342 }
343
344 # $person is a hash with persons attributes - we need to create and
345 # return a Bio::Biblio::Person object
346 sub _convert_personal_name {
347 my ($person) = @_;
348 foreach my $key (keys %$person) {
349 $$person{"_$key"} = $$person{$key};
350 delete $$person{$key};
351 }
352 new Bio::Biblio::Person (%$person);
353 }
354
355 #
356 # takes journal article related attributes from $article and convert
357 # them into $result and at the end call _convert_article (which is
358 # shared with book article)
359 #
360 sub _convert_journal_article {
361 my ($result, $source) = @_;
362 my $article = $$source{'article'};
363
364 # create and populate both a Journal and a resulting Article objects
365 my $from_journal = $$article{'journal'};
366 my $journal = new Bio::Biblio::MedlineJournal;
367 $journal->name ($$from_journal{'title'}) if defined $$from_journal{'title'};
368 $journal->issn ($$from_journal{'iSSN'}) if defined $$from_journal{'iSSN'};
369 $journal->abbreviation ($$from_journal{'iSOAbbreviation'}) if defined $$from_journal{'iSOAbbreviation'};
370 $journal->coden ($$from_journal{'coden'}) if defined $$from_journal{'coden'};
371 if (defined $$from_journal{'journalIssue'}) {
372 my $issue = $$from_journal{'journalIssue'};
373 $result->volume ($$issue{'volume'}) if defined $$issue{'volume'};
374 $result->issue ($$issue{'issue'}) if defined $$issue{'issue'};
375
376 if (defined $$issue{'pubDate'}) {
377 my $pub_date = $$issue{'pubDate'};
378 my $converted = &_convert_date ($pub_date);
379 $result->date ($converted) if defined $converted;
380
381 # Some parts of a MEDLINE date are stored just as properties
382 # because they have almost non-parseable format :-).
383 $result->medline_date ($$pub_date{'medlineDate'}) if defined $$pub_date{'medlineDate'};
384 $result->season ($$pub_date{'season'}) if defined $$pub_date{'season'};
385 }
386 }
387
388 # ...some attributes are in journalInfo (which is outside of the article)
389 my $journal_info = $$source{'journalInfo'};
390 if (defined $journal_info) {
391 $journal->country ($$journal_info{'country'}) if defined $$journal_info{'country'};
392 $journal->medline_ta ($$journal_info{'medlineTA'}) if defined $$journal_info{'medlineTA'};
393 $journal->medline_code ($$journal_info{'medlineCode'}) if defined $$journal_info{'medlineCode'};
394 $journal->nlm_unique_id ($$journal_info{'nlmUniqueID'}) if defined $$journal_info{'nlmUniqueID'};
395 }
396
397 $result->journal ($journal);
398 &_convert_article ($result, $source);
399 }
400
401 #
402 # takes book article related attributes from $article and convert
403 # them into $result and at the end call _convert_article (which is
404 # shared with journal article)
405 #
406 sub _convert_book_article {
407 my ($result, $source) = @_;
408 my $article = $$source{'article'};
409
410 # create and populate both book and resulting article objects
411 my $from_book = $$article{'book'};
412 my $book = new Bio::Biblio::MedlineBook;
413 $book->title ($$from_book{'title'}) if defined $$from_book{'title'};
414 $book->volume ($$from_book{'volume'}) if defined $$from_book{'volume'};
415 $book->series ($$from_book{'collectionTitle'}) if defined $$from_book{'collectionTitle'};
416
417 if (defined $$from_book{'pubDate'}) {
418 my $pub_date = $$from_book{'pubDate'};
419 my $converted = &_convert_date ($pub_date);
420 $result->pub_date ($converted) if defined $converted;
421
422 # Some parts of a MEDLINE date are stored just as properties
423 # because they have almost non-parseable format :-).
424 $result->medline_date ($$pub_date{'medlineDate'}) if defined $$pub_date{'medlineDate'};
425 $result->season ($$pub_date{'season'}) if defined $$pub_date{'season'};
426 }
427
428 if (defined $$from_book{'publisher'}) {
429 my $publisher = new Bio::Biblio::Organisation;
430 $publisher->name ($$from_book{'publisher'});
431 $book->publisher ($publisher);
432 }
433
434 my @authors = &_convert_providers ($$from_book{'authors'});
435 $book->authors (\@authors) if @authors;
436
437 $result->book ($book);
438 &_convert_article ($result, $source);
439 }
440
441 #
442 # takes from $source article related attributes and convert them into
443 # $article (these attributes are the same both for journal and book
444 # articles
445 #
446 sub _convert_article {
447 my ($article, $source) = @_;
448 my $from_article = $$source{'article'};
449
450 $article->title ($$from_article{'articleTitle'}) if defined $$from_article{'articleTitle'};
451 $article->affiliation ($$from_article{'affiliation'}) if defined $$from_article{'affiliation'};
452 $article->vernacular_title ($$from_article{'vernacularTitle'}) if defined $$from_article{'vernacularTitle'};
453 $article->date_of_electronic_publication
454 ($$from_article{'dateOfElectronicPublication'}) if defined $$from_article{'dateOfElectronicPublication'};
455
456 if (defined $$from_article{'pagination'}) {
457 my $pagination = $$from_article{'pagination'};
458 $article->first_page ($$pagination{'startPage'}) if defined $$pagination{'startPage'};
459 $article->last_page ($$pagination{'endPage'}) if defined $$pagination{'endPage'};
460 $article->medline_page ($$pagination{'medlinePgn'}) if defined $$pagination{'medlinePgn'};
461 }
462
463 if (defined $$from_article{'abstract'}) {
464 my $abstract = $$from_article{'abstract'};
465 $article->abstract ($$abstract{'abstractText'}) if defined $$abstract{'abstractText'};
466 $article->abstract_type ('text/plain');
467 $article->rights ($$abstract{'copyrightInformation'}) if defined $$abstract{'copyrightInformation'};
468 }
469
470 if (defined $$from_article{'languages'}) {
471 my $languages = $$from_article{'languages'}; # ref-array
472 if ( @{ $languages } > 0) {
473 $article->language ( $$languages[0] );
474 }
475 if ( @{ $languages } > 1) {
476 $article->other_languages (join (',', @{ $languages }));
477 }
478 }
479
480 my @authors = &_convert_providers ($$from_article{'authors'});
481 if (@authors) {
482 $article->authors (\@authors);
483 $article->author_list_complete
484 ($$from_article{'authorListComplete'}) if defined $$from_article{'authorListComplete'};
485 }
486
487 # references to database entries are prefixed with database name
488 # (separated by a slash)
489 use Bio::Annotation::DBLink;
490 if (defined $$from_article{'dataBanks'}) {
491 my $databanks = $$from_article{'dataBanks'}; # a ref-array
492 my @references;
493 foreach my $bank ( @{ $databanks } ) {
494 my $db_name = $$bank{'dataBankName'};
495 if (defined $$bank{'accessionNumbers'}) {
496 foreach my $accn ( @{ $$bank{'accessionNumbers'} } ) {
497 my $dblink = new Bio::Annotation::DBLink (-primary_id => $accn);
498 $dblink->database ($db_name); # it does not matter if it is undef
499 push (@references, $dblink);
500 }
501 }
502 }
503 if (@references) {
504 $article->cross_references (\@references);
505 $article->cross_references_list_complete
506 ($$from_article{'dataBankListComplete'}) if defined $$from_article{'dataBankListComplete'};
507 }
508 }
509
510 # grants are stored in an array of hashtables (each of the
511 # hashtables has keys agency, grantID and acronym)
512 $article->grants ($$from_article{'grants'}) if defined $$from_article{'grants'};
513 $article->grant_list_complete
514 ($$from_article{'grantListComplete'}) if defined $$from_article{'grandListComplete'};
515
516 }
517
518 #
519 # takes a ref-array of providers - they can be persons or
520 # organisations, and returns an array of converted providers
521 #
522 sub _convert_providers {
523 my ($providers) = @_;
524 return () unless defined $providers;
525
526 my @results;
527 foreach my $provider ( @{ $providers } ) {
528 if (defined $$provider{'personalName'}) {
529 my $converted = &_convert_personal_name ($$provider{'personalName'});
530 push (@results, $converted) if defined $converted;
531 } elsif (defined $$provider{'collectiveName'}) {
532 push (@results, new Bio::Biblio::Organisation (-name => $$provider{'collectiveName'}));
533 } else {
534 new Bio::Biblio::Provider;
535 }
536 }
537 return () unless @results;
538 return @results;
539 }
540
541 1;
542 __END__