Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Biblio/IO.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: IO.pm,v 1.8 2002/10/22 07:45:11 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Biblio::IO | |
4 # | |
5 # Cared for by Martin Senger <senger@ebi.ac.uk> | |
6 # For copyright and disclaimer see below. | |
7 | |
8 # POD documentation - main docs before the code | |
9 | |
10 =head1 NAME | |
11 | |
12 Bio::Biblio::IO - Handling the bibliographic references | |
13 | |
14 =head1 SYNOPSIS | |
15 | |
16 use Bio::Biblio::IO; | |
17 | |
18 # getting citations from a file | |
19 $in = Bio::Biblio::IO->new ('-file' => 'myfile.xml' , | |
20 '-format' => 'medlinexml'); | |
21 --- OR --- | |
22 | |
23 # getting citations from a string | |
24 $in = Bio::Biblio::IO->new ('-data' => '<MedlineCitation>...</MedlineCitation>' , | |
25 '-format' => 'medlinexml'); | |
26 --- OR --- | |
27 | |
28 # getting citations from a string if IO::String is installed | |
29 use IO::String; | |
30 $in = Bio::Biblio::IO->new ('-fh' => IO::String->new ($citation), | |
31 '-format' => 'medlinexml'); | |
32 | |
33 $in = Bio::Biblio::IO->new(-fh => $io_handle , '-format' => 'medlinexml'); | |
34 | |
35 --- OR --- | |
36 | |
37 # getting citations from any IO handler | |
38 $in = Bio::Biblio::IO->new('-fh' => $io_handle , | |
39 '-format' => 'medlinexml'); | |
40 | |
41 | |
42 # now, having $in, we can read all citations | |
43 while ( my $citation = $in->next_bibref() ) { | |
44 &do_something_with_citation ($citation); | |
45 } | |
46 | |
47 --- OR --- | |
48 | |
49 # again reading all citations but now a callback defined in your | |
50 # code is used (note that the reading starts already when new() | |
51 # is called) | |
52 $io = new Bio::Biblio::IO ('-format' => 'medlinexml', | |
53 '-file' => $testfile, | |
54 '-callback' => \&callback); | |
55 sub callback { | |
56 my $citation = shift; | |
57 print $citation->{'_identifier'} . "\n"; | |
58 } | |
59 | |
60 Now, to actually get a citation in an XML format, | |
61 use I<Bio::Biblio> module which returns an XML string: | |
62 | |
63 use Bio::Biblio; | |
64 my $xml = new Bio::Biblio->get_by_id ('94033980'); | |
65 my $reader = Bio::Biblio::IO->new ('-data' => $xml, | |
66 '-format' => 'medlinexml'); | |
67 | |
68 while (my $citation = $reader->next_bibref()) { | |
69 ... do something here with $citation | |
70 } | |
71 | |
72 And, finally, the resulting citation can be received in different | |
73 output formats: | |
74 | |
75 $io = new Bio::Biblio::IO ('-format' => 'medlinexml', | |
76 '-result' => 'raw'); | |
77 --- OR --- | |
78 | |
79 $io = new Bio::Biblio::IO ('-format' => 'medlinexml', | |
80 '-result' => 'medline2ref'); | |
81 | |
82 --- OR --- | |
83 | |
84 $io = new Bio::Biblio::IO ('-format' => 'pubmedxml', | |
85 '-result' => 'pubmed2ref'); | |
86 | |
87 =head1 DESCRIPTION | |
88 | |
89 Bio::Biblio::IO is a handler module for accessing bibliographic | |
90 citations. The citations can be in different formats - assuming that | |
91 there is a corresponding module knowing that format in Bio::Biblio::IO | |
92 directory (e.g. Bio::Biblio::IO::medlinexml). The format (and the | |
93 module name) is given by the argument I<-format>. | |
94 | |
95 Once an instance of C<Bio::Biblio::IO> class is available, the | |
96 citations can be read by calling repeatedly method I<next_bibref>: | |
97 | |
98 while (my $citation = $reader->next_bibref()) { | |
99 ... do something here with $citation | |
100 } | |
101 | |
102 However, this may imply that all citations were already read into the | |
103 memory. If you expect a huge amount of citations to be read, you may | |
104 choose a I<callback> option. Your subroutine is specified in the | |
105 C<new()> method and is called every time a new citation is available | |
106 (see an example above in SYNOPSIS). | |
107 | |
108 The citations returned by I<next_bibref> or given to your callback | |
109 routine can be of different formats depending on the argument | |
110 I<-result>. One result type is I<raw> and it is represented by a | |
111 simple, not blessed hash table: | |
112 | |
113 $io = new Bio::Biblio::IO ('-result' => 'raw'); | |
114 | |
115 What other result formats are available depends on the module that | |
116 reads the citations in the first place. At the moment, the following | |
117 ones are available: | |
118 | |
119 $io = new Bio::Biblio::IO ('-result' => 'medline2ref'); | |
120 | |
121 This is a default result format for reading citations by the | |
122 I<medlinexml> module. The C<medlinexml> module is again the default | |
123 one. Which means that you can almost omit arguments (you still need to | |
124 say where the citations come from): | |
125 | |
126 $io = new Bio::Biblio::IO ('-file' => 'data/medline_data.xml'); | |
127 | |
128 Another result format available is for PUBMED citations (which is a | |
129 super-set of the MEDLINE citations having a few more tags): | |
130 | |
131 $io = new Bio::Biblio::IO ('-format' => 'pubmedxml', | |
132 '-result' => 'pubmed2ref', | |
133 '-data' => $citation); | |
134 | |
135 Or, because C<pubmed2ref> is a default one for PUBMED citations, you can say just: | |
136 | |
137 $io = new Bio::Biblio::IO ('-format' => 'pubmedxml', | |
138 '-data' => $citation); | |
139 | |
140 Both C<medline2ref> and C<pubmed2ref> results are objects defined in | |
141 the directory C<Bio::Biblio>. | |
142 | |
143 =head1 SEE ALSO | |
144 | |
145 =over | |
146 | |
147 =item * | |
148 | |
149 An example script I<examples/biblio.pl>. It has many options and its | |
150 own help. The relevant options to this IO module are I<-f> | |
151 (specifying what file to read) and I<-O> (specifying what result | |
152 format to achieve). | |
153 | |
154 =item * | |
155 | |
156 OpenBQS home page: http://industry.ebi.ac.uk/openBQS | |
157 | |
158 =item * | |
159 | |
160 Comments to the Perl client: http://industry.ebi.ac.uk/openBQS/Client_perl.html | |
161 | |
162 =back | |
163 | |
164 =head1 FEEDBACK | |
165 | |
166 =head2 Mailing Lists | |
167 | |
168 User feedback is an integral part of the evolution of this | |
169 and other Bioperl modules. Send your comments and suggestions preferably | |
170 to one of the Bioperl mailing lists. | |
171 Your participation is much appreciated. | |
172 | |
173 bioperl-l@bioperl.org - General discussion | |
174 http://bioperl.org/MailList.shtml - About the mailing lists | |
175 | |
176 =head2 Reporting Bugs | |
177 | |
178 Report bugs to the Bioperl bug tracking system to help us keep track | |
179 of the bugs and their resolution. | |
180 Bug reports can be submitted via email or the web: | |
181 | |
182 bioperl-bugs@bioperl.org | |
183 http://bugzilla.bioperl.org/ | |
184 | |
185 =head1 AUTHOR | |
186 | |
187 Martin Senger (senger@ebi.ac.uk) | |
188 | |
189 =head1 COPYRIGHT | |
190 | |
191 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved. | |
192 | |
193 This module is free software; you can redistribute it and/or modify | |
194 it under the same terms as Perl itself. | |
195 | |
196 =head1 DISCLAIMER | |
197 | |
198 This software is provided "as is" without warranty of any kind. | |
199 | |
200 =head1 APPENDIX | |
201 | |
202 The rest of the documentation details each of the object | |
203 methods. Internal methods are preceded with a _ | |
204 | |
205 =cut | |
206 | |
207 | |
208 # Let the code begin... | |
209 | |
210 package Bio::Biblio::IO; | |
211 | |
212 use strict; | |
213 use vars qw(@ISA); | |
214 | |
215 use Bio::Root::Root; | |
216 use Bio::Root::IO; | |
217 use Symbol(); | |
218 | |
219 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
220 | |
221 my $entry = 0; | |
222 | |
# Constructor / factory.
#
# Called on a concrete format class (any class whose name matches
# Bio::Biblio::IO::<something>) it builds the object through the parent
# constructor and runs _initialize() on it.
#
# Called on Bio::Biblio::IO itself it works as a factory: it determines the
# format (the '-format' argument, else a guess from '-file' or $ARGV[0], else
# 'medlinexml'), loads the matching Bio::Biblio::IO::<format> module and
# delegates to that class's new() with the original arguments.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Object branch: we are already in a 'real-work-doing' class, so let
    # SUPER create and bless the object, then initialize it.
    if ($class =~ /Bio::Biblio::IO::(\S+)/) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Factory branch: reached when somebody calls 'new Bio::Biblio::IO (...)'.
    my %param = @args;

    # add a lowercased copy of every key so the lookups below ignore case
    @param{ map { lc $_ } keys %param } = values %param;

    my $format = $param{'-format'};
    $format ||= $class->_guess_format($param{-file} || $ARGV[0]);
    $format ||= 'medlinexml';
    $format = "\L$format";    # normalize capitalization to lower case

    # load the module with the real implementation - as defined in $format
    _load_format_module($format) or return undef;

    # this calls this same method new() again (unless the loaded module
    # has its own new() method) - but this time its object branch
    return "Bio::Biblio::IO::$format"->new(@args);
}
255 | |
# Create a reader with new() and hand back its tied filehandle (see fh()).
# All arguments are passed to new() unchanged. Returns nothing when new()
# yields a false value.
sub newFh {
    my ($class, @ctor_args) = @_;

    my $reader = $class->new(@ctor_args);
    return unless $reader;

    return $reader->fh;
}
261 | |
262 | |
# Return a fresh anonymous glob tied to this object's class, so that reading
# from the returned handle is dispatched to TIEHANDLE/READLINE of that class.
# The invocant itself is passed to TIEHANDLE as the value to wrap.
sub fh {
    my ($invocant) = @_;

    # works for both an object and a plain class name
    my $tie_class = ref($invocant) ? ref($invocant) : $invocant;

    my $glob = Symbol::gensym();
    tie ${$glob}, $tie_class, $invocant;

    return $glob;
}
270 | |
271 # _initialize is chained for all Bio::Biblio::IO classes | |
272 | |
# Initialization hook, called from new() with the constructor arguments.
# At this level it only sets up the IO part by forwarding all arguments
# to _initialize_io().
sub _initialize {
    my $self = shift;
    my @io_args = @_;

    return $self->_initialize_io(@io_args);
}
278 | |
279 =head2 next_bibref | |
280 | |
281 Usage : $citation = $stream->next_bibref | |
282 Function: Reads the next citation object from the stream and returns it. | |
283 Returns : a Bio::Biblio::Ref citation object, or something else | |
284 (depending on the '-result' argument given in the 'new()' | |
285 method). | |
286 Args : none | |
287 | |
288 =cut | |
289 | |
# Placeholder at the generic level: it always raises an exception through
# the object's throw() method, because a generic Bio::Biblio::IO object
# cannot be read from.
sub next_bibref {
    my ($self) = @_;

    my $complaint = "Sorry, you cannot read from a generic Bio::Biblio::IO object.";
    $self->throw ($complaint);
}
294 | |
295 # ----------------------------------------------------------------------------- | |
296 | |
297 =head2 _load_format_module | |
298 | |
299 Usage : $class->_load_format_module ($format) | |
300 Returns : 1 on success, undef on failure | |
301 Args : 'format' should contain the last part of the | |
302 name of a module that does the real implementation | |
303 | |
304 It does (in run-time) a similar thing as | |
305 | |
306 require Bio::Biblio::IO::$format | |
307 | |
308 It throws an exception if it fails to find and load the module | |
309 (for example, because of the compilation errors in the module). | |
310 | |
311 =cut | |
312 | |
# Load, at run time, the module implementing the given citation format.
#
# Called as a plain function (not as a method).
#   Args    : $format - last part of the module name (e.g. 'medlinexml');
#             the file required is Bio/Biblio/IO/$format.pm
#   Returns : 1 when the module is loaded (or is found already loaded)
#   Throws  : through Bio::Root::Root->throw when the module cannot be
#             found or fails to load; the original error text is included
sub _load_format_module {
    my ($format) = @_;

    my $module = "_<Bio/Biblio/IO/$format.pm";
    my $load   = "Bio/Biblio/IO/$format.pm";

    # shortcut: a "_<file" entry in the main symbol table is taken as a
    # sign that the file has been compiled already
    return 1 if $main::{$module};

    # The eval block returns an explicit true value on success, so failure
    # is recognized from its return value. Testing only $@ is not reliable:
    # on older perls it can be reset while the failed eval unwinds. The
    # error text is copied right away, before anything else can change $@.
    my $loaded = eval {
        require $load;
        1;
    };
    my $error = $@;
    return 1 if $loaded;

    Bio::Root::Root->throw (<<END);
$load: $format cannot be found or loaded
Exception $error
For more information about the Biblio system please see the Bio::Biblio::IO docs.
END

    # only reached if throw() ever returns
    return;
}
335 | |
336 =head2 _guess_format | |
337 | |
338 Usage : $class->_guess_format ($filename) | |
339 Returns : string with a guessed format of the input data (e.g. 'medlinexml') | |
340 Args : a file name whose extension can help to guess its format | |
341 | |
342 It makes an expert guess what kind of data are in the given file | |
343 (but be prepared that $filename may be empty). | |
344 | |
345 =cut | |
346 | |
# Guess the citation format from a file name.
#
#   Args    : $class    - invocant (not used)
#             $filename - file name whose extension hints at the format;
#                         may be missing or empty
#   Returns : 'medlinexml' when the name ends with .xml or .medlinexml
#             (case-insensitive); nothing (empty list / undef) otherwise
#
# The file name is kept in a lexical variable. Assigning it to the global $_
# would overwrite the caller's current loop element when this is called from
# inside a foreach/map, and would die with "Modification of a read-only
# value attempted" when that loop iterates over constants.
sub _guess_format {
    my ($class, $filename) = @_;

    return unless $filename;
    return 'medlinexml' if $filename =~ /\.(xml|medlinexml)$/i;
    return;
}
353 | |
# Destructor: calls the object's close() method when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}
359 | |
# tie() hook: wrap the given value (the reader object passed by fh())
# in a small hash under the key 'biblio' and bless it into $class.
sub TIEHANDLE {
    my $class   = shift;
    my $wrapped = shift;

    my %state = ('biblio' => $wrapped);
    return bless \%state, $class;
}
364 | |
# Tied-handle read hook.
# Outside list context: returns the result of one next_bibref() call on the
# wrapped reader. In list context: keeps calling next_bibref() and collects
# the results until it returns a false value, then returns them all.
sub READLINE {
    my ($self) = @_;
    my $reader = $self->{'biblio'};

    if (!wantarray) {
        return $reader->next_bibref();
    }

    my @citations;
    while (my $citation = $reader->next_bibref()) {
        push @citations, $citation;
    }
    return @citations;
}
372 | |
373 1; |