comparison variant_effect_predictor/Bio/EnsEMBL/Utils/Iterator.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 package Bio::EnsEMBL::Utils::Iterator;
2
3 =head1 LICENSE
4
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
6 Genome Research Limited. All rights reserved.
7
8 This software is distributed under a modified Apache license.
9 For license details, please see
10
11 http://www.ensembl.org/info/about/code_licence.html
12
13 =head1 CONTACT
14
15 Please email comments or questions to the public Ensembl
16 developers list at <dev@ensembl.org>.
17
18 Questions may also be sent to the Ensembl help desk at
19 <helpdesk@ensembl.org>.
20
21 =cut
22
23 =head1 NAME
24
25 Bio::EnsEMBL::Utils::Iterator
26
27 =head1 SYNOPSIS
28
29 my $variation_iterator =
30 $variation_adaptor->fetch_Iterator_by_VariationSet($1kg_set);
31
32 while ( my $variation = $variation_iterator->next ) {
33 # operate on variation object
34 print $variation->name, "\n";
35 }
36
37 =head1 DESCRIPTION
38
39 Some adaptor methods may return more objects than can fit in memory at once, in these cases
40 you can fetch an iterator object instead of the usual array reference. The iterator object
41 allows you to iterate over the set of objects (using the next() method) without loading the
42 entire set into memory at once. You can tell if an iterator is exhausted with the has_next()
43 method. The peek() method allows you to fetch the next object from the iterator without
44 advancing the iterator - this is useful if you want to check some property of an element in
45 the set while leaving the iterator unchanged.
46
47 You can filter and transform an iterator in an analogous way to using map and grep on arrays
48 using the provided map() and grep() methods. These methods return another iterator, and only
49 perform the filtering and transformation on each element as it is requested, so again these
50 can be used without loading the entire set into memory.
51
52 Iterators can be combined together with the append() method which merges together the
53 iterator it is called on with the list of iterators passed in as arguments. This is
54 somewhat analogous to concatenating arrays with the push function. append() returns a new
55 iterator which iterates over each component iterator until it is exhausted before moving
56 on to the next iterator, in the order in which they are supplied to the method.
57
58 An iterator can be converted to an array (reference) containing all the elements in the
59 set with the to_arrayref() method, but note that this array may consume a lot of memory if
60 the set the iterator is iterating over is large and it is recommended that you do not call
61 this method unless there is no way of working with each element at a time.
62
63 =head1 METHODS
64
65 =cut
66
67 use strict;
68 use warnings;
69
70 use Bio::EnsEMBL::Utils::Exception qw(throw);
71
72 =head2 new
73
74 Argument : either a coderef representing the iterator, in which case this
75 anonymous subroutine is assumed to return the next object in the
76 set when called and to return undef when the set is exhausted,
77 or an arrayref, in which case we return an iterator over this
78 array. If the argument is not defined then we return an 'empty'
79 iterator that immediately returns undef
80
81 Example :
82
83 my @dbIDs = fetch_relevant_dbIDs();
84
85 my $iterator = Bio::EnsEMBL::Utils::Iterator->new(
86 sub { return $self->fetch_by_dbID(shift @dbIDs) }
87 );
88
89 NB: this is a very simple example showing how to call the constructor
90 that would be rather inefficient in practice, real examples should
91 probably be smarter about batching up queries to minimise trips to
92 the database. See examples in the Variation API.
93
94 Description: Constructor, creates a new iterator object
95 Returntype : Bio::EnsEMBL::Utils::Iterator instance
96 Exceptions : thrown if the supplied argument is defined but is neither an arrayref nor a coderef
97 Caller : general
98 Status : Experimental
99
100 =cut
101
# Constructor.
#
# Arguments : $class - the class to bless the new iterator into
#             $arg   - (optional) one of:
#                        * undef    : build an 'empty' iterator whose generator
#                                     always returns undef
#                        * arrayref : build an iterator over the array's elements
#                        * coderef  : used directly as the generator; it is
#                                     expected to return the next element on each
#                                     call and undef once exhausted
# Returns   : a hashref blessed into $class, holding the generator under 'sub'
# Throws    : via throw() if $arg is defined but is neither an arrayref nor a
#             coderef
sub new {
    my ($class, $arg) = @_;

    my $coderef;

    if (!defined $arg) {
        # no argument supplied: the generator yields undef straight away,
        # which the other methods treat as "exhausted"
        $coderef = sub { return undef };
    }
    elsif (ref $arg eq 'ARRAY') {
        # Walk the array with a private cursor instead of shift()ing
        # elements off it, so that iterating never empties the array the
        # caller handed to us. Elements appended to the array later are
        # still picked up because the bound is re-read on every call.
        my $index = 0;

        $coderef = sub {
            return $index < @$arg ? $arg->[$index++] : undef;
        };
    }
    elsif (ref $arg eq 'CODE') {
        $coderef = $arg;
    }
    else {
        throw("The supplied argument does not look like an arrayref or a coderef ".(ref $arg));
    }

    my $self = {sub => $coderef};

    return bless $self, $class;
}
132
133
134 =head2 next
135
136 Example : $obj = $iterator->next
137 Description: returns the next object from this iterator, or undef if the iterator is exhausted
138 Returntype : Object type will depend on what this iterator is iterating over
139 Exceptions : none
140 Caller : general
141 Status : Experimental
142
143 =cut
144
# Return the next element and advance the iterator; undef once the
# generator has nothing more to give.
sub next {
    my ($self) = @_;

    # has_next() and peek() may already have buffered an element; only
    # ask the generator for a new one when the buffer is empty.
    if (!defined $self->{next}) {
        $self->{next} = $self->{sub}->();
    }

    # hand the buffered element back and clear the buffer in one step
    return delete $self->{next};
}
152
153 =head2 has_next
154
155 Example : if ($iterator->has_next) { my $obj = $iterator->next }
156 Description: Boolean - true if this iterator has more elements to fetch, false when
157 it is exhausted
158 Returntype : boolean
159 Exceptions : none
160 Caller : general
161 Status : Experimental
162
163 =cut
164
# True if another element is available, false once the iterator is
# exhausted. Does not consume the element.
sub has_next {
    my ($self) = @_;

    # Look ahead by one element if nothing is buffered yet; the element
    # stays in the buffer so that the following next()/peek() returns it.
    unless (defined $self->{next}) {
        $self->{next} = $self->{sub}->();
    }

    return defined $self->{next};
}
172
173 =head2 peek
174
175 Example : $obj = $iterator->peek
176 Description: returns the next object from this iterator, or undef if the iterator is exhausted,
177 much like next but does not advance the iterator (so the same object will be
178 returned on the following call to next or peek)
179 Returntype : Object type will depend on what this iterator is iterating over
180 Exceptions : none
181 Caller : general
182 Status : Experimental
183
184 =cut
185
# Return the next element without advancing the iterator, or undef if
# the iterator is exhausted.
sub peek {
    my ($self) = @_;

    # fill the one-element buffer from the generator if it is empty ...
    defined $self->{next}
        or $self->{next} = $self->{sub}->();

    # ... and leave the element in place so it is returned again later
    return $self->{next};
}
193
194 =head2 grep
195
196 Example : my $filtered_iterator = $original_iterator->grep(sub {$_->name =~ /^rs/});
197 Description: filter this iterator, returning another iterator
198 Argument : a coderef which returns true if the element should be included in the
199 filtered set, or false if the element should be filtered out. $_ will be
200 set locally to each element in turn so you should be able to write a block
201 in a similar way as for the perl grep function (although it will need to be
202 preceded with the sub keyword). Otherwise you can pass in a reference to a
203 subroutine which expects a single argument with the same behaviour.
204 Returntype : Bio::EnsEMBL::Utils::Iterator
205 Exceptions : thrown if the argument is not a coderef
206 Caller : general
207 Status : Experimental
208
209 =cut
210
# Build a new iterator that yields only those elements of this iterator
# for which $coderef returns a true value. $_ is set locally to each
# candidate element and the element is also passed as the first argument.
# Throws (via throw()) unless $coderef is a code reference.
sub grep {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    # Generator for the filtered iterator: elements are pulled from the
    # underlying iterator only when the filtered one is asked for more.
    my $filtered = sub {
        while ($self->has_next) {
            local $_ = $self->next;

            if ($coderef->($_)) {
                return $_;
            }
        }

        # underlying iterator ran dry without another match
        return undef;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($filtered);
}
224
225 =head2 map
226
227 Example : my $transformed_iterator = $original_iterator->map(sub {$_->name});
228 Description: transform the elements of this iterator, returning another iterator
229 Argument : a coderef which returns the desired transformation of each element.
230 $_ will be set locally to each original element in turn so you
231 should be able to write a block in a similar way as for the perl map
232 function (although it will need to be preceded with the sub keyword).
233 Otherwise you can pass in a reference to a subroutine which expects a
234 single argument with the same behaviour.
235 Returntype : Bio::EnsEMBL::Utils::Iterator
236 Exceptions : thrown if the argument is not a coderef
237 Caller : general
238 Status : Experimental
239
240 =cut
241
242
# Build a new iterator whose elements are the result of applying
# $coderef to each element of this iterator. $_ is set locally to the
# element and the element is also passed as the first argument.
# Throws (via throw()) unless $coderef is a code reference.
sub map {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    # Generator for the transformed iterator: one underlying element is
    # fetched and transformed per request.
    my $transformed = sub {
        local $_ = $self->next;

        # an undefined element means the underlying iterator is
        # exhausted, so the callback is not invoked
        return undef unless defined $_;

        return $coderef->($_);
    };

    return Bio::EnsEMBL::Utils::Iterator->new($transformed);
}
253
254
255 =head2 each
256
257 Example : $iterator->each(sub { print $_->name, "\n"; });
258 Description: Performs a full iteration of the current iterator instance.
259 Argument : a coderef which is called once for each element; its return value
260 is ignored. $_ will be set locally to each element.
261 Returntype : None
262 Exceptions : thrown if the argument is not a coderef
263 Caller : general
264 Status : Experimental
265
266 =cut
267
268
# Run $coderef once for every remaining element, exhausting the
# iterator. $_ is set locally to the element, which is also passed as
# the first argument; whatever the callback returns is discarded.
# Throws (via throw()) unless $coderef is a code reference.
sub each {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    ELEMENT:
    while (1) {
        last ELEMENT unless $self->has_next();

        local $_ = $self->next();
        $coderef->($_);
    }

    return;
}
278
279
280 =head2 to_arrayref
281
282 Example : my $arrayref = $iterator->to_arrayref;
283 Description: return a reference to an array containing all elements from the
284 iterator. This is created by simply iterating over the iterator
285 until it is exhausted and adding each element in turn to an array.
286 Note that this may consume a lot of memory for iterators over
287 large collections
288 Returntype : arrayref
289 Exceptions : none
290 Caller : general
291 Status : Experimental
292
293 =cut
294
# Drain the iterator and return a reference to an array holding every
# remaining element, in iteration order. All elements are held in
# memory at once.
sub to_arrayref {
    my $self = shift;

    my $elements = [];

    # the has_next condition is checked before each push
    push @{$elements}, $self->next while $self->has_next;

    return $elements;
}
306
307 =head2 append
308
309 Example : my $combined_iterator = $iterator1->append($iterator2, $iterator3);
310 Description: return a new iterator that combines this iterator with the others
311 passed as arguments, this new iterator will iterate over each
312 component iterator (in the order supplied here) until it is
313 exhausted and then move on to the next iterator until all are
314 exhausted
315 Argument : an array of Bio::EnsEMBL::Utils::Iterator objects
316 Returntype : Bio::EnsEMBL::Utils::Iterator
317 Exceptions : thrown if any of the arguments are not iterators
318 Caller : general
319 Status : Experimental
320
321 =cut
322
# Return a new iterator that runs through this iterator and then each of
# the iterators in @queue, in the order given, moving on to the next one
# whenever the current one is exhausted.
#
# Arguments : a list of iterator objects (anything blessed that provides
#             has_next() and next() methods)
# Returns   : Bio::EnsEMBL::Utils::Iterator
# Throws    : via throw() if any argument is not a blessed object or lacks
#             has_next()/next()
sub append {
    my ($self, @queue) = @_;

    # loaded on demand; used only for the blessed() check below
    require Scalar::Util;

    for my $iterator (@queue) {
        # Insist on a real object: a plain string naming a class that has
        # these methods would otherwise pass validation and only blow up
        # later, in the middle of iteration. Calling can() as a method
        # also honours classes that override it.
        my $is_iterator = Scalar::Util::blessed($iterator)
            && $iterator->can('has_next')
            && $iterator->can('next');

        throw("Argument to append doesn't look like an iterator")
            unless $is_iterator;
    }

    # push ourselves onto the front of the queue
    unshift @queue, $self;

    return Bio::EnsEMBL::Utils::Iterator->new(sub {
        # shift off any exhausted iterators
        while (@queue && !$queue[0]->has_next) {
            shift @queue;
        }

        # and return the next object from the iterator at the
        # head of the queue, or undef if the queue is empty
        return @queue ? $queue[0]->next : undef;
    });
}
345
346 =head2 take
347
348 Example : my $limited_iterator = $iterator->take(5);
349 Description: return a new iterator that only iterates over the
350 first n elements of this iterator
351 Argument : a non-negative integer
352 Returntype : Bio::EnsEMBL::Utils::Iterator
353 Exceptions : thrown if the argument is negative
354 Caller : general
355 Status : Experimental
356
357 =cut
358
# Return a new iterator limited to the first $n elements of this
# iterator. Throws (via throw()) if $n is negative.
sub take {
    my ($self, $n) = @_;

    if ($n < 0) {
        throw("Argument cannot be negative");
    }

    # number of requests made to the limited iterator so far
    my $served = 0;

    my $limited = sub {
        # once $n requests have been made, stop touching the underlying
        # iterator so that its remaining elements are left in place
        return undef if $served++ >= $n;

        return $self->next;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($limited);
}
370
371 =head2 skip
372
373 Example : my $limited_iterator = $iterator->skip(5);
374 Description: skip over the first n elements of this iterator (and then return
375 the same iterator for your method chaining convenience)
376 Argument : a non-negative integer
377 Returntype : Bio::EnsEMBL::Utils::Iterator
378 Exceptions : thrown if the argument is negative
379 Caller : general
380 Status : Experimental
381
382 =cut
383
# Discard the first $n elements of this iterator and return the iterator
# itself, so that calls can be chained. Throws (via throw()) if $n is
# negative.
sub skip {
    my ($self, $n) = @_;

    if ($n < 0) {
        throw("Argument cannot be negative");
    }

    # one next() call per element to drop; the range is empty when $n is 0
    for (0 .. $n - 1) {
        $self->next;
    }

    return $self;
}
393
394 =head2 reduce
395
396 Example : my $tot_length = $iterator->reduce(sub { $_[0] + $_[1]->length }, 0);
397 Description: reduce this iterator with the provided coderef, using the (optional)
398 second argument as the initial value of the accumulator
399 Argument[1]: a coderef that expects 2 arguments, the current accumulator
400 value and the next element in the set, and returns the next
401 accumulator value. Unless the optional second argument is
402 provided the first accumulator value passed in will be the
403 first element in the set
404 Argument[2]: (optional) an initial value to use for the accumulator instead
405 of the first value of the set
406 Returntype : returntype of the coderef
407 Exceptions : thrown if the argument is not a coderef
408 Caller : general
409 Status : Experimental
410
411 =cut
412
# Fold the remaining elements of this iterator into a single value.
# $coderef is called as $coderef->($accumulator, $element) and its
# return value becomes the new accumulator. The accumulator starts as
# $init_val when that is defined, otherwise as the first element of the
# iterator. Throws (via throw()) unless $coderef is a code reference.
sub reduce {
    my ($self, $coderef, $init_val) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    my $accumulator;
    if (defined $init_val) {
        $accumulator = $init_val;
    }
    else {
        # no usable initial value: seed the fold with the first element
        $accumulator = $self->next;
    }

    while ($self->has_next) {
        $accumulator = $coderef->($accumulator, $self->next);
    }

    return $accumulator;
}
426
427 1;
428