Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/Iterator.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 package Bio::EnsEMBL::Utils::Iterator; | |
| 2 | |
| 3 =head1 LICENSE | |
| 4 | |
| 5 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 6 Genome Research Limited. All rights reserved. | |
| 7 | |
| 8 This software is distributed under a modified Apache license. | |
| 9 For license details, please see | |
| 10 | |
| 11 http://www.ensembl.org/info/about/code_licence.html | |
| 12 | |
| 13 =head1 CONTACT | |
| 14 | |
| 15 Please email comments or questions to the public Ensembl | |
| 16 developers list at <dev@ensembl.org>. | |
| 17 | |
| 18 Questions may also be sent to the Ensembl help desk at | |
| 19 <helpdesk@ensembl.org>. | |
| 20 | |
| 21 =cut | |
| 22 | |
| 23 =head1 NAME | |
| 24 | |
| 25 Bio::EnsEMBL::Utils::Iterator | |
| 26 | |
| 27 =head1 SYNOPSIS | |
| 28 | |
| 29 my $variation_iterator = | |
| 30 $variation_adaptor->fetch_Iterator_by_VariationSet($set_1kg); | |
| 31 | |
| 32 while ( my $variation = $variation_iterator->next ) { | |
| 33 # operate on variation object | |
| 34 print $variation->name, "\n"; | |
| 35 } | |
| 36 | |
| 37 =head1 DESCRIPTION | |
| 38 | |
| 39 Some adaptor methods may return more objects than can fit in memory at once; in these cases | |
| 40 you can fetch an iterator object instead of the usual array reference. The iterator object | |
| 41 allows you to iterate over the set of objects (using the next() method) without loading the | |
| 42 entire set into memory at once. You can tell if an iterator is exhausted with the has_next() | |
| 43 method. The peek() method allows you to fetch the next object from the iterator without | |
| 44 advancing the iterator - this is useful if you want to check some property of an element in | |
| 45 the set while leaving the iterator unchanged. | |
| 46 | |
| 47 You can filter and transform an iterator in an analogous way to using map and grep on arrays | |
| 48 using the provided map() and grep() methods. These methods return another iterator, and only | |
| 49 perform the filtering and transformation on each element as it is requested, so again these | |
| 50 can be used without loading the entire set into memory. | |
| 51 | |
| 52 Iterators can be combined together with the append() method which merges together the | |
| 53 iterator it is called on with the list of iterators passed in as arguments. This is | |
| 54 somewhat analogous to concatenating arrays with the push function. append() returns a new | |
| 55 iterator which iterates over each component iterator until it is exhausted before moving | |
| 56 on to the next iterator, in the order in which they are supplied to the method. | |
| 57 | |
| 58 An iterator can be converted to an array (reference) containing all the elements in the | |
| 59 set with the to_arrayref() method, but note that this array may consume a lot of memory if | |
| 60 the set the iterator is iterating over is large and it is recommended that you do not call | |
| 61 this method unless there is no way of working with each element at a time. | |
| 62 | |
| 63 =head1 METHODS | |
| 64 | |
| 65 =cut | |
| 66 | |
| 67 use strict; | |
| 68 use warnings; | |
| 69 | |
| 70 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
| 71 | |
| 72 =head2 new | |
| 73 | |
| 74 Argument : either a coderef representing the iterator, in which case this | |
| 75 anonymous subroutine is assumed to return the next object in the | |
| 76 set when called and to return undef when the set is exhausted, | |
| 77 or an arrayref, in which case we return an iterator over this | |
| 78 array. If the argument is not defined then we return an 'empty' | |
| 79 iterator that immediately returns undef | |
| 80 | |
| 81 Example : | |
| 82 | |
| 83 my @dbIDs = fetch_relevant_dbIDs(); | |
| 84 | |
| 85 my $iterator = Bio::EnsEMBL::Utils::Iterator->new( | |
| 86 sub { return $self->fetch_by_dbID(shift @dbIDs) } | |
| 87 ); | |
| 88 | |
| 89 NB: this is a very simple example showing how to call the constructor | |
| 90 that would be rather inefficient in practice, real examples should | |
| 91 probably be smarter about batching up queries to minimise trips to | |
| 92 the database. See examples in the Variation API. | |
| 93 | |
| 94 Description: Constructor, creates a new iterator object | |
| 95 Returntype : Bio::EnsEMBL::Utils::Iterator instance | |
| 96 Exceptions : thrown if the supplied argument is neither an arrayref nor a coderef | |
| 97 Caller : general | |
| 98 Status : Experimental | |
| 99 | |
| 100 =cut | |
| 101 | |
# Constructor. Accepts one optional argument:
#   - undef / nothing : builds an 'empty' iterator that only ever yields undef
#   - an arrayref     : builds an iterator over the elements of that array
#   - a coderef       : used directly as the element source; it is expected to
#                       return the next element on each call and undef when
#                       the set is exhausted
# Any other argument causes an exception to be thrown.
# Returns the new iterator object, blessed into $class.
sub new {
    my ($class, $arg) = @_;

    my $coderef;

    if (not defined $arg) {
        # if the user doesn't supply an argument, we create a
        # simple 'empty' iterator that immediately returns undef
        $coderef = sub { return undef };
    }
    elsif (ref $arg eq 'ARRAY') {
        # walk the array with a private index instead of shifting elements
        # off it, so that iterating does not empty the caller's array
        my $index = 0;

        $coderef = sub {
            return $index < @$arg ? $arg->[$index++] : undef;
        };
    }
    elsif (ref $arg eq 'CODE') {
        $coderef = $arg;
    }
    else {
        throw("The supplied argument does not look like an arrayref or a coderef ".(ref $arg));
    }

    # the element source is stored under the 'sub' key of the object
    my $self = {sub => $coderef};

    return bless $self, $class;
}
| 132 | |
| 133 | |
| 134 =head2 next | |
| 135 | |
| 136 Example : $obj = $iterator->next | |
| 137 Description: returns the next object from this iterator, or undef if the iterator is exhausted | |
| 138 Returntype : Object type will depend on what this iterator is iterating over | |
| 139 Exceptions : none | |
| 140 Caller : general | |
| 141 Status : Experimental | |
| 142 | |
| 143 =cut | |
| 144 | |
# Return the next element and advance the iterator; returns undef once the
# underlying source has nothing more to give.
sub next {
    my ($self) = @_;

    # only pull from the underlying coderef when no element has been
    # buffered already (has_next and peek fill the same 'next' slot)
    if (!defined $self->{next}) {
        $self->{next} = $self->{sub}->();
    }

    # delete empties the one-element buffer and hands back what was in it
    return delete $self->{next};
}
| 152 | |
| 153 =head2 has_next | |
| 154 | |
| 155 Example : if ($iterator->has_next) { my $obj = $iterator->next } | |
| 156 Description: Boolean - true if this iterator has more elements to fetch, false when | |
| 157 it is exhausted | |
| 158 Returntype : boolean | |
| 159 Exceptions : none | |
| 160 Caller : general | |
| 161 Status : Experimental | |
| 162 | |
| 163 =cut | |
| 164 | |
# Boolean: true while another element is available, false once the
# underlying source has returned undef.
sub has_next {
    my ($self) = @_;

    # look ahead by fetching one element into the 'next' buffer, unless an
    # element is already waiting there
    if (!defined $self->{next}) {
        $self->{next} = $self->{sub}->();
    }

    return defined $self->{next};
}
| 172 | |
| 173 =head2 peek | |
| 174 | |
| 175 Example : $obj = $iterator->peek | |
| 176 Description: returns the next object from this iterator, or undef if the iterator is exhausted, | |
| 177 much like next but does not advance the iterator (so the same object will be | |
| 178 returned on the following call to next or peek) | |
| 179 Returntype : Object type will depend on what this iterator is iterating over | |
| 180 Exceptions : none | |
| 181 Caller : general | |
| 182 Status : Experimental | |
| 183 | |
| 184 =cut | |
| 185 | |
# Return the next element without advancing the iterator; returns undef
# when the underlying source has nothing more to give.
sub peek {
    my ($self) = @_;

    # fetch one element into the 'next' buffer if none is waiting there
    if (!defined $self->{next}) {
        $self->{next} = $self->{sub}->();
    }

    # leave the element buffered so a later next() or peek() sees it again
    return $self->{next};
}
| 193 | |
| 194 =head2 grep | |
| 195 | |
| 196 Example : my $filtered_iterator = $original_iterator->grep(sub {$_->name =~ /^rs/}); | |
| 197 Description: filter this iterator, returning another iterator | |
| 198 Argument : a coderef which returns true if the element should be included in the | |
| 199 filtered set, or false if the element should be filtered out. $_ will be | |
| 200 set locally to each element in turn so you should be able to write a block | |
| 201 in a similar way as for the perl grep function (although it will need to be | |
| 202 preceded with the sub keyword). Otherwise you can pass in a reference to a | |
| 203 subroutine which expects a single argument with the same behaviour. | |
| 204 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 205 Exceptions : thrown if the argument is not a coderef | |
| 206 Caller : general | |
| 207 Status : Experimental | |
| 208 | |
| 209 =cut | |
| 210 | |
# Build a new iterator that yields only those elements of this iterator for
# which $coderef returns true. Each element is passed to $coderef as its
# first argument and is also available in a localised $_. Filtering happens
# one element at a time, as elements are requested from the new iterator.
# Throws if $coderef is not a code reference.
sub grep {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    my $next_match = sub {
        # consume elements until one passes the filter
        while ($self->has_next) {
            local $_ = $self->next;
            if ($coderef->($_)) {
                return $_;
            }
        }

        # the underlying iterator ran dry without another match
        return undef;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($next_match);
}
| 224 | |
| 225 =head2 map | |
| 226 | |
| 227 Example : my $transformed_iterator = $original_iterator->map(sub {$_->name}); | |
| 228 Description: transform the elements of this iterator, returning another iterator | |
| 229 Argument : a coderef which returns the desired transformation of each element. | |
| 230 $_ will be set locally to each original element in turn so you | |
| 231 should be able to write a block in a similar way as for the perl map | |
| 232 function (although it will need to be preceded with the sub keyword). | |
| 233 Otherwise you can pass in a reference to a subroutine which expects a | |
| 234 single argument with the same behaviour. | |
| 235 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 236 Exceptions : thrown if the argument is not a coderef | |
| 237 Caller : general | |
| 238 Status : Experimental | |
| 239 | |
| 240 =cut | |
| 241 | |
| 242 | |
# Build a new iterator whose elements are the result of applying $coderef to
# each element of this iterator. Each element is passed to $coderef as its
# first argument and is also available in a localised $_. The transformation
# is applied one element at a time, as elements are requested.
# Throws if $coderef is not a code reference.
sub map {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    my $transform_next = sub {
        local $_ = $self->next;

        # an undefined element means this iterator is exhausted, so the
        # transformation is not applied
        return undef unless defined $_;

        return $coderef->($_);
    };

    return Bio::EnsEMBL::Utils::Iterator->new($transform_next);
}
| 253 | |
| 254 | |
| 255 =head2 each | |
| 256 | |
| 257 Example : $iterator->each(sub { print $_->name, "\n"; }); | |
| 258 Description: Performs a full iteration of the current iterator instance. | |
| 259 Argument : a coderef which is called once for each element. | |
| 260 $_ will be set locally to each element. | |
| 261 Returntype : None | |
| 262 Exceptions : thrown if the argument is not a coderef | |
| 263 Caller : general | |
| 264 Status : Experimental | |
| 265 | |
| 266 =cut | |
| 267 | |
| 268 | |
# Run $coderef once for every remaining element, consuming the iterator.
# Each element is passed as the first argument and is also available in a
# localised $_. Returns nothing. Throws if $coderef is not a code reference.
sub each {
    my ($self, $coderef) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    while ($self->has_next()) {
        local $_ = $self->next();
        $coderef->($_);
    }

    return;
}
| 278 | |
| 279 | |
| 280 =head2 to_arrayref | |
| 281 | |
| 282 Example : my $arrayref = $iterator->to_arrayref; | |
| 283 Description: return a reference to an array containing all elements from the | |
| 284 iterator. This is created by simply iterating over the iterator | |
| 285 until it is exhausted and adding each element in turn to an array. | |
| 286 Note that this may consume a lot of memory for iterators over | |
| 287 large collections | |
| 288 Returntype : arrayref | |
| 289 Exceptions : none | |
| 290 Caller : general | |
| 291 Status : Experimental | |
| 292 | |
| 293 =cut | |
| 294 | |
# Drain the iterator and return a reference to an array holding every
# remaining element, in iteration order.
sub to_arrayref {
    my $self = shift;

    my @elements;

    # keep appending until the iterator reports it is exhausted
    push @elements, $self->next while $self->has_next;

    return \@elements;
}
| 306 | |
| 307 =head2 append | |
| 308 | |
| 309 Example : my $combined_iterator = $iterator1->append($iterator2, $iterator3); | |
| 310 Description: return a new iterator that combines this iterator with the others | |
| 311 passed as arguments, this new iterator will iterate over each | |
| 312 component iterator (in the order supplied here) until it is | |
| 313 exhausted and then move on to the next iterator until all are | |
| 314 exhausted | |
| 315 Argument : an array of Bio::EnsEMBL::Utils::Iterator objects | |
| 316 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 317 Exceptions : thrown if any of the arguments are not iterators | |
| 318 Caller : general | |
| 319 Status : Experimental | |
| 320 | |
| 321 =cut | |
| 322 | |
# Build a new iterator that runs through this iterator and then each of the
# iterators given as arguments, in order, moving on to the next one whenever
# the current one is exhausted. Throws if any argument does not provide both
# a has_next and a next method.
sub append {
    my ($self, @others) = @_;

    foreach my $candidate (@others) {
        my $is_iterator = UNIVERSAL::can($candidate, 'has_next')
                       && UNIVERSAL::can($candidate, 'next');

        throw("Argument to append doesn't look like an iterator")
            unless $is_iterator;
    }

    # this iterator goes first, followed by the supplied ones
    my @pending = ($self, @others);

    my $next_from_queue = sub {
        # drop exhausted iterators from the head of the queue
        shift @pending while @pending && !$pending[0]->has_next;

        # nothing left to draw from
        return undef unless @pending;

        return $pending[0]->next;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($next_from_queue);
}
| 345 | |
| 346 =head2 take | |
| 347 | |
| 348 Example : my $limited_iterator = $iterator->take(5); | |
| 349 Description: return a new iterator that only iterates over the | |
| 350 first n elements of this iterator | |
| 351 Argument : a non-negative integer | |
| 352 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 353 Exceptions : thrown if the argument is negative | |
| 354 Caller : general | |
| 355 Status : Experimental | |
| 356 | |
| 357 =cut | |
| 358 | |
# Build a new iterator that yields at most the first $n elements of this
# iterator. Throws if $n is negative.
sub take {
    my ($self, $n) = @_;

    if ($n < 0) {
        throw("Argument cannot be negative");
    }

    # number of elements requested from the new iterator so far
    my $taken = 0;

    my $limited = sub {
        # once the limit has been reached, stop drawing from this iterator
        return undef if $taken++ >= $n;

        return $self->next;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($limited);
}
| 370 | |
| 371 =head2 skip | |
| 372 | |
| 373 Example : my $limited_iterator = $iterator->skip(5); | |
| 374 Description: skip over the first n elements of this iterator (and then return | |
| 375 the same iterator for your method chaining convenience) | |
| 376 Argument : a non-negative integer | |
| 377 Returntype : Bio::EnsEMBL::Utils::Iterator | |
| 378 Exceptions : thrown if the argument is negative | |
| 379 Caller : general | |
| 380 Status : Experimental | |
| 381 | |
| 382 =cut | |
| 383 | |
# Discard the first $n elements of this iterator and return the same
# iterator object, so that calls can be chained. Throws if $n is negative.
sub skip {
    my ($self, $n) = @_;

    if ($n < 0) {
        throw("Argument cannot be negative");
    }

    # one next() call per skipped element; the returned elements are dropped
    for (0 .. $n - 1) {
        $self->next;
    }

    return $self;
}
| 393 | |
| 394 =head2 reduce | |
| 395 | |
| 396 Example : my $tot_length = $iterator->reduce(sub { $_[0] + $_[1]->length }, 0); | |
| 397 Description: reduce this iterator with the provided coderef, using the (optional) | |
| 398 second argument as the initial value of the accumulator | |
| 399 Argument[1]: a coderef that expects 2 arguments, the current accumulator | |
| 400 value and the next element in the set, and returns the next | |
| 401 accumulator value. Unless the optional second argument is | |
| 402 provided the first accumulator value passed in will be the | |
| 403 first element in the set | |
| 404 Argument[2]: (optional) an initial value to use for the accumulator instead | |
| 405 of the first value of the set | |
| 406 Returntype : returntype of the coderef | |
| 407 Exceptions : thrown if the argument is not a coderef | |
| 408 Caller : general | |
| 409 Status : Experimental | |
| 410 | |
| 411 =cut | |
| 412 | |
# Fold the remaining elements into a single value. $coderef is called with
# the current accumulator and the next element, and its return value becomes
# the new accumulator. The accumulator starts as $init_val when that is
# defined, otherwise as the first element of the iterator.
# Returns the final accumulator. Throws if $coderef is not a code reference.
sub reduce {
    my ($self, $coderef, $init_val) = @_;

    if (ref($coderef) ne 'CODE') {
        throw('Argument should be a coderef');
    }

    my $accumulator = $init_val;

    # with no defined initial value, seed the accumulator from the iterator
    $accumulator = $self->next unless defined $accumulator;

    while ($self->has_next) {
        $accumulator = $coderef->($accumulator, $self->next);
    }

    return $accumulator;
}
| 426 | |
| 427 1; | |
| 428 |
