Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Seq/SeqBuilder.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: SeqBuilder.pm,v 1.6 2002/10/22 07:45:20 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Seq::SeqBuilder | |
4 # | |
5 # Cared for by Hilmar Lapp <hlapp at gmx.net> | |
6 # | |
7 # Copyright Hilmar Lapp | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # | |
12 # (c) Hilmar Lapp, hlapp at gmx.net, 2002. | |
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. | |
14 # | |
15 # You may distribute this module under the same terms as perl itself. | |
16 # Refer to the Perl Artistic License (see the license accompanying this | |
17 # software package, or see http://www.perl.com/language/misc/Artistic.html) | |
18 # for the terms under which you may use, modify, and redistribute this module. | |
19 # | |
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED | |
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | |
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
23 # | |
24 | |
25 # POD documentation - main docs before the code | |
26 | |
27 =head1 NAME | |
28 | |
29 Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers | |
30 | |
31 =head1 SYNOPSIS | |
32 | |
33 use Bio::SeqIO; | |
34 | |
35 # usually you won't instantiate this yourself -- a SeqIO object | |
36 # will have one already | |
37 my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank"); | |
38 my $builder = $seqin->sequence_builder(); | |
39 | |
40 # if you need only sequence, id, and description (e.g. for | |
41 # conversion to FASTA format): | |
42 $builder->want_none(); | |
43 $builder->add_wanted_slot('display_id','desc','seq'); | |
44 | |
45 # if you want everything except the sequence and features | |
46 $builder->want_all(1); # this is the default if it's untouched | |
47 $builder->add_unwanted_slot('seq','features'); | |
48 | |
49 # if you want only human sequences shorter than 5kb and skip all | |
50 # others | |
51 $builder->add_object_condition(sub { | |
52 my $h = shift; | |
53 return 0 if $h->{'-length'} > 5000; | |
54 return 0 if exists($h->{'-species'}) && | |
55 ($h->{'-species'}->binomial() ne "Homo sapiens"); | |
56 return 1; | |
57 }); | |
58 | |
59 # when you are finished with configuring the builder, just use | |
60 # the SeqIO API as you would normally | |
61 while(my $seq = $seqin->next_seq()) { | |
62 # do something | |
63 } | |
64 | |
65 =head1 DESCRIPTION | |
66 | |
67 This is an implementation of L<Bio::Factory::ObjectBuilderI> used by | |
68 parsers of rich sequence streams. It provides for a relatively | |
69 easy-to-use configurator of the parsing flow. | |
70 | |
71 Configuring the parsing process may be for you if you need much less | |
72 information, or much less sequences, than the stream actually | |
73 contains. Configuration can in both cases speed up the parsing time | |
74 considerably, because unwanted sections or the rest of unwanted | |
75 sequences are skipped over by the parser. | |
76 | |
77 See the methods of the class-specific implementation section for | |
78 further documentation of what can be configured. | |
79 | |
80 =head1 FEEDBACK | |
81 | |
82 =head2 Mailing Lists | |
83 | |
84 User feedback is an integral part of the evolution of this and other | |
85 Bioperl modules. Send your comments and suggestions preferably to | |
86 the Bioperl mailing list. Your participation is much appreciated. | |
87 | |
88 bioperl-l@bioperl.org - General discussion | |
89 http://bioperl.org/MailList.shtml - About the mailing lists | |
90 | |
91 =head2 Reporting Bugs | |
92 | |
93 Report bugs to the Bioperl bug tracking system to help us keep track | |
94 of the bugs and their resolution. Bug reports can be submitted via | |
95 email or the web: | |
96 | |
97 bioperl-bugs@bioperl.org | |
98 http://bugzilla.bioperl.org/ | |
99 | |
100 =head1 AUTHOR - Hilmar Lapp | |
101 | |
102 Email hlapp at gmx.net | |
103 | |
104 =head1 CONTRIBUTORS | |
105 | |
106 Additional contributors names and emails here | |
107 | |
108 =head1 APPENDIX | |
109 | |
110 The rest of the documentation details each of the object methods. | |
111 Internal methods are usually preceded with a _ | |
112 | |
113 =cut | |
114 | |
115 | |
116 # Let the code begin... | |
117 | |
118 | |
119 package Bio::Seq::SeqBuilder; | |
120 use vars qw(@ISA); | |
121 use strict; | |
122 | |
123 # Object preamble - inherits from Bio::Root::Root | |
124 | |
125 use Bio::Root::Root; | |
126 use Bio::Factory::ObjectBuilderI; | |
127 | |
128 @ISA = qw(Bio::Root::Root Bio::Factory::ObjectBuilderI); | |
129 | |
130 my %slot_param_map = ("add_SeqFeature" => "features", | |
131 ); | |
132 my %param_slot_map = ("features" => "add_SeqFeature", | |
133 ); | |
134 | |
135 =head2 new | |
136 | |
137 Title : new | |
138 Usage : my $obj = new Bio::Seq::SeqBuilder(); | |
139 Function: Builds a new Bio::Seq::SeqBuilder object | |
140 Returns : an instance of Bio::Seq::SeqBuilder | |
141 Args : | |
142 | |
143 | |
144 =cut | |
145 | |
146 sub new { | |
147 my($class,@args) = @_; | |
148 | |
149 my $self = $class->SUPER::new(@args); | |
150 | |
151 $self->{'wanted_slots'} = []; | |
152 $self->{'unwanted_slots'} = []; | |
153 $self->{'object_conds'} = []; | |
154 $self->{'_objhash'} = {}; | |
155 $self->want_all(1); | |
156 | |
157 return $self; | |
158 } | |
159 | |
160 =head1 Methods for implementing L<Bio::Factory::ObjectBuilderI> | |
161 | |
162 =cut | |
163 | |
164 =head2 want_slot | |
165 | |
166 Title : want_slot | |
167 Usage : | |
168 Function: Whether or not the object builder wants to populate the | |
169 specified slot of the object to be built. | |
170 | |
171 The slot can be specified either as the name of the | |
172 respective method, or the initialization parameter that | |
173 would be otherwise passed to new() of the object to be | |
174 built. | |
175 | |
176 Note that usually only the parser will call this | |
177 method. Use add_wanted_slots and add_unwanted_slots for | |
178 configuration. | |
179 | |
180 Example : | |
181 Returns : TRUE if the object builder wants to populate the slot, and | |
182 FALSE otherwise. | |
183 Args : the name of the slot (a string) | |
184 | |
185 | |
186 =cut | |
187 | |
188 sub want_slot{ | |
189 my ($self,$slot) = @_; | |
190 my $ok = 0; | |
191 | |
192 $slot = substr($slot,1) if substr($slot,0,1) eq '-'; | |
193 if($self->want_all()) { | |
194 foreach ($self->get_unwanted_slots()) { | |
195 # this always overrides in want-all mode | |
196 return 0 if($slot eq $_); | |
197 } | |
198 if(! exists($self->{'_objskel'})) { | |
199 $self->{'_objskel'} = $self->sequence_factory->create_object(); | |
200 } | |
201 if(exists($param_slot_map{$slot})) { | |
202 $ok = $self->{'_objskel'}->can($param_slot_map{$slot}); | |
203 } else { | |
204 $ok = $self->{'_objskel'}->can($slot); | |
205 } | |
206 return $ok if $ok; | |
207 # even if the object 'cannot' do this slot, it might have been | |
208 # added to the list of wanted slot, so carry on | |
209 } | |
210 foreach ($self->get_wanted_slots()) { | |
211 if($slot eq $_) { | |
212 $ok = 1; | |
213 last; | |
214 } | |
215 } | |
216 return $ok; | |
217 } | |
218 | |
219 =head2 add_slot_value | |
220 | |
221 Title : add_slot_value | |
222 Usage : | |
223 Function: Adds one or more values to the specified slot of the object | |
224 to be built. | |
225 | |
226 Naming the slot is the same as for want_slot(). | |
227 | |
228 The object builder may further filter the content to be | |
229 set, or even completely ignore the request. | |
230 | |
231 If this method reports failure, the caller should not add | |
232 more values to the same slot. In addition, the caller may | |
233 find it appropriate to abandon the object being built | |
234 altogether. | |
235 | |
236 This implementation will allow the caller to overwrite the | |
237 return value from want_slot(), because the slot is not | |
238 checked against want_slot(). | |
239 | |
240 Note that usually only the parser will call this method, | |
241 but you may call it from anywhere if you know what you are | |
242 doing. A derived class may be used to further manipulate | |
243 the value to be added. | |
244 | |
245 Example : | |
246 Returns : TRUE on success, and FALSE otherwise | |
247 Args : the name of the slot (a string) | |
248 parameters determining the value to be set | |
249 | |
250 OR | |
251 | |
252 alternatively, a list of slotname/value pairs in the style | |
253 of named parameters as they would be passed to new(), where | |
254 each element at an even index is the parameter (slot) name | |
255 starting with a dash, and each element at an odd index is | |
256 the value of the preceding name. | |
257 | |
258 | |
259 =cut | |
260 | |
261 sub add_slot_value{ | |
262 my ($self,$slot,@args) = @_; | |
263 | |
264 my $h = $self->{'_objhash'}; | |
265 return unless $h; | |
266 # multiple named parameter variant of calling? | |
267 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) { | |
268 unshift(@args, $slot); | |
269 while(@args) { | |
270 my $key = shift(@args); | |
271 $h->{$key} = shift(@args); | |
272 } | |
273 } else { | |
274 if($slot eq 'add_SeqFeature') { | |
275 $slot = '-'.$slot_param_map{$slot}; | |
276 $h->{$slot} = [] unless $h->{$slot}; | |
277 push(@{$h->{$slot}}, @args); | |
278 } else { | |
279 $slot = '-'.$slot unless substr($slot,0,1) eq '-'; | |
280 $h->{$slot} = $args[0]; | |
281 } | |
282 } | |
283 return 1; | |
284 } | |
285 | |
286 =head2 want_object | |
287 | |
288 Title : want_object | |
289 Usage : | |
290 Function: Whether or not the object builder is still interested in | |
291 continuing with the object being built. | |
292 | |
293 If this method returns FALSE, the caller should not add any | |
294 more values to slots, or otherwise risks that the builder | |
295 throws an exception. In addition, make_object() is likely | |
296 to return undef after this method returned FALSE. | |
297 | |
298 Note that usually only the parser will call this | |
299 method. Use add_object_condition for configuration. | |
300 | |
301 Example : | |
302 Returns : TRUE if the object builder wants to continue building | |
303 the present object, and FALSE otherwise. | |
304 Args : none | |
305 | |
306 | |
307 =cut | |
308 | |
309 sub want_object{ | |
310 my $self = shift; | |
311 | |
312 my $ok = 1; | |
313 foreach my $cond ($self->get_object_conditions()) { | |
314 $ok = &$cond($self->{'_objhash'}); | |
315 last unless $ok; | |
316 } | |
317 delete $self->{'_objhash'} unless $ok; | |
318 return $ok; | |
319 } | |
320 | |
321 =head2 make_object | |
322 | |
323 Title : make_object | |
324 Usage : | |
325 Function: Get the built object. | |
326 | |
327 This method is allowed to return undef if no value has ever | |
328 been added since the last call to make_object(), or if | |
329 want_object() returned FALSE (or would have returned FALSE) | |
330 before calling this method. | |
331 | |
332 For an implementation that allows consecutive building of | |
333 objects, a caller must call this method once, and only | |
334 once, between subsequent objects to be built. I.e., a call | |
335 to make_object implies 'end_object.' | |
336 | |
337 Example : | |
338 Returns : the object that was built | |
339 Args : none | |
340 | |
341 | |
342 =cut | |
343 | |
344 sub make_object{ | |
345 my $self = shift; | |
346 | |
347 my $obj; | |
348 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) { | |
349 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}}); | |
350 } | |
351 $self->{'_objhash'} = {}; # reset | |
352 return $obj; | |
353 } | |
354 | |
355 =head1 Implementation specific methods | |
356 | |
357 These methods allow to conveniently configure this sequence object | |
358 builder as to which slots are desired, and under which circumstances a | |
359 sequence object should be abandoned altogether. The default mode is | |
360 want_all(1), which means the builder will report all slots as wanted | |
361 that the object created by the sequence factory supports. | |
362 | |
363 You can add specific slots you want through add_wanted_slots(). In | |
364 most cases, you will want to call want_none() before in order to relax | |
365 zero acceptance through a list of wanted slots. | |
366 | |
367 Alternatively, you can add specific unwanted slots through | |
368 add_unwanted_slots(). In this case, you will usually want to call | |
369 want_all(1) before (which is the default if you never touched the | |
370 builder) to restrict unrestricted acceptance. | |
371 | |
372 I.e., want_all(1) means want all slots except for the unwanted, and | |
373 want_none() means only those explicitly wanted. | |
374 | |
375 If a slot is in both the unwanted and the wanted list, the following | |
376 rules hold. In want-all mode, the unwanted list overrules. In | |
377 want-none mode, the wanted list overrides the unwanted list. If this | |
378 is confusing to you, just try to avoid having slots at the same time | |
379 in the wanted and the unwanted lists. | |
380 | |
381 =cut | |
382 | |
383 =head2 get_wanted_slots | |
384 | |
385 Title : get_wanted_slots | |
386 Usage : $obj->get_wanted_slots($newval) | |
387 Function: Get the list of wanted slots | |
388 Example : | |
389 Returns : a list of strings | |
390 Args : | |
391 | |
392 | |
393 =cut | |
394 | |
395 sub get_wanted_slots{ | |
396 my $self = shift; | |
397 | |
398 return @{$self->{'wanted_slots'}}; | |
399 } | |
400 | |
401 =head2 add_wanted_slot | |
402 | |
403 Title : add_wanted_slot | |
404 Usage : | |
405 Function: Adds the specified slots to the list of wanted slots. | |
406 Example : | |
407 Returns : TRUE | |
408 Args : an array of slot names (strings) | |
409 | |
410 | |
411 =cut | |
412 | |
413 sub add_wanted_slot{ | |
414 my ($self,@slots) = @_; | |
415 | |
416 my $myslots = $self->{'wanted_slots'}; | |
417 foreach my $slot (@slots) { | |
418 if(! grep { $slot eq $_; } @$myslots) { | |
419 push(@$myslots, $slot); | |
420 } | |
421 } | |
422 return 1; | |
423 } | |
424 | |
425 =head2 remove_wanted_slots | |
426 | |
427 Title : remove_wanted_slots | |
428 Usage : | |
429 Function: Removes all wanted slots added previously through | |
430 add_wanted_slots(). | |
431 Example : | |
432 Returns : the previous list of wanted slot names | |
433 Args : none | |
434 | |
435 | |
436 =cut | |
437 | |
438 sub remove_wanted_slots{ | |
439 my $self = shift; | |
440 my @slots = $self->get_wanted_slots(); | |
441 $self->{'wanted_slots'} = []; | |
442 return @slots; | |
443 } | |
444 | |
445 =head2 get_unwanted_slots | |
446 | |
447 Title : get_unwanted_slots | |
448 Usage : $obj->get_unwanted_slots($newval) | |
449 Function: Get the list of unwanted slots. | |
450 Example : | |
451 Returns : a list of strings | |
452 Args : none | |
453 | |
454 | |
455 =cut | |
456 | |
457 sub get_unwanted_slots{ | |
458 my $self = shift; | |
459 | |
460 return @{$self->{'unwanted_slots'}}; | |
461 } | |
462 | |
463 =head2 add_unwanted_slot | |
464 | |
465 Title : add_unwanted_slot | |
466 Usage : | |
467 Function: Adds the specified slots to the list of unwanted slots. | |
468 Example : | |
469 Returns : TRUE | |
470 Args : an array of slot names (strings) | |
471 | |
472 | |
473 =cut | |
474 | |
475 sub add_unwanted_slot{ | |
476 my ($self,@slots) = @_; | |
477 | |
478 my $myslots = $self->{'unwanted_slots'}; | |
479 foreach my $slot (@slots) { | |
480 if(! grep { $slot eq $_; } @$myslots) { | |
481 push(@$myslots, $slot); | |
482 } | |
483 } | |
484 return 1; | |
485 } | |
486 | |
487 =head2 remove_unwanted_slots | |
488 | |
489 Title : remove_unwanted_slots | |
490 Usage : | |
491 Function: Removes the list of unwanted slots added previously through | |
492 add_unwanted_slots(). | |
493 Example : | |
494 Returns : the previous list of unwanted slot names | |
495 Args : none | |
496 | |
497 | |
498 =cut | |
499 | |
500 sub remove_unwanted_slots{ | |
501 my $self = shift; | |
502 my @slots = $self->get_unwanted_slots(); | |
503 $self->{'unwanted_slots'} = []; | |
504 return @slots; | |
505 } | |
506 | |
507 =head2 want_none | |
508 | |
509 Title : want_none | |
510 Usage : | |
511 Function: Disables all slots. After calling this method, want_slot() | |
512 will return FALSE regardless of slot name. | |
513 | |
514 This is different from removed_wanted_slots() in that it | |
515 also sets want_all() to FALSE. Note that it also resets the | |
516 list of unwanted slots in order to avoid slots being in | |
517 both lists. | |
518 | |
519 Example : | |
520 Returns : TRUE | |
521 Args : none | |
522 | |
523 | |
524 =cut | |
525 | |
526 sub want_none{ | |
527 my $self = shift; | |
528 | |
529 $self->want_all(0); | |
530 $self->remove_wanted_slots(); | |
531 $self->remove_unwanted_slots(); | |
532 return 1; | |
533 } | |
534 | |
535 =head2 want_all | |
536 | |
537 Title : want_all | |
538 Usage : $obj->want_all($newval) | |
539 Function: Whether or not this sequence object builder wants to | |
540 populate all slots that the object has. Whether an object | |
541 supports a slot is generally determined by what can() | |
542 returns. You can add additional 'virtual' slots by calling | |
543 add_wanted_slot. | |
544 | |
545 This will be ON by default. Call $obj->want_none() to | |
546 disable all slots. | |
547 | |
548 Example : | |
549 Returns : TRUE if this builder wants to populate all slots, and | |
550 FALSE otherwise. | |
551 Args : on set, new value (a scalar or undef, optional) | |
552 | |
553 | |
554 =cut | |
555 | |
556 sub want_all{ | |
557 my $self = shift; | |
558 | |
559 return $self->{'want_all'} = shift if @_; | |
560 return $self->{'want_all'}; | |
561 } | |
562 | |
563 =head2 get_object_conditions | |
564 | |
565 Title : get_object_conditions | |
566 Usage : | |
567 Function: Get the list of conditions an object must meet in order to | |
568 be 'wanted.' See want_object() for where this is used. | |
569 | |
570 Conditions in this implementation are closures (anonymous | |
571 functions) which are passed one parameter, a hash reference | |
572 the keys of which are equal to initialization | |
573 paramaters. The closure must return TRUE to make the object | |
574 'wanted.' | |
575 | |
576 Conditions will be implicitly ANDed. | |
577 | |
578 Example : | |
579 Returns : a list of closures | |
580 Args : none | |
581 | |
582 | |
583 =cut | |
584 | |
585 sub get_object_conditions{ | |
586 my $self = shift; | |
587 | |
588 return @{$self->{'object_conds'}}; | |
589 } | |
590 | |
591 =head2 add_object_condition | |
592 | |
593 Title : add_object_condition | |
594 Usage : | |
595 Function: Adds a condition an object must meet in order to be 'wanted.' | |
596 See want_object() for where this is used. | |
597 | |
598 Conditions in this implementation must be closures | |
599 (anonymous functions). These will be passed one parameter, | |
600 which is a hash reference with the sequence object | |
601 initialization paramters being the keys. | |
602 | |
603 Conditions are implicitly ANDed. If you want other | |
604 operators, perform those tests inside of one closure | |
605 instead of multiple. This will also be more efficient. | |
606 | |
607 Example : | |
608 Returns : TRUE | |
609 Args : the list of conditions | |
610 | |
611 | |
612 =cut | |
613 | |
614 sub add_object_condition{ | |
615 my ($self,@conds) = @_; | |
616 | |
617 if(grep { ref($_) ne 'CODE'; } @conds) { | |
618 $self->throw("conditions against which to validate an object ". | |
619 "must be anonymous code blocks"); | |
620 } | |
621 push(@{$self->{'object_conds'}}, @conds); | |
622 return 1; | |
623 } | |
624 | |
625 =head2 remove_object_conditions | |
626 | |
627 Title : remove_object_conditions | |
628 Usage : | |
629 Function: Removes the conditions an object must meet in order to be | |
630 'wanted.' | |
631 Example : | |
632 Returns : The list of previously set conditions (an array of closures) | |
633 Args : none | |
634 | |
635 | |
636 =cut | |
637 | |
638 sub remove_object_conditions{ | |
639 my $self = shift; | |
640 my @conds = $self->get_object_conditions(); | |
641 $self->{'object_conds'} = []; | |
642 return @conds; | |
643 } | |
644 | |
645 =head1 Methods to control what type of object is built | |
646 | |
647 =cut | |
648 | |
649 =head2 sequence_factory | |
650 | |
651 Title : sequence_factory | |
652 Usage : $obj->sequence_factory($newval) | |
653 Function: Get/set the sequence factory to be used by this object | |
654 builder. | |
655 Example : | |
656 Returns : the Bio::Factory::SequenceFactoryI implementing object to use | |
657 Args : on set, new value (a Bio::Factory::SequenceFactoryI | |
658 implementing object or undef, optional) | |
659 | |
660 | |
661 =cut | |
662 | |
663 sub sequence_factory{ | |
664 my $self = shift; | |
665 | |
666 if(@_) { | |
667 delete $self->{'_objskel'}; | |
668 return $self->{'sequence_factory'} = shift; | |
669 } | |
670 return $self->{'sequence_factory'}; | |
671 } | |
672 | |
673 1; |