comparison variant_effect_predictor/Bio/Seq/SeqBuilder.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: SeqBuilder.pm,v 1.6 2002/10/22 07:45:20 lapp Exp $
2 #
3 # BioPerl module for Bio::Seq::SeqBuilder
4 #
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
6 #
7 # Copyright Hilmar Lapp
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 #
12 # (c) Hilmar Lapp, hlapp at gmx.net, 2002.
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
14 #
15 # You may distribute this module under the same terms as perl itself.
16 # Refer to the Perl Artistic License (see the license accompanying this
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
18 # for the terms under which you may use, modify, and redistribute this module.
19 #
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
23 #
24
25 # POD documentation - main docs before the code
26
27 =head1 NAME
28
29 Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
30
31 =head1 SYNOPSIS
32
33 use Bio::SeqIO;
34
35 # usually you won't instantiate this yourself -- a SeqIO object
36 # will have one already
37 my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
38 my $builder = $seqin->sequence_builder();
39
40 # if you need only sequence, id, and description (e.g. for
41 # conversion to FASTA format):
42 $builder->want_none();
43 $builder->add_wanted_slot('display_id','desc','seq');
44
45 # if you want everything except the sequence and features
46 $builder->want_all(1); # this is the default if it's untouched
47 $builder->add_unwanted_slot('seq','features');
48
49 # if you want only human sequences shorter than 5kb and skip all
50 # others
51 $builder->add_object_condition(sub {
52 my $h = shift;
53 return 0 if $h->{'-length'} > 5000;
54 return 0 if exists($h->{'-species'}) &&
55 ($h->{'-species'}->binomial() ne "Homo sapiens");
56 return 1;
57 });
58
59 # when you are finished with configuring the builder, just use
60 # the SeqIO API as you would normally
61 while(my $seq = $seqin->next_seq()) {
62 # do something
63 }
64
65 =head1 DESCRIPTION
66
67 This is an implementation of L<Bio::Factory::ObjectBuilderI> used by
68 parsers of rich sequence streams. It provides for a relatively
69 easy-to-use configurator of the parsing flow.
70
71 Configuring the parsing process may be for you if you need much less
72 information, or much less sequences, than the stream actually
73 contains. Configuration can in both cases speed up the parsing time
74 considerably, because unwanted sections or the rest of unwanted
75 sequences are skipped over by the parser.
76
77 See the methods of the class-specific implementation section for
78 further documentation of what can be configured.
79
80 =head1 FEEDBACK
81
82 =head2 Mailing Lists
83
84 User feedback is an integral part of the evolution of this and other
85 Bioperl modules. Send your comments and suggestions preferably to
86 the Bioperl mailing list. Your participation is much appreciated.
87
88 bioperl-l@bioperl.org - General discussion
89 http://bioperl.org/MailList.shtml - About the mailing lists
90
91 =head2 Reporting Bugs
92
93 Report bugs to the Bioperl bug tracking system to help us keep track
94 of the bugs and their resolution. Bug reports can be submitted via
95 email or the web:
96
97 bioperl-bugs@bioperl.org
98 http://bugzilla.bioperl.org/
99
100 =head1 AUTHOR - Hilmar Lapp
101
102 Email hlapp at gmx.net
103
104 =head1 CONTRIBUTORS
105
106 Additional contributors names and emails here
107
108 =head1 APPENDIX
109
110 The rest of the documentation details each of the object methods.
111 Internal methods are usually preceded with a _
112
113 =cut
114
115
116 # Let the code begin...
117
118
119 package Bio::Seq::SeqBuilder;
120 use vars qw(@ISA);
121 use strict;
122
123 # Object preamble - inherits from Bio::Root::Root
124
125 use Bio::Root::Root;
126 use Bio::Factory::ObjectBuilderI;
127
128 @ISA = qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
129
130 my %slot_param_map = ("add_SeqFeature" => "features",
131 );
132 my %param_slot_map = ("features" => "add_SeqFeature",
133 );
134
135 =head2 new
136
137 Title : new
138 Usage : my $obj = new Bio::Seq::SeqBuilder();
139 Function: Builds a new Bio::Seq::SeqBuilder object
140 Returns : an instance of Bio::Seq::SeqBuilder
141 Args :
142
143
144 =cut
145
146 sub new {
147 my($class,@args) = @_;
148
149 my $self = $class->SUPER::new(@args);
150
151 $self->{'wanted_slots'} = [];
152 $self->{'unwanted_slots'} = [];
153 $self->{'object_conds'} = [];
154 $self->{'_objhash'} = {};
155 $self->want_all(1);
156
157 return $self;
158 }
159
160 =head1 Methods for implementing L<Bio::Factory::ObjectBuilderI>
161
162 =cut
163
164 =head2 want_slot
165
166 Title : want_slot
167 Usage :
168 Function: Whether or not the object builder wants to populate the
169 specified slot of the object to be built.
170
171 The slot can be specified either as the name of the
172 respective method, or the initialization parameter that
173 would be otherwise passed to new() of the object to be
174 built.
175
176 Note that usually only the parser will call this
177 method. Use add_wanted_slots and add_unwanted_slots for
178 configuration.
179
180 Example :
181 Returns : TRUE if the object builder wants to populate the slot, and
182 FALSE otherwise.
183 Args : the name of the slot (a string)
184
185
186 =cut
187
188 sub want_slot{
189 my ($self,$slot) = @_;
190 my $ok = 0;
191
192 $slot = substr($slot,1) if substr($slot,0,1) eq '-';
193 if($self->want_all()) {
194 foreach ($self->get_unwanted_slots()) {
195 # this always overrides in want-all mode
196 return 0 if($slot eq $_);
197 }
198 if(! exists($self->{'_objskel'})) {
199 $self->{'_objskel'} = $self->sequence_factory->create_object();
200 }
201 if(exists($param_slot_map{$slot})) {
202 $ok = $self->{'_objskel'}->can($param_slot_map{$slot});
203 } else {
204 $ok = $self->{'_objskel'}->can($slot);
205 }
206 return $ok if $ok;
207 # even if the object 'cannot' do this slot, it might have been
208 # added to the list of wanted slot, so carry on
209 }
210 foreach ($self->get_wanted_slots()) {
211 if($slot eq $_) {
212 $ok = 1;
213 last;
214 }
215 }
216 return $ok;
217 }
218
219 =head2 add_slot_value
220
221 Title : add_slot_value
222 Usage :
223 Function: Adds one or more values to the specified slot of the object
224 to be built.
225
226 Naming the slot is the same as for want_slot().
227
228 The object builder may further filter the content to be
229 set, or even completely ignore the request.
230
231 If this method reports failure, the caller should not add
232 more values to the same slot. In addition, the caller may
233 find it appropriate to abandon the object being built
234 altogether.
235
236 This implementation will allow the caller to overwrite the
237 return value from want_slot(), because the slot is not
238 checked against want_slot().
239
240 Note that usually only the parser will call this method,
241 but you may call it from anywhere if you know what you are
242 doing. A derived class may be used to further manipulate
243 the value to be added.
244
245 Example :
246 Returns : TRUE on success, and FALSE otherwise
247 Args : the name of the slot (a string)
248 parameters determining the value to be set
249
250 OR
251
252 alternatively, a list of slotname/value pairs in the style
253 of named parameters as they would be passed to new(), where
254 each element at an even index is the parameter (slot) name
255 starting with a dash, and each element at an odd index is
256 the value of the preceding name.
257
258
259 =cut
260
261 sub add_slot_value{
262 my ($self,$slot,@args) = @_;
263
264 my $h = $self->{'_objhash'};
265 return unless $h;
266 # multiple named parameter variant of calling?
267 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
268 unshift(@args, $slot);
269 while(@args) {
270 my $key = shift(@args);
271 $h->{$key} = shift(@args);
272 }
273 } else {
274 if($slot eq 'add_SeqFeature') {
275 $slot = '-'.$slot_param_map{$slot};
276 $h->{$slot} = [] unless $h->{$slot};
277 push(@{$h->{$slot}}, @args);
278 } else {
279 $slot = '-'.$slot unless substr($slot,0,1) eq '-';
280 $h->{$slot} = $args[0];
281 }
282 }
283 return 1;
284 }
285
286 =head2 want_object
287
288 Title : want_object
289 Usage :
290 Function: Whether or not the object builder is still interested in
291 continuing with the object being built.
292
293 If this method returns FALSE, the caller should not add any
294 more values to slots, or otherwise risks that the builder
295 throws an exception. In addition, make_object() is likely
296 to return undef after this method returned FALSE.
297
298 Note that usually only the parser will call this
299 method. Use add_object_condition for configuration.
300
301 Example :
302 Returns : TRUE if the object builder wants to continue building
303 the present object, and FALSE otherwise.
304 Args : none
305
306
307 =cut
308
309 sub want_object{
310 my $self = shift;
311
312 my $ok = 1;
313 foreach my $cond ($self->get_object_conditions()) {
314 $ok = &$cond($self->{'_objhash'});
315 last unless $ok;
316 }
317 delete $self->{'_objhash'} unless $ok;
318 return $ok;
319 }
320
321 =head2 make_object
322
323 Title : make_object
324 Usage :
325 Function: Get the built object.
326
327 This method is allowed to return undef if no value has ever
328 been added since the last call to make_object(), or if
329 want_object() returned FALSE (or would have returned FALSE)
330 before calling this method.
331
332 For an implementation that allows consecutive building of
333 objects, a caller must call this method once, and only
334 once, between subsequent objects to be built. I.e., a call
335 to make_object implies 'end_object.'
336
337 Example :
338 Returns : the object that was built
339 Args : none
340
341
342 =cut
343
344 sub make_object{
345 my $self = shift;
346
347 my $obj;
348 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
349 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
350 }
351 $self->{'_objhash'} = {}; # reset
352 return $obj;
353 }
354
355 =head1 Implementation specific methods
356
357 These methods allow to conveniently configure this sequence object
358 builder as to which slots are desired, and under which circumstances a
359 sequence object should be abandoned altogether. The default mode is
360 want_all(1), which means the builder will report all slots as wanted
361 that the object created by the sequence factory supports.
362
363 You can add specific slots you want through add_wanted_slots(). In
364 most cases, you will want to call want_none() before in order to relax
365 zero acceptance through a list of wanted slots.
366
367 Alternatively, you can add specific unwanted slots through
368 add_unwanted_slots(). In this case, you will usually want to call
369 want_all(1) before (which is the default if you never touched the
370 builder) to restrict unrestricted acceptance.
371
372 I.e., want_all(1) means want all slots except for the unwanted, and
373 want_none() means only those explicitly wanted.
374
375 If a slot is in both the unwanted and the wanted list, the following
376 rules hold. In want-all mode, the unwanted list overrules. In
377 want-none mode, the wanted list overrides the unwanted list. If this
378 is confusing to you, just try to avoid having slots at the same time
379 in the wanted and the unwanted lists.
380
381 =cut
382
383 =head2 get_wanted_slots
384
385 Title : get_wanted_slots
386 Usage : $obj->get_wanted_slots($newval)
387 Function: Get the list of wanted slots
388 Example :
389 Returns : a list of strings
390 Args :
391
392
393 =cut
394
395 sub get_wanted_slots{
396 my $self = shift;
397
398 return @{$self->{'wanted_slots'}};
399 }
400
401 =head2 add_wanted_slot
402
403 Title : add_wanted_slot
404 Usage :
405 Function: Adds the specified slots to the list of wanted slots.
406 Example :
407 Returns : TRUE
408 Args : an array of slot names (strings)
409
410
411 =cut
412
413 sub add_wanted_slot{
414 my ($self,@slots) = @_;
415
416 my $myslots = $self->{'wanted_slots'};
417 foreach my $slot (@slots) {
418 if(! grep { $slot eq $_; } @$myslots) {
419 push(@$myslots, $slot);
420 }
421 }
422 return 1;
423 }
424
425 =head2 remove_wanted_slots
426
427 Title : remove_wanted_slots
428 Usage :
429 Function: Removes all wanted slots added previously through
430 add_wanted_slots().
431 Example :
432 Returns : the previous list of wanted slot names
433 Args : none
434
435
436 =cut
437
438 sub remove_wanted_slots{
439 my $self = shift;
440 my @slots = $self->get_wanted_slots();
441 $self->{'wanted_slots'} = [];
442 return @slots;
443 }
444
445 =head2 get_unwanted_slots
446
447 Title : get_unwanted_slots
448 Usage : $obj->get_unwanted_slots($newval)
449 Function: Get the list of unwanted slots.
450 Example :
451 Returns : a list of strings
452 Args : none
453
454
455 =cut
456
457 sub get_unwanted_slots{
458 my $self = shift;
459
460 return @{$self->{'unwanted_slots'}};
461 }
462
463 =head2 add_unwanted_slot
464
465 Title : add_unwanted_slot
466 Usage :
467 Function: Adds the specified slots to the list of unwanted slots.
468 Example :
469 Returns : TRUE
470 Args : an array of slot names (strings)
471
472
473 =cut
474
475 sub add_unwanted_slot{
476 my ($self,@slots) = @_;
477
478 my $myslots = $self->{'unwanted_slots'};
479 foreach my $slot (@slots) {
480 if(! grep { $slot eq $_; } @$myslots) {
481 push(@$myslots, $slot);
482 }
483 }
484 return 1;
485 }
486
487 =head2 remove_unwanted_slots
488
489 Title : remove_unwanted_slots
490 Usage :
491 Function: Removes the list of unwanted slots added previously through
492 add_unwanted_slots().
493 Example :
494 Returns : the previous list of unwanted slot names
495 Args : none
496
497
498 =cut
499
500 sub remove_unwanted_slots{
501 my $self = shift;
502 my @slots = $self->get_unwanted_slots();
503 $self->{'unwanted_slots'} = [];
504 return @slots;
505 }
506
507 =head2 want_none
508
509 Title : want_none
510 Usage :
511 Function: Disables all slots. After calling this method, want_slot()
512 will return FALSE regardless of slot name.
513
514 This is different from removed_wanted_slots() in that it
515 also sets want_all() to FALSE. Note that it also resets the
516 list of unwanted slots in order to avoid slots being in
517 both lists.
518
519 Example :
520 Returns : TRUE
521 Args : none
522
523
524 =cut
525
526 sub want_none{
527 my $self = shift;
528
529 $self->want_all(0);
530 $self->remove_wanted_slots();
531 $self->remove_unwanted_slots();
532 return 1;
533 }
534
535 =head2 want_all
536
537 Title : want_all
538 Usage : $obj->want_all($newval)
539 Function: Whether or not this sequence object builder wants to
540 populate all slots that the object has. Whether an object
541 supports a slot is generally determined by what can()
542 returns. You can add additional 'virtual' slots by calling
543 add_wanted_slot.
544
545 This will be ON by default. Call $obj->want_none() to
546 disable all slots.
547
548 Example :
549 Returns : TRUE if this builder wants to populate all slots, and
550 FALSE otherwise.
551 Args : on set, new value (a scalar or undef, optional)
552
553
554 =cut
555
556 sub want_all{
557 my $self = shift;
558
559 return $self->{'want_all'} = shift if @_;
560 return $self->{'want_all'};
561 }
562
563 =head2 get_object_conditions
564
565 Title : get_object_conditions
566 Usage :
567 Function: Get the list of conditions an object must meet in order to
568 be 'wanted.' See want_object() for where this is used.
569
570 Conditions in this implementation are closures (anonymous
571 functions) which are passed one parameter, a hash reference
572 the keys of which are equal to initialization
573 paramaters. The closure must return TRUE to make the object
574 'wanted.'
575
576 Conditions will be implicitly ANDed.
577
578 Example :
579 Returns : a list of closures
580 Args : none
581
582
583 =cut
584
585 sub get_object_conditions{
586 my $self = shift;
587
588 return @{$self->{'object_conds'}};
589 }
590
591 =head2 add_object_condition
592
593 Title : add_object_condition
594 Usage :
595 Function: Adds a condition an object must meet in order to be 'wanted.'
596 See want_object() for where this is used.
597
598 Conditions in this implementation must be closures
599 (anonymous functions). These will be passed one parameter,
600 which is a hash reference with the sequence object
601 initialization paramters being the keys.
602
603 Conditions are implicitly ANDed. If you want other
604 operators, perform those tests inside of one closure
605 instead of multiple. This will also be more efficient.
606
607 Example :
608 Returns : TRUE
609 Args : the list of conditions
610
611
612 =cut
613
614 sub add_object_condition{
615 my ($self,@conds) = @_;
616
617 if(grep { ref($_) ne 'CODE'; } @conds) {
618 $self->throw("conditions against which to validate an object ".
619 "must be anonymous code blocks");
620 }
621 push(@{$self->{'object_conds'}}, @conds);
622 return 1;
623 }
624
625 =head2 remove_object_conditions
626
627 Title : remove_object_conditions
628 Usage :
629 Function: Removes the conditions an object must meet in order to be
630 'wanted.'
631 Example :
632 Returns : The list of previously set conditions (an array of closures)
633 Args : none
634
635
636 =cut
637
638 sub remove_object_conditions{
639 my $self = shift;
640 my @conds = $self->get_object_conditions();
641 $self->{'object_conds'} = [];
642 return @conds;
643 }
644
645 =head1 Methods to control what type of object is built
646
647 =cut
648
649 =head2 sequence_factory
650
651 Title : sequence_factory
652 Usage : $obj->sequence_factory($newval)
653 Function: Get/set the sequence factory to be used by this object
654 builder.
655 Example :
656 Returns : the Bio::Factory::SequenceFactoryI implementing object to use
657 Args : on set, new value (a Bio::Factory::SequenceFactoryI
658 implementing object or undef, optional)
659
660
661 =cut
662
663 sub sequence_factory{
664 my $self = shift;
665
666 if(@_) {
667 delete $self->{'_objskel'};
668 return $self->{'sequence_factory'} = shift;
669 }
670 return $self->{'sequence_factory'};
671 }
672
673 1;