Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/Seq/SeqBuilder.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Seq/SeqBuilder.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,673 @@ +# $Id: SeqBuilder.pm,v 1.6 2002/10/22 07:45:20 lapp Exp $ +# +# BioPerl module for Bio::Seq::SeqBuilder +# +# Cared for by Hilmar Lapp <hlapp at gmx.net> +# +# Copyright Hilmar Lapp +# +# You may distribute this module under the same terms as perl itself + +# +# (c) Hilmar Lapp, hlapp at gmx.net, 2002. +# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. +# +# You may distribute this module under the same terms as perl itself. +# Refer to the Perl Artistic License (see the license accompanying this +# software package, or see http://www.perl.com/language/misc/Artistic.html) +# for the terms under which you may use, modify, and redistribute this module. +# +# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. +# + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers + +=head1 SYNOPSIS + + use Bio::SeqIO; + + # usually you won't instantiate this yourself -- a SeqIO object + # will have one already + my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank"); + my $builder = $seqin->sequence_builder(); + + # if you need only sequence, id, and description (e.g. for + # conversion to FASTA format): + $builder->want_none(); + $builder->add_wanted_slot('display_id','desc','seq'); + + # if you want everything except the sequence and features + $builder->want_all(1); # this is the default if it's untouched + $builder->add_unwanted_slot('seq','features'); + + # if you want only human sequences shorter than 5kb and skip all + # others + $builder->add_object_condition(sub { + my $h = shift; + return 0 if $h->{'-length'} > 5000; + return 0 if exists($h->{'-species'}) && + ($h->{'-species'}->binomial() ne "Homo sapiens"); + return 1; + }); + + # when you are finished with configuring the builder, just use + # the SeqIO API as you would normally + while(my $seq = $seqin->next_seq()) { + # do something + } + +=head1 DESCRIPTION + +This is an implementation of L<Bio::Factory::ObjectBuilderI> used by +parsers of rich sequence streams. It provides for a relatively +easy-to-use configurator of the parsing flow. + +Configuring the parsing process may be for you if you need much less +information, or much less sequences, than the stream actually +contains. Configuration can in both cases speed up the parsing time +considerably, because unwanted sections or the rest of unwanted +sequences are skipped over by the parser. + +See the methods of the class-specific implementation section for +further documentation of what can be configured. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Hilmar Lapp + +Email hlapp at gmx.net + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Seq::SeqBuilder; +use vars qw(@ISA); +use strict; + +# Object preamble - inherits from Bio::Root::Root + +use Bio::Root::Root; +use Bio::Factory::ObjectBuilderI; + +@ISA = qw(Bio::Root::Root Bio::Factory::ObjectBuilderI); + +my %slot_param_map = ("add_SeqFeature" => "features", + ); +my %param_slot_map = ("features" => "add_SeqFeature", + ); + +=head2 new + + Title : new + Usage : my $obj = new Bio::Seq::SeqBuilder(); + Function: Builds a new Bio::Seq::SeqBuilder object + Returns : an instance of Bio::Seq::SeqBuilder + Args : + + +=cut + +sub new { + my($class,@args) = @_; + + my $self = $class->SUPER::new(@args); + + $self->{'wanted_slots'} = []; + $self->{'unwanted_slots'} = []; + $self->{'object_conds'} = []; + $self->{'_objhash'} = {}; + $self->want_all(1); + + return $self; +} + +=head1 Methods for implementing L<Bio::Factory::ObjectBuilderI> + +=cut + +=head2 want_slot + + Title : want_slot + Usage : + Function: Whether or not the object builder wants to populate the + specified slot of the object to be built. + + The slot can be specified either as the name of the + respective method, or the initialization parameter that + would be otherwise passed to new() of the object to be + built. + + Note that usually only the parser will call this + method. Use add_wanted_slots and add_unwanted_slots for + configuration. + + Example : + Returns : TRUE if the object builder wants to populate the slot, and + FALSE otherwise. + Args : the name of the slot (a string) + + +=cut + +sub want_slot{ + my ($self,$slot) = @_; + my $ok = 0; + + $slot = substr($slot,1) if substr($slot,0,1) eq '-'; + if($self->want_all()) { + foreach ($self->get_unwanted_slots()) { + # this always overrides in want-all mode + return 0 if($slot eq $_); + } + if(! exists($self->{'_objskel'})) { + $self->{'_objskel'} = $self->sequence_factory->create_object(); + } + if(exists($param_slot_map{$slot})) { + $ok = $self->{'_objskel'}->can($param_slot_map{$slot}); + } else { + $ok = $self->{'_objskel'}->can($slot); + } + return $ok if $ok; + # even if the object 'cannot' do this slot, it might have been + # added to the list of wanted slot, so carry on + } + foreach ($self->get_wanted_slots()) { + if($slot eq $_) { + $ok = 1; + last; + } + } + return $ok; +} + +=head2 add_slot_value + + Title : add_slot_value + Usage : + Function: Adds one or more values to the specified slot of the object + to be built. + + Naming the slot is the same as for want_slot(). + + The object builder may further filter the content to be + set, or even completely ignore the request. + + If this method reports failure, the caller should not add + more values to the same slot. In addition, the caller may + find it appropriate to abandon the object being built + altogether. + + This implementation will allow the caller to overwrite the + return value from want_slot(), because the slot is not + checked against want_slot(). + + Note that usually only the parser will call this method, + but you may call it from anywhere if you know what you are + doing. A derived class may be used to further manipulate + the value to be added. + + Example : + Returns : TRUE on success, and FALSE otherwise + Args : the name of the slot (a string) + parameters determining the value to be set + + OR + + alternatively, a list of slotname/value pairs in the style + of named parameters as they would be passed to new(), where + each element at an even index is the parameter (slot) name + starting with a dash, and each element at an odd index is + the value of the preceding name. + + +=cut + +sub add_slot_value{ + my ($self,$slot,@args) = @_; + + my $h = $self->{'_objhash'}; + return unless $h; + # multiple named parameter variant of calling? + if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) { + unshift(@args, $slot); + while(@args) { + my $key = shift(@args); + $h->{$key} = shift(@args); + } + } else { + if($slot eq 'add_SeqFeature') { + $slot = '-'.$slot_param_map{$slot}; + $h->{$slot} = [] unless $h->{$slot}; + push(@{$h->{$slot}}, @args); + } else { + $slot = '-'.$slot unless substr($slot,0,1) eq '-'; + $h->{$slot} = $args[0]; + } + } + return 1; +} + +=head2 want_object + + Title : want_object + Usage : + Function: Whether or not the object builder is still interested in + continuing with the object being built. + + If this method returns FALSE, the caller should not add any + more values to slots, or otherwise risks that the builder + throws an exception. In addition, make_object() is likely + to return undef after this method returned FALSE. + + Note that usually only the parser will call this + method. Use add_object_condition for configuration. + + Example : + Returns : TRUE if the object builder wants to continue building + the present object, and FALSE otherwise. + Args : none + + +=cut + +sub want_object{ + my $self = shift; + + my $ok = 1; + foreach my $cond ($self->get_object_conditions()) { + $ok = &$cond($self->{'_objhash'}); + last unless $ok; + } + delete $self->{'_objhash'} unless $ok; + return $ok; +} + +=head2 make_object + + Title : make_object + Usage : + Function: Get the built object. + + This method is allowed to return undef if no value has ever + been added since the last call to make_object(), or if + want_object() returned FALSE (or would have returned FALSE) + before calling this method. + + For an implementation that allows consecutive building of + objects, a caller must call this method once, and only + once, between subsequent objects to be built. I.e., a call + to make_object implies 'end_object.' + + Example : + Returns : the object that was built + Args : none + + +=cut + +sub make_object{ + my $self = shift; + + my $obj; + if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) { + $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}}); + } + $self->{'_objhash'} = {}; # reset + return $obj; +} + +=head1 Implementation specific methods + +These methods allow to conveniently configure this sequence object +builder as to which slots are desired, and under which circumstances a +sequence object should be abandoned altogether. The default mode is +want_all(1), which means the builder will report all slots as wanted +that the object created by the sequence factory supports. + +You can add specific slots you want through add_wanted_slots(). In +most cases, you will want to call want_none() before in order to relax +zero acceptance through a list of wanted slots. + +Alternatively, you can add specific unwanted slots through +add_unwanted_slots(). In this case, you will usually want to call +want_all(1) before (which is the default if you never touched the +builder) to restrict unrestricted acceptance. + +I.e., want_all(1) means want all slots except for the unwanted, and +want_none() means only those explicitly wanted. + +If a slot is in both the unwanted and the wanted list, the following +rules hold. In want-all mode, the unwanted list overrules. In +want-none mode, the wanted list overrides the unwanted list. If this +is confusing to you, just try to avoid having slots at the same time +in the wanted and the unwanted lists. + +=cut + +=head2 get_wanted_slots + + Title : get_wanted_slots + Usage : $obj->get_wanted_slots($newval) + Function: Get the list of wanted slots + Example : + Returns : a list of strings + Args : + + +=cut + +sub get_wanted_slots{ + my $self = shift; + + return @{$self->{'wanted_slots'}}; +} + +=head2 add_wanted_slot + + Title : add_wanted_slot + Usage : + Function: Adds the specified slots to the list of wanted slots. + Example : + Returns : TRUE + Args : an array of slot names (strings) + + +=cut + +sub add_wanted_slot{ + my ($self,@slots) = @_; + + my $myslots = $self->{'wanted_slots'}; + foreach my $slot (@slots) { + if(! grep { $slot eq $_; } @$myslots) { + push(@$myslots, $slot); + } + } + return 1; +} + +=head2 remove_wanted_slots + + Title : remove_wanted_slots + Usage : + Function: Removes all wanted slots added previously through + add_wanted_slots(). + Example : + Returns : the previous list of wanted slot names + Args : none + + +=cut + +sub remove_wanted_slots{ + my $self = shift; + my @slots = $self->get_wanted_slots(); + $self->{'wanted_slots'} = []; + return @slots; +} + +=head2 get_unwanted_slots + + Title : get_unwanted_slots + Usage : $obj->get_unwanted_slots($newval) + Function: Get the list of unwanted slots. + Example : + Returns : a list of strings + Args : none + + +=cut + +sub get_unwanted_slots{ + my $self = shift; + + return @{$self->{'unwanted_slots'}}; +} + +=head2 add_unwanted_slot + + Title : add_unwanted_slot + Usage : + Function: Adds the specified slots to the list of unwanted slots. + Example : + Returns : TRUE + Args : an array of slot names (strings) + + +=cut + +sub add_unwanted_slot{ + my ($self,@slots) = @_; + + my $myslots = $self->{'unwanted_slots'}; + foreach my $slot (@slots) { + if(! grep { $slot eq $_; } @$myslots) { + push(@$myslots, $slot); + } + } + return 1; +} + +=head2 remove_unwanted_slots + + Title : remove_unwanted_slots + Usage : + Function: Removes the list of unwanted slots added previously through + add_unwanted_slots(). + Example : + Returns : the previous list of unwanted slot names + Args : none + + +=cut + +sub remove_unwanted_slots{ + my $self = shift; + my @slots = $self->get_unwanted_slots(); + $self->{'unwanted_slots'} = []; + return @slots; +} + +=head2 want_none + + Title : want_none + Usage : + Function: Disables all slots. After calling this method, want_slot() + will return FALSE regardless of slot name. + + This is different from removed_wanted_slots() in that it + also sets want_all() to FALSE. Note that it also resets the + list of unwanted slots in order to avoid slots being in + both lists. + + Example : + Returns : TRUE + Args : none + + +=cut + +sub want_none{ + my $self = shift; + + $self->want_all(0); + $self->remove_wanted_slots(); + $self->remove_unwanted_slots(); + return 1; +} + +=head2 want_all + + Title : want_all + Usage : $obj->want_all($newval) + Function: Whether or not this sequence object builder wants to + populate all slots that the object has. Whether an object + supports a slot is generally determined by what can() + returns. You can add additional 'virtual' slots by calling + add_wanted_slot. + + This will be ON by default. Call $obj->want_none() to + disable all slots. + + Example : + Returns : TRUE if this builder wants to populate all slots, and + FALSE otherwise. + Args : on set, new value (a scalar or undef, optional) + + +=cut + +sub want_all{ + my $self = shift; + + return $self->{'want_all'} = shift if @_; + return $self->{'want_all'}; +} + +=head2 get_object_conditions + + Title : get_object_conditions + Usage : + Function: Get the list of conditions an object must meet in order to + be 'wanted.' See want_object() for where this is used. + + Conditions in this implementation are closures (anonymous + functions) which are passed one parameter, a hash reference + the keys of which are equal to initialization + paramaters. The closure must return TRUE to make the object + 'wanted.' + + Conditions will be implicitly ANDed. + + Example : + Returns : a list of closures + Args : none + + +=cut + +sub get_object_conditions{ + my $self = shift; + + return @{$self->{'object_conds'}}; +} + +=head2 add_object_condition + + Title : add_object_condition + Usage : + Function: Adds a condition an object must meet in order to be 'wanted.' + See want_object() for where this is used. + + Conditions in this implementation must be closures + (anonymous functions). These will be passed one parameter, + which is a hash reference with the sequence object + initialization paramters being the keys. + + Conditions are implicitly ANDed. If you want other + operators, perform those tests inside of one closure + instead of multiple. This will also be more efficient. + + Example : + Returns : TRUE + Args : the list of conditions + + +=cut + +sub add_object_condition{ + my ($self,@conds) = @_; + + if(grep { ref($_) ne 'CODE'; } @conds) { + $self->throw("conditions against which to validate an object ". + "must be anonymous code blocks"); + } + push(@{$self->{'object_conds'}}, @conds); + return 1; +} + +=head2 remove_object_conditions + + Title : remove_object_conditions + Usage : + Function: Removes the conditions an object must meet in order to be + 'wanted.' + Example : + Returns : The list of previously set conditions (an array of closures) + Args : none + + +=cut + +sub remove_object_conditions{ + my $self = shift; + my @conds = $self->get_object_conditions(); + $self->{'object_conds'} = []; + return @conds; +} + +=head1 Methods to control what type of object is built + +=cut + +=head2 sequence_factory + + Title : sequence_factory + Usage : $obj->sequence_factory($newval) + Function: Get/set the sequence factory to be used by this object + builder. + Example : + Returns : the Bio::Factory::SequenceFactoryI implementing object to use + Args : on set, new value (a Bio::Factory::SequenceFactoryI + implementing object or undef, optional) + + +=cut + +sub sequence_factory{ + my $self = shift; + + if(@_) { + delete $self->{'_objskel'}; + return $self->{'sequence_factory'} = shift; + } + return $self->{'sequence_factory'}; +} + +1;