diff variant_effect_predictor/Bio/Factory/FTLocationFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Factory/FTLocationFactory.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,235 @@
+# $Id: FTLocationFactory.pm,v 1.9.2.4 2003/09/14 19:15:39 jason Exp $
+#
+# BioPerl module for Bio::Factory::FTLocationFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::FTLocationFactory - A FeatureTable Location Parser
+
+=head1 SYNOPSIS
+
+    # parse a string into a location object
+    $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200, 400..500");
+
+=head1 DESCRIPTION
+
+Implementation of string-encoded location parsing for the Genbank feature table
+encoding of locations.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::FTLocationFactory;
+use vars qw(@ISA);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::Factory::LocationFactoryI;
+use Bio::Location::Simple;
+use Bio::Location::Split;
+use Bio::Location::Fuzzy;
+
+
+@ISA = qw(Bio::Root::Root Bio::Factory::LocationFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Factory::FTLocationFactory();
+ Function: Builds a new Bio::Factory::FTLocationFactory object 
+ Returns : an instance of Bio::Factory::FTLocationFactory
+ Args    :
+
+
+=cut
+
+=head2 from_string
+
+ Title   : from_string
+ Usage   : $loc = $locfactory->from_string("100..200");
+ Function: Parses the given string and returns a Bio::LocationI implementing
+           object representing the location encoded by the string.
+
+           This implementation parses the Genbank feature table
+           encoding of locations.
+ Example :
+ Returns : A Bio::LocationI implementing object.
+ Args    : A string.
+
+
+=cut
+
+sub from_string{
+    # the third parameter is purely optional and indicates a recursive
+    # call if set
+    my ($self,$locstr,$is_rec) = @_;
+    my $loc;
+
+    # there is no place in FT-formatted location strings where whitespace 
+    # carries meaning, so strip it off entirely upfront
+    $locstr =~ s/\s+//g if ! $is_rec;
+
+    # does it contain an operator?
+    if($locstr =~ /^([A-Za-z]+)\((.*)\)$/) {
+	# yes:
+	my $op = $1;
+	my $oparg = $2;
+	if($op eq "complement") {
+	    # parse the argument recursively, then set the strand to -1
+	    $loc = $self->from_string($oparg, 1);
+	    $loc->strand(-1);
+	} elsif(($op eq "join") || ($op eq "order") || ($op eq "bond")) {
+	    # This is a split location. Split into components and parse each
+	    # one recursively, then gather into a SplitLocationI instance.
+	    #
+	    # Note: The following code will /not/ work with nested
+	    # joins (you want to have grammar-based parsing for that).
+	    $loc = Bio::Location::Split->new(-verbose => $self->verbose,
+					     -splittype => $op);
+	    foreach my $substr (split(/,/, $oparg)) {
+		$loc->add_sub_Location($self->from_string($substr, 1));
+	    }
+	} else {
+	    $self->throw("operator \"$op\" unrecognized by parser");
+	}
+    } else {
+	# no operator, parse away
+	$loc = $self->_parse_location($locstr);
+    }
+    return $loc;
+}
+
+=head2 _parse_location
+
+ Title   : _parse_location
+ Usage   : $loc = $locfactory->_parse_location( $loc_string)
+
+ Function: Parses the given location string and returns a location object 
+           with start() and end() and strand() set appropriately.
+           Note that this method is private.
+ Returns : A Bio::LocationI implementing object or undef on failure
+ Args    : location string
+
+=cut
+
+sub _parse_location {
+    my ($self, $locstr) = @_;
+    my ($loc, $seqid);
+
+    $self->debug( "Location parse, processing $locstr\n");
+
+    # 'remote' location?
+    if($locstr =~ /^(\S+):(.*)$/) {
+	# yes; memorize remote ID and strip from location string
+	$seqid = $1;
+	$locstr = $2;
+    }
+    
+    # split into start and end
+    my ($start, $end) = split(/\.\./, $locstr);
+    # remove enclosing parentheses if any; note that because of parentheses
+    # possibly surrounding the entire location the parentheses around start
+    # and/or may be asymmetrical
+    $start =~ s/^\(+//;
+    $start =~ s/\)+$//;
+    $end   =~ s/^\(+// if $end;
+    $end   =~ s/\)+$// if $end;
+
+    # Is this a simple (exact) or a fuzzy location? Simples have exact start
+    # and end, or is between two adjacent bases. Everything else is fuzzy.
+    my $loctype = ".."; # exact with start and end as default
+    my $locclass = "Bio::Location::Simple";
+    if(! defined($end)) {
+	if($locstr =~ /(\d+)([\.\^])(\d+)/) {
+	    $start = $1;
+	    $end = $3;
+	    $loctype = $2;
+	    $locclass = "Bio::Location::Fuzzy"
+		unless (abs($end - $start) <= 1) && ($loctype eq "^");
+	    
+	} else {
+	    $end = $start;
+	}
+    }
+    if ( ($start =~ /[\>\<\?\.\^]/) || ($end   =~ /[\>\<\?\.\^]/) ) {
+	$locclass = 'Bio::Location::Fuzzy';
+    } 
+
+    # instantiate location and initialize
+    $loc = $locclass->new(-verbose => $self->verbose,
+			  -start   => $start, 
+			  -end     => $end, 
+			  -strand  => 1,
+			  -location_type => $loctype);
+    # set remote ID if remote location
+    if($seqid) {
+	$loc->is_remote(1);
+	$loc->seq_id($seqid);
+    }
+
+    # done (hopefully)
+    return $loc;
+    
+}
+
+1;