comparison variant_effect_predictor/Bio/Factory/FTLocationFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: FTLocationFactory.pm,v 1.9.2.4 2003/09/14 19:15:39 jason Exp $
2 #
3 # BioPerl module for Bio::Factory::FTLocationFactory
4 #
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
6 #
7 # Copyright Hilmar Lapp
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 #
12 # (c) Hilmar Lapp, hlapp at gnf.org, 2002.
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
14 #
15 # You may distribute this module under the same terms as perl itself.
16 # Refer to the Perl Artistic License (see the license accompanying this
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
18 # for the terms under which you may use, modify, and redistribute this module.
19 #
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
23 #
24
25 # POD documentation - main docs before the code
26
27 =head1 NAME
28
29 Bio::Factory::FTLocationFactory - A FeatureTable Location Parser
30
31 =head1 SYNOPSIS
32
33 # parse a string into a location object
34 $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200, 400..500");
35
36 =head1 DESCRIPTION
37
38 Implementation of string-encoded location parsing for the Genbank feature table
39 encoding of locations.
40
41 =head1 FEEDBACK
42
43 =head2 Mailing Lists
44
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
48
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/MailList.shtml - About the mailing lists
51
52 =head2 Reporting Bugs
53
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 of the bugs and their resolution. Bug reports can be submitted via
56 email or the web:
57
58 bioperl-bugs@bioperl.org
59 http://bugzilla.bioperl.org/
60
61 =head1 AUTHOR - Hilmar Lapp
62
63 Email hlapp at gmx.net
64
65 =head1 CONTRIBUTORS
66
67 Additional contributors names and emails here
68
69 =head1 APPENDIX
70
71 The rest of the documentation details each of the object methods.
72 Internal methods are usually preceded with a _
73
74 =cut
75
76
77 # Let the code begin...
78
79
80 package Bio::Factory::FTLocationFactory;
81 use vars qw(@ISA);
82 use strict;
83
84 # Object preamble - inherits from Bio::Root::Root
85
86 use Bio::Root::Root;
87 use Bio::Factory::LocationFactoryI;
88 use Bio::Location::Simple;
89 use Bio::Location::Split;
90 use Bio::Location::Fuzzy;
91
92
93 @ISA = qw(Bio::Root::Root Bio::Factory::LocationFactoryI);
94
95 =head2 new
96
97 Title : new
98 Usage : my $obj = new Bio::Factory::FTLocationFactory();
99 Function: Builds a new Bio::Factory::FTLocationFactory object
100 Returns : an instance of Bio::Factory::FTLocationFactory
101 Args :
102
103
104 =cut
105
106 =head2 from_string
107
108 Title : from_string
109 Usage : $loc = $locfactory->from_string("100..200");
110 Function: Parses the given string and returns a Bio::LocationI implementing
111 object representing the location encoded by the string.
112
113 This implementation parses the Genbank feature table
114 encoding of locations.
115 Example :
116 Returns : A Bio::LocationI implementing object.
117 Args : A string.
118
119
120 =cut
121
122 sub from_string{
123 # the third parameter is purely optional and indicates a recursive
124 # call if set
125 my ($self,$locstr,$is_rec) = @_;
126 my $loc;
127
128 # there is no place in FT-formatted location strings where whitespace
129 # carries meaning, so strip it off entirely upfront
130 $locstr =~ s/\s+//g if ! $is_rec;
131
132 # does it contain an operator?
133 if($locstr =~ /^([A-Za-z]+)\((.*)\)$/) {
134 # yes:
135 my $op = $1;
136 my $oparg = $2;
137 if($op eq "complement") {
138 # parse the argument recursively, then set the strand to -1
139 $loc = $self->from_string($oparg, 1);
140 $loc->strand(-1);
141 } elsif(($op eq "join") || ($op eq "order") || ($op eq "bond")) {
142 # This is a split location. Split into components and parse each
143 # one recursively, then gather into a SplitLocationI instance.
144 #
145 # Note: The following code will /not/ work with nested
146 # joins (you want to have grammar-based parsing for that).
147 $loc = Bio::Location::Split->new(-verbose => $self->verbose,
148 -splittype => $op);
149 foreach my $substr (split(/,/, $oparg)) {
150 $loc->add_sub_Location($self->from_string($substr, 1));
151 }
152 } else {
153 $self->throw("operator \"$op\" unrecognized by parser");
154 }
155 } else {
156 # no operator, parse away
157 $loc = $self->_parse_location($locstr);
158 }
159 return $loc;
160 }
161
162 =head2 _parse_location
163
164 Title : _parse_location
165 Usage : $loc = $locfactory->_parse_location( $loc_string)
166
167 Function: Parses the given location string and returns a location object
168 with start() and end() and strand() set appropriately.
169 Note that this method is private.
170 Returns : A Bio::LocationI implementing object or undef on failure
171 Args : location string
172
173 =cut
174
175 sub _parse_location {
176 my ($self, $locstr) = @_;
177 my ($loc, $seqid);
178
179 $self->debug( "Location parse, processing $locstr\n");
180
181 # 'remote' location?
182 if($locstr =~ /^(\S+):(.*)$/) {
183 # yes; memorize remote ID and strip from location string
184 $seqid = $1;
185 $locstr = $2;
186 }
187
188 # split into start and end
189 my ($start, $end) = split(/\.\./, $locstr);
190 # remove enclosing parentheses if any; note that because of parentheses
191 # possibly surrounding the entire location the parentheses around start
192 # and/or may be asymmetrical
193 $start =~ s/^\(+//;
194 $start =~ s/\)+$//;
195 $end =~ s/^\(+// if $end;
196 $end =~ s/\)+$// if $end;
197
198 # Is this a simple (exact) or a fuzzy location? Simples have exact start
199 # and end, or is between two adjacent bases. Everything else is fuzzy.
200 my $loctype = ".."; # exact with start and end as default
201 my $locclass = "Bio::Location::Simple";
202 if(! defined($end)) {
203 if($locstr =~ /(\d+)([\.\^])(\d+)/) {
204 $start = $1;
205 $end = $3;
206 $loctype = $2;
207 $locclass = "Bio::Location::Fuzzy"
208 unless (abs($end - $start) <= 1) && ($loctype eq "^");
209
210 } else {
211 $end = $start;
212 }
213 }
214 if ( ($start =~ /[\>\<\?\.\^]/) || ($end =~ /[\>\<\?\.\^]/) ) {
215 $locclass = 'Bio::Location::Fuzzy';
216 }
217
218 # instantiate location and initialize
219 $loc = $locclass->new(-verbose => $self->verbose,
220 -start => $start,
221 -end => $end,
222 -strand => 1,
223 -location_type => $loctype);
224 # set remote ID if remote location
225 if($seqid) {
226 $loc->is_remote(1);
227 $loc->seq_id($seqid);
228 }
229
230 # done (hopefully)
231 return $loc;
232
233 }
234
235 1;