0
|
1 # $Id: FTLocationFactory.pm,v 1.9.2.4 2003/09/14 19:15:39 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Factory::FTLocationFactory
|
|
4 #
|
|
5 # Cared for by Hilmar Lapp <hlapp at gmx.net>
|
|
6 #
|
|
7 # Copyright Hilmar Lapp
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 #
|
|
12 # (c) Hilmar Lapp, hlapp at gnf.org, 2002.
|
|
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
|
|
14 #
|
|
15 # You may distribute this module under the same terms as perl itself.
|
|
16 # Refer to the Perl Artistic License (see the license accompanying this
|
|
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
|
|
18 # for the terms under which you may use, modify, and redistribute this module.
|
|
19 #
|
|
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
23 #
|
|
24
|
|
25 # POD documentation - main docs before the code
|
|
26
|
|
27 =head1 NAME
|
|
28
|
|
29 Bio::Factory::FTLocationFactory - A FeatureTable Location Parser
|
|
30
|
|
31 =head1 SYNOPSIS
|
|
32
|
|
33 # parse a string into a location object
|
|
34 $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200, 400..500");
|
|
35
|
|
36 =head1 DESCRIPTION
|
|
37
|
|
38 Implementation of string-encoded location parsing for the Genbank feature table
|
|
39 encoding of locations.
|
|
40
|
|
41 =head1 FEEDBACK
|
|
42
|
|
43 =head2 Mailing Lists
|
|
44
|
|
45 User feedback is an integral part of the evolution of this and other
|
|
46 Bioperl modules. Send your comments and suggestions preferably to
|
|
47 the Bioperl mailing list. Your participation is much appreciated.
|
|
48
|
|
49 bioperl-l@bioperl.org - General discussion
|
|
50 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
51
|
|
52 =head2 Reporting Bugs
|
|
53
|
|
54 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
55 of the bugs and their resolution. Bug reports can be submitted via
|
|
56 email or the web:
|
|
57
|
|
58 bioperl-bugs@bioperl.org
|
|
59 http://bugzilla.bioperl.org/
|
|
60
|
|
61 =head1 AUTHOR - Hilmar Lapp
|
|
62
|
|
63 Email hlapp at gmx.net
|
|
64
|
|
65 =head1 CONTRIBUTORS
|
|
66
|
|
67 Additional contributors names and emails here
|
|
68
|
|
69 =head1 APPENDIX
|
|
70
|
|
71 The rest of the documentation details each of the object methods.
|
|
72 Internal methods are usually preceded with a _
|
|
73
|
|
74 =cut
|
|
75
|
|
76
|
|
77 # Let the code begin...
|
|
78
|
|
79
|
|
80 package Bio::Factory::FTLocationFactory;
|
|
81 use vars qw(@ISA);
|
|
82 use strict;
|
|
83
|
|
84 # Object preamble - inherits from Bio::Root::Root
|
|
85
|
|
86 use Bio::Root::Root;
|
|
87 use Bio::Factory::LocationFactoryI;
|
|
88 use Bio::Location::Simple;
|
|
89 use Bio::Location::Split;
|
|
90 use Bio::Location::Fuzzy;
|
|
91
|
|
92
|
|
93 @ISA = qw(Bio::Root::Root Bio::Factory::LocationFactoryI);
|
|
94
|
|
95 =head2 new
|
|
96
|
|
97 Title : new
|
|
98 Usage : my $obj = new Bio::Factory::FTLocationFactory();
|
|
99 Function: Builds a new Bio::Factory::FTLocationFactory object
|
|
100 Returns : an instance of Bio::Factory::FTLocationFactory
|
|
101 Args :
|
|
102
|
|
103
|
|
104 =cut
|
|
105
|
|
106 =head2 from_string
|
|
107
|
|
108 Title : from_string
|
|
109 Usage : $loc = $locfactory->from_string("100..200");
|
|
110 Function: Parses the given string and returns a Bio::LocationI implementing
|
|
111 object representing the location encoded by the string.
|
|
112
|
|
113 This implementation parses the Genbank feature table
|
|
114 encoding of locations.
|
|
115 Example :
|
|
116 Returns : A Bio::LocationI implementing object.
|
|
117 Args : A string.
|
|
118
|
|
119
|
|
120 =cut
|
|
121
|
|
122 sub from_string{
|
|
123 # the third parameter is purely optional and indicates a recursive
|
|
124 # call if set
|
|
125 my ($self,$locstr,$is_rec) = @_;
|
|
126 my $loc;
|
|
127
|
|
128 # there is no place in FT-formatted location strings where whitespace
|
|
129 # carries meaning, so strip it off entirely upfront
|
|
130 $locstr =~ s/\s+//g if ! $is_rec;
|
|
131
|
|
132 # does it contain an operator?
|
|
133 if($locstr =~ /^([A-Za-z]+)\((.*)\)$/) {
|
|
134 # yes:
|
|
135 my $op = $1;
|
|
136 my $oparg = $2;
|
|
137 if($op eq "complement") {
|
|
138 # parse the argument recursively, then set the strand to -1
|
|
139 $loc = $self->from_string($oparg, 1);
|
|
140 $loc->strand(-1);
|
|
141 } elsif(($op eq "join") || ($op eq "order") || ($op eq "bond")) {
|
|
142 # This is a split location. Split into components and parse each
|
|
143 # one recursively, then gather into a SplitLocationI instance.
|
|
144 #
|
|
145 # Note: The following code will /not/ work with nested
|
|
146 # joins (you want to have grammar-based parsing for that).
|
|
147 $loc = Bio::Location::Split->new(-verbose => $self->verbose,
|
|
148 -splittype => $op);
|
|
149 foreach my $substr (split(/,/, $oparg)) {
|
|
150 $loc->add_sub_Location($self->from_string($substr, 1));
|
|
151 }
|
|
152 } else {
|
|
153 $self->throw("operator \"$op\" unrecognized by parser");
|
|
154 }
|
|
155 } else {
|
|
156 # no operator, parse away
|
|
157 $loc = $self->_parse_location($locstr);
|
|
158 }
|
|
159 return $loc;
|
|
160 }
|
|
161
|
|
162 =head2 _parse_location
|
|
163
|
|
164 Title : _parse_location
|
|
165 Usage : $loc = $locfactory->_parse_location( $loc_string)
|
|
166
|
|
167 Function: Parses the given location string and returns a location object
|
|
168 with start() and end() and strand() set appropriately.
|
|
169 Note that this method is private.
|
|
170 Returns : A Bio::LocationI implementing object or undef on failure
|
|
171 Args : location string
|
|
172
|
|
173 =cut
|
|
174
|
|
175 sub _parse_location {
|
|
176 my ($self, $locstr) = @_;
|
|
177 my ($loc, $seqid);
|
|
178
|
|
179 $self->debug( "Location parse, processing $locstr\n");
|
|
180
|
|
181 # 'remote' location?
|
|
182 if($locstr =~ /^(\S+):(.*)$/) {
|
|
183 # yes; memorize remote ID and strip from location string
|
|
184 $seqid = $1;
|
|
185 $locstr = $2;
|
|
186 }
|
|
187
|
|
188 # split into start and end
|
|
189 my ($start, $end) = split(/\.\./, $locstr);
|
|
190 # remove enclosing parentheses if any; note that because of parentheses
|
|
191 # possibly surrounding the entire location the parentheses around start
|
|
192 # and/or may be asymmetrical
|
|
193 $start =~ s/^\(+//;
|
|
194 $start =~ s/\)+$//;
|
|
195 $end =~ s/^\(+// if $end;
|
|
196 $end =~ s/\)+$// if $end;
|
|
197
|
|
198 # Is this a simple (exact) or a fuzzy location? Simples have exact start
|
|
199 # and end, or is between two adjacent bases. Everything else is fuzzy.
|
|
200 my $loctype = ".."; # exact with start and end as default
|
|
201 my $locclass = "Bio::Location::Simple";
|
|
202 if(! defined($end)) {
|
|
203 if($locstr =~ /(\d+)([\.\^])(\d+)/) {
|
|
204 $start = $1;
|
|
205 $end = $3;
|
|
206 $loctype = $2;
|
|
207 $locclass = "Bio::Location::Fuzzy"
|
|
208 unless (abs($end - $start) <= 1) && ($loctype eq "^");
|
|
209
|
|
210 } else {
|
|
211 $end = $start;
|
|
212 }
|
|
213 }
|
|
214 if ( ($start =~ /[\>\<\?\.\^]/) || ($end =~ /[\>\<\?\.\^]/) ) {
|
|
215 $locclass = 'Bio::Location::Fuzzy';
|
|
216 }
|
|
217
|
|
218 # instantiate location and initialize
|
|
219 $loc = $locclass->new(-verbose => $self->verbose,
|
|
220 -start => $start,
|
|
221 -end => $end,
|
|
222 -strand => 1,
|
|
223 -location_type => $loctype);
|
|
224 # set remote ID if remote location
|
|
225 if($seqid) {
|
|
226 $loc->is_remote(1);
|
|
227 $loc->seq_id($seqid);
|
|
228 }
|
|
229
|
|
230 # done (hopefully)
|
|
231 return $loc;
|
|
232
|
|
233 }
|
|
234
|
|
235 1;
|