comparison variant_effect_predictor/Bio/Location/Fuzzy.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: Fuzzy.pm,v 1.24 2002/12/01 00:05:20 jason Exp $
2 #
3 # BioPerl module for Bio::Location::Fuzzy
4 # Cared for by Jason Stajich <jason@bioperl.org>
5 #
6 # Copyright Jason Stajich
7 #
8 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::Location::Fuzzy - Implementation of a Location on a Sequence
14 which has unclear start and/or end locations
15
16 =head1 SYNOPSIS
17
18 use Bio::Location::Fuzzy;
19 my $fuzzylocation = new Bio::Location::Fuzzy(-start => '<30',
20 -end => 90,
21 -location_type => '.');
22
23 print "location string is ", $fuzzylocation->to_FTstring(), "\n";
24 print "location is of the type ", $fuzzylocation->location_type, "\n";
25
26 =head1 DESCRIPTION
27
28 This module contains the necessary methods for representing a
29 Fuzzy Location, one that does not have clear start and/or end points.
30 This will initially serve to handle features from Genbank/EMBL feature
31 tables that are written as 1^100 meaning between bases 1 and 100 or
32 E<lt>100..300 meaning it starts somewhere before 100. Advanced
33 implementations of this interface may be able to handle the necessary
34 logic of overlaps/intersection/contains/union. It was constructed to
35 handle fuzzy locations that can be represented in Genbank/EMBL.
36
37 =head1 FEEDBACK
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bio.perl.org/MailList.html - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via email
50 or the web:
51
52 bioperl-bugs@bio.perl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Jason Stajich
56
57 Email jason@bioperl.org
58
59 =head1 APPENDIX
60
61 The rest of the documentation details each of the object
62 methods. Internal methods are usually preceded with a _
63
64 =cut
65
66 # Let the code begin...
67
68 package Bio::Location::Fuzzy;
69 use vars qw(@ISA );
70 use strict;
71
72 use Bio::Location::FuzzyLocationI;
73 use Bio::Location::Atomic;
74
75 @ISA = qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI );
76
77 BEGIN {
78 use vars qw( %FUZZYCODES %FUZZYPOINTENCODE %FUZZYRANGEENCODE
79 @LOCATIONCODESBSANE );
80
81 @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN',
82 'BEFORE', 'AFTER');
83
84 %FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact
85 # Exact position is unknown, but is within the range specified, ((1.2)..100)
86 'WITHIN' => '.',
87 # 1^2
88 'BETWEEN' => '^',
89 # <100
90 'BEFORE' => '<',
91 # >10
92 'AFTER' => '>');
93
94 # The following regular expressions map to fuzzy location types. Every
95 # expression must match the complete encoded point string, and must
96 # contain two groups identifying min and max. Empty matches are automatic.
97 # converted to undef, except for 'EXACT', for which max is set to equal
98 # min.
99 %FUZZYPOINTENCODE = (
100 '\>(\d+)(.{0})' => 'AFTER',
101 '\<(.{0})(\d+)' => 'BEFORE',
102 '(\d+)' => 'EXACT',
103 '(\d+)(.{0})\>' => 'AFTER',
104 '(.{0})(\d+)\<' => 'BEFORE',
105 '(\d+)\.(\d+)' => 'WITHIN',
106 '(\d+)\^(\d+)' => 'BETWEEN',
107 );
108
109 %FUZZYRANGEENCODE = ( '\.' => 'WITHIN',
110 '\.\.' => 'EXACT',
111 '\^' => 'BETWEEN' );
112
113 }
114
115 =head2 new
116
117 Title : new
118 Usage : my $fuzzyloc = new Bio::Location::Fuzzy( @args);
119 Function:
120 Returns :
121 Args : -start => value for start (initialize by superclass)
122 -end => value for end (initialize by superclass)
123 -strand => value for strand (initialize by superclass)
124 -location_type => either ('EXACT', 'WITHIN', 'BETWEEN') OR
125 ( 1,2,3)
126 -start_ext=> extension for start - defaults to 0,
127 -start_fuz= fuzzy code for start can be
128 ( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR
129 a value 1 - 5 corresponding to index+1 above
130 -end_ext=> extension for end - defaults to 0,
131 -end_fuz= fuzzy code for end can be
132 ( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR
133 a value 1 - 5 corresponding to index+1 above
134
135 =cut
136
137 sub new {
138 my ($class, @args) = @_;
139 my $self = $class->SUPER::new(@args);
140 my ($location_type, $start_ext, $start_fuz, $end_ext, $end_fuz) =
141 $self->_rearrange([ qw(LOCATION_TYPE START_EXT START_FUZ
142 END_EXT END_FUZ )
143 ], @args);
144
145 $location_type && $self->location_type($location_type);
146 $start_ext && $self->max_start($self->min_start + $start_ext);
147 $end_ext && $self->max_end($self->min_end + $end_ext);
148 $start_fuz && $self->start_pos_type($start_fuz);
149 $end_fuz && $self->end_pos_type($end_fuz);
150
151 return $self;
152 }
153
154 =head2 location_type
155
156 Title : location_type
157 Usage : my $location_type = $location->location_type();
158 Function: Get location type encoded as text
159 Returns : string ('EXACT', 'WITHIN', 'BETWEEN')
160 Args : none
161
162 =cut
163
164 sub location_type {
165 my ($self,$value) = @_;
166 if( defined $value || ! defined $self->{'_location_type'} ) {
167 $value = 'EXACT' unless defined $value;
168 if(! defined $FUZZYCODES{$value}) {
169 $value = uc($value);
170 if( $value =~ /\.\./ ) {
171 $value = 'EXACT';
172 } elsif( $value =~ /^\.$/ ) {
173 $value = 'WITHIN';
174 } elsif( $value =~ /\^/ ) {
175 $value = 'BETWEEN';
176
177
178 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->start. "] and [". $self->end. "]")
179 if defined $self->start && defined $self->end && ($self->end - 1 == $self->start);
180
181
182 } elsif( $value ne 'EXACT' && $value ne 'WITHIN' &&
183 $value ne 'BETWEEN' ) {
184 $self->throw("Did not specify a valid location type");
185 }
186 }
187 $self->{'_location_type'} = $value;
188 }
189 return $self->{'_location_type'};
190 }
191
192 =head1 LocationI methods
193
194 =head2 length
195
196 Title : length
197 Usage : $length = $fuzzy_loc->length();
198 Function: Get the length of this location.
199
200 Note that the length of a fuzzy location will always depend
201 on the currently active interpretation of start and end. The
202 result will therefore vary for different CoordinatePolicy objects.
203
204 Returns : an integer
205 Args : none
206
207 =cut
208
209 #sub length {
210 # my($self) = @_;
211 # return $self->SUPER::length() if( !$self->start || !$self->end);
212 # $self->warn('Length is not valid for a FuzzyLocation');
213 # return 0;
214 #}
215
216 =head2 start
217
218 Title : start
219 Usage : $start = $fuzzy->start();
220 Function: get/set start of this range, handling fuzzy_starts
221 Returns : a positive integer representing the start of the location
222 Args : start location on set (can be fuzzy point string)
223
224 =cut
225
226 sub start {
227 my($self,$value) = @_;
228 if( defined $value ) {
229 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
230 $self->start_pos_type($encode);
231 $self->min_start($min);
232 $self->max_start($max);
233 }
234
235 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->SUPER::start. "] and [". $self->SUPER::end. "]")
236 if $self->location_type eq 'BETWEEN' && defined $self->SUPER::end && ($self->SUPER::end - 1 == $self->SUPER::start);
237
238 return $self->SUPER::start();
239 }
240
241 =head2 end
242
243 Title : end
244 Usage : $end = $fuzzy->end();
245 Function: get/set end of this range, handling fuzzy_ends
246 Returns : a positive integer representing the end of the range
247 Args : end location on set (can be fuzzy string)
248
249 =cut
250
251 sub end {
252 my($self,$value) = @_;
253 if( defined $value ) {
254 my ($encode,$min,$max) = $self->_fuzzypointdecode($value);
255 $self->end_pos_type($encode);
256 $self->min_end($min);
257 $self->max_end($max);
258 }
259
260 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->SUPER::start. "] and [". $self->SUPER::end. "]")
261 if $self->location_type eq 'BETWEEN' && defined $self->SUPER::start && ($self->SUPER::end - 1 == $self->SUPER::start);
262
263 return $self->SUPER::end();
264 }
265
266 =head2 min_start
267
268 Title : min_start
269 Usage : $min_start = $fuzzy->min_start();
270 Function: get/set the minimum starting point
271 Returns : the minimum starting point from the contained sublocations
272 Args : integer or undef on set
273
274 =cut
275
276 sub min_start {
277 my ($self,@args) = @_;
278
279 if(@args) {
280 $self->{'_min_start'} = $args[0]; # the value may be undef!
281 }
282 return $self->{'_min_start'};
283 }
284
285 =head2 max_start
286
287 Title : max_start
288 Usage : my $maxstart = $location->max_start();
289 Function: Get/set maximum starting location of feature startpoint
290 Returns : integer or undef if no maximum starting point.
291 Args : integer or undef on set
292
293 =cut
294
295 sub max_start {
296 my ($self,@args) = @_;
297
298 if(@args) {
299 $self->{'_max_start'} = $args[0]; # the value may be undef!
300 }
301 return $self->{'_max_start'};
302 }
303
304 =head2 start_pos_type
305
306 Title : start_pos_type
307 Usage : my $start_pos_type = $location->start_pos_type();
308 Function: Get/set start position type.
309 Returns : type of position coded as text
310 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
311 Args : a string on set
312
313 =cut
314
315 sub start_pos_type {
316 my ($self,$value) = @_;
317 if(defined $value && $value =~ /^\d+$/ ) {
318 if( $value == 0 ) { $value = 'EXACT'; }
319 else {
320 my $v = $LOCATIONCODESBSANE[$value];
321 if( ! defined $v ) {
322 $self->warn("Provided value $value which I don't understand, reverting to 'EXACT'");
323 $v = 'EXACT';
324 }
325 $value = $v;
326 }
327 }
328 if(defined($value)) {
329 $self->{'_start_pos_type'} = $value;
330 }
331 return $self->{'_start_pos_type'};
332 }
333
334 =head2 min_end
335
336 Title : min_end
337 Usage : my $minend = $location->min_end();
338 Function: Get/set minimum ending location of feature endpoint
339 Returns : integer or undef if no minimum ending point.
340 Args : integer or undef on set
341
342 =cut
343
344 sub min_end {
345 my ($self,@args) = @_;
346
347 if(@args) {
348 $self->{'_min_end'} = $args[0]; # the value may be undef!
349 }
350 return $self->{'_min_end'};
351 }
352
353 =head2 max_end
354
355 Title : max_end
356 Usage : my $maxend = $location->max_end();
357 Function: Get/set maximum ending location of feature endpoint
358 Returns : integer or undef if no maximum ending point.
359 Args : integer or undef on set
360
361 =cut
362
363 sub max_end {
364 my ($self,@args) = @_;
365
366 if(@args) {
367 $self->{'_max_end'} = $args[0]; # the value may be undef!
368 }
369 return $self->{'_max_end'};
370 }
371
372 =head2 end_pos_type
373
374 Title : end_pos_type
375 Usage : my $end_pos_type = $location->end_pos_type();
376 Function: Get/set end position type.
377 Returns : type of position coded as text
378 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
379 Args : a string on set
380
381 =cut
382
383 sub end_pos_type {
384 my ($self,$value) = @_;
385 if( defined $value && $value =~ /^\d+$/ ) {
386 if( $value == 0 ) { $value = 'EXACT'; }
387 else {
388 my $v = $LOCATIONCODESBSANE[$value];
389 if( ! defined $v ) {
390 $self->warn("Provided value $value which I don't understand, reverting to 'EXACT'");
391 $v = 'EXACT';
392 }
393 $value = $v;
394 }
395 }
396
397 if(defined($value)) {
398 $self->{'_end_pos_type'} = $value;
399 }
400 return $self->{'_end_pos_type'};
401 }
402
403 =head2 seq_id
404
405 Title : seq_id
406 Usage : my $seqid = $location->seq_id();
407 Function: Get/Set seq_id that location refers to
408 Returns : seq_id
409 Args : [optional] seq_id value to set
410
411 =cut
412
413 =head2 coordinate_policy
414
415 Title : coordinate_policy
416 Usage : $policy = $location->coordinate_policy();
417 $location->coordinate_policy($mypolicy); # set may not be possible
418 Function: Get the coordinate computing policy employed by this object.
419
420 See Bio::Location::CoordinatePolicyI for documentation about
421 the policy object and its use.
422
423 The interface *does not* require implementing classes to accept
424 setting of a different policy. The implementation provided here
425 does, however, allow to do so.
426
427 Implementors of this interface are expected to initialize every
428 new instance with a CoordinatePolicyI object. The implementation
429 provided here will return a default policy object if none has
430 been set yet. To change this default policy object call this
431 method as a class method with an appropriate argument. Note that
432 in this case only subsequently created Location objects will be
433 affected.
434
435 Returns : A Bio::Location::CoordinatePolicyI implementing object.
436 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
437
438 =cut
439
440 =head2 to_FTstring
441
442 Title : to_FTstring
443 Usage : my $locstr = $location->to_FTstring()
444 Function: Get/Set seq_id that location refers to
445 Returns : seq_id
446 Args : [optional] seq_id value to set
447
448 =cut
449
450 sub to_FTstring {
451 my ($self) = @_;
452 my (%vals) = ( 'start' => $self->start,
453 'min_start' => $self->min_start,
454 'max_start' => $self->max_start,
455 'start_code' => $self->start_pos_type,
456 'end' => $self->end,
457 'min_end' => $self->min_end,
458 'max_end' => $self->max_end,
459 'end_code' => $self->end_pos_type );
460
461 my (%strs) = ( 'start' => '',
462 'end' => '');
463 my ($delimiter) = $FUZZYCODES{$self->location_type};
464 # I'm lazy, lets do this in a loop since behaviour will be the same for
465 # start and end
466 foreach my $point ( qw(start end) ) {
467 if( $vals{$point."_code"} ne 'EXACT' ) {
468
469 if( (!defined $vals{"min_$point"} ||
470 !defined $vals{"max_$point"})
471 && ( $vals{$point."_code"} eq 'WITHIN' ||
472 $vals{$point."_code"} eq 'BETWEEN')
473 ) {
474 $vals{"min_$point"} = '' unless defined $vals{"min_$point"};
475 $vals{"max_$point"} = '' unless defined $vals{"max_$point"};
476
477 $self->warn("Fuzzy codes for start are in a strange state, (".
478 join(",", ($vals{"min_$point"},
479 $vals{"max_$point"},
480 $vals{$point."_code"})). ")");
481 return '';
482 }
483 if( defined $vals{$point."_code"} &&
484 ($vals{$point."_code"} eq 'BEFORE' ||
485 $vals{$point."_code"} eq 'AFTER')
486 ) {
487 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
488 }
489 if( defined $vals{"min_$point"} ) {
490 $strs{$point} .= $vals{"min_$point"};
491 }
492 if( defined $vals{$point."_code"} &&
493 ($vals{$point."_code"} eq 'WITHIN' ||
494 $vals{$point."_code"} eq 'BETWEEN')
495 ) {
496 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
497 }
498 if( defined $vals{"max_$point"} ) {
499 $strs{$point} .= $vals{"max_$point"};
500 }
501 if(($vals{$point."_code"} eq 'WITHIN') ||
502 ($vals{$point."_code"} eq 'BETWEEN')) {
503 $strs{$point} = "(".$strs{$point}.")";
504 }
505 } else {
506 $strs{$point} = $vals{$point};
507 }
508
509 }
510 my $str = $strs{'start'} . $delimiter . $strs{'end'};
511 if($self->is_remote() && $self->seq_id()) {
512 $str = $self->seq_id() . ":" . $str;
513 }
514 if( $self->strand == -1 ) {
515 $str = "complement(" . $str . ")";
516 } elsif($self->location_type() eq "WITHIN") {
517 $str = "(".$str.")";
518 }
519 return $str;
520 }
521
522 =head2 _fuzzypointdecode
523
524 Title : _fuzzypointdecode
525 Usage : ($type,$min,$max) = $self->_fuzzypointdecode('<5');
526 Function: Decode a fuzzy string.
527 Returns : A 3-element array consisting of the type of location, the
528 minimum integer, and the maximum integer describing the range
529 of coordinates this start or endpoint refers to. Minimum or
530 maximum coordinate may be undefined.
531 : Returns empty array on fail.
532 Args : fuzzypoint string
533
534 =cut
535
536 sub _fuzzypointdecode {
537 my ($self, $string) = @_;
538 return () if( !defined $string);
539 # strip off leading and trailing space
540 $string =~ s/^\s*(\S+)\s*/$1/;
541 foreach my $pattern ( keys %FUZZYPOINTENCODE ) {
542 if( $string =~ /^$pattern$/ ) {
543 my ($min,$max) = ($1,$2);
544 if($FUZZYPOINTENCODE{$pattern} eq 'EXACT') {
545 $max = $min;
546 } else {
547 $max = undef if(length($max) == 0);
548 $min = undef if(length($min) == 0);
549 }
550 return ($FUZZYPOINTENCODE{$pattern},$min,$max);
551 }
552 }
553 if( $self->verbose >= 1 ) {
554 $self->warn("could not find a valid fuzzy encoding for $string");
555 }
556 return ();
557 }
558
559 1;
560