Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Location/Fuzzy.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: Fuzzy.pm,v 1.24 2002/12/01 00:05:20 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::Location::Fuzzy | |
4 # Cared for by Jason Stajich <jason@bioperl.org> | |
5 # | |
6 # Copyright Jason Stajich | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::Location::Fuzzy - Implementation of a Location on a Sequence | |
14 which has unclear start and/or end locations | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 use Bio::Location::Fuzzy; | |
19 my $fuzzylocation = new Bio::Location::Fuzzy(-start => '<30', | |
20 -end => 90, | |
21 -location_type => '.'); | |
22 | |
23 print "location string is ", $fuzzylocation->to_FTstring(), "\n"; | |
24 print "location is of the type ", $fuzzylocation->location_type, "\n"; | |
25 | |
26 =head1 DESCRIPTION | |
27 | |
28 This module contains the necessary methods for representing a | |
29 Fuzzy Location, one that does not have clear start and/or end points. | |
30 This will initially serve to handle features from Genbank/EMBL feature | |
31 tables that are written as 1^100 meaning between bases 1 and 100 or | |
32 E<lt>100..300 meaning it starts somewhere before 100. Advanced | |
33 implementations of this interface may be able to handle the necessary | |
34 logic of overlaps/intersection/contains/union. It was constructed to | |
35 handle fuzzy locations that can be represented in Genbank/EMBL. | |
36 | |
37 =head1 FEEDBACK | |
38 | |
39 User feedback is an integral part of the evolution of this and other | |
40 Bioperl modules. Send your comments and suggestions preferably to one | |
41 of the Bioperl mailing lists. Your participation is much appreciated. | |
42 | |
43 bioperl-l@bioperl.org - General discussion | |
44 http://bio.perl.org/MailList.html - About the mailing lists | |
45 | |
46 =head2 Reporting Bugs | |
47 | |
48 Report bugs to the Bioperl bug tracking system to help us keep track | |
49 the bugs and their resolution. Bug reports can be submitted via email | |
50 or the web: | |
51 | |
52 bioperl-bugs@bio.perl.org | |
53 http://bugzilla.bioperl.org/ | |
54 | |
55 =head1 AUTHOR - Jason Stajich | |
56 | |
57 Email jason@bioperl.org | |
58 | |
59 =head1 APPENDIX | |
60 | |
61 The rest of the documentation details each of the object | |
62 methods. Internal methods are usually preceded with a _ | |
63 | |
64 =cut | |
65 | |
66 # Let the code begin... | |
67 | |
68 package Bio::Location::Fuzzy; | |
69 use vars qw(@ISA ); | |
70 use strict; | |
71 | |
72 use Bio::Location::FuzzyLocationI; | |
73 use Bio::Location::Atomic; | |
74 | |
75 @ISA = qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI ); | |
76 | |
77 BEGIN { | |
78 use vars qw( %FUZZYCODES %FUZZYPOINTENCODE %FUZZYRANGEENCODE | |
79 @LOCATIONCODESBSANE ); | |
80 | |
81 @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN', | |
82 'BEFORE', 'AFTER'); | |
83 | |
84 %FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact | |
85 # Exact position is unknown, but is within the range specified, ((1.2)..100) | |
86 'WITHIN' => '.', | |
87 # 1^2 | |
88 'BETWEEN' => '^', | |
89 # <100 | |
90 'BEFORE' => '<', | |
91 # >10 | |
92 'AFTER' => '>'); | |
93 | |
94 # The following regular expressions map to fuzzy location types. Every | |
95 # expression must match the complete encoded point string, and must | |
96 # contain two groups identifying min and max. Empty matches are automatic. | |
97 # converted to undef, except for 'EXACT', for which max is set to equal | |
98 # min. | |
99 %FUZZYPOINTENCODE = ( | |
100 '\>(\d+)(.{0})' => 'AFTER', | |
101 '\<(.{0})(\d+)' => 'BEFORE', | |
102 '(\d+)' => 'EXACT', | |
103 '(\d+)(.{0})\>' => 'AFTER', | |
104 '(.{0})(\d+)\<' => 'BEFORE', | |
105 '(\d+)\.(\d+)' => 'WITHIN', | |
106 '(\d+)\^(\d+)' => 'BETWEEN', | |
107 ); | |
108 | |
109 %FUZZYRANGEENCODE = ( '\.' => 'WITHIN', | |
110 '\.\.' => 'EXACT', | |
111 '\^' => 'BETWEEN' ); | |
112 | |
113 } | |
114 | |
115 =head2 new | |
116 | |
117 Title : new | |
118 Usage : my $fuzzyloc = new Bio::Location::Fuzzy( @args); | |
119 Function: | |
120 Returns : | |
121 Args : -start => value for start (initialize by superclass) | |
122 -end => value for end (initialize by superclass) | |
123 -strand => value for strand (initialize by superclass) | |
124 -location_type => either ('EXACT', 'WITHIN', 'BETWEEN') OR | |
125 ( 1,2,3) | |
126 -start_ext=> extension for start - defaults to 0, | |
127 -start_fuz= fuzzy code for start can be | |
128 ( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR | |
129 a value 1 - 5 corresponding to index+1 above | |
130 -end_ext=> extension for end - defaults to 0, | |
131 -end_fuz= fuzzy code for end can be | |
132 ( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR | |
133 a value 1 - 5 corresponding to index+1 above | |
134 | |
135 =cut | |
136 | |
137 sub new { | |
138 my ($class, @args) = @_; | |
139 my $self = $class->SUPER::new(@args); | |
140 my ($location_type, $start_ext, $start_fuz, $end_ext, $end_fuz) = | |
141 $self->_rearrange([ qw(LOCATION_TYPE START_EXT START_FUZ | |
142 END_EXT END_FUZ ) | |
143 ], @args); | |
144 | |
145 $location_type && $self->location_type($location_type); | |
146 $start_ext && $self->max_start($self->min_start + $start_ext); | |
147 $end_ext && $self->max_end($self->min_end + $end_ext); | |
148 $start_fuz && $self->start_pos_type($start_fuz); | |
149 $end_fuz && $self->end_pos_type($end_fuz); | |
150 | |
151 return $self; | |
152 } | |
153 | |
154 =head2 location_type | |
155 | |
156 Title : location_type | |
157 Usage : my $location_type = $location->location_type(); | |
158 Function: Get location type encoded as text | |
159 Returns : string ('EXACT', 'WITHIN', 'BETWEEN') | |
160 Args : none | |
161 | |
162 =cut | |
163 | |
164 sub location_type { | |
165 my ($self,$value) = @_; | |
166 if( defined $value || ! defined $self->{'_location_type'} ) { | |
167 $value = 'EXACT' unless defined $value; | |
168 if(! defined $FUZZYCODES{$value}) { | |
169 $value = uc($value); | |
170 if( $value =~ /\.\./ ) { | |
171 $value = 'EXACT'; | |
172 } elsif( $value =~ /^\.$/ ) { | |
173 $value = 'WITHIN'; | |
174 } elsif( $value =~ /\^/ ) { | |
175 $value = 'BETWEEN'; | |
176 | |
177 | |
178 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->start. "] and [". $self->end. "]") | |
179 if defined $self->start && defined $self->end && ($self->end - 1 == $self->start); | |
180 | |
181 | |
182 } elsif( $value ne 'EXACT' && $value ne 'WITHIN' && | |
183 $value ne 'BETWEEN' ) { | |
184 $self->throw("Did not specify a valid location type"); | |
185 } | |
186 } | |
187 $self->{'_location_type'} = $value; | |
188 } | |
189 return $self->{'_location_type'}; | |
190 } | |
191 | |
192 =head1 LocationI methods | |
193 | |
194 =head2 length | |
195 | |
196 Title : length | |
197 Usage : $length = $fuzzy_loc->length(); | |
198 Function: Get the length of this location. | |
199 | |
200 Note that the length of a fuzzy location will always depend | |
201 on the currently active interpretation of start and end. The | |
202 result will therefore vary for different CoordinatePolicy objects. | |
203 | |
204 Returns : an integer | |
205 Args : none | |
206 | |
207 =cut | |
208 | |
209 #sub length { | |
210 # my($self) = @_; | |
211 # return $self->SUPER::length() if( !$self->start || !$self->end); | |
212 # $self->warn('Length is not valid for a FuzzyLocation'); | |
213 # return 0; | |
214 #} | |
215 | |
216 =head2 start | |
217 | |
218 Title : start | |
219 Usage : $start = $fuzzy->start(); | |
220 Function: get/set start of this range, handling fuzzy_starts | |
221 Returns : a positive integer representing the start of the location | |
222 Args : start location on set (can be fuzzy point string) | |
223 | |
224 =cut | |
225 | |
226 sub start { | |
227 my($self,$value) = @_; | |
228 if( defined $value ) { | |
229 my ($encode,$min,$max) = $self->_fuzzypointdecode($value); | |
230 $self->start_pos_type($encode); | |
231 $self->min_start($min); | |
232 $self->max_start($max); | |
233 } | |
234 | |
235 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->SUPER::start. "] and [". $self->SUPER::end. "]") | |
236 if $self->location_type eq 'BETWEEN' && defined $self->SUPER::end && ($self->SUPER::end - 1 == $self->SUPER::start); | |
237 | |
238 return $self->SUPER::start(); | |
239 } | |
240 | |
241 =head2 end | |
242 | |
243 Title : end | |
244 Usage : $end = $fuzzy->end(); | |
245 Function: get/set end of this range, handling fuzzy_ends | |
246 Returns : a positive integer representing the end of the range | |
247 Args : end location on set (can be fuzzy string) | |
248 | |
249 =cut | |
250 | |
251 sub end { | |
252 my($self,$value) = @_; | |
253 if( defined $value ) { | |
254 my ($encode,$min,$max) = $self->_fuzzypointdecode($value); | |
255 $self->end_pos_type($encode); | |
256 $self->min_end($min); | |
257 $self->max_end($max); | |
258 } | |
259 | |
260 $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->SUPER::start. "] and [". $self->SUPER::end. "]") | |
261 if $self->location_type eq 'BETWEEN' && defined $self->SUPER::start && ($self->SUPER::end - 1 == $self->SUPER::start); | |
262 | |
263 return $self->SUPER::end(); | |
264 } | |
265 | |
266 =head2 min_start | |
267 | |
268 Title : min_start | |
269 Usage : $min_start = $fuzzy->min_start(); | |
270 Function: get/set the minimum starting point | |
271 Returns : the minimum starting point from the contained sublocations | |
272 Args : integer or undef on set | |
273 | |
274 =cut | |
275 | |
276 sub min_start { | |
277 my ($self,@args) = @_; | |
278 | |
279 if(@args) { | |
280 $self->{'_min_start'} = $args[0]; # the value may be undef! | |
281 } | |
282 return $self->{'_min_start'}; | |
283 } | |
284 | |
285 =head2 max_start | |
286 | |
287 Title : max_start | |
288 Usage : my $maxstart = $location->max_start(); | |
289 Function: Get/set maximum starting location of feature startpoint | |
290 Returns : integer or undef if no maximum starting point. | |
291 Args : integer or undef on set | |
292 | |
293 =cut | |
294 | |
295 sub max_start { | |
296 my ($self,@args) = @_; | |
297 | |
298 if(@args) { | |
299 $self->{'_max_start'} = $args[0]; # the value may be undef! | |
300 } | |
301 return $self->{'_max_start'}; | |
302 } | |
303 | |
304 =head2 start_pos_type | |
305 | |
306 Title : start_pos_type | |
307 Usage : my $start_pos_type = $location->start_pos_type(); | |
308 Function: Get/set start position type. | |
309 Returns : type of position coded as text | |
310 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') | |
311 Args : a string on set | |
312 | |
313 =cut | |
314 | |
315 sub start_pos_type { | |
316 my ($self,$value) = @_; | |
317 if(defined $value && $value =~ /^\d+$/ ) { | |
318 if( $value == 0 ) { $value = 'EXACT'; } | |
319 else { | |
320 my $v = $LOCATIONCODESBSANE[$value]; | |
321 if( ! defined $v ) { | |
322 $self->warn("Provided value $value which I don't understand, reverting to 'EXACT'"); | |
323 $v = 'EXACT'; | |
324 } | |
325 $value = $v; | |
326 } | |
327 } | |
328 if(defined($value)) { | |
329 $self->{'_start_pos_type'} = $value; | |
330 } | |
331 return $self->{'_start_pos_type'}; | |
332 } | |
333 | |
334 =head2 min_end | |
335 | |
336 Title : min_end | |
337 Usage : my $minend = $location->min_end(); | |
338 Function: Get/set minimum ending location of feature endpoint | |
339 Returns : integer or undef if no minimum ending point. | |
340 Args : integer or undef on set | |
341 | |
342 =cut | |
343 | |
344 sub min_end { | |
345 my ($self,@args) = @_; | |
346 | |
347 if(@args) { | |
348 $self->{'_min_end'} = $args[0]; # the value may be undef! | |
349 } | |
350 return $self->{'_min_end'}; | |
351 } | |
352 | |
353 =head2 max_end | |
354 | |
355 Title : max_end | |
356 Usage : my $maxend = $location->max_end(); | |
357 Function: Get/set maximum ending location of feature endpoint | |
358 Returns : integer or undef if no maximum ending point. | |
359 Args : integer or undef on set | |
360 | |
361 =cut | |
362 | |
363 sub max_end { | |
364 my ($self,@args) = @_; | |
365 | |
366 if(@args) { | |
367 $self->{'_max_end'} = $args[0]; # the value may be undef! | |
368 } | |
369 return $self->{'_max_end'}; | |
370 } | |
371 | |
372 =head2 end_pos_type | |
373 | |
374 Title : end_pos_type | |
375 Usage : my $end_pos_type = $location->end_pos_type(); | |
376 Function: Get/set end position type. | |
377 Returns : type of position coded as text | |
378 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') | |
379 Args : a string on set | |
380 | |
381 =cut | |
382 | |
383 sub end_pos_type { | |
384 my ($self,$value) = @_; | |
385 if( defined $value && $value =~ /^\d+$/ ) { | |
386 if( $value == 0 ) { $value = 'EXACT'; } | |
387 else { | |
388 my $v = $LOCATIONCODESBSANE[$value]; | |
389 if( ! defined $v ) { | |
390 $self->warn("Provided value $value which I don't understand, reverting to 'EXACT'"); | |
391 $v = 'EXACT'; | |
392 } | |
393 $value = $v; | |
394 } | |
395 } | |
396 | |
397 if(defined($value)) { | |
398 $self->{'_end_pos_type'} = $value; | |
399 } | |
400 return $self->{'_end_pos_type'}; | |
401 } | |
402 | |
403 =head2 seq_id | |
404 | |
405 Title : seq_id | |
406 Usage : my $seqid = $location->seq_id(); | |
407 Function: Get/Set seq_id that location refers to | |
408 Returns : seq_id | |
409 Args : [optional] seq_id value to set | |
410 | |
411 =cut | |
412 | |
413 =head2 coordinate_policy | |
414 | |
415 Title : coordinate_policy | |
416 Usage : $policy = $location->coordinate_policy(); | |
417 $location->coordinate_policy($mypolicy); # set may not be possible | |
418 Function: Get the coordinate computing policy employed by this object. | |
419 | |
420 See Bio::Location::CoordinatePolicyI for documentation about | |
421 the policy object and its use. | |
422 | |
423 The interface *does not* require implementing classes to accept | |
424 setting of a different policy. The implementation provided here | |
425 does, however, allow to do so. | |
426 | |
427 Implementors of this interface are expected to initialize every | |
428 new instance with a CoordinatePolicyI object. The implementation | |
429 provided here will return a default policy object if none has | |
430 been set yet. To change this default policy object call this | |
431 method as a class method with an appropriate argument. Note that | |
432 in this case only subsequently created Location objects will be | |
433 affected. | |
434 | |
435 Returns : A Bio::Location::CoordinatePolicyI implementing object. | |
436 Args : On set, a Bio::Location::CoordinatePolicyI implementing object. | |
437 | |
438 =cut | |
439 | |
440 =head2 to_FTstring | |
441 | |
442 Title : to_FTstring | |
443 Usage : my $locstr = $location->to_FTstring() | |
444 Function: Get/Set seq_id that location refers to | |
445 Returns : seq_id | |
446 Args : [optional] seq_id value to set | |
447 | |
448 =cut | |
449 | |
450 sub to_FTstring { | |
451 my ($self) = @_; | |
452 my (%vals) = ( 'start' => $self->start, | |
453 'min_start' => $self->min_start, | |
454 'max_start' => $self->max_start, | |
455 'start_code' => $self->start_pos_type, | |
456 'end' => $self->end, | |
457 'min_end' => $self->min_end, | |
458 'max_end' => $self->max_end, | |
459 'end_code' => $self->end_pos_type ); | |
460 | |
461 my (%strs) = ( 'start' => '', | |
462 'end' => ''); | |
463 my ($delimiter) = $FUZZYCODES{$self->location_type}; | |
464 # I'm lazy, lets do this in a loop since behaviour will be the same for | |
465 # start and end | |
466 foreach my $point ( qw(start end) ) { | |
467 if( $vals{$point."_code"} ne 'EXACT' ) { | |
468 | |
469 if( (!defined $vals{"min_$point"} || | |
470 !defined $vals{"max_$point"}) | |
471 && ( $vals{$point."_code"} eq 'WITHIN' || | |
472 $vals{$point."_code"} eq 'BETWEEN') | |
473 ) { | |
474 $vals{"min_$point"} = '' unless defined $vals{"min_$point"}; | |
475 $vals{"max_$point"} = '' unless defined $vals{"max_$point"}; | |
476 | |
477 $self->warn("Fuzzy codes for start are in a strange state, (". | |
478 join(",", ($vals{"min_$point"}, | |
479 $vals{"max_$point"}, | |
480 $vals{$point."_code"})). ")"); | |
481 return ''; | |
482 } | |
483 if( defined $vals{$point."_code"} && | |
484 ($vals{$point."_code"} eq 'BEFORE' || | |
485 $vals{$point."_code"} eq 'AFTER') | |
486 ) { | |
487 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}}; | |
488 } | |
489 if( defined $vals{"min_$point"} ) { | |
490 $strs{$point} .= $vals{"min_$point"}; | |
491 } | |
492 if( defined $vals{$point."_code"} && | |
493 ($vals{$point."_code"} eq 'WITHIN' || | |
494 $vals{$point."_code"} eq 'BETWEEN') | |
495 ) { | |
496 $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}}; | |
497 } | |
498 if( defined $vals{"max_$point"} ) { | |
499 $strs{$point} .= $vals{"max_$point"}; | |
500 } | |
501 if(($vals{$point."_code"} eq 'WITHIN') || | |
502 ($vals{$point."_code"} eq 'BETWEEN')) { | |
503 $strs{$point} = "(".$strs{$point}.")"; | |
504 } | |
505 } else { | |
506 $strs{$point} = $vals{$point}; | |
507 } | |
508 | |
509 } | |
510 my $str = $strs{'start'} . $delimiter . $strs{'end'}; | |
511 if($self->is_remote() && $self->seq_id()) { | |
512 $str = $self->seq_id() . ":" . $str; | |
513 } | |
514 if( $self->strand == -1 ) { | |
515 $str = "complement(" . $str . ")"; | |
516 } elsif($self->location_type() eq "WITHIN") { | |
517 $str = "(".$str.")"; | |
518 } | |
519 return $str; | |
520 } | |
521 | |
522 =head2 _fuzzypointdecode | |
523 | |
524 Title : _fuzzypointdecode | |
525 Usage : ($type,$min,$max) = $self->_fuzzypointdecode('<5'); | |
526 Function: Decode a fuzzy string. | |
527 Returns : A 3-element array consisting of the type of location, the | |
528 minimum integer, and the maximum integer describing the range | |
529 of coordinates this start or endpoint refers to. Minimum or | |
530 maximum coordinate may be undefined. | |
531 : Returns empty array on fail. | |
532 Args : fuzzypoint string | |
533 | |
534 =cut | |
535 | |
536 sub _fuzzypointdecode { | |
537 my ($self, $string) = @_; | |
538 return () if( !defined $string); | |
539 # strip off leading and trailing space | |
540 $string =~ s/^\s*(\S+)\s*/$1/; | |
541 foreach my $pattern ( keys %FUZZYPOINTENCODE ) { | |
542 if( $string =~ /^$pattern$/ ) { | |
543 my ($min,$max) = ($1,$2); | |
544 if($FUZZYPOINTENCODE{$pattern} eq 'EXACT') { | |
545 $max = $min; | |
546 } else { | |
547 $max = undef if(length($max) == 0); | |
548 $min = undef if(length($min) == 0); | |
549 } | |
550 return ($FUZZYPOINTENCODE{$pattern},$min,$max); | |
551 } | |
552 } | |
553 if( $self->verbose >= 1 ) { | |
554 $self->warn("could not find a valid fuzzy encoding for $string"); | |
555 } | |
556 return (); | |
557 } | |
558 | |
559 1; | |
560 |