Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/GFF/RelSegment.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 NAME | |
2 | |
3 Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support | |
4 | |
5 =head1 SYNOPSIS | |
6 | |
7 See L<Bio::DB::GFF>. | |
8 | |
9 =head1 DESCRIPTION | |
10 | |
11 Bio::DB::GFF::RelSegment is a stretch of sequence that can handle | |
12 relative coordinate addressing. It inherits from | |
13 Bio::DB::GFF::Segment, and is the base class for | |
14 Bio::DB::GFF::Feature. | |
15 | |
16 In addition to the source sequence, a relative segment has a | |
17 "reference sequence", which is used as the basis for its coordinate | |
18 system. The reference sequence can be changed at will, allowing you | |
19 freedom to change the "frame of reference" for features contained | |
20 within the segment. For example, by setting a segment's reference | |
21 sequence to the beginning of a gene, you can view all other features | |
22 in gene-relative coordinates. | |
23 | |
24 The reference sequence and the source sequence must be on the same | |
25 physical stretch of DNA, naturally. However, they do not have to be | |
26 on the same strand. The strandedness of the reference sequence | |
27 determines whether coordinates increase to the right or the left. | |
28 | |
29 Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment | |
30 objects directly, but use those that are returned by the Bio::DB::GFF | |
31 module. | |
32 | |
33 =head2 An Example | |
34 | |
35 To understand how relative coordinates work, consider the following | |
36 example from the C. elegans database. First we create the appropriate | |
37 GFF accessor object (the factory): | |
38 | |
39 my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans', | |
40 -adaptor=>'dbi:mysqlopt'); | |
41 | |
42 Now we fetch out a segment based on cosmid clone ZK909: | |
43 | |
44 my $seg = $db->segment('ZK909'); | |
45 | |
46 If we call the segment's refseq() method, we see that the base of the | |
47 coordinate system is the sequence "ZK154", and that its start and | |
48 stop positions are 1 and the length of the cosmid: | |
49 | |
50 print $seg->refseq; | |
51 => ZK909 | |
52 | |
53 print $seg->start,' - ',$seg->stop; | |
54 => 1 - 33782 | |
55 | |
56 As a convenience, the "" operator is overloaded in this class, to give | |
57 the reference sequence, and start and stop positions: | |
58 | |
59 print $seg; | |
60 => ZK909:1,33782 | |
61 | |
62 Internally, Bio::DB::GFF::RelSegment has looked up the absolute | |
63 coordinates of this segment and maintains the source sequence and the | |
64 absolute coordinates relative to the source sequence. We can see this | |
65 information using sourceseq() (inherited from Bio::DB::GFF::Segment) | |
66 and the abs_start() and abs_end() methods: | |
67 | |
68 print $seg->sourceseq; | |
69 => CHROMOSOME_I | |
70 | |
71 print $seg->abs_start,' - ',$seg->abs_end; | |
72 => 14839545 - 14873326 | |
73 | |
74 We can also put the segment into absolute mode, so that it behaves | |
75 like Bio::DB::Segment, and always represents coordinates on the source | |
76 sequence. This is done by passing a true value to the absolute() | |
77 method: | |
78 | |
79 $seq->absolute(1); | |
80 print $seg; | |
81 => CHROMOSOME_I:14839545,14873326 | |
82 | |
83 We can change the reference sequence at any time. One way is to call | |
84 the segment's ref() method, giving it the ID (and optionally the | |
85 class) of another landmark on the genome. For example, if we know | |
86 that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in | |
87 ZK337-relative coordinates: | |
88 | |
89 $seg->refseq('ZK337'); | |
90 print $seg; | |
91 => ZK337:-33670,111 | |
92 | |
93 We can call the segment's features() method in order to get the list | |
94 of contigs that overlap this segment (in the C. elegans database, | |
95 contigs have feature type "Sequence:Link"): | |
96 | |
97 @links = $seg->features('Sequence:Link'); | |
98 | |
99 We can now set the reference sequence to the first of these contigs like so: | |
100 | |
101 $seg->refseq($links[0]); | |
102 print $seg; | |
103 => Sequence:Link(LINK_Y95D11A):3997326,4031107 | |
104 | |
105 =cut | |
106 | |
107 package Bio::DB::GFF::RelSegment; | |
108 | |
109 use strict; | |
110 | |
111 use Bio::DB::GFF::Feature; | |
112 use Bio::DB::GFF::Util::Rearrange; | |
113 use Bio::DB::GFF::Segment; | |
114 use Bio::RangeI; | |
115 | |
116 use vars qw(@ISA); | |
117 @ISA = qw(Bio::DB::GFF::Segment); | |
118 | |
119 use overload '""' => 'asString', | |
120 'bool' => sub { overload::StrVal(shift) }, | |
121 fallback=>1; | |
122 | |
123 =head1 API | |
124 | |
125 The remainder of this document describes the API for | |
126 Bio::DB::GFF::Segment. | |
127 | |
128 =cut | |
129 | |
130 =head2 new | |
131 | |
132 Title : new | |
133 Usage : $s = Bio::DB::GFF::RelSegment->new(@args) | |
134 Function: create a new relative segment | |
135 Returns : a new Bio::DB::GFF::RelSegment object | |
136 Args : see below | |
137 Status : Public | |
138 | |
139 This method creates a new Bio::DB::GFF::RelSegment object. Generally | |
140 this is called automatically by the Bio::DB::GFF module and | |
141 derivatives. | |
142 | |
143 This function uses a named-argument style: | |
144 | |
145 -factory a Bio::DB::GFF::Adaptor to use for database access | |
146 -seq ID of the source sequence | |
147 -class class of the source sequence | |
148 -start start of the desired segment relative to source sequence | |
149 -stop stop of the desired segment relative to source sequence | |
150 -ref ID of the reference sequence | |
151 -refclass class of the reference sequence | |
152 -offset 0-based offset from source sequence to start of segment | |
153 -length length of desired segment | |
154 -absolute, -force_absolute | |
155 use absolute coordinates, rather than coordinates relative | |
156 to the start of self or the reference sequence | |
157 | |
158 The -seq argument accepts the ID of any landmark in the database. The | |
159 stored source sequence becomes whatever the GFF file indicates is the | |
160 proper sequence for this landmark. A class of "Sequence" is assumed | |
161 unless otherwise specified in the -class argument. | |
162 | |
163 If the argument to -seq is a Bio::GFF::Featname object (such as | |
164 returned by the group() method), then the class is taken from that. | |
165 | |
166 The optional -start and -stop arguments specify the end points for the | |
167 retrieved segment. For those who do not like 1-based indexing, | |
168 -offset and -length are provided. If both -start/-stop and | |
169 -offset/-length are provided, the latter overrides the former. | |
170 Generally it is not a good idea to mix metaphors. | |
171 | |
172 -ref and -refclass together indicate a sequence to be used for | |
173 relative coordinates. If not provided, the source sequence indicated | |
174 by -seq is used as the reference sequence. If the argument to -ref is | |
175 a Bio::GFF::Featname object (such as returned by the group() method), | |
176 then the class is taken from that. | |
177 | |
178 -force_absolute should be used if you wish to skip the lookup of the | |
179 absolute position of the source sequence that ordinarily occurs when | |
180 you create a relative segment. In this case, the source sequence must | |
181 be a sequence that has been specified as the "source" in the GFF file. | |
182 | |
183 =cut | |
184 | |
185 # Create a new Bio::DB::GFF::RelSegment Object | |
186 # arguments are: | |
187 # -factory => factory and DBI interface | |
188 # -seq => $sequence_name | |
189 # -start => $start_relative_to_sequence | |
190 # -stop => $stop_relative_to_sequence | |
191 # -ref => $sequence which establishes coordinate system | |
192 # -offset => 0-based offset relative to sequence | |
193 # -length => length of segment | |
194 # -nocheck => turn off checking, force segment to be constructed | |
195 # -absolute => use absolute coordinate addressing | |
196 #' | |
197 sub new { | |
198 my $package = shift; | |
199 my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) = | |
200 rearrange([ | |
201 'FACTORY', | |
202 [qw(NAME SEQ SEQUENCE SOURCESEQ)], | |
203 [qw(START BEGIN)], | |
204 [qw(STOP END)], | |
205 [qw(REFSEQ REF REFNAME)], | |
206 [qw(CLASS SEQCLASS)], | |
207 qw(REFCLASS), | |
208 [qw(OFFSET OFF)], | |
209 [qw(LENGTH LEN)], | |
210 [qw(ABSOLUTE)], | |
211 [qw(NOCHECK FORCE)], | |
212 ],@_); | |
213 | |
214 $package = ref $package if ref $package; | |
215 $factory or $package->throw("new(): provide a -factory argument"); | |
216 | |
217 # to allow people to use segments as sources | |
218 if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) { | |
219 $start = 1 unless defined $start; | |
220 $stop = $name->length unless defined $stop; | |
221 return $name->subseq($start,$stop); | |
222 } | |
223 | |
224 my @object_results; | |
225 | |
226 # support for Featname objects | |
227 if (ref($name) && $name->can('class')) { | |
228 $class = $name->class; | |
229 $name = $name->name; | |
230 } | |
231 # if the class of the landmark is not specified then default to 'Sequence' | |
232 $class ||= eval{$factory->default_class} || 'Sequence'; | |
233 | |
234 # confirm that indicated sequence is actually in the database! | |
235 my @abscoords; | |
236 | |
237 # abscoords() will now return an array ref, each element of which is | |
238 # ($absref,$absclass,$absstart,$absstop,$absstrand) | |
239 | |
240 if ($nocheck) { | |
241 $force_absolute++; | |
242 $start = 1; | |
243 } | |
244 | |
245 if ($force_absolute && defined($start)) { # absolute position is given to us | |
246 @abscoords = ([$name,$class,$start,$stop,'+']); | |
247 } else { | |
248 my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return; | |
249 @abscoords = @$result; | |
250 } | |
251 | |
252 foreach (@abscoords) { | |
253 my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_; | |
254 $sname = $name unless defined $sname; | |
255 my ($this_start,$this_stop,$this_length) = ($start,$stop,$length); | |
256 | |
257 # partially fill in object | |
258 my $self = bless { factory => $factory },$package; | |
259 | |
260 $absstrand ||= '+'; | |
261 | |
262 # an explicit length overrides start and stop | |
263 if (defined $offset) { | |
264 warn "new(): bad idea to call new() with both a start and an offset" | |
265 if defined $this_start; | |
266 $this_start = $offset+1; | |
267 } | |
268 if (defined $this_length) { | |
269 warn "new(): bad idea to call new() with both a stop and a length" | |
270 if defined $this_stop; | |
271 $this_stop = $this_start + $length - 1; | |
272 } | |
273 | |
274 # this allows a SQL optimization way down deep | |
275 $self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop); | |
276 | |
277 $this_start = 1 if !defined $this_start; | |
278 $this_stop = $absstop-$absstart+1 if !defined $this_stop; | |
279 $this_length = $this_stop - $this_start + 1; | |
280 | |
281 # now offset to correct subsegment based on desired start and stop | |
282 if ($force_absolute) { | |
283 ($this_start,$this_stop) = ($absstart,$absstop); | |
284 $self->absolute(1); | |
285 } elsif ($absstrand eq '+') { | |
286 $this_start = $absstart + $this_start - 1; | |
287 $this_stop = $this_start + $this_length - 1; | |
288 } else { | |
289 $this_start = $absstop - ($this_start - 1); | |
290 $this_stop = $absstop - ($this_stop - 1); | |
291 } | |
292 | |
293 # handle truncation in either direction | |
294 # This only happens if the segment runs off the end of | |
295 # the reference sequence | |
296 if ($factory->strict_bounds_checking && | |
297 (($this_start < $absstart) || ($this_stop > $absstop))) { | |
298 # return empty if we are completely off the end of the ref se | |
299 next unless $this_start<=$absstop && $this_stop>=$absstart; | |
300 if (my $a = $factory->abscoords($absref,'Sequence')) { | |
301 my $refstart = $a->[0][2]; | |
302 my $refstop = $a->[0][3]; | |
303 if ($this_start < $refstart) { | |
304 $this_start = $refstart; | |
305 $self->{truncated}{start}++; | |
306 } | |
307 if ($this_stop > $refstop) { | |
308 $this_stop = $absstop; | |
309 $self->{truncated}{stop}++; | |
310 } | |
311 } | |
312 } | |
313 | |
314 @{$self}{qw(sourceseq start stop strand class)} | |
315 = ($absref,$this_start,$this_stop,$absstrand,$absclass); | |
316 | |
317 # handle reference sequence | |
318 if (defined $refseq) { | |
319 $refclass = $refseq->class if $refseq->can('class'); | |
320 $refclass ||= 'Sequence'; | |
321 my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass); | |
322 unless ($refref eq $absref) { | |
323 $self->error("reference sequence is on $refref but source sequence is on $absref"); | |
324 return; | |
325 } | |
326 $refstart = $refstop if $refstrand eq '-'; | |
327 @{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand); | |
328 } else { | |
329 $absstart = $absstop if $absstrand eq '-'; | |
330 @{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand); | |
331 } | |
332 push @object_results,$self; | |
333 } | |
334 | |
335 return wantarray ? @object_results : $object_results[0]; | |
336 } | |
337 | |
338 # overridden methods | |
339 # start, stop, length | |
340 sub start { | |
341 my $self = shift; | |
342 return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute; | |
343 $self->_abs2rel($self->{start}); | |
344 } | |
345 sub end { | |
346 my $self = shift; | |
347 return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute; | |
348 $self->_abs2rel($self->{stop}); | |
349 } | |
350 *stop = \&end; | |
351 | |
352 sub length { | |
353 my $self = shift; | |
354 return unless defined $self->abs_end; | |
355 abs($self->abs_end - $self->abs_start) + 1; | |
356 } | |
357 | |
358 sub abs_start { | |
359 my $self = shift; | |
360 if ($self->absolute) { | |
361 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); | |
362 return ($a<$b) ? $a : $b; | |
363 } | |
364 else { | |
365 return $self->SUPER::abs_start(@_); | |
366 } | |
367 } | |
368 sub abs_end { | |
369 my $self = shift; | |
370 if ($self->absolute) { | |
371 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); | |
372 return ($a>$b) ? $a : $b; | |
373 } | |
374 | |
375 else { | |
376 return $self->SUPER::abs_end(@_); | |
377 } | |
378 } | |
379 | |
380 =head2 refseq | |
381 | |
382 Title : refseq | |
383 Usage : $ref = $s->refseq([$newseq] [,$newseqclass]) | |
384 Function: get/set reference sequence | |
385 Returns : current reference sequence | |
386 Args : new reference sequence and class (optional) | |
387 Status : Public | |
388 | |
389 This method will get or set the reference sequence. Called with no | |
390 arguments, it returns the current reference sequence. Called with | |
391 either a sequence ID and class, a Bio::DB::GFF::Segment object (or | |
392 subclass) or a Bio::DB::GFF::Featname object, it will set the current | |
393 reference sequence and return the previous one. | |
394 | |
395 The method will generate an exception if you attempt to set the | |
396 reference sequence to a sequence that isn't contained in the database, | |
397 or one that has a different source sequence from the segment. | |
398 | |
399 =cut | |
400 | |
401 #' | |
402 sub refseq { | |
403 my $self = shift; | |
404 my $g = $self->{ref}; | |
405 if (@_) { | |
406 my ($newref,$newclass); | |
407 if (@_ == 2) { | |
408 $newclass = shift; | |
409 $newref = shift; | |
410 } else { | |
411 $newref = shift; | |
412 $newclass = 'Sequence'; | |
413 } | |
414 | |
415 defined $newref or $self->throw('refseq() called with an undef reference sequence'); | |
416 | |
417 # support for Featname objects | |
418 $newclass = $newref->class if ref($newref) && $newref->can('class'); | |
419 | |
420 # $self->throw("Cannot define a segment's reference sequence in terms of itself!") | |
421 # if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self); | |
422 | |
423 my ($refsource,undef,$refstart,$refstop,$refstrand); | |
424 if ($newref->isa('Bio::DB::GFF::RelSegment')) { | |
425 ($refsource,undef,$refstart,$refstop,$refstrand) = | |
426 ($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-'); | |
427 } else { | |
428 my $coords = $self->factory->abscoords($newref,$newclass); | |
429 foreach (@$coords) { # find the appropriate one | |
430 ($refsource,undef,$refstart,$refstop,$refstrand) = @$_; | |
431 last if $refsource eq $self->{sourceseq}; | |
432 } | |
433 | |
434 } | |
435 $self->throw("can't set reference sequence: $newref and $self are on different sequence segments") | |
436 unless $refsource eq $self->{sourceseq}; | |
437 | |
438 @{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand); | |
439 $self->absolute(0); | |
440 } | |
441 return $self->absolute ? $self->sourceseq : $g; | |
442 } | |
443 | |
444 | |
445 =head2 abs_low | |
446 | |
447 Title : abs_low | |
448 Usage : $s->abs_low | |
449 Function: the absolute lowest coordinate of the segment | |
450 Returns : an integer | |
451 Args : none | |
452 Status : Public | |
453 | |
454 This is for GadFly compatibility, and returns the low coordinate in | |
455 absolute coordinates; | |
456 | |
457 =cut | |
458 | |
459 sub abs_low { | |
460 my $self = shift; | |
461 my ($a,$b) = ($self->abs_start,$self->abs_end); | |
462 return ($a<$b) ? $a : $b; | |
463 } | |
464 | |
465 =head2 abs_high | |
466 | |
467 Title : abs_high | |
468 Usage : $s->abs_high | |
469 Function: the absolute highest coordinate of the segment | |
470 Returns : an integer | |
471 Args : none | |
472 Status : Public | |
473 | |
474 This is for GadFly compatibility, and returns the high coordinate in | |
475 absolute coordinates; | |
476 | |
477 =cut | |
478 | |
479 sub abs_high { | |
480 my $self = shift; | |
481 my ($a,$b) = ($self->abs_start,$self->abs_end); | |
482 return ($a>$b) ? $a : $b; | |
483 } | |
484 | |
485 | |
486 =head2 asString | |
487 | |
488 Title : asString | |
489 Usage : $s->asString | |
490 Function: human-readable representation of the segment | |
491 Returns : a string | |
492 Args : none | |
493 Status : Public | |
494 | |
495 This method will return a human-readable representation of the | |
496 segment. It is the overloaded method call for the "" operator. | |
497 | |
498 Currently the format is: | |
499 | |
500 refseq:start,stop | |
501 | |
502 =cut | |
503 | |
504 sub asString { | |
505 my $self = shift; | |
506 return $self->SUPER::asString if $self->absolute; | |
507 my $label = $self->{ref}; | |
508 my $start = $self->start || ''; | |
509 my $stop = $self->stop || ''; | |
510 if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) { | |
511 $label = $self->abs_ref; | |
512 $start = $self->abs_start; | |
513 $stop = $self->abs_end; | |
514 } | |
515 return "$label:$start,$stop"; | |
516 } | |
517 | |
518 sub name { shift->asString } | |
519 | |
520 =head2 absolute | |
521 | |
522 Title : absolute | |
523 Usage : $abs = $s->absolute([$abs]) | |
524 Function: get/set absolute coordinates | |
525 Returns : a boolean flag | |
526 Args : new setting for flag (optional) | |
527 Status : Public | |
528 | |
529 Called with a boolean flag, this method controls whether to display | |
530 relative coordinates (relative to the reference sequence) or absolute | |
531 coordinates (relative to the source sequence). It will return the | |
532 previous value of the setting. | |
533 | |
534 =cut | |
535 | |
536 sub absolute { | |
537 my $self = shift; | |
538 my $g = $self->{absolute}; | |
539 $self->{absolute} = shift if @_; | |
540 $g; | |
541 } | |
542 | |
543 =head2 features | |
544 | |
545 Title : features | |
546 Usage : @features = $s->features(@args) | |
547 Function: get features that overlap this segment | |
548 Returns : a list of Bio::DB::GFF::Feature objects | |
549 Args : see below | |
550 Status : Public | |
551 | |
552 This method will find all features that overlap the segment and return | |
553 a list of Bio::DB::GFF::Feature objects. The features will use | |
554 coordinates relative to the reference sequence in effect at the time | |
555 that features() was called. | |
556 | |
557 The returned list can be limited to certain types of feature by | |
558 filtering on their method and/or source. In addition, it is possible | |
559 to obtain an iterator that will step through a large number of | |
560 features sequentially. | |
561 | |
562 Arguments can be provided positionally or using the named arguments | |
563 format. In the former case, the arguments are a list of feature types | |
564 in the format "method:source". Either method or source can be | |
565 omitted, in which case the missing component is treated as a wildcard. | |
566 If no colon is present, then the type is treated as a method name. | |
567 Multiple arguments are ORed together. | |
568 | |
569 Examples: | |
570 | |
571 @f = $s->features('exon:curated'); # all curated exons | |
572 @f = $s->features('exon:curated','intron'); # curated exons and all introns | |
573 @f = $s->features('similarity:.*EST.*'); # all similarities | |
574 # having something to do | |
575 # with ESTs | |
576 | |
577 The named parameter form gives you control over a few options: | |
578 | |
579 -types an array reference to type names in the format | |
580 "method:source" | |
581 | |
582 -merge Whether to apply aggregators to the generated features (default yes) | |
583 | |
584 -rare Turn on an optimization suitable for a relatively rare feature type, | |
585 where it will be faster to filter by feature type first | |
586 and then by position, rather than vice versa. | |
587 | |
588 -attributes a hashref containing a set of attributes to match | |
589 | |
590 -iterator Whether to return an iterator across the features. | |
591 | |
592 -binsize A true value will create a set of artificial features whose | |
593 start and stop positions indicate bins of the given size, and | |
594 whose scores are the number of features in the bin. The | |
595 class and method of the feature will be set to "bin", | |
596 its source to "method:source", and its group to "bin:method:source". | |
597 This is a handy way of generating histograms of feature density. | |
598 | |
599 -merge is a boolean flag that controls whether the adaptor's | |
600 aggregators wll be applied to the features returned by this method. | |
601 | |
602 If -iterator is true, then the method returns a single scalar value | |
603 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly | |
604 on this object to fetch each of the features in turn. If iterator is | |
605 false or absent, then all the features are returned as a list. | |
606 | |
607 The -attributes argument is a hashref containing one or more | |
608 attributes to match against: | |
609 | |
610 -attributes => { Gene => 'abc-1', | |
611 Note => 'confirmed' } | |
612 | |
613 Attribute matching is simple string matching, and multiple attributes | |
614 are ANDed together. | |
615 | |
616 =cut | |
617 | |
618 #' | |
619 | |
620 # return all features that overlap with this segment; | |
621 # optionally modified by a list of types to filter on | |
622 sub features { | |
623 my $self = shift; | |
624 my @args = $self->_process_feature_args(@_); | |
625 return $self->factory->overlapping_features(@args); | |
626 } | |
627 | |
628 =head2 top_SeqFeatures | |
629 | |
630 Title : top_SeqFeatures | |
631 Usage : | |
632 Function: | |
633 Example : | |
634 Returns : | |
635 Args : | |
636 | |
637 Alias for features(). Provided for Bio::SeqI compatibility. | |
638 | |
639 =cut | |
640 | |
641 =head2 all_SeqFeatures | |
642 | |
643 Title : all_SeqFeatures | |
644 Usage : | |
645 Function: | |
646 Example : | |
647 Returns : | |
648 Args : | |
649 | |
650 Alias for features(). Provided for Bio::SeqI compatibility. | |
651 | |
652 =cut | |
653 | |
654 =head2 sub_SeqFeatures | |
655 | |
656 Title : sub_SeqFeatures | |
657 Usage : | |
658 Function: | |
659 Example : | |
660 Returns : | |
661 Args : | |
662 | |
663 Alias for features(). Provided for Bio::SeqI compatibility. | |
664 | |
665 =cut | |
666 | |
667 *top_SeqFeatures = *all_SeqFeatures = \&features; | |
668 | |
669 =head2 get_feature_stream | |
670 | |
671 Title : features | |
672 Usage : $stream = $s->get_feature_stream(@args) | |
673 Function: get a stream of features that overlap this segment | |
674 Returns : a Bio::SeqIO::Stream-compliant stream | |
675 Args : see below | |
676 Status : Public | |
677 | |
678 This is the same as features(), but returns a stream. Use like this: | |
679 | |
680 $stream = $s->get_feature_stream('exon'); | |
681 while (my $exon = $stream->next_seq) { | |
682 print $exon->start,"\n"; | |
683 } | |
684 | |
685 =cut | |
686 | |
687 sub get_feature_stream { | |
688 my $self = shift; | |
689 my @args = $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1); | |
690 $self->features(@args); | |
691 } | |
692 | |
693 =head2 get_seq_stream | |
694 | |
695 Title : get_seq_stream | |
696 Usage : $stream = $s->get_seq_stream(@args) | |
697 Function: get a stream of features that overlap this segment | |
698 Returns : a Bio::SeqIO::Stream-compliant stream | |
699 Args : see below | |
700 Status : Public | |
701 | |
702 This is the same as feature_stream(), and is provided for Bioperl | |
703 compatibility. Use like this: | |
704 | |
705 $stream = $s->get_seq_stream('exon'); | |
706 while (my $exon = $stream->next_seq) { | |
707 print $exon->start,"\n"; | |
708 } | |
709 | |
710 =cut | |
711 | |
712 *get_seq_stream = \&get_feature_stream; | |
713 | |
714 | |
715 =head2 overlapping_features | |
716 | |
717 Title : overlapping_features | |
718 Usage : @features = $s->overlapping_features(@args) | |
719 Function: get features that overlap this segment | |
720 Returns : a list of Bio::DB::GFF::Feature objects | |
721 Args : see features() | |
722 Status : Public | |
723 | |
724 This is an alias for the features() method, and takes the same | |
725 arguments. | |
726 | |
727 =cut | |
728 | |
729 *overlapping_features = \&features; | |
730 | |
731 =head2 contained_features | |
732 | |
733 Title : contained_features | |
734 Usage : @features = $s->contained_features(@args) | |
735 Function: get features that are contained by this segment | |
736 Returns : a list of Bio::DB::GFF::Feature objects | |
737 Args : see features() | |
738 Status : Public | |
739 | |
740 This is identical in behavior to features() except that it returns | |
741 only those features that are completely contained within the segment, | |
742 rather than any that overlap. | |
743 | |
744 =cut | |
745 | |
746 # return all features completely contained within this segment | |
747 sub contained_features { | |
748 my $self = shift; | |
749 local $self->{whole} = 0; | |
750 my @args = $self->_process_feature_args(@_); | |
751 return $self->factory->contained_features(@args); | |
752 } | |
753 | |
754 # *contains = \&contained_features; | |
755 | |
756 =head2 contained_in | |
757 | |
758 Title : contained_in | |
759 Usage : @features = $s->contained_in(@args) | |
760 Function: get features that contain this segment | |
761 Returns : a list of Bio::DB::GFF::Feature objects | |
762 Args : see features() | |
763 Status : Public | |
764 | |
765 This is identical in behavior to features() except that it returns | |
766 only those features that completely contain the segment. | |
767 | |
768 =cut | |
769 | |
770 # return all features completely contained within this segment | |
771 sub contained_in { | |
772 my $self = shift; | |
773 local $self->{whole} = 0; | |
774 my @args = $self->_process_feature_args(@_); | |
775 return $self->factory->contained_in(@args); | |
776 } | |
777 | |
778 =head2 _process_feature_args | |
779 | |
780 Title : _process_feature_args | |
781 Usage : @args = $s->_process_feature_args(@args) | |
782 Function: preprocess arguments passed to features, | |
783 contained_features, and overlapping_features | |
784 Returns : a list of parsed arguents | |
785 Args : see feature() | |
786 Status : Internal | |
787 | |
788 This is an internal method that is used to check and format the | |
789 arguments to features() before passing them on to the adaptor. | |
790 | |
791 =cut | |
792 | |
793 sub _process_feature_args { | |
794 my $self = shift; | |
795 | |
796 my ($ref,$class,$start,$stop,$strand,$whole) | |
797 = @{$self}{qw(sourceseq class start stop strand whole)}; | |
798 | |
799 ($start,$stop) = ($stop,$start) if $strand eq '-'; | |
800 | |
801 my @args = (-ref=>$ref,-class=>$class); | |
802 | |
803 # indicating that we are fetching the whole segment allows certain | |
804 # SQL optimizations. | |
805 push @args,(-start=>$start,-stop=>$stop) unless $whole; | |
806 | |
807 if (@_) { | |
808 if ($_[0] =~ /^-/) { | |
809 push @args,@_; | |
810 } else { | |
811 my @types = @_; | |
812 push @args,-types=>\@types; | |
813 } | |
814 } | |
815 push @args,-parent=>$self; | |
816 @args; | |
817 } | |
818 | |
819 =head2 types | |
820 | |
821 Title : types | |
822 Usage : @types = $s->types([-enumerate=>1]) | |
823 Function: list feature types that overlap this segment | |
824 Returns : a list of Bio::DB::GFF::Typename objects or a hash | |
825 Args : see below | |
826 Status : Public | |
827 | |
828 The types() method will return a list of Bio::DB::GFF::Typename | |
829 objects, each corresponding to a feature that overlaps the segment. | |
830 If the optional -enumerate parameter is set to a true value, then the | |
831 method will return a hash in which the keys are the type names and the | |
832 values are the number of times a feature of that type is present on | |
833 the segment. For example: | |
834 | |
835 %count = $s->types(-enumerate=>1); | |
836 | |
837 =cut | |
838 | |
839 # wrapper for lower-level types() call. | |
840 sub types { | |
841 my $self = shift; | |
842 my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)}; | |
843 ($start,$stop) = ($stop,$start) if $strand eq '-'; | |
844 | |
845 my @args; | |
846 if (@_ && $_[0] !~ /^-/) { | |
847 @args = (-type => \@_) | |
848 } else { | |
849 @args = @_; | |
850 } | |
851 $self->factory->types(-ref => $ref, | |
852 -class => $class, | |
853 -start=> $start, | |
854 -stop => $stop, | |
855 @args); | |
856 } | |
857 | |
858 =head1 Internal Methods | |
859 | |
860 The following are internal methods and should not be called directly. | |
861 | |
862 =head2 new_from_segment | |
863 | |
864 Title : new_from_segment | |
865 Usage : $s = $segment->new_from_segment(@args) | |
866 Function: create a new relative segment | |
867 Returns : a new Bio::DB::GFF::RelSegment object | |
868 Args : see below | |
869 Status : Internal | |
870 | |
871 This constructor is used internally by the subseq() method. It forces | |
872 the new segment into the Bio::DB::GFF::RelSegment package, regardless | |
873 of the package that it is called from. This causes subclass-specfic | |
874 information, such as feature types, to be dropped when a subsequence | |
875 is created. | |
876 | |
877 =cut | |
878 | |
879 sub new_from_segment { | |
880 my $package = shift; | |
881 $package = ref $package if ref $package; | |
882 my $segment = shift; | |
883 my $new = {}; | |
884 @{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)} | |
885 = @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)}; | |
886 return bless $new,__PACKAGE__; | |
887 } | |
888 | |
889 =head2 _abs2rel | |
890 | |
891 Title : _abs2rel | |
892 Usage : @coords = $s->_abs2rel(@coords) | |
893 Function: convert absolute coordinates into relative coordinates | |
894 Returns : a list of relative coordinates | |
895 Args : a list of absolute coordinates | |
896 Status : Internal | |
897 | |
898 This is used internally to map from absolute to relative | |
899 coordinates. It does not take the offset of the reference sequence | |
900 into account, so please use abs2rel() instead. | |
901 | |
902 =cut | |
903 | |
904 sub _abs2rel { | |
905 my $self = shift; | |
906 my @result; | |
907 return unless defined $_[0]; | |
908 | |
909 if ($self->absolute) { | |
910 @result = @_; | |
911 } else { | |
912 my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)}; | |
913 @result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_ | |
914 : map { $_ - $refstart + 1 } @_; | |
915 } | |
916 # if called with a single argument, caller will expect a single scalar reply | |
917 # not the size of the returned array! | |
918 return $result[0] if @result == 1 and !wantarray; | |
919 @result; | |
920 } | |
921 | |
922 =head2 rel2abs | |
923 | |
924 Title : rel2abs | |
925 Usage : @coords = $s->rel2abs(@coords) | |
926 Function: convert relative coordinates into absolute coordinates | |
927 Returns : a list of absolute coordinates | |
928 Args : a list of relative coordinates | |
929 Status : Public | |
930 | |
931 This function takes a list of positions in relative coordinates to the | |
932 segment, and converts them into absolute coordinates. | |
933 | |
934 =cut | |
935 | |
936 sub rel2abs { | |
937 my $self = shift; | |
938 my @result; | |
939 | |
940 if ($self->absolute) { | |
941 @result = @_; | |
942 } else { | |
943 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); | |
944 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ | |
945 : map { $_ + $abs_start - 1 } @_; | |
946 } | |
947 # if called with a single argument, caller will expect a single scalar reply | |
948 # not the size of the returned array! | |
949 return $result[0] if @result == 1 and !wantarray; | |
950 @result; | |
951 } | |
952 | |
953 =head2 abs2rel | |
954 | |
955 Title : abs2rel | |
956 Usage : @rel_coords = $s-abs2rel(@abs_coords) | |
957 Function: convert absolute coordinates into relative coordinates | |
958 Returns : a list of relative coordinates | |
959 Args : a list of absolutee coordinates | |
960 Status : Public | |
961 | |
962 This function takes a list of positions in absolute coordinates | |
963 and returns a list expressed in relative coordinates. | |
964 | |
965 =cut | |
966 | |
967 sub abs2rel { | |
968 my $self = shift; | |
969 my @result; | |
970 | |
971 if ($self->absolute) { | |
972 @result = @_; | |
973 } else { | |
974 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); | |
975 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ | |
976 : map { $_ - $abs_start + 1 } @_; | |
977 } | |
978 # if called with a single argument, caller will expect a single scalar reply | |
979 # not the size of the returned array! | |
980 return $result[0] if @result == 1 and !wantarray; | |
981 @result; | |
982 } | |
983 | |
984 sub subseq { | |
985 my $self = shift; | |
986 my $obj = $self->SUPER::subseq(@_); | |
987 bless $obj,__PACKAGE__; # always bless into the generic RelSegment package | |
988 } | |
989 | |
990 sub strand { | |
991 my $self = shift; | |
992 if ($self->absolute) { | |
993 return _to_strand($self->{strand}); | |
994 } | |
995 return $self->stop <=> $self->start; | |
996 } | |
997 | |
998 sub _to_strand { | |
999 my $s = shift; | |
1000 return -1 if $s eq '-'; | |
1001 return +1 if $s eq '+'; | |
1002 return 0; | |
1003 } | |
1004 | |
1005 =head2 Bio::RangeI Methods | |
1006 | |
1007 The following Bio::RangeI methods are supported: | |
1008 | |
1009 overlaps(), contains(), equals(),intersection(),union(),overlap_extent() | |
1010 | |
1011 =cut | |
1012 | |
1013 sub intersection { | |
1014 my $self = shift; | |
1015 my (@ranges) = @_; | |
1016 unshift @ranges,$self if ref $self; | |
1017 $ranges[0]->isa('Bio::DB::GFF::RelSegment') | |
1018 or return $self->SUPER::intersection(@_); | |
1019 | |
1020 my $ref = $ranges[0]->abs_ref; | |
1021 my ($low,$high); | |
1022 foreach (@ranges) { | |
1023 return unless $_->can('abs_ref'); | |
1024 $ref eq $_->abs_ref or return; | |
1025 $low = $_->abs_low if !defined($low) or $low < $_->abs_low; | |
1026 $high = $_->abs_high if !defined($high) or $high > $_->abs_high; | |
1027 } | |
1028 return unless $low < $high; | |
1029 $self->new(-factory=> $self->factory, | |
1030 -seq => $ref, | |
1031 -start => $low, | |
1032 -stop => $high); | |
1033 } | |
1034 | |
1035 sub overlaps { | |
1036 my $self = shift; | |
1037 my($other,$so) = @_; | |
1038 return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); | |
1039 return if $self->abs_ref ne $other->abs_ref; | |
1040 return if $self->abs_low > $other->abs_high; | |
1041 return if $self->abs_high < $other->abs_low; | |
1042 1; | |
1043 } | |
1044 | |
1045 sub contains { | |
1046 my $self = shift; | |
1047 my($other,$so) = @_; | |
1048 return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); | |
1049 return if $self->abs_ref ne $other->abs_ref; | |
1050 return unless $self->abs_low <= $other->abs_low; | |
1051 return unless $self->abs_high >= $other->abs_high; | |
1052 1; | |
1053 } | |
1054 | |
1055 sub union { | |
1056 my $self = shift; | |
1057 my (@ranges) = @_; | |
1058 unshift @ranges,$self if ref $self; | |
1059 $ranges[0]->isa('Bio::DB::GFF::RelSegment') | |
1060 or return $self->SUPER::union(@_); | |
1061 | |
1062 my $ref = $ranges[0]->abs_ref; | |
1063 my ($low,$high); | |
1064 foreach (@ranges) { | |
1065 return unless $_->can('abs_ref'); | |
1066 $ref eq $_->abs_ref or return; | |
1067 $low = $_->abs_low if !defined($low) or $low > $_->abs_low; | |
1068 $high = $_->abs_high if !defined($high) or $high < $_->abs_high; | |
1069 } | |
1070 $self->new(-factory=> $self->factory, | |
1071 -seq => $ref, | |
1072 -start => $low, | |
1073 -stop => $high); | |
1074 } | |
1075 | |
1076 | |
1077 1; | |
1078 | |
1079 __END__ | |
1080 | |
1081 =head1 BUGS | |
1082 | |
1083 Schemas need some work. | |
1084 | |
1085 =head1 SEE ALSO | |
1086 | |
1087 L<bioperl> | |
1088 | |
1089 =head1 AUTHOR | |
1090 | |
1091 Lincoln Stein E<lt>lstein@cshl.orgE<gt>. | |
1092 | |
1093 Copyright (c) 2001 Cold Spring Harbor Laboratory. | |
1094 | |
1095 This library is free software; you can redistribute it and/or modify | |
1096 it under the same terms as Perl itself. | |
1097 | |
1098 =cut | |
1099 |