0
|
1 =head1 NAME
|
|
2
|
|
3 Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support
|
|
4
|
|
5 =head1 SYNOPSIS
|
|
6
|
|
7 See L<Bio::DB::GFF>.
|
|
8
|
|
9 =head1 DESCRIPTION
|
|
10
|
|
11 Bio::DB::GFF::RelSegment is a stretch of sequence that can handle
|
|
12 relative coordinate addressing. It inherits from
|
|
13 Bio::DB::GFF::Segment, and is the base class for
|
|
14 Bio::DB::GFF::Feature.
|
|
15
|
|
16 In addition to the source sequence, a relative segment has a
|
|
17 "reference sequence", which is used as the basis for its coordinate
|
|
18 system. The reference sequence can be changed at will, allowing you
|
|
19 freedom to change the "frame of reference" for features contained
|
|
20 within the segment. For example, by setting a segment's reference
|
|
21 sequence to the beginning of a gene, you can view all other features
|
|
22 in gene-relative coordinates.
|
|
23
|
|
24 The reference sequence and the source sequence must be on the same
|
|
25 physical stretch of DNA, naturally. However, they do not have to be
|
|
26 on the same strand. The strandedness of the reference sequence
|
|
27 determines whether coordinates increase to the right or the left.
|
|
28
|
|
29 Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment
|
|
30 objects directly, but use those that are returned by the Bio::DB::GFF
|
|
31 module.
|
|
32
|
|
33 =head2 An Example
|
|
34
|
|
35 To understand how relative coordinates work, consider the following
|
|
36 example from the C. elegans database. First we create the appropriate
|
|
37 GFF accessor object (the factory):
|
|
38
|
|
39 my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans',
|
|
40 -adaptor=>'dbi:mysqlopt');
|
|
41
|
|
42 Now we fetch out a segment based on cosmid clone ZK909:
|
|
43
|
|
44 my $seg = $db->segment('ZK909');
|
|
45
|
|
46 If we call the segment's refseq() method, we see that the base of the
|
|
47 coordinate system is the sequence "ZK154", and that its start and
|
|
48 stop positions are 1 and the length of the cosmid:
|
|
49
|
|
50 print $seg->refseq;
|
|
51 => ZK909
|
|
52
|
|
53 print $seg->start,' - ',$seg->stop;
|
|
54 => 1 - 33782
|
|
55
|
|
56 As a convenience, the "" operator is overloaded in this class, to give
|
|
57 the reference sequence, and start and stop positions:
|
|
58
|
|
59 print $seg;
|
|
60 => ZK909:1,33782
|
|
61
|
|
62 Internally, Bio::DB::GFF::RelSegment has looked up the absolute
|
|
63 coordinates of this segment and maintains the source sequence and the
|
|
64 absolute coordinates relative to the source sequence. We can see this
|
|
65 information using sourceseq() (inherited from Bio::DB::GFF::Segment)
|
|
66 and the abs_start() and abs_end() methods:
|
|
67
|
|
68 print $seg->sourceseq;
|
|
69 => CHROMOSOME_I
|
|
70
|
|
71 print $seg->abs_start,' - ',$seg->abs_end;
|
|
72 => 14839545 - 14873326
|
|
73
|
|
74 We can also put the segment into absolute mode, so that it behaves
|
|
75 like Bio::DB::Segment, and always represents coordinates on the source
|
|
76 sequence. This is done by passing a true value to the absolute()
|
|
77 method:
|
|
78
|
|
79 $seq->absolute(1);
|
|
80 print $seg;
|
|
81 => CHROMOSOME_I:14839545,14873326
|
|
82
|
|
83 We can change the reference sequence at any time. One way is to call
|
|
84 the segment's ref() method, giving it the ID (and optionally the
|
|
85 class) of another landmark on the genome. For example, if we know
|
|
86 that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in
|
|
87 ZK337-relative coordinates:
|
|
88
|
|
89 $seg->refseq('ZK337');
|
|
90 print $seg;
|
|
91 => ZK337:-33670,111
|
|
92
|
|
93 We can call the segment's features() method in order to get the list
|
|
94 of contigs that overlap this segment (in the C. elegans database,
|
|
95 contigs have feature type "Sequence:Link"):
|
|
96
|
|
97 @links = $seg->features('Sequence:Link');
|
|
98
|
|
99 We can now set the reference sequence to the first of these contigs like so:
|
|
100
|
|
101 $seg->refseq($links[0]);
|
|
102 print $seg;
|
|
103 => Sequence:Link(LINK_Y95D11A):3997326,4031107
|
|
104
|
|
105 =cut
|
|
106
|
|
107 package Bio::DB::GFF::RelSegment;
|
|
108
|
|
109 use strict;
|
|
110
|
|
111 use Bio::DB::GFF::Feature;
|
|
112 use Bio::DB::GFF::Util::Rearrange;
|
|
113 use Bio::DB::GFF::Segment;
|
|
114 use Bio::RangeI;
|
|
115
|
|
116 use vars qw(@ISA);
|
|
117 @ISA = qw(Bio::DB::GFF::Segment);
|
|
118
|
|
119 use overload '""' => 'asString',
|
|
120 'bool' => sub { overload::StrVal(shift) },
|
|
121 fallback=>1;
|
|
122
|
|
123 =head1 API
|
|
124
|
|
125 The remainder of this document describes the API for
|
|
126 Bio::DB::GFF::Segment.
|
|
127
|
|
128 =cut
|
|
129
|
|
130 =head2 new
|
|
131
|
|
132 Title : new
|
|
133 Usage : $s = Bio::DB::GFF::RelSegment->new(@args)
|
|
134 Function: create a new relative segment
|
|
135 Returns : a new Bio::DB::GFF::RelSegment object
|
|
136 Args : see below
|
|
137 Status : Public
|
|
138
|
|
139 This method creates a new Bio::DB::GFF::RelSegment object. Generally
|
|
140 this is called automatically by the Bio::DB::GFF module and
|
|
141 derivatives.
|
|
142
|
|
143 This function uses a named-argument style:
|
|
144
|
|
145 -factory a Bio::DB::GFF::Adaptor to use for database access
|
|
146 -seq ID of the source sequence
|
|
147 -class class of the source sequence
|
|
148 -start start of the desired segment relative to source sequence
|
|
149 -stop stop of the desired segment relative to source sequence
|
|
150 -ref ID of the reference sequence
|
|
151 -refclass class of the reference sequence
|
|
152 -offset 0-based offset from source sequence to start of segment
|
|
153 -length length of desired segment
|
|
154 -absolute, -force_absolute
|
|
155 use absolute coordinates, rather than coordinates relative
|
|
156 to the start of self or the reference sequence
|
|
157
|
|
158 The -seq argument accepts the ID of any landmark in the database. The
|
|
159 stored source sequence becomes whatever the GFF file indicates is the
|
|
160 proper sequence for this landmark. A class of "Sequence" is assumed
|
|
161 unless otherwise specified in the -class argument.
|
|
162
|
|
163 If the argument to -seq is a Bio::GFF::Featname object (such as
|
|
164 returned by the group() method), then the class is taken from that.
|
|
165
|
|
166 The optional -start and -stop arguments specify the end points for the
|
|
167 retrieved segment. For those who do not like 1-based indexing,
|
|
168 -offset and -length are provided. If both -start/-stop and
|
|
169 -offset/-length are provided, the latter overrides the former.
|
|
170 Generally it is not a good idea to mix metaphors.
|
|
171
|
|
172 -ref and -refclass together indicate a sequence to be used for
|
|
173 relative coordinates. If not provided, the source sequence indicated
|
|
174 by -seq is used as the reference sequence. If the argument to -ref is
|
|
175 a Bio::GFF::Featname object (such as returned by the group() method),
|
|
176 then the class is taken from that.
|
|
177
|
|
178 -force_absolute should be used if you wish to skip the lookup of the
|
|
179 absolute position of the source sequence that ordinarily occurs when
|
|
180 you create a relative segment. In this case, the source sequence must
|
|
181 be a sequence that has been specified as the "source" in the GFF file.
|
|
182
|
|
183 =cut
|
|
184
|
|
185 # Create a new Bio::DB::GFF::RelSegment Object
|
|
186 # arguments are:
|
|
187 # -factory => factory and DBI interface
|
|
188 # -seq => $sequence_name
|
|
189 # -start => $start_relative_to_sequence
|
|
190 # -stop => $stop_relative_to_sequence
|
|
191 # -ref => $sequence which establishes coordinate system
|
|
192 # -offset => 0-based offset relative to sequence
|
|
193 # -length => length of segment
|
|
194 # -nocheck => turn off checking, force segment to be constructed
|
|
195 # -absolute => use absolute coordinate addressing
|
|
196 #'
|
|
197 sub new {
|
|
198 my $package = shift;
|
|
199 my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) =
|
|
200 rearrange([
|
|
201 'FACTORY',
|
|
202 [qw(NAME SEQ SEQUENCE SOURCESEQ)],
|
|
203 [qw(START BEGIN)],
|
|
204 [qw(STOP END)],
|
|
205 [qw(REFSEQ REF REFNAME)],
|
|
206 [qw(CLASS SEQCLASS)],
|
|
207 qw(REFCLASS),
|
|
208 [qw(OFFSET OFF)],
|
|
209 [qw(LENGTH LEN)],
|
|
210 [qw(ABSOLUTE)],
|
|
211 [qw(NOCHECK FORCE)],
|
|
212 ],@_);
|
|
213
|
|
214 $package = ref $package if ref $package;
|
|
215 $factory or $package->throw("new(): provide a -factory argument");
|
|
216
|
|
217 # to allow people to use segments as sources
|
|
218 if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) {
|
|
219 $start = 1 unless defined $start;
|
|
220 $stop = $name->length unless defined $stop;
|
|
221 return $name->subseq($start,$stop);
|
|
222 }
|
|
223
|
|
224 my @object_results;
|
|
225
|
|
226 # support for Featname objects
|
|
227 if (ref($name) && $name->can('class')) {
|
|
228 $class = $name->class;
|
|
229 $name = $name->name;
|
|
230 }
|
|
231 # if the class of the landmark is not specified then default to 'Sequence'
|
|
232 $class ||= eval{$factory->default_class} || 'Sequence';
|
|
233
|
|
234 # confirm that indicated sequence is actually in the database!
|
|
235 my @abscoords;
|
|
236
|
|
237 # abscoords() will now return an array ref, each element of which is
|
|
238 # ($absref,$absclass,$absstart,$absstop,$absstrand)
|
|
239
|
|
240 if ($nocheck) {
|
|
241 $force_absolute++;
|
|
242 $start = 1;
|
|
243 }
|
|
244
|
|
245 if ($force_absolute && defined($start)) { # absolute position is given to us
|
|
246 @abscoords = ([$name,$class,$start,$stop,'+']);
|
|
247 } else {
|
|
248 my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return;
|
|
249 @abscoords = @$result;
|
|
250 }
|
|
251
|
|
252 foreach (@abscoords) {
|
|
253 my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_;
|
|
254 $sname = $name unless defined $sname;
|
|
255 my ($this_start,$this_stop,$this_length) = ($start,$stop,$length);
|
|
256
|
|
257 # partially fill in object
|
|
258 my $self = bless { factory => $factory },$package;
|
|
259
|
|
260 $absstrand ||= '+';
|
|
261
|
|
262 # an explicit length overrides start and stop
|
|
263 if (defined $offset) {
|
|
264 warn "new(): bad idea to call new() with both a start and an offset"
|
|
265 if defined $this_start;
|
|
266 $this_start = $offset+1;
|
|
267 }
|
|
268 if (defined $this_length) {
|
|
269 warn "new(): bad idea to call new() with both a stop and a length"
|
|
270 if defined $this_stop;
|
|
271 $this_stop = $this_start + $length - 1;
|
|
272 }
|
|
273
|
|
274 # this allows a SQL optimization way down deep
|
|
275 $self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop);
|
|
276
|
|
277 $this_start = 1 if !defined $this_start;
|
|
278 $this_stop = $absstop-$absstart+1 if !defined $this_stop;
|
|
279 $this_length = $this_stop - $this_start + 1;
|
|
280
|
|
281 # now offset to correct subsegment based on desired start and stop
|
|
282 if ($force_absolute) {
|
|
283 ($this_start,$this_stop) = ($absstart,$absstop);
|
|
284 $self->absolute(1);
|
|
285 } elsif ($absstrand eq '+') {
|
|
286 $this_start = $absstart + $this_start - 1;
|
|
287 $this_stop = $this_start + $this_length - 1;
|
|
288 } else {
|
|
289 $this_start = $absstop - ($this_start - 1);
|
|
290 $this_stop = $absstop - ($this_stop - 1);
|
|
291 }
|
|
292
|
|
293 # handle truncation in either direction
|
|
294 # This only happens if the segment runs off the end of
|
|
295 # the reference sequence
|
|
296 if ($factory->strict_bounds_checking &&
|
|
297 (($this_start < $absstart) || ($this_stop > $absstop))) {
|
|
298 # return empty if we are completely off the end of the ref se
|
|
299 next unless $this_start<=$absstop && $this_stop>=$absstart;
|
|
300 if (my $a = $factory->abscoords($absref,'Sequence')) {
|
|
301 my $refstart = $a->[0][2];
|
|
302 my $refstop = $a->[0][3];
|
|
303 if ($this_start < $refstart) {
|
|
304 $this_start = $refstart;
|
|
305 $self->{truncated}{start}++;
|
|
306 }
|
|
307 if ($this_stop > $refstop) {
|
|
308 $this_stop = $absstop;
|
|
309 $self->{truncated}{stop}++;
|
|
310 }
|
|
311 }
|
|
312 }
|
|
313
|
|
314 @{$self}{qw(sourceseq start stop strand class)}
|
|
315 = ($absref,$this_start,$this_stop,$absstrand,$absclass);
|
|
316
|
|
317 # handle reference sequence
|
|
318 if (defined $refseq) {
|
|
319 $refclass = $refseq->class if $refseq->can('class');
|
|
320 $refclass ||= 'Sequence';
|
|
321 my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass);
|
|
322 unless ($refref eq $absref) {
|
|
323 $self->error("reference sequence is on $refref but source sequence is on $absref");
|
|
324 return;
|
|
325 }
|
|
326 $refstart = $refstop if $refstrand eq '-';
|
|
327 @{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand);
|
|
328 } else {
|
|
329 $absstart = $absstop if $absstrand eq '-';
|
|
330 @{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand);
|
|
331 }
|
|
332 push @object_results,$self;
|
|
333 }
|
|
334
|
|
335 return wantarray ? @object_results : $object_results[0];
|
|
336 }
|
|
337
|
|
338 # overridden methods
|
|
339 # start, stop, length
|
|
340 sub start {
|
|
341 my $self = shift;
|
|
342 return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute;
|
|
343 $self->_abs2rel($self->{start});
|
|
344 }
|
|
345 sub end {
|
|
346 my $self = shift;
|
|
347 return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute;
|
|
348 $self->_abs2rel($self->{stop});
|
|
349 }
|
|
350 *stop = \&end;
|
|
351
|
|
352 sub length {
|
|
353 my $self = shift;
|
|
354 return unless defined $self->abs_end;
|
|
355 abs($self->abs_end - $self->abs_start) + 1;
|
|
356 }
|
|
357
|
|
358 sub abs_start {
|
|
359 my $self = shift;
|
|
360 if ($self->absolute) {
|
|
361 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
|
|
362 return ($a<$b) ? $a : $b;
|
|
363 }
|
|
364 else {
|
|
365 return $self->SUPER::abs_start(@_);
|
|
366 }
|
|
367 }
|
|
368 sub abs_end {
|
|
369 my $self = shift;
|
|
370 if ($self->absolute) {
|
|
371 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
|
|
372 return ($a>$b) ? $a : $b;
|
|
373 }
|
|
374
|
|
375 else {
|
|
376 return $self->SUPER::abs_end(@_);
|
|
377 }
|
|
378 }
|
|
379
|
|
380 =head2 refseq
|
|
381
|
|
382 Title : refseq
|
|
383 Usage : $ref = $s->refseq([$newseq] [,$newseqclass])
|
|
384 Function: get/set reference sequence
|
|
385 Returns : current reference sequence
|
|
386 Args : new reference sequence and class (optional)
|
|
387 Status : Public
|
|
388
|
|
389 This method will get or set the reference sequence. Called with no
|
|
390 arguments, it returns the current reference sequence. Called with
|
|
391 either a sequence ID and class, a Bio::DB::GFF::Segment object (or
|
|
392 subclass) or a Bio::DB::GFF::Featname object, it will set the current
|
|
393 reference sequence and return the previous one.
|
|
394
|
|
395 The method will generate an exception if you attempt to set the
|
|
396 reference sequence to a sequence that isn't contained in the database,
|
|
397 or one that has a different source sequence from the segment.
|
|
398
|
|
399 =cut
|
|
400
|
|
401 #'
|
|
402 sub refseq {
|
|
403 my $self = shift;
|
|
404 my $g = $self->{ref};
|
|
405 if (@_) {
|
|
406 my ($newref,$newclass);
|
|
407 if (@_ == 2) {
|
|
408 $newclass = shift;
|
|
409 $newref = shift;
|
|
410 } else {
|
|
411 $newref = shift;
|
|
412 $newclass = 'Sequence';
|
|
413 }
|
|
414
|
|
415 defined $newref or $self->throw('refseq() called with an undef reference sequence');
|
|
416
|
|
417 # support for Featname objects
|
|
418 $newclass = $newref->class if ref($newref) && $newref->can('class');
|
|
419
|
|
420 # $self->throw("Cannot define a segment's reference sequence in terms of itself!")
|
|
421 # if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self);
|
|
422
|
|
423 my ($refsource,undef,$refstart,$refstop,$refstrand);
|
|
424 if ($newref->isa('Bio::DB::GFF::RelSegment')) {
|
|
425 ($refsource,undef,$refstart,$refstop,$refstrand) =
|
|
426 ($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-');
|
|
427 } else {
|
|
428 my $coords = $self->factory->abscoords($newref,$newclass);
|
|
429 foreach (@$coords) { # find the appropriate one
|
|
430 ($refsource,undef,$refstart,$refstop,$refstrand) = @$_;
|
|
431 last if $refsource eq $self->{sourceseq};
|
|
432 }
|
|
433
|
|
434 }
|
|
435 $self->throw("can't set reference sequence: $newref and $self are on different sequence segments")
|
|
436 unless $refsource eq $self->{sourceseq};
|
|
437
|
|
438 @{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand);
|
|
439 $self->absolute(0);
|
|
440 }
|
|
441 return $self->absolute ? $self->sourceseq : $g;
|
|
442 }
|
|
443
|
|
444
|
|
445 =head2 abs_low
|
|
446
|
|
447 Title : abs_low
|
|
448 Usage : $s->abs_low
|
|
449 Function: the absolute lowest coordinate of the segment
|
|
450 Returns : an integer
|
|
451 Args : none
|
|
452 Status : Public
|
|
453
|
|
454 This is for GadFly compatibility, and returns the low coordinate in
|
|
455 absolute coordinates;
|
|
456
|
|
457 =cut
|
|
458
|
|
459 sub abs_low {
|
|
460 my $self = shift;
|
|
461 my ($a,$b) = ($self->abs_start,$self->abs_end);
|
|
462 return ($a<$b) ? $a : $b;
|
|
463 }
|
|
464
|
|
465 =head2 abs_high
|
|
466
|
|
467 Title : abs_high
|
|
468 Usage : $s->abs_high
|
|
469 Function: the absolute highest coordinate of the segment
|
|
470 Returns : an integer
|
|
471 Args : none
|
|
472 Status : Public
|
|
473
|
|
474 This is for GadFly compatibility, and returns the high coordinate in
|
|
475 absolute coordinates;
|
|
476
|
|
477 =cut
|
|
478
|
|
479 sub abs_high {
|
|
480 my $self = shift;
|
|
481 my ($a,$b) = ($self->abs_start,$self->abs_end);
|
|
482 return ($a>$b) ? $a : $b;
|
|
483 }
|
|
484
|
|
485
|
|
486 =head2 asString
|
|
487
|
|
488 Title : asString
|
|
489 Usage : $s->asString
|
|
490 Function: human-readable representation of the segment
|
|
491 Returns : a string
|
|
492 Args : none
|
|
493 Status : Public
|
|
494
|
|
495 This method will return a human-readable representation of the
|
|
496 segment. It is the overloaded method call for the "" operator.
|
|
497
|
|
498 Currently the format is:
|
|
499
|
|
500 refseq:start,stop
|
|
501
|
|
502 =cut
|
|
503
|
|
504 sub asString {
|
|
505 my $self = shift;
|
|
506 return $self->SUPER::asString if $self->absolute;
|
|
507 my $label = $self->{ref};
|
|
508 my $start = $self->start || '';
|
|
509 my $stop = $self->stop || '';
|
|
510 if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) {
|
|
511 $label = $self->abs_ref;
|
|
512 $start = $self->abs_start;
|
|
513 $stop = $self->abs_end;
|
|
514 }
|
|
515 return "$label:$start,$stop";
|
|
516 }
|
|
517
|
|
518 sub name { shift->asString }
|
|
519
|
|
520 =head2 absolute
|
|
521
|
|
522 Title : absolute
|
|
523 Usage : $abs = $s->absolute([$abs])
|
|
524 Function: get/set absolute coordinates
|
|
525 Returns : a boolean flag
|
|
526 Args : new setting for flag (optional)
|
|
527 Status : Public
|
|
528
|
|
529 Called with a boolean flag, this method controls whether to display
|
|
530 relative coordinates (relative to the reference sequence) or absolute
|
|
531 coordinates (relative to the source sequence). It will return the
|
|
532 previous value of the setting.
|
|
533
|
|
534 =cut
|
|
535
|
|
536 sub absolute {
|
|
537 my $self = shift;
|
|
538 my $g = $self->{absolute};
|
|
539 $self->{absolute} = shift if @_;
|
|
540 $g;
|
|
541 }
|
|
542
|
|
543 =head2 features
|
|
544
|
|
545 Title : features
|
|
546 Usage : @features = $s->features(@args)
|
|
547 Function: get features that overlap this segment
|
|
548 Returns : a list of Bio::DB::GFF::Feature objects
|
|
549 Args : see below
|
|
550 Status : Public
|
|
551
|
|
552 This method will find all features that overlap the segment and return
|
|
553 a list of Bio::DB::GFF::Feature objects. The features will use
|
|
554 coordinates relative to the reference sequence in effect at the time
|
|
555 that features() was called.
|
|
556
|
|
557 The returned list can be limited to certain types of feature by
|
|
558 filtering on their method and/or source. In addition, it is possible
|
|
559 to obtain an iterator that will step through a large number of
|
|
560 features sequentially.
|
|
561
|
|
562 Arguments can be provided positionally or using the named arguments
|
|
563 format. In the former case, the arguments are a list of feature types
|
|
564 in the format "method:source". Either method or source can be
|
|
565 omitted, in which case the missing component is treated as a wildcard.
|
|
566 If no colon is present, then the type is treated as a method name.
|
|
567 Multiple arguments are ORed together.
|
|
568
|
|
569 Examples:
|
|
570
|
|
571 @f = $s->features('exon:curated'); # all curated exons
|
|
572 @f = $s->features('exon:curated','intron'); # curated exons and all introns
|
|
573 @f = $s->features('similarity:.*EST.*'); # all similarities
|
|
574 # having something to do
|
|
575 # with ESTs
|
|
576
|
|
577 The named parameter form gives you control over a few options:
|
|
578
|
|
579 -types an array reference to type names in the format
|
|
580 "method:source"
|
|
581
|
|
582 -merge Whether to apply aggregators to the generated features (default yes)
|
|
583
|
|
584 -rare Turn on an optimization suitable for a relatively rare feature type,
|
|
585 where it will be faster to filter by feature type first
|
|
586 and then by position, rather than vice versa.
|
|
587
|
|
588 -attributes a hashref containing a set of attributes to match
|
|
589
|
|
590 -iterator Whether to return an iterator across the features.
|
|
591
|
|
592 -binsize A true value will create a set of artificial features whose
|
|
593 start and stop positions indicate bins of the given size, and
|
|
594 whose scores are the number of features in the bin. The
|
|
595 class and method of the feature will be set to "bin",
|
|
596 its source to "method:source", and its group to "bin:method:source".
|
|
597 This is a handy way of generating histograms of feature density.
|
|
598
|
|
599 -merge is a boolean flag that controls whether the adaptor's
|
|
600 aggregators wll be applied to the features returned by this method.
|
|
601
|
|
602 If -iterator is true, then the method returns a single scalar value
|
|
603 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
|
|
604 on this object to fetch each of the features in turn. If iterator is
|
|
605 false or absent, then all the features are returned as a list.
|
|
606
|
|
607 The -attributes argument is a hashref containing one or more
|
|
608 attributes to match against:
|
|
609
|
|
610 -attributes => { Gene => 'abc-1',
|
|
611 Note => 'confirmed' }
|
|
612
|
|
613 Attribute matching is simple string matching, and multiple attributes
|
|
614 are ANDed together.
|
|
615
|
|
616 =cut
|
|
617
|
|
618 #'
|
|
619
|
|
620 # return all features that overlap with this segment;
|
|
621 # optionally modified by a list of types to filter on
|
|
622 sub features {
|
|
623 my $self = shift;
|
|
624 my @args = $self->_process_feature_args(@_);
|
|
625 return $self->factory->overlapping_features(@args);
|
|
626 }
|
|
627
|
|
628 =head2 top_SeqFeatures
|
|
629
|
|
630 Title : top_SeqFeatures
|
|
631 Usage :
|
|
632 Function:
|
|
633 Example :
|
|
634 Returns :
|
|
635 Args :
|
|
636
|
|
637 Alias for features(). Provided for Bio::SeqI compatibility.
|
|
638
|
|
639 =cut
|
|
640
|
|
641 =head2 all_SeqFeatures
|
|
642
|
|
643 Title : all_SeqFeatures
|
|
644 Usage :
|
|
645 Function:
|
|
646 Example :
|
|
647 Returns :
|
|
648 Args :
|
|
649
|
|
650 Alias for features(). Provided for Bio::SeqI compatibility.
|
|
651
|
|
652 =cut
|
|
653
|
|
654 =head2 sub_SeqFeatures
|
|
655
|
|
656 Title : sub_SeqFeatures
|
|
657 Usage :
|
|
658 Function:
|
|
659 Example :
|
|
660 Returns :
|
|
661 Args :
|
|
662
|
|
663 Alias for features(). Provided for Bio::SeqI compatibility.
|
|
664
|
|
665 =cut
|
|
666
|
|
667 *top_SeqFeatures = *all_SeqFeatures = \&features;
|
|
668
|
|
669 =head2 get_feature_stream
|
|
670
|
|
671 Title : features
|
|
672 Usage : $stream = $s->get_feature_stream(@args)
|
|
673 Function: get a stream of features that overlap this segment
|
|
674 Returns : a Bio::SeqIO::Stream-compliant stream
|
|
675 Args : see below
|
|
676 Status : Public
|
|
677
|
|
678 This is the same as features(), but returns a stream. Use like this:
|
|
679
|
|
680 $stream = $s->get_feature_stream('exon');
|
|
681 while (my $exon = $stream->next_seq) {
|
|
682 print $exon->start,"\n";
|
|
683 }
|
|
684
|
|
685 =cut
|
|
686
|
|
687 sub get_feature_stream {
|
|
688 my $self = shift;
|
|
689 my @args = $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1);
|
|
690 $self->features(@args);
|
|
691 }
|
|
692
|
|
693 =head2 get_seq_stream
|
|
694
|
|
695 Title : get_seq_stream
|
|
696 Usage : $stream = $s->get_seq_stream(@args)
|
|
697 Function: get a stream of features that overlap this segment
|
|
698 Returns : a Bio::SeqIO::Stream-compliant stream
|
|
699 Args : see below
|
|
700 Status : Public
|
|
701
|
|
702 This is the same as feature_stream(), and is provided for Bioperl
|
|
703 compatibility. Use like this:
|
|
704
|
|
705 $stream = $s->get_seq_stream('exon');
|
|
706 while (my $exon = $stream->next_seq) {
|
|
707 print $exon->start,"\n";
|
|
708 }
|
|
709
|
|
710 =cut
|
|
711
|
|
712 *get_seq_stream = \&get_feature_stream;
|
|
713
|
|
714
|
|
715 =head2 overlapping_features
|
|
716
|
|
717 Title : overlapping_features
|
|
718 Usage : @features = $s->overlapping_features(@args)
|
|
719 Function: get features that overlap this segment
|
|
720 Returns : a list of Bio::DB::GFF::Feature objects
|
|
721 Args : see features()
|
|
722 Status : Public
|
|
723
|
|
724 This is an alias for the features() method, and takes the same
|
|
725 arguments.
|
|
726
|
|
727 =cut
|
|
728
|
|
729 *overlapping_features = \&features;
|
|
730
|
|
731 =head2 contained_features
|
|
732
|
|
733 Title : contained_features
|
|
734 Usage : @features = $s->contained_features(@args)
|
|
735 Function: get features that are contained by this segment
|
|
736 Returns : a list of Bio::DB::GFF::Feature objects
|
|
737 Args : see features()
|
|
738 Status : Public
|
|
739
|
|
740 This is identical in behavior to features() except that it returns
|
|
741 only those features that are completely contained within the segment,
|
|
742 rather than any that overlap.
|
|
743
|
|
744 =cut
|
|
745
|
|
746 # return all features completely contained within this segment
|
|
747 sub contained_features {
|
|
748 my $self = shift;
|
|
749 local $self->{whole} = 0;
|
|
750 my @args = $self->_process_feature_args(@_);
|
|
751 return $self->factory->contained_features(@args);
|
|
752 }
|
|
753
|
|
754 # *contains = \&contained_features;
|
|
755
|
|
756 =head2 contained_in
|
|
757
|
|
758 Title : contained_in
|
|
759 Usage : @features = $s->contained_in(@args)
|
|
760 Function: get features that contain this segment
|
|
761 Returns : a list of Bio::DB::GFF::Feature objects
|
|
762 Args : see features()
|
|
763 Status : Public
|
|
764
|
|
765 This is identical in behavior to features() except that it returns
|
|
766 only those features that completely contain the segment.
|
|
767
|
|
768 =cut
|
|
769
|
|
770 # return all features completely contained within this segment
|
|
771 sub contained_in {
|
|
772 my $self = shift;
|
|
773 local $self->{whole} = 0;
|
|
774 my @args = $self->_process_feature_args(@_);
|
|
775 return $self->factory->contained_in(@args);
|
|
776 }
|
|
777
|
|
778 =head2 _process_feature_args
|
|
779
|
|
780 Title : _process_feature_args
|
|
781 Usage : @args = $s->_process_feature_args(@args)
|
|
782 Function: preprocess arguments passed to features,
|
|
783 contained_features, and overlapping_features
|
|
784 Returns : a list of parsed arguents
|
|
785 Args : see feature()
|
|
786 Status : Internal
|
|
787
|
|
788 This is an internal method that is used to check and format the
|
|
789 arguments to features() before passing them on to the adaptor.
|
|
790
|
|
791 =cut
|
|
792
|
|
793 sub _process_feature_args {
|
|
794 my $self = shift;
|
|
795
|
|
796 my ($ref,$class,$start,$stop,$strand,$whole)
|
|
797 = @{$self}{qw(sourceseq class start stop strand whole)};
|
|
798
|
|
799 ($start,$stop) = ($stop,$start) if $strand eq '-';
|
|
800
|
|
801 my @args = (-ref=>$ref,-class=>$class);
|
|
802
|
|
803 # indicating that we are fetching the whole segment allows certain
|
|
804 # SQL optimizations.
|
|
805 push @args,(-start=>$start,-stop=>$stop) unless $whole;
|
|
806
|
|
807 if (@_) {
|
|
808 if ($_[0] =~ /^-/) {
|
|
809 push @args,@_;
|
|
810 } else {
|
|
811 my @types = @_;
|
|
812 push @args,-types=>\@types;
|
|
813 }
|
|
814 }
|
|
815 push @args,-parent=>$self;
|
|
816 @args;
|
|
817 }
|
|
818
|
|
819 =head2 types
|
|
820
|
|
821 Title : types
|
|
822 Usage : @types = $s->types([-enumerate=>1])
|
|
823 Function: list feature types that overlap this segment
|
|
824 Returns : a list of Bio::DB::GFF::Typename objects or a hash
|
|
825 Args : see below
|
|
826 Status : Public
|
|
827
|
|
828 The types() method will return a list of Bio::DB::GFF::Typename
|
|
829 objects, each corresponding to a feature that overlaps the segment.
|
|
830 If the optional -enumerate parameter is set to a true value, then the
|
|
831 method will return a hash in which the keys are the type names and the
|
|
832 values are the number of times a feature of that type is present on
|
|
833 the segment. For example:
|
|
834
|
|
835 %count = $s->types(-enumerate=>1);
|
|
836
|
|
837 =cut
|
|
838
|
|
839 # wrapper for lower-level types() call.
|
|
840 sub types {
|
|
841 my $self = shift;
|
|
842 my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)};
|
|
843 ($start,$stop) = ($stop,$start) if $strand eq '-';
|
|
844
|
|
845 my @args;
|
|
846 if (@_ && $_[0] !~ /^-/) {
|
|
847 @args = (-type => \@_)
|
|
848 } else {
|
|
849 @args = @_;
|
|
850 }
|
|
851 $self->factory->types(-ref => $ref,
|
|
852 -class => $class,
|
|
853 -start=> $start,
|
|
854 -stop => $stop,
|
|
855 @args);
|
|
856 }
|
|
857
|
|
858 =head1 Internal Methods
|
|
859
|
|
860 The following are internal methods and should not be called directly.
|
|
861
|
|
862 =head2 new_from_segment
|
|
863
|
|
864 Title : new_from_segment
|
|
865 Usage : $s = $segment->new_from_segment(@args)
|
|
866 Function: create a new relative segment
|
|
867 Returns : a new Bio::DB::GFF::RelSegment object
|
|
868 Args : see below
|
|
869 Status : Internal
|
|
870
|
|
871 This constructor is used internally by the subseq() method. It forces
|
|
872 the new segment into the Bio::DB::GFF::RelSegment package, regardless
|
|
873 of the package that it is called from. This causes subclass-specfic
|
|
874 information, such as feature types, to be dropped when a subsequence
|
|
875 is created.
|
|
876
|
|
877 =cut
|
|
878
|
|
879 sub new_from_segment {
|
|
880 my $package = shift;
|
|
881 $package = ref $package if ref $package;
|
|
882 my $segment = shift;
|
|
883 my $new = {};
|
|
884 @{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)}
|
|
885 = @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)};
|
|
886 return bless $new,__PACKAGE__;
|
|
887 }
|
|
888
|
|
889 =head2 _abs2rel
|
|
890
|
|
891 Title : _abs2rel
|
|
892 Usage : @coords = $s->_abs2rel(@coords)
|
|
893 Function: convert absolute coordinates into relative coordinates
|
|
894 Returns : a list of relative coordinates
|
|
895 Args : a list of absolute coordinates
|
|
896 Status : Internal
|
|
897
|
|
898 This is used internally to map from absolute to relative
|
|
899 coordinates. It does not take the offset of the reference sequence
|
|
900 into account, so please use abs2rel() instead.
|
|
901
|
|
902 =cut
|
|
903
|
|
904 sub _abs2rel {
|
|
905 my $self = shift;
|
|
906 my @result;
|
|
907 return unless defined $_[0];
|
|
908
|
|
909 if ($self->absolute) {
|
|
910 @result = @_;
|
|
911 } else {
|
|
912 my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)};
|
|
913 @result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_
|
|
914 : map { $_ - $refstart + 1 } @_;
|
|
915 }
|
|
916 # if called with a single argument, caller will expect a single scalar reply
|
|
917 # not the size of the returned array!
|
|
918 return $result[0] if @result == 1 and !wantarray;
|
|
919 @result;
|
|
920 }
|
|
921
|
|
922 =head2 rel2abs
|
|
923
|
|
924 Title : rel2abs
|
|
925 Usage : @coords = $s->rel2abs(@coords)
|
|
926 Function: convert relative coordinates into absolute coordinates
|
|
927 Returns : a list of absolute coordinates
|
|
928 Args : a list of relative coordinates
|
|
929 Status : Public
|
|
930
|
|
931 This function takes a list of positions in relative coordinates to the
|
|
932 segment, and converts them into absolute coordinates.
|
|
933
|
|
934 =cut
|
|
935
|
|
936 sub rel2abs {
|
|
937 my $self = shift;
|
|
938 my @result;
|
|
939
|
|
940 if ($self->absolute) {
|
|
941 @result = @_;
|
|
942 } else {
|
|
943 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
|
|
944 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
|
|
945 : map { $_ + $abs_start - 1 } @_;
|
|
946 }
|
|
947 # if called with a single argument, caller will expect a single scalar reply
|
|
948 # not the size of the returned array!
|
|
949 return $result[0] if @result == 1 and !wantarray;
|
|
950 @result;
|
|
951 }
|
|
952
|
|
953 =head2 abs2rel
|
|
954
|
|
955 Title : abs2rel
|
|
956 Usage : @rel_coords = $s-abs2rel(@abs_coords)
|
|
957 Function: convert absolute coordinates into relative coordinates
|
|
958 Returns : a list of relative coordinates
|
|
959 Args : a list of absolutee coordinates
|
|
960 Status : Public
|
|
961
|
|
962 This function takes a list of positions in absolute coordinates
|
|
963 and returns a list expressed in relative coordinates.
|
|
964
|
|
965 =cut
|
|
966
|
|
967 sub abs2rel {
|
|
968 my $self = shift;
|
|
969 my @result;
|
|
970
|
|
971 if ($self->absolute) {
|
|
972 @result = @_;
|
|
973 } else {
|
|
974 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
|
|
975 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
|
|
976 : map { $_ - $abs_start + 1 } @_;
|
|
977 }
|
|
978 # if called with a single argument, caller will expect a single scalar reply
|
|
979 # not the size of the returned array!
|
|
980 return $result[0] if @result == 1 and !wantarray;
|
|
981 @result;
|
|
982 }
|
|
983
|
|
984 sub subseq {
|
|
985 my $self = shift;
|
|
986 my $obj = $self->SUPER::subseq(@_);
|
|
987 bless $obj,__PACKAGE__; # always bless into the generic RelSegment package
|
|
988 }
|
|
989
|
|
990 sub strand {
|
|
991 my $self = shift;
|
|
992 if ($self->absolute) {
|
|
993 return _to_strand($self->{strand});
|
|
994 }
|
|
995 return $self->stop <=> $self->start;
|
|
996 }
|
|
997
|
|
998 sub _to_strand {
|
|
999 my $s = shift;
|
|
1000 return -1 if $s eq '-';
|
|
1001 return +1 if $s eq '+';
|
|
1002 return 0;
|
|
1003 }
|
|
1004
|
|
1005 =head2 Bio::RangeI Methods
|
|
1006
|
|
1007 The following Bio::RangeI methods are supported:
|
|
1008
|
|
1009 overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
|
|
1010
|
|
1011 =cut
|
|
1012
|
|
1013 sub intersection {
|
|
1014 my $self = shift;
|
|
1015 my (@ranges) = @_;
|
|
1016 unshift @ranges,$self if ref $self;
|
|
1017 $ranges[0]->isa('Bio::DB::GFF::RelSegment')
|
|
1018 or return $self->SUPER::intersection(@_);
|
|
1019
|
|
1020 my $ref = $ranges[0]->abs_ref;
|
|
1021 my ($low,$high);
|
|
1022 foreach (@ranges) {
|
|
1023 return unless $_->can('abs_ref');
|
|
1024 $ref eq $_->abs_ref or return;
|
|
1025 $low = $_->abs_low if !defined($low) or $low < $_->abs_low;
|
|
1026 $high = $_->abs_high if !defined($high) or $high > $_->abs_high;
|
|
1027 }
|
|
1028 return unless $low < $high;
|
|
1029 $self->new(-factory=> $self->factory,
|
|
1030 -seq => $ref,
|
|
1031 -start => $low,
|
|
1032 -stop => $high);
|
|
1033 }
|
|
1034
|
|
1035 sub overlaps {
|
|
1036 my $self = shift;
|
|
1037 my($other,$so) = @_;
|
|
1038 return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
|
|
1039 return if $self->abs_ref ne $other->abs_ref;
|
|
1040 return if $self->abs_low > $other->abs_high;
|
|
1041 return if $self->abs_high < $other->abs_low;
|
|
1042 1;
|
|
1043 }
|
|
1044
|
|
1045 sub contains {
|
|
1046 my $self = shift;
|
|
1047 my($other,$so) = @_;
|
|
1048 return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
|
|
1049 return if $self->abs_ref ne $other->abs_ref;
|
|
1050 return unless $self->abs_low <= $other->abs_low;
|
|
1051 return unless $self->abs_high >= $other->abs_high;
|
|
1052 1;
|
|
1053 }
|
|
1054
|
|
1055 sub union {
|
|
1056 my $self = shift;
|
|
1057 my (@ranges) = @_;
|
|
1058 unshift @ranges,$self if ref $self;
|
|
1059 $ranges[0]->isa('Bio::DB::GFF::RelSegment')
|
|
1060 or return $self->SUPER::union(@_);
|
|
1061
|
|
1062 my $ref = $ranges[0]->abs_ref;
|
|
1063 my ($low,$high);
|
|
1064 foreach (@ranges) {
|
|
1065 return unless $_->can('abs_ref');
|
|
1066 $ref eq $_->abs_ref or return;
|
|
1067 $low = $_->abs_low if !defined($low) or $low > $_->abs_low;
|
|
1068 $high = $_->abs_high if !defined($high) or $high < $_->abs_high;
|
|
1069 }
|
|
1070 $self->new(-factory=> $self->factory,
|
|
1071 -seq => $ref,
|
|
1072 -start => $low,
|
|
1073 -stop => $high);
|
|
1074 }
|
|
1075
|
|
1076
|
|
1077 1;
|
|
1078
|
|
1079 __END__
|
|
1080
|
|
1081 =head1 BUGS
|
|
1082
|
|
1083 Schemas need some work.
|
|
1084
|
|
1085 =head1 SEE ALSO
|
|
1086
|
|
1087 L<bioperl>
|
|
1088
|
|
1089 =head1 AUTHOR
|
|
1090
|
|
1091 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
|
|
1092
|
|
1093 Copyright (c) 2001 Cold Spring Harbor Laboratory.
|
|
1094
|
|
1095 This library is free software; you can redistribute it and/or modify
|
|
1096 it under the same terms as Perl itself.
|
|
1097
|
|
1098 =cut
|
|
1099
|