0
|
1 # $Id: SegmentI.pm,v 1.6 2002/12/22 03:42:22 lstein Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Das::SegmentI
|
|
4 #
|
|
5 # Cared for by Lincoln Stein <lstein@cshl.org>
|
|
6 #
|
|
7 # Copyright Lincoln Stein
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Das::SegmentI - DAS-style access to a feature database
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # Get a Bio::Das::SegmentI object from a Bio::DasI database...
|
|
20
|
|
21 $segment = $das->segment(-name=>'Landmark',
|
|
22 -start=>$start,
|
|
23 -end => $end);
|
|
24
|
|
25 @features = $segment->overlapping_features(-type=>['type1','type2']);
|
|
26 # each feature is a Bio::SeqFeatureI-compliant object
|
|
27
|
|
28 @features = $segment->contained_features(-type=>['type1','type2']);
|
|
29
|
|
30 @features = $segment->contained_in(-type=>['type1','type2']);
|
|
31
|
|
32 $stream = $segment->get_feature_stream(-type=>['type1','type2','type3']);
|
|
33 while (my $feature = $stream->next_seq) {
|
|
34 # do something with feature
|
|
35 }
|
|
36
|
|
37 $count = $segment->features_callback(-type=>['type1','type2','type3'],
|
|
38 -callback => sub { ... }
|
|
39 );
|
|
40
|
|
41 =head1 DESCRIPTION
|
|
42
|
|
43 Bio::Das::SegmentI is a simplified alternative interface to sequence
|
|
44 annotation databases used by the distributed annotation system. In
|
|
45 this scheme, the genome is represented as a series of landmarks. Each
|
|
46 Bio::Das::SegmentI object ("segment") corresponds to a genomic region
|
|
47 defined by a landmark and a start and end position relative to that
|
|
48 landmark. A segment is created using the Bio::DasI segment() method.
|
|
49
|
|
50 Features can be filtered by the following attributes:
|
|
51
|
|
52 1) their location relative to the segment (whether overlapping,
|
|
53 contained within, or completely containing)
|
|
54
|
|
55 2) their type
|
|
56
|
|
57 3) other attributes using tag/value semantics
|
|
58
|
|
59 Access to the feature list uses three distinct APIs:
|
|
60
|
|
61 1) fetching entire list of features at a time
|
|
62
|
|
63 2) fetching an iterator across features
|
|
64
|
|
65 3) a callback
|
|
66
|
|
67 =head1 FEEDBACK
|
|
68
|
|
69 =head2 Mailing Lists
|
|
70
|
|
71 User feedback is an integral part of the evolution of this and other
|
|
72 Bioperl modules. Send your comments and suggestions preferably to one
|
|
73 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
74
|
|
75 bioperl-l@bio.perl.org
|
|
76
|
|
77 =head2 Reporting Bugs
|
|
78
|
|
79 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
80 of the bugs and their resolution. Bug reports can be submitted via email
|
|
81 or the web:
|
|
82
|
|
83 bioperl-bugs@bio.perl.org
|
|
84 http://bugzilla.bioperl.org/
|
|
85
|
|
86 =head1 AUTHOR - Lincoln Stein
|
|
87
|
|
88 Email lstein@cshl.org
|
|
89
|
|
90 =head1 APPENDIX
|
|
91
|
|
92 The rest of the documentation details each of the object
|
|
93 methods. Internal methods are usually preceded with a _
|
|
94
|
|
95 =cut
|
|
96
|
|
97 #'
|
|
98 # Let the code begin...
|
|
99
|
|
100 package Bio::Das::SegmentI;
|
|
101 use strict;
|
|
102
|
|
103 use vars qw(@ISA $VERSION);
|
|
104 use Bio::Root::RootI;
|
|
105
|
|
106 # Object preamble - inherits from Bio::Root::RootI;
|
|
107 @ISA = qw(Bio::Root::RootI);
|
|
108 $VERSION = 1.00;
|
|
109
|
|
110 =head2 seq_id
|
|
111
|
|
112 Title : seq_id
|
|
113 Usage : $ref = $s->seq_id
|
|
114 Function: return the ID of the landmark
|
|
115 Returns : a string
|
|
116 Args : none
|
|
117 Status : Public
|
|
118
|
|
119 =cut
|
|
120
|
|
# Abstract accessor for the landmark ID.  The interface supplies no
# implementation: it only calls throw_not_implemented() on the invocant
# (a method expected to come from the parent class), so concrete
# segment classes must override seq_id().
sub seq_id {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
122
|
|
123 =head2 start
|
|
124
|
|
125 Title : start
|
|
126 Usage : $s->start
|
|
127 Function: start of segment
|
|
128 Returns : integer
|
|
129 Args : none
|
|
130 Status : Public
|
|
131
|
|
132 This is a read-only accessor for the start of the segment. Alias
|
|
133 to low() for Gadfly compatibility.
|
|
134
|
|
135 =cut
|
|
136
|
|
# Abstract read-only accessor for the segment start.  Calls
# throw_not_implemented() on the invocant; implementing classes are
# expected to override it.
sub start {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
# Alias for start().  Dispatches through the object, so a subclass
# that overrides start() is honoured automatically.
sub low {
    my $self = shift;
    return $self->start;
}
|
|
139
|
|
140 =head2 end
|
|
141
|
|
142 Title : end
|
|
143 Usage : $s->end
|
|
144 Function: end of segment
|
|
145 Returns : integer
|
|
146 Args : none
|
|
147 Status : Public
|
|
148
|
|
149 This is a read-only accessor for the end of the segment. Alias to
|
|
150 high() for Gadfly compatibility.
|
|
151
|
|
152 =cut
|
|
153
|
|
# Abstract read-only accessor for the segment end.  Calls
# throw_not_implemented() on the invocant; implementing classes are
# expected to override it.
sub end {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
# Alias for end().  Dispatches through the object, so a subclass that
# overrides end() is honoured automatically.
sub stop {
    my $self = shift;
    return $self->end;
}
|
|
# Alias for end().  Dispatches through the object, so a subclass that
# overrides end() is honoured automatically.
sub high {
    my $self = shift;
    return $self->end;
}
|
|
157
|
|
158 =head2 length
|
|
159
|
|
160 Title : length
|
|
161 Usage : $s->length
|
|
162 Function: length of segment
|
|
163 Returns : integer
|
|
164 Args : none
|
|
165 Status : Public
|
|
166
|
|
167 Returns the length of the segment. Always a positive number.
|
|
168
|
|
169 =cut
|
|
170
|
|
# Abstract accessor for the segment length.  Calls
# throw_not_implemented() on the invocant; implementing classes are
# expected to override it.
sub length {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
172
|
|
173 =head2 seq
|
|
174
|
|
175 Title : seq
|
|
176 Usage : $s->seq
|
|
177 Function: get the sequence string for this segment
|
|
178 Returns : a string
|
|
179 Args : none
|
|
180 Status : Public
|
|
181
|
|
182 Returns the sequence for this segment as a simple string.
|
|
183
|
|
184 =cut
|
|
185
|
|
# Abstract accessor for the segment's sequence string.  Calls
# throw_not_implemented() on the invocant; implementing classes are
# expected to override it.
sub seq {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
187
|
|
188 =head2 ref
|
|
189
|
|
190 Title : ref
|
|
191 Usage : $ref = $s->ref([$newlandmark])
|
|
192 Function: get/set the reference landmark for addressing
|
|
193 Returns : a string
|
|
194 Args : new landmark (optional)
|
|
195 Status : Public
|
|
196
|
|
197 This method is used to examine/change the reference landmark used to
|
|
198 establish the coordinate system. By default, the landmark cannot be
|
|
199 changed and therefore this has the same effect as seq_id(). The new
|
|
200 landmark might be an ID, or another Das::SegmentI object.
|
|
201
|
|
202 =cut
|
|
203
|
|
# Default reference-landmark accessor: any argument is ignored and the
# value of seq_id() is returned, i.e. the landmark cannot be changed
# through this default implementation.
sub ref {
    my $self = shift;
    return $self->seq_id;
}

# refseq() is installed as a second name for this package's ref().
# The alias is bound to this sub at compile time, not looked up per
# call, so it does not follow a subclass override of ref().
*refseq = \&ref;
|
|
206
|
|
207 =head2 absolute
|
|
208
|
|
209 Title : absolute
|
|
210 Usage : $s->absolute([$new_value])
|
|
211 Function: get/set absolute addressing mode
|
|
212 Returns : flag
|
|
213 Args : new flag (optional)
|
|
214 Status : Public
|
|
215
|
|
216 Turn on and off absolute-addressing mode. In absolute addressing
|
|
217 mode, coordinates are relative to some underlying "top level"
|
|
218 coordinate system (such as a chromosome). ref() returns the identity
|
|
219 of the top level landmark, and start() and end() return locations
|
|
220 relative to that landmark. In relative addressing mode, coordinates
|
|
221 are relative to the landmark sequence specified at the time of segment
|
|
222 creation or later modified by the ref() method.
|
|
223
|
|
224 The default is to return false and to do nothing in response to
|
|
225 attempts to set absolute addressing mode.
|
|
226
|
|
227 =cut
|
|
228
|
|
# Default absolute-addressing accessor: ignores any argument and
# returns nothing (undef in scalar context, the empty list in list
# context), i.e. absolute mode is reported as off and cannot be set.
sub absolute {
    return;
}
|
|
230
|
|
231 =head2 features
|
|
232
|
|
233 Title : features
|
|
234 Usage : @features = $s->features(@args)
|
|
235 Function: get features that overlap this segment
|
|
236 Returns : a list of Bio::SeqFeatureI objects
|
|
237 Args : see below
|
|
238 Status : Public
|
|
239
|
|
240 This method will find all features that intersect the segment in a
|
|
241 variety of ways and return a list of Bio::SeqFeatureI objects. The
|
|
242 feature locations will use coordinates relative to the reference
|
|
243 sequence in effect at the time that features() was called.
|
|
244
|
|
245 The returned list can be limited to certain types, attributes or
|
|
246 range intersection modes. Types of range intersection are one of:
|
|
247
|
|
248 "overlaps" the default
|
|
249 "contains" return features completely contained within the segment
|
|
250 "contained_in" return features that completely contain the segment
|
|
251
|
|
252 Two types of argument lists are accepted. In the positional argument
|
|
253 form, the arguments are treated as a list of feature types. In the
|
|
254 named parameter form, the arguments are a series of -name=E<gt>value
|
|
255 pairs.
|
|
256
|
|
257 Argument Description
|
|
258 -------- ------------
|
|
259
|
|
260 -types An array reference to type names in the format
|
|
261 "method:source"
|
|
262
|
|
263 -attributes A hashref containing a set of attributes to match
|
|
264
|
|
265 -rangetype One of "overlaps", "contains", or "contained_in".
|
|
266
|
|
267 -iterator Return an iterator across the features.
|
|
268
|
|
269 -callback A callback to invoke on each feature
|
|
270
|
|
271 The -attributes argument is a hashref containing one or more
|
|
272 attributes to match against:
|
|
273
|
|
274 -attributes => { Gene => 'abc-1',
|
|
275 Note => 'confirmed' }
|
|
276
|
|
277 Attribute matching is simple string matching, and multiple attributes
|
|
278 are ANDed together. More complex filtering can be performed using the
|
|
279 -callback option (see below).
|
|
280
|
|
281 If -iterator is true, then the method returns an object reference that
|
|
282 implements the next_seq() method. Each call to next_seq() returns a
|
|
283 new Bio::SeqFeatureI object.
|
|
284
|
|
285 If -callback is passed a code reference, the code reference will be
|
|
286 invoked on each feature returned. The code will be passed two
|
|
287 arguments consisting of the current feature and the segment object
|
|
288 itself, and must return a true value. If the code returns a false
|
|
289 value, feature retrieval will be aborted.
|
|
290
|
|
291 -callback and -iterator are mutually exclusive options. If -iterator
|
|
292 is defined, then -callback is ignored.
|
|
293
|
|
294 NOTE: the following methods all build on top of features(), and do not
|
|
295 need to be explicitly implemented.
|
|
296
|
|
297 overlapping_features()
|
|
298 contained_features()
|
|
299 contained_in()
|
|
300 get_feature_stream()
|
|
301
|
|
302 =cut
|
|
303
|
|
# Abstract feature query.  Calls throw_not_implemented() on the
# invocant; implementing classes are expected to override it.  The
# other feature-fetching methods in this package all delegate to
# features().
sub features {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
305
|
|
306 =head2 overlapping_features
|
|
307
|
|
308 Title : overlapping_features
|
|
309 Usage : @features = $s->overlapping_features(@args)
|
|
310 Function: get features that overlap this segment
|
|
311 Returns : a list of Bio::SeqFeatureI objects
|
|
312 Args : see below
|
|
313 Status : Public
|
|
314
|
|
315 This method is identical to features() except that it defaults to
|
|
316 finding overlapping features.
|
|
317
|
|
318 =cut
|
|
319
|
|
# Fetch features that overlap this segment by delegating to features()
# with -rangetype => 'overlaps'.
#
# Two calling styles are accepted, as for features():
#   * named parameters, recognised by a first argument starting with
#     "-"; they are passed through with -rangetype appended;
#   * a positional list of feature types, which is wrapped up as
#     -types => [ ... ].
# With no arguments at all only -rangetype is passed.
#
# Returns whatever features() returns, in the caller's context.
sub overlapping_features {
    my $self = shift;

    my @range_args = (-rangetype => 'overlaps');

    # Nothing to classify; also avoids matching against an undefined $_[0].
    return $self->features(@range_args) unless @_;

    my $uses_named_args = defined $_[0] && $_[0] =~ /^-/;
    my @args = $uses_named_args
        ? (@_, @range_args)
        : (-types => \@_, @range_args);

    return $self->features(@args);
}
|
|
326
|
|
=head2 contained_features

 Title   : contained_features
 Usage   : @features = $s->contained_features(@args)
 Function: get features that are contained in this segment
 Returns : a list of Bio::SeqFeatureI objects
 Args    : see below
 Status  : Public

This method is identical to features() except that it defaults to
a range type of 'contains', i.e. it returns features that are
completely contained within the segment.

=cut

# Fetch features completely contained within this segment by
# delegating to features() with -rangetype => 'contains', which is the
# value the features() documentation lists for this mode.
#
# Two calling styles are accepted, as for features():
#   * named parameters, recognised by a first argument starting with
#     "-"; they are passed through with -rangetype appended;
#   * a positional list of feature types, which is wrapped up as
#     -types => [ ... ].
# With no arguments at all only -rangetype is passed.
#
# Returns whatever features() returns, in the caller's context.
sub contained_features {
    my $self = shift;

    my @range_args = (-rangetype => 'contains');

    # Nothing to classify; also avoids matching against an undefined $_[0].
    return $self->features(@range_args) unless @_;

    my $uses_named_args = defined $_[0] && $_[0] =~ /^-/;
    my @args = $uses_named_args
        ? (@_, @range_args)
        : (-types => \@_, @range_args);

    return $self->features(@args);
}
|
|
347
|
|
348 =head2 contained_in
|
|
349
|
|
350 Title : contained_in
|
|
351 Usage : @features = $s->contained_in(@args)
|
|
352 Function: get features that contain this segment
|
|
353 Returns : a list of Bio::SeqFeatureI objects
|
|
354 Args : see below
|
|
355 Status : Public
|
|
356
|
|
357 This method is identical to features() except that it defaults to
|
|
358 a range type of 'contained_in'.
|
|
359
|
|
360 =cut
|
|
361
|
|
# Fetch features that completely contain this segment by delegating to
# features() with -rangetype => 'contained_in'.
#
# Two calling styles are accepted, as for features():
#   * named parameters, recognised by a first argument starting with
#     "-"; they are passed through with -rangetype appended;
#   * a positional list of feature types, which is wrapped up as
#     -types => [ ... ].
# With no arguments at all only -rangetype is passed.
#
# Returns whatever features() returns, in the caller's context.
sub contained_in {
    my $self = shift;

    my @range_args = (-rangetype => 'contained_in');

    # Nothing to classify; also avoids matching against an undefined $_[0].
    return $self->features(@range_args) unless @_;

    my $uses_named_args = defined $_[0] && $_[0] =~ /^-/;
    my @args = $uses_named_args
        ? (@_, @range_args)
        : (-types => \@_, @range_args);

    return $self->features(@args);
}
|
|
368
|
|
369 =head2 get_feature_stream
|
|
370
|
|
371 Title : get_feature_stream
|
|
372 Usage : $iterator = $s->get_feature_stream(@args)
|
|
373 Function: get an iterator across the segment
|
|
374 Returns : an object that implements next_seq()
|
|
375 Args : see below
|
|
376 Status : Public
|
|
377
|
|
378 This method is identical to features() except that it always generates
|
|
379 an iterator.
|
|
380
|
|
381 NOTE: This is defined in the interface in terms of features(). You do not
|
|
382 have to implement it.
|
|
383
|
|
384 =cut
|
|
385
|
|
# Ask features() for an iterator instead of a list.
#
# If the first argument is defined and starts with "-", the arguments
# are taken to be named parameters and are passed through with
# -iterator => 1 appended.  Otherwise (including the no-argument case)
# the arguments are treated as a list of feature types and handed over
# as -types => [ ... ].
#
# Returns whatever features() returns, in the caller's context.
sub get_feature_stream {
    my $self = shift;

    my @iterator_flag = (-iterator => 1);

    if (defined $_[0] && $_[0] =~ /^-/) {
        return $self->features(@_, @iterator_flag);
    }

    return $self->features(-types => \@_, @iterator_flag);
}
|
|
392
|
|
393 =head2 factory
|
|
394
|
|
395 Title : factory
|
|
396 Usage : $factory = $s->factory
|
|
397 Function: return the segment factory
|
|
398 Returns : a Bio::DasI object
|
|
399 Args : none
|
|
400 Status : Public
|
|
401
|
|
402 This method returns a Bio::DasI object that can be used to fetch
|
|
403 more segments. This is typically the Bio::DasI object from which
|
|
404 the segment was originally generated.
|
|
405
|
|
406 =cut
|
|
407
|
|
408 #'
|
|
409
|
|
# Abstract accessor for the object that produced this segment.  Calls
# throw_not_implemented() on the invocant; implementing classes are
# expected to override it.
sub factory {
    my $self = shift;
    return $self->throw_not_implemented;
}
|
|
411
|
|
412 =head2 primary_tag
|
|
413
|
|
414 Title : primary_tag
|
|
415 Usage : $tag = $s->primary_tag
|
|
416 Function: identifies the segment as type "DasSegment"
|
|
417 Returns : a string named "DasSegment"
|
|
418 Args : none
|
|
419 Status : Public, but see below
|
|
420
|
|
421 This method provides Bio::Das::Segment objects with a primary_tag()
|
|
422 field that identifies them as being of type "DasSegment". This allows
|
|
423 the Bio::Graphics engine to render segments just like a feature,
|
|
424 which is useful.
|
|
425
|
|
426 This does not need to be implemented. It is defined by the interface.
|
|
427
|
|
428 =cut
|
|
429
|
|
430 #'
|
|
431
|
|
# Constant primary tag for segments: always the string "DasSegment",
# regardless of invocant or arguments.
sub primary_tag {
    return 'DasSegment';
}
|
|
433
|
|
434 =head2 strand
|
|
435
|
|
436 Title : strand
|
|
437 Usage : $strand = $s->strand
|
|
438 Function: identifies the segment strand as 0
|
|
439 Returns : the number 0
|
|
440 Args : none
|
|
441 Status : Public, but see below
|
|
442
|
|
443 This method provides Bio::Das::Segment objects with a strand() field
|
|
444 that identifies it as being strandless. This allows the Bio::Graphics
|
|
445 engine to render segments just like a feature, which is useful.
|
|
446
|
|
447 This does not need to be implemented. It is defined by the interface.
|
|
448
|
|
449 =cut
|
|
450
|
|
# Constant strand for segments: always the number 0 (strandless),
# regardless of invocant or arguments.
sub strand {
    return 0;
}
|
|
452
|
|
453 1;
|