Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Das/SegmentI.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: SegmentI.pm,v 1.6 2002/12/22 03:42:22 lstein Exp $ | |
2 # | |
3 # BioPerl module for Bio::Das::SegmentI | |
4 # | |
5 # Cared for by Lincoln Stein <lstein@cshl.org> | |
6 # | |
7 # Copyright Lincoln Stein | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Das::SegmentI - DAS-style access to a feature database | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # Get a Bio::Das::SegmentI object from a Bio::DasI database... | |
20 | |
21 $segment = $das->segment(-name=>'Landmark', | |
22 -start=>$start, | |
23 -end => $end); | |
24 | |
25 @features = $segment->overlapping_features(-type=>['type1','type2']); | |
26 # each feature is a Bio::SeqFeatureI-compliant object | |
27 | |
28 @features = $segment->contained_features(-type=>['type1','type2']); | |
29 | |
30 @features = $segment->contained_in(-type=>['type1','type2']); | |
31 | |
32 $stream = $segment->get_feature_stream(-type=>['type1','type2','type3']); | |
33 while (my $feature = $stream->next_seq) { | |
34 # do something with feature | |
35 } | |
36 | |
37 $count = $segment->features_callback(-type=>['type1','type2','type3'], | |
38 -callback => sub { ... } | |
39 ); | |
40 | |
41 =head1 DESCRIPTION | |
42 | |
43 Bio::Das::SegmentI is a simplified alternative interface to sequence | |
44 annotation databases used by the distributed annotation system. In | |
45 this scheme, the genome is represented as a series of landmarks. Each | |
46 Bio::Das::SegmentI object ("segment") corresponds to a genomic region | |
47 defined by a landmark and a start and end position relative to that | |
48 landmark. A segment is created using the Bio::DasI segment() method. | |
49 | |
50 Features can be filtered by the following attributes: | |
51 | |
52 1) their location relative to the segment (whether overlapping, | |
53 contained within, or completely containing) | |
54 | |
55 2) their type | |
56 | |
57 3) other attributes using tag/value semantics | |
58 | |
59 Access to the feature list uses three distinct APIs: | |
60 | |
61 1) fetching entire list of features at a time | |
62 | |
63 2) fetching an iterator across features | |
64 | |
65 3) a callback | |
66 | |
67 =head1 FEEDBACK | |
68 | |
69 =head2 Mailing Lists | |
70 | |
71 User feedback is an integral part of the evolution of this and other | |
72 Bioperl modules. Send your comments and suggestions preferably to one | |
73 of the Bioperl mailing lists. Your participation is much appreciated. | |
74 | |
75 bioperl-l@bio.perl.org | |
76 | |
77 =head2 Reporting Bugs | |
78 | |
79 Report bugs to the Bioperl bug tracking system to help us keep track of | |
80 the bugs and their resolution. Bug reports can be submitted via email | |
81 or the web: | |
82 | |
83 bioperl-bugs@bio.perl.org | |
84 http://bugzilla.bioperl.org/ | |
85 | |
86 =head1 AUTHOR - Lincoln Stein | |
87 | |
88 Email lstein@cshl.org | |
89 | |
90 =head1 APPENDIX | |
91 | |
92 The rest of the documentation details each of the object | |
93 methods. Internal methods are usually preceded with a _ | |
94 | |
95 =cut | |
96 | |
97 #' | |
98 # Let the code begin... | |
99 | |
100 package Bio::Das::SegmentI; | |
101 use strict; | |
102 | |
103 use vars qw(@ISA $VERSION); # declares the package globals assigned below | |
104 use Bio::Root::RootI; | |
105 | |
106 # Object preamble - inherits from Bio::Root::RootI (presumably the source of throw_not_implemented(), which is not defined in this file) | |
107 @ISA = qw(Bio::Root::RootI); | |
108 $VERSION = 1.00; | |
109 | |
110 =head2 seq_id | |
111 | |
112 Title : seq_id | |
113 Usage : $ref = $s->seq_id | |
114 Function: return the ID of the landmark | |
115 Returns : a string | |
116 Args : none | |
117 Status : Public | |
118 | |
119 =cut | |
120 | |
121 sub seq_id { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
122 | |
123 =head2 start | |
124 | |
125 Title : start | |
126 Usage : $s->start | |
127 Function: start of segment | |
128 Returns : integer | |
129 Args : none | |
130 Status : Public | |
131 | |
132 This is a read-only accessor for the start of the segment. Alias | |
133 to low() for Gadfly compatibility. | |
134 | |
135 =cut | |
136 | |
137 sub start { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
138 sub low { my $self = shift; return $self->start; } # alias: dispatches to start() on the same object | |
139 | |
140 =head2 end | |
141 | |
142 Title : end | |
143 Usage : $s->end | |
144 Function: end of segment | |
145 Returns : integer | |
146 Args : none | |
147 Status : Public | |
148 | |
149 This is a read-only accessor for the end of the segment. Alias to | |
150 high() for Gadfly compatibility. | |
151 | |
152 =cut | |
153 | |
154 sub end { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
155 sub stop { my $self = shift; return $self->end; } # alias: dispatches to end() on the same object | |
156 sub high { my $self = shift; return $self->end; } # alias: dispatches to end() on the same object | |
157 | |
158 =head2 length | |
159 | |
160 Title : length | |
161 Usage : $s->length | |
162 Function: length of segment | |
163 Returns : integer | |
164 Args : none | |
165 Status : Public | |
166 | |
167 Returns the length of the segment. Always a positive number. | |
168 | |
169 =cut | |
170 | |
171 sub length { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
172 | |
173 =head2 seq | |
174 | |
175 Title : seq | |
176 Usage : $s->seq | |
177 Function: get the sequence string for this segment | |
178 Returns : a string | |
179 Args : none | |
180 Status : Public | |
181 | |
182 Returns the sequence for this segment as a simple string. | |
183 | |
184 =cut | |
185 | |
186 sub seq { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
187 | |
188 =head2 ref | |
189 | |
190 Title : ref | |
191 Usage : $ref = $s->ref([$newlandmark]) | |
192 Function: get/set the reference landmark for addressing | |
193 Returns : a string | |
194 Args : new landmark (optional; the default implementation ignores it) | |
195 Status : Public | |
196 | |
197 This method is used to examine/change the reference landmark used to | |
198 establish the coordinate system. By default, the landmark cannot be | |
199 changed and therefore this has the same effect as seq_id(). The new | |
200 landmark might be an ID, or another Das::SegmentI object. | |
201 | |
202 =cut | |
203 | |
204 sub ref { my $self = shift; return $self->seq_id; } # default: any new landmark argument is ignored; same result as seq_id() | |
205 *refseq = \&Bio::Das::SegmentI::ref; # refseq() is bound to this package's ref() via the symbol table | |
206 | |
207 =head2 absolute | |
208 | |
209 Title : absolute | |
210 Usage : $s->absolute([$new_value]) | |
211 Function: get/set absolute addressing mode | |
212 Returns : flag | |
213 Args : new flag (optional) | |
214 Status : Public | |
215 | |
216 Turn on and off absolute-addressing mode. In absolute addressing | |
217 mode, coordinates are relative to some underlying "top level" | |
218 coordinate system (such as a chromosome). ref() returns the identity | |
219 of the top level landmark, and start() and end() return locations | |
220 relative to that landmark. In relative addressing mode, coordinates | |
221 are relative to the landmark sequence specified at the time of segment | |
222 creation or later modified by the ref() method. | |
223 | |
224 The default is to return false and to do nothing in response to | |
225 attempts to set absolute addressing mode. | |
226 | |
227 =cut | |
228 | |
229 sub absolute { my $self = shift; return; } # default: ignores any new flag and returns nothing (false) | |
230 | |
231 =head2 features | |
232 | |
233 Title : features | |
234 Usage : @features = $s->features(@args) | |
235 Function: get features that overlap this segment | |
236 Returns : a list of Bio::SeqFeatureI objects | |
237 Args : see below | |
238 Status : Public | |
239 | |
240 This method will find all features that intersect the segment in a | |
241 variety of ways and return a list of Bio::SeqFeatureI objects. The | |
242 feature locations will use coordinates relative to the reference | |
243 sequence in effect at the time that features() was called. | |
244 | |
245 The returned list can be limited to certain types, attributes or | |
246 range intersection modes. Types of range intersection are one of: | |
247 | |
248 "overlaps" the default | |
249 "contains" return features completely contained within the segment | |
250 "contained_in" return features that completely contain the segment | |
251 | |
252 Two types of argument lists are accepted. In the positional argument | |
253 form, the arguments are treated as a list of feature types. In the | |
254 named parameter form, the arguments are a series of -name=E<gt>value | |
255 pairs. | |
256 | |
257 Argument Description | |
258 -------- ------------ | |
259 | |
260 -types An array reference to type names in the format | |
261 "method:source" | |
262 | |
263 -attributes A hashref containing a set of attributes to match | |
264 | |
265 -rangetype One of "overlaps", "contains", or "contained_in". | |
266 | |
267 -iterator Return an iterator across the features. | |
268 | |
269 -callback A callback to invoke on each feature | |
270 | |
271 The -attributes argument is a hashref containing one or more | |
272 attributes to match against: | |
273 | |
274 -attributes => { Gene => 'abc-1', | |
275 Note => 'confirmed' } | |
276 | |
277 Attribute matching is simple string matching, and multiple attributes | |
278 are ANDed together. More complex filtering can be performed using the | |
279 -callback option (see below). | |
280 | |
281 If -iterator is true, then the method returns an object reference that | |
282 implements the next_seq() method. Each call to next_seq() returns a | |
283 new Bio::SeqFeatureI object. | |
284 | |
285 If -callback is passed a code reference, the code reference will be | |
286 invoked on each feature returned. The code will be passed two | |
287 arguments consisting of the current feature and the segment object | |
288 itself, and must return a true value. If the code returns a false | |
289 value, feature retrieval will be aborted. | |
290 | |
291 -callback and -iterator are mutually exclusive options. If -iterator | |
292 is defined, then -callback is ignored. | |
293 | |
294 NOTE: the following methods all build on top of features(), and do not | |
295 need to be explicitly implemented. | |
296 | |
297 overlapping_features() | |
298 contained_features() | |
299 contained_in() | |
300 get_feature_stream() | |
301 | |
302 =cut | |
303 | |
304 sub features { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
305 | |
306 =head2 overlapping_features | |
307 | |
308 Title : overlapping_features | |
309 Usage : @features = $s->overlapping_features(@args) | |
310 Function: get features that overlap this segment | |
311 Returns : a list of Bio::SeqFeatureI objects | |
312 Args : see below | |
313 Status : Public | |
314 | |
315 This method is identical to features() except that it defaults to | |
316 finding overlapping features. | |
317 | |
318 =cut | |
319 | |
320 sub overlapping_features { # features() with -rangetype=>'overlaps' added to the argument list | |
321 my $self = shift; | |
322 my @args = (!@_ || $_[0] =~ /^-/) ? (@_, -rangetype=>'overlaps') # no args or -name=>value pairs: pass through unchanged | |
323 : (-types=>\@_,-rangetype=>'overlaps'); # positional form: the arguments are feature type names | |
324 $self->features(@args); # returns whatever features() returns | |
325 } | |
326 | |
327 =head2 contained_features | |
328 | |
329 Title : contained_features | |
330 Usage : @features = $s->contained_features(@args) | |
331 Function: get features that are contained in this segment | |
332 Returns : a list of Bio::SeqFeatureI objects | |
333 Args : see below | |
334 Status : Public | |
335 | |
336 This method is identical to features() except that it defaults to | |
337 a range type of 'contained'. | |
338 | |
339 =cut | |
340 | |
341 sub contained_features { # features() with -rangetype=>'contained' added to the argument list | |
342 my $self = shift; | |
343 my @args = (!@_ || $_[0] =~ /^-/) ? (@_, -rangetype=>'contained') # no args or -name=>value pairs: pass through unchanged | |
344 : (-types=>\@_,-rangetype=>'contained'); # positional form: the arguments are feature type names | |
345 $self->features(@args); # NOTE(review): the features() POD names this range type "contains", not 'contained' - confirm with implementers | |
346 } | |
347 | |
348 =head2 contained_in | |
349 | |
350 Title : contained_in | |
351 Usage : @features = $s->contained_in(@args) | |
352 Function: get features that contain this segment | |
353 Returns : a list of Bio::SeqFeatureI objects | |
354 Args : see below | |
355 Status : Public | |
356 | |
357 This method is identical to features() except that it defaults to | |
358 a range type of 'contained_in'. | |
359 | |
360 =cut | |
361 | |
362 sub contained_in { # features() with -rangetype=>'contained_in' added to the argument list | |
363 my $self = shift; | |
364 my @args = (!@_ || $_[0] =~ /^-/) ? (@_, -rangetype=>'contained_in') # no args or -name=>value pairs: pass through unchanged | |
365 : (-types=>\@_,-rangetype=>'contained_in'); # positional form: the arguments are feature type names | |
366 $self->features(@args); # returns whatever features() returns | |
367 } | |
368 | |
369 =head2 get_feature_stream | |
370 | |
371 Title : get_feature_stream | |
372 Usage : $iterator = $s->get_feature_stream(@args) | |
373 Function: get an iterator across the segment | |
374 Returns : an object that implements next_seq() | |
375 Args : see below | |
376 Status : Public | |
377 | |
378 This method is identical to features() except that it always generates | |
379 an iterator. | |
380 | |
381 NOTE: This is defined in the interface in terms of features(). You do not | |
382 have to implement it. | |
383 | |
384 =cut | |
385 | |
386 sub get_feature_stream { # features() with -iterator=>1 always added | |
387 my $segment = shift; | |
388 my $named = defined $_[0] && $_[0] =~ /^-/; # true when the caller used -name=>value pairs | |
389 my @args = $named ? (@_, -iterator=>1) : (-types=>\@_, -iterator=>1); # otherwise the arguments are feature types | |
390 return $segment->features(@args); | |
391 } | |
392 | |
393 =head2 factory | |
394 | |
395 Title : factory | |
396 Usage : $factory = $s->factory | |
397 Function: return the segment factory | |
398 Returns : a Bio::DasI object | |
399 Args : none | |
400 Status : Public | |
401 | |
402 This method returns a Bio::DasI object that can be used to fetch | |
403 more segments. This is typically the Bio::DasI object from which | |
404 the segment was originally generated. | |
405 | |
406 =cut | |
407 | |
408 #' | |
409 | |
410 sub factory { my $self = shift; return $self->throw_not_implemented; } # no default here: delegates to throw_not_implemented() | |
411 | |
412 =head2 primary_tag | |
413 | |
414 Title : primary_tag | |
415 Usage : $tag = $s->primary_tag | |
416 Function: identifies the segment as type "DasSegment" | |
417 Returns : a string named "DasSegment" | |
418 Args : none | |
419 Status : Public, but see below | |
420 | |
421 This method provides Bio::Das::Segment objects with a primary_tag() | |
422 field that identifies them as being of type "DasSegment". This allows | |
423 the Bio::Graphics engine to render segments just like a feature, which | |
424 can be useful. | |
425 | |
426 This does not need to be implemented. It is defined by the interface. | |
427 | |
428 =cut | |
429 | |
430 #' | |
431 | |
432 sub primary_tag { return "DasSegment"; } # constant tag; any arguments are ignored | |
433 | |
434 =head2 strand | |
435 | |
436 Title : strand | |
437 Usage : $strand = $s->strand | |
438 Function: identifies the segment strand as 0 | |
439 Returns : the number 0 | |
440 Args : none | |
441 Status : Public, but see below | |
442 | |
443 This method provides Bio::Das::Segment objects with a strand() field | |
444 that identifies them as being strandless. This allows the Bio::Graphics | |
445 engine to render segments just like a feature, which can be useful. | |
446 | |
447 This does not need to be implemented. It is defined by the interface. | |
448 | |
449 =cut | |
450 | |
451 sub strand { return 0; } # always 0 (strandless); any arguments are ignored | |
452 | |
453 1; |