Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Graphics/Feature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 package Bio::Graphics::Feature; | |
2 | |
3 =head1 NAME | |
4 | |
5 Bio::Graphics::Feature - A simple feature object for use with Bio::Graphics::Panel | |
6 | |
7 =head1 SYNOPSIS | |
8 | |
9 use Bio::Graphics::Feature; | |
10 | |
11 # create a simple feature with no internal structure | |
12 $f = Bio::Graphics::Feature->new(-start => 1000, | |
13 -stop => 2000, | |
14 -type => 'transcript', | |
15 -name => 'alpha-1 antitrypsin', | |
16 -desc => 'an enzyme inhibitor', | |
17 ); | |
18 | |
19 # create a feature composed of multiple segments, all of type "similarity" | |
20 $f = Bio::Graphics::Feature->new(-segments => [[1000,1100],[1500,1550],[1800,2000]], | |
21 -name => 'ABC-3', | |
22 -type => 'gapped_alignment', | |
23 -subtype => 'similarity'); | |
24 | |
25 # build up a gene exon by exon | |
26 $e1 = Bio::Graphics::Feature->new(-start=>1,-stop=>100,-type=>'exon'); | |
27 $e2 = Bio::Graphics::Feature->new(-start=>150,-stop=>200,-type=>'exon'); | |
28 $e3 = Bio::Graphics::Feature->new(-start=>300,-stop=>500,-type=>'exon'); | |
29 $f = Bio::Graphics::Feature->new(-segments=>[$e1,$e2,$e3],-type=>'gene'); | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 This is a simple Bio::SeqFeatureI-compliant object that is compatible | |
34 with Bio::Graphics::Panel. With it you can create lightweight feature | |
35 objects for drawing. | |
36 | |
37 All methods are as described in L<Bio::SeqFeatureI> with the following additions: | |
38 | |
39 =head2 The new() Constructor | |
40 | |
41 $feature = Bio::Graphics::Feature->new(@args); | |
42 | |
43 This method creates a new feature object. You can create a simple | |
44 feature that contains no subfeatures, or a hierarchically nested object. | |
45 | |
46 Arguments are as follows: | |
47 | |
48 -start the start position of the feature | |
49 -end the stop position of the feature | |
50 -stop an alias for end | |
51 -name the feature name (returned by seqname()) | |
52 -type the feature type (returned by primary_tag()) | |
53 -source the source tag | |
54 -desc a description of the feature | |
55 -segments a list of subfeatures (see below) | |
56 -subtype the type to use when creating subfeatures | |
57 -strand the strand of the feature (one of -1, 0 or +1) | |
58 -id an alias for -name | |
59 -seqname an alias for -name | |
60 -primary_id an alias for -name | |
61 -display_id an alias for -name | |
62 -display_name an alias for -name (do you get the idea the API has changed?) | |
63 -attributes a hashref of tag value attributes, in which the key is the tag | |
64 and the value is an array reference of values | |
65 -factory a reference to a feature factory, used for compatibility with | |
66 more obscure parts of Bio::DB::GFF | |
67 | |
68 The subfeatures passed in -segments may be an array of | |
69 Bio::Graphics::Feature objects, or an array of [$start,$stop] | |
70 pairs. Each pair should be a two-element array reference. In the | |
71 latter case, the feature type passed in -subtype will be used when | |
72 creating the subfeatures. | |
73 | |
74 If no feature type is passed, then it defaults to "feature". | |
75 | |
76 =head2 Non-SeqFeatureI methods | |
77 | |
78 A number of new methods are provided for compatibility with | |
79 Ace::Sequence, which has a slightly different API from SeqFeatureI: | |
80 | |
81 =over 4 | |
82 | |
83 =item add_segment(@segments) | |
84 | |
85 Add one or more segments (a subfeature). Segments can either be | |
86 Feature objects, or [start,stop] arrays, as in the -segments argument | |
87 to new(). The feature endpoints are automatically adjusted. | |
88 | |
89 =item segments() | |
90 | |
91 An alias for sub_SeqFeature(). | |
92 | |
93 =item merged_segments() | |
94 | |
95 Another alias for sub_SeqFeature(). | |
96 | |
97 =item stop() | |
98 | |
99 An alias for end(). | |
100 | |
101 =item name() | |
102 | |
103 An alias for seqname(). | |
104 | |
105 =item exons() | |
106 | |
107 An alias for sub_SeqFeature() (you don't want to know why!) | |
108 | |
109 =back | |
110 | |
111 =cut | |
112 | |
113 use strict; | |
114 use Bio::Root::Root; | |
115 use Bio::SeqFeatureI; | |
116 use Bio::SeqI; | |
117 use Bio::LocationI; | |
118 | |
119 use vars '@ISA'; | |
120 @ISA = qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI); | |
121 | |
122 *stop = \&end; | |
123 *info = \&name; | |
124 *seqname = \&name; | |
125 *type = \&primary_tag; | |
126 *exons = *sub_SeqFeature = *merged_segments = \&segments; | |
127 *method = \&type; | |
128 *source = \&source_tag; | |
129 | |
130 sub target { return; } | |
131 sub hit { return; } | |
132 | |
133 # usage: | |
134 # Bio::Graphics::Feature->new( | |
135 # -start => 1, | |
136 # -end => 100, | |
137 # -name => 'fred feature', | |
138 # -strand => +1); | |
139 # | |
140 # Alternatively, use -segments => [ [start,stop],[start,stop]...] | |
141 # to create a multisegmented feature. | |
142 sub new { | |
143 my $class= shift; | |
144 $class = ref($class) if ref $class; | |
145 my %arg = @_; | |
146 | |
147 my $self = bless {},$class; | |
148 | |
149 $arg{-strand} ||= 0; | |
150 $self->{strand} = $arg{-strand} ? ($arg{-strand} >= 0 ? +1 : -1) : 0; | |
151 $self->{name} = $arg{-name} || $arg{-seqname} || $arg{-display_id} | |
152 || $arg{-display_name} || $arg{-id} || $arg{-primary_id}; | |
153 $self->{type} = $arg{-type} || 'feature'; | |
154 $self->{subtype} = $arg{-subtype} if exists $arg{-subtype}; | |
155 $self->{source} = $arg{-source} || $arg{-source_tag} || ''; | |
156 $self->{score} = $arg{-score} if exists $arg{-score}; | |
157 $self->{start} = $arg{-start}; | |
158 $self->{stop} = $arg{-end} || $arg{-stop}; | |
159 $self->{ref} = $arg{-ref}; | |
160 $self->{class} = $arg{-class} if exists $arg{-class}; | |
161 $self->{url} = $arg{-url} if exists $arg{-url}; | |
162 $self->{seq} = $arg{-seq} if exists $arg{-seq}; | |
163 $self->{phase} = $arg{-phase} if exists $arg{-phase}; | |
164 $self->{desc} = $arg{-desc} if exists $arg{-desc}; | |
165 $self->{attrib} = $arg{-attributes} if exists $arg{-attributes}; | |
166 $self->{factory} = $arg{-factory} if exists $arg{-factory}; | |
167 | |
168 # fix start, stop | |
169 if (defined $self->{stop} && defined $self->{start} | |
170 && $self->{stop} < $self->{start}) { | |
171 @{$self}{'start','stop'} = @{$self}{'stop','start'}; | |
172 $self->{strand} *= -1; | |
173 } | |
174 | |
175 my @segments; | |
176 if (my $s = $arg{-segments}) { | |
177 $self->add_segment(@$s); | |
178 } | |
179 $self; | |
180 } | |
181 | |
182 sub add_segment { | |
183 my $self = shift; | |
184 my $type = $self->{subtype} || $self->{type}; | |
185 $self->{segments} ||= []; | |
186 | |
187 my @segments = @{$self->{segments}}; | |
188 | |
189 for my $seg (@_) { | |
190 if (ref($seg) eq 'ARRAY') { | |
191 my ($start,$stop) = @{$seg}; | |
192 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us | |
193 my $strand = $self->{strand}; | |
194 | |
195 if ($start > $stop) { | |
196 ($start,$stop) = ($stop,$start); | |
197 # $strand *= -1; | |
198 $strand = -1; | |
199 } | |
200 push @segments,$self->new(-start => $start, | |
201 -stop => $stop, | |
202 -strand => $strand, | |
203 -type => $type); | |
204 } else { | |
205 push @segments,$seg; | |
206 } | |
207 } | |
208 if (@segments) { | |
209 local $^W = 0; # some warning of an uninitialized variable... | |
210 $self->{segments} = [ sort {$a->start <=> $b->start } @segments ]; | |
211 $self->{start} = $self->{segments}[0]->start; | |
212 ($self->{stop}) = sort { $b <=> $a } map { $_->end } @segments; | |
213 } | |
214 } | |
215 | |
216 sub segments { | |
217 my $self = shift; | |
218 my $s = $self->{segments} or return wantarray ? () : 0; | |
219 @$s; | |
220 } | |
221 sub score { | |
222 my $self = shift; | |
223 my $d = $self->{score}; | |
224 $self->{score} = shift if @_; | |
225 $d; | |
226 } | |
227 sub primary_tag { shift->{type} } | |
228 sub name { | |
229 my $self = shift; | |
230 my $d = $self->{name}; | |
231 $self->{name} = shift if @_; | |
232 $d; | |
233 } | |
234 sub seq_id { shift->ref() } | |
235 sub ref { | |
236 my $self = shift; | |
237 my $d = $self->{ref}; | |
238 $self->{ref} = shift if @_; | |
239 $d; | |
240 } | |
241 sub start { | |
242 my $self = shift; | |
243 my $d = $self->{start}; | |
244 $self->{start} = shift if @_; | |
245 $d; | |
246 } | |
247 sub end { | |
248 my $self = shift; | |
249 my $d = $self->{stop}; | |
250 $self->{stop} = shift if @_; | |
251 $d; | |
252 } | |
253 sub strand { | |
254 my $self = shift; | |
255 my $d = $self->{strand}; | |
256 $self->{strand} = shift if @_; | |
257 $d; | |
258 } | |
259 sub length { | |
260 my $self = shift; | |
261 return $self->end - $self->start + 1; | |
262 } | |
263 | |
264 sub seq { | |
265 my $self = shift; | |
266 my $dna = exists $self->{seq} ? $self->{seq} : ''; | |
267 # $dna .= 'n' x ($self->length - CORE::length($dna)); | |
268 return $dna; | |
269 } | |
270 *dna = \&seq; | |
271 | |
272 =head2 factory | |
273 | |
274 Title : factory | |
275 Usage : $factory = $obj->factory([$new_factory]) | |
276 Function: Returns the feature factory from which this feature was generated. | |
277 Mostly for compatibility with weird dependencies in gbrowse. | |
278 Returns : A feature factory | |
279 Args : None | |
280 | |
281 =cut | |
282 | |
283 sub factory { | |
284 my $self = shift; | |
285 my $d = $self->{factory}; | |
286 $self->{factory} = shift if @_; | |
287 $d; | |
288 } | |
289 | |
290 =head2 display_name | |
291 | |
292 Title : display_name | |
293 Usage : $id = $obj->display_name or $obj->display_name($newid); | |
294 Function: Gets or sets the display id, also known as the common name of | |
295 the Seq object. | |
296 | |
297 The semantics of this is that it is the most likely string | |
298 to be used as an identifier of the sequence, and likely to | |
299 have "human" readability. The id is equivalent to the LOCUS | |
300 field of the GenBank/EMBL databanks and the ID field of the | |
301 Swissprot/sptrembl database. In fasta format, the >(\S+) is | |
302 presumed to be the id, though some people overload the id | |
303 to embed other information. Bioperl does not use any | |
304 embedded information in the ID field, and people are | |
305 encouraged to use other mechanisms (accession field for | |
306 example, or extending the sequence object) to solve this. | |
307 | |
308 Notice that $seq->id() maps to this function, mainly for | |
309 legacy/convenience issues. | |
310 Returns : A string | |
311 Args : None or a new id | |
312 | |
313 | |
314 =cut | |
315 | |
316 sub display_name { shift->name } | |
317 | |
318 *display_id = \&display_name; | |
319 | |
320 =head2 accession_number | |
321 | |
322 Title : accession_number | |
323 Usage : $unique_biological_key = $obj->accession_number; | |
324 Function: Returns the unique biological id for a sequence, commonly | |
325 called the accession_number. For sequences from established | |
326 databases, the implementors should try to use the correct | |
327 accession number. Notice that primary_id() provides the | |
328 unique id for the implemetation, allowing multiple objects | |
329 to have the same accession number in a particular implementation. | |
330 | |
331 For sequences with no accession number, this method should return | |
332 "unknown". | |
333 Returns : A string | |
334 Args : None | |
335 | |
336 | |
337 =cut | |
338 | |
339 sub accession_number { | |
340 return 'unknown'; | |
341 } | |
342 | |
343 =head2 alphabet | |
344 | |
345 Title : alphabet | |
346 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
347 Function: Returns the type of sequence being one of | |
348 'dna', 'rna' or 'protein'. This is case sensitive. | |
349 | |
350 This is not called <type> because this would cause | |
351 upgrade problems from the 0.5 and earlier Seq objects. | |
352 | |
353 Returns : a string either 'dna','rna','protein'. NB - the object must | |
354 make a call of the type - if there is no type specified it | |
355 has to guess. | |
356 Args : none | |
357 Status : Virtual | |
358 | |
359 | |
360 =cut | |
361 | |
362 sub alphabet{ | |
363 return 'dna'; # no way this will be anything other than dna! | |
364 } | |
365 | |
366 | |
367 | |
368 =head2 desc | |
369 | |
370 Title : desc | |
371 Usage : $seqobj->desc($string) or $seqobj->desc() | |
372 Function: Sets or gets the description of the sequence | |
373 Example : | |
374 Returns : The description | |
375 Args : The description or none | |
376 | |
377 | |
378 =cut | |
379 | |
380 sub desc { | |
381 my $self = shift; | |
382 my $d = $self->{desc}; | |
383 $self->{desc} = shift if @_; | |
384 $d; | |
385 } | |
386 | |
387 sub notes { | |
388 return shift->desc; | |
389 } | |
390 | |
391 sub low { | |
392 my $self = shift; | |
393 return $self->start < $self->end ? $self->start : $self->end; | |
394 } | |
395 | |
396 sub high { | |
397 my $self = shift; | |
398 return $self->start > $self->end ? $self->start : $self->end; | |
399 } | |
400 | |
401 =head2 location | |
402 | |
403 Title : location | |
404 Usage : my $location = $seqfeature->location() | |
405 Function: returns a location object suitable for identifying location | |
406 of feature on sequence or parent feature | |
407 Returns : Bio::LocationI object | |
408 Args : none | |
409 | |
410 =cut | |
411 | |
412 sub location { | |
413 my $self = shift; | |
414 require Bio::Location::Split unless Bio::Location::Split->can('new'); | |
415 my $location; | |
416 if (my @segments = $self->segments) { | |
417 $location = Bio::Location::Split->new(); | |
418 foreach (@segments) { | |
419 $location->add_sub_Location($_); | |
420 } | |
421 } else { | |
422 $location = $self; | |
423 } | |
424 $location; | |
425 } | |
426 | |
427 sub coordinate_policy { | |
428 require Bio::Location::WidestCoordPolicy unless Bio::Location::WidestCoordPolicy->can('new'); | |
429 return Bio::Location::WidestCoordPolicy->new(); | |
430 } | |
431 | |
432 sub min_start { shift->low } | |
433 sub max_start { shift->low } | |
434 sub min_end { shift->high } | |
435 sub max_end { shift->high} | |
436 sub start_pos_type { 'EXACT' } | |
437 sub end_pos_type { 'EXACT' } | |
438 sub to_FTstring { | |
439 my $self = shift; | |
440 my $low = $self->min_start; | |
441 my $high = $self->max_end; | |
442 return "$low..$high"; | |
443 } | |
444 sub phase { shift->{phase} } | |
445 sub class { | |
446 my $self = shift; | |
447 my $d = $self->{class}; | |
448 $self->{class} = shift if @_; | |
449 return defined($d) ? $d : ucfirst $self->method; | |
450 } | |
451 | |
452 sub gff_string { | |
453 my $self = shift; | |
454 my $name = $self->name; | |
455 my $class = $self->class; | |
456 my $group = "$class $name" if $name; | |
457 my $string; | |
458 $string .= join("\t",$self->ref,$self->source||'.',$self->method||'.', | |
459 $self->start,$self->stop, | |
460 $self->score||'.',$self->strand||'.',$self->phase||'.', | |
461 $group); | |
462 $string .= "\n"; | |
463 foreach ($self->sub_SeqFeature) { | |
464 # add missing data if we need it | |
465 $_->ref($self->ref) unless defined $_->ref; | |
466 $_->name($self->name); | |
467 $_->class($self->class); | |
468 $string .= $_->gff_string; | |
469 } | |
470 $string; | |
471 } | |
472 | |
473 | |
474 sub db { return } | |
475 | |
476 sub source_tag { | |
477 my $self = shift; | |
478 my $d = $self->{source}; | |
479 $self->{source} = shift if @_; | |
480 $d; | |
481 } | |
482 | |
483 # This probably should be deleted. Not sure why it's here, but might | |
484 # have been added for Ace::Sequence::Feature-compliance. | |
485 sub introns { | |
486 my $self = shift; | |
487 return; | |
488 } | |
489 | |
490 sub has_tag { } | |
491 | |
492 # get/set the configurator (Bio::Graphics::FeatureFile) for this feature | |
493 sub configurator { | |
494 my $self = shift; | |
495 my $d = $self->{configurator}; | |
496 $self->{configurator} = shift if @_; | |
497 $d; | |
498 } | |
499 | |
500 # get/set the url for this feature | |
501 sub url { | |
502 my $self = shift; | |
503 my $d = $self->{url}; | |
504 $self->{url} = shift if @_; | |
505 $d; | |
506 } | |
507 | |
508 # make a link | |
509 sub make_link { | |
510 my $self = shift; | |
511 if (my $url = $self->url) { | |
512 return $url; | |
513 } | |
514 | |
515 elsif (my $configurator = $self->configurator) { | |
516 return $configurator->make_link($self); | |
517 } | |
518 | |
519 else { | |
520 return; | |
521 } | |
522 } | |
523 | |
524 sub all_tags { | |
525 my $self = shift; | |
526 return keys %{$self->{attrib}}; | |
527 } | |
528 sub each_tag_value { | |
529 my $self = shift; | |
530 my $tag = shift; | |
531 my $value = $self->{attrib}{$tag} or return; | |
532 return CORE::ref $value ? @{$self->{attrib}{$tag}} | |
533 : $self->{attrib}{$tag}; | |
534 } | |
535 | |
536 sub DESTROY { } | |
537 | |
538 1; | |
539 | |
540 __END__ | |
541 | |
542 =head1 SEE ALSO | |
543 | |
544 L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>, | |
545 L<GD> | |
546 | |
547 =head1 AUTHOR | |
548 | |
549 Lincoln Stein E<lt>lstein@cshl.orgE<gt>. | |
550 | |
551 Copyright (c) 2001 Cold Spring Harbor Laboratory | |
552 | |
553 This library is free software; you can redistribute it and/or modify | |
554 it under the same terms as Perl itself. See DISCLAIMER.txt for | |
555 disclaimers of warranty. | |
556 | |
557 =cut |