0
|
1 package Bio::Graphics::Feature;
|
|
2
|
|
3 =head1 NAME
|
|
4
|
|
5 Bio::Graphics::Feature - A simple feature object for use with Bio::Graphics::Panel
|
|
6
|
|
7 =head1 SYNOPSIS
|
|
8
|
|
9 use Bio::Graphics::Feature;
|
|
10
|
|
11 # create a simple feature with no internal structure
|
|
12 $f = Bio::Graphics::Feature->new(-start => 1000,
|
|
13 -stop => 2000,
|
|
14 -type => 'transcript',
|
|
15 -name => 'alpha-1 antitrypsin',
|
|
16 -desc => 'an enzyme inhibitor',
|
|
17 );
|
|
18
|
|
19 # create a feature composed of multiple segments, all of type "similarity"
|
|
20 $f = Bio::Graphics::Feature->new(-segments => [[1000,1100],[1500,1550],[1800,2000]],
|
|
21 -name => 'ABC-3',
|
|
22 -type => 'gapped_alignment',
|
|
23 -subtype => 'similarity');
|
|
24
|
|
25 # build up a gene exon by exon
|
|
26 $e1 = Bio::Graphics::Feature->new(-start=>1,-stop=>100,-type=>'exon');
|
|
27 $e2 = Bio::Graphics::Feature->new(-start=>150,-stop=>200,-type=>'exon');
|
|
28 $e3 = Bio::Graphics::Feature->new(-start=>300,-stop=>500,-type=>'exon');
|
|
29 $f = Bio::Graphics::Feature->new(-segments=>[$e1,$e2,$e3],-type=>'gene');
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 This is a simple Bio::SeqFeatureI-compliant object that is compatible
|
|
34 with Bio::Graphics::Panel. With it you can create lightweight feature
|
|
35 objects for drawing.
|
|
36
|
|
37 All methods are as described in L<Bio::SeqFeatureI> with the following additions:
|
|
38
|
|
39 =head2 The new() Constructor
|
|
40
|
|
41 $feature = Bio::Graphics::Feature->new(@args);
|
|
42
|
|
43 This method creates a new feature object. You can create a simple
|
|
44 feature that contains no subfeatures, or a hierarchically nested object.
|
|
45
|
|
46 Arguments are as follows:
|
|
47
|
|
48 -start the start position of the feature
|
|
49 -end the stop position of the feature
|
|
50 -stop an alias for end
|
|
51 -name the feature name (returned by seqname())
|
|
52 -type the feature type (returned by primary_tag())
|
|
53 -source the source tag
|
|
54 -desc a description of the feature
|
|
55 -segments a list of subfeatures (see below)
|
|
56 -subtype the type to use when creating subfeatures
|
|
57 -strand the strand of the feature (one of -1, 0 or +1)
|
|
58 -id an alias for -name
|
|
59 -seqname an alias for -name
|
|
60 -primary_id an alias for -name
|
|
61 -display_id an alias for -name
|
|
62 -display_name an alias for -name (do you get the idea the API has changed?)
|
|
63 -attributes a hashref of tag value attributes, in which the key is the tag
|
|
64 and the value is an array reference of values
|
|
65 -factory a reference to a feature factory, used for compatibility with
|
|
66 more obscure parts of Bio::DB::GFF
|
|
67
|
|
68 The subfeatures passed in -segments may be an array of
|
|
69 Bio::Graphics::Feature objects, or an array of [$start,$stop]
|
|
70 pairs. Each pair should be a two-element array reference. In the
|
|
71 latter case, the feature type passed in -subtype will be used when
|
|
72 creating the subfeatures.
|
|
73
|
|
74 If no feature type is passed, then it defaults to "feature".
|
|
75
|
|
76 =head2 Non-SeqFeatureI methods
|
|
77
|
|
78 A number of new methods are provided for compatibility with
|
|
79 Ace::Sequence, which has a slightly different API from SeqFeatureI:
|
|
80
|
|
81 =over 4
|
|
82
|
|
83 =item add_segment(@segments)
|
|
84
|
|
85 Add one or more segments (a subfeature). Segments can either be
|
|
86 Feature objects, or [start,stop] arrays, as in the -segments argument
|
|
87 to new(). The feature endpoints are automatically adjusted.
|
|
88
|
|
89 =item segments()
|
|
90
|
|
91 An alias for sub_SeqFeature().
|
|
92
|
|
93 =item merged_segments()
|
|
94
|
|
95 Another alias for sub_SeqFeature().
|
|
96
|
|
97 =item stop()
|
|
98
|
|
99 An alias for end().
|
|
100
|
|
101 =item name()
|
|
102
|
|
103 An alias for seqname().
|
|
104
|
|
105 =item exons()
|
|
106
|
|
107 An alias for sub_SeqFeature() (you don't want to know why!)
|
|
108
|
|
109 =back
|
|
110
|
|
111 =cut
|
|
112
|
|
113 use strict;
|
|
114 use Bio::Root::Root;
|
|
115 use Bio::SeqFeatureI;
|
|
116 use Bio::SeqI;
|
|
117 use Bio::LocationI;
|
|
118
|
|
119 use vars '@ISA';
|
|
120 @ISA = qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI);
|
|
121
|
|
122 *stop = \&end;
|
|
123 *info = \&name;
|
|
124 *seqname = \&name;
|
|
125 *type = \&primary_tag;
|
|
126 *exons = *sub_SeqFeature = *merged_segments = \&segments;
|
|
127 *method = \&type;
|
|
128 *source = \&source_tag;
|
|
129
|
|
130 sub target { return; }
|
|
131 sub hit { return; }
|
|
132
|
|
133 # usage:
|
|
134 # Bio::Graphics::Feature->new(
|
|
135 # -start => 1,
|
|
136 # -end => 100,
|
|
137 # -name => 'fred feature',
|
|
138 # -strand => +1);
|
|
139 #
|
|
140 # Alternatively, use -segments => [ [start,stop],[start,stop]...]
|
|
141 # to create a multisegmented feature.
|
|
142 sub new {
|
|
143 my $class= shift;
|
|
144 $class = ref($class) if ref $class;
|
|
145 my %arg = @_;
|
|
146
|
|
147 my $self = bless {},$class;
|
|
148
|
|
149 $arg{-strand} ||= 0;
|
|
150 $self->{strand} = $arg{-strand} ? ($arg{-strand} >= 0 ? +1 : -1) : 0;
|
|
151 $self->{name} = $arg{-name} || $arg{-seqname} || $arg{-display_id}
|
|
152 || $arg{-display_name} || $arg{-id} || $arg{-primary_id};
|
|
153 $self->{type} = $arg{-type} || 'feature';
|
|
154 $self->{subtype} = $arg{-subtype} if exists $arg{-subtype};
|
|
155 $self->{source} = $arg{-source} || $arg{-source_tag} || '';
|
|
156 $self->{score} = $arg{-score} if exists $arg{-score};
|
|
157 $self->{start} = $arg{-start};
|
|
158 $self->{stop} = $arg{-end} || $arg{-stop};
|
|
159 $self->{ref} = $arg{-ref};
|
|
160 $self->{class} = $arg{-class} if exists $arg{-class};
|
|
161 $self->{url} = $arg{-url} if exists $arg{-url};
|
|
162 $self->{seq} = $arg{-seq} if exists $arg{-seq};
|
|
163 $self->{phase} = $arg{-phase} if exists $arg{-phase};
|
|
164 $self->{desc} = $arg{-desc} if exists $arg{-desc};
|
|
165 $self->{attrib} = $arg{-attributes} if exists $arg{-attributes};
|
|
166 $self->{factory} = $arg{-factory} if exists $arg{-factory};
|
|
167
|
|
168 # fix start, stop
|
|
169 if (defined $self->{stop} && defined $self->{start}
|
|
170 && $self->{stop} < $self->{start}) {
|
|
171 @{$self}{'start','stop'} = @{$self}{'stop','start'};
|
|
172 $self->{strand} *= -1;
|
|
173 }
|
|
174
|
|
175 my @segments;
|
|
176 if (my $s = $arg{-segments}) {
|
|
177 $self->add_segment(@$s);
|
|
178 }
|
|
179 $self;
|
|
180 }
|
|
181
|
|
182 sub add_segment {
|
|
183 my $self = shift;
|
|
184 my $type = $self->{subtype} || $self->{type};
|
|
185 $self->{segments} ||= [];
|
|
186
|
|
187 my @segments = @{$self->{segments}};
|
|
188
|
|
189 for my $seg (@_) {
|
|
190 if (ref($seg) eq 'ARRAY') {
|
|
191 my ($start,$stop) = @{$seg};
|
|
192 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
|
|
193 my $strand = $self->{strand};
|
|
194
|
|
195 if ($start > $stop) {
|
|
196 ($start,$stop) = ($stop,$start);
|
|
197 # $strand *= -1;
|
|
198 $strand = -1;
|
|
199 }
|
|
200 push @segments,$self->new(-start => $start,
|
|
201 -stop => $stop,
|
|
202 -strand => $strand,
|
|
203 -type => $type);
|
|
204 } else {
|
|
205 push @segments,$seg;
|
|
206 }
|
|
207 }
|
|
208 if (@segments) {
|
|
209 local $^W = 0; # some warning of an uninitialized variable...
|
|
210 $self->{segments} = [ sort {$a->start <=> $b->start } @segments ];
|
|
211 $self->{start} = $self->{segments}[0]->start;
|
|
212 ($self->{stop}) = sort { $b <=> $a } map { $_->end } @segments;
|
|
213 }
|
|
214 }
|
|
215
|
|
216 sub segments {
|
|
217 my $self = shift;
|
|
218 my $s = $self->{segments} or return wantarray ? () : 0;
|
|
219 @$s;
|
|
220 }
|
|
221 sub score {
|
|
222 my $self = shift;
|
|
223 my $d = $self->{score};
|
|
224 $self->{score} = shift if @_;
|
|
225 $d;
|
|
226 }
|
|
227 sub primary_tag { shift->{type} }
|
|
228 sub name {
|
|
229 my $self = shift;
|
|
230 my $d = $self->{name};
|
|
231 $self->{name} = shift if @_;
|
|
232 $d;
|
|
233 }
|
|
234 sub seq_id { shift->ref() }
|
|
235 sub ref {
|
|
236 my $self = shift;
|
|
237 my $d = $self->{ref};
|
|
238 $self->{ref} = shift if @_;
|
|
239 $d;
|
|
240 }
|
|
241 sub start {
|
|
242 my $self = shift;
|
|
243 my $d = $self->{start};
|
|
244 $self->{start} = shift if @_;
|
|
245 $d;
|
|
246 }
|
|
247 sub end {
|
|
248 my $self = shift;
|
|
249 my $d = $self->{stop};
|
|
250 $self->{stop} = shift if @_;
|
|
251 $d;
|
|
252 }
|
|
253 sub strand {
|
|
254 my $self = shift;
|
|
255 my $d = $self->{strand};
|
|
256 $self->{strand} = shift if @_;
|
|
257 $d;
|
|
258 }
|
|
259 sub length {
|
|
260 my $self = shift;
|
|
261 return $self->end - $self->start + 1;
|
|
262 }
|
|
263
|
|
264 sub seq {
|
|
265 my $self = shift;
|
|
266 my $dna = exists $self->{seq} ? $self->{seq} : '';
|
|
267 # $dna .= 'n' x ($self->length - CORE::length($dna));
|
|
268 return $dna;
|
|
269 }
|
|
270 *dna = \&seq;
|
|
271
|
|
272 =head2 factory
|
|
273
|
|
274 Title : factory
|
|
275 Usage : $factory = $obj->factory([$new_factory])
|
|
276 Function: Returns the feature factory from which this feature was generated.
|
|
277 Mostly for compatibility with weird dependencies in gbrowse.
|
|
278 Returns : A feature factory
|
|
279 Args : None
|
|
280
|
|
281 =cut
|
|
282
|
|
283 sub factory {
|
|
284 my $self = shift;
|
|
285 my $d = $self->{factory};
|
|
286 $self->{factory} = shift if @_;
|
|
287 $d;
|
|
288 }
|
|
289
|
|
290 =head2 display_name
|
|
291
|
|
292 Title : display_name
|
|
293 Usage : $id = $obj->display_name or $obj->display_name($newid);
|
|
294 Function: Gets or sets the display id, also known as the common name of
|
|
295 the Seq object.
|
|
296
|
|
297 The semantics of this is that it is the most likely string
|
|
298 to be used as an identifier of the sequence, and likely to
|
|
299 have "human" readability. The id is equivalent to the LOCUS
|
|
300 field of the GenBank/EMBL databanks and the ID field of the
|
|
301 Swissprot/sptrembl database. In fasta format, the >(\S+) is
|
|
302 presumed to be the id, though some people overload the id
|
|
303 to embed other information. Bioperl does not use any
|
|
304 embedded information in the ID field, and people are
|
|
305 encouraged to use other mechanisms (accession field for
|
|
306 example, or extending the sequence object) to solve this.
|
|
307
|
|
308 Notice that $seq->id() maps to this function, mainly for
|
|
309 legacy/convenience issues.
|
|
310 Returns : A string
|
|
311 Args : None or a new id
|
|
312
|
|
313
|
|
314 =cut
|
|
315
|
|
316 sub display_name { shift->name }
|
|
317
|
|
318 *display_id = \&display_name;
|
|
319
|
|
320 =head2 accession_number
|
|
321
|
|
322 Title : accession_number
|
|
323 Usage : $unique_biological_key = $obj->accession_number;
|
|
324 Function: Returns the unique biological id for a sequence, commonly
|
|
325 called the accession_number. For sequences from established
|
|
326 databases, the implementors should try to use the correct
|
|
327 accession number. Notice that primary_id() provides the
|
|
328 unique id for the implemetation, allowing multiple objects
|
|
329 to have the same accession number in a particular implementation.
|
|
330
|
|
331 For sequences with no accession number, this method should return
|
|
332 "unknown".
|
|
333 Returns : A string
|
|
334 Args : None
|
|
335
|
|
336
|
|
337 =cut
|
|
338
|
|
339 sub accession_number {
|
|
340 return 'unknown';
|
|
341 }
|
|
342
|
|
343 =head2 alphabet
|
|
344
|
|
345 Title : alphabet
|
|
346 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
|
|
347 Function: Returns the type of sequence being one of
|
|
348 'dna', 'rna' or 'protein'. This is case sensitive.
|
|
349
|
|
350 This is not called <type> because this would cause
|
|
351 upgrade problems from the 0.5 and earlier Seq objects.
|
|
352
|
|
353 Returns : a string either 'dna','rna','protein'. NB - the object must
|
|
354 make a call of the type - if there is no type specified it
|
|
355 has to guess.
|
|
356 Args : none
|
|
357 Status : Virtual
|
|
358
|
|
359
|
|
360 =cut
|
|
361
|
|
362 sub alphabet{
|
|
363 return 'dna'; # no way this will be anything other than dna!
|
|
364 }
|
|
365
|
|
366
|
|
367
|
|
368 =head2 desc
|
|
369
|
|
370 Title : desc
|
|
371 Usage : $seqobj->desc($string) or $seqobj->desc()
|
|
372 Function: Sets or gets the description of the sequence
|
|
373 Example :
|
|
374 Returns : The description
|
|
375 Args : The description or none
|
|
376
|
|
377
|
|
378 =cut
|
|
379
|
|
380 sub desc {
|
|
381 my $self = shift;
|
|
382 my $d = $self->{desc};
|
|
383 $self->{desc} = shift if @_;
|
|
384 $d;
|
|
385 }
|
|
386
|
|
387 sub notes {
|
|
388 return shift->desc;
|
|
389 }
|
|
390
|
|
391 sub low {
|
|
392 my $self = shift;
|
|
393 return $self->start < $self->end ? $self->start : $self->end;
|
|
394 }
|
|
395
|
|
396 sub high {
|
|
397 my $self = shift;
|
|
398 return $self->start > $self->end ? $self->start : $self->end;
|
|
399 }
|
|
400
|
|
401 =head2 location
|
|
402
|
|
403 Title : location
|
|
404 Usage : my $location = $seqfeature->location()
|
|
405 Function: returns a location object suitable for identifying location
|
|
406 of feature on sequence or parent feature
|
|
407 Returns : Bio::LocationI object
|
|
408 Args : none
|
|
409
|
|
410 =cut
|
|
411
|
|
412 sub location {
|
|
413 my $self = shift;
|
|
414 require Bio::Location::Split unless Bio::Location::Split->can('new');
|
|
415 my $location;
|
|
416 if (my @segments = $self->segments) {
|
|
417 $location = Bio::Location::Split->new();
|
|
418 foreach (@segments) {
|
|
419 $location->add_sub_Location($_);
|
|
420 }
|
|
421 } else {
|
|
422 $location = $self;
|
|
423 }
|
|
424 $location;
|
|
425 }
|
|
426
|
|
427 sub coordinate_policy {
|
|
428 require Bio::Location::WidestCoordPolicy unless Bio::Location::WidestCoordPolicy->can('new');
|
|
429 return Bio::Location::WidestCoordPolicy->new();
|
|
430 }
|
|
431
|
|
432 sub min_start { shift->low }
|
|
433 sub max_start { shift->low }
|
|
434 sub min_end { shift->high }
|
|
435 sub max_end { shift->high}
|
|
436 sub start_pos_type { 'EXACT' }
|
|
437 sub end_pos_type { 'EXACT' }
|
|
438 sub to_FTstring {
|
|
439 my $self = shift;
|
|
440 my $low = $self->min_start;
|
|
441 my $high = $self->max_end;
|
|
442 return "$low..$high";
|
|
443 }
|
|
444 sub phase { shift->{phase} }
|
|
445 sub class {
|
|
446 my $self = shift;
|
|
447 my $d = $self->{class};
|
|
448 $self->{class} = shift if @_;
|
|
449 return defined($d) ? $d : ucfirst $self->method;
|
|
450 }
|
|
451
|
|
452 sub gff_string {
|
|
453 my $self = shift;
|
|
454 my $name = $self->name;
|
|
455 my $class = $self->class;
|
|
456 my $group = "$class $name" if $name;
|
|
457 my $string;
|
|
458 $string .= join("\t",$self->ref,$self->source||'.',$self->method||'.',
|
|
459 $self->start,$self->stop,
|
|
460 $self->score||'.',$self->strand||'.',$self->phase||'.',
|
|
461 $group);
|
|
462 $string .= "\n";
|
|
463 foreach ($self->sub_SeqFeature) {
|
|
464 # add missing data if we need it
|
|
465 $_->ref($self->ref) unless defined $_->ref;
|
|
466 $_->name($self->name);
|
|
467 $_->class($self->class);
|
|
468 $string .= $_->gff_string;
|
|
469 }
|
|
470 $string;
|
|
471 }
|
|
472
|
|
473
|
|
474 sub db { return }
|
|
475
|
|
476 sub source_tag {
|
|
477 my $self = shift;
|
|
478 my $d = $self->{source};
|
|
479 $self->{source} = shift if @_;
|
|
480 $d;
|
|
481 }
|
|
482
|
|
483 # This probably should be deleted. Not sure why it's here, but might
|
|
484 # have been added for Ace::Sequence::Feature-compliance.
|
|
485 sub introns {
|
|
486 my $self = shift;
|
|
487 return;
|
|
488 }
|
|
489
|
|
490 sub has_tag { }
|
|
491
|
|
492 # get/set the configurator (Bio::Graphics::FeatureFile) for this feature
|
|
493 sub configurator {
|
|
494 my $self = shift;
|
|
495 my $d = $self->{configurator};
|
|
496 $self->{configurator} = shift if @_;
|
|
497 $d;
|
|
498 }
|
|
499
|
|
500 # get/set the url for this feature
|
|
501 sub url {
|
|
502 my $self = shift;
|
|
503 my $d = $self->{url};
|
|
504 $self->{url} = shift if @_;
|
|
505 $d;
|
|
506 }
|
|
507
|
|
508 # make a link
|
|
509 sub make_link {
|
|
510 my $self = shift;
|
|
511 if (my $url = $self->url) {
|
|
512 return $url;
|
|
513 }
|
|
514
|
|
515 elsif (my $configurator = $self->configurator) {
|
|
516 return $configurator->make_link($self);
|
|
517 }
|
|
518
|
|
519 else {
|
|
520 return;
|
|
521 }
|
|
522 }
|
|
523
|
|
524 sub all_tags {
|
|
525 my $self = shift;
|
|
526 return keys %{$self->{attrib}};
|
|
527 }
|
|
528 sub each_tag_value {
|
|
529 my $self = shift;
|
|
530 my $tag = shift;
|
|
531 my $value = $self->{attrib}{$tag} or return;
|
|
532 return CORE::ref $value ? @{$self->{attrib}{$tag}}
|
|
533 : $self->{attrib}{$tag};
|
|
534 }
|
|
535
|
|
536 sub DESTROY { }
|
|
537
|
|
538 1;
|
|
539
|
|
540 __END__
|
|
541
|
|
542 =head1 SEE ALSO
|
|
543
|
|
544 L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>,
|
|
545 L<GD>
|
|
546
|
|
547 =head1 AUTHOR
|
|
548
|
|
549 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
|
|
550
|
|
551 Copyright (c) 2001 Cold Spring Harbor Laboratory
|
|
552
|
|
553 This library is free software; you can redistribute it and/or modify
|
|
554 it under the same terms as Perl itself. See DISCLAIMER.txt for
|
|
555 disclaimers of warranty.
|
|
556
|
|
557 =cut
|