Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Graphics/Feature.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 package Bio::Graphics::Feature; | |
| 2 | |
| 3 =head1 NAME | |
| 4 | |
| 5 Bio::Graphics::Feature - A simple feature object for use with Bio::Graphics::Panel | |
| 6 | |
| 7 =head1 SYNOPSIS | |
| 8 | |
| 9 use Bio::Graphics::Feature; | |
| 10 | |
| 11 # create a simple feature with no internal structure | |
| 12 $f = Bio::Graphics::Feature->new(-start => 1000, | |
| 13 -stop => 2000, | |
| 14 -type => 'transcript', | |
| 15 -name => 'alpha-1 antitrypsin', | |
| 16 -desc => 'an enzyme inhibitor', | |
| 17 ); | |
| 18 | |
| 19 # create a feature composed of multiple segments, all of type "similarity" | |
| 20 $f = Bio::Graphics::Feature->new(-segments => [[1000,1100],[1500,1550],[1800,2000]], | |
| 21 -name => 'ABC-3', | |
| 22 -type => 'gapped_alignment', | |
| 23 -subtype => 'similarity'); | |
| 24 | |
| 25 # build up a gene exon by exon | |
| 26 $e1 = Bio::Graphics::Feature->new(-start=>1,-stop=>100,-type=>'exon'); | |
| 27 $e2 = Bio::Graphics::Feature->new(-start=>150,-stop=>200,-type=>'exon'); | |
| 28 $e3 = Bio::Graphics::Feature->new(-start=>300,-stop=>500,-type=>'exon'); | |
| 29 $f = Bio::Graphics::Feature->new(-segments=>[$e1,$e2,$e3],-type=>'gene'); | |
| 30 | |
| 31 =head1 DESCRIPTION | |
| 32 | |
| 33 This is a simple Bio::SeqFeatureI-compliant object that is compatible | |
| 34 with Bio::Graphics::Panel. With it you can create lightweight feature | |
| 35 objects for drawing. | |
| 36 | |
| 37 All methods are as described in L<Bio::SeqFeatureI> with the following additions: | |
| 38 | |
| 39 =head2 The new() Constructor | |
| 40 | |
| 41 $feature = Bio::Graphics::Feature->new(@args); | |
| 42 | |
| 43 This method creates a new feature object. You can create a simple | |
| 44 feature that contains no subfeatures, or a hierarchically nested object. | |
| 45 | |
| 46 Arguments are as follows: | |
| 47 | |
| 48 -start the start position of the feature | |
| 49 -end the stop position of the feature | |
| 50 -stop an alias for end | |
| 51 -name the feature name (returned by seqname()) | |
| 52 -type the feature type (returned by primary_tag()) | |
| 53 -source the source tag | |
| 54 -desc a description of the feature | |
| 55 -segments a list of subfeatures (see below) | |
| 56 -subtype the type to use when creating subfeatures | |
| 57 -strand the strand of the feature (one of -1, 0 or +1) | |
| 58 -id an alias for -name | |
| 59 -seqname an alias for -name | |
| 60 -primary_id an alias for -name | |
| 61 -display_id an alias for -name | |
| 62 -display_name an alias for -name (do you get the idea the API has changed?) | |
| 63 -attributes a hashref of tag value attributes, in which the key is the tag | |
| 64 and the value is an array reference of values | |
| 65 -factory a reference to a feature factory, used for compatibility with | |
| 66 more obscure parts of Bio::DB::GFF | |
| 67 | |
| 68 The subfeatures passed in -segments may be an array of | |
| 69 Bio::Graphics::Feature objects, or an array of [$start,$stop] | |
| 70 pairs. Each pair should be a two-element array reference. In the | |
| 71 latter case, the feature type passed in -subtype will be used when | |
| 72 creating the subfeatures. | |
| 73 | |
| 74 If no feature type is passed, then it defaults to "feature". | |
| 75 | |
| 76 =head2 Non-SeqFeatureI methods | |
| 77 | |
| 78 A number of new methods are provided for compatibility with | |
| 79 Ace::Sequence, which has a slightly different API from SeqFeatureI: | |
| 80 | |
| 81 =over 4 | |
| 82 | |
| 83 =item add_segment(@segments) | |
| 84 | |
| 85 Add one or more segments (a subfeature). Segments can either be | |
| 86 Feature objects, or [start,stop] arrays, as in the -segments argument | |
| 87 to new(). The feature endpoints are automatically adjusted. | |
| 88 | |
| 89 =item segments() | |
| 90 | |
| 91 An alias for sub_SeqFeature(). | |
| 92 | |
| 93 =item merged_segments() | |
| 94 | |
| 95 Another alias for sub_SeqFeature(). | |
| 96 | |
| 97 =item stop() | |
| 98 | |
| 99 An alias for end(). | |
| 100 | |
| 101 =item name() | |
| 102 | |
| 103 An alias for seqname(). | |
| 104 | |
| 105 =item exons() | |
| 106 | |
| 107 An alias for sub_SeqFeature() (you don't want to know why!) | |
| 108 | |
| 109 =back | |
| 110 | |
| 111 =cut | |
| 112 | |
| 113 use strict; | |
| 114 use Bio::Root::Root; | |
| 115 use Bio::SeqFeatureI; | |
| 116 use Bio::SeqI; | |
| 117 use Bio::LocationI; | |
| 118 | |
| 119 use vars '@ISA'; | |
| 120 @ISA = qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI); | |
| 121 | |
| 122 *stop = \&end; | |
| 123 *info = \&name; | |
| 124 *seqname = \&name; | |
| 125 *type = \&primary_tag; | |
| 126 *exons = *sub_SeqFeature = *merged_segments = \&segments; | |
| 127 *method = \&type; | |
| 128 *source = \&source_tag; | |
| 129 | |
| 130 sub target { return; } | |
| 131 sub hit { return; } | |
| 132 | |
| 133 # usage: | |
| 134 # Bio::Graphics::Feature->new( | |
| 135 # -start => 1, | |
| 136 # -end => 100, | |
| 137 # -name => 'fred feature', | |
| 138 # -strand => +1); | |
| 139 # | |
| 140 # Alternatively, use -segments => [ [start,stop],[start,stop]...] | |
| 141 # to create a multisegmented feature. | |
| 142 sub new { | |
| 143 my $class= shift; | |
| 144 $class = ref($class) if ref $class; | |
| 145 my %arg = @_; | |
| 146 | |
| 147 my $self = bless {},$class; | |
| 148 | |
| 149 $arg{-strand} ||= 0; | |
| 150 $self->{strand} = $arg{-strand} ? ($arg{-strand} >= 0 ? +1 : -1) : 0; | |
| 151 $self->{name} = $arg{-name} || $arg{-seqname} || $arg{-display_id} | |
| 152 || $arg{-display_name} || $arg{-id} || $arg{-primary_id}; | |
| 153 $self->{type} = $arg{-type} || 'feature'; | |
| 154 $self->{subtype} = $arg{-subtype} if exists $arg{-subtype}; | |
| 155 $self->{source} = $arg{-source} || $arg{-source_tag} || ''; | |
| 156 $self->{score} = $arg{-score} if exists $arg{-score}; | |
| 157 $self->{start} = $arg{-start}; | |
| 158 $self->{stop} = $arg{-end} || $arg{-stop}; | |
| 159 $self->{ref} = $arg{-ref}; | |
| 160 $self->{class} = $arg{-class} if exists $arg{-class}; | |
| 161 $self->{url} = $arg{-url} if exists $arg{-url}; | |
| 162 $self->{seq} = $arg{-seq} if exists $arg{-seq}; | |
| 163 $self->{phase} = $arg{-phase} if exists $arg{-phase}; | |
| 164 $self->{desc} = $arg{-desc} if exists $arg{-desc}; | |
| 165 $self->{attrib} = $arg{-attributes} if exists $arg{-attributes}; | |
| 166 $self->{factory} = $arg{-factory} if exists $arg{-factory}; | |
| 167 | |
| 168 # fix start, stop | |
| 169 if (defined $self->{stop} && defined $self->{start} | |
| 170 && $self->{stop} < $self->{start}) { | |
| 171 @{$self}{'start','stop'} = @{$self}{'stop','start'}; | |
| 172 $self->{strand} *= -1; | |
| 173 } | |
| 174 | |
| 175 my @segments; | |
| 176 if (my $s = $arg{-segments}) { | |
| 177 $self->add_segment(@$s); | |
| 178 } | |
| 179 $self; | |
| 180 } | |
| 181 | |
| 182 sub add_segment { | |
| 183 my $self = shift; | |
| 184 my $type = $self->{subtype} || $self->{type}; | |
| 185 $self->{segments} ||= []; | |
| 186 | |
| 187 my @segments = @{$self->{segments}}; | |
| 188 | |
| 189 for my $seg (@_) { | |
| 190 if (ref($seg) eq 'ARRAY') { | |
| 191 my ($start,$stop) = @{$seg}; | |
| 192 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us | |
| 193 my $strand = $self->{strand}; | |
| 194 | |
| 195 if ($start > $stop) { | |
| 196 ($start,$stop) = ($stop,$start); | |
| 197 # $strand *= -1; | |
| 198 $strand = -1; | |
| 199 } | |
| 200 push @segments,$self->new(-start => $start, | |
| 201 -stop => $stop, | |
| 202 -strand => $strand, | |
| 203 -type => $type); | |
| 204 } else { | |
| 205 push @segments,$seg; | |
| 206 } | |
| 207 } | |
| 208 if (@segments) { | |
| 209 local $^W = 0; # some warning of an uninitialized variable... | |
| 210 $self->{segments} = [ sort {$a->start <=> $b->start } @segments ]; | |
| 211 $self->{start} = $self->{segments}[0]->start; | |
| 212 ($self->{stop}) = sort { $b <=> $a } map { $_->end } @segments; | |
| 213 } | |
| 214 } | |
| 215 | |
| 216 sub segments { | |
| 217 my $self = shift; | |
| 218 my $s = $self->{segments} or return wantarray ? () : 0; | |
| 219 @$s; | |
| 220 } | |
| 221 sub score { | |
| 222 my $self = shift; | |
| 223 my $d = $self->{score}; | |
| 224 $self->{score} = shift if @_; | |
| 225 $d; | |
| 226 } | |
| 227 sub primary_tag { shift->{type} } | |
| 228 sub name { | |
| 229 my $self = shift; | |
| 230 my $d = $self->{name}; | |
| 231 $self->{name} = shift if @_; | |
| 232 $d; | |
| 233 } | |
| 234 sub seq_id { shift->ref() } | |
| 235 sub ref { | |
| 236 my $self = shift; | |
| 237 my $d = $self->{ref}; | |
| 238 $self->{ref} = shift if @_; | |
| 239 $d; | |
| 240 } | |
| 241 sub start { | |
| 242 my $self = shift; | |
| 243 my $d = $self->{start}; | |
| 244 $self->{start} = shift if @_; | |
| 245 $d; | |
| 246 } | |
| 247 sub end { | |
| 248 my $self = shift; | |
| 249 my $d = $self->{stop}; | |
| 250 $self->{stop} = shift if @_; | |
| 251 $d; | |
| 252 } | |
| 253 sub strand { | |
| 254 my $self = shift; | |
| 255 my $d = $self->{strand}; | |
| 256 $self->{strand} = shift if @_; | |
| 257 $d; | |
| 258 } | |
| 259 sub length { | |
| 260 my $self = shift; | |
| 261 return $self->end - $self->start + 1; | |
| 262 } | |
| 263 | |
| 264 sub seq { | |
| 265 my $self = shift; | |
| 266 my $dna = exists $self->{seq} ? $self->{seq} : ''; | |
| 267 # $dna .= 'n' x ($self->length - CORE::length($dna)); | |
| 268 return $dna; | |
| 269 } | |
| 270 *dna = \&seq; | |
| 271 | |
| 272 =head2 factory | |
| 273 | |
| 274 Title : factory | |
| 275 Usage : $factory = $obj->factory([$new_factory]) | |
| 276 Function: Returns the feature factory from which this feature was generated. | |
| 277 Mostly for compatibility with weird dependencies in gbrowse. | |
| 278 Returns : A feature factory | |
| 279 Args : None | |
| 280 | |
| 281 =cut | |
| 282 | |
| 283 sub factory { | |
| 284 my $self = shift; | |
| 285 my $d = $self->{factory}; | |
| 286 $self->{factory} = shift if @_; | |
| 287 $d; | |
| 288 } | |
| 289 | |
| 290 =head2 display_name | |
| 291 | |
| 292 Title : display_name | |
| 293 Usage : $id = $obj->display_name or $obj->display_name($newid); | |
| 294 Function: Gets or sets the display id, also known as the common name of | |
| 295 the Seq object. | |
| 296 | |
| 297 The semantics of this is that it is the most likely string | |
| 298 to be used as an identifier of the sequence, and likely to | |
| 299 have "human" readability. The id is equivalent to the LOCUS | |
| 300 field of the GenBank/EMBL databanks and the ID field of the | |
| 301 Swissprot/sptrembl database. In fasta format, the >(\S+) is | |
| 302 presumed to be the id, though some people overload the id | |
| 303 to embed other information. Bioperl does not use any | |
| 304 embedded information in the ID field, and people are | |
| 305 encouraged to use other mechanisms (accession field for | |
| 306 example, or extending the sequence object) to solve this. | |
| 307 | |
| 308 Notice that $seq->id() maps to this function, mainly for | |
| 309 legacy/convenience issues. | |
| 310 Returns : A string | |
| 311 Args : None or a new id | |
| 312 | |
| 313 | |
| 314 =cut | |
| 315 | |
| 316 sub display_name { shift->name } | |
| 317 | |
| 318 *display_id = \&display_name; | |
| 319 | |
| 320 =head2 accession_number | |
| 321 | |
| 322 Title : accession_number | |
| 323 Usage : $unique_biological_key = $obj->accession_number; | |
| 324 Function: Returns the unique biological id for a sequence, commonly | |
| 325 called the accession_number. For sequences from established | |
| 326 databases, the implementors should try to use the correct | |
| 327 accession number. Notice that primary_id() provides the | |
| 328 unique id for the implemetation, allowing multiple objects | |
| 329 to have the same accession number in a particular implementation. | |
| 330 | |
| 331 For sequences with no accession number, this method should return | |
| 332 "unknown". | |
| 333 Returns : A string | |
| 334 Args : None | |
| 335 | |
| 336 | |
| 337 =cut | |
| 338 | |
| 339 sub accession_number { | |
| 340 return 'unknown'; | |
| 341 } | |
| 342 | |
| 343 =head2 alphabet | |
| 344 | |
| 345 Title : alphabet | |
| 346 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
| 347 Function: Returns the type of sequence being one of | |
| 348 'dna', 'rna' or 'protein'. This is case sensitive. | |
| 349 | |
| 350 This is not called <type> because this would cause | |
| 351 upgrade problems from the 0.5 and earlier Seq objects. | |
| 352 | |
| 353 Returns : a string either 'dna','rna','protein'. NB - the object must | |
| 354 make a call of the type - if there is no type specified it | |
| 355 has to guess. | |
| 356 Args : none | |
| 357 Status : Virtual | |
| 358 | |
| 359 | |
| 360 =cut | |
| 361 | |
| 362 sub alphabet{ | |
| 363 return 'dna'; # no way this will be anything other than dna! | |
| 364 } | |
| 365 | |
| 366 | |
| 367 | |
| 368 =head2 desc | |
| 369 | |
| 370 Title : desc | |
| 371 Usage : $seqobj->desc($string) or $seqobj->desc() | |
| 372 Function: Sets or gets the description of the sequence | |
| 373 Example : | |
| 374 Returns : The description | |
| 375 Args : The description or none | |
| 376 | |
| 377 | |
| 378 =cut | |
| 379 | |
| 380 sub desc { | |
| 381 my $self = shift; | |
| 382 my $d = $self->{desc}; | |
| 383 $self->{desc} = shift if @_; | |
| 384 $d; | |
| 385 } | |
| 386 | |
| 387 sub notes { | |
| 388 return shift->desc; | |
| 389 } | |
| 390 | |
| 391 sub low { | |
| 392 my $self = shift; | |
| 393 return $self->start < $self->end ? $self->start : $self->end; | |
| 394 } | |
| 395 | |
| 396 sub high { | |
| 397 my $self = shift; | |
| 398 return $self->start > $self->end ? $self->start : $self->end; | |
| 399 } | |
| 400 | |
| 401 =head2 location | |
| 402 | |
| 403 Title : location | |
| 404 Usage : my $location = $seqfeature->location() | |
| 405 Function: returns a location object suitable for identifying location | |
| 406 of feature on sequence or parent feature | |
| 407 Returns : Bio::LocationI object | |
| 408 Args : none | |
| 409 | |
| 410 =cut | |
| 411 | |
| 412 sub location { | |
| 413 my $self = shift; | |
| 414 require Bio::Location::Split unless Bio::Location::Split->can('new'); | |
| 415 my $location; | |
| 416 if (my @segments = $self->segments) { | |
| 417 $location = Bio::Location::Split->new(); | |
| 418 foreach (@segments) { | |
| 419 $location->add_sub_Location($_); | |
| 420 } | |
| 421 } else { | |
| 422 $location = $self; | |
| 423 } | |
| 424 $location; | |
| 425 } | |
| 426 | |
| 427 sub coordinate_policy { | |
| 428 require Bio::Location::WidestCoordPolicy unless Bio::Location::WidestCoordPolicy->can('new'); | |
| 429 return Bio::Location::WidestCoordPolicy->new(); | |
| 430 } | |
| 431 | |
| 432 sub min_start { shift->low } | |
| 433 sub max_start { shift->low } | |
| 434 sub min_end { shift->high } | |
| 435 sub max_end { shift->high} | |
| 436 sub start_pos_type { 'EXACT' } | |
| 437 sub end_pos_type { 'EXACT' } | |
| 438 sub to_FTstring { | |
| 439 my $self = shift; | |
| 440 my $low = $self->min_start; | |
| 441 my $high = $self->max_end; | |
| 442 return "$low..$high"; | |
| 443 } | |
| 444 sub phase { shift->{phase} } | |
| 445 sub class { | |
| 446 my $self = shift; | |
| 447 my $d = $self->{class}; | |
| 448 $self->{class} = shift if @_; | |
| 449 return defined($d) ? $d : ucfirst $self->method; | |
| 450 } | |
| 451 | |
| 452 sub gff_string { | |
| 453 my $self = shift; | |
| 454 my $name = $self->name; | |
| 455 my $class = $self->class; | |
| 456 my $group = "$class $name" if $name; | |
| 457 my $string; | |
| 458 $string .= join("\t",$self->ref,$self->source||'.',$self->method||'.', | |
| 459 $self->start,$self->stop, | |
| 460 $self->score||'.',$self->strand||'.',$self->phase||'.', | |
| 461 $group); | |
| 462 $string .= "\n"; | |
| 463 foreach ($self->sub_SeqFeature) { | |
| 464 # add missing data if we need it | |
| 465 $_->ref($self->ref) unless defined $_->ref; | |
| 466 $_->name($self->name); | |
| 467 $_->class($self->class); | |
| 468 $string .= $_->gff_string; | |
| 469 } | |
| 470 $string; | |
| 471 } | |
| 472 | |
| 473 | |
| 474 sub db { return } | |
| 475 | |
| 476 sub source_tag { | |
| 477 my $self = shift; | |
| 478 my $d = $self->{source}; | |
| 479 $self->{source} = shift if @_; | |
| 480 $d; | |
| 481 } | |
| 482 | |
| 483 # This probably should be deleted. Not sure why it's here, but might | |
| 484 # have been added for Ace::Sequence::Feature-compliance. | |
| 485 sub introns { | |
| 486 my $self = shift; | |
| 487 return; | |
| 488 } | |
| 489 | |
| 490 sub has_tag { } | |
| 491 | |
| 492 # get/set the configurator (Bio::Graphics::FeatureFile) for this feature | |
| 493 sub configurator { | |
| 494 my $self = shift; | |
| 495 my $d = $self->{configurator}; | |
| 496 $self->{configurator} = shift if @_; | |
| 497 $d; | |
| 498 } | |
| 499 | |
| 500 # get/set the url for this feature | |
| 501 sub url { | |
| 502 my $self = shift; | |
| 503 my $d = $self->{url}; | |
| 504 $self->{url} = shift if @_; | |
| 505 $d; | |
| 506 } | |
| 507 | |
| 508 # make a link | |
| 509 sub make_link { | |
| 510 my $self = shift; | |
| 511 if (my $url = $self->url) { | |
| 512 return $url; | |
| 513 } | |
| 514 | |
| 515 elsif (my $configurator = $self->configurator) { | |
| 516 return $configurator->make_link($self); | |
| 517 } | |
| 518 | |
| 519 else { | |
| 520 return; | |
| 521 } | |
| 522 } | |
| 523 | |
| 524 sub all_tags { | |
| 525 my $self = shift; | |
| 526 return keys %{$self->{attrib}}; | |
| 527 } | |
| 528 sub each_tag_value { | |
| 529 my $self = shift; | |
| 530 my $tag = shift; | |
| 531 my $value = $self->{attrib}{$tag} or return; | |
| 532 return CORE::ref $value ? @{$self->{attrib}{$tag}} | |
| 533 : $self->{attrib}{$tag}; | |
| 534 } | |
| 535 | |
| 536 sub DESTROY { } | |
| 537 | |
| 538 1; | |
| 539 | |
| 540 __END__ | |
| 541 | |
| 542 =head1 SEE ALSO | |
| 543 | |
| 544 L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>, | |
| 545 L<GD> | |
| 546 | |
| 547 =head1 AUTHOR | |
| 548 | |
| 549 Lincoln Stein E<lt>lstein@cshl.orgE<gt>. | |
| 550 | |
| 551 Copyright (c) 2001 Cold Spring Harbor Laboratory | |
| 552 | |
| 553 This library is free software; you can redistribute it and/or modify | |
| 554 it under the same terms as Perl itself. See DISCLAIMER.txt for | |
| 555 disclaimers of warranty. | |
| 556 | |
| 557 =cut |
