diff variant_effect_predictor/Bio/Graphics/Feature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Graphics/Feature.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,557 @@
+package Bio::Graphics::Feature;
+
+=head1 NAME
+
+Bio::Graphics::Feature - A simple feature object for use with Bio::Graphics::Panel
+
+=head1 SYNOPSIS
+
+ use Bio::Graphics::Feature;
+
+ # create a simple feature with no internal structure
+ $f = Bio::Graphics::Feature->new(-start => 1000,
+                                  -stop  => 2000,
+                                  -type  => 'transcript',
+                                  -name  => 'alpha-1 antitrypsin',
+				  -desc  => 'an enzyme inhibitor',
+                                 );
+
+ # create a feature composed of multiple segments, all of type "similarity"
+ $f = Bio::Graphics::Feature->new(-segments => [[1000,1100],[1500,1550],[1800,2000]],
+                                  -name     => 'ABC-3',
+                                  -type     => 'gapped_alignment',
+                                  -subtype  => 'similarity');
+
+ # build up a gene exon by exon
+ $e1 = Bio::Graphics::Feature->new(-start=>1,-stop=>100,-type=>'exon');
+ $e2 = Bio::Graphics::Feature->new(-start=>150,-stop=>200,-type=>'exon');
+ $e3 = Bio::Graphics::Feature->new(-start=>300,-stop=>500,-type=>'exon');
+ $f  = Bio::Graphics::Feature->new(-segments=>[$e1,$e2,$e3],-type=>'gene');
+
+=head1 DESCRIPTION
+
+This is a simple Bio::SeqFeatureI-compliant object that is compatible
+with Bio::Graphics::Panel.  With it you can create lightweight feature
+objects for drawing.
+
+All methods are as described in L<Bio::SeqFeatureI> with the following additions:
+
+=head2 The new() Constructor
+
+ $feature = Bio::Graphics::Feature->new(@args);
+
+This method creates a new feature object.  You can create a simple
+feature that contains no subfeatures, or a hierarchically nested object.
+
+Arguments are as follows:
+
+  -start       the start position of the feature
+  -end         the stop position of the feature
+  -stop        an alias for end
+  -name        the feature name (returned by seqname())
+  -type        the feature type (returned by primary_tag())
+  -source      the source tag
+  -desc        a description of the feature
+  -segments    a list of subfeatures (see below)
+  -subtype     the type to use when creating subfeatures
+  -strand      the strand of the feature (one of -1, 0 or +1)
+  -id          an alias for -name
+  -seqname     an alias for -name
+  -primary_id  an alias for -name
+  -display_id  an alias for -name
+  -display_name an alias for -name  (do you get the idea the API has changed?)
+  -attributes  a hashref of tag value attributes, in which the key is the tag
+               and the value is an array reference of values
+  -factory     a reference to a feature factory, used for compatibility with
+               more obscure parts of Bio::DB::GFF
+
+The subfeatures passed in -segments may be an array of
+Bio::Graphics::Feature objects, or an array of [$start,$stop]
+pairs. Each pair should be a two-element array reference.  In the
+latter case, the feature type passed in -subtype will be used when
+creating the subfeatures.
+
+If no feature type is passed, then it defaults to "feature".
+
+=head2 Non-SeqFeatureI methods
+
+A number of new methods are provided for compatibility with
+Ace::Sequence, which has a slightly different API from SeqFeatureI:
+
+=over 4
+
+=item add_segment(@segments)
+
+Add one or more segments (a subfeature).  Segments can either be
+Feature objects, or [start,stop] arrays, as in the -segments argument
+to new().  The feature endpoints are automatically adjusted.
+
+=item segments()
+
+An alias for sub_SeqFeature().
+
+=item merged_segments()
+
+Another alias for sub_SeqFeature().
+
+=item stop()
+
+An alias for end().
+
+=item name()
+
+An alias for seqname().
+
+=item exons()
+
+An alias for sub_SeqFeature() (you don't want to know why!)
+
+=back
+
+=cut
+
+use strict;
+use Bio::Root::Root;
+use Bio::SeqFeatureI;
+use Bio::SeqI;
+use Bio::LocationI;
+
+use vars '@ISA';
+@ISA  = qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI);
+
+*stop        = \&end;
+*info        = \&name;
+*seqname     = \&name;
+*type        = \&primary_tag;
+*exons       = *sub_SeqFeature = *merged_segments = \&segments;
+*method      = \&type;
+*source      = \&source_tag;
+
+sub target { return; }
+sub hit    { return; }
+
+# usage:
+# Bio::Graphics::Feature->new(
+#                         -start => 1,
+#                         -end   => 100,
+#                         -name  => 'fred feature',
+#                         -strand => +1);
+#
+# Alternatively, use -segments => [ [start,stop],[start,stop]...]
+# to create a multisegmented feature.
+sub new {
+  my $class= shift;
+  $class = ref($class) if ref $class;
+  my %arg = @_;
+
+  my $self = bless {},$class;
+
+  $arg{-strand} ||= 0;
+  $self->{strand}  = $arg{-strand} ? ($arg{-strand} >= 0 ? +1 : -1) : 0;
+  $self->{name}    = $arg{-name}   || $arg{-seqname} || $arg{-display_id} 
+    || $arg{-display_name} || $arg{-id} || $arg{-primary_id};
+  $self->{type}    = $arg{-type}   || 'feature';
+  $self->{subtype} = $arg{-subtype} if exists $arg{-subtype};
+  $self->{source}  = $arg{-source} || $arg{-source_tag} || '';
+  $self->{score}   = $arg{-score}   if exists $arg{-score};
+  $self->{start}   = $arg{-start};
+  $self->{stop}    = $arg{-end} || $arg{-stop};
+  $self->{ref}     = $arg{-ref};
+  $self->{class}   = $arg{-class} if exists $arg{-class};
+  $self->{url}     = $arg{-url}   if exists $arg{-url};
+  $self->{seq}     = $arg{-seq}   if exists $arg{-seq};
+  $self->{phase}   = $arg{-phase} if exists $arg{-phase};
+  $self->{desc}    = $arg{-desc}  if exists $arg{-desc};
+  $self->{attrib}  = $arg{-attributes} if exists $arg{-attributes};
+  $self->{factory} = $arg{-factory} if exists $arg{-factory};
+
+  # fix start, stop
+  if (defined $self->{stop} && defined $self->{start}
+      && $self->{stop} < $self->{start}) {
+    @{$self}{'start','stop'} = @{$self}{'stop','start'};
+    $self->{strand} *= -1;
+  }
+
+  my @segments;
+  if (my $s = $arg{-segments}) {
+    $self->add_segment(@$s);
+  }
+  $self;
+}
+
+sub add_segment {
+  my $self        = shift;
+  my $type = $self->{subtype} || $self->{type};
+  $self->{segments} ||= [];
+
+  my @segments = @{$self->{segments}};
+
+  for my $seg (@_) {
+    if (ref($seg) eq 'ARRAY') {
+      my ($start,$stop) = @{$seg};
+      next unless defined $start && defined $stop;  # fixes an obscure bug somewhere above us
+      my $strand = $self->{strand};
+
+      if ($start > $stop) {
+	($start,$stop) = ($stop,$start);
+#	$strand *= -1;
+	$strand = -1;
+      }
+      push @segments,$self->new(-start  => $start,
+				-stop   => $stop,
+				-strand => $strand,
+				-type   => $type);
+    } else {
+      push @segments,$seg;
+    }
+  }
+  if (@segments) {
+    local $^W = 0;  # some warning of an uninitialized variable...
+    $self->{segments} = [ sort {$a->start <=> $b->start } @segments ];
+    $self->{start}    = $self->{segments}[0]->start;
+    ($self->{stop})   = sort { $b <=> $a } map { $_->end } @segments;
+  }
+}
+
+sub segments {
+  my $self = shift;
+  my $s = $self->{segments} or return wantarray ? () : 0;
+  @$s;
+}
+sub score    {
+  my $self = shift;
+  my $d = $self->{score};
+  $self->{score} = shift if @_;
+  $d;
+}
+sub primary_tag     { shift->{type}        }
+sub name            {
+  my $self = shift;
+  my $d    = $self->{name};
+  $self->{name} = shift if @_;
+  $d;
+}
+sub seq_id          { shift->ref()         }
+sub ref {
+  my $self = shift;
+  my $d = $self->{ref};
+  $self->{ref} = shift if @_;
+  $d;
+}
+sub start    {
+  my $self = shift;
+  my $d = $self->{start};
+  $self->{start} = shift if @_;
+  $d;
+}
+sub end    {
+  my $self = shift;
+  my $d = $self->{stop};
+  $self->{stop} = shift if @_;
+  $d;
+}
+sub strand {
+  my $self = shift;
+  my $d = $self->{strand};
+  $self->{strand} = shift if @_;
+  $d;
+}
+sub length {
+  my $self = shift;
+  return $self->end - $self->start + 1;
+}
+
+sub seq {
+  my $self = shift;
+  my $dna =  exists $self->{seq} ? $self->{seq} : '';
+  # $dna .= 'n' x ($self->length - CORE::length($dna));
+  return $dna;
+}
+*dna = \&seq;
+
+=head2 factory
+
+ Title   : factory
+ Usage   : $factory = $obj->factory([$new_factory])
+ Function: Returns the feature factory from which this feature was generated.
+           Mostly for compatibility with weird dependencies in gbrowse.
+ Returns : A feature factory
+ Args    : None
+
+=cut
+
+sub factory {
+  my $self = shift;
+  my $d = $self->{factory};
+  $self->{factory} = shift if @_;
+  $d;
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $id = $obj->display_name or $obj->display_name($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+
+=cut
+
+sub display_name { shift->name }
+
+*display_id = \&display_name;
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implemetation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+sub accession_number {
+    return 'unknown';
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+sub alphabet{
+    return 'dna'; # no way this will be anything other than dna!
+}
+
+
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+sub desc {
+  my $self = shift;
+  my $d    = $self->{desc};
+  $self->{desc} = shift if @_;
+  $d;
+}
+
+sub notes {
+  return shift->desc;
+}
+
+sub low {
+  my $self = shift;
+  return $self->start < $self->end ? $self->start : $self->end;
+}
+
+sub high {
+  my $self = shift;
+  return $self->start > $self->end ? $self->start : $self->end;
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location
+	   of feature on sequence or parent feature
+ Returns : Bio::LocationI object
+ Args    : none
+
+=cut
+
+sub location {
+   my $self = shift;
+   require Bio::Location::Split unless Bio::Location::Split->can('new');
+   my $location;
+   if (my @segments = $self->segments) {
+       $location = Bio::Location::Split->new();
+       foreach (@segments) {
+	 $location->add_sub_Location($_);
+       }
+   } else {
+       $location = $self;
+   }
+   $location;
+}
+
+sub coordinate_policy {
+   require Bio::Location::WidestCoordPolicy unless Bio::Location::WidestCoordPolicy->can('new');
+   return Bio::Location::WidestCoordPolicy->new();
+}
+
+sub min_start { shift->low }
+sub max_start { shift->low }
+sub min_end   { shift->high }
+sub max_end   { shift->high}
+sub start_pos_type { 'EXACT' }
+sub end_pos_type   { 'EXACT' }
+sub to_FTstring {
+  my $self = shift;
+  my $low  = $self->min_start;
+  my $high = $self->max_end;
+  return "$low..$high";
+}
+sub phase { shift->{phase} }
+sub class {
+  my $self = shift;
+  my $d = $self->{class};
+  $self->{class} = shift if @_;
+  return defined($d) ? $d : ucfirst $self->method;
+}
+
+sub gff_string {
+  my $self = shift;
+  my $name  = $self->name;
+  my $class = $self->class;
+  my $group = "$class $name" if $name;
+  my $string;
+  $string .= join("\t",$self->ref,$self->source||'.',$self->method||'.',
+                       $self->start,$self->stop,
+                       $self->score||'.',$self->strand||'.',$self->phase||'.',
+                       $group);
+  $string .= "\n";
+  foreach ($self->sub_SeqFeature) {
+    # add missing data if we need it
+    $_->ref($self->ref)     unless defined $_->ref;
+    $_->name($self->name);
+    $_->class($self->class);
+    $string .= $_->gff_string;
+  }
+  $string;
+}
+
+
+sub db { return }
+
+sub source_tag {
+  my $self = shift;
+  my $d = $self->{source};
+  $self->{source} = shift if @_;
+  $d;
+}
+
+# This probably should be deleted.  Not sure why it's here, but might
+# have been added for Ace::Sequence::Feature-compliance.
+sub introns {
+  my $self = shift;
+  return;
+}
+
+sub has_tag { }
+
+# get/set the configurator (Bio::Graphics::FeatureFile) for this feature
+sub configurator {
+  my $self = shift;
+  my $d = $self->{configurator};
+  $self->{configurator} = shift if @_;
+  $d;
+}
+
+# get/set the url for this feature
+sub url {
+  my $self = shift;
+  my $d = $self->{url};
+  $self->{url} = shift if @_;
+  $d;
+}
+
+# make a link
+sub make_link {
+  my $self = shift;
+  if (my $url = $self->url) {
+    return $url;
+  }
+
+  elsif (my $configurator = $self->configurator) {
+    return $configurator->make_link($self);
+  }
+
+  else {
+    return;
+  }
+}
+
+sub all_tags {
+  my $self = shift;
+  return keys %{$self->{attrib}};
+}
+sub each_tag_value {
+  my $self = shift;
+  my $tag  = shift;
+  my $value = $self->{attrib}{$tag} or return;
+  return CORE::ref $value ? @{$self->{attrib}{$tag}}
+                          : $self->{attrib}{$tag};
+}
+
+sub DESTROY { }
+
+1;
+
+__END__
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut