diff variant_effect_predictor/Bio/EnsEMBL/SeqEdit.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/SeqEdit.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,431 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL:SeqEdit - A class representing a post transcriptional edit to a
+sequence.
+
+=head1 SYNOPSIS
+
+  use Bio::EnsEMBL::SeqEdit;
+  use Bio::EnsEMBL::Attribute;
+
+  # construct a SeqEdit object using a Transcript attribute
+
+  ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') };
+
+  $seq_edit = Bio::EnsEMBL::SeqEdit( -ATTRIB => $attribute );
+
+  print $seq_edit->start(),   "\n";
+  print $seq_edit->end(),     "\n";
+  print $seq_edit->alt_seq(), "\n";
+
+  # apply the edit to some sequence
+  $seq = $transcript->spliced_seq();
+  print "Before modifiction: $seq\n";
+
+  $seq_edit->apply_edit( \$seq );
+  print "After modification: $seq\n";
+
+  # construct an attribute object from a SeqEdit and add it to a
+  # translation
+
+  $seq_edit = Bio::EnsEMBL::SeqEdit->new(
+    -CODE    => '_selenocysteine',
+    -NAME    => 'Selenocysteine',
+    -DESC    => 'Selenocysteine',
+    -START   => 10,
+    -END     => 10,
+    -ALT_SEQ => 'U'
+  );
+
+  $attribute = $seq_edit->get_Attribute();
+  $translation->add_Attributes($attribute);
+
+=head1 DESCRIPTION
+
+This is a class used to represent post transcriptional
+modifications to sequences.  SeqEdit objects are stored as ordinary
+Bio::EnsEMBL::Attributes with a parseable value and can be used to
+represent RNA editing, selenocysteines etc.
+
+Also see B<Bio::EnsEMBL::Attribute>
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::SeqEdit;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::Attribute;
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw);
+
+
+=head2 new
+
+  Arg [-ATTRIB] : Bio::EnsEMBL::Attribute
+                  Constructs a new SeqEdit from an Attribute.
+                  Can only be provided if no other constructor arguments
+                  are provided.
+  Arg [-START]       : The start position of the edit.
+  Arg [-END]         : The end position of the edit.
+  Arg [-ALT_SEQ]     : The alternate sequence
+  Arg [-CODE]        : A code for this SeqEdit
+  Arg [-NAME]        : A name for this SeqEdit
+  Arg [-DESCRIPTION] : Arg passed to superclass constructor
+  Example    : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib);
+               my $sea = Bio::EnsEMBL::SeqEdit->new
+                             (-START => 10,
+                              -END   => 12,
+                              -ALT_SEQ => 'ACG',
+                              -CODE    => '_rna_edit',
+                              -NAME    => 'RNA Edit',
+                              -DESCRIPTION => 'RNA edit');
+  Description: Constructs a SeqEdit representing a single edit to a
+               sequence, such as an rna modification or a selenocysteine.
+  Returntype : Bio::EnsEMBL::SeqEdit
+  Exceptions : throws if attribute set and other args aswell
+               throws if start and end not set correctly of attribure not set
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub new {
+  my $class = shift;
+
+  my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) =
+    rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_);
+
+  my $self;
+
+  if($attrib) {
+    if(defined($start) || defined($end) || defined($alt_seq) ||
+       defined($name)  || defined($desc) || defined($code)) {
+      throw("Cannot specify -ATTRIB argument with additional arguments.");
+    }
+
+    if(!ref($attrib) || !$attrib->isa('Bio::EnsEMBL::Attribute')) {
+      throw('Bio::EnsEMBL::Attribute argument expected.');
+    }
+
+    ($start, $end, $alt_seq) = split(/\s+/, $attrib->value());
+
+    if($start !~ /\d+/ || $end !~ /\d+/) {
+      throw('Could not parse value of attribute: '.$attrib->value());
+    }
+
+    $name  = $attrib->name();
+    $code  = $attrib->code();
+    $desc = $attrib->description();
+
+
+  }
+
+  if(defined($end) && defined($start) && $start > $end+1) {
+    throw("start must be less than or equal to end + 1");
+  }
+
+  if(defined($start) && $start < 1) {
+    throw("start must be greater than or equal to 1");
+  }
+
+  if(defined($end) && $end < 0) {
+    throw("end must be greater than or equal to 0");
+  }
+
+  $alt_seq ||= '';
+
+  return bless {'start'        => $start,
+                'end'          => $end,
+                'alt_seq'      => $alt_seq,
+                'description'  => $desc,
+                'name'         => $name,
+                'code'         => $code}, $class;
+}
+
+
+
+=head2 start
+
+  Arg [1]    : (optional) int $start - the new start position
+  Example    : $start = $se_attrib->start();
+  Description: Getter/Setter for the start position of the region replaced
+               by the alt_seq.
+
+               Coordinates are inclusive and one-based, which means that
+               inserts are unusually represented by a start 1bp higher than
+               the end.
+
+               E.g. start = 1, end = 1 is a replacement of the first base but 
+               start = 1, end = 0 is an insert BEFORE the first base.
+  Returntype : int
+  Exceptions : none
+  Caller     : Transcript, Translation
+  Status     : Stable
+
+=cut
+
+sub start {
+  my $self = shift;
+
+  if(@_) {
+    my $start = shift;
+    if(defined($start) && $start < 1) {
+      throw("start must be greater than or equal to 1");
+    }
+    $self->{'start'} = $start;
+  }
+
+  return $self->{'start'};
+}
+
+
+=head2 end
+
+  Arg [1]    : (optional) int $end - the new end position
+  Example    : $end = $se_attrib->end();
+  Description: Getter/Setter for the end position of the region replaced
+               by the alt_seq.
+
+               Coordinates are inclusive and one-based, which means that
+               inserts are unusually represented by a start 1bp higher than
+               the end.
+
+               E.g. start = 1, end = 1 is a replacement of the first base but
+               start = 1, end = 0 is an insert BEFORE the first base.
+  Returntype : int
+  Exceptions : throws if end  <= 0
+  Caller     : Transcript, Translation
+  Status     : Stable
+
+=cut
+
+sub end {
+  my $self = shift;
+
+  if(@_) {
+    my $end = shift;
+    if(defined($end) && $end < 0) {
+      throw("end must be greater than or equal to 0");
+    }
+    $self->{'end'} = $end;
+  }
+
+  return $self->{'end'};
+}
+
+
+=head2 alt_seq
+
+  Arg [1]    : (optional) string $alt_seq
+  Example    : my $alt_seq = $se_attrib->alt_seq();
+  Description: Getter/Setter for the replacement sequence used by this edit.
+               The sequence may either be a string of amino acids or
+               nucleotides depending on the context in which this edit is
+               used.
+
+               In the case of a deletion the replacement sequence is an empty
+               string.
+  Returntype : string
+  Exceptions : none
+  Caller     : Transcript, Translation
+  Status     : Stable
+
+=cut
+
+sub alt_seq {
+  my $self = shift;
+  $self->{'alt_seq'} = shift || '' if(@_);
+  return $self->{'alt_seq'};
+}
+
+
+=head2 length_diff
+
+  Arg [1]    : none
+  Example    : my $diff = $sea->length_diff();
+  Description: Returns the difference in length caused by applying this
+               edit to a sequence.  This may be be negative (deletion),
+               positive (insertion) or 0 (replacement).
+
+               If either start or end are not defined 0 is returned.
+  Returntype : int
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub length_diff {
+  my $self = shift;
+
+  return 0 if(!defined($self->{'end'}) || !defined($self->{'start'}));
+
+  return length($self->{'alt_seq'}) - ($self->{'end'} - $self->{'start'} + 1);
+}
+
+
+
+=head2 name
+
+  Arg [1]    : (optional) string $name
+  Example    : my $name = $seqedit->name();
+  Description: Getter/Setter for the name of this SeqEdit
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub name {
+  my $self = shift;
+  $self->{'name'} = shift if(@_);
+  return $self->{'name'};
+}
+
+
+
+
+=head2 code
+
+  Arg [1]    : (optional) string $code
+  Example    : my $code = $seqedit->code();
+  Description: Getter/Setter for the code of this SeqEdit
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub code {
+  my $self = shift;
+  $self->{'code'} = shift if(@_);
+  return $self->{'code'};
+}
+
+
+
+=head2 description
+
+  Arg [1]    : (optional) string $desc
+  Example    : my $desc = $seqedit->description();
+  Description: Getter/Setter for the description of this SeqEdit
+  Returntype : string
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub description {
+  my $self = shift;
+  $self->{'description'} = shift if(@_);
+  return $self->{'description'};
+}
+
+
+
+=head2 get_Attribute
+
+  Arg [1]    : none
+  Example    : my $attrib = $seqedit->get_Attribute();
+               $transcript->add_Attributes($attrib);
+  Description: Converts a SeqEdit object into an Attribute object.  This
+               allows the SeqEdit to be stored as any other attribute in the
+               ensembl database.  The start/end and alt_seq properties
+               should be set before calling this method.
+  Returntype : Bio::EnsEMBL::Attribute
+  Exceptions : warning if start/end or alt_seq properties are not defined
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+sub get_Attribute {
+  my $self = shift;
+
+  my $start = $self->start();
+  my $end  = $self->end();
+  my $alt_seq = $self->alt_seq();
+
+  my $value;
+
+  if(defined($start) && defined($end) && defined($alt_seq)) {
+    $value = join(' ', $start, $end, $alt_seq);
+  } else {
+    warning('Attribute value cannot be created unless start, end and alt_seq' .
+            'properties are defined');
+    $value = '';
+  }
+
+  return Bio::EnsEMBL::Attribute->new(-CODE  => $self->code(),
+                                      -VALUE => $value,
+                                      -NAME  => $self->name(),
+                                      -DESCRIPTION => $self->description());
+}
+
+
+=head2 apply_edit
+
+  Arg [1]    : reference to string $seqref
+  Example    : $sequence = 'ACTGAATATTTAAGGCA';
+               $seqedit->apply_edit(\$sequence);
+               print $sequence, "\n";
+  Description: Applies this edit directly to a sequence which is
+               passed by reference.  The coordinates of this SeqEdit
+               are assumed to be relative to the start of the sequence
+               argument.
+               If either the start or end of this SeqEdit are not defined
+               this function will not do anything to the passed sequence.
+  Returntype : reference to the same sequence that was passed in
+  Exceptions : none
+  Caller     : Transcript, Translation
+  Status     : Stable
+
+=cut
+
+sub apply_edit {
+  my $self   = shift;
+  my $seqref = shift;
+
+  if(ref($seqref) ne 'SCALAR') {
+    throw("Reference to scalar argument expected");
+  }
+
+  if(!defined($self->{'start'}) || !defined($self->{'end'})) {
+    return $seqref;
+  }
+
+  my $len = $self->{'end'} - $self->{'start'} + 1;
+  substr($$seqref, $self->{'start'} - 1, $len) = $self->{'alt_seq'};
+
+  return $seqref;
+}
+
+
+1;