view variant_effect_predictor/Bio/EnsEMBL/Compara/Attribute.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
line wrap: on
line source

package Bio::EnsEMBL::Compara::Attribute;

use strict;
use Carp;
use Bio::EnsEMBL::Utils::Exception;

our ($AUTOLOAD, %ok_field);

%ok_field = ('member_id' => 1,
             'family_id' => 1,
             'cigar_line' => 1,
             'cigar_start' => 1,
             'cigar_end' => 1,
             'homology_id' => 1,
             'peptide_member_id' => 1,
             'perc_cov' => 1,
             'perc_id' => 1,
             'perc_pos' => 1,
             'peptide_align_feature_id' => 1
             );


sub new {
  my ($class) = @_;

  return bless {}, $class;
}

=head2 new_fast

  Arg [1]    : hash reference $hashref
  Example    : none
  Description: This is an ultra fast constructor which requires knowledge of
               the objects internals to be used.
  Returntype : 
  Exceptions : none
  Caller     : 

=cut

sub new_fast {
  my ($class, $hashref) = @_;

  return bless $hashref, $class;
}

sub AUTOLOAD {
  my $self = shift;
  my $method = $AUTOLOAD;
  $method =~ s/.*:://;
  croak "invalid method: ->$method()" unless $ok_field{$method};
  $self->{lc $method} = shift if(@_);
  return $self->{lc $method};
}

sub alignment_string {
  my ($self, $member) = @_;

  

  unless (defined $self->cigar_line) {
    throw("To get an alignment_string, the cigar_line needs to be define\n");
  }
  unless (defined $member) {
    throw("To get an alignment_string, the peptide member needs to be passed as an option\n");
  }

  unless (defined $self->{'alignment_string'}) {
    my $sequence = $member->sequence;
    if ((defined($self->cigar_start) && ($self->cigar_start != 0)) 
        || (defined $self->cigar_end  && ($self->cigar_end != 0))) {
      unless ((defined($self->cigar_start) && ($self->cigar_start != 0)) && 
               (defined $self->cigar_end  && ($self->cigar_end != 0))) {
#     if (defined $self->cigar_start || defined $self->cigar_end) {
#       unless (defined $self->cigar_start && defined $self->cigar_end) {
        throw("both cigar_start and cigar_end should be defined");
      }
      my $offset = $self->cigar_start - 1;
      my $length = $self->cigar_end - $self->cigar_start + 1;
      $sequence = substr($sequence, $offset, $length);
    }

    my $cigar_line = $self->cigar_line;
    $cigar_line =~ s/([MD])/$1 /g;

    my @cigar_segments = split " ",$cigar_line;
    my $alignment_string = "";
    my $seq_start = 0;
    foreach my $segment (@cigar_segments) {
      if ($segment =~ /^(\d*)D$/) {
        my $length = $1;
        $length = 1 if ($length eq "");
        $alignment_string .= "-" x $length;
      } elsif ($segment =~ /^(\d*)M$/) {
        my $length = $1;
        $length = 1 if ($length eq "");
        $alignment_string .= substr($sequence,$seq_start,$length);
        $seq_start += $length;
      }
    }
    $self->{'alignment_string'} = $alignment_string;
  }

  return $self->{'alignment_string'};
}

=head2 cdna_alignment_string

  Arg [1]    : none
  Example    : my $cdna_alignment = $family_member->cdna_alignment_string();
  Description: Converts the peptide alignment string to a cdna alignment
               string.  This only works for EnsEMBL peptides whose cdna can
               be retrieved from the attached EnsEMBL databse.
               If the cdna cannot be retrieved undef is returned and a
               warning is thrown.
  Returntype : string
  Exceptions : none
  Caller     : general

=cut

sub cdna_alignment_string {
  my ($self, $member, $changeSelenos) = @_;
  unless (defined $changeSelenos) {
      $changeSelenos = 0;
  }

  if($member->source_name ne 'ENSEMBLPEP') {
    warning("Don't know how to retrieve cdna for database [@{[$member->source_name]}]
      SPECIES @{[ $member->adaptor->db->get_GenomeDBAdaptor->fetch_by_dbID($member->genome_db_id)->db_adaptor->dbname ]}" );
    return undef;
  }

  unless (defined $self->{'cdna_alignment_string'}) {
    my $cdna = $member->sequence_cds();
    
    if (defined $self->cigar_start || defined $self->cigar_end) {
      unless (defined $self->cigar_start && defined $self->cigar_end) {
        throw("both cigar_start and cigar_end should be defined");
      }
      my $offset = $self->cigar_start * 3 - 3;
      my $length = ($self->cigar_end - $self->cigar_start + 1) * 3;
      $cdna = substr($cdna, $offset, $length);
    }

    my $start = 0;
    my $cdna_align_string = '';

    foreach my $pep (split(//,$self->alignment_string($member))) {
      
      if($pep eq '-') {
        $cdna_align_string .= '--- ';
      } elsif ($pep eq 'U' && $changeSelenos or $pep eq '*') {
	$cdna_align_string .= 'NNN ';
	$start += 3;  
      } else {
        my $codon = substr($cdna, $start, 3);
        unless (length($codon) == 3) {
          # sometimes the last codon contains only 1 or 2 nucleotides.
          # making sure that it has 3 by adding as many Ns as necessary
          $codon .= 'N' x (3 - length($codon));
        }
        $cdna_align_string .= $codon . ' ';
        $start += 3;
      }
    }
    $self->{'cdna_alignment_string'} = $cdna_align_string;
  }
  
  return $self->{'cdna_alignment_string'};
}

sub DESTROY {}

1;