Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/Attribute.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/Attribute.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,175 @@ +package Bio::EnsEMBL::Compara::Attribute; + +use strict; +use Carp; +use Bio::EnsEMBL::Utils::Exception; + +our ($AUTOLOAD, %ok_field); + +%ok_field = ('member_id' => 1, + 'family_id' => 1, + 'cigar_line' => 1, + 'cigar_start' => 1, + 'cigar_end' => 1, + 'homology_id' => 1, + 'peptide_member_id' => 1, + 'perc_cov' => 1, + 'perc_id' => 1, + 'perc_pos' => 1, + 'peptide_align_feature_id' => 1 + ); + + +sub new { + my ($class) = @_; + + return bless {}, $class; +} + +=head2 new_fast + + Arg [1] : hash reference $hashref + Example : none + Description: This is an ultra fast constructor which requires knowledge of + the objects internals to be used. + Returntype : + Exceptions : none + Caller : + +=cut + +sub new_fast { + my ($class, $hashref) = @_; + + return bless $hashref, $class; +} + +sub AUTOLOAD { + my $self = shift; + my $method = $AUTOLOAD; + $method =~ s/.*:://; + croak "invalid method: ->$method()" unless $ok_field{$method}; + $self->{lc $method} = shift if(@_); + return $self->{lc $method}; +} + +sub alignment_string { + my ($self, $member) = @_; + + + + unless (defined $self->cigar_line) { + throw("To get an alignment_string, the cigar_line needs to be define\n"); + } + unless (defined $member) { + throw("To get an alignment_string, the peptide member needs to be passed as an option\n"); + } + + unless (defined $self->{'alignment_string'}) { + my $sequence = $member->sequence; + if ((defined($self->cigar_start) && ($self->cigar_start != 0)) + || (defined $self->cigar_end && ($self->cigar_end != 0))) { + unless ((defined($self->cigar_start) && ($self->cigar_start != 0)) && + (defined $self->cigar_end && ($self->cigar_end != 0))) { +# if (defined $self->cigar_start || defined $self->cigar_end) { +# unless (defined $self->cigar_start && defined $self->cigar_end) { + throw("both cigar_start and cigar_end should be defined"); + } + my $offset = $self->cigar_start - 1; + my $length = $self->cigar_end - $self->cigar_start + 1; + $sequence = substr($sequence, $offset, $length); + } + + my $cigar_line = $self->cigar_line; + $cigar_line =~ s/([MD])/$1 /g; + + my @cigar_segments = split " ",$cigar_line; + my $alignment_string = ""; + my $seq_start = 0; + foreach my $segment (@cigar_segments) { + if ($segment =~ /^(\d*)D$/) { + my $length = $1; + $length = 1 if ($length eq ""); + $alignment_string .= "-" x $length; + } elsif ($segment =~ /^(\d*)M$/) { + my $length = $1; + $length = 1 if ($length eq ""); + $alignment_string .= substr($sequence,$seq_start,$length); + $seq_start += $length; + } + } + $self->{'alignment_string'} = $alignment_string; + } + + return $self->{'alignment_string'}; +} + +=head2 cdna_alignment_string + + Arg [1] : none + Example : my $cdna_alignment = $family_member->cdna_alignment_string(); + Description: Converts the peptide alignment string to a cdna alignment + string. This only works for EnsEMBL peptides whose cdna can + be retrieved from the attached EnsEMBL databse. + If the cdna cannot be retrieved undef is returned and a + warning is thrown. + Returntype : string + Exceptions : none + Caller : general + +=cut + +sub cdna_alignment_string { + my ($self, $member, $changeSelenos) = @_; + unless (defined $changeSelenos) { + $changeSelenos = 0; + } + + if($member->source_name ne 'ENSEMBLPEP') { + warning("Don't know how to retrieve cdna for database [@{[$member->source_name]}] + SPECIES @{[ $member->adaptor->db->get_GenomeDBAdaptor->fetch_by_dbID($member->genome_db_id)->db_adaptor->dbname ]}" ); + return undef; + } + + unless (defined $self->{'cdna_alignment_string'}) { + my $cdna = $member->sequence_cds(); + + if (defined $self->cigar_start || defined $self->cigar_end) { + unless (defined $self->cigar_start && defined $self->cigar_end) { + throw("both cigar_start and cigar_end should be defined"); + } + my $offset = $self->cigar_start * 3 - 3; + my $length = ($self->cigar_end - $self->cigar_start + 1) * 3; + $cdna = substr($cdna, $offset, $length); + } + + my $start = 0; + my $cdna_align_string = ''; + + foreach my $pep (split(//,$self->alignment_string($member))) { + + if($pep eq '-') { + $cdna_align_string .= '--- '; + } elsif ($pep eq 'U' && $changeSelenos or $pep eq '*') { + $cdna_align_string .= 'NNN '; + $start += 3; + } else { + my $codon = substr($cdna, $start, 3); + unless (length($codon) == 3) { + # sometimes the last codon contains only 1 or 2 nucleotides. + # making sure that it has 3 by adding as many Ns as necessary + $codon .= 'N' x (3 - length($codon)); + } + $cdna_align_string .= $codon . ' '; + $start += 3; + } + } + $self->{'cdna_alignment_string'} = $cdna_align_string; + } + + return $self->{'cdna_alignment_string'}; +} + +sub DESTROY {} + +1;