comparison variant_effect_predictor/Bio/EnsEMBL/Compara/Attribute.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 package Bio::EnsEMBL::Compara::Attribute;
2
3 use strict;
4 use Carp;
5 use Bio::EnsEMBL::Utils::Exception;
6
7 our ($AUTOLOAD, %ok_field);
8
9 %ok_field = ('member_id' => 1,
10 'family_id' => 1,
11 'cigar_line' => 1,
12 'cigar_start' => 1,
13 'cigar_end' => 1,
14 'homology_id' => 1,
15 'peptide_member_id' => 1,
16 'perc_cov' => 1,
17 'perc_id' => 1,
18 'perc_pos' => 1,
19 'peptide_align_feature_id' => 1
20 );
21
22
23 sub new {
24 my ($class) = @_;
25
26 return bless {}, $class;
27 }
28
29 =head2 new_fast
30
31 Arg [1] : hash reference $hashref
32 Example : none
33 Description: This is an ultra fast constructor which requires knowledge of
34 the objects internals to be used.
35 Returntype :
36 Exceptions : none
37 Caller :
38
39 =cut
40
41 sub new_fast {
42 my ($class, $hashref) = @_;
43
44 return bless $hashref, $class;
45 }
46
47 sub AUTOLOAD {
48 my $self = shift;
49 my $method = $AUTOLOAD;
50 $method =~ s/.*:://;
51 croak "invalid method: ->$method()" unless $ok_field{$method};
52 $self->{lc $method} = shift if(@_);
53 return $self->{lc $method};
54 }
55
56 sub alignment_string {
57 my ($self, $member) = @_;
58
59
60
61 unless (defined $self->cigar_line) {
62 throw("To get an alignment_string, the cigar_line needs to be define\n");
63 }
64 unless (defined $member) {
65 throw("To get an alignment_string, the peptide member needs to be passed as an option\n");
66 }
67
68 unless (defined $self->{'alignment_string'}) {
69 my $sequence = $member->sequence;
70 if ((defined($self->cigar_start) && ($self->cigar_start != 0))
71 || (defined $self->cigar_end && ($self->cigar_end != 0))) {
72 unless ((defined($self->cigar_start) && ($self->cigar_start != 0)) &&
73 (defined $self->cigar_end && ($self->cigar_end != 0))) {
74 # if (defined $self->cigar_start || defined $self->cigar_end) {
75 # unless (defined $self->cigar_start && defined $self->cigar_end) {
76 throw("both cigar_start and cigar_end should be defined");
77 }
78 my $offset = $self->cigar_start - 1;
79 my $length = $self->cigar_end - $self->cigar_start + 1;
80 $sequence = substr($sequence, $offset, $length);
81 }
82
83 my $cigar_line = $self->cigar_line;
84 $cigar_line =~ s/([MD])/$1 /g;
85
86 my @cigar_segments = split " ",$cigar_line;
87 my $alignment_string = "";
88 my $seq_start = 0;
89 foreach my $segment (@cigar_segments) {
90 if ($segment =~ /^(\d*)D$/) {
91 my $length = $1;
92 $length = 1 if ($length eq "");
93 $alignment_string .= "-" x $length;
94 } elsif ($segment =~ /^(\d*)M$/) {
95 my $length = $1;
96 $length = 1 if ($length eq "");
97 $alignment_string .= substr($sequence,$seq_start,$length);
98 $seq_start += $length;
99 }
100 }
101 $self->{'alignment_string'} = $alignment_string;
102 }
103
104 return $self->{'alignment_string'};
105 }
106
107 =head2 cdna_alignment_string
108
109 Arg [1] : none
110 Example : my $cdna_alignment = $family_member->cdna_alignment_string();
111 Description: Converts the peptide alignment string to a cdna alignment
112 string. This only works for EnsEMBL peptides whose cdna can
113 be retrieved from the attached EnsEMBL databse.
114 If the cdna cannot be retrieved undef is returned and a
115 warning is thrown.
116 Returntype : string
117 Exceptions : none
118 Caller : general
119
120 =cut
121
122 sub cdna_alignment_string {
123 my ($self, $member, $changeSelenos) = @_;
124 unless (defined $changeSelenos) {
125 $changeSelenos = 0;
126 }
127
128 if($member->source_name ne 'ENSEMBLPEP') {
129 warning("Don't know how to retrieve cdna for database [@{[$member->source_name]}]
130 SPECIES @{[ $member->adaptor->db->get_GenomeDBAdaptor->fetch_by_dbID($member->genome_db_id)->db_adaptor->dbname ]}" );
131 return undef;
132 }
133
134 unless (defined $self->{'cdna_alignment_string'}) {
135 my $cdna = $member->sequence_cds();
136
137 if (defined $self->cigar_start || defined $self->cigar_end) {
138 unless (defined $self->cigar_start && defined $self->cigar_end) {
139 throw("both cigar_start and cigar_end should be defined");
140 }
141 my $offset = $self->cigar_start * 3 - 3;
142 my $length = ($self->cigar_end - $self->cigar_start + 1) * 3;
143 $cdna = substr($cdna, $offset, $length);
144 }
145
146 my $start = 0;
147 my $cdna_align_string = '';
148
149 foreach my $pep (split(//,$self->alignment_string($member))) {
150
151 if($pep eq '-') {
152 $cdna_align_string .= '--- ';
153 } elsif ($pep eq 'U' && $changeSelenos or $pep eq '*') {
154 $cdna_align_string .= 'NNN ';
155 $start += 3;
156 } else {
157 my $codon = substr($cdna, $start, 3);
158 unless (length($codon) == 3) {
159 # sometimes the last codon contains only 1 or 2 nucleotides.
160 # making sure that it has 3 by adding as many Ns as necessary
161 $codon .= 'N' x (3 - length($codon));
162 }
163 $cdna_align_string .= $codon . ' ';
164 $start += 3;
165 }
166 }
167 $self->{'cdna_alignment_string'} = $cdna_align_string;
168 }
169
170 return $self->{'cdna_alignment_string'};
171 }
172
173 sub DESTROY {}
174
175 1;