Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/Attribute.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 package Bio::EnsEMBL::Compara::Attribute; | |
2 | |
3 use strict; | |
4 use Carp; | |
5 use Bio::EnsEMBL::Utils::Exception; | |
6 | |
7 our ($AUTOLOAD, %ok_field); | |
8 | |
9 %ok_field = ('member_id' => 1, | |
10 'family_id' => 1, | |
11 'cigar_line' => 1, | |
12 'cigar_start' => 1, | |
13 'cigar_end' => 1, | |
14 'homology_id' => 1, | |
15 'peptide_member_id' => 1, | |
16 'perc_cov' => 1, | |
17 'perc_id' => 1, | |
18 'perc_pos' => 1, | |
19 'peptide_align_feature_id' => 1 | |
20 ); | |
21 | |
22 | |
23 sub new { | |
24 my ($class) = @_; | |
25 | |
26 return bless {}, $class; | |
27 } | |
28 | |
29 =head2 new_fast | |
30 | |
31 Arg [1] : hash reference $hashref | |
32 Example : none | |
33 Description: This is an ultra fast constructor which requires knowledge of | |
34 the objects internals to be used. | |
35 Returntype : | |
36 Exceptions : none | |
37 Caller : | |
38 | |
39 =cut | |
40 | |
41 sub new_fast { | |
42 my ($class, $hashref) = @_; | |
43 | |
44 return bless $hashref, $class; | |
45 } | |
46 | |
47 sub AUTOLOAD { | |
48 my $self = shift; | |
49 my $method = $AUTOLOAD; | |
50 $method =~ s/.*:://; | |
51 croak "invalid method: ->$method()" unless $ok_field{$method}; | |
52 $self->{lc $method} = shift if(@_); | |
53 return $self->{lc $method}; | |
54 } | |
55 | |
56 sub alignment_string { | |
57 my ($self, $member) = @_; | |
58 | |
59 | |
60 | |
61 unless (defined $self->cigar_line) { | |
62 throw("To get an alignment_string, the cigar_line needs to be define\n"); | |
63 } | |
64 unless (defined $member) { | |
65 throw("To get an alignment_string, the peptide member needs to be passed as an option\n"); | |
66 } | |
67 | |
68 unless (defined $self->{'alignment_string'}) { | |
69 my $sequence = $member->sequence; | |
70 if ((defined($self->cigar_start) && ($self->cigar_start != 0)) | |
71 || (defined $self->cigar_end && ($self->cigar_end != 0))) { | |
72 unless ((defined($self->cigar_start) && ($self->cigar_start != 0)) && | |
73 (defined $self->cigar_end && ($self->cigar_end != 0))) { | |
74 # if (defined $self->cigar_start || defined $self->cigar_end) { | |
75 # unless (defined $self->cigar_start && defined $self->cigar_end) { | |
76 throw("both cigar_start and cigar_end should be defined"); | |
77 } | |
78 my $offset = $self->cigar_start - 1; | |
79 my $length = $self->cigar_end - $self->cigar_start + 1; | |
80 $sequence = substr($sequence, $offset, $length); | |
81 } | |
82 | |
83 my $cigar_line = $self->cigar_line; | |
84 $cigar_line =~ s/([MD])/$1 /g; | |
85 | |
86 my @cigar_segments = split " ",$cigar_line; | |
87 my $alignment_string = ""; | |
88 my $seq_start = 0; | |
89 foreach my $segment (@cigar_segments) { | |
90 if ($segment =~ /^(\d*)D$/) { | |
91 my $length = $1; | |
92 $length = 1 if ($length eq ""); | |
93 $alignment_string .= "-" x $length; | |
94 } elsif ($segment =~ /^(\d*)M$/) { | |
95 my $length = $1; | |
96 $length = 1 if ($length eq ""); | |
97 $alignment_string .= substr($sequence,$seq_start,$length); | |
98 $seq_start += $length; | |
99 } | |
100 } | |
101 $self->{'alignment_string'} = $alignment_string; | |
102 } | |
103 | |
104 return $self->{'alignment_string'}; | |
105 } | |
106 | |
107 =head2 cdna_alignment_string | |
108 | |
109 Arg [1] : none | |
110 Example : my $cdna_alignment = $family_member->cdna_alignment_string(); | |
111 Description: Converts the peptide alignment string to a cdna alignment | |
112 string. This only works for EnsEMBL peptides whose cdna can | |
113 be retrieved from the attached EnsEMBL databse. | |
114 If the cdna cannot be retrieved undef is returned and a | |
115 warning is thrown. | |
116 Returntype : string | |
117 Exceptions : none | |
118 Caller : general | |
119 | |
120 =cut | |
121 | |
122 sub cdna_alignment_string { | |
123 my ($self, $member, $changeSelenos) = @_; | |
124 unless (defined $changeSelenos) { | |
125 $changeSelenos = 0; | |
126 } | |
127 | |
128 if($member->source_name ne 'ENSEMBLPEP') { | |
129 warning("Don't know how to retrieve cdna for database [@{[$member->source_name]}] | |
130 SPECIES @{[ $member->adaptor->db->get_GenomeDBAdaptor->fetch_by_dbID($member->genome_db_id)->db_adaptor->dbname ]}" ); | |
131 return undef; | |
132 } | |
133 | |
134 unless (defined $self->{'cdna_alignment_string'}) { | |
135 my $cdna = $member->sequence_cds(); | |
136 | |
137 if (defined $self->cigar_start || defined $self->cigar_end) { | |
138 unless (defined $self->cigar_start && defined $self->cigar_end) { | |
139 throw("both cigar_start and cigar_end should be defined"); | |
140 } | |
141 my $offset = $self->cigar_start * 3 - 3; | |
142 my $length = ($self->cigar_end - $self->cigar_start + 1) * 3; | |
143 $cdna = substr($cdna, $offset, $length); | |
144 } | |
145 | |
146 my $start = 0; | |
147 my $cdna_align_string = ''; | |
148 | |
149 foreach my $pep (split(//,$self->alignment_string($member))) { | |
150 | |
151 if($pep eq '-') { | |
152 $cdna_align_string .= '--- '; | |
153 } elsif ($pep eq 'U' && $changeSelenos or $pep eq '*') { | |
154 $cdna_align_string .= 'NNN '; | |
155 $start += 3; | |
156 } else { | |
157 my $codon = substr($cdna, $start, 3); | |
158 unless (length($codon) == 3) { | |
159 # sometimes the last codon contains only 1 or 2 nucleotides. | |
160 # making sure that it has 3 by adding as many Ns as necessary | |
161 $codon .= 'N' x (3 - length($codon)); | |
162 } | |
163 $cdna_align_string .= $codon . ' '; | |
164 $start += 3; | |
165 } | |
166 } | |
167 $self->{'cdna_alignment_string'} = $cdna_align_string; | |
168 } | |
169 | |
170 return $self->{'cdna_alignment_string'}; | |
171 } | |
172 | |
173 sub DESTROY {} | |
174 | |
175 1; |