0
|
1 # $Id: AAChange.pm,v 1.13 2002/10/22 07:38:49 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Variation::AAChange
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Variation::AAChange - Sequence change class for polypeptides
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 $aamut = Bio::Variation::AAChange->new
|
|
20 ('-start' => $start,
|
|
21 '-end' => $end,
|
|
22 '-length' => $len,
|
|
23 '-proof' => $proof,
|
|
24 '-isMutation' => 1,
|
|
25 '-mut_number' => $mut_number
|
|
26 );
|
|
27
|
|
28 my $a1 = Bio::Variation::Allele->new;
|
|
29 $a1->seq($ori) if $ori;
|
|
30 $aamut->allele_ori($a1);
|
|
31 my $a2 = Bio::Variation::Allele->new;
|
|
32 $a2->seq($mut) if $mut;
|
|
33 $aachange->add_Allele($a2);
|
|
34 $aachange->allele_mut($a2);
|
|
35
|
|
36 print "\n";
|
|
37
|
|
38 # add it to a SeqDiff container object
|
|
39 $seqdiff->add_Variant($rnachange);
|
|
40
|
|
41 # and create links to and from RNA level variant objects
|
|
42 $aamut->RNAChange($rnachange);
|
|
43 $rnachange->AAChange($rnachange);
|
|
44
|
|
45 =head1 DESCRIPTION
|
|
46
|
|
47 The instantiable class Bio::Variation::RNAChange describes basic
|
|
48 sequence changes at polypeptide level. It uses methods defined in
|
|
49 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
|
|
50 for details.
|
|
51
|
|
52 If the variation described by a AAChange object has a known
|
|
53 Bio::Variation::RNAAChange object, create the link with method
|
|
54 AAChange(). See L<Bio::Variation::AAChange> for more information.
|
|
55
|
|
56 =head1 FEEDBACK
|
|
57
|
|
58 =head2 Mailing Lists
|
|
59
|
|
60 User feedback is an integral part of the evolution of this and other
|
|
61 Bioperl modules. Send your comments and suggestions preferably to the
|
|
62 Bioperl mailing lists Your participation is much appreciated.
|
|
63
|
|
64 bioperl-l@bioperl.org - General discussion
|
|
65 http://bio.perl.org/MailList.html - About the mailing lists
|
|
66
|
|
67 =head2 Reporting Bugs
|
|
68
|
|
69 report bugs to the Bioperl bug tracking system to help us keep track
|
|
70 the bugs and their resolution. Bug reports can be submitted via
|
|
71 email or the web:
|
|
72
|
|
73 bioperl-bugs@bio.perl.org
|
|
74 http://bugzilla.bioperl.org/
|
|
75
|
|
76 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
77
|
|
78 Email: heikki@ebi.ac.uk
|
|
79
|
|
80 Address:
|
|
81
|
|
82 EMBL Outstation, European Bioinformatics Institute
|
|
83 Wellcome Trust Genome Campus, Hinxton
|
|
84 Cambs. CB10 1SD, United Kingdom
|
|
85
|
|
86 =head1 APPENDIX
|
|
87
|
|
88 The rest of the documentation details each of the object
|
|
89 methods. Internal methods are usually preceded with a _
|
|
90
|
|
91 =cut
|
|
92
|
|
93
|
|
94 # Let the code begin...
|
|
95
|
|
96
|
|
97 package Bio::Variation::AAChange;
|
|
98 my $VERSION=1.0;
|
|
99 use vars qw(@ISA $MATRIX);
|
|
100 use strict;
|
|
101
|
|
102 # Object preamble - inheritance
|
|
103 use Bio::Variation::VariantI;
|
|
104
|
|
105 @ISA = qw( Bio::Variation::VariantI );
|
|
106
|
|
107 BEGIN {
|
|
108
|
|
109 my $matrix = << "__MATRIX__";
|
|
110 # Matrix made by matblas from blosum62.iij
|
|
111 # * column uses minimum score
|
|
112 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
|
|
113 # Blocks Database = /data/blocks_5.0/blocks.dat
|
|
114 # Cluster Percentage: >= 62
|
|
115 # Entropy = 0.6979, Expected = -0.5209
|
|
116 A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
117 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
|
|
118 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
|
|
119 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
|
|
120 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
|
|
121 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
|
|
122 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
|
|
123 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
|
|
124 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
|
|
125 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
|
|
126 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
|
|
127 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
|
|
128 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
|
|
129 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
|
|
130 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
|
|
131 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
|
|
132 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
|
|
133 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
|
|
134 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
|
|
135 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
|
|
136 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
|
|
137 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
|
|
138 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
|
|
139 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
|
|
140 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
|
|
141 __MATRIX__
|
|
142
|
|
143 my %blosum = ();
|
|
144 $matrix =~ /^ +(.+)$/m;
|
|
145 my @aas = split / +/, $1;
|
|
146 foreach my $aa (@aas) {
|
|
147 my $tmp = $aa;
|
|
148 $tmp = "\\$aa" if $aa eq '*';
|
|
149 $matrix =~ /^($tmp) +([-+]?\d.*)$/m;
|
|
150 my @scores = split / +/, $2 if defined $2;
|
|
151 my $count = 0;
|
|
152 foreach my $ak (@aas) {
|
|
153 $blosum{$aa}->{$aas[$count]} = $scores[$count];
|
|
154 $count++;
|
|
155 }
|
|
156 }
|
|
157 sub _matrix;
|
|
158 $MATRIX = \%blosum;
|
|
159 }
|
|
160
|
|
161 sub new {
|
|
162 my($class,@args) = @_;
|
|
163 my $self = $class->SUPER::new(@args);
|
|
164
|
|
165 my ($start, $end, $length, $strand, $primary, $source,
|
|
166 $frame, $score, $gff_string,
|
|
167 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
|
|
168 $label, $status, $proof, $re_changes, $region, $region_value,
|
|
169 $region_dist,
|
|
170 $numbering, $mut_number, $ismutation) =
|
|
171 $self->_rearrange([qw(START
|
|
172 END
|
|
173 LENGTH
|
|
174 STRAND
|
|
175 PRIMARY
|
|
176 SOURCE
|
|
177 FRAME
|
|
178 SCORE
|
|
179 GFF_STRING
|
|
180 ALLELE_ORI
|
|
181 ALLELE_MUT
|
|
182 UPSTREAMSEQ
|
|
183 DNSTREAMSEQ
|
|
184 LABEL
|
|
185 STATUS
|
|
186 PROOF
|
|
187 RE_CHANGES
|
|
188 REGION
|
|
189 REGION_VALUE
|
|
190 REGION_DIST
|
|
191 NUMBERING
|
|
192 MUT_NUMBER
|
|
193 ISMUTATION
|
|
194 )],@args);
|
|
195
|
|
196 $self->primary_tag("Variation");
|
|
197
|
|
198 $self->{ 'alleles' } = [];
|
|
199
|
|
200 $start && $self->start($start);
|
|
201 $end && $self->end($end);
|
|
202 $length && $self->length($length);
|
|
203 $strand && $self->strand($strand);
|
|
204 $primary && $self->primary_tag($primary);
|
|
205 $source && $self->source_tag($source);
|
|
206 $frame && $self->frame($frame);
|
|
207 $score && $self->score($score);
|
|
208 $gff_string && $self->_from_gff_string($gff_string);
|
|
209
|
|
210 $allele_ori && $self->allele_ori($allele_ori);
|
|
211 $allele_mut && $self->allele_mut($allele_mut);
|
|
212 $upstreamseq && $self->upstreamseq($upstreamseq);
|
|
213 $dnstreamseq && $self->dnstreamseq($dnstreamseq);
|
|
214
|
|
215 $label && $self->label($label);
|
|
216 $status && $self->status($status);
|
|
217 $proof && $self->proof($proof);
|
|
218 $region && $self->region($region);
|
|
219 $region_value && $self->region_value($region_value);
|
|
220 $region_dist && $self->region_dist($region_dist);
|
|
221 $numbering && $self->numbering($numbering);
|
|
222 $mut_number && $self->mut_number($mut_number);
|
|
223 $ismutation && $self->isMutation($ismutation);
|
|
224
|
|
225 return $self; # success - we hope!
|
|
226 }
|
|
227
|
|
228 =head2 RNAChange
|
|
229
|
|
230 Title : RNAChange
|
|
231 Usage : $mutobj = $self->RNAChange;
|
|
232 : $mutobj = $self->RNAChange($objref);
|
|
233 Function: Returns or sets the link-reference to a mutation/change object.
|
|
234 If there is no link, it will return undef
|
|
235 Returns : an obj_ref or undef
|
|
236
|
|
237 =cut
|
|
238
|
|
239 sub RNAChange {
|
|
240 my ($self,$value) = @_;
|
|
241 if (defined $value) {
|
|
242 if( ! $value->isa('Bio::Variation::RNAChange') ) {
|
|
243 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
|
|
244 return (undef);
|
|
245 }
|
|
246 else {
|
|
247 $self->{'RNAChange'} = $value;
|
|
248 }
|
|
249 }
|
|
250 unless (exists $self->{'RNAChange'}) {
|
|
251 return (undef);
|
|
252 } else {
|
|
253 return $self->{'RNAChange'};
|
|
254 }
|
|
255 }
|
|
256
|
|
257
|
|
258
|
|
259 =head2 label
|
|
260
|
|
261 Title : label
|
|
262 Usage : $obj->label();
|
|
263 Function:
|
|
264
|
|
265 Sets and returns mutation event label(s). If value is not
|
|
266 set, or no argument is given returns false. Each
|
|
267 instantiable subclass of L<Bio::Variation::VariantI> needs
|
|
268 to implement this method. Valid values are listed in
|
|
269 'Mutation event controlled vocabulary' in
|
|
270 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
|
|
271
|
|
272 Example :
|
|
273 Returns : string
|
|
274 Args : string
|
|
275
|
|
276 =cut
|
|
277
|
|
278
|
|
279 sub label {
|
|
280 my ($self) = @_;
|
|
281 my ($o, $m, $type);
|
|
282 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
|
|
283 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
|
|
284
|
|
285 if ($self->start == 1 ) {
|
|
286 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
|
|
287 $type = 'no translation';
|
|
288 }
|
|
289 elsif ($o and $m and $o eq $m ) {
|
|
290 $type = 'silent';
|
|
291 }
|
|
292 # more ...
|
|
293 }
|
|
294 elsif ($o and substr($o, 0, 1) eq '*' ) {
|
|
295 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
|
|
296 $type = 'post-elongation';
|
|
297 }
|
|
298 elsif ($m and $o eq $m ) {
|
|
299 $type = 'silent, conservative';
|
|
300 }
|
|
301 }
|
|
302 elsif ($o and $m and $o eq $m) {
|
|
303 $type = 'silent, conservative';
|
|
304 }
|
|
305 elsif ($m and $m eq '*') {
|
|
306 $type = 'truncation';
|
|
307 }
|
|
308 elsif ($o and $m and $o eq $m) {
|
|
309 $type = 'silent, conservative';
|
|
310 }
|
|
311 elsif (not $m or
|
|
312 ($o and $m and length($o) > length($m) and
|
|
313 substr($m, -1, 1) ne '*')) {
|
|
314 $type = 'deletion';
|
|
315 if ($o and $m and $o !~ $m and $o !~ $m) {
|
|
316 $type .= ', complex';
|
|
317 }
|
|
318 }
|
|
319 elsif (not $o or
|
|
320 ($o and $m and length($o) < length($m) and
|
|
321 substr($m, -1, 1) ne '*' ) ) {
|
|
322 $type = 'insertion';
|
|
323 if ($o and $m and $o !~ $m and $o !~ $m) {
|
|
324 $type .= ', complex';
|
|
325 }
|
|
326 }
|
|
327 elsif ($o and $m and $o ne $m and
|
|
328 length $o == 1 and length $m == 1 ) {
|
|
329 $type = 'substitution';
|
|
330 my $value = $self->similarity_score;
|
|
331 if (defined $value) {
|
|
332 my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
|
|
333 $type .= ", ". $cons;
|
|
334 }
|
|
335 } else {
|
|
336 $type = 'out-of-frame translation, truncation';
|
|
337 }
|
|
338 $self->{'label'} = $type;
|
|
339 return $self->{'label'};
|
|
340 }
|
|
341
|
|
342
|
|
343 =head2 similarity_score
|
|
344
|
|
345 Title : similarity_score
|
|
346 Usage : $self->similarity_score
|
|
347 Function: Measure for evolutionary conservativeness
|
|
348 of single amino substitutions. Uses BLOSUM62.
|
|
349 Negative numbers are noncoservative changes.
|
|
350 Returns : integer, undef if not single amino acid change
|
|
351
|
|
352 =cut
|
|
353
|
|
354 sub similarity_score {
|
|
355 my ($self) = @_;
|
|
356 my ($o, $m, $type);
|
|
357 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
|
|
358 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
|
|
359 return undef unless $o and $m and length $o == 1 and length $m == 1;
|
|
360 return undef unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
|
|
361 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
|
|
362 return $MATRIX->{"\U$o"}->{"\U$m"};
|
|
363 }
|
|
364
|
|
365 =head2 trivname
|
|
366
|
|
367 Title : trivname
|
|
368 Usage : $self->trivname
|
|
369 Function:
|
|
370
|
|
371 Given a Bio::Variation::AAChange object with linked
|
|
372 Bio::Variation::RNAChange and Bio::Variation::DNAMutation
|
|
373 objects, this subroutine creates a string corresponding to
|
|
374 the 'trivial name' of the mutation. Trivial name is
|
|
375 specified in Antonorakis & MDI Nomenclature Working Group:
|
|
376 Human Mutation 11:1-3, 1998.
|
|
377 http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html
|
|
378
|
|
379 Returns : string
|
|
380
|
|
381 =cut
|
|
382
|
|
383
|
|
384 sub trivname {
|
|
385 my ($self,$value) = @_;
|
|
386 if( defined $value) {
|
|
387 $self->{'trivname'} = $value;
|
|
388 } else {
|
|
389 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
|
|
390 ('', '', '', '', '');
|
|
391 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
|
|
392 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
|
|
393
|
|
394 $aaori = substr ($o, 0, 1) if $o;
|
|
395 $aaori =~ tr/\*/X/;
|
|
396
|
|
397 my $sep;
|
|
398 if ($self->isMutation) {
|
|
399 $sep = '>';
|
|
400 } else {
|
|
401 $sep = '|';
|
|
402 }
|
|
403 my $trivname = $aaori. $self->start;
|
|
404 $trivname .= $sep if $sep eq '|';
|
|
405
|
|
406 my @alleles = $self->each_Allele;
|
|
407 foreach my $allele (@alleles) {
|
|
408 my $m = $allele->seq if $allele->seq;
|
|
409
|
|
410 $self->allele_mut($allele);
|
|
411 #$trivname .= $sep. uc $m if $m;
|
|
412
|
|
413 $aamutterm = substr ($m, -1, 1) if $m;
|
|
414 if ($self->RNAChange->label =~ /initiation codon/ and
|
|
415 ( $o and $m and $o ne $m)) {
|
|
416 $aamut = 'X';
|
|
417 }
|
|
418 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
|
|
419 $aamutsymbol = '';
|
|
420 $aamut = $aamutterm;
|
|
421 }
|
|
422 elsif ($self->RNAChange->label =~ /deletion/) {
|
|
423 $aamutsymbol = 'del';
|
|
424 if ($aamutterm eq '*') {
|
|
425 $aatermnumber = $self->start + length($m) -1;
|
|
426 $aamut = 'X'. $aatermnumber;
|
|
427 }
|
|
428 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){
|
|
429 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
|
|
430 }
|
|
431 }
|
|
432 elsif ($self->RNAChange->label =~ /insertion/) {
|
|
433 $aamutsymbol = 'ins';
|
|
434 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
|
|
435 $aatermnumber = $self->start + length($m)-1;
|
|
436 $aamut = $aatermnumber. 'X';
|
|
437 }
|
|
438 if ($self->RNAChange->label =~ /inframe/){
|
|
439 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
|
|
440 }
|
|
441 }
|
|
442 elsif ($self->RNAChange->label =~ /complex/ ) {
|
|
443 my $diff = length($m) - length($o);
|
|
444 if ($diff >= 0 ) {
|
|
445 $aamutsymbol = 'ins';
|
|
446 } else {
|
|
447 $aamutsymbol = 'del' ;
|
|
448 }
|
|
449 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
|
|
450 $aatermnumber = $self->start + length($m)-1;
|
|
451 $aamut = $aatermnumber. 'X';
|
|
452 }
|
|
453 if ($self->RNAChange->label =~ /inframe/){
|
|
454
|
|
455 if ($diff >= 0 ) {
|
|
456 $aamut = '+'. $diff ;
|
|
457 } else {
|
|
458 $aamut = $diff ;
|
|
459 }
|
|
460 }
|
|
461 }
|
|
462 elsif ($self->label =~ /truncation/) {
|
|
463 $aamut = $m;
|
|
464 } else {
|
|
465 $aamutsymbol = '';
|
|
466 $aamut = $aamutterm;
|
|
467 }
|
|
468 $aamut =~ tr/\*/X/;
|
|
469 $trivname .= $aamutsymbol. $aamut. $sep;
|
|
470 }
|
|
471 chop $trivname;
|
|
472 $self->{'trivname'} = $trivname;
|
|
473 }
|
|
474 return $self->{'trivname'};
|
|
475 }
|
|
476
|
|
477 1;
|