comparison variant_effect_predictor/Bio/Variation/AAChange.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: AAChange.pm,v 1.13 2002/10/22 07:38:49 lapp Exp $
2 #
3 # BioPerl module for Bio::Variation::AAChange
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Variation::AAChange - Sequence change class for polypeptides
16
17 =head1 SYNOPSIS
18
19 $aamut = Bio::Variation::AAChange->new
20 ('-start' => $start,
21 '-end' => $end,
22 '-length' => $len,
23 '-proof' => $proof,
24 '-isMutation' => 1,
25 '-mut_number' => $mut_number
26 );
27
28 my $a1 = Bio::Variation::Allele->new;
29 $a1->seq($ori) if $ori;
30 $aamut->allele_ori($a1);
31 my $a2 = Bio::Variation::Allele->new;
32 $a2->seq($mut) if $mut;
33 $aachange->add_Allele($a2);
34 $aachange->allele_mut($a2);
35
36 print "\n";
37
38 # add it to a SeqDiff container object
39 $seqdiff->add_Variant($rnachange);
40
41 # and create links to and from RNA level variant objects
42 $aamut->RNAChange($rnachange);
43 $rnachange->AAChange($rnachange);
44
45 =head1 DESCRIPTION
46
47 The instantiable class Bio::Variation::RNAChange describes basic
48 sequence changes at polypeptide level. It uses methods defined in
49 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
50 for details.
51
52 If the variation described by a AAChange object has a known
53 Bio::Variation::RNAAChange object, create the link with method
54 AAChange(). See L<Bio::Variation::AAChange> for more information.
55
56 =head1 FEEDBACK
57
58 =head2 Mailing Lists
59
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to the
62 Bioperl mailing lists Your participation is much appreciated.
63
64 bioperl-l@bioperl.org - General discussion
65 http://bio.perl.org/MailList.html - About the mailing lists
66
67 =head2 Reporting Bugs
68
69 report bugs to the Bioperl bug tracking system to help us keep track
70 the bugs and their resolution. Bug reports can be submitted via
71 email or the web:
72
73 bioperl-bugs@bio.perl.org
74 http://bugzilla.bioperl.org/
75
76 =head1 AUTHOR - Heikki Lehvaslaiho
77
78 Email: heikki@ebi.ac.uk
79
80 Address:
81
82 EMBL Outstation, European Bioinformatics Institute
83 Wellcome Trust Genome Campus, Hinxton
84 Cambs. CB10 1SD, United Kingdom
85
86 =head1 APPENDIX
87
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
90
91 =cut
92
93
94 # Let the code begin...
95
96
97 package Bio::Variation::AAChange;
98 my $VERSION=1.0;
99 use vars qw(@ISA $MATRIX);
100 use strict;
101
102 # Object preamble - inheritance
103 use Bio::Variation::VariantI;
104
105 @ISA = qw( Bio::Variation::VariantI );
106
107 BEGIN {
108
109 my $matrix = << "__MATRIX__";
110 # Matrix made by matblas from blosum62.iij
111 # * column uses minimum score
112 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
113 # Blocks Database = /data/blocks_5.0/blocks.dat
114 # Cluster Percentage: >= 62
115 # Entropy = 0.6979, Expected = -0.5209
116 A R N D C Q E G H I L K M F P S T W Y V B Z X *
117 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
118 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
119 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
120 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
121 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
122 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
123 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
124 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
125 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
126 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
127 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
128 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
129 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
130 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
131 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
132 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
133 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
134 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
135 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
136 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
137 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
138 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
139 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
140 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
141 __MATRIX__
142
143 my %blosum = ();
144 $matrix =~ /^ +(.+)$/m;
145 my @aas = split / +/, $1;
146 foreach my $aa (@aas) {
147 my $tmp = $aa;
148 $tmp = "\\$aa" if $aa eq '*';
149 $matrix =~ /^($tmp) +([-+]?\d.*)$/m;
150 my @scores = split / +/, $2 if defined $2;
151 my $count = 0;
152 foreach my $ak (@aas) {
153 $blosum{$aa}->{$aas[$count]} = $scores[$count];
154 $count++;
155 }
156 }
157 sub _matrix;
158 $MATRIX = \%blosum;
159 }
160
161 sub new {
162 my($class,@args) = @_;
163 my $self = $class->SUPER::new(@args);
164
165 my ($start, $end, $length, $strand, $primary, $source,
166 $frame, $score, $gff_string,
167 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
168 $label, $status, $proof, $re_changes, $region, $region_value,
169 $region_dist,
170 $numbering, $mut_number, $ismutation) =
171 $self->_rearrange([qw(START
172 END
173 LENGTH
174 STRAND
175 PRIMARY
176 SOURCE
177 FRAME
178 SCORE
179 GFF_STRING
180 ALLELE_ORI
181 ALLELE_MUT
182 UPSTREAMSEQ
183 DNSTREAMSEQ
184 LABEL
185 STATUS
186 PROOF
187 RE_CHANGES
188 REGION
189 REGION_VALUE
190 REGION_DIST
191 NUMBERING
192 MUT_NUMBER
193 ISMUTATION
194 )],@args);
195
196 $self->primary_tag("Variation");
197
198 $self->{ 'alleles' } = [];
199
200 $start && $self->start($start);
201 $end && $self->end($end);
202 $length && $self->length($length);
203 $strand && $self->strand($strand);
204 $primary && $self->primary_tag($primary);
205 $source && $self->source_tag($source);
206 $frame && $self->frame($frame);
207 $score && $self->score($score);
208 $gff_string && $self->_from_gff_string($gff_string);
209
210 $allele_ori && $self->allele_ori($allele_ori);
211 $allele_mut && $self->allele_mut($allele_mut);
212 $upstreamseq && $self->upstreamseq($upstreamseq);
213 $dnstreamseq && $self->dnstreamseq($dnstreamseq);
214
215 $label && $self->label($label);
216 $status && $self->status($status);
217 $proof && $self->proof($proof);
218 $region && $self->region($region);
219 $region_value && $self->region_value($region_value);
220 $region_dist && $self->region_dist($region_dist);
221 $numbering && $self->numbering($numbering);
222 $mut_number && $self->mut_number($mut_number);
223 $ismutation && $self->isMutation($ismutation);
224
225 return $self; # success - we hope!
226 }
227
228 =head2 RNAChange
229
230 Title : RNAChange
231 Usage : $mutobj = $self->RNAChange;
232 : $mutobj = $self->RNAChange($objref);
233 Function: Returns or sets the link-reference to a mutation/change object.
234 If there is no link, it will return undef
235 Returns : an obj_ref or undef
236
237 =cut
238
239 sub RNAChange {
240 my ($self,$value) = @_;
241 if (defined $value) {
242 if( ! $value->isa('Bio::Variation::RNAChange') ) {
243 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
244 return (undef);
245 }
246 else {
247 $self->{'RNAChange'} = $value;
248 }
249 }
250 unless (exists $self->{'RNAChange'}) {
251 return (undef);
252 } else {
253 return $self->{'RNAChange'};
254 }
255 }
256
257
258
259 =head2 label
260
261 Title : label
262 Usage : $obj->label();
263 Function:
264
265 Sets and returns mutation event label(s). If value is not
266 set, or no argument is given returns false. Each
267 instantiable subclass of L<Bio::Variation::VariantI> needs
268 to implement this method. Valid values are listed in
269 'Mutation event controlled vocabulary' in
270 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
271
272 Example :
273 Returns : string
274 Args : string
275
276 =cut
277
278
279 sub label {
280 my ($self) = @_;
281 my ($o, $m, $type);
282 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
283 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
284
285 if ($self->start == 1 ) {
286 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
287 $type = 'no translation';
288 }
289 elsif ($o and $m and $o eq $m ) {
290 $type = 'silent';
291 }
292 # more ...
293 }
294 elsif ($o and substr($o, 0, 1) eq '*' ) {
295 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
296 $type = 'post-elongation';
297 }
298 elsif ($m and $o eq $m ) {
299 $type = 'silent, conservative';
300 }
301 }
302 elsif ($o and $m and $o eq $m) {
303 $type = 'silent, conservative';
304 }
305 elsif ($m and $m eq '*') {
306 $type = 'truncation';
307 }
308 elsif ($o and $m and $o eq $m) {
309 $type = 'silent, conservative';
310 }
311 elsif (not $m or
312 ($o and $m and length($o) > length($m) and
313 substr($m, -1, 1) ne '*')) {
314 $type = 'deletion';
315 if ($o and $m and $o !~ $m and $o !~ $m) {
316 $type .= ', complex';
317 }
318 }
319 elsif (not $o or
320 ($o and $m and length($o) < length($m) and
321 substr($m, -1, 1) ne '*' ) ) {
322 $type = 'insertion';
323 if ($o and $m and $o !~ $m and $o !~ $m) {
324 $type .= ', complex';
325 }
326 }
327 elsif ($o and $m and $o ne $m and
328 length $o == 1 and length $m == 1 ) {
329 $type = 'substitution';
330 my $value = $self->similarity_score;
331 if (defined $value) {
332 my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
333 $type .= ", ". $cons;
334 }
335 } else {
336 $type = 'out-of-frame translation, truncation';
337 }
338 $self->{'label'} = $type;
339 return $self->{'label'};
340 }
341
342
343 =head2 similarity_score
344
345 Title : similarity_score
346 Usage : $self->similarity_score
347 Function: Measure for evolutionary conservativeness
348 of single amino substitutions. Uses BLOSUM62.
349 Negative numbers are noncoservative changes.
350 Returns : integer, undef if not single amino acid change
351
352 =cut
353
354 sub similarity_score {
355 my ($self) = @_;
356 my ($o, $m, $type);
357 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
358 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
359 return undef unless $o and $m and length $o == 1 and length $m == 1;
360 return undef unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
361 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
362 return $MATRIX->{"\U$o"}->{"\U$m"};
363 }
364
365 =head2 trivname
366
367 Title : trivname
368 Usage : $self->trivname
369 Function:
370
371 Given a Bio::Variation::AAChange object with linked
372 Bio::Variation::RNAChange and Bio::Variation::DNAMutation
373 objects, this subroutine creates a string corresponding to
374 the 'trivial name' of the mutation. Trivial name is
375 specified in Antonorakis & MDI Nomenclature Working Group:
376 Human Mutation 11:1-3, 1998.
377 http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html
378
379 Returns : string
380
381 =cut
382
383
384 sub trivname {
385 my ($self,$value) = @_;
386 if( defined $value) {
387 $self->{'trivname'} = $value;
388 } else {
389 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
390 ('', '', '', '', '');
391 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
392 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
393
394 $aaori = substr ($o, 0, 1) if $o;
395 $aaori =~ tr/\*/X/;
396
397 my $sep;
398 if ($self->isMutation) {
399 $sep = '>';
400 } else {
401 $sep = '|';
402 }
403 my $trivname = $aaori. $self->start;
404 $trivname .= $sep if $sep eq '|';
405
406 my @alleles = $self->each_Allele;
407 foreach my $allele (@alleles) {
408 my $m = $allele->seq if $allele->seq;
409
410 $self->allele_mut($allele);
411 #$trivname .= $sep. uc $m if $m;
412
413 $aamutterm = substr ($m, -1, 1) if $m;
414 if ($self->RNAChange->label =~ /initiation codon/ and
415 ( $o and $m and $o ne $m)) {
416 $aamut = 'X';
417 }
418 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
419 $aamutsymbol = '';
420 $aamut = $aamutterm;
421 }
422 elsif ($self->RNAChange->label =~ /deletion/) {
423 $aamutsymbol = 'del';
424 if ($aamutterm eq '*') {
425 $aatermnumber = $self->start + length($m) -1;
426 $aamut = 'X'. $aatermnumber;
427 }
428 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){
429 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
430 }
431 }
432 elsif ($self->RNAChange->label =~ /insertion/) {
433 $aamutsymbol = 'ins';
434 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
435 $aatermnumber = $self->start + length($m)-1;
436 $aamut = $aatermnumber. 'X';
437 }
438 if ($self->RNAChange->label =~ /inframe/){
439 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
440 }
441 }
442 elsif ($self->RNAChange->label =~ /complex/ ) {
443 my $diff = length($m) - length($o);
444 if ($diff >= 0 ) {
445 $aamutsymbol = 'ins';
446 } else {
447 $aamutsymbol = 'del' ;
448 }
449 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
450 $aatermnumber = $self->start + length($m)-1;
451 $aamut = $aatermnumber. 'X';
452 }
453 if ($self->RNAChange->label =~ /inframe/){
454
455 if ($diff >= 0 ) {
456 $aamut = '+'. $diff ;
457 } else {
458 $aamut = $diff ;
459 }
460 }
461 }
462 elsif ($self->label =~ /truncation/) {
463 $aamut = $m;
464 } else {
465 $aamutsymbol = '';
466 $aamut = $aamutterm;
467 }
468 $aamut =~ tr/\*/X/;
469 $trivname .= $aamutsymbol. $aamut. $sep;
470 }
471 chop $trivname;
472 $self->{'trivname'} = $trivname;
473 }
474 return $self->{'trivname'};
475 }
476
477 1;