Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/AAChange.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: AAChange.pm,v 1.13 2002/10/22 07:38:49 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Variation::AAChange | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Variation::AAChange - Sequence change class for polypeptides | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $aamut = Bio::Variation::AAChange->new | |
20 ('-start' => $start, | |
21 '-end' => $end, | |
22 '-length' => $len, | |
23 '-proof' => $proof, | |
24 '-isMutation' => 1, | |
25 '-mut_number' => $mut_number | |
26 ); | |
27 | |
28 my $a1 = Bio::Variation::Allele->new; | |
29 $a1->seq($ori) if $ori; | |
30 $aamut->allele_ori($a1); | |
31 my $a2 = Bio::Variation::Allele->new; | |
32 $a2->seq($mut) if $mut; | |
33 $aachange->add_Allele($a2); | |
34 $aachange->allele_mut($a2); | |
35 | |
36 print "\n"; | |
37 | |
38 # add it to a SeqDiff container object | |
39 $seqdiff->add_Variant($rnachange); | |
40 | |
41 # and create links to and from RNA level variant objects | |
42 $aamut->RNAChange($rnachange); | |
43 $rnachange->AAChange($rnachange); | |
44 | |
45 =head1 DESCRIPTION | |
46 | |
47 The instantiable class Bio::Variation::RNAChange describes basic | |
48 sequence changes at polypeptide level. It uses methods defined in | |
49 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI> | |
50 for details. | |
51 | |
52 If the variation described by a AAChange object has a known | |
53 Bio::Variation::RNAAChange object, create the link with method | |
54 AAChange(). See L<Bio::Variation::AAChange> for more information. | |
55 | |
56 =head1 FEEDBACK | |
57 | |
58 =head2 Mailing Lists | |
59 | |
60 User feedback is an integral part of the evolution of this and other | |
61 Bioperl modules. Send your comments and suggestions preferably to the | |
62 Bioperl mailing lists Your participation is much appreciated. | |
63 | |
64 bioperl-l@bioperl.org - General discussion | |
65 http://bio.perl.org/MailList.html - About the mailing lists | |
66 | |
67 =head2 Reporting Bugs | |
68 | |
69 report bugs to the Bioperl bug tracking system to help us keep track | |
70 the bugs and their resolution. Bug reports can be submitted via | |
71 email or the web: | |
72 | |
73 bioperl-bugs@bio.perl.org | |
74 http://bugzilla.bioperl.org/ | |
75 | |
76 =head1 AUTHOR - Heikki Lehvaslaiho | |
77 | |
78 Email: heikki@ebi.ac.uk | |
79 | |
80 Address: | |
81 | |
82 EMBL Outstation, European Bioinformatics Institute | |
83 Wellcome Trust Genome Campus, Hinxton | |
84 Cambs. CB10 1SD, United Kingdom | |
85 | |
86 =head1 APPENDIX | |
87 | |
88 The rest of the documentation details each of the object | |
89 methods. Internal methods are usually preceded with a _ | |
90 | |
91 =cut | |
92 | |
93 | |
94 # Let the code begin... | |
95 | |
96 | |
97 package Bio::Variation::AAChange; | |
98 my $VERSION=1.0; | |
99 use vars qw(@ISA $MATRIX); | |
100 use strict; | |
101 | |
102 # Object preamble - inheritance | |
103 use Bio::Variation::VariantI; | |
104 | |
105 @ISA = qw( Bio::Variation::VariantI ); | |
106 | |
107 BEGIN { | |
108 | |
109 my $matrix = << "__MATRIX__"; | |
110 # Matrix made by matblas from blosum62.iij | |
111 # * column uses minimum score | |
112 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units | |
113 # Blocks Database = /data/blocks_5.0/blocks.dat | |
114 # Cluster Percentage: >= 62 | |
115 # Entropy = 0.6979, Expected = -0.5209 | |
116 A R N D C Q E G H I L K M F P S T W Y V B Z X * | |
117 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 | |
118 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 | |
119 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 | |
120 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 | |
121 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 | |
122 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 | |
123 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 | |
124 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 | |
125 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 | |
126 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 | |
127 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 | |
128 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 | |
129 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 | |
130 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 | |
131 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 | |
132 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 | |
133 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 | |
134 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 | |
135 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 | |
136 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 | |
137 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 | |
138 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 | |
139 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 | |
140 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 | |
141 __MATRIX__ | |
142 | |
143 my %blosum = (); | |
144 $matrix =~ /^ +(.+)$/m; | |
145 my @aas = split / +/, $1; | |
146 foreach my $aa (@aas) { | |
147 my $tmp = $aa; | |
148 $tmp = "\\$aa" if $aa eq '*'; | |
149 $matrix =~ /^($tmp) +([-+]?\d.*)$/m; | |
150 my @scores = split / +/, $2 if defined $2; | |
151 my $count = 0; | |
152 foreach my $ak (@aas) { | |
153 $blosum{$aa}->{$aas[$count]} = $scores[$count]; | |
154 $count++; | |
155 } | |
156 } | |
157 sub _matrix; | |
158 $MATRIX = \%blosum; | |
159 } | |
160 | |
161 sub new { | |
162 my($class,@args) = @_; | |
163 my $self = $class->SUPER::new(@args); | |
164 | |
165 my ($start, $end, $length, $strand, $primary, $source, | |
166 $frame, $score, $gff_string, | |
167 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, | |
168 $label, $status, $proof, $re_changes, $region, $region_value, | |
169 $region_dist, | |
170 $numbering, $mut_number, $ismutation) = | |
171 $self->_rearrange([qw(START | |
172 END | |
173 LENGTH | |
174 STRAND | |
175 PRIMARY | |
176 SOURCE | |
177 FRAME | |
178 SCORE | |
179 GFF_STRING | |
180 ALLELE_ORI | |
181 ALLELE_MUT | |
182 UPSTREAMSEQ | |
183 DNSTREAMSEQ | |
184 LABEL | |
185 STATUS | |
186 PROOF | |
187 RE_CHANGES | |
188 REGION | |
189 REGION_VALUE | |
190 REGION_DIST | |
191 NUMBERING | |
192 MUT_NUMBER | |
193 ISMUTATION | |
194 )],@args); | |
195 | |
196 $self->primary_tag("Variation"); | |
197 | |
198 $self->{ 'alleles' } = []; | |
199 | |
200 $start && $self->start($start); | |
201 $end && $self->end($end); | |
202 $length && $self->length($length); | |
203 $strand && $self->strand($strand); | |
204 $primary && $self->primary_tag($primary); | |
205 $source && $self->source_tag($source); | |
206 $frame && $self->frame($frame); | |
207 $score && $self->score($score); | |
208 $gff_string && $self->_from_gff_string($gff_string); | |
209 | |
210 $allele_ori && $self->allele_ori($allele_ori); | |
211 $allele_mut && $self->allele_mut($allele_mut); | |
212 $upstreamseq && $self->upstreamseq($upstreamseq); | |
213 $dnstreamseq && $self->dnstreamseq($dnstreamseq); | |
214 | |
215 $label && $self->label($label); | |
216 $status && $self->status($status); | |
217 $proof && $self->proof($proof); | |
218 $region && $self->region($region); | |
219 $region_value && $self->region_value($region_value); | |
220 $region_dist && $self->region_dist($region_dist); | |
221 $numbering && $self->numbering($numbering); | |
222 $mut_number && $self->mut_number($mut_number); | |
223 $ismutation && $self->isMutation($ismutation); | |
224 | |
225 return $self; # success - we hope! | |
226 } | |
227 | |
228 =head2 RNAChange | |
229 | |
230 Title : RNAChange | |
231 Usage : $mutobj = $self->RNAChange; | |
232 : $mutobj = $self->RNAChange($objref); | |
233 Function: Returns or sets the link-reference to a mutation/change object. | |
234 If there is no link, it will return undef | |
235 Returns : an obj_ref or undef | |
236 | |
237 =cut | |
238 | |
239 sub RNAChange { | |
240 my ($self,$value) = @_; | |
241 if (defined $value) { | |
242 if( ! $value->isa('Bio::Variation::RNAChange') ) { | |
243 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); | |
244 return (undef); | |
245 } | |
246 else { | |
247 $self->{'RNAChange'} = $value; | |
248 } | |
249 } | |
250 unless (exists $self->{'RNAChange'}) { | |
251 return (undef); | |
252 } else { | |
253 return $self->{'RNAChange'}; | |
254 } | |
255 } | |
256 | |
257 | |
258 | |
259 =head2 label | |
260 | |
261 Title : label | |
262 Usage : $obj->label(); | |
263 Function: | |
264 | |
265 Sets and returns mutation event label(s). If value is not | |
266 set, or no argument is given returns false. Each | |
267 instantiable subclass of L<Bio::Variation::VariantI> needs | |
268 to implement this method. Valid values are listed in | |
269 'Mutation event controlled vocabulary' in | |
270 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. | |
271 | |
272 Example : | |
273 Returns : string | |
274 Args : string | |
275 | |
276 =cut | |
277 | |
278 | |
279 sub label { | |
280 my ($self) = @_; | |
281 my ($o, $m, $type); | |
282 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; | |
283 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; | |
284 | |
285 if ($self->start == 1 ) { | |
286 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) { | |
287 $type = 'no translation'; | |
288 } | |
289 elsif ($o and $m and $o eq $m ) { | |
290 $type = 'silent'; | |
291 } | |
292 # more ... | |
293 } | |
294 elsif ($o and substr($o, 0, 1) eq '*' ) { | |
295 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) { | |
296 $type = 'post-elongation'; | |
297 } | |
298 elsif ($m and $o eq $m ) { | |
299 $type = 'silent, conservative'; | |
300 } | |
301 } | |
302 elsif ($o and $m and $o eq $m) { | |
303 $type = 'silent, conservative'; | |
304 } | |
305 elsif ($m and $m eq '*') { | |
306 $type = 'truncation'; | |
307 } | |
308 elsif ($o and $m and $o eq $m) { | |
309 $type = 'silent, conservative'; | |
310 } | |
311 elsif (not $m or | |
312 ($o and $m and length($o) > length($m) and | |
313 substr($m, -1, 1) ne '*')) { | |
314 $type = 'deletion'; | |
315 if ($o and $m and $o !~ $m and $o !~ $m) { | |
316 $type .= ', complex'; | |
317 } | |
318 } | |
319 elsif (not $o or | |
320 ($o and $m and length($o) < length($m) and | |
321 substr($m, -1, 1) ne '*' ) ) { | |
322 $type = 'insertion'; | |
323 if ($o and $m and $o !~ $m and $o !~ $m) { | |
324 $type .= ', complex'; | |
325 } | |
326 } | |
327 elsif ($o and $m and $o ne $m and | |
328 length $o == 1 and length $m == 1 ) { | |
329 $type = 'substitution'; | |
330 my $value = $self->similarity_score; | |
331 if (defined $value) { | |
332 my $cons = ($value < 0) ? 'nonconservative' : 'conservative'; | |
333 $type .= ", ". $cons; | |
334 } | |
335 } else { | |
336 $type = 'out-of-frame translation, truncation'; | |
337 } | |
338 $self->{'label'} = $type; | |
339 return $self->{'label'}; | |
340 } | |
341 | |
342 | |
343 =head2 similarity_score | |
344 | |
345 Title : similarity_score | |
346 Usage : $self->similarity_score | |
347 Function: Measure for evolutionary conservativeness | |
348 of single amino substitutions. Uses BLOSUM62. | |
349 Negative numbers are noncoservative changes. | |
350 Returns : integer, undef if not single amino acid change | |
351 | |
352 =cut | |
353 | |
354 sub similarity_score { | |
355 my ($self) = @_; | |
356 my ($o, $m, $type); | |
357 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; | |
358 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; | |
359 return undef unless $o and $m and length $o == 1 and length $m == 1; | |
360 return undef unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and | |
361 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i; | |
362 return $MATRIX->{"\U$o"}->{"\U$m"}; | |
363 } | |
364 | |
365 =head2 trivname | |
366 | |
367 Title : trivname | |
368 Usage : $self->trivname | |
369 Function: | |
370 | |
371 Given a Bio::Variation::AAChange object with linked | |
372 Bio::Variation::RNAChange and Bio::Variation::DNAMutation | |
373 objects, this subroutine creates a string corresponding to | |
374 the 'trivial name' of the mutation. Trivial name is | |
375 specified in Antonorakis & MDI Nomenclature Working Group: | |
376 Human Mutation 11:1-3, 1998. | |
377 http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html | |
378 | |
379 Returns : string | |
380 | |
381 =cut | |
382 | |
383 | |
384 sub trivname { | |
385 my ($self,$value) = @_; | |
386 if( defined $value) { | |
387 $self->{'trivname'} = $value; | |
388 } else { | |
389 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) = | |
390 ('', '', '', '', ''); | |
391 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; | |
392 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; | |
393 | |
394 $aaori = substr ($o, 0, 1) if $o; | |
395 $aaori =~ tr/\*/X/; | |
396 | |
397 my $sep; | |
398 if ($self->isMutation) { | |
399 $sep = '>'; | |
400 } else { | |
401 $sep = '|'; | |
402 } | |
403 my $trivname = $aaori. $self->start; | |
404 $trivname .= $sep if $sep eq '|'; | |
405 | |
406 my @alleles = $self->each_Allele; | |
407 foreach my $allele (@alleles) { | |
408 my $m = $allele->seq if $allele->seq; | |
409 | |
410 $self->allele_mut($allele); | |
411 #$trivname .= $sep. uc $m if $m; | |
412 | |
413 $aamutterm = substr ($m, -1, 1) if $m; | |
414 if ($self->RNAChange->label =~ /initiation codon/ and | |
415 ( $o and $m and $o ne $m)) { | |
416 $aamut = 'X'; | |
417 } | |
418 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) { | |
419 $aamutsymbol = ''; | |
420 $aamut = $aamutterm; | |
421 } | |
422 elsif ($self->RNAChange->label =~ /deletion/) { | |
423 $aamutsymbol = 'del'; | |
424 if ($aamutterm eq '*') { | |
425 $aatermnumber = $self->start + length($m) -1; | |
426 $aamut = 'X'. $aatermnumber; | |
427 } | |
428 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){ | |
429 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ; | |
430 } | |
431 } | |
432 elsif ($self->RNAChange->label =~ /insertion/) { | |
433 $aamutsymbol = 'ins'; | |
434 if (($aamutterm eq '*') && (length($m)-1 != 0)) { | |
435 $aatermnumber = $self->start + length($m)-1; | |
436 $aamut = $aatermnumber. 'X'; | |
437 } | |
438 if ($self->RNAChange->label =~ /inframe/){ | |
439 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ; | |
440 } | |
441 } | |
442 elsif ($self->RNAChange->label =~ /complex/ ) { | |
443 my $diff = length($m) - length($o); | |
444 if ($diff >= 0 ) { | |
445 $aamutsymbol = 'ins'; | |
446 } else { | |
447 $aamutsymbol = 'del' ; | |
448 } | |
449 if (($aamutterm eq '*') && (length($m)-1 != 0)) { | |
450 $aatermnumber = $self->start + length($m)-1; | |
451 $aamut = $aatermnumber. 'X'; | |
452 } | |
453 if ($self->RNAChange->label =~ /inframe/){ | |
454 | |
455 if ($diff >= 0 ) { | |
456 $aamut = '+'. $diff ; | |
457 } else { | |
458 $aamut = $diff ; | |
459 } | |
460 } | |
461 } | |
462 elsif ($self->label =~ /truncation/) { | |
463 $aamut = $m; | |
464 } else { | |
465 $aamutsymbol = ''; | |
466 $aamut = $aamutterm; | |
467 } | |
468 $aamut =~ tr/\*/X/; | |
469 $trivname .= $aamutsymbol. $aamut. $sep; | |
470 } | |
471 chop $trivname; | |
472 $self->{'trivname'} = $trivname; | |
473 } | |
474 return $self->{'trivname'}; | |
475 } | |
476 | |
477 1; |