Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/ProteinFunctionPredictionMatrix.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 # create a new matrix for polyphen predictions | |
28 | |
29 my $orig_pfpm = Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix->new( | |
30 -analysis => 'polyphen', | |
31 -peptide_length => 134, | |
32 ); | |
33 | |
34 # add some predictions | |
35 | |
36 $orig_pfpm->add_prediction(1, 'A', 'probably damaging', 0.967); | |
37 $orig_pfpm->add_prediction(2, 'C', 'benign', 0.09); | |
38 | |
39 # serialize the matrix to a compressed binary string | |
40 | |
41 my $binary_string = $pfpm->serialize; | |
42 | |
43 # store the string somewhere, fetch it later, and then create a new matrix using it | |
44 | |
45 my $new_pfpm = Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix->new( | |
46 -analysis => 'polyphen', | |
47 -matrix => $binary_string | |
48 ); | |
49 | |
50 # retrieve predictions | |
51 | |
52 my ($prediction, $score) = $new_pfpm->get_prediction(2, 'C'); | |
53 | |
54 print "A mutation to 'C' at position 2 is predicted to be $prediction\n"; | |
55 | |
56 =head1 DESCRIPTION | |
57 | |
58 This module defines a class representing a matrix of protein | |
59 function predictions, and provides method to access and set | |
60 predictions for a given position and amino acid, and also to | |
61 serialize the matrix for storage in a database, and deserialize | |
62 a matrix from the compressed format into a perl hash. | |
63 | |
64 =cut | |
65 | |
66 package Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix; | |
67 | |
68 use strict; | |
69 use warnings; | |
70 | |
71 use POSIX qw(ceil); | |
72 use List::Util qw(max); | |
73 use Compress::Zlib; | |
74 | |
75 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
76 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
77 | |
78 use base qw(Exporter); | |
79 | |
80 our @EXPORT_OK = qw($AA_LOOKUP @ALL_AAS); | |
81 | |
82 my $DEBUG = 0; | |
83 | |
84 # user-defined constants | |
85 | |
86 # a header which lets us identify prediction matrices and | |
87 # to check if they may have been corrupted | |
88 | |
89 our $HEADER = 'VEP'; | |
90 | |
91 # the format we use when 'pack'ing our predictions | |
92 | |
93 my $PACK_FORMAT = 'v'; | |
94 | |
95 # a hash mapping qualitative predictions to numerical values | |
96 # for all the analyses we use | |
97 | |
98 my $PREDICTION_TO_VAL = { | |
99 polyphen => { | |
100 'probably damaging' => 0, | |
101 'possibly damaging' => 1, | |
102 'benign' => 2, | |
103 'unknown' => 3, | |
104 }, | |
105 | |
106 sift => { | |
107 'tolerated' => 0, | |
108 'deleterious' => 1, | |
109 }, | |
110 }; | |
111 | |
112 # all valid amino acids | |
113 | |
114 our @ALL_AAS = qw(A C D E F G H I K L M N P Q R S T V W Y); | |
115 | |
116 # we use a short with all bits set to represent the lack of a prediction in | |
117 # an (uncompressed) prediction matrix, we will never observe this value | |
118 # as a real prediction even if we set all the (6) prediction bits because we | |
119 # limit the max score to 1000 so the 10 score bits will never all be set | |
120 | |
121 our $NO_PREDICTION = pack($PACK_FORMAT, 0xFFFF); | |
122 | |
123 # the number of bytes in a short | |
124 | |
125 my $BYTES_PER_PREDICTION = 2; | |
126 | |
127 | |
128 # constants derived from the the user-defined constants | |
129 | |
130 # the maximum number of distinct qualitative predictions used by any tool | |
131 | |
132 my $MAX_NUM_PREDS = max( map { scalar keys %$_ } values %$PREDICTION_TO_VAL ); | |
133 | |
134 # the number of bits used to encode the qualitative prediction | |
135 | |
136 my $NUM_PRED_BITS = ceil( log($MAX_NUM_PREDS) / log(2) ); | |
137 | |
138 throw("Cannot represent more than ".(2**6-1)." predictions") if $NUM_PRED_BITS > 6; | |
139 | |
140 # a hash mapping back from a numerical value to a qualitative prediction | |
141 | |
142 my $VAL_TO_PREDICTION = { | |
143 map { | |
144 my $tool = $_; | |
145 $tool => { | |
146 map { | |
147 $PREDICTION_TO_VAL->{$tool}->{$_} => $_ | |
148 } keys %{ $PREDICTION_TO_VAL->{$tool} } | |
149 } | |
150 } keys %$PREDICTION_TO_VAL | |
151 }; | |
152 | |
153 # a hash from amino acid single letter code to a numerical value | |
154 | |
155 our $AA_LOOKUP = { map {$ALL_AAS[$_] => $_} 0 .. $#ALL_AAS }; | |
156 | |
157 # the number of valid amino acids | |
158 | |
159 our $NUM_AAS = scalar(@ALL_AAS); | |
160 | |
161 =head2 new | |
162 | |
163 Arg [-ANALYSIS] : | |
164 The name of the analysis tool that made these predictions, | |
165 currently must be one of 'sift' or 'polyphen' | |
166 | |
167 Arg [-MATRIX] : | |
168 A gzip compressed binary string encoding the predictions, | |
169 typically created using this class and fetched from the | |
170 variation database (optional) | |
171 | |
172 Arg [-PEPTIDE_LENGTH] : | |
173 The length of the associated peptide, only required if | |
174 you want to serialize this matrix (optional) | |
175 | |
176 Arg [-TRANSLATION_MD5] : | |
177 The hex MD5 hash of the associated peptide sequence | |
178 | |
179 Description: Constructs a new ProteinFunctionPredictionMatrix object | |
180 Returntype : A new Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix instance | |
181 Exceptions : throws unless ANALYSIS is supplied and recognised | |
182 Status : At Risk | |
183 | |
184 =cut | |
185 | |
186 sub new { | |
187 my $class = shift; | |
188 | |
189 my ( | |
190 $analysis, | |
191 $sub_analysis, | |
192 $matrix, | |
193 $peptide_length, | |
194 $translation_md5, | |
195 ) = rearrange([qw( | |
196 ANALYSIS | |
197 SUB_ANALYSIS | |
198 MATRIX | |
199 PEPTIDE_LENGTH | |
200 TRANSLATION_MD5 | |
201 )], @_); | |
202 | |
203 throw("analysis argument required") unless defined $analysis; | |
204 | |
205 throw("Unrecognised analysis '$analysis'") | |
206 unless defined $PREDICTION_TO_VAL->{$analysis}; | |
207 | |
208 my $self = bless { | |
209 analysis => $analysis, | |
210 sub_analysis => $sub_analysis, | |
211 matrix => $matrix, | |
212 peptide_length => $peptide_length, | |
213 translation_md5 => $translation_md5, | |
214 }, $class; | |
215 | |
216 $self->{matrix_compressed} = defined $matrix ? 1 : 0; | |
217 | |
218 return $self; | |
219 } | |
220 | |
221 =head2 analysis | |
222 | |
223 Arg[1] : string $analysis - the name of the analysis tool (optional) | |
224 Description : Get/set the analysis name | |
225 Returntype : string | |
226 Exceptions : throws if the name is not recognised | |
227 Status : At Risk | |
228 | |
229 =cut | |
230 | |
231 sub analysis { | |
232 my ($self, $analysis) = @_; | |
233 | |
234 if ($analysis) { | |
235 throw("Unrecognised analysis '$analysis'") | |
236 unless defined $PREDICTION_TO_VAL->{$analysis}; | |
237 | |
238 $self->{analysis} = $analysis; | |
239 } | |
240 | |
241 return $self->{analysis}; | |
242 } | |
243 | |
244 =head2 sub_analysis | |
245 | |
246 Arg[1] : string $sub_analysis - the name of the sub analysis (optional) | |
247 Description : Get/set the sub analysis name | |
248 Returntype : string | |
249 Exceptions : None | |
250 Status : At Risk | |
251 | |
252 =cut | |
253 | |
254 sub sub_analysis { | |
255 my ($self, $sub_analysis) = @_; | |
256 | |
257 $self->{sub_analysis} = $sub_analysis if $sub_analysis; | |
258 | |
259 return $self->{sub_analysis}; | |
260 } | |
261 | |
262 =head2 peptide_length | |
263 | |
264 Arg[1] : int $peptide_length - the length of the peptide (optional) | |
265 Description : Get/set the length of the peptide - required when you want to | |
266 serialize a matrix, as we need to know how many rows the matrix has | |
267 Returntype : int | |
268 Exceptions : none | |
269 Status : At Risk | |
270 | |
271 =cut | |
272 | |
273 sub peptide_length { | |
274 my ($self, $peptide_length) = @_; | |
275 $self->{peptide_length} = $peptide_length if defined $peptide_length; | |
276 return $self->{peptide_length}; | |
277 } | |
278 | |
279 =head2 translation_md5 | |
280 | |
281 Arg[1] : string $translation_md5 - the hex MD5 hash of the peptide sequence (optional) | |
282 Description : Get/set the MD5 hash of the peptide sequence | |
283 Returntype : string | |
284 Exceptions : none | |
285 Status : At Risk | |
286 | |
287 =cut | |
288 | |
289 sub translation_md5 { | |
290 my ($self, $translation_md5) = @_; | |
291 $self->{translation_md5} = $translation_md5 if defined $translation_md5; | |
292 return $self->{translation_md5}; | |
293 } | |
294 | |
295 =head2 get_prediction | |
296 | |
297 Arg[1] : int $pos - the desired position | |
298 Arg[2] : string $aa - the mutant amino acid | |
299 Description : get the prediction and score for the given position and amino acid | |
300 Returntype : a list containing 2 values, the prediction and the score | |
301 Exceptions : throws if either the position or amino acid are invalid | |
302 Status : At Risk | |
303 | |
304 =cut | |
305 | |
306 sub get_prediction { | |
307 my ($self, $pos, $aa) = @_; | |
308 | |
309 # if we have it in our uncompressed hash then just return it | |
310 | |
311 if (defined $self->{preds}->{$pos}->{$aa}) { | |
312 return @{ $self->{preds}->{$pos}->{$aa} }; | |
313 } | |
314 | |
315 # otherwise we look in the serialized matrix string | |
316 | |
317 return $self->prediction_from_matrix($pos, $aa); | |
318 } | |
319 | |
320 =head2 add_prediction | |
321 | |
322 Arg[1] : int $pos - the peptide position | |
323 Arg[2] : string $aa - the mutant amino acid | |
324 Arg[3] : string $prediction - the prediction to store | |
325 Arg[4] : float $score - the score to store | |
326 Description : add a prediction to the matrix for the specified position and amino acid, | |
327 note that this just adds the prediction to a perl hash. If you want to | |
328 encode the matrix in the binary format you should call serialize on the | |
329 matrix object after you have added all the predictions. | |
330 Exceptions : none | |
331 Status : At Risk | |
332 | |
333 =cut | |
334 | |
335 sub add_prediction { | |
336 my ($self, $pos, $aa, $prediction, $score) = @_; | |
337 | |
338 $self->{preds}->{$pos}->{$aa} = [$prediction, $score]; | |
339 } | |
340 | |
341 =head2 serialize | |
342 | |
343 Arg[1] : int $peptide_length - the length of the associated peptide (optional) | |
344 Description : serialize the matrix into a compressed binary format suitable for | |
345 storage in a database, file etc. The same string can later be used | |
346 to create a new matrix object and the predictions can be retrieved | |
347 Exceptions : throws if the peptide length has not been specified | |
348 Status : At Risk | |
349 | |
350 =cut | |
351 | |
352 sub serialize { | |
353 my ($self, $peptide_length) = @_; | |
354 | |
355 $self->{peptide_length} = $peptide_length if defined $peptide_length; | |
356 | |
357 throw("peptide_length required to serialize predictions") | |
358 unless defined $self->{peptide_length}; | |
359 | |
360 # convert predictions to the binary format, and concatenate them all | |
361 # together in the correct order, inserting our dummy $NO_PREDICTION | |
362 # value to fill in any gaps | |
363 | |
364 if ($self->{preds}) { | |
365 | |
366 $self->{matrix_compressed} = 0; | |
367 | |
368 $self->{matrix} = $HEADER; | |
369 | |
370 for my $pos (1 .. $self->{peptide_length}) { | |
371 | |
372 for my $aa (@ALL_AAS) { | |
373 | |
374 my $short; | |
375 | |
376 if ($self->{preds}->{$pos}->{$aa}) { | |
377 my ($prediction, $score) = @{ $self->{preds}->{$pos}->{$aa} }; | |
378 | |
379 $short = $self->prediction_to_short($prediction, $score); | |
380 } | |
381 | |
382 $self->{matrix} .= defined $short ? $short : $NO_PREDICTION; | |
383 } | |
384 } | |
385 | |
386 # delete the hash copy, so things don't get out of sync | |
387 | |
388 $self->{preds} = undef; | |
389 } | |
390 else { | |
391 warning("There don't seem to be any predictions in the matrix to serialize!"); | |
392 } | |
393 | |
394 # and return the compressed string for storage | |
395 | |
396 return $self->compress_matrix; | |
397 } | |
398 | |
399 =head2 deserialize | |
400 | |
401 Arg [1] : coderef $coderef - an anonymous subroutine that will be called | |
402 as each prediction is decoded in order. The subroutine will be | |
403 called with 4 arguments: the peptide position, the amino acid, | |
404 the prediction and the score. This can be used, for example to | |
405 dump out the prediction matrix to a file. (optional) | |
406 Description : deserialize a binary formatted matrix into a perl hash reference | |
407 containing all the uncompressed predictions. This hash has an | |
408 entry for each position in the peptide, which is itself a hashref | |
409 with an entry for each possible alternate amino acid which is a | |
410 listref containing the prediction and score. For example, to retrieve | |
411 the prediction for a substitution of 'C' at position 23 from this | |
412 data structure, you could use code like: | |
413 | |
414 my $prediction_hash = $pfpm->deserialize; | |
415 my ($prediction, $score) = @{ $prediction_hash->{23}->{'C'} }; | |
416 | |
417 Note that if you don't explicitly deserialize a matrix this | |
418 class will keep it in the memory-efficient encoded format, and | |
419 you can access individual predictions with the get_prediction() | |
420 method. You should only use this method if you want to decode | |
421 all predictions (for example to perform some large-scale | |
422 analysis, or to reformat the predictions somehow) | |
423 | |
424 Returntype : hashref containing decoded predictions | |
425 Exceptions : throws if the binary matrix isn't in the expected format | |
426 Status : At Risk | |
427 | |
428 =cut | |
429 | |
430 sub deserialize { | |
431 my ($self, $coderef) = @_; | |
432 | |
433 if ($self->{matrix_compressed}) { | |
434 $self->expand_matrix; | |
435 } | |
436 | |
437 throw("Matrix looks corrupted") unless $self->header_ok; | |
438 | |
439 # we can work out the length of the peptide by counting the rows in the matrix | |
440 | |
441 my $length = ((length($self->{matrix}) - length($HEADER)) / $BYTES_PER_PREDICTION) / $NUM_AAS; | |
442 | |
443 for my $pos (1 .. $length) { | |
444 | |
445 for my $aa (@ALL_AAS) { | |
446 | |
447 # we call prediction_from_short directly to avoid doing all the | |
448 # checks performed in prediction_from_string | |
449 | |
450 my ($prediction, $score) = $self->prediction_from_short(substr($self->{matrix}, $self->compute_offset($pos, $aa), $BYTES_PER_PREDICTION)); | |
451 | |
452 $self->{preds}->{$pos}->{$aa} = [$prediction, $score]; | |
453 | |
454 if ($coderef) { | |
455 $coderef->($pos, $aa, $prediction, $score); | |
456 } | |
457 } | |
458 } | |
459 | |
460 return $self->{preds}; | |
461 } | |
462 | |
463 =head2 prediction_to_short | |
464 | |
465 Arg[1] : string $pred - one of the possible qualitative predictions of the tool | |
466 Arg[2] : float $prob - the prediction score (with 3 d.p.s of precision) | |
467 Description : converts a prediction and corresponding score into a 2-byte short value | |
468 Returntype : the packed short value | |
469 Exceptions : throws if the prediction argument is invalid | |
470 Status : At Risk | |
471 | |
472 =cut | |
473 | |
474 sub prediction_to_short { | |
475 my ($self, $pred, $prob) = @_; | |
476 | |
477 # we only store 3 d.p. so multiply by 1000 to turn our | |
478 # probability into a number between 0 and 1000. | |
479 # 2^10 == 1024 so we need 10 bits of our short to store | |
480 # this value | |
481 | |
482 my $val = $prob * 1000; | |
483 | |
484 # we store the prediction in the top $NUM_PRED_BITS bits | |
485 # so look up the numerical value for the prediction, | |
486 # shift this $NUM_PRED_BITS bits to the left and then set | |
487 # the appropriate bits of our short value | |
488 | |
489 $pred = lc($pred); | |
490 | |
491 my $pred_val = $PREDICTION_TO_VAL->{$self->{analysis}}->{$pred}; | |
492 | |
493 throw("No value defined for prediction: '$pred'?") | |
494 unless defined $pred_val; | |
495 | |
496 $val |= ($pred_val << (16 - $NUM_PRED_BITS)); | |
497 | |
498 printf("p2s: $pred ($prob) => 0x%04x\n", $val) if $DEBUG; | |
499 | |
500 $val = pack $PACK_FORMAT, $val; | |
501 | |
502 return $val; | |
503 } | |
504 | |
505 =head2 prediction_from_short | |
506 | |
507 Arg[1] : string $pred - the packed short value | |
508 Description : converts a 2-byte short value back into a prediction and a score | |
509 Exceptions : none | |
510 Returntype : a list containing 2 values, the prediction and the score | |
511 Status : At Risk | |
512 | |
513 =cut | |
514 | |
515 sub prediction_from_short { | |
516 my ($self, $val) = @_; | |
517 | |
518 # check it isn't our special null prediction | |
519 | |
520 if ($val eq $NO_PREDICTION) { | |
521 print "no pred\n" if $DEBUG; | |
522 return undef; | |
523 } | |
524 | |
525 # unpack the value as a short | |
526 | |
527 $val = unpack $PACK_FORMAT, $val; | |
528 | |
529 # shift the prediction bits down and look up the prediction string | |
530 | |
531 my $pred = $VAL_TO_PREDICTION->{$self->{analysis}}->{$val >> (16 - $NUM_PRED_BITS)}; | |
532 | |
533 # mask off the top 6 bits reserved for the prediction and convert back to a 3 d.p. float | |
534 | |
535 my $prob = ($val & (2**10 - 1)) / 1000; | |
536 | |
537 printf("pfs: 0x%04x => $pred ($prob)\n", $val) if $DEBUG; | |
538 | |
539 return ($pred, $prob); | |
540 } | |
541 | |
542 =head2 compress_matrix | |
543 | |
544 Description : compresses a prediction matrix with gzip | |
545 Returntype : the compressed matrix | |
546 Exceptions : throws if the matrix is an unexpected length, or if the compression fails | |
547 Status : At Risk | |
548 | |
549 =cut | |
550 | |
551 sub compress_matrix { | |
552 my ($self) = @_; | |
553 | |
554 my $matrix = $self->{matrix}; | |
555 | |
556 return undef unless $matrix; | |
557 | |
558 return $matrix if $self->{matrix_compressed}; | |
559 | |
560 # prepend a header, so we can tell if our matrix has been mangled, or | |
561 # is compressed etc. | |
562 | |
563 unless ($self->header_ok) { | |
564 $matrix = $HEADER.$matrix; | |
565 } | |
566 | |
567 throw("Prediction matrix is an unexpected length") | |
568 unless ( (length($matrix) - length($HEADER)) % $NUM_AAS) == 0; | |
569 | |
570 $self->{matrix} = Compress::Zlib::memGzip($matrix) or throw("Failed to gzip: $gzerrno"); | |
571 | |
572 $self->{matrix_compressed} = 1; | |
573 | |
574 return $self->{matrix}; | |
575 } | |
576 | |
577 =head2 header_ok | |
578 | |
579 Description : checks if the binary matrix has the expected header | |
580 Returntype : boolean | |
581 Exceptions : none | |
582 Status : At Risk | |
583 | |
584 =cut | |
585 | |
586 sub header_ok { | |
587 my ($self) = @_; | |
588 return undef unless ($self->{matrix} && !$self->{matrix_compressed}); | |
589 return substr($self->{matrix},0,length($HEADER)) eq $HEADER; | |
590 } | |
591 | |
592 =head2 expand_matrix | |
593 | |
594 Description : uncompresses a compressed prediction matrix | |
595 Returntype : the uncompressed binary matrix string | |
596 Exceptions : throws if the header is incorrect, or if the decompression fails | |
597 Status : At Risk | |
598 | |
599 =cut | |
600 | |
601 sub expand_matrix { | |
602 my ($self) = @_; | |
603 | |
604 return undef unless $self->{matrix}; | |
605 | |
606 return $self->{matrix} unless $self->{matrix_compressed}; | |
607 | |
608 $self->{matrix} = Compress::Zlib::memGunzip($self->{matrix}) | |
609 or throw("Failed to gunzip: $gzerrno"); | |
610 | |
611 $self->{matrix_compressed} = 0; | |
612 | |
613 throw("Malformed prediction matrix") unless $self->header_ok; | |
614 | |
615 return $self->{matrix}; | |
616 } | |
617 | |
618 =head2 compute_offset | |
619 | |
620 Arg[1] : int $pos - the desired position in the peptide | |
621 Arg[2] : string $aa - the desired mutant amino acid | |
622 Description : computes the correct offset into a prediction matrix for a given | |
623 peptide position and mutant amino acid | |
624 Returntype : the integer offset | |
625 Exceptions : none | |
626 Status : At Risk | |
627 | |
628 =cut | |
629 | |
630 sub compute_offset { | |
631 my ($self, $pos, $aa) = @_; | |
632 | |
633 my $offset = length($HEADER) + ( ( (($pos-1) * $NUM_AAS) + $AA_LOOKUP->{$aa} ) * $BYTES_PER_PREDICTION ); | |
634 | |
635 return $offset; | |
636 } | |
637 | |
638 =head2 prediction_from_matrix | |
639 | |
640 Arg[1] : int $pos - the desired position in the peptide | |
641 Arg[2] : string $aa - the desired mutant amino acid | |
642 Description : returns the prediction and score for the requested | |
643 position and mutant amino acid in the matrix | |
644 Returntype : a list containing 2 values, the prediction and the score | |
645 Exceptions : throws if either the position or amino acid are invalid, | |
646 or if the prediction matrix looks to be malformed | |
647 Status : At Risk | |
648 | |
649 =cut | |
650 | |
651 sub prediction_from_matrix { | |
652 my ($self, $pos, $aa) = @_; | |
653 | |
654 if ($self->{matrix_compressed}) { | |
655 # the matrix is still compressed so we try to uncompress it, | |
656 $self->expand_matrix; | |
657 } | |
658 | |
659 $aa = uc($aa) if defined $aa; | |
660 | |
661 throw("Invalid position: $pos") unless (defined $pos && $pos > 0); | |
662 | |
663 throw("Invalid amino acid: $aa") unless (defined $aa && defined $AA_LOOKUP->{$aa}); | |
664 | |
665 # compute our offset into the prediction matrix | |
666 | |
667 my $offset = $self->compute_offset($pos, $aa); | |
668 | |
669 print "offset: $offset\n" if $DEBUG; | |
670 | |
671 if ($offset + 1 > length($self->{matrix})) { | |
672 warning("Offset outside of prediction matrix for position $pos and amino acid $aa?"); | |
673 return undef; | |
674 } | |
675 | |
676 my $pred = substr($self->{matrix}, $offset, $BYTES_PER_PREDICTION); | |
677 | |
678 return $self->prediction_from_short($pred); | |
679 } | |
680 | |
681 1; | |
682 |