annotate variant_effect_predictor/Bio/EnsEMBL/Variation/ProteinFunctionPredictionMatrix.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 # create a new matrix for polyphen predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $orig_pfpm = Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 -analysis => 'polyphen',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 -peptide_length => 134,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 # add some predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 $orig_pfpm->add_prediction(1, 'A', 'probably damaging', 0.967);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $orig_pfpm->add_prediction(2, 'C', 'benign', 0.09);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # serialize the matrix to a compressed binary string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 my $binary_string = $pfpm->serialize;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 # store the string somewhere, fetch it later, and then create a new matrix using it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my $new_pfpm = Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 -analysis => 'polyphen',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 -matrix => $binary_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 # retrieve predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 my ($prediction, $score) = $new_pfpm->get_prediction(2, 'C');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 print "A mutation to 'C' at position 2 is predicted to be $prediction\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 This module defines a class representing a matrix of protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 function predictions, and provides method to access and set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 predictions for a given position and amino acid, and also to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 serialize the matrix for storage in a database, and deserialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 a matrix from the compressed format into a perl hash.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 package Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 use POSIX qw(ceil);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 use List::Util qw(max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Compress::Zlib;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 use base qw(Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 our @EXPORT_OK = qw($AA_LOOKUP @ALL_AAS);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 my $DEBUG = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 # user-defined constants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 # a header which lets us identify prediction matrices and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 # to check if they may have been corrupted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 our $HEADER = 'VEP';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 # the format we use when 'pack'ing our predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 my $PACK_FORMAT = 'v';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 # a hash mapping qualitative predictions to numerical values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 # for all the analyses we use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 my $PREDICTION_TO_VAL = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 polyphen => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 'probably damaging' => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 'possibly damaging' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 'benign' => 2,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 'unknown' => 3,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 sift => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 'tolerated' => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 'deleterious' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 # all valid amino acids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 our @ALL_AAS = qw(A C D E F G H I K L M N P Q R S T V W Y);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 # we use a short with all bits set to represent the lack of a prediction in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 # an (uncompressed) prediction matrix, we will never observe this value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 # as a real prediction even if we set all the (6) prediction bits because we
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 # limit the max score to 1000 so the 10 score bits will never all be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 our $NO_PREDICTION = pack($PACK_FORMAT, 0xFFFF);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 # the number of bytes in a short
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 my $BYTES_PER_PREDICTION = 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 # constants derived from the the user-defined constants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 # the maximum number of distinct qualitative predictions used by any tool
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 my $MAX_NUM_PREDS = max( map { scalar keys %$_ } values %$PREDICTION_TO_VAL );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 # the number of bits used to encode the qualitative prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 my $NUM_PRED_BITS = ceil( log($MAX_NUM_PREDS) / log(2) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 throw("Cannot represent more than ".(2**6-1)." predictions") if $NUM_PRED_BITS > 6;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 # a hash mapping back from a numerical value to a qualitative prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 my $VAL_TO_PREDICTION = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 map {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 my $tool = $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 $tool => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 map {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 $PREDICTION_TO_VAL->{$tool}->{$_} => $_
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 } keys %{ $PREDICTION_TO_VAL->{$tool} }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 } keys %$PREDICTION_TO_VAL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 # a hash from amino acid single letter code to a numerical value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 our $AA_LOOKUP = { map {$ALL_AAS[$_] => $_} 0 .. $#ALL_AAS };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 # the number of valid amino acids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 our $NUM_AAS = scalar(@ALL_AAS);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 Arg [-ANALYSIS] :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 The name of the analysis tool that made these predictions,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 currently must be one of 'sift' or 'polyphen'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 Arg [-MATRIX] :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 A gzip compressed binary string encoding the predictions,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 typically created using this class and fetched from the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 variation database (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 Arg [-PEPTIDE_LENGTH] :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 The length of the associated peptide, only required if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 you want to serialize this matrix (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 Arg [-TRANSLATION_MD5] :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 The hex MD5 hash of the associated peptide sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 Description: Constructs a new ProteinFunctionPredictionMatrix object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 Returntype : A new Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix instance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 Exceptions : throws unless ANALYSIS is supplied and recognised
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 my $class = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 my (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 $analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 $sub_analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 $matrix,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 $peptide_length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 $translation_md5,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 ) = rearrange([qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 ANALYSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 SUB_ANALYSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 MATRIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 PEPTIDE_LENGTH
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 TRANSLATION_MD5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 )], @_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 throw("analysis argument required") unless defined $analysis;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 throw("Unrecognised analysis '$analysis'")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 unless defined $PREDICTION_TO_VAL->{$analysis};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 my $self = bless {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 analysis => $analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 sub_analysis => $sub_analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 matrix => $matrix,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 peptide_length => $peptide_length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 translation_md5 => $translation_md5,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 }, $class;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 $self->{matrix_compressed} = defined $matrix ? 1 : 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 =head2 analysis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 Arg[1] : string $analysis - the name of the analysis tool (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 Description : Get/set the analysis name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 Exceptions : throws if the name is not recognised
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 sub analysis {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 my ($self, $analysis) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 if ($analysis) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 throw("Unrecognised analysis '$analysis'")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 unless defined $PREDICTION_TO_VAL->{$analysis};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 $self->{analysis} = $analysis;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 return $self->{analysis};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 =head2 sub_analysis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 Arg[1] : string $sub_analysis - the name of the sub analysis (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 Description : Get/set the sub analysis name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 sub sub_analysis {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 my ($self, $sub_analysis) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 $self->{sub_analysis} = $sub_analysis if $sub_analysis;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 return $self->{sub_analysis};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 =head2 peptide_length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 Arg[1] : int $peptide_length - the length of the peptide (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 Description : Get/set the length of the peptide - required when you want to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 serialize a matrix, as we need to know how many rows the matrix has
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 sub peptide_length {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 my ($self, $peptide_length) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 $self->{peptide_length} = $peptide_length if defined $peptide_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 return $self->{peptide_length};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 =head2 translation_md5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 Arg[1] : string $translation_md5 - the hex MD5 hash of the peptide sequence (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 Description : Get/set the MD5 hash of the peptide sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 sub translation_md5 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 my ($self, $translation_md5) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 $self->{translation_md5} = $translation_md5 if defined $translation_md5;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 return $self->{translation_md5};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 =head2 get_prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 Arg[1] : int $pos - the desired position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 Arg[2] : string $aa - the mutant amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 Description : get the prediction and score for the given position and amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 Returntype : a list containing 2 values, the prediction and the score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 Exceptions : throws if either the position or amino acid are invalid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 sub get_prediction {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 my ($self, $pos, $aa) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 # if we have it in our uncompressed hash then just return it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 if (defined $self->{preds}->{$pos}->{$aa}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 return @{ $self->{preds}->{$pos}->{$aa} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 # otherwise we look in the serialized matrix string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 return $self->prediction_from_matrix($pos, $aa);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 =head2 add_prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Arg[1] : int $pos - the peptide position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 Arg[2] : string $aa - the mutant amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 Arg[3] : string $prediction - the prediction to store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 Arg[4] : float $score - the score to store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 Description : add a prediction to the matrix for the specified position and amino acid,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 note that this just adds the prediction to a perl hash. If you want to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 encode the matrix in the binary format you should call serialize on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 matrix object after you have added all the predictions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 sub add_prediction {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 my ($self, $pos, $aa, $prediction, $score) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $self->{preds}->{$pos}->{$aa} = [$prediction, $score];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 =head2 serialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 Arg[1] : int $peptide_length - the length of the associated peptide (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 Description : serialize the matrix into a compressed binary format suitable for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 storage in a database, file etc. The same string can later be used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 to create a new matrix object and the predictions can be retrieved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 Exceptions : throws if the peptide length has not been specified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 sub serialize {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 my ($self, $peptide_length) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $self->{peptide_length} = $peptide_length if defined $peptide_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 throw("peptide_length required to serialize predictions")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 unless defined $self->{peptide_length};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 # convert predictions to the binary format, and concatenate them all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 # together in the correct order, inserting our dummy $NO_PREDICTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 # value to fill in any gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 if ($self->{preds}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 $self->{matrix_compressed} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 $self->{matrix} = $HEADER;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 for my $pos (1 .. $self->{peptide_length}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 for my $aa (@ALL_AAS) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 my $short;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 if ($self->{preds}->{$pos}->{$aa}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 my ($prediction, $score) = @{ $self->{preds}->{$pos}->{$aa} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 $short = $self->prediction_to_short($prediction, $score);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 $self->{matrix} .= defined $short ? $short : $NO_PREDICTION;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 # delete the hash copy, so things don't get out of sync
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 $self->{preds} = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 warning("There don't seem to be any predictions in the matrix to serialize!");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 # and return the compressed string for storage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 return $self->compress_matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 =head2 deserialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 Arg [1] : coderef $coderef - an anonymous subroutine that will be called
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 as each prediction is decoded in order. The subroutine will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 called with 4 arguments: the peptide position, the amino acid,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 the prediction and the score. This can be used, for example to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 dump out the prediction matrix to a file. (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 Description : deserialize a binary formatted matrix into a perl hash reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 containing all the uncompressed predictions. This hash has an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 entry for each position in the peptide, which is itself a hashref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 with an entry for each possible alternate amino acid which is a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 listref containing the prediction and score. For example, to retrieve
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 the prediction for a substitution of 'C' at position 23 from this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 data structure, you could use code like:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 my $prediction_hash = $pfpm->deserialize;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 my ($prediction, $score) = @{ $prediction_hash->{23}->{'C'} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 Note that if you don't explicitly deserialize a matrix this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 class will keep it in the memory-efficient encoded format, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 you can access individual predictions with the get_prediction()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 method. You should only use this method if you want to decode
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 all predictions (for example to perform some large-scale
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 analysis, or to reformat the predictions somehow)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 Returntype : hashref containing decoded predictions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 Exceptions : throws if the binary matrix isn't in the expected format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 sub deserialize {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 my ($self, $coderef) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 if ($self->{matrix_compressed}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 $self->expand_matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 throw("Matrix looks corrupted") unless $self->header_ok;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 # we can work out the length of the peptide by counting the rows in the matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 my $length = ((length($self->{matrix}) - length($HEADER)) / $BYTES_PER_PREDICTION) / $NUM_AAS;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 for my $pos (1 .. $length) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 for my $aa (@ALL_AAS) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 # we call prediction_from_short directly to avoid doing all the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 # checks performed in prediction_from_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 my ($prediction, $score) = $self->prediction_from_short(substr($self->{matrix}, $self->compute_offset($pos, $aa), $BYTES_PER_PREDICTION));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 $self->{preds}->{$pos}->{$aa} = [$prediction, $score];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 if ($coderef) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 $coderef->($pos, $aa, $prediction, $score);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 return $self->{preds};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 =head2 prediction_to_short
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 Arg[1] : string $pred - one of the possible qualitative predictions of the tool
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 Arg[2] : float $prob - the prediction score (with 3 d.p.s of precision)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 Description : converts a prediction and corresponding score into a 2-byte short value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 Returntype : the packed short value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 Exceptions : throws if the prediction argument is invalid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 sub prediction_to_short {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 my ($self, $pred, $prob) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 # we only store 3 d.p. so multiply by 1000 to turn our
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 # probability into a number between 0 and 1000.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 # 2^10 == 1024 so we need 10 bits of our short to store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 # this value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 my $val = $prob * 1000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 # we store the prediction in the top $NUM_PRED_BITS bits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 # so look up the numerical value for the prediction,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 # shift this $NUM_PRED_BITS bits to the left and then set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 # the appropriate bits of our short value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 $pred = lc($pred);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 my $pred_val = $PREDICTION_TO_VAL->{$self->{analysis}}->{$pred};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 throw("No value defined for prediction: '$pred'?")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 unless defined $pred_val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 $val |= ($pred_val << (16 - $NUM_PRED_BITS));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 printf("p2s: $pred ($prob) => 0x%04x\n", $val) if $DEBUG;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 $val = pack $PACK_FORMAT, $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 return $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 =head2 prediction_from_short
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 Arg[1] : string $pred - the packed short value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 Description : converts a 2-byte short value back into a prediction and a score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 Returntype : a list containing 2 values, the prediction and the score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 sub prediction_from_short {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 my ($self, $val) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 # check it isn't our special null prediction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 if ($val eq $NO_PREDICTION) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 print "no pred\n" if $DEBUG;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 # unpack the value as a short
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 $val = unpack $PACK_FORMAT, $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 # shift the prediction bits down and look up the prediction string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 my $pred = $VAL_TO_PREDICTION->{$self->{analysis}}->{$val >> (16 - $NUM_PRED_BITS)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 # mask off the top 6 bits reserved for the prediction and convert back to a 3 d.p. float
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 my $prob = ($val & (2**10 - 1)) / 1000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 printf("pfs: 0x%04x => $pred ($prob)\n", $val) if $DEBUG;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 return ($pred, $prob);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 =head2 compress_matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 Description : compresses a prediction matrix with gzip
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 Returntype : the compressed matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 Exceptions : throws if the matrix is an unexpected length, or if the compression fails
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 sub compress_matrix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 my $matrix = $self->{matrix};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 return undef unless $matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 return $matrix if $self->{matrix_compressed};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 # prepend a header, so we can tell if our matrix has been mangled, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 # is compressed etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 unless ($self->header_ok) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 $matrix = $HEADER.$matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 throw("Prediction matrix is an unexpected length")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 unless ( (length($matrix) - length($HEADER)) % $NUM_AAS) == 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 $self->{matrix} = Compress::Zlib::memGzip($matrix) or throw("Failed to gzip: $gzerrno");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 $self->{matrix_compressed} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 return $self->{matrix};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 =head2 header_ok
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 Description : checks if the binary matrix has the expected header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 Returntype : boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 sub header_ok {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 return undef unless ($self->{matrix} && !$self->{matrix_compressed});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 return substr($self->{matrix},0,length($HEADER)) eq $HEADER;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 =head2 expand_matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 Description : uncompresses a compressed prediction matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 Returntype : the uncompressed binary matrix string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 Exceptions : throws if the header is incorrect, or if the decompression fails
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 sub expand_matrix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 return undef unless $self->{matrix};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 return $self->{matrix} unless $self->{matrix_compressed};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 $self->{matrix} = Compress::Zlib::memGunzip($self->{matrix})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 or throw("Failed to gunzip: $gzerrno");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 $self->{matrix_compressed} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 throw("Malformed prediction matrix") unless $self->header_ok;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 return $self->{matrix};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 =head2 compute_offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 Arg[1] : int $pos - the desired position in the peptide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 Arg[2] : string $aa - the desired mutant amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 Description : computes the correct offset into a prediction matrix for a given
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 peptide position and mutant amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 Returntype : the integer offset
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 sub compute_offset {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 my ($self, $pos, $aa) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 my $offset = length($HEADER) + ( ( (($pos-1) * $NUM_AAS) + $AA_LOOKUP->{$aa} ) * $BYTES_PER_PREDICTION );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 return $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 =head2 prediction_from_matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 Arg[1] : int $pos - the desired position in the peptide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 Arg[2] : string $aa - the desired mutant amino acid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 Description : returns the prediction and score for the requested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 position and mutant amino acid in the matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 Returntype : a list containing 2 values, the prediction and the score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 Exceptions : throws if either the position or amino acid are invalid,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 or if the prediction matrix looks to be malformed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 sub prediction_from_matrix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 my ($self, $pos, $aa) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 if ($self->{matrix_compressed}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 # the matrix is still compressed so we try to uncompress it,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 $self->expand_matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 $aa = uc($aa) if defined $aa;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 throw("Invalid position: $pos") unless (defined $pos && $pos > 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 throw("Invalid amino acid: $aa") unless (defined $aa && defined $AA_LOOKUP->{$aa});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 # compute our offset into the prediction matrix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 my $offset = $self->compute_offset($pos, $aa);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 print "offset: $offset\n" if $DEBUG;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 if ($offset + 1 > length($self->{matrix})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 warning("Offset outside of prediction matrix for position $pos and amino acid $aa?");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 my $pred = substr($self->{matrix}, $offset, $BYTES_PER_PREDICTION);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 return $self->prediction_from_short($pred);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682