Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/Sigcleave.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #----------------------------------------------------------------------------- | |
| 2 # PACKAGE : Bio::Tools::Sigcleave | |
| 3 # AUTHOR : Chris Dagdigian, dag@sonsorol.org | |
| 4 # CREATED : Jan 28 1999 | |
| 5 # REVISION: $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $ | |
| 6 # | |
| 7 # Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved. | |
| 8 # This module is free software; you can redistribute it and/or | |
| 9 # modify it under the same terms as Perl itself. | |
| 10 # | |
| 11 # _History_ | |
| 12 # | |
| 13 # Object framework ripped from Steve Chervits's SeqPattern.pm | |
| 14 # | |
| 15 # Core EGCG Sigcleave emulation from perl code developed by | |
| 16 # Danh Nguyen & Kamalakar Gulukota which itself was based | |
| 17 # loosely on Colgrove's signal.c program. | |
| 18 # | |
| 19 # The overall idea is to replicate the output of the sigcleave | |
| 20 # program which was distributed with the EGCG extension to the GCG sequence | |
| 21 # analysis package. There is also an accessor method for just getting at | |
| 22 # the raw results. | |
| 23 # | |
| 24 #----------------------------------------------------------------------------- | |
| 25 | |
| 26 =head1 NAME | |
| 27 | |
| 28 Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis | |
| 29 | |
| 30 =head1 SYNOPSIS | |
| 31 | |
| 32 =head2 Object Creation | |
| 33 | |
| 34 use Bio::Tools::Sigcleave (); | |
| 35 | |
| 36 # to keep the module backwar compatible, you can pass it a sequence string, but | |
| 37 # there recommended say is to pass it a Seq object | |
| 38 | |
| 39 # this works | |
| 40 $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI"; | |
| 41 $sig = new Bio::Tools::Sigcleave(-seq => $seq, | |
| 42 -type => 'protein', | |
| 43 -threshold=>'3.5', | |
| 44 ); | |
| 45 # but you do: | |
| 46 $seqobj = Bio::PrimarySeq->new(-seq => $seq); | |
| 47 | |
| 48 $sig = new Bio::Tools::Sigcleave(-seq => $seqobj, | |
| 49 -threshold=>'3.5', | |
| 50 ); | |
| 51 | |
| 52 | |
| 53 # now you can detect procaryotic signal sequences as well as eucaryotic | |
| 54 $sig->matrix('eucaryotic'); # or 'procaryotic' | |
| 55 | |
| 56 | |
| 57 =head2 Object Methods & Accessors | |
| 58 | |
| 59 # you can use this method to fine tune the threshod before printing out the results | |
| 60 $sig->result_count: | |
| 61 | |
| 62 %raw_results = $sig->signals; | |
| 63 $formatted_output = $sig->pretty_print; | |
| 64 | |
| 65 =head1 DESCRIPTION | |
| 66 | |
| 67 "Sigcleave" was a program distributed as part of the free EGCG add-on | |
| 68 to earlier versions of the GCG Sequence Analysis package. A new | |
| 69 implementation of the algorithm is now part of EMBOSS package. | |
| 70 | |
| 71 From the EGCG documentation: | |
| 72 | |
| 73 SigCleave uses the von Heijne method to locate signal sequences, and | |
| 74 to identify the cleavage site. The method is 95% accurate in | |
| 75 resolving signal sequences from non-signal sequences with a cutoff | |
| 76 score of 3.5, and 75-80% accurate in identifying the cleavage | |
| 77 site. The program reports all hits above a minimum value. | |
| 78 | |
| 79 The EGCG Sigcleave program was written by Peter Rice (E-mail: | |
| 80 pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre, | |
| 81 Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK). | |
| 82 | |
| 83 Since EGCG is no longer distributed for the latest versions of GCG, | |
| 84 this code was developed to emulate the output of the original program | |
| 85 as much as possible for those who lost access to sigcleave when | |
| 86 upgrading to newer versions of GCG. | |
| 87 | |
| 88 There are 2 accessor methods for this object. "signals" will return a | |
| 89 perl associative array containing the sigcleave scores keyed by amino | |
| 90 acid position. "pretty_print" returns a formatted string similar to | |
| 91 the output of the original sigcleave utility. | |
| 92 | |
| 93 In both cases, the "threshold" setting controls the score reporting | |
| 94 level. If no value for threshold is passed in by the user, the code | |
| 95 defaults to a reporting value of 3.5. | |
| 96 | |
| 97 In this implemntation the accessor will never return any | |
| 98 score/position pair which does not meet the threshold limit. This is | |
| 99 the slightly different from the behaviour of the 8.1 EGCG sigcleave | |
| 100 program which will report the highest of the under-threshold results | |
| 101 if nothing else is found. | |
| 102 | |
| 103 | |
| 104 Example of pretty_print output: | |
| 105 | |
| 106 SIGCLEAVE of sigtest from: 1 to 146 | |
| 107 | |
| 108 Report scores over 3.5 | |
| 109 Maximum score 4.9 at residue 131 | |
| 110 | |
| 111 Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET | |
| 112 | (signal) | (mature peptide) | |
| 113 118 131 | |
| 114 | |
| 115 Other entries above 3.5 | |
| 116 | |
| 117 Maximum score 3.7 at residue 112 | |
| 118 | |
| 119 Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET | |
| 120 | (signal) | (mature peptide) | |
| 121 99 112 | |
| 122 | |
| 123 | |
| 124 =head1 FEEDBACK | |
| 125 | |
| 126 When updating and maintaining a module, it helps to know that people | |
| 127 are actually using it. Let us know if you find a bug, think this code | |
| 128 is useful or have any improvements/features to suggest. | |
| 129 | |
| 130 =head2 Reporting Bugs | |
| 131 | |
| 132 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 133 the bugs and their resolution. Bug reports can be submitted via email | |
| 134 or the web: | |
| 135 | |
| 136 bioperl-bugs@bio.perl.org | |
| 137 http://bugzilla.bioperl.org/ | |
| 138 | |
| 139 =head1 AUTHOR | |
| 140 | |
| 141 Chris Dagdigian, dag@sonsorol.org & others | |
| 142 | |
| 143 =head1 CONTRIBUTORS | |
| 144 | |
| 145 Heikki Lehvaslaiho, heikki@ebi.ac.uk | |
| 146 | |
| 147 =head1 VERSION | |
| 148 | |
| 149 Bio::Tools::Sigcleave.pm, $Id: Sigcleave.pm,v 1.17 2002/10/22 07:45:22 lapp Exp $ | |
| 150 | |
| 151 =head1 COPYRIGHT | |
| 152 | |
| 153 Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved. | |
| 154 This module is free software; you can redistribute it and/or modify it | |
| 155 under the same terms as Perl itself. | |
| 156 | |
| 157 =head1 REFERENCES / SEE ALSO | |
| 158 | |
| 159 von Heijne G. (1986) "A new method for predicting signal sequences | |
| 160 cleavage sites." Nucleic Acids Res. 14, 4683-4690. | |
| 161 | |
| 162 von Heijne G. (1987) in "Sequence Analysis in Molecular Biology: | |
| 163 Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117). | |
| 164 | |
| 165 | |
| 166 =head1 APPENDIX | |
| 167 | |
| 168 The following documentation describes the various functions | |
| 169 contained in this module. Some functions are for internal | |
| 170 use and are not meant to be called by the user; they are | |
| 171 preceded by an underscore ("_"). | |
| 172 | |
| 173 | |
| 174 =cut | |
| 175 | |
| 176 # | |
| 177 ## | |
| 178 ### | |
| 179 #### END of main POD documentation. | |
| 180 ### | |
| 181 ## | |
| 182 # | |
| 183 | |
| 184 | |
| 185 package Bio::Tools::Sigcleave; | |
| 186 | |
| 187 use Bio::Root::Root; | |
| 188 use Bio::PrimarySeq; | |
| 189 | |
| 190 @ISA = qw(Bio::Root::Root); | |
| 191 use strict; | |
| 192 use vars qw ($ID $VERSION %WeightTable_euc %WeightTable_pro ); | |
| 193 $ID = 'Bio::Tools::Sigcleave'; | |
| 194 $VERSION = 0.02; | |
| 195 | |
| 196 | |
| 197 %WeightTable_euc = ( | |
| 198 #Sample: 161 aligned sequences | |
| 199 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect | |
| 200 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5], | |
| 201 'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5], | |
| 202 'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9], | |
| 203 'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0], | |
| 204 'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6], | |
| 205 'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1], | |
| 206 'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4], | |
| 207 'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4], | |
| 208 'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3], | |
| 209 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1], | |
| 210 'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7], | |
| 211 'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1], | |
| 212 'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4], | |
| 213 'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3], | |
| 214 'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6], | |
| 215 'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4], | |
| 216 'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7], | |
| 217 'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1], | |
| 218 'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8], | |
| 219 'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6] | |
| 220 ); | |
| 221 | |
| 222 %WeightTable_pro = ( | |
| 223 #Sample: 36 aligned sequences | |
| 224 # R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect | |
| 225 'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2], | |
| 226 'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0], | |
| 227 'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0], | |
| 228 'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2], | |
| 229 'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3], | |
| 230 'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7], | |
| 231 'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8], | |
| 232 'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7], | |
| 233 'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5], | |
| 234 'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7], | |
| 235 'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6], | |
| 236 'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6], | |
| 237 'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7], | |
| 238 'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4], | |
| 239 'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7], | |
| 240 'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6], | |
| 241 'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2], | |
| 242 'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5], | |
| 243 'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4], | |
| 244 'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3] | |
| 245 ); | |
| 246 | |
| 247 | |
| 248 ## | |
| 249 ## Now we calculate the _real_ values for the weight tables | |
| 250 ## | |
| 251 ## | |
| 252 ## yeah yeah yeah there is lots of math here that gets repeated | |
| 253 ## every single time a sigcleave object gets created. This is | |
| 254 ## a quick hack to make sure that we get the scores as accurate as | |
| 255 ## possible. Need all those significant digits.... | |
| 256 ## | |
| 257 ## suggestions for speedup aproaches welcome | |
| 258 ## | |
| 259 | |
| 260 | |
| 261 foreach my $i (keys %WeightTable_euc) { | |
| 262 my $expected = $WeightTable_euc{$i}[15]; | |
| 263 if ($expected > 0) { | |
| 264 for (my $j=0; $j<16; $j++) { | |
| 265 if ($WeightTable_euc{$i}[$j] == 0) { | |
| 266 $WeightTable_euc{$i}[$j] = 1; | |
| 267 if ($j == 10 || $j == 12) { | |
| 268 $WeightTable_euc{$i}[$j] = 1.e-10; | |
| 269 } | |
| 270 } | |
| 271 $WeightTable_euc{$i}[$j] = log($WeightTable_euc{$i}[$j]/$expected); | |
| 272 } | |
| 273 } | |
| 274 } | |
| 275 | |
| 276 | |
| 277 foreach my $i (keys %WeightTable_pro) { | |
| 278 my $expected = $WeightTable_pro{$i}[15]; | |
| 279 if ($expected > 0) { | |
| 280 for (my $j=0; $j<16; $j++) { | |
| 281 if ($WeightTable_pro{$i}[$j] == 0) { | |
| 282 $WeightTable_pro{$i}[$j] = 1; | |
| 283 if ($j == 10 || $j == 12) { | |
| 284 $WeightTable_pro{$i}[$j] = 1.e-10; | |
| 285 } | |
| 286 } | |
| 287 $WeightTable_pro{$i}[$j] = log($WeightTable_pro{$i}[$j]/$expected); | |
| 288 } | |
| 289 } | |
| 290 } | |
| 291 | |
| 292 | |
| 293 | |
| 294 ##################################################################################### | |
| 295 ## CONSTRUCTOR ## | |
| 296 ##################################################################################### | |
| 297 | |
| 298 | |
| 299 sub new { | |
| 300 my ($class, @args) = @_; | |
| 301 | |
| 302 my $self = $class->SUPER::new(@args); | |
| 303 #my $self = Bio::Seq->new(@args); | |
| 304 | |
| 305 my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)],@args); | |
| 306 | |
| 307 defined $threshold && $self->threshold($threshold); | |
| 308 $matrix && $self->matrix($matrix); | |
| 309 $seq && $self->seq($seq); | |
| 310 | |
| 311 return $self; | |
| 312 } | |
| 313 | |
| 314 | |
| 315 | |
| 316 =head1 threshold | |
| 317 | |
| 318 Title : threshold | |
| 319 Usage : $value = $self->threshold | |
| 320 Purpose : Read/write method sigcleave score reporting threshold. | |
| 321 Returns : float. | |
| 322 Argument : new value, float | |
| 323 Throws : on non-number argument | |
| 324 Comments : defaults to 3.5 | |
| 325 See Also : n/a | |
| 326 | |
| 327 =cut | |
| 328 | |
| 329 #---------------- | |
| 330 sub threshold { | |
| 331 #---------------- | |
| 332 my ($self, $value) = @_; | |
| 333 if( defined $value) { | |
| 334 $self->throw("I need a number, not [$value]") | |
| 335 if $value !~ /^[+-]?[\d\.]+$/; | |
| 336 $self->{'_threshold'} = $value; | |
| 337 } | |
| 338 return $self->{'_threshold'} || 3.5 ; | |
| 339 } | |
| 340 | |
| 341 =head1 matrix | |
| 342 | |
| 343 Title : matrix | |
| 344 Usage : $value = $self->matrix('procaryotic') | |
| 345 Purpose : Read/write method sigcleave matrix. | |
| 346 Returns : float. | |
| 347 Argument : new value: 'eucaryotic' or 'procaryotic' | |
| 348 Throws : on non-number argument | |
| 349 Comments : defaults to 3.5 | |
| 350 See Also : n/a | |
| 351 | |
| 352 =cut | |
| 353 | |
| 354 #---------------- | |
| 355 sub matrix { | |
| 356 #---------------- | |
| 357 my ($self, $value) = @_; | |
| 358 if( defined $value) { | |
| 359 $self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]") | |
| 360 unless $value eq 'eucaryotic' or $value eq 'procaryotic'; | |
| 361 $self->{'_matrix'} = $value; | |
| 362 } | |
| 363 return $self->{'_matrix'} || 'eucaryotic' ; | |
| 364 | |
| 365 } | |
| 366 | |
| 367 =head1 seq | |
| 368 | |
| 369 Title : seq | |
| 370 Usage : $value = $self->seq('procaryotic') | |
| 371 Purpose : Read/write method sigcleave seq. | |
| 372 Returns : float. | |
| 373 Argument : new value: 'eucaryotic' or 'procaryotic' | |
| 374 Throws : on non-number argument | |
| 375 Comments : defaults to 3.5 | |
| 376 See Also : n/a | |
| 377 | |
| 378 =cut | |
| 379 | |
| 380 #---------------- | |
| 381 sub seq { | |
| 382 #---------------- | |
| 383 my ($self, $value) = @_; | |
| 384 if( defined $value) { | |
| 385 if ($value->isa('Bio::PrimarySeqI')) { | |
| 386 $self->{'_seq'} = $value; | |
| 387 } else { | |
| 388 $self->{'_seq'} = Bio::PrimarySeq->new(-seq=>$value, | |
| 389 -alphabet=>'protein'); | |
| 390 } | |
| 391 } | |
| 392 return $self->{'_seq'}; | |
| 393 } | |
| 394 | |
| 395 | |
| 396 | |
| 397 =head1 _Analyze | |
| 398 | |
| 399 Title : _Analyze | |
| 400 Usage : N/A This is an internal method. Not meant to be called from outside | |
| 401 : the package | |
| 402 : | |
| 403 Purpose : calculates sigcleave score and amino acid position for the | |
| 404 : given protein sequence. The score reporting threshold can | |
| 405 : be adjusted by passing in the "threshold" parameter during | |
| 406 : object construction. If no threshold is passed in, the code | |
| 407 : defaults to reporting any scores equal to or above 3.5 | |
| 408 : | |
| 409 Returns : nothing. results are added to the object | |
| 410 Argument : none. | |
| 411 Throws : nothing. | |
| 412 Comments : nothing. | |
| 413 See Also : n/a | |
| 414 | |
| 415 =cut | |
| 416 | |
| 417 #---------------- | |
| 418 sub _Analyze { | |
| 419 #---------------- | |
| 420 my($self) = @_; | |
| 421 | |
| 422 my %signals; | |
| 423 my @hitWeight = (); | |
| 424 my @hitsort = (); | |
| 425 my @hitpos = (); | |
| 426 my $maxSite = ""; | |
| 427 my $seqPos = ""; | |
| 428 my $istart = ""; | |
| 429 my $iend = ""; | |
| 430 my $icol = ""; | |
| 431 my $i = ""; | |
| 432 my $weight = ""; | |
| 433 my $k = 0; | |
| 434 my $c = 0; | |
| 435 my $seqBegin = 0; | |
| 436 my $pVal = -13; | |
| 437 my $nVal = 2; | |
| 438 my $nHits = 0; | |
| 439 my $seqEnd = $self->seq->length; | |
| 440 my $pep = $self->seq->seq; | |
| 441 my $minWeight = $self->threshold; | |
| 442 my $matrix = $self->matrix; | |
| 443 | |
| 444 ## The weight table is keyed by UPPERCASE letters so we uppercase | |
| 445 ## the pep string because we don't want to alter the actual object | |
| 446 ## sequence. | |
| 447 | |
| 448 $pep =~ tr/a-z/A-Z/; | |
| 449 | |
| 450 for ($seqPos = $seqBegin; $seqPos < $seqEnd; $seqPos++) { | |
| 451 $istart = (0 > $seqPos + $pVal)? 0 : $seqPos + $pVal; | |
| 452 $iend = ($seqPos + $nVal - 1 < $seqEnd)? $seqPos + $nVal - 1 : $seqEnd; | |
| 453 $icol= $iend - $istart + 1; | |
| 454 $weight = 0.00; | |
| 455 for ($k=0; $k<$icol; $k++) { | |
| 456 $c = substr($pep, $istart + $k, 1); | |
| 457 | |
| 458 ## CD: The if(defined) stuff was put in here because Sigcleave.pm | |
| 459 ## CD: kept getting warnings about undefined vals during 'make test' ... | |
| 460 if ($matrix eq 'eucaryotic') { | |
| 461 $weight += $WeightTable_euc{$c}[$k] if defined $WeightTable_euc{$c}[$k]; | |
| 462 } else { | |
| 463 $weight += $WeightTable_pro{$c}[$k] if defined $WeightTable_pro{$c}[$k]; | |
| 464 } | |
| 465 } | |
| 466 $signals{$seqPos+1} = sprintf ("%.1f", $weight) if $weight >= $minWeight; | |
| 467 } | |
| 468 | |
| 469 $self->{"_signal_scores"} = { %signals }; | |
| 470 } | |
| 471 | |
| 472 | |
| 473 =head1 signals | |
| 474 | |
| 475 Title : signals | |
| 476 Usage : %sigcleave_results = $sig->signals; | |
| 477 : | |
| 478 Purpose : Accessor method for sigcleave results | |
| 479 : | |
| 480 Returns : Associative array. The key value represents the amino acid position | |
| 481 : and the value represents the score. Only scores that | |
| 482 : are greater than or equal to the THRESHOLD value are reported. | |
| 483 : | |
| 484 Argument : none. | |
| 485 Throws : none. | |
| 486 Comments : none. | |
| 487 See Also : THRESHOLD | |
| 488 | |
| 489 =cut | |
| 490 | |
| 491 #---------------- | |
| 492 sub signals { | |
| 493 #---------------- | |
| 494 my $self = shift; | |
| 495 my %results; | |
| 496 my $position; | |
| 497 | |
| 498 # do the calculations | |
| 499 $self->_Analyze; | |
| 500 | |
| 501 foreach $position ( sort keys %{ $self->{'_signal_scores'} } ) { | |
| 502 $results{$position} = $self->{'_signal_scores'}{$position}; | |
| 503 } | |
| 504 return %results; | |
| 505 } | |
| 506 | |
| 507 | |
| 508 =head1 result_count | |
| 509 | |
| 510 Title : result_count | |
| 511 Usage : $count = $sig->result_count; | |
| 512 : | |
| 513 Purpose : Accessor method for sigcleave results | |
| 514 : | |
| 515 Returns : Integer, number of results above the threshold | |
| 516 : | |
| 517 Argument : none. | |
| 518 Throws : none. | |
| 519 Comments : none. | |
| 520 | |
| 521 See Also : THRESHOLD | |
| 522 | |
| 523 =cut | |
| 524 | |
| 525 #---------------- | |
| 526 sub result_count { | |
| 527 #---------------- | |
| 528 my $self = shift; | |
| 529 $self->_Analyze; | |
| 530 return keys %{ $self->{'_signal_scores'} }; | |
| 531 } | |
| 532 | |
| 533 | |
| 534 =head1 pretty_print | |
| 535 | |
| 536 Title : pretty_print | |
| 537 Usage : $output = $sig->pretty_print; | |
| 538 : print $sig->pretty_print; | |
| 539 : | |
| 540 Purpose : Emulates the output of the EGCG Sigcleave | |
| 541 : utility. | |
| 542 : | |
| 543 Returns : A formatted string. | |
| 544 Argument : none. | |
| 545 Throws : none. | |
| 546 Comments : none. | |
| 547 See Also : n/a | |
| 548 | |
| 549 =cut | |
| 550 | |
| 551 #---------------- | |
| 552 sub pretty_print { | |
| 553 #---------------- | |
| 554 my $self = shift; | |
| 555 my $pos; | |
| 556 my $output; | |
| 557 my $cnt = 1; | |
| 558 my %results = $self->signals; | |
| 559 my @hits = keys %results; | |
| 560 my $hitcount = $#hits; $hitcount++; | |
| 561 my $thresh = $self->threshold; | |
| 562 my $seqlen = $self->seq->length || 0; | |
| 563 my $name = $self->seq->id || 'NONAME'; | |
| 564 my $pep = $self->seq->seq; | |
| 565 $pep =~ tr/a-z/A-Z/; | |
| 566 | |
| 567 $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n"; | |
| 568 | |
| 569 if ($hitcount > 0) { | |
| 570 $output .= "Report scores over $thresh\n"; | |
| 571 foreach $pos ((sort { $results{$b} cmp $results{$a} } keys %results)) { | |
| 572 my $start = $pos - 15; | |
| 573 $start = 1 if $start < 1; | |
| 574 my $sig = substr($pep,$start -1,$pos-$start ); | |
| 575 | |
| 576 $output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos); | |
| 577 $output .= "\n"; | |
| 578 $output .= " Sequence: "; | |
| 579 $output .= $sig; | |
| 580 $output .= "-" x (15- length($sig)); | |
| 581 $output .= "-"; | |
| 582 $output .= substr($pep,$pos-1,50); | |
| 583 $output .= "\n"; | |
| 584 $output .= " " x 12; | |
| 585 $output .= "| \(signal\) | \(mature peptide\)\n"; | |
| 586 $output .= sprintf(" %3d %3d\n\n",$start,$pos); | |
| 587 | |
| 588 if (($hitcount > 1) && ($cnt == 1)) { | |
| 589 $output .= " Other entries above $thresh\n\n"; | |
| 590 } | |
| 591 $cnt++; | |
| 592 } | |
| 593 } | |
| 594 $output; | |
| 595 } | |
| 596 | |
| 597 | |
| 598 1; | |
| 599 __END__ | |
| 600 | |
| 601 | |
| 602 ######################################################################### | |
| 603 # End of class | |
| 604 ######################################################################### |
