Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/OddCodes.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #$Id: OddCodes.pm,v 1.10.2.1 2003/04/07 04:27:42 heikki Exp $ | |
| 2 #----------------------------------------------------------------------------- | |
| 3 # PACKAGE : OddCodes.pm | |
| 4 # PURPOSE : To write amino acid sequences in alternative alphabets | |
| 5 # AUTHOR : Derek Gatherer (D.Gatherer@organon.nhe.akzonobel.nl) | |
| 6 # SOURCE : | |
| 7 # CREATED : 8th July 2000 | |
| 8 # MODIFIED : | |
| 9 # DISCLAIMER : I am employed in the pharmaceutical industry but my | |
| 10 # : employers do not endorse or sponsor this module | |
| 11 # : in any way whatsoever. The above email address is | |
| 12 # : given purely for the purpose of easy communication | |
| 13 # : with the author, and does not imply any connection | |
| 14 # : between my employers and anything written below. | |
| 15 # LICENCE : You may distribute this module under the same terms | |
| 16 # : as the rest of BioPerl. | |
| 17 #---------------------------------------------------------------------------- | |
| 18 | |
| 19 =head1 NAME | |
| 20 | |
| 21 Bio::Tools::OddCodes - Object holding alternative alphabet coding for | |
| 22 one protein sequence | |
| 23 | |
| 24 =head1 SYNOPSIS | |
| 25 | |
| 26 Takes a sequence object from, e.g., an input stream, and creates an object | |
| 27 for the purposes of rewriting that sequence in another alphabet. | |
| 28 These are abbreviated amino acid sequence alphabets, designed to | |
| 29 simplify the statistical aspects of analysing protein sequences, by | |
| 30 reducing the combinatorial explosion of the 20-letter alphabet. These | |
| 31 abbreviated alphabets range in size from 2 to 8. | |
| 32 | |
| 33 Creating the OddCodes object, eg: | |
| 34 | |
| 35 my $inputstream = Bio::SeqIO->new( '-file' => "seqfile", | |
| 36 '-format' => 'Fasta'); | |
| 37 my $seqobj = $inputstream->next_seq(); | |
| 38 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj); | |
| 39 | |
| 40 or: | |
| 41 | |
| 42 my $seqobj = Bio::PrimarySeq->new | |
| 43 (-seq=>'[cut and paste a sequence here]', | |
| 44 -alphabet => 'protein', | |
| 45 -id => 'test'); | |
| 46 my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj); | |
| 47 | |
| 48 do the alternative coding, returning the answer as a reference to a string | |
| 49 | |
| 50 my $output = $oddcode_obj->structural(); | |
| 51 my $output = $oddcode_obj->chemical(); | |
| 52 my $output = $oddcode_obj->functional(); | |
| 53 my $output = $oddcode_obj->charge(); | |
| 54 my $output = $oddcode_obj->hydrophobic(); | |
| 55 my $output = $oddcode_obj->Dayhoff(); | |
| 56 my $output = $oddcode_obj->Sneath(); | |
| 57 my $output = $oddcode_obj->Stanfel(); | |
| 58 | |
| 59 | |
| 60 display sequence in new form, eg: | |
| 61 | |
| 62 my $new_coding = $$output; | |
| 63 print "\n$new_coding"; | |
| 64 | |
| 65 =head1 DESCRIPTION | |
| 66 | |
| 67 Bio::Tools::OddCodes is a welterweight object for rewriting a protein | |
| 68 sequence in an alternative alphabet. 8 of these are provided, ranging | |
| 69 from the 2-letter hydrophobic alphabet, to the 8-letter chemical | |
| 70 alphabet. These are useful for the statistical analysis of protein | |
| 71 sequences since they can partially avoid the combinatorial explosion | |
| 72 produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers | |
| 73 etc.) | |
| 74 | |
| 75 The objects will print out a warning if the input sequence is not a | |
| 76 protein. If you know what you are doing, you can silence the warning | |
| 77 by setting verbose() to a negative value. | |
| 78 | |
| 79 See Synopsis above for object creation code. | |
| 80 | |
| 81 =head1 FEEDBACK | |
| 82 | |
| 83 =head2 Mailing Lists | |
| 84 | |
| 85 User feedback is an integral part of the evolution of this | |
| 86 and other Bioperl modules. Send your comments and suggestions preferably | |
| 87 to one of the Bioperl mailing lists. | |
| 88 Your participation is much appreciated. | |
| 89 | |
| 90 bioperl-l@bioperl.org - General discussion | |
| 91 http://www.bioperl.org/MailList.html - About the mailing lists | |
| 92 | |
| 93 =head2 Reporting Bugs | |
| 94 | |
| 95 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 96 of the bugs and their resolution. Bug reports can be submitted via email | |
| 97 or the web: | |
| 98 | |
| 99 bioperl-bugs@bioperl.org | |
| 100 http://www.bugzilla.bioperl.org/ | |
| 101 | |
| 102 =head1 AUTHOR | |
| 103 | |
| 104 Derek Gatherer | |
| 105 | |
| 106 =head1 APPENDIX | |
| 107 | |
| 108 The rest of the documentation details each of the object methods. | |
| 109 Internal methods are usually preceded with a _ | |
| 110 | |
| 111 =cut | |
| 112 | |
| 113 #' | |
| 114 | |
| 115 package Bio::Tools::OddCodes; | |
| 116 use vars qw(@ISA); | |
| 117 use strict; | |
| 118 | |
| 119 use Bio::Root::Root; | |
| 120 | |
| 121 @ISA = qw(Bio::Root::Root); | |
| 122 | |
| 123 | |
=head2 new

 Title   : new
 Usage   : $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
           $oddcode_obj = Bio::Tools::OddCodes->new($seqobj);
 Function: creates an OddCodes object wrapping one sequence object
 Returns : the new Bio::Tools::OddCodes object
 Args    : -seq => a Bio::PrimarySeqI object; the object may also be
           passed as the first positional argument
 Throws  : dies if no Bio::PrimarySeqI object was supplied

=cut

sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
    if (!defined($seqobj) && @args && ref($args[0])) {
        # parameter not passed as named parameter?
        $seqobj = $args[0];
    }

    # Calling ->isa on undef or on an unblessed reference is itself fatal
    # ("Can't call method ..."), which would hide the intended diagnostic.
    # Evaluate the check defensively so every bad argument ends up in the
    # same die() below.
    my $is_primary_seq = do {
        local $@;
        ref($seqobj) && eval { $seqobj->isa("Bio::PrimarySeqI") };
    };
    unless ($is_primary_seq) {
        die("die in _init, OddCodes works only on PrimarySeqI\nobjects\n");
    }

    # The sequence object itself is stored; the alphabet methods read it
    # again through _pullseq() on every call.
    $self->{'_seqref'} = $seqobj;

    return $self;
}
| 145 | |
=head2 structural

 Title   : structural
 Usage   : $output = $oddcode_obj->structural();
 Function: turns amino acid sequence into 3-letter structural alphabet
         : A (ambivalent), E (external), I (internal)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become AAEEIAE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub structural {
    my ($self) = @_;
    my $seqstring = _pullseq($self);    # see _pullseq() below

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   ACGPSTWY -> A    RNDQEHK -> E    ILMFV -> I
    $seqstring =~ tr{ACGPSTWYRNDQEHKILMFV}
                    {AAAAAAAAEEEEEEEIIIII};

    return \$seqstring;
}
| 176 | |
=head2 functional

 Title   : functional
 Usage   : $output = $oddcode_obj->functional();
 Function: turns amino acid sequence into 4-letter functional alphabet
         : A (acidic), C (basic), H (hydrophobic), P (polar)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become HPAAHPC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub functional {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   DE -> A    HKR -> C    AFILMPVW -> H    CGNQSTY -> P
    $seqstring =~ tr{DEHKRAFILMPVWCGNQSTY}
                    {AACCCHHHHHHHHPPPPPPP};

    return \$seqstring;
}
| 209 | |
=head2 hydrophobic

 Title   : hydrophobic
 Usage   : $output = $oddcode_obj->hydrophobic();
 Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
         : I (hydrophobic: AFILMPVW), O (hydrophilic: CDEGHKNQRSTY)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become IOOOIOO
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub hydrophobic {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    # The letter assignment (hydrophobic -> I, hydrophilic -> O) is the one
    # this method has always produced; only the documentation was inverted.
    #   AFILMPVW -> I    CDEGHKNQRSTY -> O
    $seqstring =~ tr{AFILMPVWCDEGHKNQRSTY}
                    {IIIIIIIIOOOOOOOOOOOO};

    return \$seqstring;
}
| 238 | |
=head2 Dayhoff

 Title   : Dayhoff
 Usage   : $output = $oddcode_obj->Dayhoff();
 Function: turns amino acid sequence into 6-letter Dayhoff alphabet
         : A (C), C (AGPST), D (DENQ), E (HKR), F (ILMV), G (FWY)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become CADDGCE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Dayhoff {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   C -> A   AGPST -> C   DENQ -> D   HKR -> E   ILMV -> F   FWY -> G
    $seqstring =~ tr{CAGPSTDENQHKRILMVFWY}
                    {ACCCCCDDDDEEEFFFFGGG};

    return \$seqstring;
}
| 274 | |
=head2 Sneath

 Title   : Sneath
 Usage   : $output = $oddcode_obj->Sneath();
 Function: turns amino acid sequence into 7-letter Sneath alphabet
         : A (ILV), C (AGP), D (MNQ), E (CST), F (DE), G (KR), H (FHWY)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become CEFFHCH
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Sneath {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   ILV -> A  AGP -> C  MNQ -> D  CST -> E  DE -> F  KR -> G  FHWY -> H
    $seqstring =~ tr{ILVAGPMNQCSTDEKRFHWY}
                    {AAACCCDDDEEEFFGGHHHH};

    return \$seqstring;
}
| 312 | |
=head2 Stanfel

 Title   : Stanfel
 Usage   : $output = $oddcode_obj->Stanfel();
 Function: turns amino acid sequence into 4-letter Stanfel alphabet
         : A (ACGILMPSTV), C (DENQ), D (FWY), E (HKR)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become AACCDAE
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub Stanfel {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   ACGILMPSTV -> A    DENQ -> C    FWY -> D    HKR -> E
    $seqstring =~ tr{ACGILMPSTVDENQFWYHKR}
                    {AAAAAAAAAACCCCDDDEEE};

    return \$seqstring;
}
| 344 | |
=head2 chemical()

 Title   : chemical
 Usage   : $output = $oddcode_obj->chemical();
 Function: turns amino acid sequence into 8-letter chemical alphabet
         : A (acidic), L (aliphatic), M (amide), R (aromatic)
         : C (basic), H (hydroxyl), I (imino), S (sulphur)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become LSAARLC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub chemical {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   DE -> A   AGILV -> L   NQ -> M   FWY -> R
    #   RHK -> C  ST -> H      P -> I    CM -> S
    $seqstring =~ tr{DEAGILVNQFWYRHKSTPCM}
                    {AALLLLLMMRRRCCCHHISS};

    return \$seqstring;
}
| 386 | |
=head2 charge

 Title   : charge
 Usage   : $output = $oddcode_obj->charge();
 Function: turns amino acid sequence into 3-letter charge alphabet
         : A (DE), C (HKR), N (all other standard residues)
         : characters other than the 20 standard residue letters are
         : passed through unchanged
 Example : a sequence ACDEFGH will become NNAANNC
 Returns : Reference to the new sequence string
 Args    : none

=cut

sub charge {
    my ($self) = @_;
    my $seqstring = _pullseq($self);

    # One simultaneous translation. tr/// has no character classes, so the
    # residue lists are written without [ ] (which would be translated as
    # literal brackets), and no intermediate digit codes are needed.
    #   DE -> A    HKR -> C    ACFGILMNPQSTVWY -> N
    $seqstring =~ tr{DEHKRACFGILMNPQSTVWY}
                    {AACCCNNNNNNNNNNNNNNN};

    return \$seqstring;
}
| 416 | |
# _pullseq is the shared first step of every alphabet method.
# It re-checks the stored sequence object, warns (unless verbose() is
# negative) when the object's alphabet is not 'protein', and returns the
# residues as an upper-case string. It dies on a non-PrimarySeqI object
# or on an empty sequence.

sub _pullseq {
    my ($self) = @_;
    my $stored = $self->{'_seqref'};

    die("die, OddCodes works only on PrimarySeqI objects\n")
        if !$stored->isa("Bio::PrimarySeqI");

    # Only consult verbose() when the alphabet check has already failed.
    if ($stored->alphabet() ne 'protein' and not $self->verbose() < 0) {
        $self->warn("\tAll OddCode alphabets need a protein sequence,\n"
                  . "\tbut BioPerl thinks this is not: [" . $stored->id . "]");
    }

    my $residues = uc $stored->seq();

    die("$residues: die, sequence has zero length\n")
        if length($residues) < 1;

    return $residues;
}
| 442 | |
| 443 1; |
