Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Structure/SecStr/STRIDE/Res.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $id $ | |
| 2 # | |
| 3 # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm | |
| 4 # | |
| 5 # Cared for by Ed Green <ed@compbio.berkeley.edu> | |
| 6 # | |
| 7 # Copyright Univ. of California | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 =head1 NAME | |
| 13 | |
| 14 Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output | |
| 15 | |
| 16 =head1 SYNOPSIS | |
| 17 | |
| 18 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' ); | |
| 19 | |
| 20 # or | |
| 21 | |
| 22 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT ); | |
| 23 | |
| 24 # Get secondary structure assignment for PDB residue 20 of chain A | |
| 25 $sec_str = $stride_obj->resSecStr( '20:A' ); | |
| 26 | |
| 27 # same | |
| 28 $sec_str = $stride_obj->resSecStr( 20, 'A' ); | |
| 29 | |
| 30 =head1 DESCRIPTION | |
| 31 | |
| 32 STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a | |
| 33 program (similar to DSSP) for assigning secondary structure to | |
| 34 individual residues of a pdb structure file. | |
| 35 | |
| 36 ( Knowledge-Based Protein Secondary Structure Assignment, | |
| 37 PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) ) | |
| 38 | |
| 39 STRIDE is available here: | |
| 40 http://www.embl-heidelberg.de/argos/stride/down_stride.html | |
| 41 | |
| 42 Methods are then available for extracting all of the information | |
| 43 present within the output or convenient subsets of it. | |
| 44 | |
| 45 Although they are very similar in function, DSSP and STRIDE differ | |
| 46 somewhat in output format. These differences are reflected in the | |
| 47 return value of some methods of these modules. For example, both | |
| 48 the STRIDE and DSSP parsers have resSecStr() methods for returning | |
| 49 the secondary structure of a given residue. However, the range of | |
| 50 return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the | |
| 51 range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See | |
| 52 individual methods for details. | |
| 53 | |
| 54 The methods are roughly divided into 3 sections: | |
| 55 | |
| 56 1. Global features of this structure (PDB ID, total surface area, | |
| 57 etc.). These methods do not require an argument. | |
| 58 2. Residue specific features ( amino acid, secondary structure, | |
| 59 solvent exposed surface area, etc. ). These methods do require an | |
| 60 argument. The argument is supposed to uniquely identify a | |
| 61 residue described within the structure. It can be of any of the | |
| 62 following forms: | |
| 63 ('#A:B') or ( #, 'A', 'B' ) | |
| 64 || | | |
| 65 || - Chain ID (blank for single chain) | |
| 66 |--- Insertion code for this residue. Blank for most residues. | |
| 67 |--- Numeric portion of residue ID. | |
| 68 | |
| 69 (#) | |
| 70 | | |
| 71 --- Numeric portion of residue ID. If there is only one chain and | |
| 72 it has no ID AND there is no residue with an insertion code at this | |
| 73 number, then this can uniquely specify a residue. | |
| 74 | |
| 75 ('#:C') or ( #, 'C' ) | |
| 76 | | | |
| 77 | -Chain ID | |
| 78 ---Numeric portion of residue ID. | |
| 79 | |
| 80 If a residue is incompletely specified then the first residue that | |
| 81 fits the arguments is returned. For example, if 19 is the argument | |
| 82 and there are three chains, A, B, and C with a residue whose number | |
| 83 is 19, then 19:A will be returned (assuming its listed first). | |
| 84 | |
| 85 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they | |
| 86 are not supported by these modules. | |
| 87 | |
| 88 3. Value-added methods. Return values are not verbatim strings | |
| 89 parsed from DSSP or STRIDE output. | |
| 90 | |
| 91 =head1 FEEDBACK | |
| 92 | |
| 93 =head2 MailingLists | |
| 94 | |
| 95 User feedback is an integral part of the evolution of this and other | |
| 96 Bioperl modules. Send your comments and suggestions preferably to one | |
| 97 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 98 | |
| 99 bioperl-l@bioperl.org - General discussion | |
| 100 http://bio.perl.org/MailList.html - About the mailing lists | |
| 101 | |
| 102 =head2 Reporting Bugs | |
| 103 | |
| 104 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 105 of the bugs and their resolution. Bug reports can be submitted via email | |
| 106 or the web: | |
| 107 | |
| 108 bioperl-bugs@bio.perl.org | |
| 109 http://bugzilla.bioperl.org/ | |
| 110 | |
| 111 =head1 AUTHOR - Ed Green | |
| 112 | |
| 113 Email ed@compbio.berkeley.edu | |
| 114 | |
| 115 | |
| 116 =head1 APPENDIX | |
| 117 | |
| 118 The Rest of the documentation details each method. | |
| 119 Internal methods are preceded with a _. | |
| 120 | |
| 121 | |
| 122 =cut | |
| 123 | |
| 124 package Bio::Structure::SecStr::STRIDE::Res; | |
| 125 use strict; | |
| 126 use vars qw(@ISA); | |
| 127 use Bio::Root::Root; | |
| 128 use Bio::Root::IO; | |
| 129 use Bio::PrimarySeq; | |
| 130 | |
| 131 @ISA = qw(Bio::Root::Root); | |
| 132 | |
# Position of each field inside the per-residue record built from an ASG line.
our %ASGTable = (
    'aa'       => 0,
    'resNum'   => 1,
    'ssAbbr'   => 2,
    'ssName'   => 3,
    'phi'      => 4,
    'psi'      => 5,
    'surfArea' => 6,
);

# Three-letter residue name => one-letter amino acid code (20 standard residues).
our %AATable = qw(
    ALA A   ARG R   ASN N   ASP D   CYS C
    GLN Q   GLU E   GLY G   HIS H   ILE I
    LEU L   LYS K   MET M   PHE F   PRO P
    SER S   THR T   TRP W   TYR Y   VAL V
);
| 148 | |
| 149 =head2 new | |
| 150 | |
| 151 Title : new | |
| 152 Usage : makes new object of this class | |
| 153 Function : Constructor | |
| 154 Example : $stride_obj = Bio::Structure::SecStr::STRIDE::Res->new( '-file' => filename | |
| 155 # or | |
| 156 '-fh' => FILEHANDLE ) | |
| 157 Returns : object (ref) | |
| 158 Args : filename or filehandle( must be proper STRIDE output ) | |
| 159 | |
| 160 =cut | |
| 161 | |
sub new {
    # Constructor: builds the base object, then parses the STRIDE output
    # named by the -file / -fh arguments into it.
    my $class     = shift;
    my @ctor_args = @_;

    my $self   = $class->SUPER::new( @ctor_args );
    my $reader = Bio::Root::IO->new( @ctor_args );

    # The parser receives the IO wrapper, not the raw filehandle.
    $self->_parse( $reader );
    $reader->close();

    return $self;
}
| 170 | |
| 171 # GLOBAL FEATURES / INFO / STATS | |
| 172 | |
| 173 =head2 totSurfArea | |
| 174 | |
| 175 Title : totSurfArea | |
| 176 Usage : returns sum of surface areas of all residues of all | |
| 177 chains considered. Result is memoized. | |
| 178 Function : | |
| 179 Example : $tot_SA = $stride_obj->totSurfArea(); | |
| 180 Returns : scalar | |
| 181 Args : none | |
| 182 | |
| 183 | |
| 184 =cut | |
| 185 | |
sub totSurfArea {
    # Sum of the surface-area field over every residue of every chain.
    # The result is cached in $self->{'SurfArea'} (a cached true value is
    # returned directly; a zero total is simply recomputed).
    my ( $self ) = @_;

    return $self->{ 'SurfArea' } if ( $self->{ 'SurfArea' } );

    my $area_col = $ASGTable{ 'surfArea' };
    my $sum      = 0;
    foreach my $chain_id ( keys %{ $self->{ 'ASG' } } ) {
        # Slot 0 of each chain array is unused; residues start at ordinal 1.
        my $last_ord = $#{ $self->{ 'ASG' }->{ $chain_id } };
        foreach my $ord ( 1 .. $last_ord ) {
            $sum += $self->{ 'ASG' }->{ $chain_id }->[ $ord ]->[ $area_col ];
        }
    }

    $self->{ 'SurfArea' } = $sum;
    return $self->{ 'SurfArea' };
}
| 207 | |
| 208 =head2 numResidues | |
| 209 | |
| 210 Title : numResidues | |
| 211 Usage : returns total number of residues in all chains or | |
| 212 just the specified chain | |
| 213 Function : | |
| 214 Example : $tot_res = $stride_obj->numResidues(); | |
| 215 Returns : scalar int | |
| 216 Args : none or chain id | |
| 217 | |
| 218 | |
| 219 =cut | |
| 220 | |
sub numResidues {
    # Returns the number of residues in the given chain, or in all chains
    # when no chain ID (undef or '') is supplied.
    #
    # Args    : optional chain ID
    # Returns : integer; 0 when the requested chain does not exist
    my $self  = shift;
    my $chain = shift;

    # Test for definedness/length rather than truth so that a chain whose
    # ID is the character '0' is treated as a real chain.
    my $one_chain = ( defined $chain && length $chain ) ? 1 : 0;

    my $total = 0;
    foreach my $key ( keys %{ $self->{ 'ASG' } } ) {
        next if ( $one_chain && $key ne $chain );

        # Slot 0 of each chain array is unused, so the last index is the
        # residue count.
        $total += $#{ $self->{ 'ASG' }{ $key } };
    }
    return $total;
}
| 238 | |
| 239 # STRAIGHT FROM THE PDB ENTRY | |
| 240 | |
| 241 =head2 pdbID | |
| 242 | |
| 243 Title : pdbID | |
| 244 Usage : returns pdb identifier ( 1FJM, e.g. ) | |
| 245 Function : | |
| 246 Example : $pdb_id = $stride_obj->pdbID(); | |
| 247 Returns : scalar string | |
| 248 Args : none | |
| 249 | |
| 250 | |
| 251 =cut | |
| 252 | |
sub pdbID {
    # PDB identifier stored from the HDR line.
    my ( $self ) = @_;
    return $self->{ 'PDB' };
}
| 257 =head2 pdbAuthor | |
| 258 | |
| 259 Title : pdbAuthor | |
| 260 Usage : returns author of this PDB entry | |
| 261 Function : | |
| 262 Example : $auth = $stride_obj->pdbAuthor() | |
| 263 Returns : scalar string | |
| 264 Args : none | |
| 265 | |
| 266 | |
| 267 =cut | |
| 268 | |
sub pdbAuthor {
    # All AUT line contents joined with single spaces.
    my ( $self ) = @_;
    my $aut_lines = $self->{ 'HEAD' }->{ 'AUT' };
    return join( ' ', @{ $aut_lines } );
}
| 273 | |
| 274 =head2 pdbCompound | |
| 275 | |
| 276 Title : pdbCompound | |
| 277 Usage : returns string of what was found on the | |
| 278 CMP lines | |
| 279 Function : | |
| 280 Example : $cmp = $stride_obj->pdbCompound(); | |
| 281 Returns : string | |
| 282 Args : none | |
| 283 | |
| 284 | |
| 285 =cut | |
| 286 | |
sub pdbCompound {
    # All CMP line contents joined with single spaces.
    my ( $self ) = @_;
    my $cmp_lines = $self->{ 'HEAD' }->{ 'CMP' };
    return join( ' ', @{ $cmp_lines } );
}
| 291 | |
| 292 =head2 pdbDate | |
| 293 | |
| 294 Title : pdbDate | |
| 295 Usage : returns date given in PDB file | |
| 296 Function : | |
| 297 Example : $pdb_date = $stride_obj->pdbDate(); | |
| 298 Returns : scalar | |
| 299 Args : none | |
| 300 | |
| 301 | |
| 302 =cut | |
| 303 | |
sub pdbDate {
    # Date string stored from the HDR line.
    my ( $self ) = @_;
    return $self->{ 'DATE' };
}
| 308 | |
| 309 =head2 pdbHeader | |
| 310 | |
| 311 Title : pdbHeader | |
| 312 Usage : returns string of characters found on the PDB header line | |
| 313 Function : | |
| 314 Example : $head = $stride_obj->pdbHeader(); | |
| 315 Returns : scalar | |
| 316 Args : none | |
| 317 | |
| 318 | |
| 319 =cut | |
| 320 | |
sub pdbHeader {
    # Free-text portion of the HDR line.
    my ( $self ) = @_;
    my $head = $self->{ 'HEAD' };
    return $head->{ 'HEADER' };
}
| 325 | |
| 326 =head2 pdbSource | |
| 327 | |
| 328 Title : pdbSource | |
| 329 Usage : returns string of what was found on SRC lines | |
| 330 Function : | |
| 331 Example : $src = $stride_obj->pdbSource(); | |
| 332 Returns : scalar | |
| 333 Args : none | |
| 334 | |
| 335 | |
| 336 =cut | |
| 337 | |
sub pdbSource {
    # All SRC line contents joined with single spaces.
    my ( $self ) = @_;
    my $src_lines = $self->{ 'HEAD' }->{ 'SRC' };
    return join( ' ', @{ $src_lines } );
}
| 342 | |
| 343 # RESIDUE SPECIFIC ACCESSORS | |
| 344 | |
| 345 =head2 resAA | |
| 346 | |
| 347 Title : resAA | |
| 348 Usage : returns 1 letter abbr. of the amino acid specified by | |
| 349 the arguments | |
| 350 Function : | |
| 351 Examples : $aa = $stride_obj->resAA( RESIDUE_ID ); | |
| 352 Returns : scalar character | |
| 353 Args : RESIDUE_ID | |
| 354 | |
| 355 | |
| 356 =cut | |
| 357 | |
sub resAA {
    # One-letter amino acid code of the residue named by RESIDUE_ID.
    # Residue names missing from %AATable yield undef.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $three_letter =
        $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'aa' } ];
    return ( $AATable{ $three_letter } );
}
| 364 | |
| 365 =head2 resPhi | |
| 366 | |
| 367 Title : resPhi | |
| 368 Usage : returns phi angle of specified residue | |
| 369 Function : | |
| 370 Example : $phi = $stride_obj->resPhi( RESIDUE_ID ); | |
| 371 Returns : scalar | |
| 372 Args : RESIDUE_ID | |
| 373 | |
| 374 | |
| 375 =cut | |
| 376 | |
sub resPhi {
    # Phi angle field of the residue named by RESIDUE_ID.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'phi' } ];
}
| 383 | |
| 384 =head2 resPsi | |
| 385 | |
| 386 Title : resPsi | |
| 387 Usage : returns psi angle of specified residue | |
| 388 Function : | |
| 389 Example : $psi = $stride_obj->resPsi( RESIDUE_ID ); | |
| 390 Returns : scalar | |
| 391 Args : RESIDUE_ID | |
| 392 | |
| 393 | |
| 394 =cut | |
| 395 | |
sub resPsi {
    # Psi angle field of the residue named by RESIDUE_ID.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'psi' } ];
}
| 402 | |
| 403 =head2 resSolvAcc | |
| 404 | |
| 405 Title : resSolvAcc | |
| 406 Usage : returns stride calculated surface area of specified residue | |
| 407 Function : | |
| 408 Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID ); | |
| 409 Returns : scalar | |
| 410 Args : RESIDUE_ID | |
| 411 | |
| 412 | |
| 413 =cut | |
| 414 | |
sub resSolvAcc {
    # Surface-area field of the residue named by RESIDUE_ID
    # (reads the same field as resSurfArea).
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'surfArea' } ];
}
| 421 | |
| 422 =head2 resSurfArea | |
| 423 | |
| 424 Title : resSurfArea | |
| 425 Usage : returns stride calculated surface area of specified residue | |
| 426 Function : | |
| 427 Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID ); | |
| 428 Returns : scalar | |
| 429 Args : RESIDUE_ID | |
| 430 | |
| 431 | |
| 432 =cut | |
| 433 | |
sub resSurfArea {
    # Surface-area field of the residue named by RESIDUE_ID.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'surfArea' } ];
}
| 440 | |
| 441 =head2 resSecStr | |
| 442 | |
| 443 Title : resSecStr | |
| 444 Usage : gives one letter abbr. of stride determined secondary | |
| 445 structure of specified residue | |
| 446 Function : | |
| 447 Example : $ss = $stride_obj->resSecStr( RESIDUE_ID ); | |
| 448 Returns : one of: 'H' => Alpha Helix | |
| 449 'G' => 3-10 helix | |
| 450 'I' => PI-helix | |
| 451 'E' => Extended conformation | |
| 452 'B' or 'b' => Isolated bridge | |
| 453 'T' => Turn | |
| 454 'C' => Coil | |
| 455 ' ' => None | |
| 456 # NOTE: This range is slightly DIFFERENT from the | |
| 457 # DSSP method of the same name | |
| 458 Args : RESIDUE_ID | |
| 459 | |
| 460 | |
| 461 =cut | |
| 462 | |
sub resSecStr {
    # One-letter secondary structure code of the residue named by RESIDUE_ID.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'ssAbbr' } ];
}
| 469 | |
| 470 =head2 resSecStrSum | |
| 471 | |
| 472 Title : resSecStrSum | |
| 473 Usage : gives one letter summary of secondary structure of | |
| 474 specified residue. More general than resSecStr() | |
| 475 Function : | |
| 476 Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID ); | |
| 477 Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil) | |
| 478 Args : residue identifier(s) ( SEE INTRO NOTE ) | |
| 479 | |
| 480 | |
| 481 =cut | |
| 482 | |
sub resSecStrSum {
    # Collapses the code returned by resSecStr() into one of four classes:
    # 'H' (helix), 'B' (beta), 'T' (turn) or 'C' (everything else).
    my ( $self, @res_id ) = @_;

    my %summary_of = (
        'H' => 'H', 'G' => 'H', 'I' => 'H',    # helices
        'E' => 'B', 'B' => 'B', 'b' => 'B',    # strands / bridges
        'T' => 'T',                            # turn
    );

    my $ss_char = $self->resSecStr( @res_id );
    return exists $summary_of{ $ss_char } ? $summary_of{ $ss_char } : 'C';
}
| 501 | |
| 502 # STRIDE SPECIFIC | |
| 503 | |
| 504 =head2 resSecStrName | |
| 505 | |
| 506 Title : resSecStrName | |
| 507 Usage : gives full name of the secondary structural element | |
| 508 classification of the specified residue | |
| 509 Function : | |
| 510 Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID ); | |
| 511 Returns : scalar string | |
| 512 Args : RESIDUE_ID | |
| 513 | |
| 514 | |
| 515 =cut | |
| 516 | |
sub resSecStrName {
    # Full secondary structure name field of the residue named by RESIDUE_ID.
    my ( $self, @res_id ) = @_;
    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    return $record->[ $ASGTable{ 'ssName' } ];
}
| 523 | |
| 524 =head2 strideLocs | |
| 525 | |
| 526 Title : strideLocs | |
| 527 Usage : returns stride determined contiguous secondary | |
| 528 structural elements as specified on the LOC lines | |
| 529 Function : | |
| 530 Example : $loc_pnt = $stride_obj->strideLocs(); | |
| 531 Returns : pointer to array of 5 element arrays. | |
| 532 0 => stride name of structural element | |
| 533 1 => first residue pdb key (including insertion code, if app.) | |
| 534 2 => first residue chain id | |
| 535 3 => last residue pdb key (including insertion code, if app.) | |
| 536 4 => last residue chain id | |
| 537 NOTE the differences between this range and the range of SecBounds() | |
| 538 Args : none | |
| 539 | |
| 540 | |
| 541 =cut | |
| 542 | |
sub strideLocs {
    # Reference to the list of LOC records collected by _parseSummary.
    my ( $self ) = @_;
    return $self->{ 'LOC' };
}
| 547 | |
| 548 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT) | |
| 549 | |
| 550 =head2 secBounds | |
| 551 | |
| 552 Title : secBounds | |
| 553 Usage : gets residue ids of boundary residues in each | |
| 554 contiguous secondary structural element of specified | |
| 555 chain | |
| 556 Function : | |
| 557 Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' ); | |
| 558 Returns : pointer to array of 3 element arrays. First two elements | |
| 559 are the ordinal residue numbers (not PDB IDs) of the start and end points, respectively | |
| 560 and inclusively. The last element is the STRIDE secondary | |
| 561 structural element code (same range as resSecStr). | |
| 562 Args : chain identifier ( one character ). If none, '-' is assumed | |
| 563 | |
| 564 | |
| 565 =cut | |
| 566 | |
sub secBounds {
    # Returns the contiguous secondary structure segments of one chain.
    #
    # Args    : chain ID; undef, '' or ' ' select the unnamed chain '-'
    # Returns : reference to an array of [ first, last, code ] triples, where
    #           first/last are inclusive ordinal positions within the chain
    #           (indices into the parsed ASG array) and code is the
    #           one-letter secondary structure code of the segment.
    # Throws  : if the chain does not exist
    #
    # The result is memoized per chain in $self->{'SecBounds'}.
    my $self  = shift;
    my $chain = shift;

    # Definedness test (not truth) so that a chain named '0' stays '0'.
    if ( !defined $chain || $chain eq '' || $chain eq ' ' ) {
        $chain = '-';
    }

    # if we've memoized this one, use that
    if ( $self->{ 'SecBounds' }->{ $chain } ) {
        return $self->{ 'SecBounds' }->{ $chain };
    }

    # check to make sure chain is valid
    my $residues = $self->{ 'ASG' }->{ $chain };
    if ( !$residues ) {
        $self->throw( "No such chain: $chain\n" );
    }

    my $ss_col   = $ASGTable{ 'ssAbbr' };
    my $last_ord = $#{ $residues };    # slot 0 is unused
    my @SecBounds;

    if ( $last_ord >= 1 ) {
        my $beg         = 1;
        my $cur_element = $residues->[ 1 ]->[ $ss_col ];

        foreach my $i ( 2 .. $last_ord ) {
            my $ss = $residues->[ $i ]->[ $ss_col ];
            next if ( $ss eq $cur_element );

            # Structure type changed: close the running segment.
            push( @SecBounds, [ $beg, $i - 1, $cur_element ] );
            $beg         = $i;
            $cur_element = $ss;
        }

        # Close the final segment at the last real residue.  Never index
        # past $last_ord: doing so autovivifies a phantom residue and would
        # add a bogus trailing segment.
        push( @SecBounds, [ $beg, $last_ord, $cur_element ] );
    }

    $self->{ 'SecBounds' }->{ $chain } = \@SecBounds;
    return $self->{ 'SecBounds' }->{ $chain };
}
| 613 | |
| 614 =head2 chains | |
| 615 | |
| 616 Title : chains | |
| 617 Usage : gives array chain I.D.s (characters) | |
| 618 Function : | |
| 619 Example : $chains_ref = $stride_obj->chains(); | |
| 620 Returns : reference to an array of chain ID characters | |
| 621 Args : none | |
| 622 | |
| 623 | |
| 624 =cut | |
| 625 | |
sub chains {
    # Returns a REFERENCE to an array holding the IDs of all parsed chains
    # (hash key order, i.e. unordered).
    my ( $self ) = @_;
    return [ keys( %{ $self->{ 'ASG' } } ) ];
}
| 631 | |
| 632 =head2 getSeq | |
| 633 | |
| 634 Title : getSeq | |
| 635 Usage : returns a Bio::PrimarySeq object which represents an | |
| 636 approximation at the sequence of the specified chain. | |
| 637 Function : For most chains of most entries, the sequence returned by | |
| 638 this method will be very good. However, it is inherently | |
| 639 unsafe to rely on STRIDE to extract sequence information about | |
| 640 a PDB entry. More reliable information can be obtained from | |
| 641 the PDB entry itself. If a second option is given | |
| 642 (and evaluates to true), the sequence generated will | |
| 643 have 'X' in spaces where the pdb residue numbers are | |
| 644 discontinuous. In some cases this results in a | |
| 645 better sequence object (when the discontinuity is | |
| 646 due to regions which were present, but could not be | |
| 647 resolved). In other cases, it will result in a WORSE | |
| 648 sequence object (when the discontinuity is due to | |
| 649 historical sequence numbering and all sequence is | |
| 650 actually resolved). | |
| 651 Example : $pso = $stride_obj->getSeq( 'A' ); | |
| 652 Returns : (pointer to) a PrimarySeq object | |
| 653 Args : Chain identifier. If none given, '-' is assumed. | |
| 654 | |
| 655 | |
| 656 =cut | |
| 657 | |
sub getSeq {
    # Builds (and memoizes) a Bio::PrimarySeq approximating the sequence of
    # one chain from the parsed ASG records.
    #
    # Args    : chain ID (undef or '' selects the unnamed chain '-'),
    #           optional true flag to pad gaps in PDB numbering with 'X'
    # Returns : Bio::PrimarySeq object
    # Throws  : via _pdbNum/_resAA if the chain has no residue at ordinal 1
    my $self    = shift;
    my $chain   = shift;
    my $fill_in = shift;

    # Definedness test (not truth) so that a chain named '0' stays '0'.
    if ( !defined $chain || $chain eq '' ) {
        $chain = '-';
    }

    # The plain and the gap-filled sequences differ, so they are cached
    # separately; a single cache would hand back whichever variant was
    # requested first.
    my $cache = $fill_in ? 'SeqFilled' : 'Seq';
    if ( $self->{ $cache }->{ $chain } ) {
        return $self->{ $cache }->{ $chain };
    }

    my $seq          = "";
    my $num_res      = $self->numResidues( $chain );
    my $last_res_num = $self->_pdbNum( 1, $chain );

    foreach my $i ( 1 .. $num_res ) {
        if ( $fill_in ) {
            my $cur_res_num = $self->_pdbNum( $i, $chain );
            my $step        = $cur_res_num - $last_res_num;
            if ( $step > 1 ) {
                # One 'X' per PDB residue number skipped.
                $seq .= 'X' x ( $step - 1 );
            }
            $last_res_num = $cur_res_num;
        }
        $seq .= $self->_resAA( $i, $chain );
    }

    my $id = $self->pdbID();
    $id .= "$chain";

    $self->{ $cache }->{ $chain } = Bio::PrimarySeq->new( -seq     => $seq,
                                                          -id      => $id,
                                                          -moltype => 'protein'
                                                        );

    return $self->{ $cache }->{ $chain };
}
| 705 | |
| 706 =head1 INTERNAL METHODS | |
| 707 | |
| 708 =head2 _pdbNum | |
| 709 | |
| 710 Title : _pdbNum | |
| 711 Usage : fetches the numeric portion of the identifier for a given | |
| 712 residue as reported by the pdb entry. Note, this DOES NOT | |
| 713 uniquely specify a residue. There may be an insertion code | |
| 714 and/or chain identifier differences. | |
| 715 Function : | |
| 716 Example : $pdbNum = $self->_pdbNum( 3, 'A' ); | |
| 717 Returns : a scalar | |
| 718 Args : valid ordinal num / chain combination | |
| 719 | |
| 720 | |
| 721 =cut | |
| 722 | |
sub _pdbNum {
    # Numeric part (optionally signed) of the PDB residue identifier stored
    # for the residue at ordinal $ord of chain $chain.
    my ( $self, $ord, $chain ) = @_;

    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    unless ( $record ) {
        $self->throw( "No such ordinal $ord in chain $chain.\n" );
    }

    my $res_id = $record->[ $ASGTable{ 'resNum' } ];
    my ( $num_part ) = ( $res_id =~ /(-*\d+).*/ );
    return $num_part;
}
| 735 | |
| 736 =head2 _resAA | |
| 737 | |
| 738 Title : _resAA | |
| 739 Usage : returns 1 letter abbr. of the amino acid specified by | |
| 740 the arguments | |
| 741 Function : | |
| 742 Examples : $aa = $stride_obj->_resAA( 3, '-' ); | |
| 743 Returns : scalar character | |
| 744 Args : ( ord. num, chain ) | |
| 745 | |
| 746 | |
| 747 =cut | |
| 748 | |
sub _resAA {
    # One-letter amino acid code for the residue at ordinal $ord of chain
    # $chain; undef when the residue name is not in %AATable.
    my ( $self, $ord, $chain ) = @_;

    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    unless ( $record ) {
        $self->throw( "No such ordinal $ord in chain $chain.\n" );
    }

    my $three_letter = $record->[ $ASGTable{ 'aa' } ];
    return ( $AATable{ $three_letter } );
}
| 758 | |
| 759 =head2 _pdbInsCo | |
| 760 | |
| 761 Title : _pdbInsCo | |
| 762 Usage : fetches the Insertion code for this residue. | |
| 763 Function : | |
| 764 Example : $pdb_ins_co = $self->_pdbInsCo( 15, 'B' ); | |
| 765 Returns : a scalar | |
| 766 Args : ordinal number and chain | |
| 767 | |
| 768 | |
| 769 =cut | |
| 770 | |
sub _pdbInsCo {
    # Insertion code of the residue at ordinal $ord of chain $chain: the
    # run of non-digit characters following the digits of its PDB
    # identifier.  Returns undef when there is none.
    my ( $self, $ord, $chain ) = @_;

    my $record = $self->{ 'ASG' }->{ $chain }->[ $ord ];
    unless ( $record ) {
        $self->throw( "No such ordinal $ord in chain $chain.\n" );
    }

    my $res_id = $record->[ $ASGTable{ 'resNum' } ];
    my ( $letter_part ) = ( $res_id =~ /\d+(\D+)/ );
    return $letter_part;
}
| 784 | |
| 785 =head2 _toOrdChain | |
| 786 | |
| 787 Title : _toOrdChain | |
| 788 Usage : takes any set of residue identifying parameters and | |
| 789 wrestles them into a two element array: the ordinal number of | |
| 790 this residue and the chain. This two element array can then be | |
| 791 efficiently used as keys in many of the above accessor methods | |
| 792 ('#A:B') or ( #, 'A', 'B' ) | |
| 793 || | | |
| 794 || - Chain ID (blank for single chain) | |
| 795 |--- Insertion code for this residue. Blank for most residues. | |
| 796 |--- Numeric portion of residue ID. | |
| 797 | |
| 798 (#) | |
| 799 | | |
| 800 --- Numeric portion of residue ID. If there is only one chain and | |
| 801 it has no ID AND there is no residue with an insertion code at this | |
| 802 number, then this can uniquely specify a residue. | |
| 803 | |
| 804 ('#:C') or ( #, 'C' ) | |
| 805 | | | |
| 806 | -Chain ID | |
| 807 ---Numeric portion of residue ID. | |
| 808 | |
| 809 If a residue is incompletely specified then the first residue that | |
| 810 fits the arguments is returned. For example, if 19 is the argument | |
| 811 and there are three chains, A, B, and C with a residue whose number | |
| 812 is 19, then 19:A will be returned (assuming its listed first). | |
| 813 | |
| 814 Function : | |
| 815 Example : my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
| 816 Returns : two element array | |
| 817 Args : valid set of residue identifier(s) ( SEE NOTE ABOVE ) | |
| 818 | |
| 819 | |
| 820 =cut | |
| 821 | |
sub _toOrdChain {
    # Resolves a residue identifier into ( ordinal, chain ).
    #
    # Accepted forms:
    #   ( num, ins_code, chain )   three or more arguments
    #   ( num, chain )             two arguments
    #   ( 'numINS:chain' )         one string; insertion code and ':chain'
    #                              are both optional
    # A missing or blank chain selects the unnamed chain '-'.
    #
    # Returns : ( ordinal position within the chain, chain ID )
    # Throws  : if the chain does not exist, or no residue of that chain
    #           has the PDB identifier "<num><ins_code>"
    #
    # NOTE(review): the POD says an incompletely specified residue matches
    # the first fitting residue of any chain; this code only ever searches
    # the selected (or default '-') chain.
    my $self = shift;

    my ( $key_num, $ins_code, $chain_id );

    if ( @_ >= 3 ) {       # number, insertion code, chain
        ( $key_num, $ins_code, $chain_id ) = @_;
    }
    elsif ( @_ == 2 ) {    # number, chain
        ( $key_num, $chain_id ) = @_;
    }
    else {                 # single arg: might be number or string
        my $arg_str = shift;
        $arg_str = '' unless defined $arg_str;

        if ( $arg_str =~ /:/ ) {
            # a chain is specified; '20:' (nothing after the colon) is the
            # documented way to name the blank chain
            ( $chain_id ) = ( $arg_str =~ /:(.)/ );
            $arg_str =~ s/:.?//;
        }
        if ( $arg_str =~ /([A-Za-z])/ ) {
            # an insertion code is specified
            $ins_code = $1;
            $arg_str =~ s/[A-Za-z]//g;
        }
        # now, get the number bit -> everything still around
        $key_num = $arg_str;
    }

    $key_num  = '' unless defined $key_num;
    $ins_code = '' unless defined $ins_code;
    my $key = "$key_num$ins_code";

    # Definedness test (not truth) so that a chain named '0' stays '0'.
    if ( !defined $chain_id || $chain_id eq '' || $chain_id eq ' ' ) {
        $chain_id = '-';
    }

    my $residues = $self->{ 'ASG' }->{ $chain_id };
    if ( !$residues ) {
        $self->throw( "No such chain: $chain_id" );
    }

    my $res_col = $ASGTable{ 'resNum' };
    foreach my $i ( 1 .. $#{ $residues } ) {
        my $record = $residues->[ $i ];
        next unless ( $record && defined $record->[ $res_col ] );
        if ( $record->[ $res_col ] eq $key ) {
            return ( $i, $chain_id );
        }
    }

    $self->throw( "No such key: $key" );
}
| 874 | |
| 875 =head2 _parse | |
| 876 | |
| 877 Title : _parse | |
| 878 Usage : as name suggests, parses stride output, creating object | |
| 879 Function : | |
| 880 Example : $self->_parse( $io ); | |
| 881 Returns : | |
| 882 Args : valid Bio::Root::IO object | |
| 883 | |
| 884 | |
| 885 =cut | |
| 886 | |
sub _parse {
    # Drives parsing of a complete STRIDE report read through $io.
    #
    # Args   : Bio::Root::IO object positioned at the start of the report
    # Throws : "Not stride output" when the first line is not a REM banner
    #
    # The section parsers share the stream, so call order matters.
    my $self = shift;
    my $io   = shift;

    # Parse top lines; _parseTop returns true when the banner is MISSING.
    if ( $self->_parseTop( $io ) ) {
        $self->throw( "Not stride output" );
    }

    # Parse the HDR, CMP, SRC, and AUT lines
    $self->_parseHead( $io );

    # Skip the CHN, SEQ and STR lines and collect the LOC lines
    $self->_parseSummary( $io );

    # Parse the ASG lines
    $self->_parseASG( $io );
}
| 906 | |
| 907 =head2 _parseTop | |
| 908 | |
| 909 Title : _parseTop | |
| 910 Usage : makes sure this looks like stride output | |
| 911 Function : | |
| 912 Example : | |
| 913 Returns : | |
| 914 Args : | |
| 915 | |
| 916 | |
| 917 =cut | |
| 918 | |
sub _parseTop {
    # Consumes the first line of the stream and checks it for the STRIDE
    # banner.  Returns 0 when the line starts with "REM ---", 1 otherwise
    # (i.e. a TRUE result means "this is not STRIDE output").
    my ( $self, $io ) = @_;

    my $fh         = $io->_fh();
    my $first_line = <$fh>;

    return ( $first_line =~ /^REM ---/ ) ? 0 : 1;
}
| 929 | |
| 930 =head2 _parseHead | |
| 931 | |
| 932 Title : _parseHead | |
| 933 Usage : parses | |
| 934 Function : HDR, CMP, SRC, and AUT lines | |
| 935 Example : | |
| 936 Returns : | |
| 937 Args : | |
| 938 | |
| 939 | |
| 940 =cut | |
| 941 | |
sub _parseHead {
    # Parses the header section: leading REM lines are skipped, then the
    # HDR line and the CMP, SRC and AUT line groups are read in that order.
    #
    # Stores:
    #   $self->{'PDB'}, $self->{'DATE'}     from the HDR line
    #   $self->{'HEAD'}->{'HEADER'}         remaining HDR text
    #   $self->{'HEAD'}->{'CMP'|'SRC'|'AUT'} array refs of line contents
    #
    # Note: the first line that is not part of the header is consumed and
    # discarded here.
    my $self = shift;
    my $io   = shift;
    my $file = $io->_fh();
    my ( @elements, @cmp, @src, @aut );

    # Must start as an empty hash.  Initialising it from "{}" would store
    # the stringified reference as a junk key.
    my %head;

    my $cur = <$file>;
    while ( defined $cur && $cur =~ /^REM / ) {
        $cur = <$file>;
    }

    if ( defined $cur && $cur =~ /^HDR / ) {
        @elements = split( /\s+/, $cur );
        shift( @elements );                      # the word HDR
        pop( @elements );                        # trailing ID column
        $self->{ 'PDB' }  = pop( @elements );
        $self->{ 'DATE' } = pop( @elements );
        # everything that is left is the "header" text
        $head{ 'HEADER' } = join( ' ', @elements );
    }

    $cur = <$file>;
    while ( defined $cur && $cur =~ /^CMP / ) {
        # keep the text between the tag and the trailing 4-character ID
        ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ );
        push( @cmp, $cur );
        $cur = <$file>;
    }

    while ( defined $cur && $cur =~ /^SRC / ) {
        ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ );
        push( @src, $cur );
        $cur = <$file>;
    }

    while ( defined $cur && $cur =~ /^AUT / ) {
        ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ );
        push( @aut, $cur );
        $cur = <$file>;
    }

    $head{ 'CMP' } = \@cmp;
    $head{ 'SRC' } = \@src;
    $head{ 'AUT' } = \@aut;
    $self->{ 'HEAD' } = \%head;
}
| 993 | |
| 994 =head2 _parseSummary | |
| 995 | |
| 996 Title : _parseSummary | |
| 997 Usage : parses LOC lines | |
| 998 Function : | |
| 999 Example : | |
| 1000 Returns : | |
| 1001 Args : | |
| 1002 | |
| 1003 | |
| 1004 =cut | |
| 1005 | |
sub _parseSummary {
    # Skips the REM/STR/SEQ/CHN lines of the summary section, then turns
    # each LOC line into a five-element record stored in $self->{'LOC'}.
    # The first line that is not a LOC line is consumed and discarded.
    my ( $self, $io ) = @_;
    my $fh = $io->_fh();

    # [ offset, width ] of the fixed-width fields taken from a LOC line.
    # The first/last residue NAME columns (18 and 35) are redundant and
    # deliberately not extracted.
    my @loc_fields = (
        [ 5,  12 ],    # element name
        [ 22, 5 ],     # first residue PDB number
        [ 28, 1 ],     # first residue chain ID
        [ 40, 5 ],     # last residue PDB number
        [ 46, 1 ],     # last residue chain ID
    );

    my $line = <$fh>;

    # ignore these lines
    $line = <$fh> while ( $line =~ /^REM |^STR |^SEQ |^CHN / );

    my @locs;
    while ( $line =~ /^LOC / ) {
        my @record = map {
            my $field = substr( $line, $_->[ 0 ], $_->[ 1 ] );
            $field =~ s/\s//g;    # strip the column padding
            $field;
        } @loc_fields;
        push( @locs, \@record );
        $line = <$fh>;
    }

    $self->{ 'LOC' } = \@locs;
}
| 1040 | |
| 1041 =head2 _parseASG | |
| 1042 | |
| 1043 Title : _parseASG | |
| 1044 Usage : parses ASG lines | |
| 1045 Function : | |
| 1046 Example : | |
| 1047 Returns : | |
| 1048 Args : | |
| 1049 | |
| 1050 | |
| 1051 =cut | |
| 1052 | |
sub _parseASG {
    # Parses the per-residue ASG lines into $self->{'ASG'}: a hash keyed by
    # chain ID whose values are array refs indexed by the ordinal number
    # found on each line.  Every entry is a seven-field record laid out as
    # described by %ASGTable.
    #
    # Leading REM lines are skipped; parsing stops at the first line that
    # is not an ASG line (or at end of input).
    my $self = shift;
    my $io   = shift;
    my $file = $io->_fh();

    # [ offset, width ] of the fixed-width fields kept from an ASG line.
    my @ASG_lookup = ( [ 5,  3 ],     # Residue name
                       [ 10, 5 ],     # PDB residue number (w/ins.code)
                       [ 24, 1 ],     # one letter sec. stru. abbr.
                       [ 26, 13 ],    # full sec. stru. name
                       [ 42, 7 ],     # phi angle
                       [ 52, 7 ],     # psi angle
                       [ 64, 5 ] );   # residue solv. acc.

    my ( %ASG, @elements, $last_chain );

    my $cur = <$file>;
    while ( defined $cur && $cur =~ /^REM / ) {
        $cur = <$file>;
    }

    while ( defined $cur && $cur =~ /^ASG / ) {
        # ordinal number (columns 16-19) is the array index
        my $ord_num = substr( $cur, 16, 4 );
        $ord_num =~ s/\s//g;

        # chain id (column 9)
        my $chain = substr( $cur, 9, 1 );

        # Chain changed: file the finished chain and start a new one.
        # Test definedness, not truth, so that chain '0' is handled like
        # any other chain.
        if ( defined $last_chain && $chain ne $last_chain ) {
            $ASG{ $last_chain } = [ @elements ];
            @elements = ();
        }

        # now get the rest of the info on this line
        my @fields = map {
            my $field = substr( $cur, $_->[ 0 ], $_->[ 1 ] );
            $field =~ s/\s//g;
            $field;
        } @ASG_lookup;

        $elements[ $ord_num ] = \@fields;
        $last_chain = $chain;
        $cur = <$file>;
    }

    # File the last chain; with no ASG lines there is nothing to store.
    if ( defined $last_chain ) {
        $ASG{ $last_chain } = [ @elements ];
    }

    $self->{ 'ASG' } = \%ASG;
}
| 1108 | |
| 1109 1; | |
| 1110 | |
| 1111 | |
| 1112 |
