comparison variant_effect_predictor/Bio/Structure/SecStr/DSSP/Res.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $id $
2 #
3 # bioperl module for Bio::Structure::SecStr::DSSP::Res.pm
4 #
5 # Cared for by Ed Green <ed@compbio.berkeley.edu>
6 #
7 # Copyright Univ. of California
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Structure::SecStr::DSSP::Res - Module for parsing/accessing dssp output
16
17 =head1 SYNOPSIS
18
19 my $dssp_obj = new Bio::Structure::SecStr::DSSP::Res( '-file' => 'filename.dssp' );
20
21 # or
22
23 my $dssp-obj = new Bio::Structure::SecStr::DSSP::Res( '-fh' => \*STDOUT );
24
25 # get DSSP defined Secondary Structure for residue 20
26 $sec_str = $dssp_obj->resSecStr( 20 );
27
28 # get dssp defined sec. structure summary for PDB residue # 10 of chain A
29
30 $sec_str = $dssp_obj->resSecStrSum( '10:A' );
31
32 =head1 DESCRIPTION
33
34 DSSP::Res is a module for objectifying DSSP output. Methods are then
35 available for extracting all the information within the output file
36 and convenient subsets of it.
37 The principal purpose of DSSP is to determine secondary structural
38 elements of a given structure.
39
40 ( Dictionary of protein secondary structure: pattern recognition
41 of hydrogen-bonded and geometrical features.
42 Biopolymers. 1983 Dec;22(12):2577-637. )
43
44 The DSSP program is available from:
45 http://www.cmbi.kun.nl/swift/dssp
46
47 This information is available on a per residue basis ( see resSecStr
48 and resSecStrSum methods ) or on a per chain basis ( see secBounds
49 method ).
50
51 resSecStr() & secBounds() return one of the following:
52 'H' = alpha helix
53 'B' = residue in isolated beta-bridge
54 'E' = extended strand, participates in beta ladder
55 'G' = 3-helix (3/10 helix)
56 'I' = 5 helix (pi helix)
57 'T' = hydrogen bonded turn
58 'S' = bend
59 '' = no assignment
60
61 A more general classification is returned using the resSecStrSum()
62 method. The purpose of this is to have a method for DSSP and STRIDE
63 derived output whose range is the same.
64 Its output is one of the following:
65
66 'H' = helix ( => 'H', 'G', or 'I' from above )
67 'B' = beta ( => 'B' or 'E' from above )
68 'T' = turn ( => 'T' or 'S' from above )
69 ' ' = no assignment ( => ' ' from above )
70
71 The methods are roughly divided into 3 sections:
72 1. Global features of this structure (PDB ID, total surface area,
73 etc.). These methods do not require an argument.
74 2. Residue specific features ( amino acid, secondary structure,
75 solvent exposed surface area, etc. ). These methods do require an
76 arguement. The argument is supposed to uniquely identify a
77 residue described within the structure. It can be of any of the
78 following forms:
79 ('#A:B') or ( #, 'A', 'B' )
80 || |
81 || - Chain ID (blank for single chain)
82 |--- Insertion code for this residue. Blank for most residues.
83 |--- Numeric portion of residue ID.
84
85 (#)
86 |
87 --- Numeric portion of residue ID. If there is only one chain and
88 it has no ID AND there is no residue with an insertion code at this
89 number, then this can uniquely specify a residue.
90
91 ('#:C') or ( #, 'C' )
92 | |
93 | -Chain ID
94 ---Numeric portion of residue ID.
95
96 If a residue is incompletely specified then the first residue that
97 fits the arguments is returned. For example, if 19 is the argument
98 and there are three chains, A, B, and C with a residue whose number
99 is 19, then 19:A will be returned (assuming its listed first).
100
101 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
102 are not supported by these modules.
103
104 3. Value-added methods. Return values are not verbatem strings
105 parsed from DSSP or STRIDE output.
106
107
108 =head1 FEEDBACK
109
110 =head2 Mailing Lists
111
112 User feedback is an integral part of the evolution of this and other
113 Bioperl modules. Send your comments and suggestions preferably to one
114 of the Bioperl mailing lists. Your participation is much appreciated.
115
116 bioperl-l@bioperl.org - General discussion
117 http://bio.perl.org/MailList.html - About the mailing lists
118
119 =head2 Reporting Bugs
120
121 Report bugs to the Bioperl bug tracking system to help us keep track
122 the bugs and their resolution. Bug reports can be submitted via email
123 or the web:
124
125 bioperl-bugs@bio.perl.org
126 http://bugzilla.bioperl.org/
127
128 =head1 AUTHOR - Ed Green
129
130 Email ed@compbio.berkeley.edu
131
132
133 =head1 APPENDIX
134
135 The rest of the documentation details each method.
136 Internal methods are preceded with a _
137
138 =cut
139
140 package Bio::Structure::SecStr::DSSP::Res;
141 use strict;
142 use vars qw(@ISA);
143 use Bio::Root::Root;
144 use Bio::Root::IO;
145 use Bio::PrimarySeq;
146
147 @ISA = qw(Bio::Root::Root);
148
149 # Would be a class variable if Perl had them
150
151 #attribute begin col # columns
152 our %lookUp = ( 'pdb_resnum' => [ 5, 5 ],
153 'insertionco' => [ 10, 1 ],
154 'pdb_chain' => [ 11, 1 ],
155
156 'amino_acid' => [ 13, 1 ],
157 'term_sig' => [ 14, 1 ],
158
159 'ss_summary' => [ 16, 1 ],
160 '3tph' => [ 18, 1 ],
161 '4tph' => [ 19, 1 ],
162 '5tph' => [ 20, 1 ],
163 'geo_bend' => [ 21, 1 ],
164 'chirality' => [ 22, 1 ],
165 'beta_br1la' => [ 23, 1 ],
166 'beta_br2la' => [ 24, 1 ],
167
168 'bb_part1nu' => [ 25, 4 ],
169 'bb_part2nu' => [ 29, 4 ],
170 'betash_lab' => [ 33, 1 ],
171
172 'solv_acces' => [ 34, 4 ],
173
174 'hb1_nh_o_p' => [ 39, 6 ],
175 'hb1_nh_o_e' => [ 46, 4 ],
176
177 'hb1_o_hn_p' => [ 50, 6 ],
178 'hb1_o_hn_e' => [ 57, 4 ],
179
180 'hb2_nh_o_p' => [ 61, 6 ],
181 'hb2_nh_o_e' => [ 68, 4 ],
182
183 'hb2_o_hn_p' => [ 72, 6 ],
184 'hb2_o_hn_e' => [ 79, 4 ],
185
186 'tco' => [ 85, 6 ],
187
188 'kappa' => [ 91, 6 ],
189
190 'alpha' => [ 97, 6 ],
191
192 'phi' => [ 103, 6 ],
193
194 'psi' => [ 109, 6 ],
195
196 'x_ca' => [ 115, 7 ],
197
198 'y_ca' => [ 122, 7 ],
199
200 'z_ca' => [ 129, 7 ] );
201
202
203 =head1 CONSTRUCTOR
204
205
206 =cut
207
208
209 =head2 new
210
211 Title : new
212 Usage : makes new object of this class
213 Function : Constructor
214 Example : $dssp_obj = Bio::DSSP:Res->new( filename or FILEHANDLE )
215 Returns : object (ref)
216 Args : filename ( must be proper DSSP output file )
217
218 =cut
219
220 sub new {
221 my ( $class, @args ) = @_;
222 my $self = $class->SUPER::new( @args );
223 my $io = Bio::Root::IO->new( @args );
224 $self->_parse( $io->_fh() );
225 $io->close();
226 return $self;
227 }
228
229 =head1 ACCESSORS
230
231
232 =cut
233
234 # GLOBAL FEATURES / INFO / STATS
235
236 =head2 totSurfArea
237
238 Title : totSurfArea
239 Usage : returns total accessible surface area in square Ang.
240 Function :
241 Example : $surArea = $dssp_obj->totSurfArea();
242 Returns : scalar
243 Args : none
244
245 =cut
246
247 sub totSurfArea {
248 my $self = shift;
249 return $self->{ 'Head' }->{ 'ProAccSurf' };
250 }
251
252 =head2 numResidues
253
254 Title : numResidues
255 Usage : returns the total number of residues in all chains or
256 just the specified chain if a chain is specified
257 Function :
258 Example : $num_res = $dssp_obj->numResidues();
259 Returns : scalar int
260 Args : none
261
262
263 =cut
264
265 sub numResidues {
266 my $self = shift;
267 my $chain = shift;
268 if ( !( $chain ) ) {
269 return $self->{'Head'}->{'TotNumRes'};
270 }
271 else {
272 my ( $num_res,
273 $cont_seg );
274 my $cont_seg_pnt = $self->_contSegs();
275 foreach $cont_seg ( @{ $cont_seg_pnt } ) {
276 if ( $chain eq $cont_seg->[ 2 ] ) {
277 # this segment is part of the chain we want
278 $num_res += ( $self->_toDsspKey( $cont_seg->[ 1 ] )
279 - $self->_toDsspKey( $cont_seg->[ 0 ] )
280 + 1 ); # this works because we know the
281 # the region between the start
282 # and end of a dssp key is
283 # continuous
284 }
285 }
286 return $num_res;
287 }
288 }
289
290 # STRAIGHT FROM PDB ENTRY
291
292 =head2 pdbID
293
294 Title : pdbID
295 Usage : returns pdb identifier ( 1FJM, e.g.)
296 Function :
297 Example : $pdb_id = $dssp_obj->pdbID();
298 Returns : scalar string
299 Args : none
300
301
302 =cut
303
304 sub pdbID {
305 my $self = shift;
306 return $self->{'Head'}->{'PDB'};
307 }
308
309 =head2 pdbAuthor
310
311 Title : pdbAuthor
312 Usage : returns author field
313 Function :
314 Example : $auth = $dssp_obj->pdbAuthor()
315 Returns : scalar string
316 Args : none
317
318
319 =cut
320
321 sub pdbAuthor {
322 my $self = shift;
323 return $self->{'Head'}->{'AUTHOR'};
324 }
325
326 =head2 pdbCompound
327
328 Title : pdbCompound
329 Usage : returns pdbCompound given in PDB file
330 Function :
331 Example : $cmpd = $dssp_obj->pdbCompound();
332 Returns : scalar string
333 Args : none
334
335
336 =cut
337
338 sub pdbCompound {
339 my $self = shift;
340 return $self->{'Head'}->{'COMPND'};
341 }
342
343 =head2 pdbDate
344
345 Title : pdbDate
346 Usage : returns date given in PDB file
347 Function :
348 Example : $pdb_date = $dssp_obj->pdbDate();
349 Returns : scalar
350 Args : none
351
352
353 =cut
354
355 sub pdbDate {
356 my $self = shift;
357 return $self->{'Head'}->{'DATE'};
358 }
359
360 =head2 pdbHeader
361
362 Title : pdbHeader
363 Usage : returns header info from PDB file
364 Function :
365 Example : $header = $dssp_obj->pdbHeader();
366 Returns : scalar
367 Args : none
368
369
370 =cut
371
372 sub pdbHeader {
373 my $self = shift;
374 return $self->{'Head'}->{'HEADER'};
375 }
376
377 =head2 pdbSource
378
379 Title : pdbSource
380 Usage : returns pdbSource information from PDBSOURCE line
381 Function :
382 Example : $pdbSource = $dssp_obj->pdbSource();
383 Returns : scalar
384 Args : none
385
386
387 =cut
388
389 sub pdbSource {
390 my $self = shift;
391 return $self->{'Head'}->{'SOURCE'};
392 }
393
394
395 # RESIDUE SPECIFIC ACCESSORS
396
397 =head2 resAA
398
399 Title : resAA
400 Usage : fetches the 1 char amino acid code, given an id
401 Function :
402 Example : $aa = $dssp_obj->aminoAcid( '20:A' ); # pdb id as arg
403 Returns : 1 character scalar string
404 Args : RESIDUE_ID
405
406
407 =cut
408
409 sub resAA {
410 my $self = shift;
411 my @args = @_;
412 my $dssp_key = $self->_toDsspKey( @args );
413 return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' };
414 }
415
416 =head2 resPhi
417
418 Title : resPhi
419 Usage : returns phi angle of a single residue
420 Function : accessor
421 Example : $phi = $dssp_obj->resPhi( RESIDUE_ID )
422 Returns : scalar
423 Args : RESIDUE_ID
424
425
426 =cut
427
428 sub resPhi {
429 my $self = shift;
430 my @args = @_;
431 my $dssp_key = $self->_toDsspKey( @args );
432 return $self->{ 'Res' }->[ $dssp_key ]->{ 'phi' };
433 }
434
435 =head2 resPsi
436
437 Title : resPsi
438 Usage : returns psi angle of a single residue
439 Function : accessor
440 Example : $psi = $dssp_obj->resPsi( RESIDUE_ID )
441 Returns : scalar
442 Args : RESIDUE_ID
443
444
445 =cut
446
447 sub resPsi {
448 my $self = shift;
449 my @args = @_;
450 my $dssp_key = $self->_toDsspKey( @args );
451 return $self->{ 'Res' }->[ $dssp_key ]->{ 'psi' };
452 }
453
454 =head2 resSolvAcc
455
456 Title : resSolvAcc
457 Usage : returns solvent exposed area of this residue in
458 square Angstroms
459 Function :
460 Example : $solv_acc = $dssp_obj->resSolvAcc( RESIDUE_ID );
461 Returns : scalar
462 Args : RESIDUE_ID
463
464
465 =cut
466
467 sub resSolvAcc {
468 my $self = shift;
469 my @args = @_;
470 my $dssp_key = $self->_toDsspKey( @args );
471 return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' };
472 }
473
474 =head2 resSurfArea
475
476 Title : resSurfArea
477 Usage : returns solvent exposed area of this residue in
478 square Angstroms
479 Function :
480 Example : $solv_acc = $dssp_obj->resSurfArea( RESIDUE_ID );
481 Returns : scalar
482 Args : RESIDUE_ID
483
484
485 =cut
486
487 sub resSurfArea {
488 my $self = shift;
489 my @args = @_;
490 my $dssp_key = $self->_toDsspKey( @args );
491 return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' };
492 }
493
494 =head2 resSecStr
495
496 Title : resSecStr
497 Usage : $ss = $dssp_obj->resSecStr( RESIDUE_ID );
498 Function : returns the DSSP secondary structural designation of this residue
499 Example :
500 Returns : a character ( 'B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ' )
501 Args : RESIDUE_ID
502 NOTE : The range of this method differs from that of the
503 resSecStr method in the STRIDE SecStr parser. That is because of the
504 slightly different format for STRIDE and DSSP output. The resSecStrSum
505 method exists to map these different ranges onto an identical range.
506
507 =cut
508
509 sub resSecStr {
510 my $self = shift;
511 my @args = @_;
512 my $dssp_key = $self->_toDsspKey( @args );
513 my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' };
514 return $ss_char if $ss_char;
515 return ' ';
516 }
517
518
519 =head2 resSecStrSum
520
521 Title : resSecStrSum
522 Usage : $ss = $dssp_obj->resSecStrSum( $id );
523 Function : returns what secondary structure group this residue belongs
524 to. One of: 'H': helix ( H, G, or I )
525 'B': beta ( B or E )
526 'T': turn ( T or S )
527 ' ': none ( ' ' )
528 This method is similar to resSecStr, but the information
529 it returns is less specific.
530 Example :
531 Returns : a character ( 'H', 'B', 'T', or ' ' )
532 Args : dssp residue number of pdb residue identifier
533
534
535 =cut
536
537 sub resSecStrSum {
538 my $self = shift;
539 my @args = @_;
540 my $dssp_key = $self->_toDsspKey( @args );
541 my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' };
542 if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) {
543 return 'H';
544 }
545 if ( $ss_char eq ' ' || !( $ss_char ) ) {
546 return ' ';
547 }
548 if ( $ss_char eq 'B' || $ss_char eq 'E' ) {
549 return 'B';
550 }
551 else {
552 return 'T';
553 }
554 }
555
556 # DSSP SPECIFIC
557
558 =head2 hBonds
559
560 Title : hBonds
561 Usage : returns number of 14 different types of H Bonds
562 Function :
563 Example : $hb = $dssp_obj->hBonds
564 Returns : pointer to 14 element array of ints
565 Args : none
566 NOTE : The different type of H-Bonds reported are, in order:
567 TYPE O(I)-->H-N(J)
568 IN PARALLEL BRIDGES
569 IN ANTIPARALLEL BRIDGES
570 TYPE O(I)-->H-N(I-5)
571 TYPE O(I)-->H-N(I-4)
572 TYPE O(I)-->H-N(I-3)
573 TYPE O(I)-->H-N(I-2)
574 TYPE O(I)-->H-N(I-1)
575 TYPE O(I)-->H-N(I+0)
576 TYPE O(I)-->H-N(I+1)
577 TYPE O(I)-->H-N(I+2)
578 TYPE O(I)-->H-N(I+3)
579 TYPE O(I)-->H-N(I+4)
580 TYPE O(I)-->H-N(I+5)
581
582 =cut
583
584 sub hBonds {
585 my $self = shift;
586 return $self->{ 'HBond'};
587 }
588
589 =head2 numSSBr
590
591 Title : numSSBr
592 Usage : returns info about number of SS-bridges
593 Function :
594 Example : @SS_br = $dssp_obj->numSSbr();
595 Returns : 3 element scalar int array
596 Args : none
597
598
599 =cut
600
601 sub numSSBr {
602 my $self = shift;
603 return ( $self->{'Head'}->{'TotSSBr'},
604 $self->{'Head'}->{'TotIaSSBr'},
605 $self->{'Head'}->{'TotIeSSBr'} );
606 }
607
608 =head2 resHB_O_HN
609
610 Title : resHB_O_HN
611 Usage : returns pointer to a 4 element array
612 consisting of: relative position of binding
613 partner #1, energy of that bond (kcal/mol),
614 relative positionof binding partner #2,
615 energy of that bond (kcal/mol). If the bond
616 is not bifurcated, the second bond is reported
617 as 0, 0.0
618 Function : accessor
619 Example : $oBonds_ptr = $dssp_obj->resHB_O_HN( RESIDUE_ID )
620 Returns : pointer to 4 element array
621 Args : RESIDUE_ID
622
623
624 =cut
625
626 sub resHB_O_HN {
627 my $self = shift;
628 my @args = @_;
629 my $dssp_key = $self->_toDsspKey( @args );
630 return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_p' },
631 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_e' },
632 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_p' },
633 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_e' } );
634 }
635
636
637 =head2 resHB_NH_O
638
639 Title : resHB_NH_O
640 Usage : returns pointer to a 4 element array
641 consisting of: relative position of binding
642 partner #1, energy of that bond (kcal/mol),
643 relative positionof binding partner #2,
644 energy of that bond (kcal/mol). If the bond
645 is not bifurcated, the second bond is reported
646 as 0, 0.0
647 Function : accessor
648 Example : $nhBonds_ptr = $dssp_obj->resHB_NH_O( RESIDUE_ID )
649 Returns : pointer to 4 element array
650 Args : RESIDUE_ID
651
652
653 =cut
654
655 sub resHB_NH_O {
656 my $self = shift;
657 my @args = @_;
658 my $dssp_key = $self->_toDsspKey( @args );
659 return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_p' },
660 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_e' },
661 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_p' },
662 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_e' } );
663 }
664
665
666 =head2 resTco
667
668 Title : resTco
669 Usage : returns tco angle around this residue
670 Function : accessor
671 Example : resTco = $dssp_obj->resTco( RESIDUE_ID )
672 Returns : scalar
673 Args : RESIDUE_ID
674
675
676 =cut
677
678 sub resTco {
679 my $self = shift;
680 my @args = @_;
681 my $dssp_key = $self->_toDsspKey( @args );
682 return $self->{ 'Res' }->[ $dssp_key ]->{ 'tco' };
683 }
684
685
686 =head2 resKappa
687
688 Title : resKappa
689 Usage : returns kappa angle around this residue
690 Function : accessor
691 Example : $kappa = $dssp_obj->resKappa( RESIDUE_ID )
692 Returns : scalar
693 Args : RESIDUE_ID ( dssp or PDB )
694
695
696 =cut
697
698 sub resKappa {
699 my $self = shift;
700 my @args = @_;
701 my $dssp_key = $self->_toDsspKey( @args );
702 return $self->{ 'Res' }->[ $dssp_key ]->{ 'kappa' };
703 }
704
705
706 =head2 resAlpha
707
708 Title : resAlpha
709 Usage : returns alpha angle around this residue
710 Function : accessor
711 Example : $alpha = $dssp_obj->resAlpha( RESIDUE_ID )
712 Returns : scalar
713 Args : RESIDUE_ID ( dssp or PDB )
714
715
716 =cut
717
718 sub resAlpha {
719 my $self = shift;
720 my @args = @_;
721 my $dssp_key = $self->_toDsspKey( @args );
722 return $self->{ 'Res' }->[ $dssp_key ]->{ 'alpha' };
723 }
724
725 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
726
727 =head2 secBounds
728
729 Title : secBounds
730 Usage : gets residue ids of boundary residues in each
731 contiguous secondary structural element of specified
732 chain
733 Function : returns pointer to array of 3 element arrays. First
734 two elements are the PDB IDs of the start and end points,
735 respectively and inclusively. The last element is the
736 DSSP secondary structural assignment code,
737 i.e. one of : ('B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ')
738 Example : $ss_elements_pts = $dssp_obj->secBounds( 'A' );
739 Returns : pointer to array of arrays
740 Args : chain id ( 'A', for example ). No arg => no chain id
741
742
743 =cut
744
745 sub secBounds {
746 my $self = shift;
747 my $chain = shift;
748 my %sec_bounds;
749
750 $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' );
751
752 # if we've memoized this chain, use that
753 if ( $self->{ 'SecBounds' } ) {
754 # check to make sure chain is valid
755 if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
756 $self->throw( "No such chain: $chain\n" );
757 }
758 return $self->{ 'SecBounds' }->{ $chain };
759 }
760
761 my ( $cur_element, $i, $cur_chain, $beg, );
762
763 #initialize
764 $cur_element = $self->{ 'Res' }->[ 1 ]->{ 'ss_summary' };
765 $beg = 1;
766
767 for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) {
768 if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) {
769 # element is terminated by a chain discontinuity
770 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
771 [ $self->_toPdbId( $beg ),
772 $self->_toPdbId( $i - 1 ),
773 $cur_element ] );
774 $i++;
775 $beg = $i;
776 $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
777 }
778
779 elsif ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ne $cur_element ) {
780 # element is terminated by beginning of a new element
781 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
782 [ $self->_toPdbId( $beg ),
783 $self->_toPdbId( $i - 1 ),
784 $cur_element ] );
785 $beg = $i;
786 $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
787 }
788 }
789 #last residue
790 if ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } eq $cur_element ) {
791 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
792 [ $self->_toPdbId( $beg ),
793 $self->_toPdbId( $i ),
794 $cur_element ] );
795 }
796
797 else {
798 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
799 [ $self->_toPdbId( $beg ),
800 $self->_toPdbId( $i - 1 ),
801 $cur_element ] );
802 push( @{ $sec_bounds{ $self->_pdbChain( $i ) } },
803 [ $self->_toPdbId( $i ),
804 $self->_toPdbId( $i ),
805 $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ] );
806 }
807
808 $self->{ 'SecBounds' } = \%sec_bounds;
809
810 # check to make sure chain is valid
811 if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
812 $self->throw( "No such chain: $chain\n" );
813 }
814
815 return $self->{ 'SecBounds' }->{ $chain };
816 }
817
818
819
820 =head2 chains
821
822 Title : chains
823 Usage : returns pointer to array of chain I.D.s (characters)
824 Function :
825 Example : $chains_pnt = $dssp_obj->chains();
826 Returns : array of characters, one of which may be ' '
827 Args : none
828
829
830 =cut
831
832 sub chains {
833 my $self = shift;
834 my $cont_segs = $self->_contSegs();
835 my %chains;
836 my $seg;
837 foreach $seg ( @{ $cont_segs } ) {
838 $chains{ $seg->[ 2 ] } = 1;
839 }
840 my @chains = keys( %chains );
841 return \@chains;
842 }
843
844
845 =head2 getSeq
846
847 Title : getSeq
848 Usage : returns a Bio::PrimarySeq object which represents a good
849 guess at the sequence of the given chain
850 Function : For most chains of most entries, the sequence returned by
851 this method will be very good. However, it is inherently
852 unsafe to rely on DSSP to extract sequence information about
853 a PDB entry. More reliable information can be obtained from
854 the PDB entry itself.
855 Example : $pso = $dssp_obj->getSeq( 'A' );
856 Returns : (pointer to) a PrimarySeq object
857 Args : Chain identifier. If none given, ' ' is assumed. If no ' '
858 chain, the first chain is used.
859
860
861 =cut
862
863 sub getSeq {
864 my $self = shift;
865 my $chain = shift;
866
867 my ( $pot_chain,
868 $seq,
869 $frag_num,
870 $frag,
871 $curPdbNum,
872 $lastPdbNum,
873 $gap_len,
874 $i,
875 $id,
876 );
877 my @frags;
878
879 if ( !( $chain ) ) {
880 $chain = ' ';
881 }
882
883 if ( $self->{ 'Seq' }->{ $chain } ) {
884 return $self->{ 'Seq' }->{ $chain };
885 }
886
887 my $contSegs_pnt = $self->_contSegs();
888
889 # load up specified chain
890 foreach $pot_chain ( @{ $contSegs_pnt } ) {
891 if ( $pot_chain->[ 2 ] eq $chain ) {
892 push( @frags, $pot_chain );
893 }
894 }
895
896 # if that didn't work, just get the first one
897 if ( !( @frags ) ) {
898 $chain = $contSegs_pnt->[ 0 ]->[ 2 ];
899 foreach $pot_chain ( @{ $contSegs_pnt } ) {
900 if ( $pot_chain->[ 2 ] eq $chain ) {
901 push( @frags, $chain );
902 }
903 }
904 }
905
906 # now build the sequence string
907 $seq = "";
908 $frag_num = 0;
909 foreach $frag ( @frags ) {
910 $frag_num++;
911 if ( $frag_num > 1 ) { # we need to put in some gap seq
912 $curPdbNum = $self->_pdbNum( $frag->[ 0 ] );
913 $gap_len = $curPdbNum - $lastPdbNum - 1;
914 if ( $gap_len > 0 ) {
915 $seq .= 'u' x $gap_len;
916 }
917 else {
918 $seq .= 'u';
919 }
920 }
921 for ( $i = $frag->[ 0 ]; $i <= $frag->[ 1 ]; $i++ ) {
922 $seq .= $self->_resAA( $i );
923 }
924 $lastPdbNum = $self->_pdbNum( $i - 1 );
925 }
926
927
928
929 $id = $self->pdbID();
930 $id .= ":$chain";
931
932 $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new ( -seq => $seq,
933 -id => $id,
934 -moltype => 'protein'
935 );
936 return $self->{ 'Seq' }->{ $chain };
937 }
938
939 =head1 INTERNAL METHODS
940
941
942 =cut
943
944 =head2 _pdbChain
945
946 Title : _pdbChain
947 Usage : returns the pdb chain id of given residue
948 Function :
949 Example : $chain_id = $dssp_obj->pdbChain( DSSP_KEY );
950 Returns : scalar
951 Args : DSSP_KEY ( dssp or pdb )
952
953
954 =cut
955
956 sub _pdbChain {
957 my $self = shift;
958 my $dssp_key = shift;
959 return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_chain' };
960 }
961
962 =head2 _resAA
963
964 Title : _resAA
965 Usage : fetches the 1 char amino acid code, given a dssp id
966 Function :
967 Example : $aa = $dssp_obj->_resAA( dssp_id );
968 Returns : 1 character scalar string
969 Args : dssp_id
970
971
972 =cut
973
974 sub _resAA {
975 my $self = shift;
976 my $dssp_key = shift;
977 return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' };
978 }
979
980
981 =head2 _pdbNum
982
983 Title : _pdbNum
984 Usage : fetches the numeric portion of the identifier for a given
985 residue as reported by the pdb entry. Note, this DOES NOT
986 uniquely specify a residue. There may be an insertion code
987 and/or chain identifier differences.
988 Function :
989 Example : $pdbNum = $self->_pdbNum( DSSP_ID );
990 Returns : a scalar
991 Args : DSSP_ID
992
993
994 =cut
995
996 sub _pdbNum {
997 my $self = shift;
998 my $dssp_key = shift;
999 return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_resnum' };
1000 }
1001
1002 =head2 _pdbInsCo
1003
1004 Title : _pdbInsCo
1005 Usage : fetches the Insertion Code for this residue, if it has one.
1006 Function :
1007 Example : $pdbNum = $self->_pdbInsCo( DSSP_ID );
1008 Returns : a scalar
1009 Args : DSSP_ID
1010
1011
1012 =cut
1013
1014 sub _pdbInsCo {
1015 my $self = shift;
1016 my $dssp_key = shift;
1017 return $self->{ 'Res' }->[ $dssp_key ]->{ 'insertionco' };
1018 }
1019
1020 =head2 _toPdbId
1021
1022 Title : _toPdbId
1023 Usage : Takes a dssp key and builds the corresponding
1024 PDB identifier string
1025 Function :
1026 Example : $pdbId = $self->_toPdbId( DSSP_ID );
1027 Returns : scalar
1028 Args : DSSP_ID
1029
1030 =cut
1031
1032 sub _toPdbId {
1033 my $self = shift;
1034 my $dssp_key = shift;
1035 my $pdbId = ( $self->_pdbNum( $dssp_key ).
1036 $self->_pdbInsCo( $dssp_key ) );
1037 my $chain = $self->_pdbChain( $dssp_key );
1038 $pdbId = "$pdbId:$chain" if $chain;
1039 return $pdbId;
1040 }
1041
1042 =head2 _contSegs
1043
1044 Title : _contSegs
1045 Usage : find the endpoints of continuous regions of this structure
1046 Function : returns pointer to array of 3 element array.
1047 Elements are the dssp keys of the start and end points of each
1048 continuous element and its PDB chain id (may be blank).
1049 Note that it is common to have several
1050 continuous elements with the same chain id. This occurs
1051 when an internal region is disordered and no structural
1052 information is available.
1053 Example : $cont_seg_ptr = $dssp_obj->_contSegs();
1054 Returns : pointer to array of arrays
1055 Args : none
1056
1057
1058 =cut
1059
1060 sub _contSegs {
1061 my $self = shift;
1062 if ( $self->{ 'contSegs' } ) {
1063 return $self->{ 'contSegs' };
1064 }
1065 else {
1066 # first time, so make contSegs
1067 my ( $cur_chain, $i, $beg );
1068 my @contSegs;
1069 #initialize
1070 $cur_chain = $self->_pdbChain( 1 );
1071 $beg = 1;
1072 #internal residues
1073 for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) {
1074 if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) {
1075 push( @contSegs, [ $beg, $i - 1, $cur_chain ] );
1076 $beg = $i + 1;
1077 $cur_chain = $self->_pdbChain( $i + 1 );
1078 }
1079 }
1080 # last residue must be the end of a chain
1081 push( @contSegs, [ $beg, $i, $cur_chain ] );
1082
1083 $self->{ 'contSegs' } = \@contSegs;
1084 return $self->{ 'contSegs' };
1085 }
1086 }
1087
1088 =head2 _numResLines
1089
1090 Title : _numResLines
1091 Usage : returns the total number of residue lines in this
1092 dssp file.
1093 This number is DIFFERENT than the number of residues in
1094 the pdb file because dssp has chain termination and chain
1095 discontinuity 'residues'.
1096 Function :
1097 Example : $num_res = $dssp_obj->_numResLines();
1098 Returns : scalar int
1099 Args : none
1100
1101
1102 =cut
1103
1104 sub _numResLines {
1105 my $self = shift;
1106 return ( $#{$self->{ 'Res' }} );
1107 }
1108
1109 =head2 _toDsspKey
1110
1111 Title : _toDsspKey
1112 Usage : returns the unique dssp integer key given a pdb residue id.
1113 All accessor methods require (internally)
1114 the dssp key. This method is very useful in converting
1115 pdb keys to dssp keys so the accessors can accept pdb keys
1116 as argument. PDB Residue IDs are inherently
1117 problematic since they have multiple parts of
1118 overlapping function and ill-defined or observed
1119 convention in form. Input can be in any of the formats
1120 described in the DESCRIPTION section above.
1121 Function :
1122 Example : $dssp_id = $dssp_obj->_pdbKeyToDsspKey( '10B:A' )
1123 Returns : scalar int
1124 Args : pdb residue identifier: num[insertion code]:[chain]
1125
1126
1127 =cut
1128
1129 sub _toDsspKey {
1130 # Consider adding lookup table for 'common' name (like 20:A) for
1131 # fast access. Could be built during parse of input.
1132
1133 my $self = shift;
1134 my $arg_str;
1135
1136 my ( $key_num, $chain_id, $ins_code );
1137
1138 # check to see how many args are given
1139 if ( $#_ > 1 ) { # multiple args
1140 $key_num = shift;
1141 if ( $#_ > 1 ) { # still multiple args => ins. code, too
1142 $ins_code = shift;
1143 $chain_id = shift;
1144 }
1145 else { # just one more arg. => chain_id
1146 $chain_id = shift;
1147 }
1148 }
1149 else { # only single arg. Might be number or string
1150 $arg_str = shift;
1151 if ( $arg_str =~ /:/ ) {
1152 # a chain is specified
1153 ( $chain_id ) = ( $arg_str =~ /:(.)/);
1154 $arg_str =~ s/:.//;
1155 }
1156 if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
1157 # an insertion code is specified
1158 ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
1159 $arg_str =~ s/[A-Z]|[a-z]//g;
1160 }
1161 #now, get the number bit-> everything still around
1162 $key_num = $arg_str;
1163 }
1164
1165 # Now find the residue which fits this description. Linear search is
1166 # probably not the best way to do this, but oh well...
1167 for ( my $i = 1; $i <= $self->_numResLines(); $i++ ) {
1168 if ( $key_num == $self->{'Res'}->[$i]->{'pdb_resnum'} ) {
1169 if ( $chain_id ) { # if a chain was specified
1170 if ( $chain_id eq $self->{'Res'}->[$i]->{'pdb_chain'} ) {
1171 # and it's the right one
1172 if ( $ins_code ) { # if insertion code was specified
1173 if ( $ins_code eq $self->{'Res'}->[$i]->{'insertionco'} ) {
1174 # and it's the right one
1175 return $i;
1176 }
1177 }
1178 else { # no isertion code specified, this is it
1179 return $i;
1180 }
1181 }
1182 }
1183 else { # no chain was specified
1184 return $i;
1185 }
1186 }
1187 }
1188 $self->throw( "PDB key not found." );
1189 }
1190
1191 =head2 _parse
1192
1193 Title : _parse
1194 Usage : parses dssp output
1195 Function :
1196 Example : used by the constructor
1197 Returns :
1198 Args : input source ( handled by Bio::Root:IO )
1199
1200
1201 =cut
1202
1203 sub _parse {
1204 my $self = shift;
1205 my $file = shift;
1206 my $cur;
1207 my $current_chain;
1208 my ( @elements, @hbond );
1209 my ( %head, %his, );
1210 my $element;
1211 my $res_num;
1212
1213 $cur = <$file>;
1214 unless ( $cur =~ /^==== Secondary Structure Definition/ ) {
1215 $self->throw( "Not dssp output" );
1216 return;
1217 }
1218
1219 $cur = <$file>;
1220 ( $element ) = ( $cur =~ /^REFERENCE\s+(.+?)\s+\./ );
1221 $head{ 'REFERENCE' } = $element;
1222
1223 $cur = <$file>;
1224 @elements = split( /\s+/, $cur );
1225 pop( @elements ); # take off that annoying period
1226 $head{ 'PDB' } = pop( @elements );
1227 $head{ 'DATE' } = pop( @elements );
1228 # now, everything else is "header" except for the word
1229 # HEADER
1230 shift( @elements );
1231 $element = shift( @elements );
1232 while ( @elements ) {
1233 $element = $element." ".shift( @elements );
1234 }
1235 $head{ 'HEADER' } = $element;
1236
1237 $cur = <$file>;
1238 ($element) = ( $cur =~ /^COMPND\s+(.+?)\s+\./ );
1239 $head{ 'COMPND' } = $element;
1240
1241 $cur = <$file>;
1242 ($element) = ( $cur =~ /^PDBSOURCE\s+(.+?)\s+\./ );
1243 $head{ 'SOURCE' } = $element;
1244
1245 $cur = <$file>;
1246 ($element) = ( $cur =~ /^AUTHOR\s+(.+?)\s+/ );
1247 $head{ 'AUTHOR' } = $element;
1248
1249 $cur = <$file>;
1250 @elements = split( /\s+/, $cur );
1251 shift( @elements );
1252 $head{ 'TotNumRes' } = shift( @elements );
1253 $head{ 'NumChain' } = shift( @elements );
1254 $head{ 'TotSSBr' } = shift( @elements );
1255 $head{ 'TotIaSSBr' } = shift( @elements );
1256 $head{ 'TotIeSSBr' } = shift( @elements );
1257
1258 $cur = <$file>;
1259 ( $element ) = ( $cur =~ /\s*(\d+\.\d*)\s+ACCESSIBLE SURFACE OF PROTEIN/ );
1260 $head{ 'ProAccSurf' } = $element;
1261 $self->{ 'Head' } = \%head;
1262
1263 for ( my $i = 1; $i <= 14; $i++ ) {
1264 $cur = <$file>;
1265 ( $element ) =
1266 $cur =~ /\s*(\d+)\s+\d+\.\d+\s+TOTAL NUMBER OF HYDROGEN/;
1267 push( @hbond, $element );
1268 # $hbond{ $hBondType } = $element;
1269 }
1270 $self->{ 'HBond' } = \@hbond;
1271
1272 my $histogram_finished = 0;
1273 while ( !($histogram_finished) && chomp( $cur = <$file> ) ) {
1274 if ( $cur =~ /RESIDUE AA STRUCTURE/ ) {
1275 $histogram_finished = 1;
1276 }
1277 }
1278
1279 while ( chomp( $cur = <$file> ) ) {
1280 $res_num = substr( $cur, 0, 5 );
1281 $res_num =~ s/\s//g;
1282 $self->{ 'Res' }->[ $res_num ] = &_parseResLine( $cur );
1283 }
1284 }
1285
1286
1287 =head2 _parseResLine
1288
1289 Title : _parseResLine
1290 Usage : parses a single residue line
1291 Function :
1292 Example : used internally
1293 Returns :
1294 Args : residue line ( string )
1295
1296
1297 =cut
1298
1299 sub _parseResLine() {
1300 my $cur = shift;
1301 my ( $feat, $value );
1302 my %elements;
1303
1304 foreach $feat ( keys %lookUp ) {
1305 $value = substr( $cur, $lookUp{ $feat }->[0],
1306 $lookUp{ $feat }->[1] );
1307 $value =~ s/\s//g;
1308 $elements{$feat} = $value ;
1309 }
1310
1311 # if no chain id, make it '-' (like STRIDE...very convenient)
1312 if ( !( $elements{ 'pdb_chain' } ) || $elements{ 'pdb_chain'} eq ' ' ) {
1313 $elements{ 'pdb_chain' } = '-';
1314 }
1315 return \%elements;
1316 }
1317
1318 return 1; #just because
1319