0
|
1 # $id $
|
|
2 #
|
|
3 # bioperl module for Bio::Structure::SecStr::DSSP::Res.pm
|
|
4 #
|
|
5 # Cared for by Ed Green <ed@compbio.berkeley.edu>
|
|
6 #
|
|
7 # Copyright Univ. of California
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Structure::SecStr::DSSP::Res - Module for parsing/accessing dssp output
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 my $dssp_obj = new Bio::Structure::SecStr::DSSP::Res( '-file' => 'filename.dssp' );
|
|
20
|
|
21 # or
|
|
22
|
|
23 my $dssp-obj = new Bio::Structure::SecStr::DSSP::Res( '-fh' => \*STDOUT );
|
|
24
|
|
25 # get DSSP defined Secondary Structure for residue 20
|
|
26 $sec_str = $dssp_obj->resSecStr( 20 );
|
|
27
|
|
28 # get dssp defined sec. structure summary for PDB residue # 10 of chain A
|
|
29
|
|
30 $sec_str = $dssp_obj->resSecStrSum( '10:A' );
|
|
31
|
|
32 =head1 DESCRIPTION
|
|
33
|
|
34 DSSP::Res is a module for objectifying DSSP output. Methods are then
|
|
35 available for extracting all the information within the output file
|
|
36 and convenient subsets of it.
|
|
37 The principal purpose of DSSP is to determine secondary structural
|
|
38 elements of a given structure.
|
|
39
|
|
40 ( Dictionary of protein secondary structure: pattern recognition
|
|
41 of hydrogen-bonded and geometrical features.
|
|
42 Biopolymers. 1983 Dec;22(12):2577-637. )
|
|
43
|
|
44 The DSSP program is available from:
|
|
45 http://www.cmbi.kun.nl/swift/dssp
|
|
46
|
|
47 This information is available on a per residue basis ( see resSecStr
|
|
48 and resSecStrSum methods ) or on a per chain basis ( see secBounds
|
|
49 method ).
|
|
50
|
|
51 resSecStr() & secBounds() return one of the following:
|
|
52 'H' = alpha helix
|
|
53 'B' = residue in isolated beta-bridge
|
|
54 'E' = extended strand, participates in beta ladder
|
|
55 'G' = 3-helix (3/10 helix)
|
|
56 'I' = 5 helix (pi helix)
|
|
57 'T' = hydrogen bonded turn
|
|
58 'S' = bend
|
|
59 '' = no assignment
|
|
60
|
|
61 A more general classification is returned using the resSecStrSum()
|
|
62 method. The purpose of this is to have a method for DSSP and STRIDE
|
|
63 derived output whose range is the same.
|
|
64 Its output is one of the following:
|
|
65
|
|
66 'H' = helix ( => 'H', 'G', or 'I' from above )
|
|
67 'B' = beta ( => 'B' or 'E' from above )
|
|
68 'T' = turn ( => 'T' or 'S' from above )
|
|
69 ' ' = no assignment ( => ' ' from above )
|
|
70
|
|
71 The methods are roughly divided into 3 sections:
|
|
72 1. Global features of this structure (PDB ID, total surface area,
|
|
73 etc.). These methods do not require an argument.
|
|
74 2. Residue specific features ( amino acid, secondary structure,
|
|
75 solvent exposed surface area, etc. ). These methods do require an
|
|
76 arguement. The argument is supposed to uniquely identify a
|
|
77 residue described within the structure. It can be of any of the
|
|
78 following forms:
|
|
79 ('#A:B') or ( #, 'A', 'B' )
|
|
80 || |
|
|
81 || - Chain ID (blank for single chain)
|
|
82 |--- Insertion code for this residue. Blank for most residues.
|
|
83 |--- Numeric portion of residue ID.
|
|
84
|
|
85 (#)
|
|
86 |
|
|
87 --- Numeric portion of residue ID. If there is only one chain and
|
|
88 it has no ID AND there is no residue with an insertion code at this
|
|
89 number, then this can uniquely specify a residue.
|
|
90
|
|
91 ('#:C') or ( #, 'C' )
|
|
92 | |
|
|
93 | -Chain ID
|
|
94 ---Numeric portion of residue ID.
|
|
95
|
|
96 If a residue is incompletely specified then the first residue that
|
|
97 fits the arguments is returned. For example, if 19 is the argument
|
|
98 and there are three chains, A, B, and C with a residue whose number
|
|
99 is 19, then 19:A will be returned (assuming its listed first).
|
|
100
|
|
101 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
|
|
102 are not supported by these modules.
|
|
103
|
|
104 3. Value-added methods. Return values are not verbatem strings
|
|
105 parsed from DSSP or STRIDE output.
|
|
106
|
|
107
|
|
108 =head1 FEEDBACK
|
|
109
|
|
110 =head2 Mailing Lists
|
|
111
|
|
112 User feedback is an integral part of the evolution of this and other
|
|
113 Bioperl modules. Send your comments and suggestions preferably to one
|
|
114 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
115
|
|
116 bioperl-l@bioperl.org - General discussion
|
|
117 http://bio.perl.org/MailList.html - About the mailing lists
|
|
118
|
|
119 =head2 Reporting Bugs
|
|
120
|
|
121 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
122 the bugs and their resolution. Bug reports can be submitted via email
|
|
123 or the web:
|
|
124
|
|
125 bioperl-bugs@bio.perl.org
|
|
126 http://bugzilla.bioperl.org/
|
|
127
|
|
128 =head1 AUTHOR - Ed Green
|
|
129
|
|
130 Email ed@compbio.berkeley.edu
|
|
131
|
|
132
|
|
133 =head1 APPENDIX
|
|
134
|
|
135 The rest of the documentation details each method.
|
|
136 Internal methods are preceded with a _
|
|
137
|
|
138 =cut
|
|
139
|
|
140 package Bio::Structure::SecStr::DSSP::Res;
|
|
141 use strict;
|
|
142 use vars qw(@ISA);
|
|
143 use Bio::Root::Root;
|
|
144 use Bio::Root::IO;
|
|
145 use Bio::PrimarySeq;
|
|
146
|
|
147 @ISA = qw(Bio::Root::Root);
|
|
148
|
|
149 # Would be a class variable if Perl had them
|
|
150
|
|
151 #attribute begin col # columns
|
|
152 our %lookUp = ( 'pdb_resnum' => [ 5, 5 ],
|
|
153 'insertionco' => [ 10, 1 ],
|
|
154 'pdb_chain' => [ 11, 1 ],
|
|
155
|
|
156 'amino_acid' => [ 13, 1 ],
|
|
157 'term_sig' => [ 14, 1 ],
|
|
158
|
|
159 'ss_summary' => [ 16, 1 ],
|
|
160 '3tph' => [ 18, 1 ],
|
|
161 '4tph' => [ 19, 1 ],
|
|
162 '5tph' => [ 20, 1 ],
|
|
163 'geo_bend' => [ 21, 1 ],
|
|
164 'chirality' => [ 22, 1 ],
|
|
165 'beta_br1la' => [ 23, 1 ],
|
|
166 'beta_br2la' => [ 24, 1 ],
|
|
167
|
|
168 'bb_part1nu' => [ 25, 4 ],
|
|
169 'bb_part2nu' => [ 29, 4 ],
|
|
170 'betash_lab' => [ 33, 1 ],
|
|
171
|
|
172 'solv_acces' => [ 34, 4 ],
|
|
173
|
|
174 'hb1_nh_o_p' => [ 39, 6 ],
|
|
175 'hb1_nh_o_e' => [ 46, 4 ],
|
|
176
|
|
177 'hb1_o_hn_p' => [ 50, 6 ],
|
|
178 'hb1_o_hn_e' => [ 57, 4 ],
|
|
179
|
|
180 'hb2_nh_o_p' => [ 61, 6 ],
|
|
181 'hb2_nh_o_e' => [ 68, 4 ],
|
|
182
|
|
183 'hb2_o_hn_p' => [ 72, 6 ],
|
|
184 'hb2_o_hn_e' => [ 79, 4 ],
|
|
185
|
|
186 'tco' => [ 85, 6 ],
|
|
187
|
|
188 'kappa' => [ 91, 6 ],
|
|
189
|
|
190 'alpha' => [ 97, 6 ],
|
|
191
|
|
192 'phi' => [ 103, 6 ],
|
|
193
|
|
194 'psi' => [ 109, 6 ],
|
|
195
|
|
196 'x_ca' => [ 115, 7 ],
|
|
197
|
|
198 'y_ca' => [ 122, 7 ],
|
|
199
|
|
200 'z_ca' => [ 129, 7 ] );
|
|
201
|
|
202
|
|
203 =head1 CONSTRUCTOR
|
|
204
|
|
205
|
|
206 =cut
|
|
207
|
|
208
|
|
209 =head2 new
|
|
210
|
|
211 Title : new
|
|
212 Usage : makes new object of this class
|
|
213 Function : Constructor
|
|
214 Example : $dssp_obj = Bio::DSSP:Res->new( filename or FILEHANDLE )
|
|
215 Returns : object (ref)
|
|
216 Args : filename ( must be proper DSSP output file )
|
|
217
|
|
218 =cut
|
|
219
|
|
220 sub new {
|
|
221 my ( $class, @args ) = @_;
|
|
222 my $self = $class->SUPER::new( @args );
|
|
223 my $io = Bio::Root::IO->new( @args );
|
|
224 $self->_parse( $io->_fh() );
|
|
225 $io->close();
|
|
226 return $self;
|
|
227 }
|
|
228
|
|
229 =head1 ACCESSORS
|
|
230
|
|
231
|
|
232 =cut
|
|
233
|
|
234 # GLOBAL FEATURES / INFO / STATS
|
|
235
|
|
236 =head2 totSurfArea
|
|
237
|
|
238 Title : totSurfArea
|
|
239 Usage : returns total accessible surface area in square Ang.
|
|
240 Function :
|
|
241 Example : $surArea = $dssp_obj->totSurfArea();
|
|
242 Returns : scalar
|
|
243 Args : none
|
|
244
|
|
245 =cut
|
|
246
|
|
247 sub totSurfArea {
|
|
248 my $self = shift;
|
|
249 return $self->{ 'Head' }->{ 'ProAccSurf' };
|
|
250 }
|
|
251
|
|
252 =head2 numResidues
|
|
253
|
|
254 Title : numResidues
|
|
255 Usage : returns the total number of residues in all chains or
|
|
256 just the specified chain if a chain is specified
|
|
257 Function :
|
|
258 Example : $num_res = $dssp_obj->numResidues();
|
|
259 Returns : scalar int
|
|
260 Args : none
|
|
261
|
|
262
|
|
263 =cut
|
|
264
|
|
265 sub numResidues {
|
|
266 my $self = shift;
|
|
267 my $chain = shift;
|
|
268 if ( !( $chain ) ) {
|
|
269 return $self->{'Head'}->{'TotNumRes'};
|
|
270 }
|
|
271 else {
|
|
272 my ( $num_res,
|
|
273 $cont_seg );
|
|
274 my $cont_seg_pnt = $self->_contSegs();
|
|
275 foreach $cont_seg ( @{ $cont_seg_pnt } ) {
|
|
276 if ( $chain eq $cont_seg->[ 2 ] ) {
|
|
277 # this segment is part of the chain we want
|
|
278 $num_res += ( $self->_toDsspKey( $cont_seg->[ 1 ] )
|
|
279 - $self->_toDsspKey( $cont_seg->[ 0 ] )
|
|
280 + 1 ); # this works because we know the
|
|
281 # the region between the start
|
|
282 # and end of a dssp key is
|
|
283 # continuous
|
|
284 }
|
|
285 }
|
|
286 return $num_res;
|
|
287 }
|
|
288 }
|
|
289
|
|
290 # STRAIGHT FROM PDB ENTRY
|
|
291
|
|
292 =head2 pdbID
|
|
293
|
|
294 Title : pdbID
|
|
295 Usage : returns pdb identifier ( 1FJM, e.g.)
|
|
296 Function :
|
|
297 Example : $pdb_id = $dssp_obj->pdbID();
|
|
298 Returns : scalar string
|
|
299 Args : none
|
|
300
|
|
301
|
|
302 =cut
|
|
303
|
|
304 sub pdbID {
|
|
305 my $self = shift;
|
|
306 return $self->{'Head'}->{'PDB'};
|
|
307 }
|
|
308
|
|
309 =head2 pdbAuthor
|
|
310
|
|
311 Title : pdbAuthor
|
|
312 Usage : returns author field
|
|
313 Function :
|
|
314 Example : $auth = $dssp_obj->pdbAuthor()
|
|
315 Returns : scalar string
|
|
316 Args : none
|
|
317
|
|
318
|
|
319 =cut
|
|
320
|
|
321 sub pdbAuthor {
|
|
322 my $self = shift;
|
|
323 return $self->{'Head'}->{'AUTHOR'};
|
|
324 }
|
|
325
|
|
326 =head2 pdbCompound
|
|
327
|
|
328 Title : pdbCompound
|
|
329 Usage : returns pdbCompound given in PDB file
|
|
330 Function :
|
|
331 Example : $cmpd = $dssp_obj->pdbCompound();
|
|
332 Returns : scalar string
|
|
333 Args : none
|
|
334
|
|
335
|
|
336 =cut
|
|
337
|
|
338 sub pdbCompound {
|
|
339 my $self = shift;
|
|
340 return $self->{'Head'}->{'COMPND'};
|
|
341 }
|
|
342
|
|
343 =head2 pdbDate
|
|
344
|
|
345 Title : pdbDate
|
|
346 Usage : returns date given in PDB file
|
|
347 Function :
|
|
348 Example : $pdb_date = $dssp_obj->pdbDate();
|
|
349 Returns : scalar
|
|
350 Args : none
|
|
351
|
|
352
|
|
353 =cut
|
|
354
|
|
355 sub pdbDate {
|
|
356 my $self = shift;
|
|
357 return $self->{'Head'}->{'DATE'};
|
|
358 }
|
|
359
|
|
360 =head2 pdbHeader
|
|
361
|
|
362 Title : pdbHeader
|
|
363 Usage : returns header info from PDB file
|
|
364 Function :
|
|
365 Example : $header = $dssp_obj->pdbHeader();
|
|
366 Returns : scalar
|
|
367 Args : none
|
|
368
|
|
369
|
|
370 =cut
|
|
371
|
|
372 sub pdbHeader {
|
|
373 my $self = shift;
|
|
374 return $self->{'Head'}->{'HEADER'};
|
|
375 }
|
|
376
|
|
377 =head2 pdbSource
|
|
378
|
|
379 Title : pdbSource
|
|
380 Usage : returns pdbSource information from PDBSOURCE line
|
|
381 Function :
|
|
382 Example : $pdbSource = $dssp_obj->pdbSource();
|
|
383 Returns : scalar
|
|
384 Args : none
|
|
385
|
|
386
|
|
387 =cut
|
|
388
|
|
389 sub pdbSource {
|
|
390 my $self = shift;
|
|
391 return $self->{'Head'}->{'SOURCE'};
|
|
392 }
|
|
393
|
|
394
|
|
395 # RESIDUE SPECIFIC ACCESSORS
|
|
396
|
|
397 =head2 resAA
|
|
398
|
|
399 Title : resAA
|
|
400 Usage : fetches the 1 char amino acid code, given an id
|
|
401 Function :
|
|
402 Example : $aa = $dssp_obj->aminoAcid( '20:A' ); # pdb id as arg
|
|
403 Returns : 1 character scalar string
|
|
404 Args : RESIDUE_ID
|
|
405
|
|
406
|
|
407 =cut
|
|
408
|
|
409 sub resAA {
|
|
410 my $self = shift;
|
|
411 my @args = @_;
|
|
412 my $dssp_key = $self->_toDsspKey( @args );
|
|
413 return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' };
|
|
414 }
|
|
415
|
|
416 =head2 resPhi
|
|
417
|
|
418 Title : resPhi
|
|
419 Usage : returns phi angle of a single residue
|
|
420 Function : accessor
|
|
421 Example : $phi = $dssp_obj->resPhi( RESIDUE_ID )
|
|
422 Returns : scalar
|
|
423 Args : RESIDUE_ID
|
|
424
|
|
425
|
|
426 =cut
|
|
427
|
|
428 sub resPhi {
|
|
429 my $self = shift;
|
|
430 my @args = @_;
|
|
431 my $dssp_key = $self->_toDsspKey( @args );
|
|
432 return $self->{ 'Res' }->[ $dssp_key ]->{ 'phi' };
|
|
433 }
|
|
434
|
|
435 =head2 resPsi
|
|
436
|
|
437 Title : resPsi
|
|
438 Usage : returns psi angle of a single residue
|
|
439 Function : accessor
|
|
440 Example : $psi = $dssp_obj->resPsi( RESIDUE_ID )
|
|
441 Returns : scalar
|
|
442 Args : RESIDUE_ID
|
|
443
|
|
444
|
|
445 =cut
|
|
446
|
|
447 sub resPsi {
|
|
448 my $self = shift;
|
|
449 my @args = @_;
|
|
450 my $dssp_key = $self->_toDsspKey( @args );
|
|
451 return $self->{ 'Res' }->[ $dssp_key ]->{ 'psi' };
|
|
452 }
|
|
453
|
|
454 =head2 resSolvAcc
|
|
455
|
|
456 Title : resSolvAcc
|
|
457 Usage : returns solvent exposed area of this residue in
|
|
458 square Angstroms
|
|
459 Function :
|
|
460 Example : $solv_acc = $dssp_obj->resSolvAcc( RESIDUE_ID );
|
|
461 Returns : scalar
|
|
462 Args : RESIDUE_ID
|
|
463
|
|
464
|
|
465 =cut
|
|
466
|
|
467 sub resSolvAcc {
|
|
468 my $self = shift;
|
|
469 my @args = @_;
|
|
470 my $dssp_key = $self->_toDsspKey( @args );
|
|
471 return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' };
|
|
472 }
|
|
473
|
|
474 =head2 resSurfArea
|
|
475
|
|
476 Title : resSurfArea
|
|
477 Usage : returns solvent exposed area of this residue in
|
|
478 square Angstroms
|
|
479 Function :
|
|
480 Example : $solv_acc = $dssp_obj->resSurfArea( RESIDUE_ID );
|
|
481 Returns : scalar
|
|
482 Args : RESIDUE_ID
|
|
483
|
|
484
|
|
485 =cut
|
|
486
|
|
487 sub resSurfArea {
|
|
488 my $self = shift;
|
|
489 my @args = @_;
|
|
490 my $dssp_key = $self->_toDsspKey( @args );
|
|
491 return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' };
|
|
492 }
|
|
493
|
|
494 =head2 resSecStr
|
|
495
|
|
496 Title : resSecStr
|
|
497 Usage : $ss = $dssp_obj->resSecStr( RESIDUE_ID );
|
|
498 Function : returns the DSSP secondary structural designation of this residue
|
|
499 Example :
|
|
500 Returns : a character ( 'B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ' )
|
|
501 Args : RESIDUE_ID
|
|
502 NOTE : The range of this method differs from that of the
|
|
503 resSecStr method in the STRIDE SecStr parser. That is because of the
|
|
504 slightly different format for STRIDE and DSSP output. The resSecStrSum
|
|
505 method exists to map these different ranges onto an identical range.
|
|
506
|
|
507 =cut
|
|
508
|
|
509 sub resSecStr {
|
|
510 my $self = shift;
|
|
511 my @args = @_;
|
|
512 my $dssp_key = $self->_toDsspKey( @args );
|
|
513 my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' };
|
|
514 return $ss_char if $ss_char;
|
|
515 return ' ';
|
|
516 }
|
|
517
|
|
518
|
|
519 =head2 resSecStrSum
|
|
520
|
|
521 Title : resSecStrSum
|
|
522 Usage : $ss = $dssp_obj->resSecStrSum( $id );
|
|
523 Function : returns what secondary structure group this residue belongs
|
|
524 to. One of: 'H': helix ( H, G, or I )
|
|
525 'B': beta ( B or E )
|
|
526 'T': turn ( T or S )
|
|
527 ' ': none ( ' ' )
|
|
528 This method is similar to resSecStr, but the information
|
|
529 it returns is less specific.
|
|
530 Example :
|
|
531 Returns : a character ( 'H', 'B', 'T', or ' ' )
|
|
532 Args : dssp residue number of pdb residue identifier
|
|
533
|
|
534
|
|
535 =cut
|
|
536
|
|
537 sub resSecStrSum {
|
|
538 my $self = shift;
|
|
539 my @args = @_;
|
|
540 my $dssp_key = $self->_toDsspKey( @args );
|
|
541 my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' };
|
|
542 if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) {
|
|
543 return 'H';
|
|
544 }
|
|
545 if ( $ss_char eq ' ' || !( $ss_char ) ) {
|
|
546 return ' ';
|
|
547 }
|
|
548 if ( $ss_char eq 'B' || $ss_char eq 'E' ) {
|
|
549 return 'B';
|
|
550 }
|
|
551 else {
|
|
552 return 'T';
|
|
553 }
|
|
554 }
|
|
555
|
|
556 # DSSP SPECIFIC
|
|
557
|
|
558 =head2 hBonds
|
|
559
|
|
560 Title : hBonds
|
|
561 Usage : returns number of 14 different types of H Bonds
|
|
562 Function :
|
|
563 Example : $hb = $dssp_obj->hBonds
|
|
564 Returns : pointer to 14 element array of ints
|
|
565 Args : none
|
|
566 NOTE : The different type of H-Bonds reported are, in order:
|
|
567 TYPE O(I)-->H-N(J)
|
|
568 IN PARALLEL BRIDGES
|
|
569 IN ANTIPARALLEL BRIDGES
|
|
570 TYPE O(I)-->H-N(I-5)
|
|
571 TYPE O(I)-->H-N(I-4)
|
|
572 TYPE O(I)-->H-N(I-3)
|
|
573 TYPE O(I)-->H-N(I-2)
|
|
574 TYPE O(I)-->H-N(I-1)
|
|
575 TYPE O(I)-->H-N(I+0)
|
|
576 TYPE O(I)-->H-N(I+1)
|
|
577 TYPE O(I)-->H-N(I+2)
|
|
578 TYPE O(I)-->H-N(I+3)
|
|
579 TYPE O(I)-->H-N(I+4)
|
|
580 TYPE O(I)-->H-N(I+5)
|
|
581
|
|
582 =cut
|
|
583
|
|
584 sub hBonds {
|
|
585 my $self = shift;
|
|
586 return $self->{ 'HBond'};
|
|
587 }
|
|
588
|
|
589 =head2 numSSBr
|
|
590
|
|
591 Title : numSSBr
|
|
592 Usage : returns info about number of SS-bridges
|
|
593 Function :
|
|
594 Example : @SS_br = $dssp_obj->numSSbr();
|
|
595 Returns : 3 element scalar int array
|
|
596 Args : none
|
|
597
|
|
598
|
|
599 =cut
|
|
600
|
|
601 sub numSSBr {
|
|
602 my $self = shift;
|
|
603 return ( $self->{'Head'}->{'TotSSBr'},
|
|
604 $self->{'Head'}->{'TotIaSSBr'},
|
|
605 $self->{'Head'}->{'TotIeSSBr'} );
|
|
606 }
|
|
607
|
|
608 =head2 resHB_O_HN
|
|
609
|
|
610 Title : resHB_O_HN
|
|
611 Usage : returns pointer to a 4 element array
|
|
612 consisting of: relative position of binding
|
|
613 partner #1, energy of that bond (kcal/mol),
|
|
614 relative positionof binding partner #2,
|
|
615 energy of that bond (kcal/mol). If the bond
|
|
616 is not bifurcated, the second bond is reported
|
|
617 as 0, 0.0
|
|
618 Function : accessor
|
|
619 Example : $oBonds_ptr = $dssp_obj->resHB_O_HN( RESIDUE_ID )
|
|
620 Returns : pointer to 4 element array
|
|
621 Args : RESIDUE_ID
|
|
622
|
|
623
|
|
624 =cut
|
|
625
|
|
626 sub resHB_O_HN {
|
|
627 my $self = shift;
|
|
628 my @args = @_;
|
|
629 my $dssp_key = $self->_toDsspKey( @args );
|
|
630 return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_p' },
|
|
631 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_e' },
|
|
632 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_p' },
|
|
633 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_e' } );
|
|
634 }
|
|
635
|
|
636
|
|
637 =head2 resHB_NH_O
|
|
638
|
|
639 Title : resHB_NH_O
|
|
640 Usage : returns pointer to a 4 element array
|
|
641 consisting of: relative position of binding
|
|
642 partner #1, energy of that bond (kcal/mol),
|
|
643 relative positionof binding partner #2,
|
|
644 energy of that bond (kcal/mol). If the bond
|
|
645 is not bifurcated, the second bond is reported
|
|
646 as 0, 0.0
|
|
647 Function : accessor
|
|
648 Example : $nhBonds_ptr = $dssp_obj->resHB_NH_O( RESIDUE_ID )
|
|
649 Returns : pointer to 4 element array
|
|
650 Args : RESIDUE_ID
|
|
651
|
|
652
|
|
653 =cut
|
|
654
|
|
655 sub resHB_NH_O {
|
|
656 my $self = shift;
|
|
657 my @args = @_;
|
|
658 my $dssp_key = $self->_toDsspKey( @args );
|
|
659 return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_p' },
|
|
660 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_e' },
|
|
661 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_p' },
|
|
662 $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_e' } );
|
|
663 }
|
|
664
|
|
665
|
|
666 =head2 resTco
|
|
667
|
|
668 Title : resTco
|
|
669 Usage : returns tco angle around this residue
|
|
670 Function : accessor
|
|
671 Example : resTco = $dssp_obj->resTco( RESIDUE_ID )
|
|
672 Returns : scalar
|
|
673 Args : RESIDUE_ID
|
|
674
|
|
675
|
|
676 =cut
|
|
677
|
|
678 sub resTco {
|
|
679 my $self = shift;
|
|
680 my @args = @_;
|
|
681 my $dssp_key = $self->_toDsspKey( @args );
|
|
682 return $self->{ 'Res' }->[ $dssp_key ]->{ 'tco' };
|
|
683 }
|
|
684
|
|
685
|
|
686 =head2 resKappa
|
|
687
|
|
688 Title : resKappa
|
|
689 Usage : returns kappa angle around this residue
|
|
690 Function : accessor
|
|
691 Example : $kappa = $dssp_obj->resKappa( RESIDUE_ID )
|
|
692 Returns : scalar
|
|
693 Args : RESIDUE_ID ( dssp or PDB )
|
|
694
|
|
695
|
|
696 =cut
|
|
697
|
|
698 sub resKappa {
|
|
699 my $self = shift;
|
|
700 my @args = @_;
|
|
701 my $dssp_key = $self->_toDsspKey( @args );
|
|
702 return $self->{ 'Res' }->[ $dssp_key ]->{ 'kappa' };
|
|
703 }
|
|
704
|
|
705
|
|
706 =head2 resAlpha
|
|
707
|
|
708 Title : resAlpha
|
|
709 Usage : returns alpha angle around this residue
|
|
710 Function : accessor
|
|
711 Example : $alpha = $dssp_obj->resAlpha( RESIDUE_ID )
|
|
712 Returns : scalar
|
|
713 Args : RESIDUE_ID ( dssp or PDB )
|
|
714
|
|
715
|
|
716 =cut
|
|
717
|
|
718 sub resAlpha {
|
|
719 my $self = shift;
|
|
720 my @args = @_;
|
|
721 my $dssp_key = $self->_toDsspKey( @args );
|
|
722 return $self->{ 'Res' }->[ $dssp_key ]->{ 'alpha' };
|
|
723 }
|
|
724
|
|
725 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
|
|
726
|
|
727 =head2 secBounds
|
|
728
|
|
729 Title : secBounds
|
|
730 Usage : gets residue ids of boundary residues in each
|
|
731 contiguous secondary structural element of specified
|
|
732 chain
|
|
733 Function : returns pointer to array of 3 element arrays. First
|
|
734 two elements are the PDB IDs of the start and end points,
|
|
735 respectively and inclusively. The last element is the
|
|
736 DSSP secondary structural assignment code,
|
|
737 i.e. one of : ('B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ')
|
|
738 Example : $ss_elements_pts = $dssp_obj->secBounds( 'A' );
|
|
739 Returns : pointer to array of arrays
|
|
740 Args : chain id ( 'A', for example ). No arg => no chain id
|
|
741
|
|
742
|
|
743 =cut
|
|
744
|
|
745 sub secBounds {
|
|
746 my $self = shift;
|
|
747 my $chain = shift;
|
|
748 my %sec_bounds;
|
|
749
|
|
750 $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' );
|
|
751
|
|
752 # if we've memoized this chain, use that
|
|
753 if ( $self->{ 'SecBounds' } ) {
|
|
754 # check to make sure chain is valid
|
|
755 if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
|
|
756 $self->throw( "No such chain: $chain\n" );
|
|
757 }
|
|
758 return $self->{ 'SecBounds' }->{ $chain };
|
|
759 }
|
|
760
|
|
761 my ( $cur_element, $i, $cur_chain, $beg, );
|
|
762
|
|
763 #initialize
|
|
764 $cur_element = $self->{ 'Res' }->[ 1 ]->{ 'ss_summary' };
|
|
765 $beg = 1;
|
|
766
|
|
767 for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) {
|
|
768 if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) {
|
|
769 # element is terminated by a chain discontinuity
|
|
770 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
|
|
771 [ $self->_toPdbId( $beg ),
|
|
772 $self->_toPdbId( $i - 1 ),
|
|
773 $cur_element ] );
|
|
774 $i++;
|
|
775 $beg = $i;
|
|
776 $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
|
|
777 }
|
|
778
|
|
779 elsif ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ne $cur_element ) {
|
|
780 # element is terminated by beginning of a new element
|
|
781 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
|
|
782 [ $self->_toPdbId( $beg ),
|
|
783 $self->_toPdbId( $i - 1 ),
|
|
784 $cur_element ] );
|
|
785 $beg = $i;
|
|
786 $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
|
|
787 }
|
|
788 }
|
|
789 #last residue
|
|
790 if ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } eq $cur_element ) {
|
|
791 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
|
|
792 [ $self->_toPdbId( $beg ),
|
|
793 $self->_toPdbId( $i ),
|
|
794 $cur_element ] );
|
|
795 }
|
|
796
|
|
797 else {
|
|
798 push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
|
|
799 [ $self->_toPdbId( $beg ),
|
|
800 $self->_toPdbId( $i - 1 ),
|
|
801 $cur_element ] );
|
|
802 push( @{ $sec_bounds{ $self->_pdbChain( $i ) } },
|
|
803 [ $self->_toPdbId( $i ),
|
|
804 $self->_toPdbId( $i ),
|
|
805 $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ] );
|
|
806 }
|
|
807
|
|
808 $self->{ 'SecBounds' } = \%sec_bounds;
|
|
809
|
|
810 # check to make sure chain is valid
|
|
811 if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
|
|
812 $self->throw( "No such chain: $chain\n" );
|
|
813 }
|
|
814
|
|
815 return $self->{ 'SecBounds' }->{ $chain };
|
|
816 }
|
|
817
|
|
818
|
|
819
|
|
820 =head2 chains
|
|
821
|
|
822 Title : chains
|
|
823 Usage : returns pointer to array of chain I.D.s (characters)
|
|
824 Function :
|
|
825 Example : $chains_pnt = $dssp_obj->chains();
|
|
826 Returns : array of characters, one of which may be ' '
|
|
827 Args : none
|
|
828
|
|
829
|
|
830 =cut
|
|
831
|
|
832 sub chains {
|
|
833 my $self = shift;
|
|
834 my $cont_segs = $self->_contSegs();
|
|
835 my %chains;
|
|
836 my $seg;
|
|
837 foreach $seg ( @{ $cont_segs } ) {
|
|
838 $chains{ $seg->[ 2 ] } = 1;
|
|
839 }
|
|
840 my @chains = keys( %chains );
|
|
841 return \@chains;
|
|
842 }
|
|
843
|
|
844
|
|
845 =head2 getSeq
|
|
846
|
|
847 Title : getSeq
|
|
848 Usage : returns a Bio::PrimarySeq object which represents a good
|
|
849 guess at the sequence of the given chain
|
|
850 Function : For most chains of most entries, the sequence returned by
|
|
851 this method will be very good. However, it is inherently
|
|
852 unsafe to rely on DSSP to extract sequence information about
|
|
853 a PDB entry. More reliable information can be obtained from
|
|
854 the PDB entry itself.
|
|
855 Example : $pso = $dssp_obj->getSeq( 'A' );
|
|
856 Returns : (pointer to) a PrimarySeq object
|
|
857 Args : Chain identifier. If none given, ' ' is assumed. If no ' '
|
|
858 chain, the first chain is used.
|
|
859
|
|
860
|
|
861 =cut
|
|
862
|
|
863 sub getSeq {
|
|
864 my $self = shift;
|
|
865 my $chain = shift;
|
|
866
|
|
867 my ( $pot_chain,
|
|
868 $seq,
|
|
869 $frag_num,
|
|
870 $frag,
|
|
871 $curPdbNum,
|
|
872 $lastPdbNum,
|
|
873 $gap_len,
|
|
874 $i,
|
|
875 $id,
|
|
876 );
|
|
877 my @frags;
|
|
878
|
|
879 if ( !( $chain ) ) {
|
|
880 $chain = ' ';
|
|
881 }
|
|
882
|
|
883 if ( $self->{ 'Seq' }->{ $chain } ) {
|
|
884 return $self->{ 'Seq' }->{ $chain };
|
|
885 }
|
|
886
|
|
887 my $contSegs_pnt = $self->_contSegs();
|
|
888
|
|
889 # load up specified chain
|
|
890 foreach $pot_chain ( @{ $contSegs_pnt } ) {
|
|
891 if ( $pot_chain->[ 2 ] eq $chain ) {
|
|
892 push( @frags, $pot_chain );
|
|
893 }
|
|
894 }
|
|
895
|
|
896 # if that didn't work, just get the first one
|
|
897 if ( !( @frags ) ) {
|
|
898 $chain = $contSegs_pnt->[ 0 ]->[ 2 ];
|
|
899 foreach $pot_chain ( @{ $contSegs_pnt } ) {
|
|
900 if ( $pot_chain->[ 2 ] eq $chain ) {
|
|
901 push( @frags, $chain );
|
|
902 }
|
|
903 }
|
|
904 }
|
|
905
|
|
906 # now build the sequence string
|
|
907 $seq = "";
|
|
908 $frag_num = 0;
|
|
909 foreach $frag ( @frags ) {
|
|
910 $frag_num++;
|
|
911 if ( $frag_num > 1 ) { # we need to put in some gap seq
|
|
912 $curPdbNum = $self->_pdbNum( $frag->[ 0 ] );
|
|
913 $gap_len = $curPdbNum - $lastPdbNum - 1;
|
|
914 if ( $gap_len > 0 ) {
|
|
915 $seq .= 'u' x $gap_len;
|
|
916 }
|
|
917 else {
|
|
918 $seq .= 'u';
|
|
919 }
|
|
920 }
|
|
921 for ( $i = $frag->[ 0 ]; $i <= $frag->[ 1 ]; $i++ ) {
|
|
922 $seq .= $self->_resAA( $i );
|
|
923 }
|
|
924 $lastPdbNum = $self->_pdbNum( $i - 1 );
|
|
925 }
|
|
926
|
|
927
|
|
928
|
|
929 $id = $self->pdbID();
|
|
930 $id .= ":$chain";
|
|
931
|
|
932 $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new ( -seq => $seq,
|
|
933 -id => $id,
|
|
934 -moltype => 'protein'
|
|
935 );
|
|
936 return $self->{ 'Seq' }->{ $chain };
|
|
937 }
|
|
938
|
|
939 =head1 INTERNAL METHODS
|
|
940
|
|
941
|
|
942 =cut
|
|
943
|
|
944 =head2 _pdbChain
|
|
945
|
|
946 Title : _pdbChain
|
|
947 Usage : returns the pdb chain id of given residue
|
|
948 Function :
|
|
949 Example : $chain_id = $dssp_obj->pdbChain( DSSP_KEY );
|
|
950 Returns : scalar
|
|
951 Args : DSSP_KEY ( dssp or pdb )
|
|
952
|
|
953
|
|
954 =cut
|
|
955
|
|
956 sub _pdbChain {
|
|
957 my $self = shift;
|
|
958 my $dssp_key = shift;
|
|
959 return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_chain' };
|
|
960 }
|
|
961
|
|
962 =head2 _resAA
|
|
963
|
|
964 Title : _resAA
|
|
965 Usage : fetches the 1 char amino acid code, given a dssp id
|
|
966 Function :
|
|
967 Example : $aa = $dssp_obj->_resAA( dssp_id );
|
|
968 Returns : 1 character scalar string
|
|
969 Args : dssp_id
|
|
970
|
|
971
|
|
972 =cut
|
|
973
|
|
974 sub _resAA {
|
|
975 my $self = shift;
|
|
976 my $dssp_key = shift;
|
|
977 return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' };
|
|
978 }
|
|
979
|
|
980
|
|
981 =head2 _pdbNum
|
|
982
|
|
983 Title : _pdbNum
|
|
984 Usage : fetches the numeric portion of the identifier for a given
|
|
985 residue as reported by the pdb entry. Note, this DOES NOT
|
|
986 uniquely specify a residue. There may be an insertion code
|
|
987 and/or chain identifier differences.
|
|
988 Function :
|
|
989 Example : $pdbNum = $self->_pdbNum( DSSP_ID );
|
|
990 Returns : a scalar
|
|
991 Args : DSSP_ID
|
|
992
|
|
993
|
|
994 =cut
|
|
995
|
|
996 sub _pdbNum {
|
|
997 my $self = shift;
|
|
998 my $dssp_key = shift;
|
|
999 return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_resnum' };
|
|
1000 }
|
|
1001
|
|
1002 =head2 _pdbInsCo
|
|
1003
|
|
1004 Title : _pdbInsCo
|
|
1005 Usage : fetches the Insertion Code for this residue, if it has one.
|
|
1006 Function :
|
|
1007 Example : $pdbNum = $self->_pdbInsCo( DSSP_ID );
|
|
1008 Returns : a scalar
|
|
1009 Args : DSSP_ID
|
|
1010
|
|
1011
|
|
1012 =cut
|
|
1013
|
|
1014 sub _pdbInsCo {
|
|
1015 my $self = shift;
|
|
1016 my $dssp_key = shift;
|
|
1017 return $self->{ 'Res' }->[ $dssp_key ]->{ 'insertionco' };
|
|
1018 }
|
|
1019
|
|
1020 =head2 _toPdbId
|
|
1021
|
|
1022 Title : _toPdbId
|
|
1023 Usage : Takes a dssp key and builds the corresponding
|
|
1024 PDB identifier string
|
|
1025 Function :
|
|
1026 Example : $pdbId = $self->_toPdbId( DSSP_ID );
|
|
1027 Returns : scalar
|
|
1028 Args : DSSP_ID
|
|
1029
|
|
1030 =cut
|
|
1031
|
|
1032 sub _toPdbId {
|
|
1033 my $self = shift;
|
|
1034 my $dssp_key = shift;
|
|
1035 my $pdbId = ( $self->_pdbNum( $dssp_key ).
|
|
1036 $self->_pdbInsCo( $dssp_key ) );
|
|
1037 my $chain = $self->_pdbChain( $dssp_key );
|
|
1038 $pdbId = "$pdbId:$chain" if $chain;
|
|
1039 return $pdbId;
|
|
1040 }
|
|
1041
|
|
1042 =head2 _contSegs
|
|
1043
|
|
1044 Title : _contSegs
|
|
1045 Usage : find the endpoints of continuous regions of this structure
|
|
1046 Function : returns pointer to array of 3 element array.
|
|
1047 Elements are the dssp keys of the start and end points of each
|
|
1048 continuous element and its PDB chain id (may be blank).
|
|
1049 Note that it is common to have several
|
|
1050 continuous elements with the same chain id. This occurs
|
|
1051 when an internal region is disordered and no structural
|
|
1052 information is available.
|
|
1053 Example : $cont_seg_ptr = $dssp_obj->_contSegs();
|
|
1054 Returns : pointer to array of arrays
|
|
1055 Args : none
|
|
1056
|
|
1057
|
|
1058 =cut
|
|
1059
|
|
1060 sub _contSegs {
|
|
1061 my $self = shift;
|
|
1062 if ( $self->{ 'contSegs' } ) {
|
|
1063 return $self->{ 'contSegs' };
|
|
1064 }
|
|
1065 else {
|
|
1066 # first time, so make contSegs
|
|
1067 my ( $cur_chain, $i, $beg );
|
|
1068 my @contSegs;
|
|
1069 #initialize
|
|
1070 $cur_chain = $self->_pdbChain( 1 );
|
|
1071 $beg = 1;
|
|
1072 #internal residues
|
|
1073 for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) {
|
|
1074 if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) {
|
|
1075 push( @contSegs, [ $beg, $i - 1, $cur_chain ] );
|
|
1076 $beg = $i + 1;
|
|
1077 $cur_chain = $self->_pdbChain( $i + 1 );
|
|
1078 }
|
|
1079 }
|
|
1080 # last residue must be the end of a chain
|
|
1081 push( @contSegs, [ $beg, $i, $cur_chain ] );
|
|
1082
|
|
1083 $self->{ 'contSegs' } = \@contSegs;
|
|
1084 return $self->{ 'contSegs' };
|
|
1085 }
|
|
1086 }
|
|
1087
|
|
1088 =head2 _numResLines
|
|
1089
|
|
1090 Title : _numResLines
|
|
1091 Usage : returns the total number of residue lines in this
|
|
1092 dssp file.
|
|
1093 This number is DIFFERENT than the number of residues in
|
|
1094 the pdb file because dssp has chain termination and chain
|
|
1095 discontinuity 'residues'.
|
|
1096 Function :
|
|
1097 Example : $num_res = $dssp_obj->_numResLines();
|
|
1098 Returns : scalar int
|
|
1099 Args : none
|
|
1100
|
|
1101
|
|
1102 =cut
|
|
1103
|
|
1104 sub _numResLines {
|
|
1105 my $self = shift;
|
|
1106 return ( $#{$self->{ 'Res' }} );
|
|
1107 }
|
|
1108
|
|
1109 =head2 _toDsspKey
|
|
1110
|
|
1111 Title : _toDsspKey
|
|
1112 Usage : returns the unique dssp integer key given a pdb residue id.
|
|
1113 All accessor methods require (internally)
|
|
1114 the dssp key. This method is very useful in converting
|
|
1115 pdb keys to dssp keys so the accessors can accept pdb keys
|
|
1116 as argument. PDB Residue IDs are inherently
|
|
1117 problematic since they have multiple parts of
|
|
1118 overlapping function and ill-defined or observed
|
|
1119 convention in form. Input can be in any of the formats
|
|
1120 described in the DESCRIPTION section above.
|
|
1121 Function :
|
|
1122 Example : $dssp_id = $dssp_obj->_pdbKeyToDsspKey( '10B:A' )
|
|
1123 Returns : scalar int
|
|
1124 Args : pdb residue identifier: num[insertion code]:[chain]
|
|
1125
|
|
1126
|
|
1127 =cut
|
|
1128
|
|
1129 sub _toDsspKey {
|
|
1130 # Consider adding lookup table for 'common' name (like 20:A) for
|
|
1131 # fast access. Could be built during parse of input.
|
|
1132
|
|
1133 my $self = shift;
|
|
1134 my $arg_str;
|
|
1135
|
|
1136 my ( $key_num, $chain_id, $ins_code );
|
|
1137
|
|
1138 # check to see how many args are given
|
|
1139 if ( $#_ > 1 ) { # multiple args
|
|
1140 $key_num = shift;
|
|
1141 if ( $#_ > 1 ) { # still multiple args => ins. code, too
|
|
1142 $ins_code = shift;
|
|
1143 $chain_id = shift;
|
|
1144 }
|
|
1145 else { # just one more arg. => chain_id
|
|
1146 $chain_id = shift;
|
|
1147 }
|
|
1148 }
|
|
1149 else { # only single arg. Might be number or string
|
|
1150 $arg_str = shift;
|
|
1151 if ( $arg_str =~ /:/ ) {
|
|
1152 # a chain is specified
|
|
1153 ( $chain_id ) = ( $arg_str =~ /:(.)/);
|
|
1154 $arg_str =~ s/:.//;
|
|
1155 }
|
|
1156 if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
|
|
1157 # an insertion code is specified
|
|
1158 ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
|
|
1159 $arg_str =~ s/[A-Z]|[a-z]//g;
|
|
1160 }
|
|
1161 #now, get the number bit-> everything still around
|
|
1162 $key_num = $arg_str;
|
|
1163 }
|
|
1164
|
|
1165 # Now find the residue which fits this description. Linear search is
|
|
1166 # probably not the best way to do this, but oh well...
|
|
1167 for ( my $i = 1; $i <= $self->_numResLines(); $i++ ) {
|
|
1168 if ( $key_num == $self->{'Res'}->[$i]->{'pdb_resnum'} ) {
|
|
1169 if ( $chain_id ) { # if a chain was specified
|
|
1170 if ( $chain_id eq $self->{'Res'}->[$i]->{'pdb_chain'} ) {
|
|
1171 # and it's the right one
|
|
1172 if ( $ins_code ) { # if insertion code was specified
|
|
1173 if ( $ins_code eq $self->{'Res'}->[$i]->{'insertionco'} ) {
|
|
1174 # and it's the right one
|
|
1175 return $i;
|
|
1176 }
|
|
1177 }
|
|
1178 else { # no isertion code specified, this is it
|
|
1179 return $i;
|
|
1180 }
|
|
1181 }
|
|
1182 }
|
|
1183 else { # no chain was specified
|
|
1184 return $i;
|
|
1185 }
|
|
1186 }
|
|
1187 }
|
|
1188 $self->throw( "PDB key not found." );
|
|
1189 }
|
|
1190
|
|
1191 =head2 _parse
|
|
1192
|
|
1193 Title : _parse
|
|
1194 Usage : parses dssp output
|
|
1195 Function :
|
|
1196 Example : used by the constructor
|
|
1197 Returns :
|
|
1198 Args : input source ( handled by Bio::Root:IO )
|
|
1199
|
|
1200
|
|
1201 =cut
|
|
1202
|
|
1203 sub _parse {
|
|
1204 my $self = shift;
|
|
1205 my $file = shift;
|
|
1206 my $cur;
|
|
1207 my $current_chain;
|
|
1208 my ( @elements, @hbond );
|
|
1209 my ( %head, %his, );
|
|
1210 my $element;
|
|
1211 my $res_num;
|
|
1212
|
|
1213 $cur = <$file>;
|
|
1214 unless ( $cur =~ /^==== Secondary Structure Definition/ ) {
|
|
1215 $self->throw( "Not dssp output" );
|
|
1216 return;
|
|
1217 }
|
|
1218
|
|
1219 $cur = <$file>;
|
|
1220 ( $element ) = ( $cur =~ /^REFERENCE\s+(.+?)\s+\./ );
|
|
1221 $head{ 'REFERENCE' } = $element;
|
|
1222
|
|
1223 $cur = <$file>;
|
|
1224 @elements = split( /\s+/, $cur );
|
|
1225 pop( @elements ); # take off that annoying period
|
|
1226 $head{ 'PDB' } = pop( @elements );
|
|
1227 $head{ 'DATE' } = pop( @elements );
|
|
1228 # now, everything else is "header" except for the word
|
|
1229 # HEADER
|
|
1230 shift( @elements );
|
|
1231 $element = shift( @elements );
|
|
1232 while ( @elements ) {
|
|
1233 $element = $element." ".shift( @elements );
|
|
1234 }
|
|
1235 $head{ 'HEADER' } = $element;
|
|
1236
|
|
1237 $cur = <$file>;
|
|
1238 ($element) = ( $cur =~ /^COMPND\s+(.+?)\s+\./ );
|
|
1239 $head{ 'COMPND' } = $element;
|
|
1240
|
|
1241 $cur = <$file>;
|
|
1242 ($element) = ( $cur =~ /^PDBSOURCE\s+(.+?)\s+\./ );
|
|
1243 $head{ 'SOURCE' } = $element;
|
|
1244
|
|
1245 $cur = <$file>;
|
|
1246 ($element) = ( $cur =~ /^AUTHOR\s+(.+?)\s+/ );
|
|
1247 $head{ 'AUTHOR' } = $element;
|
|
1248
|
|
1249 $cur = <$file>;
|
|
1250 @elements = split( /\s+/, $cur );
|
|
1251 shift( @elements );
|
|
1252 $head{ 'TotNumRes' } = shift( @elements );
|
|
1253 $head{ 'NumChain' } = shift( @elements );
|
|
1254 $head{ 'TotSSBr' } = shift( @elements );
|
|
1255 $head{ 'TotIaSSBr' } = shift( @elements );
|
|
1256 $head{ 'TotIeSSBr' } = shift( @elements );
|
|
1257
|
|
1258 $cur = <$file>;
|
|
1259 ( $element ) = ( $cur =~ /\s*(\d+\.\d*)\s+ACCESSIBLE SURFACE OF PROTEIN/ );
|
|
1260 $head{ 'ProAccSurf' } = $element;
|
|
1261 $self->{ 'Head' } = \%head;
|
|
1262
|
|
1263 for ( my $i = 1; $i <= 14; $i++ ) {
|
|
1264 $cur = <$file>;
|
|
1265 ( $element ) =
|
|
1266 $cur =~ /\s*(\d+)\s+\d+\.\d+\s+TOTAL NUMBER OF HYDROGEN/;
|
|
1267 push( @hbond, $element );
|
|
1268 # $hbond{ $hBondType } = $element;
|
|
1269 }
|
|
1270 $self->{ 'HBond' } = \@hbond;
|
|
1271
|
|
1272 my $histogram_finished = 0;
|
|
1273 while ( !($histogram_finished) && chomp( $cur = <$file> ) ) {
|
|
1274 if ( $cur =~ /RESIDUE AA STRUCTURE/ ) {
|
|
1275 $histogram_finished = 1;
|
|
1276 }
|
|
1277 }
|
|
1278
|
|
1279 while ( chomp( $cur = <$file> ) ) {
|
|
1280 $res_num = substr( $cur, 0, 5 );
|
|
1281 $res_num =~ s/\s//g;
|
|
1282 $self->{ 'Res' }->[ $res_num ] = &_parseResLine( $cur );
|
|
1283 }
|
|
1284 }
|
|
1285
|
|
1286
|
|
1287 =head2 _parseResLine
|
|
1288
|
|
1289 Title : _parseResLine
|
|
1290 Usage : parses a single residue line
|
|
1291 Function :
|
|
1292 Example : used internally
|
|
1293 Returns :
|
|
1294 Args : residue line ( string )
|
|
1295
|
|
1296
|
|
1297 =cut
|
|
1298
|
|
1299 sub _parseResLine() {
|
|
1300 my $cur = shift;
|
|
1301 my ( $feat, $value );
|
|
1302 my %elements;
|
|
1303
|
|
1304 foreach $feat ( keys %lookUp ) {
|
|
1305 $value = substr( $cur, $lookUp{ $feat }->[0],
|
|
1306 $lookUp{ $feat }->[1] );
|
|
1307 $value =~ s/\s//g;
|
|
1308 $elements{$feat} = $value ;
|
|
1309 }
|
|
1310
|
|
1311 # if no chain id, make it '-' (like STRIDE...very convenient)
|
|
1312 if ( !( $elements{ 'pdb_chain' } ) || $elements{ 'pdb_chain'} eq ' ' ) {
|
|
1313 $elements{ 'pdb_chain' } = '-';
|
|
1314 }
|
|
1315 return \%elements;
|
|
1316 }
|
|
1317
|
|
1318 return 1; #just because
|
|
1319
|