0
|
1 # $id $
|
|
2 #
|
|
3 # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm
|
|
4 #
|
|
5 # Cared for by Ed Green <ed@compbio.berkeley.edu>
|
|
6 #
|
|
7 # Copyright Univ. of California
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12 =head1 NAME
|
|
13
|
|
14 Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output
|
|
15
|
|
16 =head1 SYNOPSIS
|
|
17
|
|
18 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' );
|
|
19
|
|
20 # or
|
|
21
|
|
22 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT );
|
|
23
|
|
24 # Get secondary structure assignment for PDB residue 20 of chain A
|
|
25 $sec_str = $stride_obj->resSecStr( '20:A' );
|
|
26
|
|
27 # same
|
|
28 $sec_str = $stride_obj->resSecStr( 20, 'A' )
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a
|
|
33 program (similar to DSSP) for assigning secondary structure to
|
|
34 individual residues of a pdb structure file.
|
|
35
|
|
36 ( Knowledge-Based Protein Secondary Structure Assignment,
|
|
37 PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) )
|
|
38
|
|
39 STRIDE is available here:
|
|
40 http://www.embl-heidelberg.de/argos/stride/down_stride.html
|
|
41
|
|
42 Methods are then available for extracting all of the information
|
|
43 present within the output or convenient subsets of it.
|
|
44
|
|
45 Although they are very similar in function, DSSP and STRIDE differ
|
|
46 somewhat in output format. These differences are reflected in the
|
|
47 return value of some methods of these modules. For example, both
|
|
48 the STRIDE and DSSP parsers have resSecStr() methods for returning
|
|
49 the secondary structure of a given residue. However, the range of
|
|
50 return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the
|
|
51 range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See
|
|
52 individual methods for details.
|
|
53
|
|
54 The methods are roughly divided into 3 sections:
|
|
55
|
|
56 1. Global features of this structure (PDB ID, total surface area,
|
|
57 etc.). These methods do not require an argument.
|
|
58 2. Residue specific features ( amino acid, secondary structure,
|
|
59 solvent exposed surface area, etc. ). These methods do require an
|
|
60 argument. The argument is supposed to uniquely identify a
|
|
61 residue described within the structure. It can be of any of the
|
|
62 following forms:
|
|
63 ('#A:B') or ( #, 'A', 'B' )
|
|
64 || |
|
|
65 || - Chain ID (blank for single chain)
|
|
66 |--- Insertion code for this residue. Blank for most residues.
|
|
67 |--- Numeric portion of residue ID.
|
|
68
|
|
69 (#)
|
|
70 |
|
|
71 --- Numeric portion of residue ID. If there is only one chain and
|
|
72 it has no ID AND there is no residue with an insertion code at this
|
|
73 number, then this can uniquely specify a residue.
|
|
74
|
|
75 ('#:C') or ( #, 'C' )
|
|
76 | |
|
|
77 | -Chain ID
|
|
78 ---Numeric portion of residue ID.
|
|
79
|
|
80 If a residue is incompletely specified then the first residue that
|
|
81 fits the arguments is returned. For example, if 19 is the argument
|
|
82 and there are three chains, A, B, and C with a residue whose number
|
|
83 is 19, then 19:A will be returned (assuming its listed first).
|
|
84
|
|
85 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
|
|
86 are not supported by these modules.
|
|
87
|
|
88 3. Value-added methods. Return values are not verbatim strings
|
|
89 parsed from DSSP or STRIDE output.
|
|
90
|
|
91 =head1 FEEDBACK
|
|
92
|
|
93 =head2 MailingLists
|
|
94
|
|
95 User feedback is an integral part of the evolution of this and other
|
|
96 Bioperl modules. Send your comments and suggestions preferably to one
|
|
97 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
98
|
|
99 bioperl-l@bioperl.org - General discussion
|
|
100 http://bio.perl.org/MailList.html - About the mailing lists
|
|
101
|
|
102 =head2 Reporting Bugs
|
|
103
|
|
104 Report bugs to the Bioperl bug tracking system to help us keep track of
|
|
105 the bugs and their resolution. Bug reports can be submitted via email
|
|
106 or the web:
|
|
107
|
|
108 bioperl-bugs@bio.perl.org
|
|
109 http://bugzilla.bioperl.org/
|
|
110
|
|
111 =head1 AUTHOR - Ed Green
|
|
112
|
|
113 Email ed@compbio.berkeley.edu
|
|
114
|
|
115
|
|
116 =head1 APPENDIX
|
|
117
|
|
118 The Rest of the documentation details each method.
|
|
119 Internal methods are preceded with a _.
|
|
120
|
|
121
|
|
122 =cut
|
|
123
|
|
124 package Bio::Structure::SecStr::STRIDE::Res;
|
|
125 use strict;
|
|
126 use vars qw(@ISA);
|
|
127 use Bio::Root::Root;
|
|
128 use Bio::Root::IO;
|
|
129 use Bio::PrimarySeq;
|
|
130
|
|
131 @ISA = qw(Bio::Root::Root);
|
|
132
|
|
# Column index of each parsed ASG field inside a per-residue record
# (the array refs stored under $self->{'ASG'}->{CHAIN}->[ORDINAL]).
our %ASGTable;
@ASGTable{ qw( aa resNum ssAbbr ssName phi psi surfArea ) } = ( 0 .. 6 );

# Three-letter residue names mapped to their one-letter amino acid codes.
our %AATable = qw(
    ALA A    ARG R    ASN N
    ASP D    CYS C    GLN Q
    GLU E    GLY G    HIS H
    ILE I    LEU L    LYS K
    MET M    PHE F    PRO P
    SER S    THR T    TRP W
    TYR Y    VAL V
);
|
|
148
|
|
149 =head2 new
|
|
150
|
|
151 Title : new
|
|
152 Usage : makes new object of this class
|
|
153 Function : Constructor
|
|
154 Example : $stride_obj = Bio::Structure::SecStr::STRIDE::Res->new( '-file' => filename
|
|
155 # or
|
|
156 '-fh' => FILEHANDLE )
|
|
157 Returns : object (ref)
|
|
158 Args : filename or filehandle( must be proper STRIDE output )
|
|
159
|
|
160 =cut
|
|
161
|
|
# Constructor: builds the object through the parent class, opens the
# input described by the same arguments with Bio::Root::IO, parses it
# into the object and closes the IO wrapper again.
sub new {
    my $class = shift;
    my @ctor_args = @_;

    my $self   = $class->SUPER::new(@ctor_args);
    my $reader = Bio::Root::IO->new(@ctor_args);

    # The IO object itself (not a raw filehandle) is handed to the parser.
    $self->_parse($reader);
    $reader->close();

    return $self;
}
|
|
170
|
|
171 # GLOBAL FEATURES / INFO / STATS
|
|
172
|
|
173 =head2 totSurfArea
|
|
174
|
|
175 Title : totSurfArea
|
|
176 Usage : returns sum of surface areas of all residues of all
|
|
177 chains considered. Result is memoized.
|
|
178 Function :
|
|
179 Example : $tot_SA = $stride_obj->totSurfArea();
|
|
180 Returns : scalar
|
|
181 Args : none
|
|
182
|
|
183
|
|
184 =cut
|
|
185
|
|
# Sums the surface-area column over every residue record (ordinals 1..last)
# of every chain. A true cached value in $self->{'SurfArea'} is returned
# directly; otherwise the sum is computed and stored there.
sub totSurfArea {
    my ($self) = @_;

    return $self->{'SurfArea'} if $self->{'SurfArea'};

    my $area_col = $ASGTable{'surfArea'};
    my $sum      = 0;
    for my $chain_id ( keys %{ $self->{'ASG'} } ) {
        my $last_ord = $#{ $self->{'ASG'}->{$chain_id} };
        for my $ord ( 1 .. $last_ord ) {
            $sum += $self->{'ASG'}->{$chain_id}->[$ord]->[$area_col];
        }
    }

    $self->{'SurfArea'} = $sum;
    return $self->{'SurfArea'};
}
|
|
207
|
|
208 =head2 numResidues
|
|
209
|
|
210 Title : numResidues
|
|
211 Usage : returns total number of residues in all chains or
|
|
212 just the specified chain
|
|
213 Function :
|
|
214 Example : $tot_res = $stride_obj->numResidues();
|
|
215 Returns : scalar int
|
|
216 Args : none or chain id
|
|
217
|
|
218
|
|
219 =cut
|
|
220
|
|
# Counts residues as the highest index of each chain's record array
# (index 0 is never used for a residue). With a true chain argument only
# that chain is counted; otherwise all chains are added up.
sub numResidues {
    my ( $self, $chain ) = @_;

    my $count = 0;
    for my $chain_id ( keys %{ $self->{'ASG'} } ) {
        next if $chain && $chain_id ne $chain;
        $count += $#{ $self->{'ASG'}{$chain_id} };
    }
    return $count;
}
|
|
238
|
|
239 # STRAIGHT FROM THE PDB ENTRY
|
|
240
|
|
241 =head2 pdbID
|
|
242
|
|
243 Title : pdbID
|
|
244 Usage : returns pdb identifier ( 1FJM, e.g. )
|
|
245 Function :
|
|
246 Example : $pdb_id = $stride_obj->pdbID();
|
|
247 Returns : scalar string
|
|
248 Args : none
|
|
249
|
|
250
|
|
251 =cut
|
|
252
|
|
# Returns the value stored under 'PDB' (set while parsing the HDR line).
sub pdbID {
    my ($self) = @_;
    return $self->{'PDB'};
}
|
|
257 =head2 pdbAuthor
|
|
258
|
|
259 Title : pdbAuthor
|
|
260 Usage : returns author of this PDB entry
|
|
261 Function :
|
|
262 Example : $auth = $stride_obj->pdbAuthor()
|
|
263 Returns : scalar string
|
|
264 Args : none
|
|
265
|
|
266
|
|
267 =cut
|
|
268
|
|
# Joins all collected AUT entries into one space-separated string.
sub pdbAuthor {
    my ($self) = @_;
    my @author_lines = @{ $self->{'HEAD'}->{'AUT'} };
    return join( ' ', @author_lines );
}
|
|
273
|
|
274 =head2 pdbCompound
|
|
275
|
|
276 Title : pdbCompound
|
|
277 Usage : returns string of what was found on the
|
|
278 CMP lines
|
|
279 Function :
|
|
280 Example : $cmp = $stride_obj->pdbCompound();
|
|
281 Returns : string
|
|
282 Args : none
|
|
283
|
|
284
|
|
285 =cut
|
|
286
|
|
# Joins all collected CMP entries into one space-separated string.
sub pdbCompound {
    my ($self) = @_;
    my @compound_lines = @{ $self->{'HEAD'}->{'CMP'} };
    return join( ' ', @compound_lines );
}
|
|
291
|
|
292 =head2 pdbDate
|
|
293
|
|
294 Title : pdbDate
|
|
295 Usage : returns date given in PDB file
|
|
296 Function :
|
|
297 Example : $pdb_date = $stride_obj->pdbDate();
|
|
298 Returns : scalar
|
|
299 Args : none
|
|
300
|
|
301
|
|
302 =cut
|
|
303
|
|
# Returns the value stored under 'DATE' (set while parsing the HDR line).
sub pdbDate {
    my ($self) = @_;
    return $self->{'DATE'};
}
|
|
308
|
|
309 =head2 pdbHeader
|
|
310
|
|
311 Title : pdbHeader
|
|
312 Usage : returns string of characters found on the PDB header line
|
|
313 Function :
|
|
314 Example : $head = $stride_obj->pdbHeader();
|
|
315 Returns : scalar
|
|
316 Args : none
|
|
317
|
|
318
|
|
319 =cut
|
|
320
|
|
# Returns the free-text part of the HDR line kept under HEAD -> HEADER.
sub pdbHeader {
    my ($self) = @_;
    my $head = $self->{'HEAD'};
    return $head->{'HEADER'};
}
|
|
325
|
|
326 =head2 pdbSource
|
|
327
|
|
328 Title : pdbSource
|
|
329 Usage : returns string of what was found on SRC lines
|
|
330 Function :
|
|
331 Example : $src = $stride_obj->pdbSource();
|
|
332 Returns : scalar
|
|
333 Args : none
|
|
334
|
|
335
|
|
336 =cut
|
|
337
|
|
# Joins all collected SRC entries into one space-separated string.
sub pdbSource {
    my ($self) = @_;
    my @source_lines = @{ $self->{'HEAD'}->{'SRC'} };
    return join( ' ', @source_lines );
}
|
|
342
|
|
343 # RESIDUE SPECIFIC ACCESSORS
|
|
344
|
|
345 =head2 resAA
|
|
346
|
|
347 Title : resAA
|
|
348 Usage : returns 1 letter abbr. of the amino acid specified by
|
|
349 the arguments
|
|
350 Function :
|
|
351 Examples : $aa = $stride_obj->resAA( RESIDUE_ID );
|
|
352 Returns : scalar character
|
|
353 Args : RESIDUE_ID
|
|
354
|
|
355
|
|
356 =cut
|
|
357
|
|
# Resolves the residue identifier to (ordinal, chain) and translates the
# stored three-letter residue name through %AATable. Names missing from
# the table yield undef.
sub resAA {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $three_letter =
      $self->{'ASG'}->{$chain_id}->[$ordinal]->[ $ASGTable{'aa'} ];
    return ( $AATable{$three_letter} );
}
|
|
364
|
|
365 =head2 resPhi
|
|
366
|
|
367 Title : resPhi
|
|
368 Usage : returns phi angle of specified residue
|
|
369 Function :
|
|
370 Example : $phi = $stride_obj->resPhi( RESIDUE_ID );
|
|
371 Returns : scalar
|
|
372 Args : RESIDUE_ID
|
|
373
|
|
374
|
|
375 =cut
|
|
376
|
|
# Returns the phi column of the record for the identified residue.
sub resPhi {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'phi'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
383
|
|
384 =head2 resPsi
|
|
385
|
|
386 Title : resPsi
|
|
387 Usage : returns psi angle of specified residue
|
|
388 Function :
|
|
389 Example : $psi = $stride_obj->resPsi( RESIDUE_ID );
|
|
390 Returns : scalar
|
|
391 Args : RESIDUE_ID
|
|
392
|
|
393
|
|
394 =cut
|
|
395
|
|
# Returns the psi column of the record for the identified residue.
sub resPsi {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'psi'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
402
|
|
403 =head2 resSolvAcc
|
|
404
|
|
405 Title : resSolvAcc
|
|
406 Usage : returns stride calculated surface area of specified residue
|
|
407 Function :
|
|
408 Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID );
|
|
409 Returns : scalar
|
|
410 Args : RESIDUE_ID
|
|
411
|
|
412
|
|
413 =cut
|
|
414
|
|
# Returns the surface-area column of the record for the identified
# residue (the same column resSurfArea reads).
sub resSolvAcc {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'surfArea'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
421
|
|
422 =head2 resSurfArea
|
|
423
|
|
424 Title : resSurfArea
|
|
425 Usage : returns stride calculated surface area of specified residue
|
|
426 Function :
|
|
427 Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID );
|
|
428 Returns : scalar
|
|
429 Args : RESIDUE_ID
|
|
430
|
|
431
|
|
432 =cut
|
|
433
|
|
# Returns the surface-area column of the record for the identified
# residue (the same column resSolvAcc reads).
sub resSurfArea {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'surfArea'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
440
|
|
441 =head2 resSecStr
|
|
442
|
|
443 Title : resSecStr
|
|
444 Usage : gives one letter abbr. of stride determined secondary
|
|
445 structure of specified residue
|
|
446 Function :
|
|
447 Example : $ss = $stride_obj->resSecStr( RESIDUE_ID );
|
|
448 Returns : one of: 'H' => Alpha Helix
|
|
449 'G' => 3-10 helix
|
|
450 'I' => PI-helix
|
|
451 'E' => Extended conformation
|
|
452 'B' or 'b' => Isolated bridge
|
|
453 'T' => Turn
|
|
454 'C' => Coil
|
|
455 ' ' => None
|
|
456 # NOTE: This range is slightly DIFFERENT from the
|
|
457 # DSSP method of the same name
|
|
458 Args : RESIDUE_ID
|
|
459
|
|
460
|
|
461 =cut
|
|
462
|
|
# Returns the one-letter secondary structure column of the record for
# the identified residue.
sub resSecStr {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'ssAbbr'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
469
|
|
470 =head2 resSecStrSum
|
|
471
|
|
472 Title : resSecStrSum
|
|
473 Usage : gives one letter summary of secondary structure of
|
|
474 specified residue. More general than resSecStr()
|
|
475 Function :
|
|
476 Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID );
|
|
477 Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil)
|
|
478 Args : residue identifier(s) ( SEE INTRO NOTE )
|
|
479
|
|
480
|
|
481 =cut
|
|
482
|
|
# Collapses the one-letter code from resSecStr() into a coarse class:
# H, G, I -> 'H';  E, B, b -> 'B';  T -> 'T';  anything else -> 'C'.
sub resSecStrSum {
    my ( $self, @residue_id ) = @_;

    my %summary_for = (
        'H' => 'H', 'G' => 'H', 'I' => 'H',
        'E' => 'B', 'B' => 'B', 'b' => 'B',
        'T' => 'T',
    );

    my $code = $self->resSecStr(@residue_id);
    return 'C' unless defined $code && exists $summary_for{$code};
    return $summary_for{$code};
}
|
|
501
|
|
502 # STRIDE SPECIFIC
|
|
503
|
|
504 =head2 resSecStrName
|
|
505
|
|
506 Title : resSecStrName
|
|
507 Usage : gives full name of the secondary structural element
|
|
508 classification of the specified residue
|
|
509 Function :
|
|
510 Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID );
|
|
511 Returns : scalar string
|
|
512 Args : RESIDUE_ID
|
|
513
|
|
514
|
|
515 =cut
|
|
516
|
|
# Returns the full secondary structure name column of the record for the
# identified residue.
sub resSecStrName {
    my ( $self, @residue_id ) = @_;
    my ( $ordinal, $chain_id ) = $self->_toOrdChain(@residue_id);
    my $column = $ASGTable{'ssName'};
    return $self->{'ASG'}->{$chain_id}->[$ordinal]->[$column];
}
|
|
523
|
|
524 =head2 strideLocs
|
|
525
|
|
526 Title : strideLocs
|
|
527 Usage : returns stride determined contiguous secondary
|
|
528 structural elements as specified on the LOC lines
|
|
529 Function :
|
|
530 Example : $loc_pnt = $stride_obj->strideLocs();
|
|
531 Returns : pointer to array of 5 element arrays.
|
|
532 0 => stride name of structural element
|
|
533 1 => first residue pdb key (including insertion code, if app.)
|
|
534 2 => first residue chain id
|
|
535 3 => last residue pdb key (including insertion code, if app.)
|
|
536 4 => last residue chain id
|
|
537 NOTE the differences between this range and the range of SecBounds()
|
|
538 Args : none
|
|
539
|
|
540
|
|
541 =cut
|
|
542
|
|
# Returns the array reference built from the LOC lines by _parseSummary.
sub strideLocs {
    my ($self) = @_;
    return $self->{'LOC'};
}
|
|
547
|
|
548 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
|
|
549
|
|
550 =head2 secBounds
|
|
551
|
|
552 Title : secBounds
|
|
553 Usage : gets residue ids of boundary residues in each
|
|
554 contiguous secondary structural element of specified
|
|
555 chain
|
|
556 Function :
|
|
557 Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' );
|
|
558 Returns : pointer to array of 3 element arrays. First two elements
|
|
559 are the PDB IDs of the start and end points, respectively
|
|
560 and inclusively. The last element is the STRIDE secondary
|
|
561 structural element code (same range as resSecStr).
|
|
562 Args : chain identifier ( one character ). If none, '-' is assumed
|
|
563
|
|
564
|
|
565 =cut
|
|
566
|
|
# Splits one chain into runs of identical secondary structure codes.
#
# Args    : chain identifier; a false value or ' ' is treated as '-'.
# Returns : reference to an array of [ first, last, code ] triples, one
#           per contiguous run, in chain order. The result is cached per
#           chain in $self->{'SecBounds'}.
# Throws  : via $self->throw when the chain has no ASG data.
#
# NOTE(review): the POD for this method describes the bounds as PDB IDs,
# but the values stored here are ordinal positions in the chain's record
# array. That behaviour is kept as-is; confirm which one callers expect.
#
# Fix: the previous version examined index (last + 1) after its loop.
# That read autovivified an extra slot in the chain's record array (so
# numResidues grew by one after the first call) and always appended a
# bogus [ last + 1, last + 1, undef ] triple. The final run is now closed
# at the real last index and nothing past the end is touched.
sub secBounds {
    my ( $self, $chain ) = @_;

    $chain = '-' if ( !($chain) || $chain eq ' ' || $chain eq '-' );

    # if we've memoized this one, use that
    if ( $self->{'SecBounds'}->{$chain} ) {
        return $self->{'SecBounds'}->{$chain};
    }

    # check to make sure chain is valid
    if ( !( $self->{'ASG'}->{$chain} ) ) {
        $self->throw("No such chain: $chain\n");
    }

    my $residues = $self->{'ASG'}->{$chain};
    my $last     = $#{$residues};
    my $ss_col   = $ASGTable{'ssAbbr'};
    my @SecBounds;

    # Records start at index 1; a chain without any record yields no runs.
    if ( $last >= 1 ) {
        my $beg         = 1;
        my $cur_element = $residues->[1]->[$ss_col];

        for my $i ( 2 .. $last ) {
            my $ss = $residues->[$i]->[$ss_col];
            next if $ss eq $cur_element;

            # code changed: close the previous run, open a new one here
            push( @SecBounds, [ $beg, $i - 1, $cur_element ] );
            $beg         = $i;
            $cur_element = $ss;
        }

        # close the run that reaches the end of the chain
        push( @SecBounds, [ $beg, $last, $cur_element ] );
    }

    $self->{'SecBounds'}->{$chain} = \@SecBounds;
    return $self->{'SecBounds'}->{$chain};
}
|
|
613
|
|
614 =head2 chains
|
|
615
|
|
616 Title : chains
|
|
617 Usage : gives array chain I.D.s (characters)
|
|
618 Function :
|
|
619 Example : $chains_ref = $stride_obj->chains();
|
|
620 Returns : reference to an array of chain ID characters
|
|
621 Args : none
|
|
622
|
|
623
|
|
624 =cut
|
|
625
|
|
# Returns a reference to a fresh array holding the chain IDs, i.e. the
# keys of $self->{'ASG'} (in hash order, so not sorted).
sub chains {
    my ($self) = @_;
    return [ keys %{ $self->{'ASG'} } ];
}
|
|
631
|
|
632 =head2 getSeq
|
|
633
|
|
634 Title : getSeq
|
|
635 Usage : returns a Bio::PrimarySeq object which represents an
|
|
636 approximation at the sequence of the specified chain.
|
|
637 Function : For most chain of most entries, the sequence returned by
|
|
638 this method will be very good. However, it is inherently
|
|
639 unsafe to rely on STRIDE to extract sequence information about
|
|
640 a PDB entry. More reliable information can be obtained from
|
|
641 the PDB entry itself. If a second option is given
|
|
642 (and evaluates to true), the sequence generated will
|
|
643 have 'X' in spaces where the pdb residue numbers are
|
|
644 discontinuous. In some cases this results in a
|
|
645 better sequence object (when the discontinuity is
|
|
646 due to regions which were present, but could not be
|
|
647 resolved). In other cases, it will result in a WORSE
|
|
648 sequence object (when the discontinuity is due to
|
|
649 historical sequence numbering and all sequence is
|
|
650 actually resolved).
|
|
651 Example : $pso = $stride_obj->getSeq( 'A' );
|
|
652 Returns : (pointer to) a PrimarySeq object
|
|
653 Args : Chain identifier. If none given, '-' is assumed.
|
|
654
|
|
655
|
|
656 =cut
|
|
657
|
|
# Builds a Bio::PrimarySeq for one chain from the parsed residue names.
#
# Args    : chain identifier (a false value means '-');
#           optional fill-in flag: when true, a gap of k missing PDB
#           residue numbers between consecutive residues adds k 'X'
#           characters to the sequence.
# Returns : Bio::PrimarySeq object whose id is the PDB id followed by the
#           chain id.
#
# Fix: results used to be cached by chain only, so a call with the
# fill-in flag could return the sequence built earlier without it (and
# the other way round). Plain and gap-filled sequences are now cached in
# separate slots ('Seq' and 'SeqFilled').
sub getSeq {
    my ( $self, $chain, $fill_in ) = @_;

    $chain = '-' if !($chain);

    my $cache_slot = $fill_in ? 'SeqFilled' : 'Seq';
    if ( $self->{$cache_slot}->{$chain} ) {
        return $self->{$cache_slot}->{$chain};
    }

    my $seq          = "";
    my $num_res      = $self->numResidues($chain);
    my $last_res_num = $self->_pdbNum( 1, $chain );

    for my $i ( 1 .. $num_res ) {
        if ($fill_in) {
            my $cur_res_num = $self->_pdbNum( $i, $chain );
            my $step        = $cur_res_num - $last_res_num;

            # pad discontinuities in the PDB numbering
            $seq .= 'X' x ( $step - 1 ) if $step > 1;
            $last_res_num = $cur_res_num;
        }
        $seq .= $self->_resAA( $i, $chain );
    }

    my $id = $self->pdbID();
    $id .= "$chain";

    $self->{$cache_slot}->{$chain} = Bio::PrimarySeq->new(
        -seq     => $seq,
        -id      => $id,
        -moltype => 'protein'
    );

    return $self->{$cache_slot}->{$chain};
}
|
|
705
|
|
706 =head1 INTERNAL METHODS
|
|
707
|
|
708 =head2 _pdbNum
|
|
709
|
|
710 Title : _pdbNum
|
|
711 Usage : fetches the numeric portion of the identifier for a given
|
|
712 residue as reported by the pdb entry. Note, this DOES NOT
|
|
713 uniquely specify a residue. There may be an insertion code
|
|
714 and/or chain identifier differences.
|
|
715 Function :
|
|
716 Example : $pdbNum = $self->_pdbNum( 3, 'A' );
|
|
717 Returns : a scalar
|
|
718 Args : valid ordinal num / chain combination
|
|
719
|
|
720
|
|
721 =cut
|
|
722
|
|
# Extracts the numeric part (optional leading '-' signs plus digits) of
# the stored PDB residue number for the record at (ordinal, chain).
# Throws when no record exists at that position.
sub _pdbNum {
    my ( $self, $ord, $chain ) = @_;

    $self->throw("No such ordinal $ord in chain $chain.\n")
      unless $self->{'ASG'}->{$chain}->[$ord];

    my $res_id = $self->{'ASG'}->{$chain}->[$ord]->[ $ASGTable{'resNum'} ];

    # list assignment: an id without digits leaves the result undefined
    my ($numeric) = $res_id =~ /(-*\d+).*/;
    return $numeric;
}
|
|
735
|
|
736 =head2 _resAA
|
|
737
|
|
738 Title : _resAA
|
|
739 Usage : returns 1 letter abbr. of the amino acid specified by
|
|
740 the arguments
|
|
741 Function :
|
|
742 Examples : $aa = $stride_obj->_resAA( 3, '-' );
|
|
743 Returns : scalar character
|
|
744 Args : ( ord. num, chain )
|
|
745
|
|
746
|
|
747 =cut
|
|
748
|
|
# One-letter amino acid code for the record at (ordinal, chain), looked
# up in %AATable from the stored three-letter name. Throws when no record
# exists at that position.
sub _resAA {
    my ( $self, $ord, $chain ) = @_;

    $self->throw("No such ordinal $ord in chain $chain.\n")
      unless $self->{'ASG'}->{$chain}->[$ord];

    my $three_letter = $self->{'ASG'}->{$chain}->[$ord]->[ $ASGTable{'aa'} ];
    return ( $AATable{$three_letter} );
}
|
|
758
|
|
759 =head2 _pdbInsCo
|
|
760
|
|
761 Title : _pdbInsCo
|
|
762 Usage : fetches the Insertion code for this residue.
|
|
763 Function :
|
|
764 Example : $pdb_ins_co = $self->_pdbInsCo( 15, 'B' );
|
|
765 Returns : a scalar
|
|
766 Args : ordinal number and chain
|
|
767
|
|
768
|
|
769 =cut
|
|
770
|
|
# Returns the insertion code of the record at (ordinal, chain): the run
# of non-digit characters that follows the first run of digits in the
# stored PDB residue number. Undefined when there is none. Throws when no
# record exists at that position.
sub _pdbInsCo {
    my ( $self, $ord, $chain ) = @_;

    $self->throw("No such ordinal $ord in chain $chain.\n")
      unless $self->{'ASG'}->{$chain}->[$ord];

    my $res_id = $self->{'ASG'}->{$chain}->[$ord]->[ $ASGTable{'resNum'} ];

    # the insertion code is taken to be any non-digit character(s)
    my ($ins_code) = $res_id =~ /\d+(\D+)/;
    return $ins_code;
}
|
|
784
|
|
785 =head2 _toOrdChain
|
|
786
|
|
787 Title : _toOrdChain
|
|
788 Usage : takes any set of residue identifying parameters and
|
|
789 wrestles them into a two element array: the chain and the ordinal
|
|
790 number of this residue. This two element array can then be
|
|
791 efficiently used as keys in many of the above accessor methods
|
|
792 ('#A:B') or ( #, 'A', 'B' )
|
|
793 || |
|
|
794 || - Chain ID (blank for single chain)
|
|
795 |--- Insertion code for this residue. Blank for most residues.
|
|
796 |--- Numeric portion of residue ID.
|
|
797
|
|
798 (#)
|
|
799 |
|
|
800 --- Numeric portion of residue ID. If there is only one chain and
|
|
801 it has no ID AND there is no residue with an insertion code at this
|
|
802 number, then this can uniquely specify a residue.
|
|
803
|
|
804 ('#:C') or ( #, 'C' )
|
|
805 | |
|
|
806 | -Chain ID
|
|
807 ---Numeric portion of residue ID.
|
|
808
|
|
809 If a residue is incompletely specified then the first residue that
|
|
810 fits the arguments is returned. For example, if 19 is the argument
|
|
811 and there are three chains, A, B, and C with a residue whose number
|
|
812 is 19, then 19:A will be returned (assuming its listed first).
|
|
813
|
|
814 Function :
|
|
815 Example : my ( $ord, $chain ) = $self->_toOrdChain( @args );
|
|
816 Returns : two element array
|
|
817 Args : valid set of residue identifier(s) ( SEE NOTE ABOVE )
|
|
818
|
|
819
|
|
820 =cut
|
|
821
|
|
# Turns any supported residue identifier into ( ordinal, chain ).
#
# Accepted argument forms:
#   ( number, ins_code, chain )   three or more arguments
#   ( number, chain )             two arguments
#   ( 'NUMBER[ins]:[chain]' )     single string, ':chain' part optional
#   ( number )                    single number
# A false or blank chain means '-'.
#
# Returns : ( ordinal index into the chain's record array, chain id ) of
#           the first record whose stored PDB residue number equals
#           number . ins_code.
# Throws  : via $self->throw for an unknown chain or residue key.
#
# Changes relative to the previous version:
#   * undefined number / insertion code are no longer interpolated into
#     the lookup key (same key, no uninitialized-value use);
#   * 'NUMBER:' with nothing after the colon now means "default chain"
#     instead of leaving the colon in the key and always failing;
#   * empty slots in the record array are skipped, so scanning no longer
#     autovivifies them and an empty key cannot match a gap.
sub _toOrdChain {
    my ( $self, @args ) = @_;

    my ( $key_num, $ins_code, $chain_id );

    if ( @args >= 3 ) {    # number, insertion code, chain
        ( $key_num, $ins_code, $chain_id ) = @args;
    }
    elsif ( @args == 2 ) {    # number, chain
        ( $key_num, $chain_id ) = @args;
    }
    else {                    # single arg: might be number or string
        my $arg_str = defined $args[0] ? $args[0] : '';

        # a chain is specified after the colon (possibly empty)
        if ( $arg_str =~ s/:(.?)// ) {
            $chain_id = $1;
        }

        # an insertion code is specified: keep the first letter and
        # drop every letter from what remains
        if ( $arg_str =~ /([A-Za-z])/ ) {
            $ins_code = $1;
            $arg_str =~ s/[A-Za-z]//g;
        }

        # now, the number bit -> everything still around
        $key_num = $arg_str;
    }

    my $key = ( defined $key_num ? $key_num : '' )
      . ( defined $ins_code ? $ins_code : '' );

    if ( !($chain_id) || $chain_id eq ' ' ) {
        $chain_id = '-';
    }

    if ( !( $self->{'ASG'}->{$chain_id} ) ) {
        $self->throw("No such chain: $chain_id");
    }

    my $residues = $self->{'ASG'}->{$chain_id};
    my $last     = $#{$residues};
    my $num_col  = $ASGTable{'resNum'};

    for my $i ( 1 .. $last ) {
        my $record = $residues->[$i];
        next unless $record;    # unused slot in the record array
        my $res_num = $record->[$num_col];
        if ( defined $res_num && $res_num eq $key ) {
            return ( $i, $chain_id );
        }
    }

    $self->throw("No such key: $key");
}
|
|
874
|
|
875 =head2 _parse
|
|
876
|
|
877 Title : _parse
|
|
878 Usage : as name suggests, parses stride output, creating object
|
|
879 Function :
|
|
880 Example : $self->_parse( $io );
|
|
881 Returns :
|
|
882 Args : valid Bio::Root::IO object
|
|
883
|
|
884
|
|
885 =cut
|
|
886
|
|
# Drives the parse of one STRIDE output stream, section by section.
#
# Args   : Bio::Root::IO-like object; it is passed on unchanged to each
#          section parser, which fetches the filehandle itself.
# Throws : "Not stride output" (via $self->throw) when _parseTop returns
#          a true value.
#
# The unused local filehandle lookup of the previous version was removed;
# nothing in this method reads from the handle directly.
sub _parse {
    my ( $self, $io ) = @_;

    # Parse top lines
    if ( $self->_parseTop($io) ) {
        $self->throw("Not stride output");
    }

    # Parse the HDR, CMP, SRC, and AUT lines
    $self->_parseHead($io);

    # Parse the CHN, SEQ, STR, and LOC lines (only LOC lines are kept)
    $self->_parseSummary($io);

    # Parse the ASG lines
    $self->_parseASG($io);
}
|
|
906
|
|
907 =head2 _parseTop
|
|
908
|
|
909 Title : _parseTop
|
|
910 Usage : makes sure this looks like stride output
|
|
911 Function :
|
|
912 Example :
|
|
913 Returns :
|
|
914 Args :
|
|
915
|
|
916
|
|
917 =cut
|
|
918
|
|
# Reads the first line from the IO object's filehandle and checks that it
# starts with "REM ---". Returns 0 when it does, 1 otherwise (so a true
# return value means "this is not STRIDE output").
sub _parseTop {
    my ( $self, $io ) = @_;
    my $fh         = $io->_fh();
    my $first_line = <$fh>;
    return ( $first_line =~ /^REM ---/ ) ? 0 : 1;
}
|
|
929
|
|
930 =head2 _parseHead
|
|
931
|
|
932 Title : _parseHead
|
|
933 Usage : parses
|
|
934 Function : HDR, CMP, SRC, and AUT lines
|
|
935 Example :
|
|
936 Returns :
|
|
937 Args :
|
|
938
|
|
939
|
|
940 =cut
|
|
941
|
|
# Parses the header section: leading REM lines are skipped, then an
# optional HDR line and the runs of CMP, SRC and AUT lines are read.
#
# Stores : $self->{'PDB'} and $self->{'DATE'} (from the HDR line) and
#          $self->{'HEAD'} = { HEADER => text, CMP => [..], SRC => [..],
#          AUT => [..] }.
# Note   : the first line that follows the AUT run is read and dropped,
#          exactly as before; the next section parser starts on the line
#          after it.
#
# Fixes relative to the previous version:
#   * %head was initialised with "= {}", which stored a stringified hash
#     reference as a junk key; it now starts empty;
#   * the next line is only fetched after an HDR line was actually
#     consumed, so the first CMP line is not lost when HDR is absent;
#   * a CMP/SRC/AUT line whose text cannot be extracted is skipped
#     instead of pushing undef;
#   * reads at end of file no longer feed undef to the pattern matches;
#   * the unused $still_head local is gone.
sub _parseHead {
    my ( $self, $io ) = @_;
    my $file = $io->_fh();
    my ( @cmp, @src, @aut );
    my %head;

    my $cur = <$file>;
    while ( defined $cur && $cur =~ /^REM / ) {
        $cur = <$file>;
    }

    if ( defined $cur && $cur =~ /^HDR / ) {
        my @elements = split( /\s+/, $cur );
        shift(@elements);    # the word HDR
        pop(@elements);      # last token on the line (presumably the PDB
                             # code repeated at the end of every line)
        $self->{'PDB'}  = pop(@elements);
        $self->{'DATE'} = pop(@elements);

        # everything that is left is the free-text "header"
        $head{'HEADER'} = join( ' ', @elements );
        $cur = <$file>;
    }

    # The three sections share one layout: tag, text, then a trailing
    # four-character word that is stripped off.
    foreach my $section ( [ 'CMP', \@cmp ], [ 'SRC', \@src ], [ 'AUT', \@aut ] ) {
        my ( $tag, $store ) = @{$section};
        while ( defined $cur && $cur =~ /^$tag / ) {
            my ($text) = ( $cur =~ /^$tag\s+(.+?)\s*\w{4}$/ );
            push( @{$store}, $text ) if defined $text;
            $cur = <$file>;
        }
    }

    $head{'CMP'} = \@cmp;
    $head{'SRC'} = \@src;
    $head{'AUT'} = \@aut;
    $self->{'HEAD'} = \%head;
}
|
|
993
|
|
994 =head2 _parseSummary
|
|
995
|
|
996 Title : _parseSummary
|
|
997 Usage : parses LOC lines
|
|
998 Function :
|
|
999 Example :
|
|
1000 Returns :
|
|
1001 Args :
|
|
1002
|
|
1003
|
|
1004 =cut
|
|
1005
|
|
# Parses the summary section. Leading REM, STR, SEQ and CHN lines are
# ignored; every following LOC line becomes a five-element array
# (element name, first residue PDB number, first residue chain id,
# last residue PDB number, last residue chain id) with all whitespace
# removed from each field. The list is stored in $self->{'LOC'}.
# The first non-LOC line after the run is read and dropped.
sub _parseSummary {
    my ( $self, $io ) = @_;
    my $fh = $io->_fh();

    # [ offset, length ] of each kept field within a LOC line. The first
    # and last residue names (offsets 18 and 35, length 3) are redundant
    # and therefore not extracted.
    my @field_spec = (
        [ 5,  12 ],    # Element name
        [ 22, 5 ],     # First residue PDB number
        [ 28, 1 ],     # First residue Chain ID
        [ 40, 5 ],     # Last residue PDB number
        [ 46, 1 ],     # Last residue Chain ID
    );

    my $line = <$fh>;

    # ignore these lines
    $line = <$fh> while ( $line =~ /^REM |^STR |^SEQ |^CHN / );

    my @locations;
    while ( $line =~ /^LOC / ) {
        my @row = map {
            my $field = substr( $line, $_->[0], $_->[1] );
            $field =~ s/\s//g;
            $field;
        } @field_spec;
        push( @locations, \@row );
        $line = <$fh>;
    }

    $self->{'LOC'} = \@locations;
}
|
|
1040
|
|
1041 =head2 _parseASG
|
|
1042
|
|
1043 Title : _parseASG
|
|
1044 Usage : parses ASG lines
|
|
1045 Function :
|
|
1046 Example :
|
|
1047 Returns :
|
|
1048 Args :
|
|
1049
|
|
1050
|
|
1051 =cut
|
|
1052
|
|
# Parses the per-residue ASG lines into $self->{'ASG'}.
#
# Result layout: $self->{'ASG'}->{CHAIN}->[ORDINAL] is an array reference
# holding, in this order, residue name, PDB residue number (with
# insertion code), one-letter secondary structure code, full secondary
# structure name, phi, psi and solvent accessible area. ORDINAL is the
# number read from offset 16 of the line; all whitespace is removed from
# every field. Leading REM lines are skipped.
#
# Fixes relative to the previous version:
#   * the chain switch was detected with a truthiness test on the
#     previous chain id, so the records of a chain named '0' were merged
#     into the following chain; the test now uses defined();
#   * when the input contains no ASG line at all, no bogus chain with an
#     empty id is created;
#   * reads at end of file no longer feed undef to the pattern matches.
sub _parseASG {
    my ( $self, $io ) = @_;
    my $file = $io->_fh();

    # [ offset, length ] of each stored field within an ASG line. The
    # chain id (offset 9) and the ordinal number (offset 16) are read
    # separately below because they are used as keys.
    my @ASG_lookup = (
        [ 5,  3 ],     # Residue name
        [ 10, 5 ],     # PDB residue number (w/ins.code)
        [ 24, 1 ],     # one letter sec. stru. abbr.
        [ 26, 13 ],    # full sec. stru. name
        [ 42, 7 ],     # phi angle
        [ 52, 7 ],     # psi angle
        [ 64, 5 ],     # residue solv. acc.
    );

    my %ASG;
    my @elements;
    my ( $chain, $last_chain );

    my $cur = <$file>;
    while ( defined $cur && $cur =~ /^REM / ) {
        $cur = <$file>;
    }

    while ( defined $cur && $cur =~ /^ASG / ) {

        # get ordinal number for array key
        my $ord_num = substr( $cur, 16, 4 );
        $ord_num =~ s/\s//g;

        # get the chain id
        $chain = substr( $cur, 9, 1 );

        # a new chain starts: file the records collected so far
        if ( defined $last_chain && ( $chain ne $last_chain ) ) {
            $ASG{$last_chain} = [@elements];
            @elements = ();
        }

        # now get the rest of the info on this line
        my @fields;
        foreach my $bound_set (@ASG_lookup) {
            my $element = substr( $cur, $bound_set->[0], $bound_set->[1] );
            $element =~ s/\s//g;
            push( @fields, $element );
        }
        $elements[$ord_num] = \@fields;

        $last_chain = $chain;
        $cur        = <$file>;
    }

    # file the records of the last chain seen, if there was any
    $ASG{$chain} = [@elements] if defined $chain;

    $self->{'ASG'} = \%ASG;
}
|
|
1108
|
|
1109 1;
|
|
1110
|
|
1111
|
|
1112
|