comparison variant_effect_predictor/Bio/Structure/SecStr/STRIDE/Res.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $id $
2 #
3 # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm
4 #
5 # Cared for by Ed Green <ed@compbio.berkeley.edu>
6 #
7 # Copyright Univ. of California
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12 =head1 NAME
13
14 Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output
15
16 =head1 SYNOPSIS
17
18 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' );
19
20 # or
21
22 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT );
23
24 # Get secondary structure assignment for PDB residue 20 of chain A
25 $sec_str = $stride_obj->resSecStr( '20:A' );
26
27 # same
28 $sec_str = $stride_obj->resSecStr( 20, 'A' )
29
30 =head1 DESCRIPTION
31
32 STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a
33 program (similar to DSSP) for assigning secondary structure to
34 individual residues of a pdb structure file.
35
36 ( Knowledge-Based Protein Secondary Structure Assignment,
37 PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) )
38
39 STRIDE is available here:
40 http://www.embl-heidelberg.de/argos/stride/down_stride.html
41
42 Methods are then available for extracting all of the infomation
43 present within the output or convenient subsets of it.
44
45 Although they are very similar in function, DSSP and STRIDE differ
46 somewhat in output format. Thes differences are reflected in the
47 return value of some methods of these modules. For example, both
48 the STRIDE and DSSP parsers have resSecStr() methods for returning
49 the secondary structure of a given residue. However, the range of
50 return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the
51 range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See
52 individual methods for details.
53
54 The methods are roughly divided into 3 sections:
55
56 1. Global features of this structure (PDB ID, total surface area,
57 etc.). These methods do not require an argument.
58 2. Residue specific features ( amino acid, secondary structure,
59 solvent exposed surface area, etc. ). These methods do require an
60 arguement. The argument is supposed to uniquely identify a
61 residue described within the structure. It can be of any of the
62 following forms:
63 ('#A:B') or ( #, 'A', 'B' )
64 || |
65 || - Chain ID (blank for single chain)
66 |--- Insertion code for this residue. Blank for most residues.
67 |--- Numeric portion of residue ID.
68
69 (#)
70 |
71 --- Numeric portion of residue ID. If there is only one chain and
72 it has no ID AND there is no residue with an insertion code at this
73 number, then this can uniquely specify a residue.
74
75 ('#:C') or ( #, 'C' )
76 | |
77 | -Chain ID
78 ---Numeric portion of residue ID.
79
80 If a residue is incompletely specified then the first residue that
81 fits the arguments is returned. For example, if 19 is the argument
82 and there are three chains, A, B, and C with a residue whose number
83 is 19, then 19:A will be returned (assuming its listed first).
84
85 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
86 are not supported by these modules.
87
88 3. Value-added methods. Return values are not verbatem strings
89 parsed from DSSP or STRIDE output.
90
91 =head1 FEEDBACK
92
93 =head2 MailingLists
94
95 UsUser feedback is an integral part of the evolution of this and other
96 Bioperl modules. Send your comments and suggestions preferably to one
97 of the Bioperl mailing lists. Your participation is much appreciated.
98
99 bioperl-l@bioperl.org - General discussion
100 http://bio.perl.org/MailList.html - About the mailing lists
101
102 =head2 Reporting Bugs
103
104 Report bugs to the Bioperl bug tracking system to help us keep track
105 the bugs and their resolution. Bug reports can be submitted via email
106 or the web:
107
108 bioperl-bugs@bio.perl.org
109 http://bugzilla.bioperl.org/
110
111 =head1 AUTHOR - Ed Green
112
113 Email ed@compbio.berkeley.edu
114
115
116 =head1 APPENDIX
117
118 The Rest of the documentation details each method.
119 Internal methods are preceded with a _.
120
121
122 =cut
123
124 package Bio::Structure::SecStr::STRIDE::Res;
125 use strict;
126 use vars qw(@ISA);
127 use Bio::Root::Root;
128 use Bio::Root::IO;
129 use Bio::PrimarySeq;
130
131 @ISA = qw(Bio::Root::Root);
132
133 our %ASGTable = ( 'aa' => 0,
134 'resNum' => 1,
135 'ssAbbr' => 2,
136 'ssName' => 3,
137 'phi' => 4,
138 'psi' => 5,
139 'surfArea' => 6 );
140
141 our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N',
142 'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q',
143 'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H',
144 'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K',
145 'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P',
146 'SER' => 'S', 'THR' => 'T', 'TRP' => 'W',
147 'TYR' => 'Y', 'VAL' => 'V' );
148
149 =head2 new
150
151 Title : new
152 Usage : makes new object of this class
153 Function : Constructor
154 Example : $stride_obj = Bio::Structure::SecStr::STRIDE:Res->new( '-file' => filename
155 # or
156 '-fh' => FILEHANDLE )
157 Returns : object (ref)
158 Args : filename or filehandle( must be proper STRIDE output )
159
160 =cut
161
162 sub new {
163 my ( $class, @args ) = @_;
164 my $self = $class->SUPER::new( @args );
165 my $io = Bio::Root::IO->new( @args );
166 $self->_parse( $io ); # not passing filehandle !
167 $io->close();
168 return $self;
169 }
170
171 # GLOBAL FEATURES / INFO / STATS
172
173 =head2 totSurfArea
174
175 Title : totSurfArea
176 Usage : returns sum of surface areas of all residues of all
177 chains considered. Result is memoized.
178 Function :
179 Example : $tot_SA = $stride_obj->totSurfArea();
180 Returns : scalar
181 Args : none
182
183
184 =cut
185
186 sub totSurfArea {
187 my $self = shift;
188 my $total = 0;
189 my ( $chain, $res );
190
191 if ( $self->{ 'SurfArea' } ) {
192 return $self->{ 'SurfArea' };
193 }
194 else {
195 foreach $chain ( keys %{$self->{ 'ASG' }} ) {
196 for ( my $i = 1; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) {
197 $total +=
198 $self->{'ASG'}->{$chain}->[$i]->[$ASGTable{'surfArea'}];
199 }
200 }
201 }
202
203 $self->{ 'SurfArea' } = $total;
204 return $self->{ 'SurfArea' };
205
206 }
207
208 =head2 numResidues
209
210 Title : numResidues
211 Usage : returns total number of residues in all chains or
212 just the specified chain
213 Function :
214 Example : $tot_res = $stride_obj->numResidues();
215 Returns : scalar int
216 Args : none or chain id
217
218
219 =cut
220
221 sub numResidues {
222 my $self = shift;
223 my $chain = shift;
224 my $total = 0;
225 my $key;
226 foreach $key ( keys %{$self->{ 'ASG' }} ) {
227 if ( $chain ) {
228 if ( $key eq $chain ) {
229 $total += $#{$self->{ 'ASG' }{ $key }};
230 }
231 }
232 else {
233 $total += $#{$self->{ 'ASG' }{ $key }};
234 }
235 }
236 return $total;
237 }
238
239 # STRAIGHT FROM THE PDB ENTRY
240
241 =head2 pdbID
242
243 Title : pdbID
244 Usage : returns pdb identifier ( 1FJM, e.g. )
245 Function :
246 Example : $pdb_id = $stride_obj->pdbID();
247 Returns : scalar string
248 Args : none
249
250
251 =cut
252
253 sub pdbID {
254 my $self = shift;
255 return $self->{ 'PDB' };
256 }
257 =head2 pdbAuthor
258
259 Title : pdbAuthor
260 Usage : returns author of this PDB entry
261 Function :
262 Example : $auth = $stride_obj->pdbAuthor()
263 Returns : scalar string
264 Args : none
265
266
267 =cut
268
269 sub pdbAuthor {
270 my $self = shift;
271 return join( ' ', @{ $self->{ 'HEAD' }->{ 'AUT' } } );
272 }
273
274 =head2 pdbCompound
275
276 Title : pdbCompound
277 Usage : returns string of what was found on the
278 CMP lines
279 Function :
280 Example : $cmp = $stride_obj->pdbCompound();
281 Returns : string
282 Args : none
283
284
285 =cut
286
287 sub pdbCompound {
288 my $self = shift;
289 return join( ' ', @{ $self->{ 'HEAD' }->{ 'CMP' } } );
290 }
291
292 =head2 pdbDate
293
294 Title : pdbDate
295 Usage : returns date given in PDB file
296 Function :
297 Example : $pdb_date = $stride_obj->pdbDate();
298 Returns : scalar
299 Args : none
300
301
302 =cut
303
304 sub pdbDate {
305 my $self = shift;
306 return $self->{ 'DATE' };
307 }
308
309 =head2 pdbHeader
310
311 Title : pdbHeader
312 Usage : returns string of characters found on the PDB header line
313 Function :
314 Example : $head = $stride_obj->pdbHeader();
315 Returns : scalar
316 Args : none
317
318
319 =cut
320
321 sub pdbHeader {
322 my $self = shift;
323 return $self->{ 'HEAD' }->{ 'HEADER' };
324 }
325
326 =head2 pdbSource
327
328 Title : pdbSource
329 Usage : returns string of what was found on SRC lines
330 Function :
331 Example : $src = $stride_obj->pdbSource();
332 Returns : scalar
333 Args : none
334
335
336 =cut
337
338 sub pdbSource {
339 my $self = shift;
340 return join( ' ', @{ $self->{ 'HEAD' }->{ 'SRC' } } );
341 }
342
343 # RESIDUE SPECIFIC ACCESSORS
344
345 =head2 resAA
346
347 Title : resAA
348 Usage : returns 1 letter abbr. of the amino acid specified by
349 the arguments
350 Function :
351 Examples : $aa = $stride_obj->resAA( RESIDUE_ID );
352 Returns : scalar character
353 Args : RESIDUE_ID
354
355
356 =cut
357
358 sub resAA {
359 my $self = shift;
360 my @args = @_;
361 my ( $ord, $chain ) = $self->_toOrdChain( @args );
362 return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} );
363 }
364
365 =head2 resPhi
366
367 Title : resPhi
368 Usage : returns phi angle of specified residue
369 Function :
370 Example : $phi = $stride_obj->resPhi( RESIDUE_ID );
371 Returns : scaler
372 Args : RESIDUE_ID
373
374
375 =cut
376
377 sub resPhi {
378 my $self = shift;
379 my @args = @_;
380 my ( $ord, $chain ) = $self->_toOrdChain( @args );
381 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'phi' } ];
382 }
383
384 =head2 resPsi
385
386 Title : resPsi
387 Usage : returns psi angle of specified residue
388 Function :
389 Example : $psi = $stride_obj->resPsi( RESIDUE_ID );
390 Returns : scalar
391 Args : RESIDUE_ID
392
393
394 =cut
395
396 sub resPsi {
397 my $self = shift;
398 my @args = @_;
399 my ( $ord, $chain ) = $self->_toOrdChain( @args );
400 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'psi' } ];
401 }
402
403 =head2 resSolvAcc
404
405 Title : resSolvAcc
406 Usage : returns stride calculated surface area of specified residue
407 Function :
408 Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID );
409 Returns : scalar
410 Args : RESIDUE_ID
411
412
413 =cut
414
415 sub resSolvAcc {
416 my $self = shift;
417 my @args = @_;
418 my ( $ord, $chain ) = $self->_toOrdChain( @args );
419 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ];
420 }
421
422 =head2 resSurfArea
423
424 Title : resSurfArea
425 Usage : returns stride calculated surface area of specified residue
426 Function :
427 Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID );
428 Returns : scalar
429 Args : RESIDUE_ID
430
431
432 =cut
433
434 sub resSurfArea {
435 my $self = shift;
436 my @args = @_;
437 my ( $ord, $chain ) = $self->_toOrdChain( @args );
438 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ];
439 }
440
441 =head2 resSecStr
442
443 Title : resSecStr
444 Usage : gives one letter abbr. of stride determined secondary
445 structure of specified residue
446 Function :
447 Example : $ss = $stride_obj->resSecStr( RESIDUE_ID );
448 Returns : one of: 'H' => Alpha Helix
449 'G' => 3-10 helix
450 'I' => PI-helix
451 'E' => Extended conformation
452 'B' or 'b' => Isolated bridge
453 'T' => Turn
454 'C' => Coil
455 ' ' => None
456 # NOTE: This range is slightly DIFFERENT from the
457 # DSSP method of the same name
458 Args : RESIDUE_ID
459
460
461 =cut
462
463 sub resSecStr {
464 my $self = shift;
465 my @args = @_;
466 my ( $ord, $chain ) = $self->_toOrdChain( @args );
467 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssAbbr' } ];
468 }
469
470 =head2 resSecStrSum
471
472 Title : resSecStrSum
473 Usage : gives one letter summary of secondary structure of
474 specified residue. More general than secStruc()
475 Function :
476 Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID );
477 Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil)
478 Args : residue identifier(s) ( SEE INTRO NOTE )
479
480
481 =cut
482
483 sub resSecStrSum {
484 my $self = shift;
485 my @args = @_;
486 my $ss_char = $self->resSecStr( @args );
487
488 if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) {
489 return 'H';
490 }
491 if ( $ss_char eq 'E' || $ss_char eq 'B' || $ss_char eq 'b' ) {
492 return 'B';
493 }
494 if ( $ss_char eq 'T' ) {
495 return 'T';
496 }
497 else {
498 return 'C';
499 }
500 }
501
502 # STRIDE SPECIFIC
503
504 =head2 resSecStrName
505
506 Title : resSecStrName
507 Usage : gives full name of the secondary structural element
508 classification of the specified residue
509 Function :
510 Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID );
511 Returns : scalar string
512 Args : RESIDUE_ID
513
514
515 =cut
516
517 sub resSecStrName {
518 my $self = shift;
519 my @args = @_;
520 my ( $ord, $chain ) = $self->_toOrdChain( @args );
521 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssName' } ];
522 }
523
524 =head2 strideLocs
525
526 Title : strideLocs
527 Usage : returns stride determined contiguous secondary
528 structural elements as specified on the LOC lines
529 Function :
530 Example : $loc_pnt = $stride_obj->strideLocs();
531 Returns : pointer to array of 5 element arrays.
532 0 => stride name of structural element
533 1 => first residue pdb key (including insertion code, if app.)
534 2 => first residue chain id
535 3 => last residue pdb key (including insertion code, if app.)
536 4 => last residue chain id
537 NOTE the differences between this range and the range of SecBounds()
538 Args : none
539
540
541 =cut
542
543 sub strideLocs {
544 my $self = shift;
545 return $self->{ 'LOC' };
546 }
547
548 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
549
550 =head2 secBounds
551
552 Title : secBounds
553 Usage : gets residue ids of boundary residues in each
554 contiguous secondary structural element of specified
555 chain
556 Function :
557 Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' );
558 Returns : pointer to array of 3 element arrays. First two elements
559 are the PDB IDs of the start and end points, respectively
560 and inclusively. The last element is the STRIDE secondary
561 structural element code (same range as resSecStr).
562 Args : chain identifier ( one character ). If none, '-' is assumed
563
564
565 =cut
566
567 sub secBounds {
568 # Requires a chain name. If left blank, we assume ' ' which equals '-'
569 my $self = shift;
570 my $chain = shift;
571 my @SecBounds;
572
573 $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' );
574
575 # if we've memoized this one, use that
576 if ( $self->{ 'SecBounds' }->{ $chain } ) {
577 return $self->{ 'SecBounds' }->{ $chain };
578 }
579
580 #check to make sure chain is valid
581 if ( !( $self->{ 'ASG' }->{ $chain } ) ) {
582 $self->throw( "No such chain: $chain\n" );
583 }
584
585 my $cur_element = $self->{ 'ASG' }->{ $chain }->[ 1 ]->
586 [ $ASGTable{ 'ssAbbr' } ];
587 my $beg = 1;
588 my $i;
589
590 for ( $i = 2; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) {
591 if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ]
592 ne $cur_element ) {
593 push( @SecBounds, [ $beg, $i -1 , $cur_element ] );
594 $beg = $i;
595 $cur_element = $self->{ 'ASG' }->{ $chain }->[ $i ]->
596 [ $ASGTable{ 'ssAbbr' } ];
597 }
598 }
599
600 if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ]
601 eq $cur_element ) {
602 push( @SecBounds, [ $beg, $i, $cur_element ] );
603 }
604 else {
605 push( @SecBounds, [ $beg, $i - 1, $cur_element ],
606 [ $i, $i, $self->{ 'ASG' }->{ $chain }->[ $i ]->
607 [ $ASGTable{ 'ssAbbr' } ] ] );
608 }
609
610 $self->{ 'SecBounds' }->{ $chain } = \@SecBounds;
611 return $self->{ 'SecBounds' }->{ $chain };
612 }
613
614 =head2 chains
615
616 Title : chains
617 Usage : gives array chain I.D.s (characters)
618 Function :
619 Example : @chains = $stride_obj->chains();
620 Returns : array of characters
621 Args : none
622
623
624 =cut
625
626 sub chains {
627 my $self = shift;
628 my @chains = keys ( %{ $self->{ 'ASG' } } );
629 return \@chains;
630 }
631
632 =head2 getSeq
633
634 Title : getSeq
635 Usage : returns a Bio::PrimarySeq object which represents an
636 approximation at the sequence of the specified chain.
637 Function : For most chain of most entries, the sequence returned by
638 this method will be very good. However, it it inherently
639 unsafe to rely on STRIDE to extract sequence information about
640 a PDB entry. More reliable information can be obtained from
641 the PDB entry itself. If a second option is given
642 (and evaluates to true), the sequence generated will
643 have 'X' in spaces where the pdb residue numbers are
644 discontinuous. In some cases this results in a
645 better sequence object (when the discontinuity is
646 due to regions which were present, but could not be
647 resolved). In other cases, it will result in a WORSE
648 sequence object (when the discontinuity is due to
649 historical sequence numbering and all sequence is
650 actually resolved).
651 Example : $pso = $dssp_obj->getSeq( 'A' );
652 Returns : (pointer to) a PrimarySeq object
653 Args : Chain identifier. If none given, '-' is assumed.
654
655
656 =cut
657
658 sub getSeq {
659 my $self = shift;
660 my $chain = shift;
661 my $fill_in = shift;
662
663 if ( !( $chain ) ) {
664 $chain = '-';
665 }
666
667 if ( $self->{ 'Seq' }->{ $chain } ) {
668 return $self->{ 'Seq' }->{ $chain };
669 }
670
671 my ( $seq,
672 $num_res,
673 $last_res_num,
674 $cur_res_num,
675 $i,
676 $step,
677 $id
678 );
679
680 $seq = "";
681 $num_res = $self->numResidues( $chain );
682 $last_res_num = $self->_pdbNum( 1, $chain );
683 for ( $i = 1; $i <= $num_res; $i++ ) {
684 if ( $fill_in ) {
685 $cur_res_num = $self->_pdbNum( $i, $chain );
686 $step = $cur_res_num - $last_res_num;
687 if ( $step > 1 ) {
688 $seq .= 'X' x ( $step - 1 );
689 }
690 }
691 $seq .= $self->_resAA( $i, $chain );
692 $last_res_num = $cur_res_num;
693 }
694
695 $id = $self->pdbID();
696 $id .= "$chain";
697
698 $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq,
699 -id => $id,
700 -moltype => 'protein'
701 );
702
703 return $self->{ 'Seq' }->{ $chain };
704 }
705
706 =head1 INTERNAL METHODS
707
708 =head2 _pdbNum
709
710 Title : _pdbNum
711 Usage : fetches the numeric portion of the identifier for a given
712 residue as reported by the pdb entry. Note, this DOES NOT
713 uniquely specify a residue. There may be an insertion code
714 and/or chain identifier differences.
715 Function :
716 Example : $pdbNum = $self->pdbNum( 3, 'A' );
717 Returns : a scalar
718 Args : valid ordinal num / chain combination
719
720
721 =cut
722
723 sub _pdbNum {
724 my $self = shift;
725 my $ord = shift;
726 my $chain = shift;
727 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
728 $self->throw( "No such ordinal $ord in chain $chain.\n" );
729 }
730 my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
731 my $num_part;
732 ( $num_part ) = ( $pdb_junk =~ /(-*\d+).*/ );
733 return $num_part;
734 }
735
736 =head2 _resAA
737
738 Title : _resAA
739 Usage : returns 1 letter abbr. of the amino acid specified by
740 the arguments
741 Function :
742 Examples : $aa = $stride_obj->_resAA( 3, '-' );
743 Returns : scalar character
744 Args : ( ord. num, chain )
745
746
747 =cut
748
749 sub _resAA {
750 my $self = shift;
751 my $ord = shift;
752 my $chain = shift;
753 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
754 $self->throw( "No such ordinal $ord in chain $chain.\n" );
755 }
756 return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} );
757 }
758
759 =head2 _pdbInsCo
760
761 Title : _pdbInsCo
762 Usage : fetches the Insertion code for this residue.
763 Function :
764 Example : $pdb_ins_co = $self->_pdb_ins_co( 15, 'B' );
765 Returns : a scalar
766 Args : ordinal number and chain
767
768
769 =cut
770
771 sub _pdbInsCo {
772 my $self = shift;
773 my $ord = shift;
774 my $chain = shift;
775 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) {
776 $self->throw( "No such ordinal $ord in chain $chain.\n" );
777 }
778 my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
779 my $letter_part;
780 ( $letter_part ) = ( $pdb_junk =~ /\d+(\D+)/ ); # insertion code can be any
781 # non-word character(s)
782 return $letter_part;
783 }
784
785 =head2 _toOrdChain
786
787 Title : _toOrdChain
788 Usage : takes any set of residue identifying parameters and
789 wrestles them into a two element array: the chain and the ordinal
790 number of this residue. This two element array can then be
791 efficiently used as keys in many of the above accessor methods
792 ('#A:B') or ( #, 'A', 'B' )
793 || |
794 || - Chain ID (blank for single chain)
795 |--- Insertion code for this residue. Blank for most residues.
796 |--- Numeric portion of residue ID.
797
798 (#)
799 |
800 --- Numeric portion of residue ID. If there is only one chain and
801 it has no ID AND there is no residue with an insertion code at this
802 number, then this can uniquely specify a residue.
803
804 # ('#:C) or ( #, 'C' )
805 | |
806 | -Chain ID
807 ---Numeric portion of residue ID.
808
809 If a residue is incompletely specified then the first residue that
810 fits the arguments is returned. For example, if 19 is the argument
811 and there are three chains, A, B, and C with a residue whose number
812 is 19, then 19:A will be returned (assuming its listed first).
813
814 Function :
815 Example : my ( $ord, $chain ) = $self->_toOrdChain( @args );
816 Returns : two element array
817 Args : valid set of residue identifier(s) ( SEE NOTE ABOVE )
818
819
820 =cut
821
822 sub _toOrdChain {
823 my $self = shift;
824 my $arg_str;
825
826 my ( $key_num, $chain_id, $ins_code, $key, $i );
827
828 # check to see how many args are given
829 if ( $#_ >= 1 ) { # multiple args
830 $key_num = shift;
831 if ( $#_ >= 1 ) { # still multiple args => ins. code, too
832 $ins_code = shift;
833 $chain_id = shift;
834 }
835 else { # just one more arg. => chain_id
836 $chain_id = shift;
837 }
838 }
839 else { # only single arg. Might be number or string
840 $arg_str = shift;
841 if ( $arg_str =~ /:/ ) {
842 # a chain is specified
843 ( $chain_id ) = ( $arg_str =~ /:(.)/);
844 $arg_str =~ s/:.//;
845 }
846 if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
847 # an insertion code is specified
848 ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
849 $arg_str =~ s/[A-Z]|[a-z]//g;
850 }
851 #now, get the number bit-> everything still around
852 $key_num = $arg_str;
853 }
854
855 $key = "$key_num$ins_code";
856 if ( !( $chain_id ) || $chain_id eq ' ' ) {
857 $chain_id = '-';
858 }
859
860 if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) {
861 $self->throw( "No such chain: $chain_id" );
862 }
863
864 for ( $i = 1; $i <= $#{$self->{ 'ASG' }->{ $chain_id }}; $i++ ) {
865 if ( $self->{ 'ASG' }->{ $chain_id }->[ $i ]->[ $ASGTable{ 'resNum' } ] eq
866 $key ) {
867 return ( $i, $chain_id );
868 }
869 }
870
871 $self->throw( "No such key: $key" );
872
873 }
874
875 =head2 _parse
876
877 Title : _parse
878 Usage : as name suggests, parses stride output, creating object
879 Function :
880 Example : $self->_parse( $io );
881 Returns :
882 Args : valid Bio::Root::IO object
883
884
885 =cut
886
887 sub _parse {
888 my $self = shift;
889 my $io = shift;
890 my $file = $io->_fh();
891
892 # Parse top lines
893 if ( $self->_parseTop( $io ) ) {
894 $self->throw( "Not stride output" );
895 }
896
897 # Parse the HDR, CMP, SCR, and AUT lines
898 $self->_parseHead( $io );
899
900 # Parse the CHN, SEQ, STR, and LOC lines
901 $self->_parseSummary( $io ); # we're ignoring this
902
903 # Parse the ASG lines
904 $self->_parseASG( $io );
905 }
906
907 =head2 _parseTop
908
909 Title : _parseTop
910 Usage : makes sure this looks like stride output
911 Function :
912 Example :
913 Returns :
914 Args :
915
916
917 =cut
918
919 sub _parseTop {
920 my $self = shift;
921 my $io = shift;
922 my $file = $io->_fh();
923 my $cur = <$file>;
924 if ( $cur =~ /^REM ---/ ) {
925 return 0;
926 }
927 return 1;
928 }
929
930 =head2 _parseHead
931
932 Title : _parseHead
933 Usage : parses
934 Function : HDR, CMP, SRC, and AUT lines
935 Example :
936 Returns :
937 Args :
938
939
940 =cut
941
942 sub _parseHead {
943 my $self = shift;
944 my $io = shift;
945 my $file = $io->_fh();
946 my $cur;
947 my $element;
948 my ( @elements, @cmp, @src, @aut );
949 my %head = {};
950 my $still_head = 1;
951
952 $cur = <$file>;
953 while ( $cur =~ /^REM / ) {
954 $cur = <$file>;
955 }
956
957 if ( $cur =~ /^HDR / ) {
958 @elements = split( /\s+/, $cur );
959 shift( @elements );
960 pop( @elements );
961 $self->{ 'PDB' } = pop( @elements );
962 $self->{ 'DATE' } = pop( @elements );
963 # now, everything else is "header" except for the word
964 # HDR
965 $element = join( ' ', @elements );
966 $head{ 'HEADER' } = $element;
967 }
968
969 $cur = <$file>;
970 while ( $cur =~ /^CMP / ) {
971 ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ );
972 push( @cmp, $cur );
973 $cur = <$file>;
974 }
975
976 while ( $cur =~ /^SRC / ) {
977 ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ );
978 push( @src, $cur );
979 $cur = <$file>;
980 }
981
982 while ( $cur =~ /^AUT / ) {
983 ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ );
984 push( @aut, $cur );
985 $cur = <$file>;
986 }
987
988 $head{ 'CMP' } = \@cmp;
989 $head{ 'SRC' } = \@src;
990 $head{ 'AUT' } = \@aut;
991 $self->{ 'HEAD' } = \%head;
992 }
993
994 =head2 _parseSummary
995
996 Title : _parseSummary
997 Usage : parses LOC lines
998 Function :
999 Example :
1000 Returns :
1001 Args :
1002
1003
1004 =cut
1005
1006 sub _parseSummary {
1007 my $self = shift;
1008 my $io = shift;
1009 my $file = $io->_fh();
1010 my $cur = <$file>;
1011 my $bound_set;
1012 my $element;
1013 my ( @elements, @cur );
1014 my @LOC_lookup = ( [ 5, 12 ], # Element name
1015 # reduntdant [ 18, 3 ], # First residue name
1016 [ 22, 5 ], # First residue PDB number
1017 [ 28, 1 ], # First residue Chain ID
1018 # redundant [ 35, 3 ], # Last residue name
1019 [ 40, 5 ], # Last residue PDB number
1020 [ 46, 1 ] ); # Last residue Chain ID
1021
1022 #ignore these lines
1023 while ( $cur =~ /^REM |^STR |^SEQ |^CHN / ) {
1024 $cur = <$file>;
1025 }
1026
1027 while ( $cur =~ /^LOC / ) {
1028 foreach $bound_set ( @LOC_lookup ) {
1029 $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] );
1030 $element =~ s/\s//g;
1031 push( @cur, $element );
1032 }
1033 push( @elements, [ @cur ] );
1034 $cur = <$file>;
1035 @cur = ();
1036 }
1037 $self->{ 'LOC' } = \@elements;
1038
1039 }
1040
1041 =head2 _parseASG
1042
1043 Title : _parseASG
1044 Usage : parses ASG lines
1045 Function :
1046 Example :
1047 Returns :
1048 Args :
1049
1050
1051 =cut
1052
1053 sub _parseASG {
1054 my $self = shift;
1055 my $io = shift;
1056 my $file = $io->_fh();
1057 my $cur = <$file>;
1058 my $bound_set;
1059 my $ord_num;
1060 my ( $chain, $last_chain );
1061 my $element;
1062 my %ASG;
1063 my ( @cur, @elements );
1064 my @ASG_lookup = ( [ 5, 3 ], # Residue name
1065 # [ 9, 1 ], # Chain ID
1066 [ 10, 5 ], # PDB residue number (w/ins.code)
1067 # [ 16, 4 ], # ordinal stride number
1068 [ 24, 1 ], # one letter sec. stru. abbr.
1069 [ 26, 13], # full sec. stru. name
1070 [ 42, 7 ], # phi angle
1071 [ 52, 7 ], # psi angle
1072 [ 64, 5 ] );# residue solv. acc.
1073
1074 while ( $cur =~ /^REM / ) {
1075 $cur = <$file>;
1076 }
1077
1078 while ( $cur =~ /^ASG / ) {
1079 # get ordinal number for array key
1080 $ord_num = substr( $cur, 16, 4 );
1081 $ord_num =~ s/\s//g;
1082
1083 # get the chain id
1084 $chain = substr( $cur, 9, 1 );
1085
1086 if ( $last_chain && ( $chain ne $last_chain ) ) {
1087 $ASG{ $last_chain } = [ @elements ];
1088 @elements = ();
1089 }
1090
1091 # now get the rest of the info on this line
1092 foreach $bound_set ( @ASG_lookup ) {
1093 $element = substr( $cur, $bound_set->[ 0 ],
1094 $bound_set->[ 1 ] );
1095 $element =~ s/\s//g;
1096 push( @cur, $element );
1097 }
1098 $elements[ $ord_num ] = [ @cur ];
1099 $cur = <$file>;
1100 @cur = ();
1101 $last_chain = $chain;
1102 }
1103
1104 $ASG{ $chain } = [ @elements ];
1105
1106 $self->{ 'ASG' } = \%ASG;
1107 }
1108
1109 1;
1110
1111
1112