Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Structure/SecStr/STRIDE/Res.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $id $ | |
2 # | |
3 # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm | |
4 # | |
5 # Cared for by Ed Green <ed@compbio.berkeley.edu> | |
6 # | |
7 # Copyright Univ. of California | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 =head1 NAME | |
13 | |
14 Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' ); | |
19 | |
20 # or | |
21 | |
22 my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT ); | |
23 | |
24 # Get secondary structure assignment for PDB residue 20 of chain A | |
25 $sec_str = $stride_obj->resSecStr( '20:A' ); | |
26 | |
27 # same | |
28 $sec_str = $stride_obj->resSecStr( 20, 'A' ) | |
29 | |
30 =head1 DESCRIPTION | |
31 | |
32 STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a | |
33 program (similar to DSSP) for assigning secondary structure to | |
34 individual residues of a pdb structure file. | |
35 | |
36 ( Knowledge-Based Protein Secondary Structure Assignment, | |
37 PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) ) | |
38 | |
39 STRIDE is available here: | |
40 http://www.embl-heidelberg.de/argos/stride/down_stride.html | |
41 | |
42 Methods are then available for extracting all of the infomation | |
43 present within the output or convenient subsets of it. | |
44 | |
45 Although they are very similar in function, DSSP and STRIDE differ | |
46 somewhat in output format. Thes differences are reflected in the | |
47 return value of some methods of these modules. For example, both | |
48 the STRIDE and DSSP parsers have resSecStr() methods for returning | |
49 the secondary structure of a given residue. However, the range of | |
50 return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the | |
51 range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See | |
52 individual methods for details. | |
53 | |
54 The methods are roughly divided into 3 sections: | |
55 | |
56 1. Global features of this structure (PDB ID, total surface area, | |
57 etc.). These methods do not require an argument. | |
58 2. Residue specific features ( amino acid, secondary structure, | |
59 solvent exposed surface area, etc. ). These methods do require an | |
60 arguement. The argument is supposed to uniquely identify a | |
61 residue described within the structure. It can be of any of the | |
62 following forms: | |
63 ('#A:B') or ( #, 'A', 'B' ) | |
64 || | | |
65 || - Chain ID (blank for single chain) | |
66 |--- Insertion code for this residue. Blank for most residues. | |
67 |--- Numeric portion of residue ID. | |
68 | |
69 (#) | |
70 | | |
71 --- Numeric portion of residue ID. If there is only one chain and | |
72 it has no ID AND there is no residue with an insertion code at this | |
73 number, then this can uniquely specify a residue. | |
74 | |
75 ('#:C') or ( #, 'C' ) | |
76 | | | |
77 | -Chain ID | |
78 ---Numeric portion of residue ID. | |
79 | |
80 If a residue is incompletely specified then the first residue that | |
81 fits the arguments is returned. For example, if 19 is the argument | |
82 and there are three chains, A, B, and C with a residue whose number | |
83 is 19, then 19:A will be returned (assuming its listed first). | |
84 | |
85 Since neither DSSP nor STRIDE correctly handle alt-loc codes, they | |
86 are not supported by these modules. | |
87 | |
88 3. Value-added methods. Return values are not verbatem strings | |
89 parsed from DSSP or STRIDE output. | |
90 | |
91 =head1 FEEDBACK | |
92 | |
93 =head2 MailingLists | |
94 | |
95 UsUser feedback is an integral part of the evolution of this and other | |
96 Bioperl modules. Send your comments and suggestions preferably to one | |
97 of the Bioperl mailing lists. Your participation is much appreciated. | |
98 | |
99 bioperl-l@bioperl.org - General discussion | |
100 http://bio.perl.org/MailList.html - About the mailing lists | |
101 | |
102 =head2 Reporting Bugs | |
103 | |
104 Report bugs to the Bioperl bug tracking system to help us keep track | |
105 the bugs and their resolution. Bug reports can be submitted via email | |
106 or the web: | |
107 | |
108 bioperl-bugs@bio.perl.org | |
109 http://bugzilla.bioperl.org/ | |
110 | |
111 =head1 AUTHOR - Ed Green | |
112 | |
113 Email ed@compbio.berkeley.edu | |
114 | |
115 | |
116 =head1 APPENDIX | |
117 | |
118 The Rest of the documentation details each method. | |
119 Internal methods are preceded with a _. | |
120 | |
121 | |
122 =cut | |
123 | |
124 package Bio::Structure::SecStr::STRIDE::Res; | |
125 use strict; | |
126 use vars qw(@ISA); | |
127 use Bio::Root::Root; | |
128 use Bio::Root::IO; | |
129 use Bio::PrimarySeq; | |
130 | |
131 @ISA = qw(Bio::Root::Root); | |
132 | |
133 our %ASGTable = ( 'aa' => 0, | |
134 'resNum' => 1, | |
135 'ssAbbr' => 2, | |
136 'ssName' => 3, | |
137 'phi' => 4, | |
138 'psi' => 5, | |
139 'surfArea' => 6 ); | |
140 | |
141 our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N', | |
142 'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q', | |
143 'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H', | |
144 'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K', | |
145 'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P', | |
146 'SER' => 'S', 'THR' => 'T', 'TRP' => 'W', | |
147 'TYR' => 'Y', 'VAL' => 'V' ); | |
148 | |
149 =head2 new | |
150 | |
151 Title : new | |
152 Usage : makes new object of this class | |
153 Function : Constructor | |
154 Example : $stride_obj = Bio::Structure::SecStr::STRIDE:Res->new( '-file' => filename | |
155 # or | |
156 '-fh' => FILEHANDLE ) | |
157 Returns : object (ref) | |
158 Args : filename or filehandle( must be proper STRIDE output ) | |
159 | |
160 =cut | |
161 | |
162 sub new { | |
163 my ( $class, @args ) = @_; | |
164 my $self = $class->SUPER::new( @args ); | |
165 my $io = Bio::Root::IO->new( @args ); | |
166 $self->_parse( $io ); # not passing filehandle ! | |
167 $io->close(); | |
168 return $self; | |
169 } | |
170 | |
171 # GLOBAL FEATURES / INFO / STATS | |
172 | |
173 =head2 totSurfArea | |
174 | |
175 Title : totSurfArea | |
176 Usage : returns sum of surface areas of all residues of all | |
177 chains considered. Result is memoized. | |
178 Function : | |
179 Example : $tot_SA = $stride_obj->totSurfArea(); | |
180 Returns : scalar | |
181 Args : none | |
182 | |
183 | |
184 =cut | |
185 | |
186 sub totSurfArea { | |
187 my $self = shift; | |
188 my $total = 0; | |
189 my ( $chain, $res ); | |
190 | |
191 if ( $self->{ 'SurfArea' } ) { | |
192 return $self->{ 'SurfArea' }; | |
193 } | |
194 else { | |
195 foreach $chain ( keys %{$self->{ 'ASG' }} ) { | |
196 for ( my $i = 1; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { | |
197 $total += | |
198 $self->{'ASG'}->{$chain}->[$i]->[$ASGTable{'surfArea'}]; | |
199 } | |
200 } | |
201 } | |
202 | |
203 $self->{ 'SurfArea' } = $total; | |
204 return $self->{ 'SurfArea' }; | |
205 | |
206 } | |
207 | |
208 =head2 numResidues | |
209 | |
210 Title : numResidues | |
211 Usage : returns total number of residues in all chains or | |
212 just the specified chain | |
213 Function : | |
214 Example : $tot_res = $stride_obj->numResidues(); | |
215 Returns : scalar int | |
216 Args : none or chain id | |
217 | |
218 | |
219 =cut | |
220 | |
221 sub numResidues { | |
222 my $self = shift; | |
223 my $chain = shift; | |
224 my $total = 0; | |
225 my $key; | |
226 foreach $key ( keys %{$self->{ 'ASG' }} ) { | |
227 if ( $chain ) { | |
228 if ( $key eq $chain ) { | |
229 $total += $#{$self->{ 'ASG' }{ $key }}; | |
230 } | |
231 } | |
232 else { | |
233 $total += $#{$self->{ 'ASG' }{ $key }}; | |
234 } | |
235 } | |
236 return $total; | |
237 } | |
238 | |
239 # STRAIGHT FROM THE PDB ENTRY | |
240 | |
241 =head2 pdbID | |
242 | |
243 Title : pdbID | |
244 Usage : returns pdb identifier ( 1FJM, e.g. ) | |
245 Function : | |
246 Example : $pdb_id = $stride_obj->pdbID(); | |
247 Returns : scalar string | |
248 Args : none | |
249 | |
250 | |
251 =cut | |
252 | |
253 sub pdbID { | |
254 my $self = shift; | |
255 return $self->{ 'PDB' }; | |
256 } | |
257 =head2 pdbAuthor | |
258 | |
259 Title : pdbAuthor | |
260 Usage : returns author of this PDB entry | |
261 Function : | |
262 Example : $auth = $stride_obj->pdbAuthor() | |
263 Returns : scalar string | |
264 Args : none | |
265 | |
266 | |
267 =cut | |
268 | |
269 sub pdbAuthor { | |
270 my $self = shift; | |
271 return join( ' ', @{ $self->{ 'HEAD' }->{ 'AUT' } } ); | |
272 } | |
273 | |
274 =head2 pdbCompound | |
275 | |
276 Title : pdbCompound | |
277 Usage : returns string of what was found on the | |
278 CMP lines | |
279 Function : | |
280 Example : $cmp = $stride_obj->pdbCompound(); | |
281 Returns : string | |
282 Args : none | |
283 | |
284 | |
285 =cut | |
286 | |
287 sub pdbCompound { | |
288 my $self = shift; | |
289 return join( ' ', @{ $self->{ 'HEAD' }->{ 'CMP' } } ); | |
290 } | |
291 | |
292 =head2 pdbDate | |
293 | |
294 Title : pdbDate | |
295 Usage : returns date given in PDB file | |
296 Function : | |
297 Example : $pdb_date = $stride_obj->pdbDate(); | |
298 Returns : scalar | |
299 Args : none | |
300 | |
301 | |
302 =cut | |
303 | |
304 sub pdbDate { | |
305 my $self = shift; | |
306 return $self->{ 'DATE' }; | |
307 } | |
308 | |
309 =head2 pdbHeader | |
310 | |
311 Title : pdbHeader | |
312 Usage : returns string of characters found on the PDB header line | |
313 Function : | |
314 Example : $head = $stride_obj->pdbHeader(); | |
315 Returns : scalar | |
316 Args : none | |
317 | |
318 | |
319 =cut | |
320 | |
321 sub pdbHeader { | |
322 my $self = shift; | |
323 return $self->{ 'HEAD' }->{ 'HEADER' }; | |
324 } | |
325 | |
326 =head2 pdbSource | |
327 | |
328 Title : pdbSource | |
329 Usage : returns string of what was found on SRC lines | |
330 Function : | |
331 Example : $src = $stride_obj->pdbSource(); | |
332 Returns : scalar | |
333 Args : none | |
334 | |
335 | |
336 =cut | |
337 | |
338 sub pdbSource { | |
339 my $self = shift; | |
340 return join( ' ', @{ $self->{ 'HEAD' }->{ 'SRC' } } ); | |
341 } | |
342 | |
343 # RESIDUE SPECIFIC ACCESSORS | |
344 | |
345 =head2 resAA | |
346 | |
347 Title : resAA | |
348 Usage : returns 1 letter abbr. of the amino acid specified by | |
349 the arguments | |
350 Function : | |
351 Examples : $aa = $stride_obj->resAA( RESIDUE_ID ); | |
352 Returns : scalar character | |
353 Args : RESIDUE_ID | |
354 | |
355 | |
356 =cut | |
357 | |
358 sub resAA { | |
359 my $self = shift; | |
360 my @args = @_; | |
361 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
362 return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); | |
363 } | |
364 | |
365 =head2 resPhi | |
366 | |
367 Title : resPhi | |
368 Usage : returns phi angle of specified residue | |
369 Function : | |
370 Example : $phi = $stride_obj->resPhi( RESIDUE_ID ); | |
371 Returns : scaler | |
372 Args : RESIDUE_ID | |
373 | |
374 | |
375 =cut | |
376 | |
377 sub resPhi { | |
378 my $self = shift; | |
379 my @args = @_; | |
380 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
381 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'phi' } ]; | |
382 } | |
383 | |
384 =head2 resPsi | |
385 | |
386 Title : resPsi | |
387 Usage : returns psi angle of specified residue | |
388 Function : | |
389 Example : $psi = $stride_obj->resPsi( RESIDUE_ID ); | |
390 Returns : scalar | |
391 Args : RESIDUE_ID | |
392 | |
393 | |
394 =cut | |
395 | |
396 sub resPsi { | |
397 my $self = shift; | |
398 my @args = @_; | |
399 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
400 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'psi' } ]; | |
401 } | |
402 | |
403 =head2 resSolvAcc | |
404 | |
405 Title : resSolvAcc | |
406 Usage : returns stride calculated surface area of specified residue | |
407 Function : | |
408 Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID ); | |
409 Returns : scalar | |
410 Args : RESIDUE_ID | |
411 | |
412 | |
413 =cut | |
414 | |
415 sub resSolvAcc { | |
416 my $self = shift; | |
417 my @args = @_; | |
418 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
419 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; | |
420 } | |
421 | |
422 =head2 resSurfArea | |
423 | |
424 Title : resSurfArea | |
425 Usage : returns stride calculated surface area of specified residue | |
426 Function : | |
427 Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID ); | |
428 Returns : scalar | |
429 Args : RESIDUE_ID | |
430 | |
431 | |
432 =cut | |
433 | |
434 sub resSurfArea { | |
435 my $self = shift; | |
436 my @args = @_; | |
437 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
438 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; | |
439 } | |
440 | |
441 =head2 resSecStr | |
442 | |
443 Title : resSecStr | |
444 Usage : gives one letter abbr. of stride determined secondary | |
445 structure of specified residue | |
446 Function : | |
447 Example : $ss = $stride_obj->resSecStr( RESIDUE_ID ); | |
448 Returns : one of: 'H' => Alpha Helix | |
449 'G' => 3-10 helix | |
450 'I' => PI-helix | |
451 'E' => Extended conformation | |
452 'B' or 'b' => Isolated bridge | |
453 'T' => Turn | |
454 'C' => Coil | |
455 ' ' => None | |
456 # NOTE: This range is slightly DIFFERENT from the | |
457 # DSSP method of the same name | |
458 Args : RESIDUE_ID | |
459 | |
460 | |
461 =cut | |
462 | |
463 sub resSecStr { | |
464 my $self = shift; | |
465 my @args = @_; | |
466 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
467 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssAbbr' } ]; | |
468 } | |
469 | |
470 =head2 resSecStrSum | |
471 | |
472 Title : resSecStrSum | |
473 Usage : gives one letter summary of secondary structure of | |
474 specified residue. More general than secStruc() | |
475 Function : | |
476 Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID ); | |
477 Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil) | |
478 Args : residue identifier(s) ( SEE INTRO NOTE ) | |
479 | |
480 | |
481 =cut | |
482 | |
483 sub resSecStrSum { | |
484 my $self = shift; | |
485 my @args = @_; | |
486 my $ss_char = $self->resSecStr( @args ); | |
487 | |
488 if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) { | |
489 return 'H'; | |
490 } | |
491 if ( $ss_char eq 'E' || $ss_char eq 'B' || $ss_char eq 'b' ) { | |
492 return 'B'; | |
493 } | |
494 if ( $ss_char eq 'T' ) { | |
495 return 'T'; | |
496 } | |
497 else { | |
498 return 'C'; | |
499 } | |
500 } | |
501 | |
502 # STRIDE SPECIFIC | |
503 | |
504 =head2 resSecStrName | |
505 | |
506 Title : resSecStrName | |
507 Usage : gives full name of the secondary structural element | |
508 classification of the specified residue | |
509 Function : | |
510 Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID ); | |
511 Returns : scalar string | |
512 Args : RESIDUE_ID | |
513 | |
514 | |
515 =cut | |
516 | |
517 sub resSecStrName { | |
518 my $self = shift; | |
519 my @args = @_; | |
520 my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
521 return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssName' } ]; | |
522 } | |
523 | |
524 =head2 strideLocs | |
525 | |
526 Title : strideLocs | |
527 Usage : returns stride determined contiguous secondary | |
528 structural elements as specified on the LOC lines | |
529 Function : | |
530 Example : $loc_pnt = $stride_obj->strideLocs(); | |
531 Returns : pointer to array of 5 element arrays. | |
532 0 => stride name of structural element | |
533 1 => first residue pdb key (including insertion code, if app.) | |
534 2 => first residue chain id | |
535 3 => last residue pdb key (including insertion code, if app.) | |
536 4 => last residue chain id | |
537 NOTE the differences between this range and the range of SecBounds() | |
538 Args : none | |
539 | |
540 | |
541 =cut | |
542 | |
543 sub strideLocs { | |
544 my $self = shift; | |
545 return $self->{ 'LOC' }; | |
546 } | |
547 | |
548 # VALUE ADDED METHODS (NOT JUST PARSE/REPORT) | |
549 | |
550 =head2 secBounds | |
551 | |
552 Title : secBounds | |
553 Usage : gets residue ids of boundary residues in each | |
554 contiguous secondary structural element of specified | |
555 chain | |
556 Function : | |
557 Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' ); | |
558 Returns : pointer to array of 3 element arrays. First two elements | |
559 are the PDB IDs of the start and end points, respectively | |
560 and inclusively. The last element is the STRIDE secondary | |
561 structural element code (same range as resSecStr). | |
562 Args : chain identifier ( one character ). If none, '-' is assumed | |
563 | |
564 | |
565 =cut | |
566 | |
567 sub secBounds { | |
568 # Requires a chain name. If left blank, we assume ' ' which equals '-' | |
569 my $self = shift; | |
570 my $chain = shift; | |
571 my @SecBounds; | |
572 | |
573 $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' ); | |
574 | |
575 # if we've memoized this one, use that | |
576 if ( $self->{ 'SecBounds' }->{ $chain } ) { | |
577 return $self->{ 'SecBounds' }->{ $chain }; | |
578 } | |
579 | |
580 #check to make sure chain is valid | |
581 if ( !( $self->{ 'ASG' }->{ $chain } ) ) { | |
582 $self->throw( "No such chain: $chain\n" ); | |
583 } | |
584 | |
585 my $cur_element = $self->{ 'ASG' }->{ $chain }->[ 1 ]-> | |
586 [ $ASGTable{ 'ssAbbr' } ]; | |
587 my $beg = 1; | |
588 my $i; | |
589 | |
590 for ( $i = 2; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { | |
591 if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] | |
592 ne $cur_element ) { | |
593 push( @SecBounds, [ $beg, $i -1 , $cur_element ] ); | |
594 $beg = $i; | |
595 $cur_element = $self->{ 'ASG' }->{ $chain }->[ $i ]-> | |
596 [ $ASGTable{ 'ssAbbr' } ]; | |
597 } | |
598 } | |
599 | |
600 if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] | |
601 eq $cur_element ) { | |
602 push( @SecBounds, [ $beg, $i, $cur_element ] ); | |
603 } | |
604 else { | |
605 push( @SecBounds, [ $beg, $i - 1, $cur_element ], | |
606 [ $i, $i, $self->{ 'ASG' }->{ $chain }->[ $i ]-> | |
607 [ $ASGTable{ 'ssAbbr' } ] ] ); | |
608 } | |
609 | |
610 $self->{ 'SecBounds' }->{ $chain } = \@SecBounds; | |
611 return $self->{ 'SecBounds' }->{ $chain }; | |
612 } | |
613 | |
614 =head2 chains | |
615 | |
616 Title : chains | |
617 Usage : gives array chain I.D.s (characters) | |
618 Function : | |
619 Example : @chains = $stride_obj->chains(); | |
620 Returns : array of characters | |
621 Args : none | |
622 | |
623 | |
624 =cut | |
625 | |
626 sub chains { | |
627 my $self = shift; | |
628 my @chains = keys ( %{ $self->{ 'ASG' } } ); | |
629 return \@chains; | |
630 } | |
631 | |
632 =head2 getSeq | |
633 | |
634 Title : getSeq | |
635 Usage : returns a Bio::PrimarySeq object which represents an | |
636 approximation at the sequence of the specified chain. | |
637 Function : For most chain of most entries, the sequence returned by | |
638 this method will be very good. However, it it inherently | |
639 unsafe to rely on STRIDE to extract sequence information about | |
640 a PDB entry. More reliable information can be obtained from | |
641 the PDB entry itself. If a second option is given | |
642 (and evaluates to true), the sequence generated will | |
643 have 'X' in spaces where the pdb residue numbers are | |
644 discontinuous. In some cases this results in a | |
645 better sequence object (when the discontinuity is | |
646 due to regions which were present, but could not be | |
647 resolved). In other cases, it will result in a WORSE | |
648 sequence object (when the discontinuity is due to | |
649 historical sequence numbering and all sequence is | |
650 actually resolved). | |
651 Example : $pso = $dssp_obj->getSeq( 'A' ); | |
652 Returns : (pointer to) a PrimarySeq object | |
653 Args : Chain identifier. If none given, '-' is assumed. | |
654 | |
655 | |
656 =cut | |
657 | |
658 sub getSeq { | |
659 my $self = shift; | |
660 my $chain = shift; | |
661 my $fill_in = shift; | |
662 | |
663 if ( !( $chain ) ) { | |
664 $chain = '-'; | |
665 } | |
666 | |
667 if ( $self->{ 'Seq' }->{ $chain } ) { | |
668 return $self->{ 'Seq' }->{ $chain }; | |
669 } | |
670 | |
671 my ( $seq, | |
672 $num_res, | |
673 $last_res_num, | |
674 $cur_res_num, | |
675 $i, | |
676 $step, | |
677 $id | |
678 ); | |
679 | |
680 $seq = ""; | |
681 $num_res = $self->numResidues( $chain ); | |
682 $last_res_num = $self->_pdbNum( 1, $chain ); | |
683 for ( $i = 1; $i <= $num_res; $i++ ) { | |
684 if ( $fill_in ) { | |
685 $cur_res_num = $self->_pdbNum( $i, $chain ); | |
686 $step = $cur_res_num - $last_res_num; | |
687 if ( $step > 1 ) { | |
688 $seq .= 'X' x ( $step - 1 ); | |
689 } | |
690 } | |
691 $seq .= $self->_resAA( $i, $chain ); | |
692 $last_res_num = $cur_res_num; | |
693 } | |
694 | |
695 $id = $self->pdbID(); | |
696 $id .= "$chain"; | |
697 | |
698 $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq, | |
699 -id => $id, | |
700 -moltype => 'protein' | |
701 ); | |
702 | |
703 return $self->{ 'Seq' }->{ $chain }; | |
704 } | |
705 | |
706 =head1 INTERNAL METHODS | |
707 | |
708 =head2 _pdbNum | |
709 | |
710 Title : _pdbNum | |
711 Usage : fetches the numeric portion of the identifier for a given | |
712 residue as reported by the pdb entry. Note, this DOES NOT | |
713 uniquely specify a residue. There may be an insertion code | |
714 and/or chain identifier differences. | |
715 Function : | |
716 Example : $pdbNum = $self->pdbNum( 3, 'A' ); | |
717 Returns : a scalar | |
718 Args : valid ordinal num / chain combination | |
719 | |
720 | |
721 =cut | |
722 | |
723 sub _pdbNum { | |
724 my $self = shift; | |
725 my $ord = shift; | |
726 my $chain = shift; | |
727 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { | |
728 $self->throw( "No such ordinal $ord in chain $chain.\n" ); | |
729 } | |
730 my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; | |
731 my $num_part; | |
732 ( $num_part ) = ( $pdb_junk =~ /(-*\d+).*/ ); | |
733 return $num_part; | |
734 } | |
735 | |
736 =head2 _resAA | |
737 | |
738 Title : _resAA | |
739 Usage : returns 1 letter abbr. of the amino acid specified by | |
740 the arguments | |
741 Function : | |
742 Examples : $aa = $stride_obj->_resAA( 3, '-' ); | |
743 Returns : scalar character | |
744 Args : ( ord. num, chain ) | |
745 | |
746 | |
747 =cut | |
748 | |
749 sub _resAA { | |
750 my $self = shift; | |
751 my $ord = shift; | |
752 my $chain = shift; | |
753 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { | |
754 $self->throw( "No such ordinal $ord in chain $chain.\n" ); | |
755 } | |
756 return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); | |
757 } | |
758 | |
759 =head2 _pdbInsCo | |
760 | |
761 Title : _pdbInsCo | |
762 Usage : fetches the Insertion code for this residue. | |
763 Function : | |
764 Example : $pdb_ins_co = $self->_pdb_ins_co( 15, 'B' ); | |
765 Returns : a scalar | |
766 Args : ordinal number and chain | |
767 | |
768 | |
769 =cut | |
770 | |
771 sub _pdbInsCo { | |
772 my $self = shift; | |
773 my $ord = shift; | |
774 my $chain = shift; | |
775 if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { | |
776 $self->throw( "No such ordinal $ord in chain $chain.\n" ); | |
777 } | |
778 my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; | |
779 my $letter_part; | |
780 ( $letter_part ) = ( $pdb_junk =~ /\d+(\D+)/ ); # insertion code can be any | |
781 # non-word character(s) | |
782 return $letter_part; | |
783 } | |
784 | |
785 =head2 _toOrdChain | |
786 | |
787 Title : _toOrdChain | |
788 Usage : takes any set of residue identifying parameters and | |
789 wrestles them into a two element array: the chain and the ordinal | |
790 number of this residue. This two element array can then be | |
791 efficiently used as keys in many of the above accessor methods | |
792 ('#A:B') or ( #, 'A', 'B' ) | |
793 || | | |
794 || - Chain ID (blank for single chain) | |
795 |--- Insertion code for this residue. Blank for most residues. | |
796 |--- Numeric portion of residue ID. | |
797 | |
798 (#) | |
799 | | |
800 --- Numeric portion of residue ID. If there is only one chain and | |
801 it has no ID AND there is no residue with an insertion code at this | |
802 number, then this can uniquely specify a residue. | |
803 | |
804 # ('#:C) or ( #, 'C' ) | |
805 | | | |
806 | -Chain ID | |
807 ---Numeric portion of residue ID. | |
808 | |
809 If a residue is incompletely specified then the first residue that | |
810 fits the arguments is returned. For example, if 19 is the argument | |
811 and there are three chains, A, B, and C with a residue whose number | |
812 is 19, then 19:A will be returned (assuming its listed first). | |
813 | |
814 Function : | |
815 Example : my ( $ord, $chain ) = $self->_toOrdChain( @args ); | |
816 Returns : two element array | |
817 Args : valid set of residue identifier(s) ( SEE NOTE ABOVE ) | |
818 | |
819 | |
820 =cut | |
821 | |
822 sub _toOrdChain { | |
823 my $self = shift; | |
824 my $arg_str; | |
825 | |
826 my ( $key_num, $chain_id, $ins_code, $key, $i ); | |
827 | |
828 # check to see how many args are given | |
829 if ( $#_ >= 1 ) { # multiple args | |
830 $key_num = shift; | |
831 if ( $#_ >= 1 ) { # still multiple args => ins. code, too | |
832 $ins_code = shift; | |
833 $chain_id = shift; | |
834 } | |
835 else { # just one more arg. => chain_id | |
836 $chain_id = shift; | |
837 } | |
838 } | |
839 else { # only single arg. Might be number or string | |
840 $arg_str = shift; | |
841 if ( $arg_str =~ /:/ ) { | |
842 # a chain is specified | |
843 ( $chain_id ) = ( $arg_str =~ /:(.)/); | |
844 $arg_str =~ s/:.//; | |
845 } | |
846 if ( $arg_str =~ /[A-Z]|[a-z]/ ) { | |
847 # an insertion code is specified | |
848 ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ ); | |
849 $arg_str =~ s/[A-Z]|[a-z]//g; | |
850 } | |
851 #now, get the number bit-> everything still around | |
852 $key_num = $arg_str; | |
853 } | |
854 | |
855 $key = "$key_num$ins_code"; | |
856 if ( !( $chain_id ) || $chain_id eq ' ' ) { | |
857 $chain_id = '-'; | |
858 } | |
859 | |
860 if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) { | |
861 $self->throw( "No such chain: $chain_id" ); | |
862 } | |
863 | |
864 for ( $i = 1; $i <= $#{$self->{ 'ASG' }->{ $chain_id }}; $i++ ) { | |
865 if ( $self->{ 'ASG' }->{ $chain_id }->[ $i ]->[ $ASGTable{ 'resNum' } ] eq | |
866 $key ) { | |
867 return ( $i, $chain_id ); | |
868 } | |
869 } | |
870 | |
871 $self->throw( "No such key: $key" ); | |
872 | |
873 } | |
874 | |
875 =head2 _parse | |
876 | |
877 Title : _parse | |
878 Usage : as name suggests, parses stride output, creating object | |
879 Function : | |
880 Example : $self->_parse( $io ); | |
881 Returns : | |
882 Args : valid Bio::Root::IO object | |
883 | |
884 | |
885 =cut | |
886 | |
887 sub _parse { | |
888 my $self = shift; | |
889 my $io = shift; | |
890 my $file = $io->_fh(); | |
891 | |
892 # Parse top lines | |
893 if ( $self->_parseTop( $io ) ) { | |
894 $self->throw( "Not stride output" ); | |
895 } | |
896 | |
897 # Parse the HDR, CMP, SCR, and AUT lines | |
898 $self->_parseHead( $io ); | |
899 | |
900 # Parse the CHN, SEQ, STR, and LOC lines | |
901 $self->_parseSummary( $io ); # we're ignoring this | |
902 | |
903 # Parse the ASG lines | |
904 $self->_parseASG( $io ); | |
905 } | |
906 | |
907 =head2 _parseTop | |
908 | |
909 Title : _parseTop | |
910 Usage : makes sure this looks like stride output | |
911 Function : | |
912 Example : | |
913 Returns : | |
914 Args : | |
915 | |
916 | |
917 =cut | |
918 | |
919 sub _parseTop { | |
920 my $self = shift; | |
921 my $io = shift; | |
922 my $file = $io->_fh(); | |
923 my $cur = <$file>; | |
924 if ( $cur =~ /^REM ---/ ) { | |
925 return 0; | |
926 } | |
927 return 1; | |
928 } | |
929 | |
930 =head2 _parseHead | |
931 | |
932 Title : _parseHead | |
933 Usage : parses | |
934 Function : HDR, CMP, SRC, and AUT lines | |
935 Example : | |
936 Returns : | |
937 Args : | |
938 | |
939 | |
940 =cut | |
941 | |
942 sub _parseHead { | |
943 my $self = shift; | |
944 my $io = shift; | |
945 my $file = $io->_fh(); | |
946 my $cur; | |
947 my $element; | |
948 my ( @elements, @cmp, @src, @aut ); | |
949 my %head = {}; | |
950 my $still_head = 1; | |
951 | |
952 $cur = <$file>; | |
953 while ( $cur =~ /^REM / ) { | |
954 $cur = <$file>; | |
955 } | |
956 | |
957 if ( $cur =~ /^HDR / ) { | |
958 @elements = split( /\s+/, $cur ); | |
959 shift( @elements ); | |
960 pop( @elements ); | |
961 $self->{ 'PDB' } = pop( @elements ); | |
962 $self->{ 'DATE' } = pop( @elements ); | |
963 # now, everything else is "header" except for the word | |
964 # HDR | |
965 $element = join( ' ', @elements ); | |
966 $head{ 'HEADER' } = $element; | |
967 } | |
968 | |
969 $cur = <$file>; | |
970 while ( $cur =~ /^CMP / ) { | |
971 ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ ); | |
972 push( @cmp, $cur ); | |
973 $cur = <$file>; | |
974 } | |
975 | |
976 while ( $cur =~ /^SRC / ) { | |
977 ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ ); | |
978 push( @src, $cur ); | |
979 $cur = <$file>; | |
980 } | |
981 | |
982 while ( $cur =~ /^AUT / ) { | |
983 ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ ); | |
984 push( @aut, $cur ); | |
985 $cur = <$file>; | |
986 } | |
987 | |
988 $head{ 'CMP' } = \@cmp; | |
989 $head{ 'SRC' } = \@src; | |
990 $head{ 'AUT' } = \@aut; | |
991 $self->{ 'HEAD' } = \%head; | |
992 } | |
993 | |
994 =head2 _parseSummary | |
995 | |
996 Title : _parseSummary | |
997 Usage : parses LOC lines | |
998 Function : | |
999 Example : | |
1000 Returns : | |
1001 Args : | |
1002 | |
1003 | |
1004 =cut | |
1005 | |
1006 sub _parseSummary { | |
1007 my $self = shift; | |
1008 my $io = shift; | |
1009 my $file = $io->_fh(); | |
1010 my $cur = <$file>; | |
1011 my $bound_set; | |
1012 my $element; | |
1013 my ( @elements, @cur ); | |
1014 my @LOC_lookup = ( [ 5, 12 ], # Element name | |
1015 # reduntdant [ 18, 3 ], # First residue name | |
1016 [ 22, 5 ], # First residue PDB number | |
1017 [ 28, 1 ], # First residue Chain ID | |
1018 # redundant [ 35, 3 ], # Last residue name | |
1019 [ 40, 5 ], # Last residue PDB number | |
1020 [ 46, 1 ] ); # Last residue Chain ID | |
1021 | |
1022 #ignore these lines | |
1023 while ( $cur =~ /^REM |^STR |^SEQ |^CHN / ) { | |
1024 $cur = <$file>; | |
1025 } | |
1026 | |
1027 while ( $cur =~ /^LOC / ) { | |
1028 foreach $bound_set ( @LOC_lookup ) { | |
1029 $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] ); | |
1030 $element =~ s/\s//g; | |
1031 push( @cur, $element ); | |
1032 } | |
1033 push( @elements, [ @cur ] ); | |
1034 $cur = <$file>; | |
1035 @cur = (); | |
1036 } | |
1037 $self->{ 'LOC' } = \@elements; | |
1038 | |
1039 } | |
1040 | |
1041 =head2 _parseASG | |
1042 | |
1043 Title : _parseASG | |
1044 Usage : parses ASG lines | |
1045 Function : | |
1046 Example : | |
1047 Returns : | |
1048 Args : | |
1049 | |
1050 | |
1051 =cut | |
1052 | |
1053 sub _parseASG { | |
1054 my $self = shift; | |
1055 my $io = shift; | |
1056 my $file = $io->_fh(); | |
1057 my $cur = <$file>; | |
1058 my $bound_set; | |
1059 my $ord_num; | |
1060 my ( $chain, $last_chain ); | |
1061 my $element; | |
1062 my %ASG; | |
1063 my ( @cur, @elements ); | |
1064 my @ASG_lookup = ( [ 5, 3 ], # Residue name | |
1065 # [ 9, 1 ], # Chain ID | |
1066 [ 10, 5 ], # PDB residue number (w/ins.code) | |
1067 # [ 16, 4 ], # ordinal stride number | |
1068 [ 24, 1 ], # one letter sec. stru. abbr. | |
1069 [ 26, 13], # full sec. stru. name | |
1070 [ 42, 7 ], # phi angle | |
1071 [ 52, 7 ], # psi angle | |
1072 [ 64, 5 ] );# residue solv. acc. | |
1073 | |
1074 while ( $cur =~ /^REM / ) { | |
1075 $cur = <$file>; | |
1076 } | |
1077 | |
1078 while ( $cur =~ /^ASG / ) { | |
1079 # get ordinal number for array key | |
1080 $ord_num = substr( $cur, 16, 4 ); | |
1081 $ord_num =~ s/\s//g; | |
1082 | |
1083 # get the chain id | |
1084 $chain = substr( $cur, 9, 1 ); | |
1085 | |
1086 if ( $last_chain && ( $chain ne $last_chain ) ) { | |
1087 $ASG{ $last_chain } = [ @elements ]; | |
1088 @elements = (); | |
1089 } | |
1090 | |
1091 # now get the rest of the info on this line | |
1092 foreach $bound_set ( @ASG_lookup ) { | |
1093 $element = substr( $cur, $bound_set->[ 0 ], | |
1094 $bound_set->[ 1 ] ); | |
1095 $element =~ s/\s//g; | |
1096 push( @cur, $element ); | |
1097 } | |
1098 $elements[ $ord_num ] = [ @cur ]; | |
1099 $cur = <$file>; | |
1100 @cur = (); | |
1101 $last_chain = $chain; | |
1102 } | |
1103 | |
1104 $ASG{ $chain } = [ @elements ]; | |
1105 | |
1106 $self->{ 'ASG' } = \%ASG; | |
1107 } | |
1108 | |
1109 1; | |
1110 | |
1111 | |
1112 |