Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Structure/SecStr/DSSP/Res.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Structure/SecStr/DSSP/Res.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1319 @@ +# $id $ +# +# bioperl module for Bio::Structure::SecStr::DSSP::Res.pm +# +# Cared for by Ed Green <ed@compbio.berkeley.edu> +# +# Copyright Univ. of California +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Structure::SecStr::DSSP::Res - Module for parsing/accessing dssp output + +=head1 SYNOPSIS + + my $dssp_obj = new Bio::Structure::SecStr::DSSP::Res( '-file' => 'filename.dssp' ); + + # or + + my $dssp-obj = new Bio::Structure::SecStr::DSSP::Res( '-fh' => \*STDOUT ); + + # get DSSP defined Secondary Structure for residue 20 + $sec_str = $dssp_obj->resSecStr( 20 ); + + # get dssp defined sec. structure summary for PDB residue # 10 of chain A + + $sec_str = $dssp_obj->resSecStrSum( '10:A' ); + +=head1 DESCRIPTION + +DSSP::Res is a module for objectifying DSSP output. Methods are then +available for extracting all the information within the output file +and convenient subsets of it. +The principal purpose of DSSP is to determine secondary structural +elements of a given structure. + + ( Dictionary of protein secondary structure: pattern recognition + of hydrogen-bonded and geometrical features. + Biopolymers. 1983 Dec;22(12):2577-637. ) + +The DSSP program is available from: + http://www.cmbi.kun.nl/swift/dssp + +This information is available on a per residue basis ( see resSecStr +and resSecStrSum methods ) or on a per chain basis ( see secBounds +method ). + +resSecStr() & secBounds() return one of the following: + 'H' = alpha helix + 'B' = residue in isolated beta-bridge + 'E' = extended strand, participates in beta ladder + 'G' = 3-helix (3/10 helix) + 'I' = 5 helix (pi helix) + 'T' = hydrogen bonded turn + 'S' = bend + '' = no assignment + +A more general classification is returned using the resSecStrSum() +method. The purpose of this is to have a method for DSSP and STRIDE +derived output whose range is the same. +Its output is one of the following: + + 'H' = helix ( => 'H', 'G', or 'I' from above ) + 'B' = beta ( => 'B' or 'E' from above ) + 'T' = turn ( => 'T' or 'S' from above ) + ' ' = no assignment ( => ' ' from above ) + +The methods are roughly divided into 3 sections: +1. Global features of this structure (PDB ID, total surface area, + etc.). These methods do not require an argument. +2. Residue specific features ( amino acid, secondary structure, + solvent exposed surface area, etc. ). These methods do require an + arguement. The argument is supposed to uniquely identify a + residue described within the structure. It can be of any of the + following forms: + ('#A:B') or ( #, 'A', 'B' ) + || | + || - Chain ID (blank for single chain) + |--- Insertion code for this residue. Blank for most residues. + |--- Numeric portion of residue ID. + + (#) + | + --- Numeric portion of residue ID. If there is only one chain and + it has no ID AND there is no residue with an insertion code at this + number, then this can uniquely specify a residue. + + ('#:C') or ( #, 'C' ) + | | + | -Chain ID + ---Numeric portion of residue ID. + + If a residue is incompletely specified then the first residue that + fits the arguments is returned. For example, if 19 is the argument + and there are three chains, A, B, and C with a residue whose number + is 19, then 19:A will be returned (assuming its listed first). + + Since neither DSSP nor STRIDE correctly handle alt-loc codes, they + are not supported by these modules. + +3. Value-added methods. Return values are not verbatem strings + parsed from DSSP or STRIDE output. + + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Ed Green + +Email ed@compbio.berkeley.edu + + +=head1 APPENDIX + +The rest of the documentation details each method. +Internal methods are preceded with a _ + +=cut + +package Bio::Structure::SecStr::DSSP::Res; +use strict; +use vars qw(@ISA); +use Bio::Root::Root; +use Bio::Root::IO; +use Bio::PrimarySeq; + +@ISA = qw(Bio::Root::Root); + +# Would be a class variable if Perl had them + + #attribute begin col # columns +our %lookUp = ( 'pdb_resnum' => [ 5, 5 ], + 'insertionco' => [ 10, 1 ], + 'pdb_chain' => [ 11, 1 ], + + 'amino_acid' => [ 13, 1 ], + 'term_sig' => [ 14, 1 ], + + 'ss_summary' => [ 16, 1 ], + '3tph' => [ 18, 1 ], + '4tph' => [ 19, 1 ], + '5tph' => [ 20, 1 ], + 'geo_bend' => [ 21, 1 ], + 'chirality' => [ 22, 1 ], + 'beta_br1la' => [ 23, 1 ], + 'beta_br2la' => [ 24, 1 ], + + 'bb_part1nu' => [ 25, 4 ], + 'bb_part2nu' => [ 29, 4 ], + 'betash_lab' => [ 33, 1 ], + + 'solv_acces' => [ 34, 4 ], + + 'hb1_nh_o_p' => [ 39, 6 ], + 'hb1_nh_o_e' => [ 46, 4 ], + + 'hb1_o_hn_p' => [ 50, 6 ], + 'hb1_o_hn_e' => [ 57, 4 ], + + 'hb2_nh_o_p' => [ 61, 6 ], + 'hb2_nh_o_e' => [ 68, 4 ], + + 'hb2_o_hn_p' => [ 72, 6 ], + 'hb2_o_hn_e' => [ 79, 4 ], + + 'tco' => [ 85, 6 ], + + 'kappa' => [ 91, 6 ], + + 'alpha' => [ 97, 6 ], + + 'phi' => [ 103, 6 ], + + 'psi' => [ 109, 6 ], + + 'x_ca' => [ 115, 7 ], + + 'y_ca' => [ 122, 7 ], + + 'z_ca' => [ 129, 7 ] ); + + +=head1 CONSTRUCTOR + + +=cut + + +=head2 new + + Title : new + Usage : makes new object of this class + Function : Constructor + Example : $dssp_obj = Bio::DSSP:Res->new( filename or FILEHANDLE ) + Returns : object (ref) + Args : filename ( must be proper DSSP output file ) + +=cut + +sub new { + my ( $class, @args ) = @_; + my $self = $class->SUPER::new( @args ); + my $io = Bio::Root::IO->new( @args ); + $self->_parse( $io->_fh() ); + $io->close(); + return $self; +} + +=head1 ACCESSORS + + +=cut + +# GLOBAL FEATURES / INFO / STATS + +=head2 totSurfArea + + Title : totSurfArea + Usage : returns total accessible surface area in square Ang. + Function : + Example : $surArea = $dssp_obj->totSurfArea(); + Returns : scalar + Args : none + +=cut + +sub totSurfArea { + my $self = shift; + return $self->{ 'Head' }->{ 'ProAccSurf' }; +} + +=head2 numResidues + + Title : numResidues + Usage : returns the total number of residues in all chains or + just the specified chain if a chain is specified + Function : + Example : $num_res = $dssp_obj->numResidues(); + Returns : scalar int + Args : none + + +=cut + +sub numResidues { + my $self = shift; + my $chain = shift; + if ( !( $chain ) ) { + return $self->{'Head'}->{'TotNumRes'}; + } + else { + my ( $num_res, + $cont_seg ); + my $cont_seg_pnt = $self->_contSegs(); + foreach $cont_seg ( @{ $cont_seg_pnt } ) { + if ( $chain eq $cont_seg->[ 2 ] ) { + # this segment is part of the chain we want + $num_res += ( $self->_toDsspKey( $cont_seg->[ 1 ] ) + - $self->_toDsspKey( $cont_seg->[ 0 ] ) + + 1 ); # this works because we know the + # the region between the start + # and end of a dssp key is + # continuous + } + } + return $num_res; + } +} + +# STRAIGHT FROM PDB ENTRY + +=head2 pdbID + + Title : pdbID + Usage : returns pdb identifier ( 1FJM, e.g.) + Function : + Example : $pdb_id = $dssp_obj->pdbID(); + Returns : scalar string + Args : none + + +=cut + +sub pdbID { + my $self = shift; + return $self->{'Head'}->{'PDB'}; +} + +=head2 pdbAuthor + + Title : pdbAuthor + Usage : returns author field + Function : + Example : $auth = $dssp_obj->pdbAuthor() + Returns : scalar string + Args : none + + +=cut + +sub pdbAuthor { + my $self = shift; + return $self->{'Head'}->{'AUTHOR'}; +} + +=head2 pdbCompound + + Title : pdbCompound + Usage : returns pdbCompound given in PDB file + Function : + Example : $cmpd = $dssp_obj->pdbCompound(); + Returns : scalar string + Args : none + + +=cut + +sub pdbCompound { + my $self = shift; + return $self->{'Head'}->{'COMPND'}; +} + +=head2 pdbDate + + Title : pdbDate + Usage : returns date given in PDB file + Function : + Example : $pdb_date = $dssp_obj->pdbDate(); + Returns : scalar + Args : none + + +=cut + +sub pdbDate { + my $self = shift; + return $self->{'Head'}->{'DATE'}; +} + +=head2 pdbHeader + + Title : pdbHeader + Usage : returns header info from PDB file + Function : + Example : $header = $dssp_obj->pdbHeader(); + Returns : scalar + Args : none + + +=cut + +sub pdbHeader { + my $self = shift; + return $self->{'Head'}->{'HEADER'}; +} + +=head2 pdbSource + + Title : pdbSource + Usage : returns pdbSource information from PDBSOURCE line + Function : + Example : $pdbSource = $dssp_obj->pdbSource(); + Returns : scalar + Args : none + + +=cut + +sub pdbSource { + my $self = shift; + return $self->{'Head'}->{'SOURCE'}; +} + + +# RESIDUE SPECIFIC ACCESSORS + +=head2 resAA + + Title : resAA + Usage : fetches the 1 char amino acid code, given an id + Function : + Example : $aa = $dssp_obj->aminoAcid( '20:A' ); # pdb id as arg + Returns : 1 character scalar string + Args : RESIDUE_ID + + +=cut + +sub resAA { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' }; +} + +=head2 resPhi + + Title : resPhi + Usage : returns phi angle of a single residue + Function : accessor + Example : $phi = $dssp_obj->resPhi( RESIDUE_ID ) + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resPhi { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'phi' }; +} + +=head2 resPsi + + Title : resPsi + Usage : returns psi angle of a single residue + Function : accessor + Example : $psi = $dssp_obj->resPsi( RESIDUE_ID ) + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resPsi { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'psi' }; +} + +=head2 resSolvAcc + + Title : resSolvAcc + Usage : returns solvent exposed area of this residue in + square Angstroms + Function : + Example : $solv_acc = $dssp_obj->resSolvAcc( RESIDUE_ID ); + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resSolvAcc { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' }; +} + +=head2 resSurfArea + + Title : resSurfArea + Usage : returns solvent exposed area of this residue in + square Angstroms + Function : + Example : $solv_acc = $dssp_obj->resSurfArea( RESIDUE_ID ); + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resSurfArea { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'solv_acces' }; +} + +=head2 resSecStr + + Title : resSecStr + Usage : $ss = $dssp_obj->resSecStr( RESIDUE_ID ); + Function : returns the DSSP secondary structural designation of this residue + Example : + Returns : a character ( 'B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ' ) + Args : RESIDUE_ID + NOTE : The range of this method differs from that of the + resSecStr method in the STRIDE SecStr parser. That is because of the + slightly different format for STRIDE and DSSP output. The resSecStrSum + method exists to map these different ranges onto an identical range. + +=cut + +sub resSecStr { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' }; + return $ss_char if $ss_char; + return ' '; +} + + +=head2 resSecStrSum + + Title : resSecStrSum + Usage : $ss = $dssp_obj->resSecStrSum( $id ); + Function : returns what secondary structure group this residue belongs + to. One of: 'H': helix ( H, G, or I ) + 'B': beta ( B or E ) + 'T': turn ( T or S ) + ' ': none ( ' ' ) + This method is similar to resSecStr, but the information + it returns is less specific. + Example : + Returns : a character ( 'H', 'B', 'T', or ' ' ) + Args : dssp residue number of pdb residue identifier + + +=cut + +sub resSecStrSum { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + my $ss_char = $self->{ 'Res' }->[ $dssp_key ]->{ 'ss_summary' }; + if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) { + return 'H'; + } + if ( $ss_char eq ' ' || !( $ss_char ) ) { + return ' '; + } + if ( $ss_char eq 'B' || $ss_char eq 'E' ) { + return 'B'; + } + else { + return 'T'; + } +} + +# DSSP SPECIFIC + +=head2 hBonds + + Title : hBonds + Usage : returns number of 14 different types of H Bonds + Function : + Example : $hb = $dssp_obj->hBonds + Returns : pointer to 14 element array of ints + Args : none + NOTE : The different type of H-Bonds reported are, in order: + TYPE O(I)-->H-N(J) + IN PARALLEL BRIDGES + IN ANTIPARALLEL BRIDGES + TYPE O(I)-->H-N(I-5) + TYPE O(I)-->H-N(I-4) + TYPE O(I)-->H-N(I-3) + TYPE O(I)-->H-N(I-2) + TYPE O(I)-->H-N(I-1) + TYPE O(I)-->H-N(I+0) + TYPE O(I)-->H-N(I+1) + TYPE O(I)-->H-N(I+2) + TYPE O(I)-->H-N(I+3) + TYPE O(I)-->H-N(I+4) + TYPE O(I)-->H-N(I+5) + +=cut + +sub hBonds { + my $self = shift; + return $self->{ 'HBond'}; +} + +=head2 numSSBr + + Title : numSSBr + Usage : returns info about number of SS-bridges + Function : + Example : @SS_br = $dssp_obj->numSSbr(); + Returns : 3 element scalar int array + Args : none + + +=cut + +sub numSSBr { + my $self = shift; + return ( $self->{'Head'}->{'TotSSBr'}, + $self->{'Head'}->{'TotIaSSBr'}, + $self->{'Head'}->{'TotIeSSBr'} ); +} + +=head2 resHB_O_HN + + Title : resHB_O_HN + Usage : returns pointer to a 4 element array + consisting of: relative position of binding + partner #1, energy of that bond (kcal/mol), + relative positionof binding partner #2, + energy of that bond (kcal/mol). If the bond + is not bifurcated, the second bond is reported + as 0, 0.0 + Function : accessor + Example : $oBonds_ptr = $dssp_obj->resHB_O_HN( RESIDUE_ID ) + Returns : pointer to 4 element array + Args : RESIDUE_ID + + +=cut + +sub resHB_O_HN { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_p' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_o_hn_e' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_p' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_o_hn_e' } ); +} + + +=head2 resHB_NH_O + + Title : resHB_NH_O + Usage : returns pointer to a 4 element array + consisting of: relative position of binding + partner #1, energy of that bond (kcal/mol), + relative positionof binding partner #2, + energy of that bond (kcal/mol). If the bond + is not bifurcated, the second bond is reported + as 0, 0.0 + Function : accessor + Example : $nhBonds_ptr = $dssp_obj->resHB_NH_O( RESIDUE_ID ) + Returns : pointer to 4 element array + Args : RESIDUE_ID + + +=cut + +sub resHB_NH_O { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return ( $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_p' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb1_nh_o_e' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_p' }, + $self->{ 'Res' }->[ $dssp_key ]->{ 'hb2_nh_o_e' } ); +} + + +=head2 resTco + + Title : resTco + Usage : returns tco angle around this residue + Function : accessor + Example : resTco = $dssp_obj->resTco( RESIDUE_ID ) + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resTco { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'tco' }; +} + + +=head2 resKappa + + Title : resKappa + Usage : returns kappa angle around this residue + Function : accessor + Example : $kappa = $dssp_obj->resKappa( RESIDUE_ID ) + Returns : scalar + Args : RESIDUE_ID ( dssp or PDB ) + + +=cut + +sub resKappa { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'kappa' }; +} + + +=head2 resAlpha + + Title : resAlpha + Usage : returns alpha angle around this residue + Function : accessor + Example : $alpha = $dssp_obj->resAlpha( RESIDUE_ID ) + Returns : scalar + Args : RESIDUE_ID ( dssp or PDB ) + + +=cut + +sub resAlpha { + my $self = shift; + my @args = @_; + my $dssp_key = $self->_toDsspKey( @args ); + return $self->{ 'Res' }->[ $dssp_key ]->{ 'alpha' }; +} + +# VALUE ADDED METHODS (NOT JUST PARSE/REPORT) + +=head2 secBounds + + Title : secBounds + Usage : gets residue ids of boundary residues in each + contiguous secondary structural element of specified + chain + Function : returns pointer to array of 3 element arrays. First + two elements are the PDB IDs of the start and end points, + respectively and inclusively. The last element is the + DSSP secondary structural assignment code, + i.e. one of : ('B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ') + Example : $ss_elements_pts = $dssp_obj->secBounds( 'A' ); + Returns : pointer to array of arrays + Args : chain id ( 'A', for example ). No arg => no chain id + + +=cut + +sub secBounds { + my $self = shift; + my $chain = shift; + my %sec_bounds; + + $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' ); + + # if we've memoized this chain, use that + if ( $self->{ 'SecBounds' } ) { + # check to make sure chain is valid + if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) { + $self->throw( "No such chain: $chain\n" ); + } + return $self->{ 'SecBounds' }->{ $chain }; + } + + my ( $cur_element, $i, $cur_chain, $beg, ); + + #initialize + $cur_element = $self->{ 'Res' }->[ 1 ]->{ 'ss_summary' }; + $beg = 1; + + for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) { + if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) { + # element is terminated by a chain discontinuity + push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } }, + [ $self->_toPdbId( $beg ), + $self->_toPdbId( $i - 1 ), + $cur_element ] ); + $i++; + $beg = $i; + $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' }; + } + + elsif ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ne $cur_element ) { + # element is terminated by beginning of a new element + push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } }, + [ $self->_toPdbId( $beg ), + $self->_toPdbId( $i - 1 ), + $cur_element ] ); + $beg = $i; + $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' }; + } + } + #last residue + if ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } eq $cur_element ) { + push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } }, + [ $self->_toPdbId( $beg ), + $self->_toPdbId( $i ), + $cur_element ] ); + } + + else { + push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } }, + [ $self->_toPdbId( $beg ), + $self->_toPdbId( $i - 1 ), + $cur_element ] ); + push( @{ $sec_bounds{ $self->_pdbChain( $i ) } }, + [ $self->_toPdbId( $i ), + $self->_toPdbId( $i ), + $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ] ); + } + + $self->{ 'SecBounds' } = \%sec_bounds; + + # check to make sure chain is valid + if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) { + $self->throw( "No such chain: $chain\n" ); + } + + return $self->{ 'SecBounds' }->{ $chain }; +} + + + +=head2 chains + + Title : chains + Usage : returns pointer to array of chain I.D.s (characters) + Function : + Example : $chains_pnt = $dssp_obj->chains(); + Returns : array of characters, one of which may be ' ' + Args : none + + +=cut + +sub chains { + my $self = shift; + my $cont_segs = $self->_contSegs(); + my %chains; + my $seg; + foreach $seg ( @{ $cont_segs } ) { + $chains{ $seg->[ 2 ] } = 1; + } + my @chains = keys( %chains ); + return \@chains; +} + + +=head2 getSeq + + Title : getSeq + Usage : returns a Bio::PrimarySeq object which represents a good + guess at the sequence of the given chain + Function : For most chains of most entries, the sequence returned by + this method will be very good. However, it is inherently + unsafe to rely on DSSP to extract sequence information about + a PDB entry. More reliable information can be obtained from + the PDB entry itself. + Example : $pso = $dssp_obj->getSeq( 'A' ); + Returns : (pointer to) a PrimarySeq object + Args : Chain identifier. If none given, ' ' is assumed. If no ' ' + chain, the first chain is used. + + +=cut + +sub getSeq { + my $self = shift; + my $chain = shift; + + my ( $pot_chain, + $seq, + $frag_num, + $frag, + $curPdbNum, + $lastPdbNum, + $gap_len, + $i, + $id, + ); + my @frags; + + if ( !( $chain ) ) { + $chain = ' '; + } + + if ( $self->{ 'Seq' }->{ $chain } ) { + return $self->{ 'Seq' }->{ $chain }; + } + + my $contSegs_pnt = $self->_contSegs(); + + # load up specified chain + foreach $pot_chain ( @{ $contSegs_pnt } ) { + if ( $pot_chain->[ 2 ] eq $chain ) { + push( @frags, $pot_chain ); + } + } + + # if that didn't work, just get the first one + if ( !( @frags ) ) { + $chain = $contSegs_pnt->[ 0 ]->[ 2 ]; + foreach $pot_chain ( @{ $contSegs_pnt } ) { + if ( $pot_chain->[ 2 ] eq $chain ) { + push( @frags, $chain ); + } + } + } + + # now build the sequence string + $seq = ""; + $frag_num = 0; + foreach $frag ( @frags ) { + $frag_num++; + if ( $frag_num > 1 ) { # we need to put in some gap seq + $curPdbNum = $self->_pdbNum( $frag->[ 0 ] ); + $gap_len = $curPdbNum - $lastPdbNum - 1; + if ( $gap_len > 0 ) { + $seq .= 'u' x $gap_len; + } + else { + $seq .= 'u'; + } + } + for ( $i = $frag->[ 0 ]; $i <= $frag->[ 1 ]; $i++ ) { + $seq .= $self->_resAA( $i ); + } + $lastPdbNum = $self->_pdbNum( $i - 1 ); + } + + + + $id = $self->pdbID(); + $id .= ":$chain"; + + $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new ( -seq => $seq, + -id => $id, + -moltype => 'protein' + ); + return $self->{ 'Seq' }->{ $chain }; +} + +=head1 INTERNAL METHODS + + +=cut + +=head2 _pdbChain + + Title : _pdbChain + Usage : returns the pdb chain id of given residue + Function : + Example : $chain_id = $dssp_obj->pdbChain( DSSP_KEY ); + Returns : scalar + Args : DSSP_KEY ( dssp or pdb ) + + +=cut + +sub _pdbChain { + my $self = shift; + my $dssp_key = shift; + return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_chain' }; +} + +=head2 _resAA + + Title : _resAA + Usage : fetches the 1 char amino acid code, given a dssp id + Function : + Example : $aa = $dssp_obj->_resAA( dssp_id ); + Returns : 1 character scalar string + Args : dssp_id + + +=cut + +sub _resAA { + my $self = shift; + my $dssp_key = shift; + return $self->{ 'Res' }->[ $dssp_key ]->{ 'amino_acid' }; +} + + +=head2 _pdbNum + + Title : _pdbNum + Usage : fetches the numeric portion of the identifier for a given + residue as reported by the pdb entry. Note, this DOES NOT + uniquely specify a residue. There may be an insertion code + and/or chain identifier differences. + Function : + Example : $pdbNum = $self->_pdbNum( DSSP_ID ); + Returns : a scalar + Args : DSSP_ID + + +=cut + +sub _pdbNum { + my $self = shift; + my $dssp_key = shift; + return $self->{ 'Res' }->[ $dssp_key ]->{ 'pdb_resnum' }; +} + +=head2 _pdbInsCo + + Title : _pdbInsCo + Usage : fetches the Insertion Code for this residue, if it has one. + Function : + Example : $pdbNum = $self->_pdbInsCo( DSSP_ID ); + Returns : a scalar + Args : DSSP_ID + + +=cut + +sub _pdbInsCo { + my $self = shift; + my $dssp_key = shift; + return $self->{ 'Res' }->[ $dssp_key ]->{ 'insertionco' }; +} + +=head2 _toPdbId + + Title : _toPdbId + Usage : Takes a dssp key and builds the corresponding + PDB identifier string + Function : + Example : $pdbId = $self->_toPdbId( DSSP_ID ); + Returns : scalar + Args : DSSP_ID + +=cut + +sub _toPdbId { + my $self = shift; + my $dssp_key = shift; + my $pdbId = ( $self->_pdbNum( $dssp_key ). + $self->_pdbInsCo( $dssp_key ) ); + my $chain = $self->_pdbChain( $dssp_key ); + $pdbId = "$pdbId:$chain" if $chain; + return $pdbId; +} + +=head2 _contSegs + + Title : _contSegs + Usage : find the endpoints of continuous regions of this structure + Function : returns pointer to array of 3 element array. + Elements are the dssp keys of the start and end points of each + continuous element and its PDB chain id (may be blank). + Note that it is common to have several + continuous elements with the same chain id. This occurs + when an internal region is disordered and no structural + information is available. + Example : $cont_seg_ptr = $dssp_obj->_contSegs(); + Returns : pointer to array of arrays + Args : none + + +=cut + +sub _contSegs { + my $self = shift; + if ( $self->{ 'contSegs' } ) { + return $self->{ 'contSegs' }; + } + else { + # first time, so make contSegs + my ( $cur_chain, $i, $beg ); + my @contSegs; + #initialize + $cur_chain = $self->_pdbChain( 1 ); + $beg = 1; + #internal residues + for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) { + if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) { + push( @contSegs, [ $beg, $i - 1, $cur_chain ] ); + $beg = $i + 1; + $cur_chain = $self->_pdbChain( $i + 1 ); + } + } + # last residue must be the end of a chain + push( @contSegs, [ $beg, $i, $cur_chain ] ); + + $self->{ 'contSegs' } = \@contSegs; + return $self->{ 'contSegs' }; + } +} + +=head2 _numResLines + + Title : _numResLines + Usage : returns the total number of residue lines in this + dssp file. + This number is DIFFERENT than the number of residues in + the pdb file because dssp has chain termination and chain + discontinuity 'residues'. + Function : + Example : $num_res = $dssp_obj->_numResLines(); + Returns : scalar int + Args : none + + +=cut + +sub _numResLines { + my $self = shift; + return ( $#{$self->{ 'Res' }} ); +} + +=head2 _toDsspKey + + Title : _toDsspKey + Usage : returns the unique dssp integer key given a pdb residue id. + All accessor methods require (internally) + the dssp key. This method is very useful in converting + pdb keys to dssp keys so the accessors can accept pdb keys + as argument. PDB Residue IDs are inherently + problematic since they have multiple parts of + overlapping function and ill-defined or observed + convention in form. Input can be in any of the formats + described in the DESCRIPTION section above. + Function : + Example : $dssp_id = $dssp_obj->_pdbKeyToDsspKey( '10B:A' ) + Returns : scalar int + Args : pdb residue identifier: num[insertion code]:[chain] + + +=cut + +sub _toDsspKey { + # Consider adding lookup table for 'common' name (like 20:A) for + # fast access. Could be built during parse of input. + + my $self = shift; + my $arg_str; + + my ( $key_num, $chain_id, $ins_code ); + + # check to see how many args are given + if ( $#_ > 1 ) { # multiple args + $key_num = shift; + if ( $#_ > 1 ) { # still multiple args => ins. code, too + $ins_code = shift; + $chain_id = shift; + } + else { # just one more arg. => chain_id + $chain_id = shift; + } + } + else { # only single arg. Might be number or string + $arg_str = shift; + if ( $arg_str =~ /:/ ) { + # a chain is specified + ( $chain_id ) = ( $arg_str =~ /:(.)/); + $arg_str =~ s/:.//; + } + if ( $arg_str =~ /[A-Z]|[a-z]/ ) { + # an insertion code is specified + ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ ); + $arg_str =~ s/[A-Z]|[a-z]//g; + } + #now, get the number bit-> everything still around + $key_num = $arg_str; + } + + # Now find the residue which fits this description. Linear search is + # probably not the best way to do this, but oh well... + for ( my $i = 1; $i <= $self->_numResLines(); $i++ ) { + if ( $key_num == $self->{'Res'}->[$i]->{'pdb_resnum'} ) { + if ( $chain_id ) { # if a chain was specified + if ( $chain_id eq $self->{'Res'}->[$i]->{'pdb_chain'} ) { + # and it's the right one + if ( $ins_code ) { # if insertion code was specified + if ( $ins_code eq $self->{'Res'}->[$i]->{'insertionco'} ) { + # and it's the right one + return $i; + } + } + else { # no isertion code specified, this is it + return $i; + } + } + } + else { # no chain was specified + return $i; + } + } + } + $self->throw( "PDB key not found." ); +} + +=head2 _parse + + Title : _parse + Usage : parses dssp output + Function : + Example : used by the constructor + Returns : + Args : input source ( handled by Bio::Root:IO ) + + +=cut + +sub _parse { + my $self = shift; + my $file = shift; + my $cur; + my $current_chain; + my ( @elements, @hbond ); + my ( %head, %his, ); + my $element; + my $res_num; + + $cur = <$file>; + unless ( $cur =~ /^==== Secondary Structure Definition/ ) { + $self->throw( "Not dssp output" ); + return; + } + + $cur = <$file>; + ( $element ) = ( $cur =~ /^REFERENCE\s+(.+?)\s+\./ ); + $head{ 'REFERENCE' } = $element; + + $cur = <$file>; + @elements = split( /\s+/, $cur ); + pop( @elements ); # take off that annoying period + $head{ 'PDB' } = pop( @elements ); + $head{ 'DATE' } = pop( @elements ); + # now, everything else is "header" except for the word + # HEADER + shift( @elements ); + $element = shift( @elements ); + while ( @elements ) { + $element = $element." ".shift( @elements ); + } + $head{ 'HEADER' } = $element; + + $cur = <$file>; + ($element) = ( $cur =~ /^COMPND\s+(.+?)\s+\./ ); + $head{ 'COMPND' } = $element; + + $cur = <$file>; + ($element) = ( $cur =~ /^PDBSOURCE\s+(.+?)\s+\./ ); + $head{ 'SOURCE' } = $element; + + $cur = <$file>; + ($element) = ( $cur =~ /^AUTHOR\s+(.+?)\s+/ ); + $head{ 'AUTHOR' } = $element; + + $cur = <$file>; + @elements = split( /\s+/, $cur ); + shift( @elements ); + $head{ 'TotNumRes' } = shift( @elements ); + $head{ 'NumChain' } = shift( @elements ); + $head{ 'TotSSBr' } = shift( @elements ); + $head{ 'TotIaSSBr' } = shift( @elements ); + $head{ 'TotIeSSBr' } = shift( @elements ); + + $cur = <$file>; + ( $element ) = ( $cur =~ /\s*(\d+\.\d*)\s+ACCESSIBLE SURFACE OF PROTEIN/ ); + $head{ 'ProAccSurf' } = $element; + $self->{ 'Head' } = \%head; + + for ( my $i = 1; $i <= 14; $i++ ) { + $cur = <$file>; + ( $element ) = + $cur =~ /\s*(\d+)\s+\d+\.\d+\s+TOTAL NUMBER OF HYDROGEN/; + push( @hbond, $element ); +# $hbond{ $hBondType } = $element; + } + $self->{ 'HBond' } = \@hbond; + + my $histogram_finished = 0; + while ( !($histogram_finished) && chomp( $cur = <$file> ) ) { + if ( $cur =~ /RESIDUE AA STRUCTURE/ ) { + $histogram_finished = 1; + } + } + + while ( chomp( $cur = <$file> ) ) { + $res_num = substr( $cur, 0, 5 ); + $res_num =~ s/\s//g; + $self->{ 'Res' }->[ $res_num ] = &_parseResLine( $cur ); + } +} + + +=head2 _parseResLine + + Title : _parseResLine + Usage : parses a single residue line + Function : + Example : used internally + Returns : + Args : residue line ( string ) + + +=cut + +sub _parseResLine() { + my $cur = shift; + my ( $feat, $value ); + my %elements; + + foreach $feat ( keys %lookUp ) { + $value = substr( $cur, $lookUp{ $feat }->[0], + $lookUp{ $feat }->[1] ); + $value =~ s/\s//g; + $elements{$feat} = $value ; + } + + # if no chain id, make it '-' (like STRIDE...very convenient) + if ( !( $elements{ 'pdb_chain' } ) || $elements{ 'pdb_chain'} eq ' ' ) { + $elements{ 'pdb_chain' } = '-'; + } + return \%elements; +} + +return 1; #just because +