Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Structure/SecStr/STRIDE/Res.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Structure/SecStr/STRIDE/Res.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1112 @@ +# $id $ +# +# bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm +# +# Cared for by Ed Green <ed@compbio.berkeley.edu> +# +# Copyright Univ. of California +# +# You may distribute this module under the same terms as perl itself +# +# POD documentation - main docs before the code +=head1 NAME + +Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output + +=head1 SYNOPSIS + + my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' ); + + # or + + my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT ); + + # Get secondary structure assignment for PDB residue 20 of chain A + $sec_str = $stride_obj->resSecStr( '20:A' ); + + # same + $sec_str = $stride_obj->resSecStr( 20, 'A' ) + +=head1 DESCRIPTION + +STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a +program (similar to DSSP) for assigning secondary structure to +individual residues of a pdb structure file. + + ( Knowledge-Based Protein Secondary Structure Assignment, + PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) ) + +STRIDE is available here: +http://www.embl-heidelberg.de/argos/stride/down_stride.html + +Methods are then available for extracting all of the infomation +present within the output or convenient subsets of it. + +Although they are very similar in function, DSSP and STRIDE differ +somewhat in output format. Thes differences are reflected in the +return value of some methods of these modules. For example, both +the STRIDE and DSSP parsers have resSecStr() methods for returning +the secondary structure of a given residue. However, the range of +return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the +range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See +individual methods for details. + +The methods are roughly divided into 3 sections: + + 1. Global features of this structure (PDB ID, total surface area, + etc.). These methods do not require an argument. + 2. Residue specific features ( amino acid, secondary structure, + solvent exposed surface area, etc. ). These methods do require an + arguement. The argument is supposed to uniquely identify a + residue described within the structure. It can be of any of the + following forms: + ('#A:B') or ( #, 'A', 'B' ) + || | + || - Chain ID (blank for single chain) + |--- Insertion code for this residue. Blank for most residues. + |--- Numeric portion of residue ID. + + (#) + | + --- Numeric portion of residue ID. If there is only one chain and + it has no ID AND there is no residue with an insertion code at this + number, then this can uniquely specify a residue. + + ('#:C') or ( #, 'C' ) + | | + | -Chain ID + ---Numeric portion of residue ID. + + If a residue is incompletely specified then the first residue that + fits the arguments is returned. For example, if 19 is the argument + and there are three chains, A, B, and C with a residue whose number + is 19, then 19:A will be returned (assuming its listed first). + + Since neither DSSP nor STRIDE correctly handle alt-loc codes, they + are not supported by these modules. + + 3. Value-added methods. Return values are not verbatem strings + parsed from DSSP or STRIDE output. + +=head1 FEEDBACK + +=head2 MailingLists + +UsUser feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to one +of the Bioperl mailing lists. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bio.perl.org/MailList.html - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. Bug reports can be submitted via email +or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Ed Green + +Email ed@compbio.berkeley.edu + + +=head1 APPENDIX + +The Rest of the documentation details each method. +Internal methods are preceded with a _. + + +=cut + +package Bio::Structure::SecStr::STRIDE::Res; +use strict; +use vars qw(@ISA); +use Bio::Root::Root; +use Bio::Root::IO; +use Bio::PrimarySeq; + +@ISA = qw(Bio::Root::Root); + +our %ASGTable = ( 'aa' => 0, + 'resNum' => 1, + 'ssAbbr' => 2, + 'ssName' => 3, + 'phi' => 4, + 'psi' => 5, + 'surfArea' => 6 ); + +our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N', + 'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q', + 'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H', + 'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K', + 'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P', + 'SER' => 'S', 'THR' => 'T', 'TRP' => 'W', + 'TYR' => 'Y', 'VAL' => 'V' ); + +=head2 new + + Title : new + Usage : makes new object of this class + Function : Constructor + Example : $stride_obj = Bio::Structure::SecStr::STRIDE:Res->new( '-file' => filename + # or + '-fh' => FILEHANDLE ) + Returns : object (ref) + Args : filename or filehandle( must be proper STRIDE output ) + +=cut + +sub new { + my ( $class, @args ) = @_; + my $self = $class->SUPER::new( @args ); + my $io = Bio::Root::IO->new( @args ); + $self->_parse( $io ); # not passing filehandle ! + $io->close(); + return $self; +} + +# GLOBAL FEATURES / INFO / STATS + +=head2 totSurfArea + + Title : totSurfArea + Usage : returns sum of surface areas of all residues of all + chains considered. Result is memoized. + Function : + Example : $tot_SA = $stride_obj->totSurfArea(); + Returns : scalar + Args : none + + +=cut + +sub totSurfArea { + my $self = shift; + my $total = 0; + my ( $chain, $res ); + + if ( $self->{ 'SurfArea' } ) { + return $self->{ 'SurfArea' }; + } + else { + foreach $chain ( keys %{$self->{ 'ASG' }} ) { + for ( my $i = 1; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { + $total += + $self->{'ASG'}->{$chain}->[$i]->[$ASGTable{'surfArea'}]; + } + } + } + + $self->{ 'SurfArea' } = $total; + return $self->{ 'SurfArea' }; + +} + +=head2 numResidues + + Title : numResidues + Usage : returns total number of residues in all chains or + just the specified chain + Function : + Example : $tot_res = $stride_obj->numResidues(); + Returns : scalar int + Args : none or chain id + + +=cut + +sub numResidues { + my $self = shift; + my $chain = shift; + my $total = 0; + my $key; + foreach $key ( keys %{$self->{ 'ASG' }} ) { + if ( $chain ) { + if ( $key eq $chain ) { + $total += $#{$self->{ 'ASG' }{ $key }}; + } + } + else { + $total += $#{$self->{ 'ASG' }{ $key }}; + } + } + return $total; +} + +# STRAIGHT FROM THE PDB ENTRY + +=head2 pdbID + + Title : pdbID + Usage : returns pdb identifier ( 1FJM, e.g. ) + Function : + Example : $pdb_id = $stride_obj->pdbID(); + Returns : scalar string + Args : none + + +=cut + +sub pdbID { + my $self = shift; + return $self->{ 'PDB' }; +} +=head2 pdbAuthor + + Title : pdbAuthor + Usage : returns author of this PDB entry + Function : + Example : $auth = $stride_obj->pdbAuthor() + Returns : scalar string + Args : none + + +=cut + +sub pdbAuthor { + my $self = shift; + return join( ' ', @{ $self->{ 'HEAD' }->{ 'AUT' } } ); +} + +=head2 pdbCompound + + Title : pdbCompound + Usage : returns string of what was found on the + CMP lines + Function : + Example : $cmp = $stride_obj->pdbCompound(); + Returns : string + Args : none + + +=cut + +sub pdbCompound { + my $self = shift; + return join( ' ', @{ $self->{ 'HEAD' }->{ 'CMP' } } ); +} + +=head2 pdbDate + + Title : pdbDate + Usage : returns date given in PDB file + Function : + Example : $pdb_date = $stride_obj->pdbDate(); + Returns : scalar + Args : none + + +=cut + +sub pdbDate { + my $self = shift; + return $self->{ 'DATE' }; +} + +=head2 pdbHeader + + Title : pdbHeader + Usage : returns string of characters found on the PDB header line + Function : + Example : $head = $stride_obj->pdbHeader(); + Returns : scalar + Args : none + + +=cut + +sub pdbHeader { + my $self = shift; + return $self->{ 'HEAD' }->{ 'HEADER' }; +} + +=head2 pdbSource + + Title : pdbSource + Usage : returns string of what was found on SRC lines + Function : + Example : $src = $stride_obj->pdbSource(); + Returns : scalar + Args : none + + +=cut + +sub pdbSource { + my $self = shift; + return join( ' ', @{ $self->{ 'HEAD' }->{ 'SRC' } } ); +} + +# RESIDUE SPECIFIC ACCESSORS + +=head2 resAA + + Title : resAA + Usage : returns 1 letter abbr. of the amino acid specified by + the arguments + Function : + Examples : $aa = $stride_obj->resAA( RESIDUE_ID ); + Returns : scalar character + Args : RESIDUE_ID + + +=cut + +sub resAA { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); +} + +=head2 resPhi + + Title : resPhi + Usage : returns phi angle of specified residue + Function : + Example : $phi = $stride_obj->resPhi( RESIDUE_ID ); + Returns : scaler + Args : RESIDUE_ID + + +=cut + +sub resPhi { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'phi' } ]; +} + +=head2 resPsi + + Title : resPsi + Usage : returns psi angle of specified residue + Function : + Example : $psi = $stride_obj->resPsi( RESIDUE_ID ); + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resPsi { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'psi' } ]; +} + +=head2 resSolvAcc + + Title : resSolvAcc + Usage : returns stride calculated surface area of specified residue + Function : + Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID ); + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resSolvAcc { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; +} + +=head2 resSurfArea + + Title : resSurfArea + Usage : returns stride calculated surface area of specified residue + Function : + Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID ); + Returns : scalar + Args : RESIDUE_ID + + +=cut + +sub resSurfArea { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; +} + +=head2 resSecStr + + Title : resSecStr + Usage : gives one letter abbr. of stride determined secondary + structure of specified residue + Function : + Example : $ss = $stride_obj->resSecStr( RESIDUE_ID ); + Returns : one of: 'H' => Alpha Helix + 'G' => 3-10 helix + 'I' => PI-helix + 'E' => Extended conformation + 'B' or 'b' => Isolated bridge + 'T' => Turn + 'C' => Coil + ' ' => None + # NOTE: This range is slightly DIFFERENT from the + # DSSP method of the same name + Args : RESIDUE_ID + + +=cut + +sub resSecStr { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssAbbr' } ]; +} + +=head2 resSecStrSum + + Title : resSecStrSum + Usage : gives one letter summary of secondary structure of + specified residue. More general than secStruc() + Function : + Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID ); + Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil) + Args : residue identifier(s) ( SEE INTRO NOTE ) + + +=cut + +sub resSecStrSum { + my $self = shift; + my @args = @_; + my $ss_char = $self->resSecStr( @args ); + + if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) { + return 'H'; + } + if ( $ss_char eq 'E' || $ss_char eq 'B' || $ss_char eq 'b' ) { + return 'B'; + } + if ( $ss_char eq 'T' ) { + return 'T'; + } + else { + return 'C'; + } +} + +# STRIDE SPECIFIC + +=head2 resSecStrName + + Title : resSecStrName + Usage : gives full name of the secondary structural element + classification of the specified residue + Function : + Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID ); + Returns : scalar string + Args : RESIDUE_ID + + +=cut + +sub resSecStrName { + my $self = shift; + my @args = @_; + my ( $ord, $chain ) = $self->_toOrdChain( @args ); + return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssName' } ]; +} + +=head2 strideLocs + + Title : strideLocs + Usage : returns stride determined contiguous secondary + structural elements as specified on the LOC lines + Function : + Example : $loc_pnt = $stride_obj->strideLocs(); + Returns : pointer to array of 5 element arrays. + 0 => stride name of structural element + 1 => first residue pdb key (including insertion code, if app.) + 2 => first residue chain id + 3 => last residue pdb key (including insertion code, if app.) + 4 => last residue chain id + NOTE the differences between this range and the range of SecBounds() + Args : none + + +=cut + +sub strideLocs { + my $self = shift; + return $self->{ 'LOC' }; +} + +# VALUE ADDED METHODS (NOT JUST PARSE/REPORT) + +=head2 secBounds + + Title : secBounds + Usage : gets residue ids of boundary residues in each + contiguous secondary structural element of specified + chain + Function : + Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' ); + Returns : pointer to array of 3 element arrays. First two elements + are the PDB IDs of the start and end points, respectively + and inclusively. The last element is the STRIDE secondary + structural element code (same range as resSecStr). + Args : chain identifier ( one character ). If none, '-' is assumed + + +=cut + +sub secBounds { + # Requires a chain name. If left blank, we assume ' ' which equals '-' + my $self = shift; + my $chain = shift; + my @SecBounds; + + $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' ); + + # if we've memoized this one, use that + if ( $self->{ 'SecBounds' }->{ $chain } ) { + return $self->{ 'SecBounds' }->{ $chain }; + } + + #check to make sure chain is valid + if ( !( $self->{ 'ASG' }->{ $chain } ) ) { + $self->throw( "No such chain: $chain\n" ); + } + + my $cur_element = $self->{ 'ASG' }->{ $chain }->[ 1 ]-> + [ $ASGTable{ 'ssAbbr' } ]; + my $beg = 1; + my $i; + + for ( $i = 2; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { + if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] + ne $cur_element ) { + push( @SecBounds, [ $beg, $i -1 , $cur_element ] ); + $beg = $i; + $cur_element = $self->{ 'ASG' }->{ $chain }->[ $i ]-> + [ $ASGTable{ 'ssAbbr' } ]; + } + } + + if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] + eq $cur_element ) { + push( @SecBounds, [ $beg, $i, $cur_element ] ); + } + else { + push( @SecBounds, [ $beg, $i - 1, $cur_element ], + [ $i, $i, $self->{ 'ASG' }->{ $chain }->[ $i ]-> + [ $ASGTable{ 'ssAbbr' } ] ] ); + } + + $self->{ 'SecBounds' }->{ $chain } = \@SecBounds; + return $self->{ 'SecBounds' }->{ $chain }; +} + +=head2 chains + + Title : chains + Usage : gives array chain I.D.s (characters) + Function : + Example : @chains = $stride_obj->chains(); + Returns : array of characters + Args : none + + +=cut + +sub chains { + my $self = shift; + my @chains = keys ( %{ $self->{ 'ASG' } } ); + return \@chains; +} + +=head2 getSeq + + Title : getSeq + Usage : returns a Bio::PrimarySeq object which represents an + approximation at the sequence of the specified chain. + Function : For most chain of most entries, the sequence returned by + this method will be very good. However, it it inherently + unsafe to rely on STRIDE to extract sequence information about + a PDB entry. More reliable information can be obtained from + the PDB entry itself. If a second option is given + (and evaluates to true), the sequence generated will + have 'X' in spaces where the pdb residue numbers are + discontinuous. In some cases this results in a + better sequence object (when the discontinuity is + due to regions which were present, but could not be + resolved). In other cases, it will result in a WORSE + sequence object (when the discontinuity is due to + historical sequence numbering and all sequence is + actually resolved). + Example : $pso = $dssp_obj->getSeq( 'A' ); + Returns : (pointer to) a PrimarySeq object + Args : Chain identifier. If none given, '-' is assumed. + + +=cut + +sub getSeq { + my $self = shift; + my $chain = shift; + my $fill_in = shift; + + if ( !( $chain ) ) { + $chain = '-'; + } + + if ( $self->{ 'Seq' }->{ $chain } ) { + return $self->{ 'Seq' }->{ $chain }; + } + + my ( $seq, + $num_res, + $last_res_num, + $cur_res_num, + $i, + $step, + $id + ); + + $seq = ""; + $num_res = $self->numResidues( $chain ); + $last_res_num = $self->_pdbNum( 1, $chain ); + for ( $i = 1; $i <= $num_res; $i++ ) { + if ( $fill_in ) { + $cur_res_num = $self->_pdbNum( $i, $chain ); + $step = $cur_res_num - $last_res_num; + if ( $step > 1 ) { + $seq .= 'X' x ( $step - 1 ); + } + } + $seq .= $self->_resAA( $i, $chain ); + $last_res_num = $cur_res_num; + } + + $id = $self->pdbID(); + $id .= "$chain"; + + $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq, + -id => $id, + -moltype => 'protein' + ); + + return $self->{ 'Seq' }->{ $chain }; +} + +=head1 INTERNAL METHODS + +=head2 _pdbNum + + Title : _pdbNum + Usage : fetches the numeric portion of the identifier for a given + residue as reported by the pdb entry. Note, this DOES NOT + uniquely specify a residue. There may be an insertion code + and/or chain identifier differences. + Function : + Example : $pdbNum = $self->pdbNum( 3, 'A' ); + Returns : a scalar + Args : valid ordinal num / chain combination + + +=cut + +sub _pdbNum { + my $self = shift; + my $ord = shift; + my $chain = shift; + if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { + $self->throw( "No such ordinal $ord in chain $chain.\n" ); + } + my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; + my $num_part; + ( $num_part ) = ( $pdb_junk =~ /(-*\d+).*/ ); + return $num_part; +} + +=head2 _resAA + + Title : _resAA + Usage : returns 1 letter abbr. of the amino acid specified by + the arguments + Function : + Examples : $aa = $stride_obj->_resAA( 3, '-' ); + Returns : scalar character + Args : ( ord. num, chain ) + + +=cut + +sub _resAA { + my $self = shift; + my $ord = shift; + my $chain = shift; + if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { + $self->throw( "No such ordinal $ord in chain $chain.\n" ); + } + return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); +} + +=head2 _pdbInsCo + + Title : _pdbInsCo + Usage : fetches the Insertion code for this residue. + Function : + Example : $pdb_ins_co = $self->_pdb_ins_co( 15, 'B' ); + Returns : a scalar + Args : ordinal number and chain + + +=cut + +sub _pdbInsCo { + my $self = shift; + my $ord = shift; + my $chain = shift; + if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { + $self->throw( "No such ordinal $ord in chain $chain.\n" ); + } + my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; + my $letter_part; + ( $letter_part ) = ( $pdb_junk =~ /\d+(\D+)/ ); # insertion code can be any + # non-word character(s) + return $letter_part; +} + +=head2 _toOrdChain + + Title : _toOrdChain + Usage : takes any set of residue identifying parameters and + wrestles them into a two element array: the chain and the ordinal + number of this residue. This two element array can then be + efficiently used as keys in many of the above accessor methods + ('#A:B') or ( #, 'A', 'B' ) + || | + || - Chain ID (blank for single chain) + |--- Insertion code for this residue. Blank for most residues. + |--- Numeric portion of residue ID. + + (#) + | + --- Numeric portion of residue ID. If there is only one chain and + it has no ID AND there is no residue with an insertion code at this + number, then this can uniquely specify a residue. + + # ('#:C) or ( #, 'C' ) + | | + | -Chain ID + ---Numeric portion of residue ID. + + If a residue is incompletely specified then the first residue that + fits the arguments is returned. For example, if 19 is the argument + and there are three chains, A, B, and C with a residue whose number + is 19, then 19:A will be returned (assuming its listed first). + + Function : + Example : my ( $ord, $chain ) = $self->_toOrdChain( @args ); + Returns : two element array + Args : valid set of residue identifier(s) ( SEE NOTE ABOVE ) + + +=cut + +sub _toOrdChain { + my $self = shift; + my $arg_str; + + my ( $key_num, $chain_id, $ins_code, $key, $i ); + + # check to see how many args are given + if ( $#_ >= 1 ) { # multiple args + $key_num = shift; + if ( $#_ >= 1 ) { # still multiple args => ins. code, too + $ins_code = shift; + $chain_id = shift; + } + else { # just one more arg. => chain_id + $chain_id = shift; + } + } + else { # only single arg. Might be number or string + $arg_str = shift; + if ( $arg_str =~ /:/ ) { + # a chain is specified + ( $chain_id ) = ( $arg_str =~ /:(.)/); + $arg_str =~ s/:.//; + } + if ( $arg_str =~ /[A-Z]|[a-z]/ ) { + # an insertion code is specified + ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ ); + $arg_str =~ s/[A-Z]|[a-z]//g; + } + #now, get the number bit-> everything still around + $key_num = $arg_str; + } + + $key = "$key_num$ins_code"; + if ( !( $chain_id ) || $chain_id eq ' ' ) { + $chain_id = '-'; + } + + if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) { + $self->throw( "No such chain: $chain_id" ); + } + + for ( $i = 1; $i <= $#{$self->{ 'ASG' }->{ $chain_id }}; $i++ ) { + if ( $self->{ 'ASG' }->{ $chain_id }->[ $i ]->[ $ASGTable{ 'resNum' } ] eq + $key ) { + return ( $i, $chain_id ); + } + } + + $self->throw( "No such key: $key" ); + +} + +=head2 _parse + + Title : _parse + Usage : as name suggests, parses stride output, creating object + Function : + Example : $self->_parse( $io ); + Returns : + Args : valid Bio::Root::IO object + + +=cut + +sub _parse { + my $self = shift; + my $io = shift; + my $file = $io->_fh(); + + # Parse top lines + if ( $self->_parseTop( $io ) ) { + $self->throw( "Not stride output" ); + } + + # Parse the HDR, CMP, SCR, and AUT lines + $self->_parseHead( $io ); + + # Parse the CHN, SEQ, STR, and LOC lines + $self->_parseSummary( $io ); # we're ignoring this + + # Parse the ASG lines + $self->_parseASG( $io ); +} + +=head2 _parseTop + + Title : _parseTop + Usage : makes sure this looks like stride output + Function : + Example : + Returns : + Args : + + +=cut + +sub _parseTop { + my $self = shift; + my $io = shift; + my $file = $io->_fh(); + my $cur = <$file>; + if ( $cur =~ /^REM ---/ ) { + return 0; + } + return 1; +} + +=head2 _parseHead + + Title : _parseHead + Usage : parses + Function : HDR, CMP, SRC, and AUT lines + Example : + Returns : + Args : + + +=cut + +sub _parseHead { + my $self = shift; + my $io = shift; + my $file = $io->_fh(); + my $cur; + my $element; + my ( @elements, @cmp, @src, @aut ); + my %head = {}; + my $still_head = 1; + + $cur = <$file>; + while ( $cur =~ /^REM / ) { + $cur = <$file>; + } + + if ( $cur =~ /^HDR / ) { + @elements = split( /\s+/, $cur ); + shift( @elements ); + pop( @elements ); + $self->{ 'PDB' } = pop( @elements ); + $self->{ 'DATE' } = pop( @elements ); + # now, everything else is "header" except for the word + # HDR + $element = join( ' ', @elements ); + $head{ 'HEADER' } = $element; + } + + $cur = <$file>; + while ( $cur =~ /^CMP / ) { + ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ ); + push( @cmp, $cur ); + $cur = <$file>; + } + + while ( $cur =~ /^SRC / ) { + ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ ); + push( @src, $cur ); + $cur = <$file>; + } + + while ( $cur =~ /^AUT / ) { + ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ ); + push( @aut, $cur ); + $cur = <$file>; + } + + $head{ 'CMP' } = \@cmp; + $head{ 'SRC' } = \@src; + $head{ 'AUT' } = \@aut; + $self->{ 'HEAD' } = \%head; +} + +=head2 _parseSummary + + Title : _parseSummary + Usage : parses LOC lines + Function : + Example : + Returns : + Args : + + +=cut + +sub _parseSummary { + my $self = shift; + my $io = shift; + my $file = $io->_fh(); + my $cur = <$file>; + my $bound_set; + my $element; + my ( @elements, @cur ); + my @LOC_lookup = ( [ 5, 12 ], # Element name + # reduntdant [ 18, 3 ], # First residue name + [ 22, 5 ], # First residue PDB number + [ 28, 1 ], # First residue Chain ID + # redundant [ 35, 3 ], # Last residue name + [ 40, 5 ], # Last residue PDB number + [ 46, 1 ] ); # Last residue Chain ID + + #ignore these lines + while ( $cur =~ /^REM |^STR |^SEQ |^CHN / ) { + $cur = <$file>; + } + + while ( $cur =~ /^LOC / ) { + foreach $bound_set ( @LOC_lookup ) { + $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] ); + $element =~ s/\s//g; + push( @cur, $element ); + } + push( @elements, [ @cur ] ); + $cur = <$file>; + @cur = (); + } + $self->{ 'LOC' } = \@elements; + +} + +=head2 _parseASG + + Title : _parseASG + Usage : parses ASG lines + Function : + Example : + Returns : + Args : + + +=cut + +sub _parseASG { + my $self = shift; + my $io = shift; + my $file = $io->_fh(); + my $cur = <$file>; + my $bound_set; + my $ord_num; + my ( $chain, $last_chain ); + my $element; + my %ASG; + my ( @cur, @elements ); + my @ASG_lookup = ( [ 5, 3 ], # Residue name + # [ 9, 1 ], # Chain ID + [ 10, 5 ], # PDB residue number (w/ins.code) + # [ 16, 4 ], # ordinal stride number + [ 24, 1 ], # one letter sec. stru. abbr. + [ 26, 13], # full sec. stru. name + [ 42, 7 ], # phi angle + [ 52, 7 ], # psi angle + [ 64, 5 ] );# residue solv. acc. + + while ( $cur =~ /^REM / ) { + $cur = <$file>; + } + + while ( $cur =~ /^ASG / ) { + # get ordinal number for array key + $ord_num = substr( $cur, 16, 4 ); + $ord_num =~ s/\s//g; + + # get the chain id + $chain = substr( $cur, 9, 1 ); + + if ( $last_chain && ( $chain ne $last_chain ) ) { + $ASG{ $last_chain } = [ @elements ]; + @elements = (); + } + + # now get the rest of the info on this line + foreach $bound_set ( @ASG_lookup ) { + $element = substr( $cur, $bound_set->[ 0 ], + $bound_set->[ 1 ] ); + $element =~ s/\s//g; + push( @cur, $element ); + } + $elements[ $ord_num ] = [ @cur ]; + $cur = <$file>; + @cur = (); + $last_chain = $chain; + } + + $ASG{ $chain } = [ @elements ]; + + $self->{ 'ASG' } = \%ASG; +} + +1; + + +