Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Search/HSP/FastaHSP.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Search/HSP/FastaHSP.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,196 @@ +# $Id: FastaHSP.pm,v 1.4.2.1 2003/02/28 09:47:19 jason Exp $ +# +# BioPerl module for Bio::Search::HSP::FastaHSP +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Search::HSP::FastaHSP - HSP object for FASTA specific data + +=head1 SYNOPSIS + + # get a FastaHSP from a SearchIO stream + my $in = new Bio::SearchIO(-format => 'fasta', -file => 'filename.fasta'); + + while( my $r = $in->next_result) { + while( my $hit = $r->next_result ) { + while( my $hsp = $hit->next_hsp ) { + print "smith-waterman score (if available): ", + $hsp->sw_score(),"\n"; + } + } + } + +=head1 DESCRIPTION + +Describe the object here + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Search::HSP::FastaHSP; +use vars qw(@ISA); +use strict; + +use Bio::Search::HSP::GenericHSP; + +@ISA = qw(Bio::Search::HSP::GenericHSP ); + +=head2 new + + Title : new + Usage : my $obj = new Bio::Search::HSP::FastaHSP(); + Function: Builds a new Bio::Search::HSP::FastaHSP object + Returns : Bio::Search::HSP::FastaHSP + Args : -swscore => smith-waterman score + +=cut + +sub new { + my($class,@args) = @_; + + my $self = $class->SUPER::new(@args); + + my ($swscore) = $self->_rearrange([qw(SWSCORE)], @args); + + defined $swscore && $self->sw_score($swscore); + + return $self; +} + + +=head2 sw_score + + Title : sw_score + Usage : $obj->sw_score($newval) + Function: Get/Set Smith-Waterman score + Returns : value of sw_score + Args : newvalue (optional) + + +=cut + +sub sw_score{ + my ($self,$value) = @_; + if( defined $value || ! defined $self->{'_sw_score'} ) { + $value = 0 unless defined $value; # default value + $self->{'_sw_score'} = $value; + } + return $self->{'_sw_score'}; +} + + +sub get_aln { + my ($self) = @_; + require Bio::LocatableSeq; + require Bio::SimpleAlign; + my $aln = new Bio::SimpleAlign; + my $hs = $self->hit_string(); + my $qs = $self->query_string(); + + # fasta reports some extra 'regional' sequence information + # we need to clear out first + # this seemed a bit insane to me at first, but it appears to + # work --jason + + # we infer the end of the regional sequence where the first + # non space is in the homology string + # then we use the HSP->length to tell us how far to read + # to cut off the end of the sequence + + my ($start) = 0; + if( $self->homology_string() =~ /^(\s+)/ ) { + $start = CORE::length($1); + } + $self->debug("hs seq is '$hs'\n"); + $self->debug("qs seq is '$qs'\n"); + + + $hs = substr($hs, $start,$self->length('total')); + $qs = substr($qs, $start,$self->length('total')); + foreach my $seq ( $qs,$hs) { + foreach my $f ( '\\', '/', ' ') { + my $index = index($seq,$f); + while( $index >=0 && length($seq) > 0 ) { + substr($hs,$index,1) = ''; + substr($qs,$index,1) = ''; + $self->debug( "$f, $index+1, for ".length($seq). " ($seq)\n"); + $index = index($seq,$f,$index+1); + } + } + } + + my $seqonly = $qs; + $seqonly =~ s/[\-\s]//g; + my ($q_nm,$s_nm) = ($self->query->seq_id(), + $self->hit->seq_id()); + unless( defined $q_nm && CORE::length ($q_nm) ) { + $q_nm = 'query'; + } + unless( defined $s_nm && CORE::length ($s_nm) ) { + $s_nm = 'hit'; + } + my $query = new Bio::LocatableSeq('-seq' => $qs, + '-id' => $q_nm, + '-start' => 1, + '-end' => CORE::length($seqonly), + ); + $seqonly = $hs; + $seqonly =~ s/[\-\s]//g; + my $hit = new Bio::LocatableSeq('-seq' => $hs, + '-id' => $s_nm, + '-start' => 1, + '-end' => CORE::length($seqonly), + ); + $aln->add_seq($query); + $aln->add_seq($hit); + return $aln; +} + + +1;