0
|
1 # $Id: FastaHSP.pm,v 1.4.2.1 2003/02/28 09:47:19 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Search::HSP::FastaHSP
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Search::HSP::FastaHSP - HSP object for FASTA specific data
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # get a FastaHSP from a SearchIO stream
|
|
20 my $in = new Bio::SearchIO(-format => 'fasta', -file => 'filename.fasta');
|
|
21
|
|
22 while( my $r = $in->next_result) {
|
|
23 while( my $hit = $r->next_result ) {
|
|
24 while( my $hsp = $hit->next_hsp ) {
|
|
25 print "smith-waterman score (if available): ",
|
|
26 $hsp->sw_score(),"\n";
|
|
27 }
|
|
28 }
|
|
29 }
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 Describe the object here
|
|
34
|
|
35 =head1 FEEDBACK
|
|
36
|
|
37 =head2 Mailing Lists
|
|
38
|
|
39 User feedback is an integral part of the evolution of this and other
|
|
40 Bioperl modules. Send your comments and suggestions preferably to
|
|
41 the Bioperl mailing list. Your participation is much appreciated.
|
|
42
|
|
43 bioperl-l@bioperl.org - General discussion
|
|
44 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
45
|
|
46 =head2 Reporting Bugs
|
|
47
|
|
48 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
49 of the bugs and their resolution. Bug reports can be submitted via
|
|
50 email or the web:
|
|
51
|
|
52 bioperl-bugs@bioperl.org
|
|
53 http://bugzilla.bioperl.org/
|
|
54
|
|
55 =head1 AUTHOR - Jason Stajich
|
|
56
|
|
57 Email jason@bioperl.org
|
|
58
|
|
59 Describe contact details here
|
|
60
|
|
61 =head1 CONTRIBUTORS
|
|
62
|
|
63 Additional contributors names and emails here
|
|
64
|
|
65 =head1 APPENDIX
|
|
66
|
|
67 The rest of the documentation details each of the object methods.
|
|
68 Internal methods are usually preceded with a _
|
|
69
|
|
70 =cut
|
|
71
|
|
72
|
|
73 # Let the code begin...
|
|
74
|
|
75
|
|
76 package Bio::Search::HSP::FastaHSP;
|
|
77 use vars qw(@ISA);
|
|
78 use strict;
|
|
79
|
|
80 use Bio::Search::HSP::GenericHSP;
|
|
81
|
|
82 @ISA = qw(Bio::Search::HSP::GenericHSP );
|
|
83
|
|
84 =head2 new
|
|
85
|
|
86 Title : new
|
|
87 Usage : my $obj = new Bio::Search::HSP::FastaHSP();
|
|
88 Function: Builds a new Bio::Search::HSP::FastaHSP object
|
|
89 Returns : Bio::Search::HSP::FastaHSP
|
|
90 Args : -swscore => smith-waterman score
|
|
91
|
|
92 =cut
|
|
93
|
|
94 sub new {
|
|
95 my($class,@args) = @_;
|
|
96
|
|
97 my $self = $class->SUPER::new(@args);
|
|
98
|
|
99 my ($swscore) = $self->_rearrange([qw(SWSCORE)], @args);
|
|
100
|
|
101 defined $swscore && $self->sw_score($swscore);
|
|
102
|
|
103 return $self;
|
|
104 }
|
|
105
|
|
106
|
|
107 =head2 sw_score
|
|
108
|
|
109 Title : sw_score
|
|
110 Usage : $obj->sw_score($newval)
|
|
111 Function: Get/Set Smith-Waterman score
|
|
112 Returns : value of sw_score
|
|
113 Args : newvalue (optional)
|
|
114
|
|
115
|
|
116 =cut
|
|
117
|
|
118 sub sw_score{
|
|
119 my ($self,$value) = @_;
|
|
120 if( defined $value || ! defined $self->{'_sw_score'} ) {
|
|
121 $value = 0 unless defined $value; # default value
|
|
122 $self->{'_sw_score'} = $value;
|
|
123 }
|
|
124 return $self->{'_sw_score'};
|
|
125 }
|
|
126
|
|
127
|
|
128 sub get_aln {
|
|
129 my ($self) = @_;
|
|
130 require Bio::LocatableSeq;
|
|
131 require Bio::SimpleAlign;
|
|
132 my $aln = new Bio::SimpleAlign;
|
|
133 my $hs = $self->hit_string();
|
|
134 my $qs = $self->query_string();
|
|
135
|
|
136 # fasta reports some extra 'regional' sequence information
|
|
137 # we need to clear out first
|
|
138 # this seemed a bit insane to me at first, but it appears to
|
|
139 # work --jason
|
|
140
|
|
141 # we infer the end of the regional sequence where the first
|
|
142 # non space is in the homology string
|
|
143 # then we use the HSP->length to tell us how far to read
|
|
144 # to cut off the end of the sequence
|
|
145
|
|
146 my ($start) = 0;
|
|
147 if( $self->homology_string() =~ /^(\s+)/ ) {
|
|
148 $start = CORE::length($1);
|
|
149 }
|
|
150 $self->debug("hs seq is '$hs'\n");
|
|
151 $self->debug("qs seq is '$qs'\n");
|
|
152
|
|
153
|
|
154 $hs = substr($hs, $start,$self->length('total'));
|
|
155 $qs = substr($qs, $start,$self->length('total'));
|
|
156 foreach my $seq ( $qs,$hs) {
|
|
157 foreach my $f ( '\\', '/', ' ') {
|
|
158 my $index = index($seq,$f);
|
|
159 while( $index >=0 && length($seq) > 0 ) {
|
|
160 substr($hs,$index,1) = '';
|
|
161 substr($qs,$index,1) = '';
|
|
162 $self->debug( "$f, $index+1, for ".length($seq). " ($seq)\n");
|
|
163 $index = index($seq,$f,$index+1);
|
|
164 }
|
|
165 }
|
|
166 }
|
|
167
|
|
168 my $seqonly = $qs;
|
|
169 $seqonly =~ s/[\-\s]//g;
|
|
170 my ($q_nm,$s_nm) = ($self->query->seq_id(),
|
|
171 $self->hit->seq_id());
|
|
172 unless( defined $q_nm && CORE::length ($q_nm) ) {
|
|
173 $q_nm = 'query';
|
|
174 }
|
|
175 unless( defined $s_nm && CORE::length ($s_nm) ) {
|
|
176 $s_nm = 'hit';
|
|
177 }
|
|
178 my $query = new Bio::LocatableSeq('-seq' => $qs,
|
|
179 '-id' => $q_nm,
|
|
180 '-start' => 1,
|
|
181 '-end' => CORE::length($seqonly),
|
|
182 );
|
|
183 $seqonly = $hs;
|
|
184 $seqonly =~ s/[\-\s]//g;
|
|
185 my $hit = new Bio::LocatableSeq('-seq' => $hs,
|
|
186 '-id' => $s_nm,
|
|
187 '-start' => 1,
|
|
188 '-end' => CORE::length($seqonly),
|
|
189 );
|
|
190 $aln->add_seq($query);
|
|
191 $aln->add_seq($hit);
|
|
192 return $aln;
|
|
193 }
|
|
194
|
|
195
|
|
196 1;
|