annotate variant_effect_predictor/Bio/Search/HSP/FastaHSP.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: FastaHSP.pm,v 1.4.2.1 2003/02/28 09:47:19 jason Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Search::HSP::FastaHSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Jason Stajich <jason@bioperl.org>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Jason Stajich
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Search::HSP::FastaHSP - HSP object for FASTA specific data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 # get a FastaHSP from a SearchIO stream
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20 my $in = new Bio::SearchIO(-format => 'fasta', -file => 'filename.fasta');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 while( my $r = $in->next_result) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 while( my $hit = $r->next_hit ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 while( my $hsp = $hit->next_hsp ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 print "smith-waterman score (if available): ",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 $hsp->sw_score(),"\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
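33 Bio::Search::HSP::FastaHSP extends Bio::Search::HSP::GenericHSP with FASTA-specific data: a Smith-Waterman score accessor (sw_score) and a get_aln method that builds a Bio::SimpleAlign from the HSP's query and hit strings.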
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 Bioperl modules. Send your comments and suggestions preferably to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 the Bioperl mailing list. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 http://bioperl.org/MailList.shtml - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 of the bugs and their resolution. Bug reports can be submitted via
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 email or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 bioperl-bugs@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 =head1 AUTHOR - Jason Stajich
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 Email jason@bioperl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 Describe contact details here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 =head1 CONTRIBUTORS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 Additional contributors names and emails here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 The rest of the documentation details each of the object methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 package Bio::Search::HSP::FastaHSP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 use Bio::Search::HSP::GenericHSP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 @ISA = qw(Bio::Search::HSP::GenericHSP );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 Title : new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 Usage : my $obj = new Bio::Search::HSP::FastaHSP();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 Function: Builds a new Bio::Search::HSP::FastaHSP object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 Returns : Bio::Search::HSP::FastaHSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 Args : -swscore => smith-waterman score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub new {
    my $class     = shift;
    my @ctor_args = @_;

    # The parent constructor receives the complete, unmodified argument
    # list and hands back the object we then decorate.
    my $self = $class->SUPER::new(@ctor_args);

    # Pull the FASTA-specific -swscore argument out of the same list.
    my ($sw) = $self->_rearrange( ['SWSCORE'], @ctor_args );

    # Only touch the score when the caller actually supplied one.
    if ( defined $sw ) {
        $self->sw_score($sw);
    }

    return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 =head2 sw_score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 Title : sw_score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 Usage : $obj->sw_score($newval)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 Function: Get/Set Smith-Waterman score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 Returns : value of sw_score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 Args : newvalue (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub sw_score {
    my $self      = shift;
    my $new_score = shift;

    # Combined getter/setter stored under the '_sw_score' key:
    #   - a defined argument overwrites the stored score;
    #   - with no (or an undefined) argument, a score that has never been
    #     set is initialised to 0 so callers always get a defined value.
    if ( defined $new_score ) {
        $self->{'_sw_score'} = $new_score;
    }
    elsif ( !defined $self->{'_sw_score'} ) {
        $self->{'_sw_score'} = 0;
    }

    return $self->{'_sw_score'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 get_aln

 Title   : get_aln
 Usage   : my $aln = $hsp->get_aln;
 Function: Builds a pairwise alignment from the query and hit strings of
           this HSP.  Leading columns that the homology string marks with
           whitespace are trimmed, the strings are cut to
           length('total'), and every column in which either string
           holds a '\', '/' or space character is dropped from both.
 Returns : Bio::SimpleAlign containing two Bio::LocatableSeq objects
           (query first, then hit)
 Args    : none

=cut

sub get_aln {
    my ($self) = @_;
    require Bio::LocatableSeq;
    require Bio::SimpleAlign;
    my $aln = Bio::SimpleAlign->new();
    my $hs  = $self->hit_string();
    my $qs  = $self->query_string();

    # fasta reports some extra 'regional' sequence information
    # that we need to clear out first.
    #
    # We infer the end of the regional sequence from where the first
    # non-space character is in the homology string, then we use the
    # HSP length to tell us how far to read so the end of the sequence
    # is cut off as well.

    my $start = 0;
    if ( $self->homology_string() =~ /^(\s+)/ ) {
        $start = CORE::length($1);
    }
    $self->debug("hs seq is '$hs'\n");
    $self->debug("qs seq is '$qs'\n");

    # CORE::length is spelled out throughout because this class also has
    # a length() method (called just below).
    my $total = $self->length('total');
    $hs = substr( $hs, $start, $total );
    $qs = substr( $qs, $start, $total );

    # Drop every column where either string carries one of these marker
    # characters.  $seq is an alias of $qs and then of $hs, so edits to
    # the underlying strings are visible through it.
    foreach my $seq ( $qs, $hs ) {
        foreach my $f ( '\\', '/', ' ' ) {
            my $index = index( $seq, $f );
            while ( $index >= 0 ) {
                foreach my $str ( $hs, $qs ) {
                    # An lvalue substr past the end of the string is
                    # fatal, so skip a string too short for this column.
                    substr( $str, $index, 1 ) = ''
                        if $index < CORE::length($str);
                }
                $self->debug( "$f, $index+1, for "
                      . CORE::length($seq)
                      . " ($seq)\n" );

                # The strings just shifted left by one, so the next
                # candidate now sits AT $index; resuming at $index+1
                # would skip the second of two adjacent markers.
                $index = index( $seq, $f, $index );
            }
        }
    }

    # End coordinates count residues only: gaps and whitespace excluded.
    my $seqonly = $qs;
    $seqonly =~ s/[\-\s]//g;
    my ( $q_nm, $s_nm ) = ( $self->query->seq_id(), $self->hit->seq_id() );

    # Fall back to generic names when no identifier is available.
    $q_nm = 'query' unless defined $q_nm && CORE::length($q_nm);
    $s_nm = 'hit'   unless defined $s_nm && CORE::length($s_nm);

    my $query = Bio::LocatableSeq->new(
        '-seq'   => $qs,
        '-id'    => $q_nm,
        '-start' => 1,
        '-end'   => CORE::length($seqonly),
    );

    $seqonly = $hs;
    $seqonly =~ s/[\-\s]//g;
    my $hit = Bio::LocatableSeq->new(
        '-seq'   => $hs,
        '-id'    => $s_nm,
        '-start' => 1,
        '-end'   => CORE::length($seqonly),
    );

    $aln->add_seq($query);
    $aln->add_seq($hit);
    return $aln;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 1;