annotate variant_effect_predictor/Bio/Tools/Blast/HSP.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 #----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Blast::HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # AUTHOR : Steve Chervitz (sac@bioperl.org)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 # CREATED : March 1996
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # STATUS : Alpha
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 # REVISION: $Id: HSP.pm,v 1.18 2002/10/22 07:38:48 lapp Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 # For the latest version and documentation, visit the distribution site:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # http://genome-www.stanford.edu/perlOOP/bioperl/blast/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # To generate documentation, run this module through pod2html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12 # (preferably from Perl v5.004 or better).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 # Copyright (c) 1996-2000 Steve Chervitz. All Rights Reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 # This module is free software; you can redistribute it and/or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 # modify it under the same terms as Perl itself.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 #----------------------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 package Bio::Tools::Blast::HSP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 use Bio::Root::Global qw(:devel);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 use Bio::Root::Object ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 #use Bio::Root::Err qw(:std);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 @ISA = qw( Bio::Root::Object);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 use vars qw($ID $GAP_SYMBOL @SCORE_CUTOFFS $Revision %STRAND_SYMBOL);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 $ID = 'Bio::Tools::Blast::HSP';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 $Revision = '$Id: HSP.pm,v 1.18 2002/10/22 07:38:48 lapp Exp $'; #'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 $GAP_SYMBOL = '-'; # Need a more general way to handle gap symbols.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 @SCORE_CUTOFFS = ( 100, 30 ); # Bit score cutoffs (see homol_score()).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 %STRAND_SYMBOL = ('Plus' => 1, 'Minus' => -1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 ## POD Documentation:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 Bio::Tools::Blast::HSP - Bioperl BLAST High-Scoring Segment Pair object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 =head2 Object Creation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 The construction of HSP objects is handled by Bio::Tools::Blast::Sbjct.pm.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 You should not need to use this package directly. See L<_initialize()|_initialize>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 for a description of constructor parameters.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 require Bio::Tools::Blast::HSP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 $hspObj = eval{ new Bio::Tools::Blast::HSP(-DATA =>\@hspData,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 -PARENT =>$sbjct_object,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 -NAME =>$hspCount,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 -PROGRAM =>'TBLASTN',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 @hspData includes the raw BLAST report data for a specific HSP,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 and is prepared by Bio::Tools::Blast::Sbjct.pm.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 =head1 INSTALLATION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 This module is included with the central Bioperl distribution:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 http://bio.perl.org/Core/Latest
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 ftp://bio.perl.org/pub/DIST
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 Follow the installation instructions included in the README file.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 The Bio::Tools::Blast::HSP.pm module encapsulates data and methods for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 manipulating, parsing, and analyzing HSPs ("High-scoring Segment Pairs")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 derived from BLAST sequence analysis.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 This module is a utility module used by the B<Bio::Tools::Blast::Sbjct.pm>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 and is not intended for separate use. Please see documentation for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 B<Bio::Tools::Blast.pm> for some basic information about using
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 HSP objects (L<Links:>).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 =over 0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 =item * Supports BLAST versions 1.x and 2.x, gapped and ungapped.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 =back
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 Bio::Tools::Blast::HSP.pm has the ability to extract a list of all
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 residue indices for identical and conservative matches along both
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 query and sbjct sequences. Since this degree of detail is not always
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 needed, this behavior does not occur during construction of the HSP
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 object. These data will automatically be collected as necessary as
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 the HSP.pm object is used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 =head1 DEPENDENCIES
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 Bio::Tools::Blast::HSP.pm is a concrete class that inherits from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 B<Bio::Root::Object.pm> and relies on B<Bio::Tools::Blast::Sbjct.pm> as a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 container for HSP.pm objects. B<Bio::Seq.pm> and B<Bio::UnivAln.pm>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 are employed for creating sequence and alignment objects,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 respectively.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 =head2 Relationship to UnivAln.pm & Seq.pm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 HSP.pm can provide the query or sbjct sequence as a B<Bio::Seq.pm>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 object via the L<seq()|seq> method. The HSP.pm object can also create a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 two-sequence B<Bio::UnivAln.pm> alignment object using the query
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 and sbjct sequences via the L<get_aln()|get_aln> method. Creation of alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 objects is not automatic when constructing the HSP.pm object since
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 this level of functionality is not always required and would generate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 a lot of extra overhead when crunching many reports.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 Bioperl modules. Send your comments and suggestions preferably to one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 of the Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 bioperl-l@bioperl.org - General discussion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 http://bio.perl.org/MailList.html - About the mailing lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 of the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 =head1 AUTHOR
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 =head1 SEE ALSO
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 Bio::Tools::Blast::Sbjct.pm - Blast hit object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 Bio::Tools::Blast.pm - Blast object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143 Bio::Seq.pm - Biosequence object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 Bio::UnivAln.pm - Biosequence alignment object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145 Bio::Root::Object.pm - Proposed base class for all Bioperl objects.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 =head2 Links:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 http://bio.perl.org/Core/POD/Tools/Blast/Sbjct.pm.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 http://bio.perl.org/Projects/modules.html - Online module documentation
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 http://bio.perl.org/Projects/Blast/ - Bioperl Blast Project
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 http://bio.perl.org/ - Bioperl Project Homepage
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 =head1 COPYRIGHT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 Copyright (c) 1996-98 Steve Chervitz. All Rights Reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 This module is free software; you can redistribute it and/or
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 modify it under the same terms as Perl itself.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 #### END of main POD documentation.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 ###
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 Methods beginning with a leading underscore are considered private
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176 and are intended for internal use by this module. They are
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 B<not> considered part of the public interface and are described here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 for documentation purposes only.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 ## CONSTRUCTOR ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 =head2 _initialize
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 Usage : n/a; automatically called by Bio::Root::Object::new()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 : Bio::Tools::Blast::HSP.pm objects are constructed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 : automatically by Bio::Tools::Blast::Sbjct.pm, so there is no need
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 : for direct consumption.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 Purpose : Initializes HSP data and calls private methods to extract
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193 : the data for a given HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 : Calls superclass constructor first (Bio::Root::Object.pm).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 Returns : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 Argument : Named parameters passed from new():
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 : All tags must be uppercase (does not call _rearrange()).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 : -DATA => array ref containing raw data for one HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 : -PARENT => Sbjct.pm object ref.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 : -NAME => integer (1..n).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 : -PROGRAM => string ('TBLASTN', 'BLASTP', etc.).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 See Also : L<_set_data()|_set_data>, B<Bio::Root::Object::new()>, B<Bio::Tools::Blast::Sbjct::_set_hsps()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _initialize {
    # Constructor hook: receives the named (uppercase-tagged) parameters
    # handed to new() and prepares the HSP from them.
    my $self  = shift;
    my %param = @_;

    # The superclass gets first crack at the parameters.
    $self->SUPER::_initialize(%param);

    # Keep the program name around (0 when -PROGRAM was not supplied); it
    # may be needed after the HSP object is built.
    # A '_gapped' boolean taken from -GAPPED is currently not stored:
    #   $self->{'_gapped'} = $param{-GAPPED} || 0;
    $self->{'_prog'} = $param{-PROGRAM} || 0;

    # Hand the raw lines of this HSP (array ref in -DATA) to the parser.
    $self->_set_data( @{ $param{-DATA} } );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 ## ACCESSORS ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 =head2 _set_data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 Usage : n/a; called automatically during object construction.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 Purpose : Sets the query sequence, sbjct sequence, and the "match" data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 : which consists of the symbols between the query and sbjct lines
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 : in the alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 Argument : Array (all lines from a single, complete HSP, one line per element)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 Throws : Propagates any exceptions from the methods called ("See Also")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 See Also : L<_set_seq()|_set_seq>, L<_set_residues()|_set_residues>, L<_set_score_stats()|_set_score_stats>, L<_set_match_stats()|_set_match_stats>, L<_initialize()|_initialize>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 #--------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _set_data {
    # Purpose  : Sorts the raw lines of a single HSP into query rows,
    #          : match rows (the symbols between query and sbjct) and
    #          : sbjct rows, and forwards the statistics lines to
    #          : _set_score_stats() / _set_match_stats().
    # Argument : List of strings, one element per line of the HSP.
    # Sets     : _queryList, _matchList, _sbjctList (array refs),
    #          : _match_indent (width of the "Query: nnn " prefix) and,
    #          : when a separate "Frame = " line is present, _frame.
    # Throws   : Via $self->throw() if _numIdentical is still undefined
    #          : after parsing (presumably set by _set_match_stats() --
    #          : confirm there) or if no Query/Sbjct rows were found.
    my $self = shift;
    my @data = @_;

    my @queryList = ();  # 'Query' = SEQUENCE USED TO QUERY THE DATABASE.
    my @sbjctList = ();  # 'Sbjct' = HOMOLOGOUS SEQUENCE FOUND IN THE DATABASE.
    my @matchList = ();
    my $matchLine = 0;   # True while the next row is expected to be a 'match' row.

    $DEBUG and print STDERR "$ID: set_data()\n";

    my $aln_row_len;     # Width of the current query row up to the end of the residues.
    my $length_diff = 0;

    # Collecting data for all lines in the alignment
    # and then storing the collections for possible processing later.
    #
    # Note that "match" lines may not be properly padded with spaces,
    # or may consist of whitespace only when a row has no matches at all:
    # Query: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVIXXXXX 1200
    #             PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVI
    # Sbjct: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVILSLKL 1200

    foreach my $line ( @data ) {
        next if $line =~ /^\s*$/;

        if( $line =~ /^ ?Score/ ) {
            $self->_set_score_stats( $line );
        } elsif( $line =~ /^ ?(Identities|Positives|Strand)/ ) {
            $self->_set_match_stats( $line );
        } elsif( $line =~ /^ ?Frame = ([\d+-]+)/ ) {
            # Version 2.0.8 has Frame information on a separate line.
            $self->{'_frame'} = $1;
        } elsif( $line =~ /^(Query:?[\s\d]+)([^\s\d]+)/ ) {
            push @queryList, $line;
            $self->{'_match_indent'} = CORE::length $1;
            $aln_row_len = (CORE::length $1) + (CORE::length $2);
            $matchLine = 1;
        } elsif( $matchLine and $line =~ /^Sbjct/ ) {
            # The match row for this query row was whitespace-only and was
            # therefore skipped as a blank line above. Record an all-space
            # match row so the three lists stay in step, and file this line
            # as the sbjct row it really is (it used to be stored as the
            # match row, leaving the sbjct list one row short).
            push @matchList, ' ' x $aln_row_len;
            push @sbjctList, $line;
            $matchLine = 0;
        } elsif( $matchLine ) {
            # Pad the match line with spaces if necessary.
            $length_diff = $aln_row_len - CORE::length $line;
            $line .= ' ' x $length_diff if $length_diff > 0;
            push @matchList, $line;
            $matchLine = 0;
        } elsif( $line =~ /^Sbjct/ ) {
            push @sbjctList, $line;
        }
    }

    # Storing the query and sbjct lists in case they are needed later.
    # We could make this conditional to save memory.
    $self->{'_queryList'} = \@queryList;
    $self->{'_sbjctList'} = \@sbjctList;

    # Storing the match list in case it is needed later.
    $self->{'_matchList'} = \@matchList;

    if(not defined ($self->{'_numIdentical'})) {
        $self->throw("Can't parse match statistics.",
                     "Possibly a new or unrecognized Blast format.");
    }

    if(!scalar @queryList or !scalar @sbjctList) {
        $self->throw("Can't find query or sbjct alignment lines.",
                     "Possibly unrecognized Blast format.");
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311 =head2 _set_score_stats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 Usage : n/a; called automatically by _set_data()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 Purpose : Sets various score statistics obtained from the HSP listing.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 Argument : String with any of the following formats:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 : blast2: Score = 30.1 bits (66), Expect = 9.2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 : blast2: Score = 158.2 bits (544), Expect(2) = e-110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 : blast1: Score = 410 (144.3 bits), Expect = 1.7e-40, P = 1.7e-40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 : blast1: Score = 55 (19.4 bits), Expect = 5.3, Sum P(3) = 0.99
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 Throws : Exception if the stats cannot be parsed, probably due to a change
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 : in the Blast report format.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 See Also : L<_set_data()|_set_data>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 sub _set_score_stats {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 my ($self, $data) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 my ($expect, $p);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 if($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect = +([\d.e+-]+)/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 # blast2 format n = 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 $self->{'_bits'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 $self->{'_score'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 $expect = $3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 } elsif($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect\((\d+)\) = +([\d.e+-]+)/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 # blast2 format n > 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 $self->{'_bits'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 $self->{'_score'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 $self->{'_n'} = $3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 $expect = $4;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346 } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), P = +([\d.e-]+)/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 # blast1 format, n = 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 $self->{'_score'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 $self->{'_bits'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 $expect = $3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 $p = $4;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), +Sum P\((\d+)\) = +([\d.e-]+)/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 # blast1 format, n > 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 $self->{'_score'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 $self->{'_bits'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 $expect = $3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 $self->{'_n'} = $4;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 $p = $5;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 $self->throw("Can't parse score statistics: unrecognized format.", "$data");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 $expect = "1$expect" if $expect =~ /^e/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 $p = "1$p" if defined $p and $p=~ /^e/i;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 $self->{'_expect'} = $expect;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 $self->{'_p'} = $p || undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 =head2 _set_match_stats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 Usage : n/a; called automatically by _set_data()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378 Purpose : Sets various matching statistics obtained from the HSP listing.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 Argument : blast2: Identities = 23/74 (31%), Positives = 29/74 (39%), Gaps = 17/74 (22%)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 : blast2: Identities = 57/98 (58%), Positives = 74/98 (75%)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%), Frame = -3
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 : WU-blast: Identities = 310/553 (56%), Positives = 310/553 (56%), Strand = Minus / Plus
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 Throws : Exception if the stats cannot be parsed, probably due to a change
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 : in the Blast report format.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 Comments : The "Gaps = " data in the HSP header has a different meaning depending
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 : on the type of Blast: for BLASTP, this number is the total number of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 : gaps in query+sbjct; for TBLASTN, it is the number of gaps in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 : query sequence only. Thus, it is safer to collect the data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 : separately by examining the actual sequence strings as is done
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 : in _set_seq().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 See Also : L<_set_data()|_set_data>, L<_set_seq()|_set_seq>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 sub _set_match_stats {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400 my ($self, $data) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402 if($data =~ m!Identities = (\d+)/(\d+)!) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403 # blast1 or 2 format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 $self->{'_numIdentical'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 $self->{'_totalLength'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408 if($data =~ m!Positives = (\d+)/(\d+)!) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 # blast1 or 2 format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 $self->{'_numConserved'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411 $self->{'_totalLength'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414 if($data =~ m!Frame = ([\d+-]+)!) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 $self->{'_frame'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 # Strand data is not always present in this line.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419 # _set_seq() will also set strand information.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420 if($data =~ m!Strand = (\w+) / (\w+)!) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 $self->{'_queryStrand'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422 $self->{'_sbjctStrand'} = $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425 # if($data =~ m!Gaps = (\d+)/(\d+)!) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426 # $self->{'_totalGaps'} = $1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 # } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428 # $self->{'_totalGaps'} = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429 # }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434 =head2 _set_seq_data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436 Usage : n/a; called automatically when sequence data is requested.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 Purpose : Sets the HSP sequence data for both query and sbjct sequences.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 : Includes: start, stop, length, gaps, and raw sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440 Throws : Propagates any exception thrown by _set_seq()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 Comments : Uses raw data stored by _set_data() during object construction.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 : These data are not always needed, so it is conditionally
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 : executed only upon demand by methods such as gaps(), _set_residues(),
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444 : etc. _set_seq() does the dirty work.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 See Also : L<_set_seq()|_set_seq>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 sub _set_seq_data {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 $self->_set_seq('query', @{$self->{'_queryList'}});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 $self->_set_seq('sbjct', @{$self->{'_sbjctList'}});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 # Liberate some memory.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 @{$self->{'_queryList'}} = @{$self->{'_sbjctList'}} = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458 undef $self->{'_queryList'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 undef $self->{'_sbjctList'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 $self->{'_set_seq_data'} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466 =head2 _set_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468 Usage : n/a; called automatically by _set_seq_data()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 : $hsp_obj->_set_seq($seq_type, @data);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 Purpose : Sets sequence information for both the query and sbjct sequences.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471 : Directly counts the number of gaps in each sequence (if gapped Blast).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472 Argument : $seq_type = 'query' or 'sbjct'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 : @data = all seq lines with the form:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474 : Query: 61 SPHNVKDRKEQNGSINNAISPTATANTSGSQQINIDSALRDRSSNVAAQPSLSDASSGSN 120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 Throws : Exception if data strings cannot be parsed, probably due to a change
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476 : in the Blast report format.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 Comments : Uses first argument to determine which data members to set
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 : making this method sensitive to data member name changes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479 : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 Warning : Sequence endpoints are normalized so that start < end. This affects HSPs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 : for TBLASTN/X hits on the minus strand. Normalization facilitates use
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 : of range information by methods such as matches().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 See Also : L<_set_seq_data()|_set_seq_data>, L<matches()|matches>, L<range()|range>, L<start()|start>, L<end()|end>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488 #-------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 sub _set_seq {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490 #-------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492 my $seqType = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 my @data = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494 my @ranges = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
495 my @sequence = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
496 my $numGaps = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
497
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498 foreach( @data ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499 if( m/(\d+) *(\D+) *(\d+)/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
500 push @ranges, ( $1, $3 ) ;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501 push @sequence, $2;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503 $self->warn("Bad sequence data: $_");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507 (scalar(@sequence) and scalar(@ranges)) || $self->throw("Can't set sequence: missing data",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508 "Possibly unrecognized Blast format.");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510 # Sensitive to member name changes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 $seqType = "_\L$seqType\E";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512 $self->{$seqType.'Start'} = $ranges[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 $self->{$seqType.'Stop'} = $ranges[ $#ranges ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 $self->{$seqType.'Seq'} = \@sequence;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516 $self->{$seqType.'Length'} = abs($ranges[ $#ranges ] - $ranges[0]) + 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518 # Adjust lengths for BLASTX, TBLASTN, TBLASTX sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 # Converting nucl coords to amino acid coords.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521 my $prog = $self->{'_prog'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522 if($prog eq 'TBLASTN' and $seqType eq '_sbjct') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523 $self->{$seqType.'Length'} /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524 } elsif($prog eq 'BLASTX' and $seqType eq '_query') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 $self->{$seqType.'Length'} /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 } elsif($prog eq 'TBLASTX') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527 $self->{$seqType.'Length'} /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 $self->{$seqType.'Strand'} = 'Plus' if $prog =~ /BLAST[NX]/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 # Normalize sequence endpoints so that start < end.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 # Reverse complement or 'minus strand' HSPs get flipped here.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 if($self->{$seqType.'Start'} > $self->{$seqType.'Stop'}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 ($self->{$seqType.'Start'}, $self->{$seqType.'Stop'}) =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536 ($self->{$seqType.'Stop'}, $self->{$seqType.'Start'});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 $self->{$seqType.'Strand'} = 'Minus';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540 ## Count number of gaps in each seq. Only need to do this for gapped Blasts.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 # if($self->{'_gapped'}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542 my $seqstr = join('', @sequence);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543 $seqstr =~ s/\s//g;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 my $num_gaps = CORE::length($seqstr) - $self->{$seqType.'Length'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545 $self->{$seqType.'Gaps'} = $num_gaps if $num_gaps > 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546 # }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550 =head2 _set_residues
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 Usage : n/a; called automatically when residue data is requested.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553 Purpose : Sets the residue numbers representing the identical and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 : conserved positions. These data are obtained by analyzing the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 : symbols between query and sbjct lines of the alignments.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 Throws : Propagates any exception thrown by _set_seq_data() and _set_match_seq().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558 Comments : These data are not always needed, so it is conditionally
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559 : executed only upon demand by methods such as seq_inds().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 See Also : L<_set_seq_data()|_set_seq_data>, L<_set_match_seq()|_set_match_seq>, L<seq_inds()|seq_inds>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 #------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 sub _set_residues {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 #------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 my @sequence = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 $self->_set_seq_data() unless $self->{'_set_seq_data'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574 # Using hashes to avoid saving duplicate residue numbers.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 my %identicalList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576 my %identicalList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 my %conservedList_query = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 my %conservedList_sbjct = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580 my $aref = $self->_set_match_seq() if not ref $self->{'_matchSeq'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581 $aref ||= $self->{'_matchSeq'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582 my $seqString = join('', @$aref );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584 my $qseq = join('',@{$self->{'_querySeq'}});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585 my $sseq = join('',@{$self->{'_sbjctSeq'}});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 my $resCount_query = $self->{'_queryStop'} || 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587 my $resCount_sbjct = $self->{'_sbjctStop'} || 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589 my $prog = $self->{'_prog'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590 if($prog !~ /^BLASTP|^BLASTN/) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591 if($prog eq 'TBLASTN') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592 $resCount_sbjct /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 } elsif($prog eq 'BLASTX') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 $resCount_query /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 } elsif($prog eq 'TBLASTX') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 $resCount_query /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 $resCount_sbjct /= 3;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601 my ($mchar, $schar, $qchar);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 while( $mchar = chop($seqString) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603 ($qchar, $schar) = (chop($qseq), chop($sseq));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 if( $mchar eq '+' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 $conservedList_query{ $resCount_query } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606 $conservedList_sbjct{ $resCount_sbjct } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607 } elsif( $mchar ne ' ' ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 $identicalList_query{ $resCount_query } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609 $identicalList_sbjct{ $resCount_sbjct } = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611 $resCount_query-- if $qchar ne $GAP_SYMBOL;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612 $resCount_sbjct-- if $schar ne $GAP_SYMBOL;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614 $self->{'_identicalRes_query'} = \%identicalList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615 $self->{'_conservedRes_query'} = \%conservedList_query;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616 $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 =head2 _set_match_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626 Usage : n/a. Internal method.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627 : $hsp_obj->_set_match_seq()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628 Purpose : Set the 'match' sequence for the current HSP (symbols in between
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 : the query and sbjct lines.)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 Returns : Array reference holding the match sequences lines.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 Throws : Exception if the _matchList field is not set.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633 Comments : The match information is not always necessary. This method
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634 : allows it to be conditionally prepared.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 : Called by _set_residues() and seq_str().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637 See Also : L<_set_residues()|_set_residues>, L<seq_str()|seq_str>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641 #-------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 sub _set_match_seq {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643 #-------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
646 ## DEBUGGING CODE:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
647 # if($self->parent->name eq '1AK5_' and $self->parent->parent->name eq 'YAR073W') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
648 # print "\n_set_match_seq() called for HSP ", $self->name, " of hit ${\$self->parent->name} in query ${\$self->parent->parent->name}"; <STDIN>;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649 # }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 ref($self->{'_matchList'}) || $self->throw("Can't set HSP match sequence: No data");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
653 my @data = @{$self->{'_matchList'}};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655 my(@sequence);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
656 foreach( @data ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
657 chomp($_);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
658 ## Remove leading spaces; (note: aln may begin with a space
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
659 ## which is why we can't use s/^ +//).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
660 s/^ {$self->{'_match_indent'}}//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
661 push @sequence, $_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
662 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
663 # Liberate some memory.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
664 @{$self->{'_matchList'}} = undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
665 $self->{'_matchList'} = undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
666
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
667 $self->{'_matchSeq'} = \@sequence;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
668
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
669 ## DEBUGGING CODE:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
670 # if($self->parent->name eq '1AK5_' and $self->parent->parent->name eq 'YAR073W') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
671 # print "RETURNING: $self->{'_matchSeq'}:\n @{$self->{'_matchSeq'}}";<STDIN>;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
672 # }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
673
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
674 $self->{'_matchSeq'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
675 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
676
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
677
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
678
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
679 =head2 score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
680
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
681 Usage : $hsp_obj->score()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
682 Purpose : Get the Blast score for the HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
683 Returns : Integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
684 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
685 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
686
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
687 See Also : L<bits()|bits>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
688
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
689 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
690
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
691 #---------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
692 sub score { my $self = shift; $self->{'_score'}; }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
693 #---------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
694
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697 =head2 bits
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
698
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
699 Usage : $hsp_obj->bits()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
700 Purpose : Get the Blast score in bits for the HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
701 Returns : Float
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
702 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
703 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
704
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
705
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
706 See Also : L<score()|score>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
707
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
708 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
709
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
710 #--------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the Blast score in bits of this HSP.
# Returns whatever is stored under the '_bits' key (undef if unset).
sub bits {
    my ($self) = @_;
    return $self->{'_bits'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
712 #--------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
713
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
714
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
715
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
716 =head2 n
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
717
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
718 Usage : $hsp_obj->n()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
719 Purpose : Get the N value (num HSPs on which P/Expect is based).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
720 : This value is not defined with NCBI Blast2 with gapping.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
721 Returns : Integer or null string if not defined.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
722 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
723 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
724 Comments : The 'N' value is listed in parentheses with the P/Expect value:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
725 : e.g., P(3) = 1.2e-30 ---> (N = 3).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
726 : Not defined in NCBI Blast2 with gaps.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
727 : This typically is equal to the number of HSPs but not always.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
728 : To obtain the number of HSPs, use Bio::Tools::Blast::Sbjct::num_hsps().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
729
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
730 See Also : L<score()|score>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
731
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
732 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
733
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
734 #-----
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the N value stored under '_n'.
# Any false stored value (undef, 0, '') is reported as the empty string.
sub n {
    my ($self) = @_;
    my $n_value = $self->{'_n'};
    return $n_value ? $n_value : '';
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
736 #-----
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
737
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
738
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
739
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
740 =head2 frame
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
741
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
742 Usage : $hsp_obj->frame()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
743 Purpose : Get the reading frame number (-/+ 1, 2, 3) (TBLASTN/X only).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
744 Returns : Integer or null string if not defined.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
745 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
746 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
747
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
748 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
749
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
750 #---------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the reading frame stored under '_frame'.
# Any false stored value (undef, 0, '') is reported as the empty string.
sub frame {
    my ($self) = @_;
    my $frame_value = $self->{'_frame'};
    return $frame_value ? $frame_value : '';
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
752 #---------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
753
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
754
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
755
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
756 =head2 signif
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
757
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
758 Usage : $hsp_obj->signif()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
759 Purpose : Get the P-value or Expect value for the HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
760 Returns : Float (0.001 or 1.3e-43)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
761 : Returns P-value if it is defined, otherwise, Expect value.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
762 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
763 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
764 Comments : Provided for consistency with Sbjct::signif()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
765 : Support for returning the significance data in different
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
766 : formats (e.g., exponent only), is not provided for HSP objects.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
767 : This is only available for the Sbjct or Blast object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
768
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
769 See Also : L<p()|p>, L<expect()|expect>, B<Bio::Tools::Blast::Sbjct::signif()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
770
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
771 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
772
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
773 #-----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get the significance value for the HSP.
# Returns the P-value ('_p') when it is defined -- even if it is 0 --
# and falls back to the Expect value ('_expect') otherwise.
sub signif {
    my ($self) = @_;

    # A plain defined-test is all that is needed here; the former
    # "my $val ||= ..." applied ||= to a brand-new lexical, which is
    # always a plain assignment.
    return defined $self->{'_p'} ? $self->{'_p'} : $self->{'_expect'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
780
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
781
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
782
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
783 =head2 expect
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
784
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
785 Usage : $hsp_obj->expect()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
786 Purpose : Get the Expect value for the HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
787 Returns : Float (0.001 or 1.3e-43)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
788 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
789 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
790 Comments : Support for returning the expectation data in different
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
791 : formats (e.g., exponent only), is not provided for HSP objects.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
792 : This is only available for the Sbjct or Blast object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
793
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
794 See Also : L<p()|p>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
795
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
796 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
797
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
798 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the Expect value of this HSP.
# Returns whatever is stored under the '_expect' key (undef if unset).
sub expect {
    my ($self) = @_;
    return $self->{'_expect'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
800 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
801
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
802
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
803
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
804 =head2 p
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
805
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
806 Usage : $hsp_obj->p()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
807 Purpose : Get the P-value for the HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
808 Returns : Float (0.001 or 1.3e-43) or undef if not defined.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
809 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
810 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
811 Comments : P-value is not defined with NCBI Blast2 reports.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
812 : Support for returning the P-value data in different
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
813 : formats (e.g., exponent only) is not provided for HSP objects.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
814 : This is only available for the Sbjct or Blast object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
815
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
816 See Also : L<expect()|expect>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
817
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
818 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
819
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
820 #-----
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Accessor for the P-value of this HSP.
# Returns whatever is stored under the '_p' key (undef if unset).
sub p {
    my ($self) = @_;
    return $self->{'_p'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
822 #-----
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
823
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
824
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
825 =head2 length
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
826
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
827 Usage : $hsp->length( [seq_type] )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
828 Purpose : Get the length of the aligned portion of the query or sbjct.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
829 Example : $hsp->length('query')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
830 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
831 Argument : seq_type: 'query' | 'sbjct' | 'total' (default = 'total')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
832 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
833 Comments : 'total' length is the full length of the alignment
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
834 : as reported in the denominators in the alignment section:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
835 : "Identical = 34/120 Positives = 67/120".
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
836 : Developer note: when using the built-in length function within
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
837 : this module, call it as CORE::length().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
838
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
839 See Also : L<gaps()|gaps>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
840
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
841 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
842
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
843 #-----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get the length of the aligned portion of the query or sbjct, or the
# total alignment length.
#
# Argument : seq_type, e.g. 'query' | 'sbjct' | 'total' (default 'total').
# Returns  : the value stored under "_<lowercased seq_type>Length"
#            (undef if no such member exists).
#
# Within this module the built-in must be called as CORE::length().
sub length {
    my ($self, $type) = @_;
    $type = 'total' unless $type;

    # Per-sequence lengths come from _set_seq_data(); only trigger it when
    # one is requested and it has not been run yet.
    if (!$self->{'_set_seq_data'} && $type ne 'total') {
        $self->_set_seq_data();
    }

    # Sensitive to member name format: '_queryLength', '_sbjctLength',
    # '_totalLength'.
    return $self->{ '_' . lc($type) . 'Length' };
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
855
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
856
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
857
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
858 =head2 gaps
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
859
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
860 Usage : $hsp->gaps( [seq_type] )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
861 Purpose : Get the number of gaps in the query, sbjct, or total alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
862 : Also can return query gaps and sbjct gaps as a two-element list
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
863 : when in array context.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
864 Example : $total_gaps = $hsp->gaps();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
865 : ($qgaps, $sgaps) = $hsp->gaps();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
866 : $qgaps = $hsp->gaps('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
867 Returns : scalar context: integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
868 : array context without args: (int, int) = ('queryGaps', 'sbjctGaps')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
869 Argument : seq_type: 'query' | 'sbjct' | 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
870 : (default = 'total', scalar context)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
871 : Array context can be "induced" by providing an argument of 'list' or 'array'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
872 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
873
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
874 See Also : L<length()|length>, L<matches()|matches>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
875
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
876 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
877
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
878 #---------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get the number of gaps in the query, sbjct, or total alignment.
#
# Argument : seq_type: 'query' | 'sbjct' | 'total' | 'list' | 'array'.
#            Without an argument the default follows the calling context:
#            'list' in list context, 'total' in scalar context.
# Returns  : 'list'/'array' : (query gaps, sbjct gaps)
#            'total'        : query gaps + sbjct gaps
#            anything else  : value of "_<lowercased seq_type>Gaps", or 0
#            Missing counts are reported as 0.
sub gaps {
    my ($self, $seqType) = @_;

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    $seqType ||= (wantarray ? 'list' : 'total');

    # Normalise missing counts to 0 up front so that the 'total' sum does
    # not add undef values (which warns under -w); the list branch already
    # treated each count this way.
    my $query_gaps = $self->{'_queryGaps'} || 0;
    my $sbjct_gaps = $self->{'_sbjctGaps'} || 0;

    if ($seqType =~ /list|array/i) {
        return ($query_gaps, $sbjct_gaps);
    }

    if ($seqType eq 'total') {
        return $query_gaps + $sbjct_gaps;
    }

    # Sensitive to member name format: '_queryGaps', '_sbjctGaps'.
    return $self->{ '_' . lc($seqType) . 'Gaps' } || 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
899
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
900
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
901
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
902 =head2 matches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
903
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
904 Usage : $hsp->matches([seq_type], [start], [stop]);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
905 Purpose : Get the total number of identical and conservative matches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
906 : in the query or sbjct sequence for the given HSP. Optionally can
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
907 : report data within a defined interval along the seq.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
908 : (Note: 'conservative' matches are called 'positives' in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
909 : Blast report.)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
910 Example : ($id,$cons) = $hsp_object->matches('sbjct');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
911 : ($id,$cons) = $hsp_object->matches('query',300,400);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
912 Returns : 2-element array of integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
913 Argument : (1) seq_type = 'query' | 'sbjct' (default = query)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
914 : (2) start = Starting coordinate (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
915 : (3) stop = Ending coordinate (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
916 Throws : Exception if the supplied coordinates are out of range.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
917 Comments : Relies on seq_str('match') to get the string of alignment symbols
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
918 : between the query and sbjct lines which are used for determining
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
919 : the number of identical and conservative matches.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
920
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
921 See Also : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, B<Bio::Tools::Blast::Sbjct::_adjust_contigs()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
922
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
923 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
924
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
925 #-----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get the number of identical and conservative matches for the HSP,
# optionally restricted to an interval along the query or sbjct.
#
# Usage    : $hsp->matches(-SEQ => 'sbjct', -START => 300, -STOP => 400);
#            $hsp->matches('sbjct', 300, 400);   # positional, as in the POD
# Argument : seq_type = 'query' | 'sbjct' (default 'query'),
#            start and stop coordinates (both optional).
#            The named form is recognised by a first argument beginning
#            with '-'; anything else is read positionally.
# Returns  : list (num_identical, num_conserved). In scalar context the
#            result is the element count of that list.
# Throws   : via $self->throw() if the requested interval yields an empty
#            slice of the match string.
sub matches {
    my ($self, @args) = @_;
    my @data;

    # Previously only the named form was parsed, so the documented
    # positional calls silently fell through to "whole alignment, query".
    my ($seqType, $beg, $end);
    if (!@args or (defined $args[0] and $args[0] =~ /^-/)) {
        my %param = @args;
        ($seqType, $beg, $end) = @param{ '-SEQ', '-START', '-STOP' };
    }
    else {
        ($seqType, $beg, $end) = @args;
    }
    $seqType ||= 'query';

    if (!defined $beg && !defined $end) {
        ## Get data for the whole alignment.
        push @data, ($self->{'_numIdentical'}, $self->{'_numConserved'});
        return @data;
    }

    ## Get the substring representing the desired sub-section of aln.
    $beg ||= 0;
    $end ||= 0;
    my ($start, $stop) = $self->range($seqType);

    # With only one bound supplied the other snaps to the HSP boundary and
    # the supplied value is applied relative to it.
    # NOTE(review): this treats the lone value as an offset rather than an
    # absolute coordinate -- confirm that is intended.
    if    ($beg == 0) { $beg = $start; $end = $beg + $end; }
    elsif ($end == 0) { $end = $stop;  $beg = $end - $beg; }

    # Clamp to the HSP range. Per the ML notes, the ">=" test and the
    # "+1" on an in-range end belong together here.
    if ($end >= $stop) { $end = $stop; }
    else               { $end += 1; }
    if ($beg < $start) { $beg = $start; }

    ## ML: fix for substr out of range error.
    # For TBLASTN sbjct and BLASTX query the offsets are divided by 3
    # (presumably nucleotide coordinates against a residue-level match
    # string -- TODO confirm).
    my $prog = defined $self->{'_prog'} ? $self->{'_prog'} : '';
    my $scaled = ($prog eq 'TBLASTN' && $seqType eq 'sbjct')
              || ($prog eq 'BLASTX'  && $seqType eq 'query');

    my $match_str = $self->seq_str('match');
    my $seq = $scaled
        ? substr($match_str, int(($beg - $start) / 3), int(($end - $beg + 1) / 3))
        : substr($match_str, $beg - $start, $end - $beg);

    # substr() yields undef when the offset lies outside the string.
    $seq = '' unless defined $seq;

    if (!CORE::length($seq)) {
        $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
    }

    ## Get data for a substring.
    $seq =~ s/ //g;      # remove space (no info).
    my $len_cons = CORE::length($seq);
    $seq =~ s/\+//g;     # remove '+' characters (conservative substitutions)
    my $len_id = CORE::length($seq);
    push @data, ($len_id, $len_cons);

    return @data;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
999
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1000
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1001
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1002 =head2 frac_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1003
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1004 Usage : $hsp_object->frac_identical( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1005 Purpose : Get the fraction of identical positions within the given HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1006 Example : $frac_iden = $hsp_object->frac_identical('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1007 Returns : Float (2-decimal precision, e.g., 0.75).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1008 Argument : seq_type: 'query' | 'sbjct' | 'total'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1009 : default = 'total' (but see comments below).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1010 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1011 Comments : Different versions of Blast report different values for the total
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1012 : length of the alignment. This is the number reported in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1013 : denominators in the stats section:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1014 : "Identical = 34/120 Positives = 67/120".
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1015 : BLAST-GP uses the total length of the alignment (with gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1016 : WU-BLAST uses the length of the query sequence (without gaps).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1017 : Therefore, when called without an argument or an argument of 'total',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1018 : this method will report different values depending on the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1019 : version of BLAST used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1020 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1021 : To get the fraction identical among only the aligned residues,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1022 : ignoring the gaps, call this method with an argument of 'query'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1023 : or 'sbjct'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1024
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1025 See Also : L<frac_conserved()|frac_conserved>, L<num_identical()|num_identical>, L<matches()|matches>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1026
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1027 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1028
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1029 #-------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Get the fraction of identical positions within the HSP.
#
# Argument : seq_type: 'query' | 'sbjct' | 'total' (default 'total').
# Returns  : string formatted with "%.2f":
#            _numIdentical / "_<lowercased seq_type>Length".
#
# The value is computed on demand rather than stored, so the caller can
# choose which length serves as the denominator.
# NOTE(review): dies with a division-by-zero error when the selected
# length member is missing or 0 (e.g. an unrecognised seq_type).
sub frac_identical {
    my ($self, $seqType) = @_;
    $seqType = 'total' unless $seqType;

    # Per-sequence lengths are filled in by _set_seq_data().
    if ($seqType ne 'total' and not $self->{'_set_seq_data'}) {
        $self->_set_seq_data();
    }

    # Sensitive to member name format.
    my $length_key = '_' . lc($seqType) . 'Length';

    return sprintf('%.2f', $self->{'_numIdentical'} / $self->{$length_key});
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1047
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1048
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1049 =head2 frac_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1050
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1051 Usage : $hsp_object->frac_conserved( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1052 Purpose : Get the fraction of conserved positions within the given HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1053 : (Note: 'conservative' positions are called 'positives' in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1054 : Blast report.)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1055 Example : $frac_cons = $hsp_object->frac_conserved('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1056 Returns : Float (2-decimal precision, e.g., 0.75).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1057 Argument : seq_type: 'query' | 'sbjct'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1058 : default = 'total' (but see comments below).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1059 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1060 Comments : Different versions of Blast report different values for the total
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1061 : length of the alignment. This is the number reported in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1062 : denominators in the stats section:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1063 : "Identical = 34/120 Positives = 67/120".
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1064 : BLAST-GP uses the total length of the alignment (with gaps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1065 : WU-BLAST uses the length of the query sequence (without gaps).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1066 : Therefore, when called without an argument or an argument of 'total',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1067 : this method will report different values depending on the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1068 : version of BLAST used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1069 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1070 : To get the fraction conserved among only the aligned residues,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1071 : ignoring the gaps, call this method with an argument of 'query'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1072 : or 'sbjct'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1073
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1074 See Also : L<frac_identical()|frac_identical>, L<num_conserved()|num_conserved>, L<matches()|matches>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1075
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1076 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1077
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1078 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub frac_conserved {
#--------------------
    # Returns the fraction of conserved positions as a string with two
    # decimals. The figure is derived on request from the stored count
    # rather than cached from the parsed report, which keeps the object
    # small and lets the caller choose the denominator: the 'total' length
    # (default) or the 'query' / 'sbjct' length.
    my $self    = shift;
    my $seqType = shift || 'total';

    # For any type other than 'total', make sure _set_seq_data() has run.
    unless( $seqType eq 'total' or $self->{'_set_seq_data'} ) {
        $self->_set_seq_data();
    }

    ## Sensitive to member name format: builds e.g. '_queryLength'.
    my $lengthKey = '_' . lc($seqType) . 'Length';

    return sprintf( "%.2f", $self->{'_numConserved'} / $self->{$lengthKey} );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1097
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1098
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1099 =head2 num_identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1100
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1101 Usage : $hsp_object->num_identical();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1102 Purpose : Get the number of identical positions within the given HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1103 Example : $num_iden = $hsp_object->num_identical();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1104 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1105 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1106 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1108 See Also : L<num_conserved()|num_conserved>, L<frac_identical()|frac_identical>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1109
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1110 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1112 #-------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub num_identical {
#-------------------
    # Plain accessor: returns the stored count of identical positions.
    my $self = $_[0];

    return $self->{'_numIdentical'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1119
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1121 =head2 num_conserved
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1123 Usage : $hsp_object->num_conserved();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1124 Purpose : Get the number of conserved positions within the given HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1125 Example : $num_cons = $hsp_object->num_conserved();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1126 Returns : integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1127 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1128 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1130 See Also : L<num_identical()|num_identical>, L<frac_conserved()|frac_conserved>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1132 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1133
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1134 #-------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub num_conserved {
#-------------------
    # Plain accessor: returns the stored count of conserved positions.
    my $self = $_[0];

    return $self->{'_numConserved'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1141
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1142
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1144 =head2 range
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1146 Usage : $hsp->range( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1147 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1148 : in the HSP alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1149 Example : ($qbeg, $qend) = $hsp->range('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1150 : ($sbeg, $send) = $hsp->range('sbjct');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1151 Returns : Two-element array of integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1152 Argument : seq_type = string, 'query' or 'sbjct' (default = 'query')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1153 : (case insensitive).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1154 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1156 See Also : L<start()|start>, L<end()|end>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1157
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1158 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1159
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1160 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub range {
#----------
    # Returns the (start, stop) coordinates stored for the requested
    # sequence type; 'query' is used when no type is given.
    my $self    = shift;
    my $seqType = shift;

    # The coordinates are read after _set_seq_data() has run once.
    $self->{'_set_seq_data'} or $self->_set_seq_data();

    ## Sensitive to member name changes: builds e.g. '_queryStart'.
    my $prefix = '_' . lc( $seqType || 'query' );

    return ( $self->{ $prefix . 'Start' }, $self->{ $prefix . 'Stop' } );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1174 =head2 start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1175
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1176 Usage : $hsp->start( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1177 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1178 : in the HSP alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1179 Example : $qbeg = $hsp->start('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1180 : $sbeg = $hsp->start('sbjct');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1181 : ($qbeg, $sbeg) = $hsp->start();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1182 Returns : scalar context: integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1183 : array context without args: list of two integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1184 Argument : In scalar context: seq_type = 'query' or 'sbjct'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1185 : (case insensitive). If not supplied, 'query' is used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1186 : Array context can be "induced" by providing an argument of 'list' or 'array'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1187 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1188
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1189 See Also : L<end()|end>, L<range()|range>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1191 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1192
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1193 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub start {
#----------
    # Returns the start coordinate of one sequence, or the pair
    # (query start, sbjct start) when a list is requested.
    my $self    = shift;
    my $seqType = shift;

    # Without an argument the calling context selects the mode.
    unless( $seqType ) {
        $seqType = wantarray ? 'list' : 'query';
    }

    $self->_set_seq_data() if not $self->{'_set_seq_data'};

    if( $seqType !~ /list|array/i ) {
        ## Sensitive to member name changes: builds e.g. '_queryStart'.
        return $self->{ '_' . lc($seqType) . 'Start' };
    }

    return ( $self->{'_queryStart'}, $self->{'_sbjctStart'} );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1210
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1211 =head2 end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1212
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1213 Usage : $hsp->end( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1214 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1215 : in the HSP alignment.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1216 Example : $qend = $hsp->end('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1217 : $send = $hsp->end('sbjct');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1218 : ($qend, $send) = $hsp->end();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1219 Returns : scalar context: integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1220 : array context without args: list of two integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1221 Argument : In scalar context: seq_type = 'query' or 'sbjct'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1222 : (case insensitive). If not supplied, 'query' is used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1223 : Array context can be "induced" by providing an argument of 'list' or 'array'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1224 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1226 See Also : L<start()|start>, L<range()|range>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1227
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1228 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1229
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1230 #----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub end {
#----------
    # Returns the end coordinate of one sequence, or the pair
    # (query stop, sbjct stop) when a list is requested.
    my $self    = shift;
    my $seqType = shift;

    # Without an argument the calling context selects the mode.
    unless( $seqType ) {
        $seqType = wantarray ? 'list' : 'query';
    }

    $self->_set_seq_data() if not $self->{'_set_seq_data'};

    if( $seqType !~ /list|array/i ) {
        ## Sensitive to member name changes: builds e.g. '_queryStop'.
        return $self->{ '_' . lc($seqType) . 'Stop' };
    }

    return ( $self->{'_queryStop'}, $self->{'_sbjctStop'} );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1247
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1249
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1250 =head2 strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1251
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1252 Usage : $hsp_object->strand( [seq_type] )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1253 Purpose : Get the strand of the query or sbjct sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1254 Example : print $hsp->strand('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1255 : ($qstrand, $sstrand) = $hsp->strand();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1256 Returns : -1, 0, or 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1257 : -1 = Minus strand, +1 = Plus strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1258 : Returns 0 if strand is not defined, which occurs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1259 : for non-TBLASTN/X reports.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1260 : In scalar context without arguments, returns queryStrand value.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1261 : In array context without arguments, returns a two-element list
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1262 : of strings (queryStrand, sbjctStrand).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1263 : Array context can be "induced" by providing an argument of 'list' or 'array'.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1264 Argument : seq_type: 'query' | 'sbjct' or undef
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1265 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1266
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1267 See Also : L<_set_seq()|_set_seq>, L<_set_match_stats()|_set_match_stats>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1268
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1269 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1270
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1271 #-----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub strand {
#-----------
    # Returns the strand of the query or sbjct sequence.
    #   - A single sequence type yields the %STRAND_SYMBOL translation of
    #     the stored strand value, or 0 when there is no translation.
    #   - 'list'/'array' (or list context without an argument) yields the
    #     stored (query, sbjct) strand values untranslated, or ('','')
    #     when no query strand is defined.
    #   - A 'query' request on a TBLASTN report yields ''.
    my $self    = shift;
    my $seqType = shift;

    # Without an argument the calling context selects the mode.
    unless( $seqType ) {
        $seqType = wantarray ? 'list' : 'query';
    }

    return '' if $seqType eq 'query' and $self->{'_prog'} eq 'TBLASTN';

    ## Sensitive to member name format.
    # $memberPrefix could be '_list'.
    my $memberPrefix = '_' . lc($seqType);

    # Process the sequence data only if that has not happened yet and no
    # query strand is already stored.
    unless( $self->{'_set_seq_data'} or $self->{'_queryStrand'} ) {
        $self->_set_seq_data();
    }

    if( $memberPrefix =~ /list|array/i ) {
        defined $self->{'_queryStrand'} or return ('','');
        return ( $self->{'_queryStrand'}, $self->{'_sbjctStrand'} );
    }

    # Silence -w warnings for the lookup below (the stored strand may be
    # undefined).
    local $^W = 0;
    return $STRAND_SYMBOL{ $self->{ $memberPrefix . 'Strand' } } || 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1292
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1293
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1294 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1295 ## INSTANCE METHODS ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1296 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1298
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1299 =head2 seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1300
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1301 Usage : $hsp->seq( [seq_type] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1302 Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1303 Example : $seqObj = $hsp->seq('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1304 Returns : Object reference for a Bio::Seq.pm object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1305 Argument : seq_type = 'query' or 'sbjct' (default = 'query').
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1306 Throws : Propagates any exception that occurs during construction
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1307 : of the Bio::Seq.pm object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1308 Comments : The sequence string is obtained via seq_str(), which joins
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1309 : the alignment lines of the requested sequence and removes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1310 : all whitespace.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1312 See Also : L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, B<Bio::Seq.pm>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1313
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1314 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1315
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1316 #-------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq {
#-------
    # Builds and returns a Bio::Seq object for the requested sequence.
    #
    # Argument : $seqType = 'query' or 'sbjct' (default = 'query').
    # Returns  : the object produced by Bio::Seq->new().
    # Throws   : propagates any exception raised by seq_str() or by the
    #            Bio::Seq constructor.
    my( $self, $seqType ) = @_;
    $seqType ||= 'query';

    my $str = $self->seq_str($seqType);
    my $num = $self->name;

    # The ID comes from the grandparent object for the query and from the
    # parent object otherwise (presumably the Blast report and the hit,
    # respectively -- confirm against the parent classes).
    my $name = $seqType =~ /query/i
        ? $self->parent->parent->name
        : $self->parent->name;

    # Loaded lazily so Bio::Seq is only required when a sequence object
    # is actually requested.
    require Bio::Seq;

    # Direct method-call syntax instead of the ambiguous indirect object
    # form "new Bio::Seq (...)".
    return Bio::Seq->new( -ID   => $name,
                          -SEQ  => $str,
                          -DESC => "Blast HSP #$num, $seqType sequence",
                        );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1334
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1335
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1336
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1337 =head2 seq_str
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1338
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1339 Usage : $hsp->seq_str( seq_type );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1340 Purpose : Get the full query, sbjct, or 'match' sequence as a string.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1341 : The 'match' sequence is the string of symbols in between the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1342 : query and sbjct sequences.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1343 Example : $str = $hsp->seq_str('query');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1344 Returns : String
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1345 Argument : seq_type = 'query' or 'sbjct' or 'match'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1346 Throws : Exception if the argument does not match an accepted seq_type.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1347 Comments : Calls _set_match_seq() to set the 'match' sequence if it has
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1348 : not been set already.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1349
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1350 See Also : L<seq()|seq>, L<seq_inds()|seq_inds>, L<_set_match_seq()|_set_match_seq>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1351
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1352 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1354 #------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq_str {
#------------
    # Returns the full query, sbjct, or 'match' sequence as one string.
    #
    # Argument : $seqType = 'query', 'sbjct' or 'match' (case insensitive).
    # Returns  : for 'query'/'sbjct', the stored sequence lines joined
    #            together with all whitespace removed; for 'match', the
    #            stored match lines joined together unchanged.
    # Throws   : exception (via throw()) for any other sequence type.
    my( $self, $seqType ) = @_;

    ## Sensitive to member name changes.
    $seqType = "_\L$seqType\E";

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    if( $seqType =~ /sbjct|query/ ) {
        my $seq = join( '', @{ $self->{ $seqType . 'Seq' } } );
        $seq =~ s/\s+//g;
        return $seq;

    } elsif( $seqType =~ /match/i ) {
        # Only need to call _set_match_seq() if the match seq is requested.
        # The call is kept separate from the declaration below: a "my"
        # under a statement modifier has undefined behaviour in Perl.
        $self->_set_match_seq() unless ref $self->{'_matchSeq'};
        my $aref = $self->{'_matchSeq'};

        return join( '', @$aref );

    } else {
        $self->throw("Invalid or undefined sequence type: $seqType",
                     "Valid types: query, sbjct, match");
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1386
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1387
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1388
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1389
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1390 =head2 seq_inds
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1391
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1392 Usage : $hsp->seq_inds( seq_type, class, collapse );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1393 Purpose : Get a list of residue positions (indices) for all identical
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1394 : or conserved residues in the query or sbjct sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1395 Example : @ind = $hsp->seq_inds('query', 'identical');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1396 : @ind = $hsp->seq_inds('sbjct', 'conserved');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1397 : @ind = $hsp->seq_inds('sbjct', 'conserved', 1);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1398 Returns : List of integers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1399 : May include ranges if collapse is true.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1400 Argument : seq_type = 'query' or 'sbjct' (default = query)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1401 : class = 'identical' or 'conserved' (default = identical)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1402 : (can be shortened to 'id' or 'cons')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1403 : (actually, anything not 'id' will evaluate to 'conserved').
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1404 : collapse = boolean, if true, consecutive positions are merged
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1405 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1406 : collapses to "1-5 7 9-11". This is useful for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1407 : consolidating long lists. Default = no collapse.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1408 Throws : n/a.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1409 Comments : Calls _set_residues() to set the 'match' sequence if it has
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1410 : not been set already.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1411
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1412 See Also : L<seq()|seq>, L<_set_residues()|_set_residues>, L<collapse_nums()|collapse_nums>, B<Bio::Tools::Blast::Sbjct::seq_inds()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1413
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1414 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1415
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1416 #---------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub seq_inds {
#---------------
    # Return the numerically sorted residue positions stored for one sequence
    # of this HSP.
    #   $seq      : 'query' or 'sbjct'. Anything that does not start with "q"
    #               (case-insensitive) selects the sbjct. Default: 'query'.
    #   $class    : 'identical' or 'conserved'. Anything that does not start
    #               with "id" (case-insensitive) selects conserved.
    #               Default: 'identical'.
    #   $collapse : when true, the sorted list is passed through
    #               collapse_nums() before being returned.
    my ($self, $seq, $class, $collapse) = @_;

    $seq   ||= 'query';
    $class ||= 'identical';

    # The residue hashes are filled on demand; the query/identical hash is
    # used as the "already done" marker.
    $self->_set_residues() unless defined $self->{'_identicalRes_query'};

    # Normalise both arguments to the spellings used in the member names.
    $seq   = ($seq   =~ /^q/i  ? 'query'     : 'sbjct');
    $class = ($class =~ /^id/i ? 'identical' : 'conserved');

    ## Sensitive to member name changes.
    my $member = "_${class}Res_${seq}";

    my @ary = sort { $a <=> $b } keys %{ $self->{$member} };

    # Plain call instead of the legacy &name(...) form.
    return $collapse ? collapse_nums(@ary) : @ary;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1438
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1439
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1440
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1441
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1442 =head2 get_aln
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1443
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1444 Usage : $hsp->get_aln()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1445 Purpose : Get a Bio::UnivAln.pm object constructed from the query + sbjct
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1446 : sequences of the present HSP object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1447 Example : $aln_obj = $hsp->get_aln();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1448 Returns : Object reference for a Bio::UnivAln.pm object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1449 Argument : n/a.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1450 Throws : Propagates any exception occurring during the construction of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1451 : the Bio::UnivAln object.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1452 Comments : Requires Bio::UnivAln.pm.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1453 : The Bio::UnivAln.pm object is constructed from the query + sbjct
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1454 : sequence objects obtained by calling seq().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1455 : Gap residues are included (see $GAP_SYMBOL). It is important that
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1456 : Bio::UnivAln.pm recognizes the gaps correctly. A strategy for doing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1457 : this is being considered. Currently it is hard-wired.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1458
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1459 See Also : L<seq()|seq>, B<Bio::UnivAln.pm>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1460
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1461 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1462
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1463 #------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub get_aln {
#------------
    # Build and return a Bio::UnivAln object holding the query and sbjct
    # sequences of this HSP. Bio::UnivAln is loaded on demand, and nothing
    # it throws is caught here.
    my ($self) = @_;

    require Bio::UnivAln;

    my $query_seq = $self->seq('query');
    my $sbjct_seq = $self->seq('sbjct');

    my $description = sprintf(
        "HSP #%s of query %s vs. sbjct %s",
        $self->name, $self->parent->parent->name, $self->parent->name
    );

    # Program names ending in "P" or starting with "T" are treated as amino
    # acid alignments; everything else as DNA.
    my $seq_type = 'dna';
    if ( $self->{'_prog'} =~ /P$|^T/ ) {
        $seq_type = 'amino';
    }

    return Bio::UnivAln->new(
        -seqs => [ $query_seq, $sbjct_seq ],
        -desc => $description,
        -type => $seq_type,
    );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1483
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1484
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1485 =head2 display
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1486
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1487 Usage : $hsp_object->display( %named_parameters );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1488 Purpose : Display information about Bio::Tools::Blast::HSP.pm data members
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1489 : including: length, gaps, score, significance value,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1490 : sequences and sequence indices.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1491 Example : $object->display(-SHOW=>'stats');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1492 Argument : Named parameters: (TAGS CAN BE UPPER OR LOWER CASE)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1493 : -SHOW => 'hsp',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1494 : -WHERE => filehandle (default = STDOUT)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1495 Returns : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1496 Status : Experimental
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1497 Comments : For more control over the display of sequence data,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1498 : use seq(), seq_str(), seq_inds().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1499
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1500 See Also : L<_display_seq()|_display_seq>, L<seq()|seq>, L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<_display_matches()|_display_matches>, B<Bio::Root::Object::display()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1501
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1502 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1503
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1504 #-----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub display {
#-----------
    # Print the statistics, sequences and match positions of this HSP to the
    # handle returned by set_display(%param).
    my ($self, %param) = @_;

    my $sbjctName = $self->parent->name();
    my $queryName = $self->parent->parent->name();
    my $layout    = $self->parent->parent->_layout();

    my $OUT = $self->set_display(%param);

    # Integer statistics fetched through accessor methods, in display order.
    foreach my $row (
        [ 'LENGTH TOTAL', 'length', 'total' ],
        [ 'LENGTH QUERY', 'length', 'query' ],
        [ 'LENGTH SBJCT', 'length', 'sbjct' ],
        [ 'GAPS QUERY',   'gaps',   'query' ],
        [ 'GAPS SBJCT',   'gaps',   'sbjct' ],
    ) {
        my ($label, $method, $which) = @{$row};
        printf {$OUT} "%-15s: %d\n", $label, $self->$method($which);
    }

    printf {$OUT} "%-15s: %d\n",    'SCORE', $self->{'_score'};
    printf {$OUT} "%-15s: %0.1f\n", 'BITS',  $self->{'_bits'};

    # The P value is printed only for layout 1; the Expect value always.
    printf {$OUT} "%-15s: %.1e\n", 'P-VAL',  $self->{'_p'} if $layout == 1;
    printf {$OUT} "%-15s: %.1e\n", 'EXPECT', $self->{'_expect'};

    # Percentages are relative to the length of the query sequence.
    my $queryLength  = $self->length('query');
    my $numIdentical = $self->{'_numIdentical'};
    my $numConserved = $self->{'_numConserved'};

    printf {$OUT} "%-15s: %d (%0.0f%%)\n", 'IDENTICAL', $numIdentical,
        $numIdentical / $queryLength * 100;
    printf {$OUT} "%-15s: %d (%0.0f%%) %s \n", 'CONSERVED', $numConserved,
        $numConserved / $queryLength * 100, 'includes identical';

    $self->_display_seq('query', $queryName, $OUT);
    $self->_display_seq('sbjct', $sbjctName, $OUT);
    $self->_display_matches($queryName, $sbjctName, $OUT);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1541
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1542
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1543
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1544
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1545 =head2 _display_seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1546
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1547 Usage : n/a; called automatically by display()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1548 Purpose : Display information about query and sbjct HSP sequences.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1549 : Prints the start, stop coordinates and the actual sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1550 Example : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1551 Argument :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1552 Returns : printf call.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1553 Status : Experimental
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1554 Comments : For more control, use seq(), seq_str(), or seq_inds().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1555
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1556 See Also : L<display()|display>, L<seq()|seq>, L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<_display_matches()|_display_matches>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1557
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1558 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1559
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1560 #------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _display_seq {
#------------------
    # Print the name, start/stop coordinates and sequence lines of either the
    # query or the sbjct sequence ($seqType) to the filehandle $OUT.
    my ($self, $seqType, $name, $OUT) = @_;

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    # Sensitive to member name changes.
    my $prefix = '_' . lc($seqType);

    # The underline is two characters longer than the name. CORE::length is
    # spelled out because this package has its own length() method.
    my $underline = '-' x ( CORE::length($name) + 2 );

    printf {$OUT} "\n%10s: %s\n%10s %s\n", uc($seqType), "$name", '-----', $underline;
    printf {$OUT} "%13s: %d\n", 'START', $self->{"${prefix}Start"};
    printf {$OUT} "%13s: %d\n", 'STOP',  $self->{"${prefix}Stop"};
    printf {$OUT} "%13s: \n",   'SEQ';

    foreach my $chunk ( @{ $self->{"${prefix}Seq"} } ) {
        printf {$OUT} "%15s%s\n", '', $chunk;
    }
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1578
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1579
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1580 =head2 _display_matches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1581
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1582 Usage : n/a; called automatically by display()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1583 Purpose : Display information about identical and conserved positions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1584 : within both the query and sbjct sequences.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1585 Example : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1586 Argument :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1587 Returns : printf call.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1588 Status : Experimental
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1589 Comments : For more control, use seq_inds().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1590
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1591 See Also : L<display()|display>, L<seq_inds()|seq_inds>, L<_display_seq()|_display_seq>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1592
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1593 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1594
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1595 #--------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub _display_matches {
#--------------------
    # Print the match lines of the alignment followed by four residue lists:
    # identical and conserved positions for the query, then for the sbjct.
    #   $queryName, $sbjctName : names used in the section headings.
    #   $OUT                   : filehandle to print to.
    my ($self, $queryName, $sbjctName, $OUT) = @_;

    # The residue hashes are filled on demand; the query/identical hash is
    # used as the "already done" marker.
    $self->_set_residues() unless defined $self->{'_identicalRes_query'};

    printf {$OUT} "\n%10s: \n%10s\n", "HITS", "-----";
    foreach my $line ( @{ $self->{'_matchSeq'} } ) {
        printf {$OUT} "%15s%s\n", "", $line;
    }

    my $rule          = "--------------------------------------------";
    my $numIdentical  = $self->{'_numIdentical'};
    my $justConserved = ($self->{'_numConserved'}) - ($self->{'_numIdentical'});

    # Print one titled list of residue positions, 15 per line. Positions are
    # sorted numerically (a plain "sort keys" would order 10 before 2).
    my $print_residues = sub {
        my ($title, $residues) = @_;
        printf {$OUT} "\n%5s%s:\n%5s%s\n\t", "", $title, "", $rule;
        my $count = 0;
        foreach my $resNum ( sort { $a <=> $b } keys %{ $residues || {} } ) {
            $count++;
            print {$OUT} $resNum . ( $count % 15 ? ", " : "\n\t" );
        }
    };

    print {$OUT} "\n\U$queryName\E\n------------\n";
    $print_residues->( "IDENTICAL RESIDUES IN $queryName (n=$numIdentical)",
                       $self->{'_identicalRes_query'} );
    print {$OUT} "\n";
    $print_residues->( "CONSERVED RESIDUES IN $queryName (n=$justConserved)",
                       $self->{'_conservedRes_query'} );

    print {$OUT} "\n\n\U$sbjctName\E\n------------\n";
    $print_residues->( "IDENTICAL RESIDUES IN $sbjctName (n=$numIdentical)",
                       $self->{'_identicalRes_sbjct'} );
    print {$OUT} "\n";
    $print_residues->( "CONSERVED RESIDUES IN $sbjctName (n=$justConserved)",
                       $self->{'_conservedRes_sbjct'} );

    return;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1652
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1653
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1654
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1655
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1656 =head2 homol_data
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1657
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1658 Usage : $data = $hsp_object->homol_data( %named_params );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1659 Purpose : Gets similarity data for a single HSP.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1660 Returns : String:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1661 : "Homology data" for each HSP is in the format:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1662 : "<integer> <start> <stop>"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1663 : where integer is the value returned by homol_score().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1664 Argument : Named params: (UPPER OR LOWERCASE TAGS)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1665 : currently just one param is used:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1666 : -SEQ =>'query' or 'sbjct'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1667 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1668 Status : Experimental
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1669 Comments : This is a very experimental method used for obtaining a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1670 : coarse indication of:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1671 : 1) how strong the similarity is between the sequences in the HSP,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1672 : 2) the endpoints of the alignment (sequence monomer numbers)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1673
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1674 See Also : L<homol_score()|homol_score>, B<Bio::Tools::Blast::homol_data()>, B<Bio::Tools::Blast::Sbjct::homol_data()>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1675
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1676 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1677
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1678 #---------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub homol_data {
#---------------
    # Return the string "<homol_score> <start> <stop>" for one sequence of
    # this HSP. The sequence is chosen with -SEQ (or -seq) => 'query' or
    # 'sbjct'; the sbjct sequence is used when neither tag is given.
    my ($self, %param) = @_;

    my $which = $param{-SEQ} || $param{'-seq'} || 'sbjct';
    my $score = $self->homol_score();

    # Sensitive to member name changes.
    my $member = '_' . lc($which);

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    return join ' ', $score, $self->{"${member}Start"}, $self->{"${member}Stop"};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1690
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1691
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1692 =head2 homol_score
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1693
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1694 Usage : $self->homol_score();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1695 Purpose : Get a homology score (integer 1 - 3) as a coarse representation of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1696 : the strength of the similarity independent of sequence composition.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1697 : Based on the Blast bit score.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1698 Example : $hscore = $hsp->homol_score();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1699 Returns : Integer
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1700 Argument : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1701 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1702 Status : Experimental
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1703 Comments : See @Bio::Tools::Blast::HSP::SCORE_CUTOFFS for the specific values.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1704 : Currently, BIT_SCORE HOMOL_SCORE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1705 : --------- -----------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1706 : >=100 --> 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1707 : 30-100 --> 2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1708 : < 30 --> 3
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1709
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1710 See Also : L<homol_data()|homol_data>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1711
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1712 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1713
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1714 #----------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub homol_score {
#----------------
    # Map the bit score of this HSP onto a coarse integer class using the two
    # thresholds in @SCORE_CUTOFFS (defined elsewhere in this package):
    #   bits >= $SCORE_CUTOFFS[0]                    --> 1
    #   $SCORE_CUTOFFS[1] <= bits < $SCORE_CUTOFFS[0] --> 2
    #   otherwise                                    --> 3
    my ($self) = @_;

    my $bits = $self->{'_bits'};

    return 1 if $bits >= $SCORE_CUTOFFS[0];
    return 2 if $bits >= $SCORE_CUTOFFS[1];
    return 3;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1724
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1725
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1726 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1727 ## CLASS METHODS ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1728 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1729
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1730 =head1 CLASS METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1731
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1732 =head2 collapse_nums
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1733
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1734 Usage : @cnums = collapse_nums( @numbers );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1735 Purpose : Collapses a list of numbers into a set of ranges of consecutive terms:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1736 : Useful for condensing long lists of consecutive numbers.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1737 : EXPANDED:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1738 : 1 2 3 4 5 6 10 12 13 14 15 17 18 20 21 22 24 26 30 31 32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1739 : COLLAPSED:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1740 : 1-6 10 12-15 17 18 20-22 24 26 30-32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1741 Argument : List of numbers, sorted numerically.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1742 Returns : List of numbers mixed with ranges of numbers (see above).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1743 Throws : n/a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1744 Comments : Probably belongs in a more general utility class.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1745
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1746 See Also : L<seq_inds()|seq_inds>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1747
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1748 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1749
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1750 #------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub collapse_nums {
#------------------
    # Collapses a numerically sorted (ascending) list of numbers into a list
    # in which every run of three or more consecutive integers is replaced
    # by a single "FROM-TO" string. Isolated numbers and runs of exactly two
    # consecutive numbers are passed through as individual elements, e.g.:
    #
    #   1 2 3 4 5 6 10 12 13 14 15 17 18  -->  1-6 10 12-15 17 18
    #
    # Argument : List of numbers, sorted numerically.
    # Returns  : List of numbers mixed with "FROM-TO" range strings
    #            (an empty list when called without arguments).
    #
    # The start of the pending run is tracked with definedness rather than
    # truthiness so that a run beginning at 0 is not silently dropped.
    my @nums = @_;
    my @collapsed;
    my ($from, $to);
    my $consec = 0;    # how many consecutive successors follow $from

    # Appends the pending run (if any) to @collapsed in its collapsed form.
    my $flush = sub {
        return unless defined $from;
        if    ($consec == 0) { push @collapsed, $from; }
        elsif ($consec == 1) { push @collapsed, $from, $to; }
        else                 { push @collapsed, "$from-$to"; }
    };

    for my $i (0 .. $#nums) {
        # Extend the current run when this number directly follows the
        # previous one. The 'defined' guard also keeps the first iteration
        # from looking at $nums[-1].
        if (defined $from and $nums[$i] == $nums[$i-1] + 1) {
            $to = $nums[$i];
            $consec++;
            next;
        }

        # Gap (or first element): emit the finished run and start a new one.
        $flush->();
        $from   = $nums[$i];
        $to     = undef;
        $consec = 0;
    }
    $flush->();

    return @collapsed;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1780
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1781
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1782 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1783 __END__
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1784
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1785 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1786 # END OF CLASS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1787 #####################################################################################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1788
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1789 =head1 FOR DEVELOPERS ONLY
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1790
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1791 =head2 Data Members
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1792
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1793 Information about the various data members of this module is provided for those
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1794 wishing to modify or understand the code. Two things to bear in mind:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1795
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1796 =over 4
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1797
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1798 =item 1 Do NOT rely on these in any code outside of this module.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1799
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1800 All data members are prefixed with an underscore to signify that they are private.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1801 Always use accessor methods. If the accessor doesn't exist or is inadequate,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1802 create or modify an accessor (and let me know, too!).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1803
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1804 =item 2 This documentation may be incomplete and out of date.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1805
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1806 It is easy for these data member descriptions to become obsolete as
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1807 this module is still evolving. Always double check this info and search
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1808 for members not described here.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1809
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1810 =back
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1811
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1812 An instance of Bio::Tools::Blast::HSP.pm is a blessed reference to a hash containing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1813 all or some of the following fields:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1814
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1815 FIELD VALUE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1816 --------------------------------------------------------------
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1817 (member names are mostly self-explanatory)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1818
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1819 _score :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1820 _bits :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1821 _p :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1822 _n : Integer. The 'N' value listed in parentheses with P/Expect value:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1823 : e.g., P(3) = 1.2e-30 ---> (N = 3).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1824 : Not defined in NCBI Blast2 with gaps.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1825 : To obtain the number of HSPs, use Bio::Tools::Blast::Sbjct::num_hsps().
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1826 _expect :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1827 _queryLength :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1828 _queryGaps :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1829 _queryStart :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1830 _queryStop :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1831 _querySeq :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1832 _sbjctLength :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1833 _sbjctGaps :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1834 _sbjctStart :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1835 _sbjctStop :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1836 _sbjctSeq :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1837 _matchSeq : String. Contains the symbols between the query and sbjct lines
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1838 which indicate identical (letter) and conserved ('+') matches
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1839 or a mismatch (' ').
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1840 _numIdentical :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1841 _numConserved :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1842 _identicalRes_query :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1843 _identicalRes_sbjct :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1844 _conservedRes_query :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1845 _conservedRes_sbjct :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1846 _match_indent : The number of leading space characters on each line containing
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1847 the match symbols. _match_indent is 13 in this example:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1848 Query: 285 QNSAPWGLARISHRERLNLGSFNKYLYDDDAG
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1849 Q +APWGLARIS G+ + Y YD+ AG
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1850 ^^^^^^^^^^^^^
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1851
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1852 INHERITED DATA MEMBERS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1853
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1854 _name : From Bio::Root::Object.pm.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1855 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1856 _parent : From Bio::Root::Object.pm. This member contains a reference to the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1857 : Bio::Tools::Blast::Sbjct.pm object to which this hit belongs.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1858
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1859
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1860 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1861
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1862 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1863