annotate variant_effect_predictor/Bio/Tools/Blast/HSP.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # PACKAGE : Bio::Tools::Blast::HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # AUTHOR : Steve Chervitz (sac@bioperl.org)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # CREATED : March 1996
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # STATUS : Alpha
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # REVISION: $Id: HSP.pm,v 1.18 2002/10/22 07:38:48 lapp Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # For the latest version and documentation, visit the distribution site:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # http://genome-www.stanford.edu/perlOOP/bioperl/blast/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # To generate documentation, run this module through pod2html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # (preferably from Perl v5.004 or better).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 # Copyright (c) 1996-2000 Steve Chervitz. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 # This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 # modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 #----------------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 package Bio::Tools::Blast::HSP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 use Bio::Root::Global qw(:devel);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::Root::Object ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #use Bio::Root::Err qw(:std);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 @ISA = qw( Bio::Root::Object);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 use vars qw($ID $GAP_SYMBOL @SCORE_CUTOFFS $Revision %STRAND_SYMBOL);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $ID = 'Bio::Tools::Blast::HSP';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $Revision = '$Id: HSP.pm,v 1.18 2002/10/22 07:38:48 lapp Exp $'; #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 $GAP_SYMBOL = '-'; # Need a more general way to handle gap symbols.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 @SCORE_CUTOFFS = ( 100, 30 ); # Bit score cutoffs (see homol_score()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 %STRAND_SYMBOL = ('Plus' => 1, 'Minus' => -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 ## POD Documentation:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 Bio::Tools::Blast::HSP - Bioperl BLAST High-Scoring Segment Pair object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 =head2 Object Creation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 The construction of HSP objects is handled by Bio::Tools::Blast::Sbjct.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 You should not need to use this package directly. See L<_initialize()|_initialize>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 for a description of constructor parameters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 require Bio::Tools::Blast::HSP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 $hspObj = eval{ new Bio::Tools::Blast::HSP(-DATA =>\@hspData,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 -PARENT =>$sbjct_object,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 -NAME =>$hspCount,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 -PROGRAM =>'TBLASTN',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 @hspData includes the raw BLAST report data for a specific HSP,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 and is prepared by Bio::Tools::Blast::Sbjct.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =head1 INSTALLATION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 This module is included with the central Bioperl distribution:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 http://bio.perl.org/Core/Latest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 ftp://bio.perl.org/pub/DIST
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 Follow the installation instructions included in the README file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 The Bio::Tools::Blast::HSP.pm module encapsulates data and methods for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 manipulating, parsing, and analyzing HSPs ("High-scoring Segment Pairs")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 derived from BLAST sequence analysis.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 This module is a utility module used by the B<Bio::Tools::Blast::Sbjct.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 and is not intended for separate use. Please see documentation for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 B<Bio::Tools::Blast.pm> for some basic information about using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 HSP objects (L<Links:>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 =item * Supports BLAST versions 1.x and 2.x, gapped and ungapped.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 Bio::Tools::Blast::HSP.pm has the ability to extract a list of all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 residue indices for identical and conservative matches along both
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 query and sbjct sequences. Since this degree of detail is not always
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 needed, this behavior does not occur during construction of the HSP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 object. These data will automatically be collected as necessary as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 the HSP.pm object is used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 =head1 DEPENDENCIES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 Bio::Tools::Blast::HSP.pm is a concrete class that inherits from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 B<Bio::Root::Object.pm> and relies on B<Bio::Tools::Blast::Sbjct.pm> as a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 container for HSP.pm objects. B<Bio::Seq.pm> and B<Bio::UnivAln.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 are employed for creating sequence and alignment objects,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 respectively.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 =head2 Relationship to UnivAln.pm & Seq.pm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 HSP.pm can provide the query or sbjct sequence as a B<Bio::Seq.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 object via the L<seq()|seq> method. The HSP.pm object can also create a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 two-sequence B<Bio::UnivAln.pm> alignment object using the query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 and sbjct sequences via the L<get_aln()|get_aln> method. Creation of alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 objects is not automatic when constructing the HSP.pm object since
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 this level of functionality is not always required and would generate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 a lot of extra overhead when crunching many reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 =head1 SEE ALSO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 Bio::Tools::Blast::Sbjct.pm - Blast hit object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 Bio::Tools::Blast.pm - Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 Bio::Seq.pm - Biosequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 Bio::UnivAln.pm - Biosequence alignment object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Bio::Root::Object.pm - Proposed base class for all Bioperl objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 =head2 Links:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 http://bio.perl.org/Core/POD/Tools/Blast/Sbjct.pm.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 http://bio.perl.org/Projects/modules.html - Online module documentation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 http://bio.perl.org/Projects/Blast/ - Bioperl Blast Project
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 http://bio.perl.org/ - Bioperl Project Homepage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 =head1 COPYRIGHT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Copyright (c) 1996-98 Steve Chervitz. All Rights Reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 This module is free software; you can redistribute it and/or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 modify it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 #### END of main POD documentation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 ###
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 Methods beginning with a leading underscore are considered private
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 and are intended for internal use by this module. They are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 B<not> considered part of the public interface and are described here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 for documentation purposes only.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 ## CONSTRUCTOR ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 =head2 _initialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 Usage : n/a; automatically called by Bio::Root::Object::new()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 : Bio::Tools::Blast::HSP.pm objects are constructed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 : automatically by Bio::Tools::Blast::Sbjct.pm, so there is no need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 : for direct consumption.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 Purpose : Initializes HSP data and calls private methods to extract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 : the data for a given HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 : Calls superclass constructor first (Bio::Root::Object.pm).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 Argument : Named parameters passed from new():
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 : All tags must be uppercase (does not call _rearrange()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 : -DATA => array ref containing raw data for one HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 : -PARENT => Sbjct.pm object ref.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 : -NAME => integer (1..n).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 : -PROGRAM => string ('TBLASTN', 'BLASTP', etc.).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 See Also : L<_set_data()|_set_data>, B<Bio::Root::Object::new()>, B<Bio::Tools::Blast::Sbjct::_set_hsps()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 sub _initialize {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 #----------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 my( $self, %param ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 $self->SUPER::_initialize( %param );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 # The gapped and program booleans may be needed after the HSP object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 # is built.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 # $self->{'_gapped'} = $param{-GAPPED} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 $self->{'_prog'} = $param{-PROGRAM} || 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 $self->_set_data( @{$param{-DATA}} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 ## ACCESSORS ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =head2 _set_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 Usage : n/a; called automatically during object construction.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Purpose : Sets the query sequence, sbjct sequence, and the "match" data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 : which consists of the symbols between the query and sbjct lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 : in the alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 Argument : Array (all lines from a single, complete HSP, one line per element)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 Throws : Propagates any exceptions from the methods called ("See Also")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 See Also : L<_set_seq()|_set_seq>, L<_set_residues()|_set_residues>, L<_set_score_stats()|_set_score_stats>, L<_set_match_stats()|_set_match_stats>, L<_initialize()|_initialize>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 sub _set_data {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 #--------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 my @data = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 my @queryList = (); # 'Query' = SEQUENCE USED TO QUERY THE DATABASE.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 my @sbjctList = (); # 'Sbjct' = HOMOLOGOUS SEQUENCE FOUND IN THE DATABASE.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 my @matchList = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 my $matchLine = 0; # Alternating boolean: when true, load 'match' data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 my @linedat = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 $DEBUG and print STDERR "$ID: set_data()\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 my($line, $aln_row_len, $length_diff);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $length_diff = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 # Collecting data for all lines in the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 # and then storing the collections for possible processing later.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 # Note that "match" lines may not be properly padded with spaces.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 # This loop now properly handles such cases:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 # Query: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVIXXXXX 1200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 # PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 # Sbjct: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVILSLKL 1200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 foreach $line( @data ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 next if $line =~ /^\s*$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 if( $line =~ /^ ?Score/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $self->_set_score_stats( $line );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 } elsif( $line =~ /^ ?(Identities|Positives|Strand)/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 $self->_set_match_stats( $line );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 } elsif( $line =~ /^ ?Frame = ([\d+-]+)/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 # Version 2.0.8 has Frame information on a separate line.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 $self->{'_frame'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 } elsif( $line =~ /^(Query:?[\s\d]+)([^\s\d]+)/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 push @queryList, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 $self->{'_match_indent'} = CORE::length $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 $aln_row_len = (CORE::length $1) + (CORE::length $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 $matchLine = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 } elsif( $matchLine ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 # Pad the match line with spaces if necessary.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $length_diff = $aln_row_len - CORE::length $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 $length_diff and $line .= ' 'x $length_diff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 push @matchList, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 $matchLine = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 } elsif( $line =~ /^Sbjct/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 push @sbjctList, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 # Storing the query and sbjct lists in case they are needed later.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 # We could make this conditional to save memory.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 $self->{'_queryList'} = \@queryList;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 $self->{'_sbjctList'} = \@sbjctList;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 # Storing the match list in case it is needed later.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 $self->{'_matchList'} = \@matchList;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 if(not defined ($self->{'_numIdentical'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 $self->throw("Can't parse match statistics.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 "Possibly a new or unrecognized Blast format.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 if(!scalar @queryList or !scalar @sbjctList) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 $self->throw("Can't find query or sbjct alignment lines.",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 "Possibly unrecognized Blast format.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 =head2 _set_score_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 Usage : n/a; called automatically by _set_data()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 Purpose : Sets various score statistics obtained from the HSP listing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 Argument : String with any of the following formats:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 : blast2: Score = 30.1 bits (66), Expect = 9.2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 : blast2: Score = 158.2 bits (544), Expect(2) = e-110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 : blast1: Score = 410 (144.3 bits), Expect = 1.7e-40, P = 1.7e-40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 : blast1: Score = 55 (19.4 bits), Expect = 5.3, Sum P(3) = 0.99
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Throws : Exception if the stats cannot be parsed, probably due to a change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 : in the Blast report format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 See Also : L<_set_data()|_set_data>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 sub _set_score_stats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 my ($self, $data) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 my ($expect, $p);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 if($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect = +([\d.e+-]+)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 # blast2 format n = 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 $self->{'_bits'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 $self->{'_score'} = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $expect = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 } elsif($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect\((\d+)\) = +([\d.e+-]+)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 # blast2 format n > 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 $self->{'_bits'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 $self->{'_score'} = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 $self->{'_n'} = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 $expect = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), P = +([\d.e-]+)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 # blast1 format, n = 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 $self->{'_score'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $self->{'_bits'} = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $expect = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 $p = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), +Sum P\((\d+)\) = +([\d.e-]+)/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 # blast1 format, n > 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $self->{'_score'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 $self->{'_bits'} = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 $expect = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 $self->{'_n'} = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 $p = $5;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 $self->throw("Can't parse score statistics: unrecognized format.", "$data");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 $expect = "1$expect" if $expect =~ /^e/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 $p = "1$p" if defined $p and $p=~ /^e/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 $self->{'_expect'} = $expect;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 $self->{'_p'} = $p || undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 =head2 _set_match_stats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 Usage : n/a; called automatically by _set_data()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 Purpose : Sets various matching statistics obtained from the HSP listing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 Argument : blast2: Identities = 23/74 (31%), Positives = 29/74 (39%), Gaps = 17/74 (22%)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 : blast2: Identities = 57/98 (58%), Positives = 74/98 (75%)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%), Frame = -3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 : WU-blast: Identities = 310/553 (56%), Positives = 310/553 (56%), Strand = Minus / Plus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 Throws : Exception if the stats cannot be parsed, probably due to a change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 : in the Blast report format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 Comments : The "Gaps = " data in the HSP header has a different meaning depending
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 : on the type of Blast: for BLASTP, this number is the total number of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 : gaps in query+sbjct; for TBLASTN, it is the number of gaps in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 : query sequence only. Thus, it is safer to collect the data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 : separately by examining the actual sequence strings as is done
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 : in _set_seq().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 See Also : L<_set_data()|_set_data>, L<_set_seq()|_set_seq>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_match_stats {
#--------------------
    # Extracts the match statistics from one HSP header line and stores
    # them on the object:
    #   Identities = i/t  -> _numIdentical, _totalLength
    #   Positives  = p/t  -> _numConserved, _totalLength
    #   Frame      = f    -> _frame
    #   Strand     = q / s -> _queryStrand, _sbjctStrand
    # Each field is optional. A field that is absent from $data leaves the
    # corresponding members untouched; nothing is thrown from this method.
    my ($self, $data) = @_;

    # blast1 or 2 format
    if (my ($identical, $aligned) = $data =~ m!Identities = (\d+)/(\d+)!) {
        $self->{'_numIdentical'} = $identical;
        $self->{'_totalLength'}  = $aligned;
    }

    # blast1 or 2 format. The denominator seen here replaces the one
    # recorded from the "Identities" field.
    if (my ($conserved, $aligned) = $data =~ m!Positives = (\d+)/(\d+)!) {
        $self->{'_numConserved'} = $conserved;
        $self->{'_totalLength'}  = $aligned;
    }

    if (my ($frame) = $data =~ m!Frame = ([\d+-]+)!) {
        $self->{'_frame'} = $frame;
    }

    # Strand data is not always present in this line.
    # _set_seq() will also set strand information.
    # The "Gaps = n/m" field is intentionally not read here; gaps are
    # counted from the sequence strings in _set_seq().
    if ($data =~ m!Strand = (\w+) / (\w+)!) {
        $self->{'_queryStrand'} = $1;
        $self->{'_sbjctStrand'} = $2;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 =head2 _set_seq_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 Usage : n/a; called automatically when sequence data is requested.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 Purpose : Sets the HSP sequence data for both query and sbjct sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 : Includes: start, stop, length, gaps, and raw sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 Throws : Propagates any exception thrown by _set_seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 Comments : Uses raw data stored by _set_data() during object construction.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 : These data are not always needed, so it is conditionally
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 : executed only upon demand by methods such as gaps(), _set_residues(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 : etc. _set_seq() does the dirty work.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 See Also : L<_set_seq()|_set_seq>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_seq_data {
    # Parses the cached raw alignment lines for the query and then the
    # sbjct sequence via _set_seq(), releases the raw line lists, and
    # flags the object so the work is not repeated.
    my $self = shift;

    for my $type (qw(query sbjct)) {
        $self->_set_seq($type, @{ $self->{"_${type}List"} });
    }

    # Liberate some memory: empty each raw list in place, then drop the
    # object's reference to it (the hash keys themselves remain).
    for my $list_key ('_sbjctList', '_queryList') {
        @{ $self->{$list_key} } = ();
    }
    for my $list_key ('_queryList', '_sbjctList') {
        undef $self->{$list_key};
    }

    $self->{'_set_seq_data'} = 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 =head2 _set_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 Usage : n/a; called automatically by _set_seq_data()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 : $hsp_obj->_set_seq($seq_type, @data);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 Purpose : Sets sequence information for both the query and sbjct sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 : Directly counts the number of gaps in each sequence (if gapped Blast).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 Argument : $seq_type = 'query' or 'sbjct'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 : @data = all seq lines with the form:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 : Query: 61 SPHNVKDRKEQNGSINNAISPTATANTSGSQQINIDSALRDRSSNVAAQPSLSDASSGSN 120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 Throws : Exception if data strings cannot be parsed, probably due to a change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 : in the Blast report format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 Comments : Uses first argument to determine which data members to set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 : making this method sensitive to data member name changes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 Warning : Sequence endpoints are normalized so that start < end. This affects HSPs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 : for TBLASTN/X hits on the minus strand. Normalization facilitates use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 : of range information by methods such as match().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 See Also : L<_set_seq_data()|_set_seq_data>, L<matches()|matches>, L<range()|range>, L<start()|start>, L<end()|end>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 #-------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_seq {
#-------------
    # Parses the alignment lines of one HSP for either the query or the
    # sbjct sequence and records start, stop, length, strand, gap count
    # and the residue chunks under the '_query*' or '_sbjct*' members.
    #
    # Arguments: $seqType -- 'query' or 'sbjct' (lower-cased before use)
    #            @data    -- alignment lines of the form
    #                        "Query: 61 SPHNVKDRKEQ 71"
    # Throws   : via $self->throw() if no line could be parsed.
    # Warns    : via $self->warn() for every line that does not look like
    #            "<number> <residues> <number>".
    my ($self, $seqType, @data) = @_;
    my @ranges   = ();
    my @sequence = ();

    foreach my $line (@data) {
        # Captures are copied into lexicals at once because the
        # substitution below would reset $1..$3.
        if (my ($from, $residues, $to) = $line =~ m/(\d+) *(\D+) *(\d+)/) {
            # \D+ is greedy, so the capture includes the blank(s) that
            # separate the residues from the end coordinate. Strip them:
            # _set_residues() walks these chunks character by character
            # in lockstep with the match line, and stray blanks would
            # shift every residue number.
            $residues =~ s/\s+//g;
            push @ranges, ($from, $to);
            push @sequence, $residues;
        } else {
            $self->warn("Bad sequence data: $line");
        }
    }

    unless (@sequence and @ranges) {
        $self->throw("Can't set sequence: missing data",
                     "Possibly unrecognized Blast format.");
    }

    # Sensitive to member name changes.
    $seqType = "_\L$seqType\E";
    my ($first, $last) = @ranges[0, -1];
    $self->{$seqType.'Start'}  = $first;
    $self->{$seqType.'Stop'}   = $last;
    $self->{$seqType.'Seq'}    = \@sequence;
    $self->{$seqType.'Length'} = abs($last - $first) + 1;

    # Adjust lengths for BLASTX, TBLASTN, TBLASTX sequences
    # Converting nucl coords to amino acid coords.
    # An unset '_prog' is treated as "no program" rather than triggering
    # uninitialized-value warnings in the comparisons below.
    my $prog = defined $self->{'_prog'} ? $self->{'_prog'} : '';
    if (   ($prog eq 'TBLASTN' and $seqType eq '_sbjct')
        or ($prog eq 'BLASTX'  and $seqType eq '_query')
        or  $prog eq 'TBLASTX') {
        $self->{$seqType.'Length'} /= 3;
    }

    $self->{$seqType.'Strand'} = 'Plus' if $prog =~ /BLAST[NX]/;

    # Normalize sequence endpoints so that start < end.
    # Reverse complement or 'minus strand' HSPs get flipped here.
    if ($self->{$seqType.'Start'} > $self->{$seqType.'Stop'}) {
        @{$self}{$seqType.'Start', $seqType.'Stop'} =
            @{$self}{$seqType.'Stop', $seqType.'Start'};
        $self->{$seqType.'Strand'} = 'Minus';
    }

    # Count the gaps: every aligned character beyond the coordinate
    # length is a gap symbol. The chunks are already free of whitespace.
    my $num_gaps = CORE::length(join('', @sequence)) - $self->{$seqType.'Length'};
    $self->{$seqType.'Gaps'} = $num_gaps if $num_gaps > 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 =head2 _set_residues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 Usage : n/a; called automatically when residue data is requested.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 Purpose : Sets the residue numbers representing the identical and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 : conserved positions. These data are obtained by analyzing the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 : symbols between query and sbjct lines of the alignments.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 Throws : Propagates any exception thrown by _set_seq_data() and _set_match_seq().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 Comments : These data are not always needed, so it is conditionally
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 : executed only upon demand by methods such as seq_inds().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 See Also : L<_set_seq_data()|_set_seq_data>, L<_set_match_seq()|_set_match_seq>, L<seq_inds()|seq_inds>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 #------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_residues {
#------------------
    # Records which residue numbers of the query and sbjct sequences are
    # identical or conserved, by walking the match line from its end
    # together with the two aligned sequences.
    #
    # Results are stored as hash references (residue number => 1) in
    # _identicalRes_query, _conservedRes_query, _identicalRes_sbjct and
    # _conservedRes_sbjct.
    # Throws: whatever _set_seq_data() or _set_match_seq() throw.
    my $self = shift;

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    # Using hashes to avoid saving duplicate residue numbers.
    my (%identicalList_query, %identicalList_sbjct,
        %conservedList_query, %conservedList_sbjct);

    # Reuse the cached match sequence or build it on first use.
    # This must not be written as "my $aref = ... if COND": a my()
    # guarded by a statement modifier has undefined behaviour and can
    # keep the value left over from a previous call.
    my $aref = ref $self->{'_matchSeq'}
             ? $self->{'_matchSeq'}
             : $self->_set_match_seq();
    $aref ||= $self->{'_matchSeq'};
    my $seqString = join('', @$aref);

    my $qseq = join('', @{$self->{'_querySeq'}});
    my $sseq = join('', @{$self->{'_sbjctSeq'}});
    my $resCount_query = $self->{'_queryStop'} || 0;
    my $resCount_sbjct = $self->{'_sbjctStop'} || 0;

    # Translated searches report nucleotide coordinates for the translated
    # sequence(s); scale the counters to amino acid units.
    # NOTE(review): the division can yield a non-integer counter when the
    # stop coordinate is not a multiple of 3 -- confirm this is intended.
    my $prog = $self->{'_prog'};
    if ($prog eq 'TBLASTN') {
        $resCount_sbjct /= 3;
    } elsif ($prog eq 'BLASTX') {
        $resCount_query /= 3;
    } elsif ($prog eq 'TBLASTX') {
        $resCount_query /= 3;
        $resCount_sbjct /= 3;
    }

    # Walk all three strings from the right-hand end. The loop is driven
    # by the remaining length rather than by the truth of the chopped
    # character, so a literal "0" in the match line cannot end it early.
    while (length $seqString) {
        my $mchar = chop($seqString);
        my ($qchar, $schar) = (chop($qseq), chop($sseq));
        if ($mchar eq '+') {
            $conservedList_query{ $resCount_query } = 1;
            $conservedList_sbjct{ $resCount_sbjct } = 1;
        } elsif ($mchar ne ' ') {
            $identicalList_query{ $resCount_query } = 1;
            $identicalList_sbjct{ $resCount_sbjct } = 1;
        }
        # A gap consumes no residue in the sequence that contains it.
        $resCount_query-- if $qchar ne $GAP_SYMBOL;
        $resCount_sbjct-- if $schar ne $GAP_SYMBOL;
    }
    $self->{'_identicalRes_query'} = \%identicalList_query;
    $self->{'_conservedRes_query'} = \%conservedList_query;
    $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
    $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 =head2 _set_match_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 Usage : n/a. Internal method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 : $hsp_obj->_set_match_seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 Purpose : Set the 'match' sequence for the current HSP (symbols in between
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 : the query and sbjct lines.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 Returns : Array reference holding the match sequences lines.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 Throws : Exception if the _matchList field is not set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 Comments : The match information is not always necessary. This method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 : allows it to be conditionally prepared.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 : Called by _set_residues() and seq_str().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 See Also : L<_set_residues()|_set_residues>, L<seq_str()|seq_str>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_match_seq {
#-------------------
    # Builds the 'match' sequence (the symbol lines printed between the
    # query and sbjct lines) from the raw lines held in _matchList, stores
    # it in _matchSeq and releases the raw lines.
    #
    # Returns: array reference holding the cleaned match lines.
    # Throws : via $self->throw() if _matchList is not a reference.
    my $self = shift;

    ref($self->{'_matchList'}) || $self->throw("Can't set HSP match sequence: No data");

    my $indent = $self->{'_match_indent'};
    my @sequence;
    foreach my $raw (@{$self->{'_matchList'}}) {
        my $line = $raw;    # work on a copy; the raw list is cleared below
        chomp($line);
        ## Remove leading spaces; (note: aln may begin with a space
        ## which is why we can't use s/^ +//).
        $line =~ s/^ {$indent}//;
        push @sequence, $line;
    }

    # Liberate some memory. The array is emptied with an empty list:
    # assigning undef would leave a one-element list (undef) behind for
    # anyone else still holding the reference.
    @{$self->{'_matchList'}} = ();
    $self->{'_matchList'}    = undef;

    $self->{'_matchSeq'} = \@sequence;

    return $self->{'_matchSeq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 =head2 score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 Usage : $hsp_obj->score()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 Purpose : Get the Blast score for the HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 Returns : Integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 See Also : L<bits()|bits>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub score {
    # Accessor: returns the value of the '_score' member (undef if unset).
    my ($self) = @_;
    return $self->{'_score'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 =head2 bits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 Usage : $hsp_obj->bits()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 Purpose : Get the Blast score in bits for the HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 Returns : Float
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 See Also : L<score()|score>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 #--------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub bits {
    # Accessor: returns the value of the '_bits' member (undef if unset).
    my ($self) = @_;
    return $self->{'_bits'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 #--------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 =head2 n
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 Usage : $hsp_obj->n()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 Purpose : Get the N value (num HSPs on which P/Expect is based).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 : This value is not defined with NCBI Blast2 with gapping.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 Returns : Integer or null string if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 Comments : The 'N' value is listed in parenthesis with P/Expect value:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 : e.g., P(3) = 1.2e-30 ---> (N = 3).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 : Not defined in NCBI Blast2 with gaps.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 : This typically is equal to the number of HSPs but not always.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 : To obtain the number of HSPs, use Bio::Tools::Blast::Sbjct::num_hsps().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 See Also : L<score()|score>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 #-----
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor for the '_n' member. Any false value (unset, 0, '') is
# reported as the empty string.
sub n {
    my ($hsp) = @_;
    my $count = $hsp->{'_n'};
    return $count ? $count : '';
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 #-----
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 =head2 frame
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 Usage : $hsp_obj->frame()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 Purpose : Get the reading frame number (-/+ 1, 2, 3) (TBLASTN/X only).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 Returns : Integer or null string if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor for the '_frame' member. Any false value (unset, 0, '') is
# reported as the empty string.
sub frame {
    my ($hsp) = @_;
    my $frame = $hsp->{'_frame'};
    return $frame ? $frame : '';
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 =head2 signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 Usage : $hsp_obj->signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 Purpose : Get the P-value or Expect value for the HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 Returns : Float (0.001 or 1.3e-43)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 : Returns P-value if it is defined, otherwise, Expect value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 Comments : Provided for consistency with Sbjct::signif()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 : Support for returning the significance data in different
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 : formats (e.g., exponent only), is not provided for HSP objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 : This is only available for the Sbjct or Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 See Also : L<p()|p>, L<expect()|expect>, B<Bio::Tools::Blast::Sbjct::signif()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub signif {
#-----------
    # Returns the '_p' member when it is defined (even if it is 0),
    # otherwise the '_expect' member (which may itself be undef).
    my $self = shift;
    return defined($self->{'_p'}) ? $self->{'_p'} : $self->{'_expect'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 =head2 expect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 Usage : $hsp_obj->expect()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 Purpose : Get the Expect value for the HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 Returns : Float (0.001 or 1.3e-43)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 Comments : Support for returning the expectation data in different
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 : formats (e.g., exponent only), is not provided for HSP objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 : This is only available for the Sbjct or Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 See Also : L<p()|p>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: hands back whatever is stored in this HSP's '_expect' member
# (undef when the member has not been set).
sub expect {
    my ($hsp) = @_;
    return $hsp->{'_expect'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 =head2 p
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 Usage : $hsp_obj->p()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 Purpose : Get the P-value for the HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 Returns : Float (0.001 or 1.3e-43) or undef if not defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 Comments : P-value is not defined with NCBI Blast2 reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 : Support for returning the P-value data in different
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 : formats (e.g., exponent only) is not provided for HSP objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 : This is only available for the Sbjct or Blast object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 See Also : L<expect()|expect>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 #-----
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Accessor: hands back whatever is stored in this HSP's '_p' member
# (undef when the member has not been set).
sub p {
    my ($hsp) = @_;
    return $hsp->{'_p'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 #-----
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 =head2 length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 Usage : $hsp->length( [seq_type] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 Purpose : Get the length of the aligned portion of the query or sbjct.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 Example : $hsp->length('query')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 Argument : seq_type: 'query' | 'sbjct' | 'total' (default = 'total')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 Comments : 'total' length is the full length of the alignment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 : as reported in the denominators in the alignment section:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 : "Identical = 34/120 Positives = 67/120".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 : Developer note: when using the built-in length function within
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 : this module, call it as CORE::length().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 See Also : L<gaps()|gaps>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub length {
#-----------
    # Looks up the "_<type>Length" member, where <type> is the lowercased
    # seq_type argument ('total' when no argument is given).
    my ($self, $type) = @_;
    $type = 'total' unless $type;

    # _set_seq_data() is invoked only for a non-'total' request, and only
    # when the '_set_seq_data' flag is not already set.
    if (not $self->{'_set_seq_data'}) {
        $self->_set_seq_data() if $type ne 'total';
    }

    ## Sensitive to member name format.
    my $member = '_' . lc($type) . 'Length';
    return $self->{$member};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 =head2 gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 Usage : $hsp->gaps( [seq_type] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 Purpose : Get the number of gaps in the query, sbjct, or total alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 : Also can return query gaps and sbjct gaps as a two-element list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 : when in array context.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 Example : $total_gaps = $hsp->gaps();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 : ($qgaps, $sgaps) = $hsp->gaps();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 : $qgaps = $hsp->gaps('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 Returns : scalar context: integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 : array context without args: (int, int) = ('queryGaps', 'sbjctGaps')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 Argument : seq_type: 'query' | 'sbjct' | 'total'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 : (default = 'total', scalar context)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 : Array context can be "induced" by providing an argument of 'list' or 'array'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 See Also : L<length()|length>, L<matches()|matches>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 #---------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub gaps {
#---------
    # Reports gap counts read from the '_queryGaps' / '_sbjctGaps' members.
    #   seq_type 'list' or 'array' (or no argument in list context):
    #       returns (query gaps, sbjct gaps)
    #   seq_type 'total' (or no argument in scalar context):
    #       returns the sum of the two
    #   any other seq_type:
    #       returns the "_<lowercased seq_type>Gaps" member
    # Unset counts are always reported as 0.
    my( $self, $seqType ) = @_;

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    $seqType ||= (wantarray ? 'list' : 'total');

    # Default both counts up front so the 'total' sum never operates on
    # undef (which would raise "uninitialized value" warnings).
    my $queryGaps = $self->{'_queryGaps'} || 0;
    my $sbjctGaps = $self->{'_sbjctGaps'} || 0;

    if($seqType =~ /list|array/i) {
        return ($queryGaps, $sbjctGaps);
    }

    if($seqType eq 'total') {
        return $queryGaps + $sbjctGaps;
    }

    ## Sensitive to member name format.
    $seqType = "_\L$seqType\E";
    return $self->{$seqType.'Gaps'} || 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 =head2 matches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 Usage : $hsp->matches([-SEQ => seq_type], [-START => start], [-STOP => stop]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 Purpose : Get the total number of identical and conservative matches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 : in the query or sbjct sequence for the given HSP. Optionally can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 : report data within a defined interval along the seq.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 : (Note: 'conservative' matches are called 'positives' in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 : Blast report.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 Example : ($id,$cons) = $hsp_object->matches(-SEQ => 'sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 : ($id,$cons) = $hsp_object->matches(-SEQ => 'query', -START => 300, -STOP => 400);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 Returns : 2-element array of integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 Argument : (1) seq_type = 'query' | 'sbjct' (default = query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 : (2) start = Starting coordinate (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 : (3) stop = Ending coordinate (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 Throws : Exception if the supplied coordinates are out of range.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 Comments : Relies on seq_str('match') to get the string of alignment symbols
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 : between the query and sbjct lines which are used for determining
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 : the number of identical and conservative matches.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 See Also : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, B<Bio::Tools::Blast::Sbjct::_adjust_contigs()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub matches {
#-----------
    # Returns (identical, conserved) counts. Arguments are named:
    #   -SEQ   => sequence type handed to range() (default 'query')
    #   -START => first coordinate of the interval (optional)
    #   -STOP  => last coordinate of the interval (optional)
    # Without -START and -STOP the stored whole-alignment counts are
    # returned; otherwise the counts are derived from the corresponding
    # slice of seq_str('match').
    my ($self, %param) = @_;

    my $seqType = $param{-SEQ} || 'query';
    my ($beg, $end) = ($param{-START}, $param{-STOP});

    ## Get data for the whole alignment.
    unless (defined $beg or defined $end) {
        my @whole = ($self->{'_numIdentical'}, $self->{'_numConserved'});
        return @whole;
    }

    ## Get the substring representing the desired sub-section of aln.
    $beg ||= 0;
    $end ||= 0;
    my ($start, $stop) = $self->range($seqType);

    # When one bound is missing (0), it is taken from the HSP range and
    # the other bound is recomputed relative to it.
    if    ($beg == 0) { $beg = $start; $end += $beg; }
    elsif ($end == 0) { $end = $stop;  $beg = $stop - $beg; }

    ## ML: an end at or beyond $stop is clipped to $stop; any other end
    ## is advanced by one. A begin before $start is clipped to $start.
    $end = ($end >= $stop) ? $stop : $end + 1;
    $beg = $start if $beg < $start;

    ## ML: fix for substr out of range error. For TBLASTN sbjct and BLASTX
    ## query requests, offset and length are divided by three
    ## (presumably nucleotide coordinates vs. a per-residue match line --
    ## TODO confirm).
    my $prog = $self->{'_prog'};
    my $scale_by_three = ($prog eq 'TBLASTN' and $seqType eq 'sbjct')
                      || ($prog eq 'BLASTX'  and $seqType eq 'query');

    my ($offset, $span) = $scale_by_three
        ? (int(($beg - $start) / 3), int(($end - $beg + 1) / 3))
        : ($beg - $start, $end - $beg);

    my $seq = substr($self->seq_str('match'), $offset, $span);

    $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop")
        unless CORE::length $seq;

    ## Get data for a substring.
    $seq =~ s/ //g;      # remove space (no info).
    my $num_conserved = CORE::length $seq;
    $seq =~ s/\+//g;     # remove '+' characters (conservative substitutions)
    my $num_identical = CORE::length $seq;

    my @counts = ($num_identical, $num_conserved);
    return @counts;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002 =head2 frac_identical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 Usage : $hsp_object->frac_identical( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 Purpose : Get the fraction of identical positions within the given HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006 Example : $frac_iden = $hsp_object->frac_identical('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 Returns : Float (2-decimal precision, e.g., 0.75).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 Argument : seq_type: 'query' | 'sbjct' | 'total'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 : default = 'total' (but see comments below).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 Comments : Different versions of Blast report different values for the total
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 : length of the alignment. This is the number reported in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 : denominators in the stats section:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 : "Identical = 34/120 Positives = 67/120".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 : BLAST-GP uses the total length of the alignment (with gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 : WU-BLAST uses the length of the query sequence (without gaps).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 : Therefore, when called without an argument or an argument of 'total',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018 : this method will report different values depending on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019 : version of BLAST used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021 : To get the fraction identical among only the aligned residues,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022 : ignoring the gaps, call this method with an argument of 'query'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 : or 'sbjct'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025 See Also : L<frac_conserved()|frac_conserved>, L<num_identical()|num_identical>, L<matches()|matches>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub frac_identical {
#-------------------
    # The value is calculated as opposed to storing it from the parsed results.
    # This saves storage and also permits flexibility in determining for which
    # sequence (query or sbjct) the figure is to be calculated.
    #
    # Returns '_numIdentical' divided by the "_<seq_type>Length" member,
    # formatted to two decimals. seq_type defaults to 'total'.
    # Throws (via throw()) if that length is zero or not set.

    my( $self, $seqType ) = @_;
    $seqType ||= 'total';

    if($seqType ne 'total') {
        $self->_set_seq_data() unless $self->{'_set_seq_data'};
    }

    ## Sensitive to member name format.
    my $length = $self->{"_\L$seqType\E" . 'Length'};

    # Report a meaningful error instead of a bare "Illegal division by zero".
    $self->throw("Can't compute fraction identical: '$seqType' length is zero or undefined.")
        unless $length;

    sprintf( "%.2f", $self->{'_numIdentical'}/$length);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 =head2 frac_conserved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 Usage : $hsp_object->frac_conserved( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 Purpose : Get the fraction of conserved positions within the given HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 : (Note: 'conservative' positions are called 'positives' in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 : Blast report.)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 Example : $frac_cons = $hsp_object->frac_conserved('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 Returns : Float (2-decimal precision, e.g., 0.75).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 Argument : seq_type: 'query' | 'sbjct' | 'total'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 : default = 'total' (but see comments below).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 Comments : Different versions of Blast report different values for the total
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 : length of the alignment. This is the number reported in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 : denominators in the stats section:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 : "Identical = 34/120 Positives = 67/120".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 : BLAST-GP uses the total length of the alignment (with gaps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 : WU-BLAST uses the length of the query sequence (without gaps).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 : Therefore, when called without an argument or an argument of 'total',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 : this method will report different values depending on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 : version of BLAST used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 : To get the fraction conserved among only the aligned residues,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 : ignoring the gaps, call this method with an argument of 'query'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 : or 'sbjct'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 See Also : L<frac_identical()|frac_identical>, L<num_conserved()|num_conserved>, L<matches()|matches>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078 #--------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub frac_conserved {
#--------------------
    # The value is calculated as opposed to storing it from the parsed results.
    # This saves storage and also permits flexibility in determining for which
    # sequence (query or sbjct) the figure is to be calculated.
    #
    # Returns '_numConserved' divided by the "_<seq_type>Length" member,
    # formatted to two decimals. seq_type defaults to 'total'.
    # Throws (via throw()) if that length is zero or not set.

    my( $self, $seqType ) = @_;
    $seqType ||= 'total';

    if($seqType ne 'total') {
        $self->_set_seq_data() unless $self->{'_set_seq_data'};
    }

    ## Sensitive to member name format.
    my $length = $self->{"_\L$seqType\E" . 'Length'};

    # Report a meaningful error instead of a bare "Illegal division by zero".
    $self->throw("Can't compute fraction conserved: '$seqType' length is zero or undefined.")
        unless $length;

    sprintf( "%.2f", $self->{'_numConserved'}/$length);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099 =head2 num_identical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 Usage : $hsp_object->num_identical();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102 Purpose : Get the number of identical positions within the given HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 Example : $num_iden = $hsp_object->num_identical();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 See Also : L<num_conserved()|num_conserved>, L<frac_identical()|frac_identical>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub num_identical {
    # Read-only accessor for the '_numIdentical' member of this HSP.
    my ($self) = @_;
    return $self->{'_numIdentical'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 =head2 num_conserved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123 Usage : $hsp_object->num_conserved();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 Purpose : Get the number of conserved positions within the given HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 Example : $num_cons = $hsp_object->num_conserved();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 Returns : integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 See Also : L<num_identical()|num_identical>, L<frac_conserved()|frac_conserved>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 #-------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub num_conserved {
    # Read-only accessor for the '_numConserved' member of this HSP.
    my ($self) = @_;
    return $self->{'_numConserved'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144 =head2 range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 Usage : $hsp->range( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 : in the HSP alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 Example : ($qbeg, $qend) = $hsp->range('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 : ($sbeg, $send) = $hsp->range('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 Returns : Two-element array of integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 Argument : seq_type = string, 'query' or 'sbjct' (default = 'query')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 : (case insensitive).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156 See Also : L<start()|start>, L<end()|end>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub range {
    # Returns the (start, stop) members for one sequence of the alignment.
    #
    # $seqType : 'query' (default) or 'sbjct'; case insensitive.
    my ($self, $seqType) = @_;

    if (not $self->{'_set_seq_data'}) {
        $self->_set_seq_data();
    }

    ## Sensitive to member name changes.
    my $prefix = '_' . lc($seqType || 'query');
    my $first  = $self->{"${prefix}Start"};
    my $last   = $self->{"${prefix}Stop"};

    return ($first, $last);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 =head2 start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 Usage : $hsp->start( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 : in the HSP alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179 Example : $qbeg = $hsp->start('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 : $sbeg = $hsp->start('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181 : ($qbeg, $sbeg) = $hsp->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182 Returns : scalar context: integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 : array context without args: list of two integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185 : (case insensitive). If not supplied, 'query' is used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186 : Array context can be "induced" by providing an argument of 'list' or 'array'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189 See Also : L<end()|end>, L<range()|range>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub start {
    # Returns the start coordinate of the query or sbjct sequence, or both
    # as a (query, sbjct) list.
    #
    # $seqType : 'query', 'sbjct', or 'list'/'array' to request both.
    #            When omitted it follows the calling context: 'list' in
    #            list context, 'query' otherwise.
    my ($self, $seqType) = @_;

    unless ($seqType) {
        $seqType = wantarray ? 'list' : 'query';
    }

    $self->_set_seq_data() if not $self->{'_set_seq_data'};

    return ($self->{'_queryStart'}, $self->{'_sbjctStart'})
        if $seqType =~ /list|array/i;

    ## Sensitive to member name changes.
    my $key = '_' . lc($seqType) . 'Start';
    return $self->{$key};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211 =head2 end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 Usage : $hsp->end( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215 : in the HSP alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 Example : $qend = $hsp->end('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 : $send = $hsp->end('sbjct');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 : ($qend, $send) = $hsp->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 Returns : scalar context: integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 : array context without args: list of two integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 : (case insensitive). If not supplied, 'query' is used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 : Array context can be "induced" by providing an argument of 'list' or 'array'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 See Also : L<start()|start>, L<range()|range>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 #----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub end {
    # Returns the end coordinate of the query or sbjct sequence, or both
    # as a (query, sbjct) list.
    #
    # $seqType : 'query', 'sbjct', or 'list'/'array' to request both.
    #            When omitted it follows the calling context: 'list' in
    #            list context, 'query' otherwise.
    my ($self, $seqType) = @_;

    unless ($seqType) {
        $seqType = wantarray ? 'list' : 'query';
    }

    $self->_set_seq_data() if not $self->{'_set_seq_data'};

    return ($self->{'_queryStop'}, $self->{'_sbjctStop'})
        if $seqType =~ /list|array/i;

    ## Sensitive to member name changes.
    my $key = '_' . lc($seqType) . 'Stop';
    return $self->{$key};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 =head2 strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 Usage : $hsp_object->strand( [seq_type] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 Purpose : Get the strand of the query or sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254 Example : print $hsp->strand('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 : ($qstrand, $sstrand) = $hsp->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256 Returns : -1, 0, or 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 : -1 = Minus strand, +1 = Plus strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258 : Returns 0 if strand is not defined, which occurs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 : for non-TBLASTN/X reports.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260 : In scalar context without arguments, returns queryStrand value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 : In array context without arguments, returns a two-element list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 : of strings (queryStrand, sbjctStrand).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 : Array context can be "induced" by providing an argument of 'list' or 'array'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264 Argument : seq_type: 'query' | 'sbjct' or undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267 See Also : L<_set_seq()|_set_seq>, L<_set_match_stats()|_set_match_stats>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271 #-----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub strand {
    # Returns strand information for the query and/or sbjct sequence.
    #
    # $seqType : 'query', 'sbjct', or 'list'/'array' to request both.
    #            When omitted it follows the calling context: 'list' in
    #            list context, 'query' otherwise.
    #
    # Single-sequence requests are mapped through %STRAND_SYMBOL (defined
    # elsewhere in this file) and fall back to 0. List requests return the
    # raw '_queryStrand' / '_sbjctStrand' members, or ('', '') when the
    # query strand is not defined.
    my ($self, $seqType) = @_;
    $seqType = (wantarray ? 'list' : 'query') unless $seqType;

    # This check runs before lower-casing, so it only matches the exact
    # string 'query'.
    if ($seqType eq 'query' and $self->{'_prog'} eq 'TBLASTN') {
        return '';
    }

    ## Sensitive to member name format.
    # $member could be '_list'.
    my $member = '_' . lc($seqType);

    unless ($self->{'_set_seq_data'} or $self->{'_queryStrand'}) {
        $self->_set_seq_data();
    }

    if ($member =~ /list|array/i) {
        return defined $self->{'_queryStrand'}
            ? ($self->{'_queryStrand'}, $self->{'_sbjctStrand'})
            : ('', '');
    }

    # Silence the warning for looking up an undefined strand value.
    local $^W = 0;
    return $STRAND_SYMBOL{ $self->{ $member . 'Strand' } } || 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295 ## INSTANCE METHODS ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1299 =head2 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1301 Usage : $hsp->seq( [seq_type] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1302 Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1303 Example : $seqObj = $hsp->seq('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1304 Returns : Object reference for a Bio::Seq.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1305 Argument : seq_type = 'query' or 'sbjct' (default = 'query').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1306 Throws : Propagates any exception that occurs during construction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307 : of the Bio::Seq.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308 Comments : The sequence is returned in an array of strings corresponding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309 : to the strings in the original format of the Blast alignment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310 : (i.e., same spacing).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312 See Also : L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, B<Bio::Seq.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316 #-------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub seq {
    # Builds a Bio::Seq object for the query or sbjct sequence of this HSP.
    #
    # $seqType : 'query' (default) or 'sbjct'.
    #
    # The sequence string comes from seq_str(). The ID is the name of the
    # grandparent object for a query and of the parent object otherwise.
    # Any exception raised by seq_str() or Bio::Seq->new propagates.
    my ($self, $seqType) = @_;
    $seqType ||= 'query';

    my $str  = $self->seq_str($seqType);
    my $num  = $self->name;
    my $name = $seqType =~ /query/i
        ? $self->parent->parent->name
        : $self->parent->name;

    # Loaded lazily so that Bio::Seq is only needed when seq() is called.
    require Bio::Seq;

    # Direct method call instead of the indirect-object form
    # "new Bio::Seq (...)", which is parse-ambiguous and discouraged.
    return Bio::Seq->new(
        -ID   => $name,
        -SEQ  => $str,
        -DESC => "Blast HSP #$num, $seqType sequence",
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 =head2 seq_str
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 Usage : $hsp->seq_str( seq_type );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340 Purpose : Get the full query, sbjct, or 'match' sequence as a string.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 : The 'match' sequence is the string of symbols in between the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342 : query and sbjct sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343 Example : $str = $hsp->seq_str('query');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344 Returns : String
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 Argument : seq_Type = 'query' or 'sbjct' or 'match'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 Throws : Exception if the argument does not match an accepted seq_type.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347 Comments : Calls _set_match_seq() to set the 'match' sequence if it has
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348 : not been set already.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1350 See Also : L<seq()|seq>, L<seq_inds()|seq_inds>, L<_set_match_seq()|_set_match_seq>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1352 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub seq_str {
    # Returns the full query, sbjct or 'match' sequence as a single string.
    #
    # $seqType : 'query', 'sbjct' or 'match' (case insensitive).
    #
    # query/sbjct: the stored line fragments are joined and all whitespace
    #              is removed.
    # match      : the stored match-line fragments are joined verbatim.
    # Throws an exception for any other (or undefined) sequence type.
    my ($self, $seqType) = @_;

    ## Sensitive to member name changes.
    # An undefined type becomes '_' and is rejected below without first
    # emitting an "uninitialized value" warning.
    $seqType = '_' . lc(defined $seqType ? $seqType : '');

    $self->_set_seq_data() unless $self->{'_set_seq_data'};

    if ($seqType =~ /sbjct|query/) {
        my $seq = join('', @{ $self->{ $seqType . 'Seq' } });
        $seq =~ s/\s+//g;
        return $seq;
    }

    if ($seqType =~ /match/i) {
        # Only need to call _set_match_seq() if the match seq is requested.
        # The result is read back from '_matchSeq' afterwards, so this
        # relies on _set_match_seq() storing it there. (The previous form,
        # "my $aref = ... unless ...", put a statement modifier on a "my"
        # declaration, which Perl documents as undefined behaviour.)
        $self->_set_match_seq() unless ref $self->{'_matchSeq'};

        return join('', @{ $self->{'_matchSeq'} });
    }

    $self->throw("Invalid or undefined sequence type: $seqType",
                 "Valid types: query, sbjct, match");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 =head2 seq_inds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392 Usage : $hsp->seq_inds( seq_type, class, collapse );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 Purpose : Get a list of residue positions (indices) for all identical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 : or conserved residues in the query or sbjct sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 Example : @ind = $hsp->seq_inds('query', 'identical');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396 : @ind = $hsp->seq_inds('sbjct', 'conserved');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397 : @ind = $hsp->seq_inds('sbjct', 'conserved', 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 Returns : List of integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399 : May include ranges if collapse is true.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400 Argument : seq_type = 'query' or 'sbjct' (default = query)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 : class = 'identical' or 'conserved' (default = identical)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402 : (can be shortened to 'id' or 'cons')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403 : (actually, anything not 'id' will evaluate to 'conserved').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404 : collapse = boolean, if true, consecutive positions are merged
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406 : collapses to "1-5 7 9-11". This is useful for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407 : consolidating long lists. Default = no collapse.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408 Throws : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409 Comments : Calls _set_residues() to set the 'match' sequence if it has
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410 : not been set already.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412 See Also : L<seq()|seq>, L<_set_residues()|_set_residues>, L<collapse_nums()|collapse_nums>, B<Bio::Tools::Blast::Sbjct::seq_inds()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 #---------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub seq_inds {
    # Returns the numerically sorted keys of one residue-position hash
    # ('_identicalRes_query', '_conservedRes_sbjct', ...).
    #
    # $seq      : anything starting with 'q' selects the query, everything
    #             else the sbjct (default 'query').
    # $class    : anything starting with 'id' selects identical residues,
    #             everything else conserved ones (default 'identical').
    # $collapse : if true, the sorted list is passed through
    #             collapse_nums() before being returned.
    my ($self, $seq, $class, $collapse) = @_;

    $seq   = 'query'     unless $seq;
    $class = 'identical' unless $class;

    $self->_set_residues() if not defined $self->{'_identicalRes_query'};

    my $which = $seq   =~ /^q/i  ? 'query'     : 'sbjct';
    my $kind  = $class =~ /^id/i ? 'identical' : 'conserved';

    ## Sensitive to member name changes.
    my $member    = "_${kind}Res_${which}";
    my @positions = sort { $a <=> $b } keys %{ $self->{$member} };

    return @positions unless $collapse;
    return collapse_nums(@positions);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 =head2 get_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 Usage : $hsp->get_aln()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445 Purpose : Get a Bio::UnivAln.pm object constructed from the query + sbjct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446 : sequences of the present HSP object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447 Example : $aln_obj = $hsp->get_aln();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1448 Returns : Object reference for a Bio::UnivAln.pm object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1449 Argument : n/a.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1450 Throws : Propagates any exception occurring during the construction of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1451 : the Bio::UnivAln object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452 Comments : Requires Bio::UnivAln.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 : The Bio::UnivAln.pm object is constructed from the query + sbjct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454 : sequence objects obtained by calling seq().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 : Gap residues are included (see $GAP_SYMBOL). It is important that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456 : Bio::UnivAln.pm recognizes the gaps correctly. A strategy for doing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457 : this is being considered. Currently it is hard-wired.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459 See Also : L<seq()|seq>, B<Bio::UnivAln.pm>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1461 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1463 #------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_aln {
    # Builds a Bio::UnivAln object from the query and sbjct sequence
    # objects returned by seq(). Takes no arguments.
    #
    # Any exception raised by seq() or Bio::UnivAln->new propagates.
    my $self = shift;

    # Loaded lazily so that Bio::UnivAln is only needed for get_aln().
    require Bio::UnivAln;

    my $qseq = $self->seq('query');
    my $sseq = $self->seq('sbjct');

    my $desc = sprintf "HSP #%s of query %s vs. sbjct %s",
        $self->name, $self->parent->parent->name, $self->parent->name;

    # Program names ending in P or X, or starting with T (BLASTP, BLASTX,
    # TBLASTN, TBLASTX) are treated as protein alignments; anything else
    # (BLASTN) as DNA. The earlier pattern /P$|^T/ missed BLASTX, whose
    # aligned sequences are translated and therefore amino acids.
    my $type = $self->{'_prog'} =~ /[PX]$|^T/ ? 'amino' : 'dna';

    return Bio::UnivAln->new(
        -seqs => [ $qseq, $sseq ],
        -desc => $desc,
        -type => $type,
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485 =head2 display
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487 Usage : $hsp_object->display( %named_parameters );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 Purpose : Display information about Bio::Tools::Blast::HSP.pm data members
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 : including: length, gaps, score, significance value,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490 : sequences and sequence indices.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491 Example : $object->display(-SHOW=>'stats');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 Argument : Named parameters: (TAGS CAN BE UPPER OR LOWER CASE)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493 : -SHOW => 'hsp',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494 : -WHERE => filehandle (default = STDOUT)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495 Returns : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497 Comments : For more control over the display of sequence data,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1498 : use seq(), seq_str(), seq_inds().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1500 See Also : L<_display_seq()|_display_seq>, L<seq()|seq>, L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<_display_matches()|_display_matches>, B<Bio::Root::Object::display()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1501
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1502 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#-----------
sub display {
#-----------
    # Purpose : Print a summary of this HSP (lengths, gaps, scores,
    #           significance values, identity/conservation counts) followed
    #           by the query/sbjct sequences and the match positions.
    # Argument: Named parameters, passed straight through to set_display(),
    #           which supplies the output filehandle.
    my( $self, %param ) = @_;

    my $sbjctName = $self->parent->name();
    my $queryName = $self->parent->parent->name();
    my $layout    = $self->parent->parent->_layout();

    my $OUT = $self->set_display(%param);

    # All integer statistics share one "label: value" format.
    my $show_int = sub { printf {$OUT} "%-15s: %d\n", @_ };

    $show_int->( "LENGTH TOTAL", $self->length('total') );
    $show_int->( "LENGTH QUERY", $self->length('query') );
    $show_int->( "LENGTH SBJCT", $self->length('sbjct') );
    $show_int->( "GAPS QUERY",   $self->gaps('query') );
    $show_int->( "GAPS SBJCT",   $self->gaps('sbjct') );
    $show_int->( "SCORE",        $self->{'_score'} );

    printf {$OUT} "%-15s: %0.1f\n", "BITS", $self->{'_bits'};

    # The P value is shown only for layout 1; the Expect value is shown
    # for every layout.
    if( $layout == 1 ) {
        printf {$OUT} "%-15s: %.1e\n", "P-VAL", $self->{'_p'};
    }
    printf {$OUT} "%-15s: %.1e\n", "EXPECT", $self->{'_expect'};

    # Percentages are expressed relative to the query length.
    my $queryLength  = $self->length('query');
    my $pctIdentical = $self->{'_numIdentical'}/$queryLength * 100;
    my $pctConserved = $self->{'_numConserved'}/$queryLength * 100;

    printf {$OUT} "%-15s: %d (%0.0f%%)\n",
        "IDENTICAL", $self->{'_numIdentical'}, $pctIdentical;
    printf {$OUT} "%-15s: %d (%0.0f%%) %s \n",
        "CONSERVED", $self->{'_numConserved'}, $pctConserved, "includes identical";

    $self->_display_seq('query', $queryName, $OUT);
    $self->_display_seq('sbjct', $sbjctName, $OUT);
    $self->_display_matches($queryName, $sbjctName, $OUT);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1541
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545 =head2 _display_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 Usage : n/a; called automatically by display()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 Purpose : Display information about query and sbjct HSP sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 : Prints the start, stop coordinates and the actual sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550 Example : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551 Argument : $seqType ('query' or 'sbjct'), $name (sequence name), $OUT (output filehandle)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552 Returns : printf call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1553 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1554 Comments : For more control, use seq(), seq_str(), or seq_inds().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1556 See Also : L<display()|display>, L<seq()|seq>, L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<_display_matches()|_display_matches>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#------------------
sub _display_seq {
#------------------
    # Purpose : Print the name, start/stop coordinates and the stored
    #           sequence lines for one side of the HSP.
    # Argument: $seqType - sequence side, e.g. 'query' or 'sbjct' (any case)
    #           $name    - sequence name shown in the heading
    #           $OUT     - filehandle to print to
    my( $self, $seqType, $name, $OUT ) = @_;

    # Make sure the sequence data members have been populated.
    if( not $self->{'_set_seq_data'} ) {
        $self->_set_seq_data();
    }

    # Sensitive to member name changes: members are named
    # _<seqtype>Start, _<seqtype>Stop and _<seqtype>Seq.
    my $prefix = '_' . lc($seqType);

    # CORE::length is spelled out explicitly, as in the original code.
    # The underline is two characters longer than the name.
    my $underline = '-' x ( CORE::length($name) + 2 );

    printf {$OUT} "\n%10s: %s\n%10s %s\n", uc($seqType), "$name", "-----", $underline;
    printf {$OUT} "%13s: %d\n", "START", $self->{ $prefix . 'Start' };
    printf {$OUT} "%13s: %d\n", "STOP",  $self->{ $prefix . 'Stop' };
    printf {$OUT} "%13s: \n", "SEQ";

    for my $segment ( @{ $self->{ $prefix . 'Seq' } } ) {
        printf {$OUT} "%15s%s\n", "", $segment;
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580 =head2 _display_matches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582 Usage : n/a; called automatically by display()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 Purpose : Display information about identical and conserved positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584 : within both the query and sbjct sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585 Example : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586 Argument : $queryName, $sbjctName, $OUT (output filehandle)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587 Returns : printf call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 Comments : For more control, use seq_inds().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591 See Also : L<display()|display>, L<seq_inds()|seq_inds>, L<_display_seq()|_display_seq>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1593 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#--------------------
sub _display_matches {
#--------------------
    # Purpose : Print the match lines of the HSP followed by the positions
    #           of identical and conserved residues, first for the query and
    #           then for the sbjct sequence.
    # Argument: $queryName, $sbjctName - names used in the headings
    #           $OUT                   - filehandle to print to
    # Comments: Residue numbers are listed in ascending numeric order,
    #           15 per line.
    my( $self, $queryName, $sbjctName, $OUT) = @_;

    # Populate the residue hashes on first use.
    $self->_set_residues() unless defined $self->{'_identicalRes_query'};

    printf( $OUT "\n%10s: \n%10s\n", "HITS", "-----" );
    foreach my $matchLine ( @{ $self->{'_matchSeq'}} ) {
        printf( $OUT "%15s%s\n", "", $matchLine );
    }

    my $numIdentical  = $self->{'_numIdentical'};
    # _numConserved includes the identical positions, so subtract them.
    my $justConserved = ($self->{'_numConserved'}) - $numIdentical;

    # Prints one titled list of residue numbers. The hash keys are residue
    # numbers, so they must be sorted numerically: the default string sort
    # would put 10 before 2.
    my $list_residues = sub {
        my( $title, $residues ) = @_;
        printf( $OUT "\n%5s%s:\n%5s%s\n\t", "", $title,
                "", "--------------------------------------------" );
        my $count = 0;
        foreach my $resNum ( sort { $a <=> $b } keys %{ $residues } ) {
            $count++;
            print $OUT "$resNum";
            # Separator after every number; line break after every 15th.
            print {$OUT} ( $count % 15 ? ", " : "\n\t" );
        }
    };

    print $OUT "\n\U$queryName\E\n------------\n";
    $list_residues->( "IDENTICAL RESIDUES IN $queryName (n=$numIdentical)",
                      $self->{'_identicalRes_query'} );
    print $OUT "\n";
    $list_residues->( "CONSERVED RESIDUES IN $queryName (n=$justConserved)",
                      $self->{'_conservedRes_query'} );

    print $OUT "\n\n\U$sbjctName\E\n------------\n";
    $list_residues->( "IDENTICAL RESIDUES IN $sbjctName (n=$numIdentical)",
                      $self->{'_identicalRes_sbjct'} );
    print $OUT "\n";
    $list_residues->( "CONSERVED RESIDUES IN $sbjctName (n=$justConserved)",
                      $self->{'_conservedRes_sbjct'} );

    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656 =head2 homol_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658 Usage : $data = $hsp_object->homol_data( %named_params );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1659 Purpose : Gets similarity data for a single HSP.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1660 Returns : String:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661 : "Homology data" for each HSP is in the format:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662 : "<integer> <start> <stop>"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663 : where integer is the value returned by homol_score().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664 Argument : Named params: (UPPER OR LOWERCASE TAGS)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665 : currently just one param is used:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 : -SEQ =>'query' or 'sbjct'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669 Comments : This is a very experimental method used for obtaining a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670 : coarse indication of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671 : 1) how strong the similarity is between the sequences in the HSP,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672 : 2) the endpoints of the alignment (sequence monomer numbers)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674 See Also : L<homol_score()|homol_score>, B<Bio::Tools::Blast::homol_data()>, B<Bio::Tools::Blast::Sbjct::homol_data()>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1676 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#---------------
sub homol_data {
#---------------
    # Purpose : Return "<homol_score> <start> <stop>" for one side of the HSP.
    # Argument: Named parameter -SEQ (or -seq) => 'query' or 'sbjct';
    #           defaults to 'sbjct' when absent or false.
    # Returns : String of three space-separated fields.
    my ($self, %param) = @_;

    my $side       = $param{-SEQ} || $param{'-seq'} || 'sbjct';
    my $homolScore = $self->homol_score();

    # Sensitive to member name changes: the coordinates are stored in
    # _<side>Start and _<side>Stop.
    my $member = '_' . lc($side);

    # Make sure the coordinate members have been populated.
    if( not $self->{'_set_seq_data'} ) {
        $self->_set_seq_data();
    }

    return join( ' ', $homolScore,
                      $self->{ $member . 'Start' },
                      $self->{ $member . 'Stop' } );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 =head2 homol_score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 Usage : $self->homol_score();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695 Purpose : Get a homology score (integer 1 - 3) as a coarse representation of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696 : the strength of the similarity independent of sequence composition.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 : Based on the Blast bit score.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698 Example : $hscore = $hsp->homol_score();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699 Returns : Integer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1700 Argument : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1701 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1702 Status : Experimental
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1703 Comments : See @Bio::Tools::Blast::HSP::SCORE_CUTOFFS for the specific values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1704 : Currently, BIT_SCORE HOMOL_SCORE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1705 : --------- -----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1706 : >=100 --> 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1707 : 30-100 --> 2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1708 : < 30 --> 3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1710 See Also : L<homol_data()|homol_data>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1712 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#----------------
sub homol_score {
#----------------
    # Purpose : Map the bit score of this HSP onto a coarse rank.
    # Returns : 1 when the bit score is at or above $SCORE_CUTOFFS[0],
    #           2 when it is below that but at or above $SCORE_CUTOFFS[1],
    #           3 otherwise.
    my ($self) = @_;

    my $bits = $self->{'_bits'};

    return 1 if $bits >= $SCORE_CUTOFFS[0];
    # Reaching this point already means $bits is below the first cutoff.
    return 2 if $bits >= $SCORE_CUTOFFS[1];
    return 3;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1726 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1727 ## CLASS METHODS ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1728 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1730 =head1 CLASS METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1732 =head2 collapse_nums
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1734 Usage : @cnums = collapse_nums( @numbers );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1735 Purpose : Collapses a list of numbers into a set of ranges of consecutive terms:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1736 : Useful for condensing long lists of consecutive numbers.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1737 : EXPANDED:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1738 : 1 2 3 4 5 6 10 12 13 14 15 17 18 20 21 22 24 26 30 31 32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1739 : COLLAPSED:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1740 : 1-6 10 12-15 17 18 20-22 24 26 30-32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1741 Argument : List of numbers, sorted numerically.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1742 Returns : List of numbers mixed with ranges of numbers (see above).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1743 Throws : n/a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1744 Comments : Probably belongs in a more general utility class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1746 See Also : L<seq_inds()|seq_inds>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#------------------
sub collapse_nums {
#------------------
    # Purpose : Collapse a numerically sorted list of numbers into single
    #           numbers and "from-to" ranges of consecutive values.
    # Argument: List of numbers, sorted numerically.
    # Returns : List in which a run of three or more consecutive numbers is
    #           replaced by one "from-to" string; runs of one or two numbers
    #           are returned as individual elements.
    #           e.g. (1,2,3,5,6,9) --> ('1-3', 5, 6, 9)
    # Comments: Run bookkeeping uses defined() rather than truth, so a run
    #           that starts at 0 (or a lone 0) is kept instead of being
    #           silently dropped.
    my @nums = @_;
    my @collapsed;

    my ($run_start, $run_end);   # first and last value of the current run
    my $steps = 0;               # number of +1 steps taken within the run

    # Append the current run to the result in its collapsed form.
    my $flush_run = sub {
        if    ($steps == 0) { push @collapsed, $run_start; }
        elsif ($steps == 1) { push @collapsed, $run_start, $run_end; }
        else                { push @collapsed, "$run_start-$run_end"; }
    };

    for my $i (0 .. $#nums) {
        # Extend the current run when this value follows its predecessor.
        if (defined $run_start and $nums[$i] == $nums[$i-1] + 1) {
            $run_end = $nums[$i];
            $steps++;
            next;
        }
        # Otherwise close the previous run (if any) and start a new one.
        $flush_run->() if defined $run_start;
        ($run_start, $run_end, $steps) = ($nums[$i], undef, 0);
    }
    $flush_run->() if defined $run_start;

    return @collapsed;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1781
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1782 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1783 __END__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1785 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1786 # END OF CLASS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1787 #####################################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1788
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1789 =head1 FOR DEVELOPERS ONLY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1790
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1791 =head2 Data Members
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1792
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1793 Information about the various data members of this module is provided for those
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1794 wishing to modify or understand the code. Two things to bear in mind:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1796 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1798 =item 1 Do NOT rely on these in any code outside of this module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1800 All data members are prefixed with an underscore to signify that they are private.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1801 Always use accessor methods. If the accessor doesn't exist or is inadequate,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1802 create or modify an accessor (and let me know, too!).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1804 =item 2 This documentation may be incomplete and out of date.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1805
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1806 It is easy for these data member descriptions to become obsolete as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1807 this module is still evolving. Always double check this info and search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1808 for members not described here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1810 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1811
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1812 An instance of Bio::Tools::Blast::HSP.pm is a blessed reference to a hash containing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1813 all or some of the following fields:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1815 FIELD VALUE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1816 --------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1817 (member names are mostly self-explanatory)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1818
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1819 _score :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1820 _bits :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1821 _p :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1822 _n : Integer. The 'N' value listed in parentheses with P/Expect value:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1823 : e.g., P(3) = 1.2e-30 ---> (N = 3).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1824 : Not defined in NCBI Blast2 with gaps.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1825 : To obtain the number of HSPs, use Bio::Tools::Blast::Sbjct::num_hsps().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1826 _expect :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1827 _queryLength :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1828 _queryGaps :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1829 _queryStart :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1830 _queryStop :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1831 _querySeq :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1832 _sbjctLength :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1833 _sbjctGaps :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1834 _sbjctStart :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1835 _sbjctStop :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1836 _sbjctSeq :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1837 _matchSeq : String. Contains the symbols between the query and sbjct lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1838 which indicate identical (letter) and conserved ('+') matches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1839 or a mismatch (' ').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1840 _numIdentical :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1841 _numConserved :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1842 _identicalRes_query :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1843 _identicalRes_sbjct :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1844 _conservedRes_query :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1845 _conservedRes_sbjct :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1846 _match_indent : The number of leading space characters on each line containing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1847 the match symbols. _match_indent is 13 in this example:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1848 Query: 285 QNSAPWGLARISHRERLNLGSFNKYLYDDDAG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1849 Q +APWGLARIS G+ + Y YD+ AG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1850 ^^^^^^^^^^^^^
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1851
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1852 INHERITED DATA MEMBERS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1854 _name : From Bio::Root::Object.pm.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1855 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1856 _parent : From Bio::Root::Object.pm. This member contains a reference to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1857 : Bio::Tools::Blast::Sbjct.pm object to which this HSP belongs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1859
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1860 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1862 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1863