0
|
1 #-----------------------------------------------------------------
|
|
2 # $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $
|
|
3 #
|
|
4 # BioPerl module for Bio::Factory::BlastHitFactory
|
|
5 #
|
|
6 # Cared for by Steve Chervitz <sac@bioperl.org>
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 #-----------------------------------------------------------------
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Factory::BlastHitFactory;
|
|
20
|
|
21 my $hit_fact = Bio::Factory::BlastHitFactory->new();
|
|
22
|
|
23 my $hit = $hit_fact->create_hit( %parameters );
|
|
24
|
|
25 See documentation for create_hit() for information about C<%parameters>.
|
|
26
|
|
27 =head1 DESCRIPTION
|
|
28
|
|
29 This module encapsulates code for creating Bio::Search::Hit::BlastHit
|
|
30 and Bio::Search::HSP::BlastHSP objects from traditional BLAST report
|
|
31 data (i.e., non-XML formatted).
|
|
32
|
|
33 =head1 FEEDBACK
|
|
34
|
|
35 =head2 Mailing Lists
|
|
36
|
|
37 User feedback is an integral part of the evolution of this
|
|
38 and other Bioperl modules. Send your comments and suggestions preferably
|
|
39 to one of the Bioperl mailing lists.
|
|
40 Your participation is much appreciated.
|
|
41
|
|
42 bioperl-l@bioperl.org - General discussion
|
|
43 http://bioperl.org/MailList.html - About the mailing lists
|
|
44
|
|
45 =head2 Reporting Bugs
|
|
46
|
|
47 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
48 of the bugs and their resolution. Bug reports can be submitted via email
|
|
49 or the web:
|
|
50
|
|
51 bioperl-bugs@bio.perl.org
|
|
52 http://bugzilla.bioperl.org/
|
|
53
|
|
54 =head1 AUTHOR
|
|
55
|
|
56 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
|
|
57
|
|
58 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
|
|
59
|
|
60 =head1 COPYRIGHT
|
|
61
|
|
62 Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
|
|
63
|
|
64 =head1 DISCLAIMER
|
|
65
|
|
66 This software is provided "as is" without warranty of any kind.
|
|
67
|
|
68 =head1 APPENDIX
|
|
69
|
|
70 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
71
|
|
72 =cut
|
|
73
|
|
74 #'
|
|
75
|
|
76 package Bio::Factory::BlastHitFactory;
|
|
77
|
|
78 use strict;
|
|
79 use Bio::Root::Root;
|
|
80 use Bio::Factory::HitFactoryI;
|
|
81 use Bio::Search::Hit::BlastHit;
|
|
82
|
|
83 use vars qw(@ISA);
|
|
84
|
|
85 @ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI);
|
|
86
|
|
sub new {
    # Constructor: every argument is forwarded unchanged to the parent
    # class constructor, and the object it builds is returned as-is.
    my $class   = shift;
    my $factory = $class->SUPER::new(@_);
    return $factory;
}
|
|
92
|
|
93 =head2 create_hit
|
|
94
|
|
95 Title : create_hit
|
|
96 Usage : $hit = $factory->create_hit( %params );
|
|
97 Function: Creates a new Bio::Search::Hit::BlastHit object given
|
|
98 raw BLAST report data, formatted in traditional BLAST report format.
|
|
99 Returns : A single Bio::Search::Hit::BlastHit object
|
|
100 Args : Named parameters to be passed to the BlastHit object.
|
|
101 Parameter keys are case-insensitive.
|
|
102 See Bio::Search::Hit::BlastHit::new() documentation for
|
|
103 details about these parameters.
|
|
104 The only additional parameter required is:
|
|
105 -RESULT => a Bio::Search::Result::BlastResult object.
|
|
106 From this result object, the program, query length,
|
|
107 and iteration are obtained and passed on to the BlastHit.
|
|
108
|
|
109 =cut
|
|
110
|
|
sub create_hit {
    my ($self, @args) = @_;

    # Parameters consumed here (key matching is delegated to _rearrange):
    #   -RESULT        : result object queried for analysis_method,
    #                    query_length, iterations and query_name.
    #   -RAW_DATA      : array reference holding the raw report lines
    #                    for this hit.
    #   -SHALLOW_PARSE : when true, no HSP objects are built.
    my ($blast, $raw_data, $shallow_parse) =
        $self->_rearrange( [qw(RESULT
                               RAW_DATA
                               SHALLOW_PARSE)], @args );

    # Fail with a meaningful message instead of the opaque
    # "Can't call method ... on an undefined value" that the
    # method calls below would otherwise produce.
    defined $blast
        or $self->throw("create_hit() requires a -RESULT parameter.");

    # The full caller-supplied parameter list is passed through to the
    # hit constructor, augmented with values taken from the result object.
    my %args = @args;
    $args{'-PROGRAM'}   = $blast->analysis_method;
    $args{'-QUERY_LEN'} = $blast->query_length;
    $args{'-ITERATION'} = $blast->iterations;

    my $hit = Bio::Search::Hit::BlastHit->new( %args );

    # Deep parse (the default): build the HSP objects for this hit.
    unless( $shallow_parse ) {
        $self->_add_hsps( $hit,
                          $args{'-PROGRAM'},
                          $args{'-QUERY_LEN'},
                          $blast->query_name,
                          @{$raw_data} );
    }

    return $hit;
}
|
|
136
|
|
#=head2 _add_hsps
#
# Usage     : Private method; called automatically by create_hit().
# Purpose   : Creates a Bio::Search::HSP::BlastHSP object for each HSP in
#           : a BLAST hit alignment and stores the list on the hit.
#           : Also collects the full description of the hit from the
#           : lines that precede the "Length = N" line.
# Returns   : n/a
# Argument  : ($BlastHit_object, $program_name, $query_length, $query_name, @raw_data)
#           : @raw_data is the traditional BLAST report text for one hit,
#           : supplied as a list of lines.
#           : $query_length is accepted for interface compatibility but
#           : is not used by this method.
# Throws    : Exception if no hit sequence length could be determined.
#           : Errors raised while constructing a BlastHSP object are not
#           : trapped here.
# Comments  : Requires Bio::Search::HSP::BlastHSP.pm.
#           : Sets the description using the full string present in
#           : the alignment data.
#           : An HSP is closed by the next "Score" line, by a terminator
#           : line (end, >, Parameters, CPU, Database:) or by the end of
#           : the supplied data, whichever comes first.
#=cut

#--------------
sub _add_hsps {
#--------------
    my( $self, $hit, $prog, $qlen, $qname, @data ) = @_;

    require Bio::Search::HSP::BlastHSP;

    my $hname    = $hit->name;
    my $set_desc = 0;    # true once the "Length = N" line has been seen
    my $in_hsp   = 0;    # true while lines of an HSP are being collected
    my $hspCount = 0;
    my( @desc, @hspData, @hspList );

    # Turns the lines buffered in @hspData into one BlastHSP object and
    # empties the buffer.  Does nothing when the buffer is empty, so it is
    # safe to call at every point where an HSP might end.
    my $flush_hsp = sub {
        return unless @hspData;

        $hspCount++;
        # Progress indicator: one "+" per HSP, newline after every tenth.
        $self->verbose and do{ print STDERR +( $hspCount % 10 ? "+" : "+\n" ); };

        # Hand over a snapshot so that reusing the buffer for the next HSP
        # cannot alter data the HSP object may still reference.
        my $hspObj = Bio::Search::HSP::BlastHSP->new
            (-RAW_DATA   => [ @hspData ],
             -RANK       => $hspCount,
             -PROGRAM    => $prog,
             -QUERY_NAME => $qname,
             -HIT_NAME   => $hname,
            );
        push @hspList, $hspObj;
        @hspData = ();
    };

  HIT_LINE:
    foreach my $line ( @data ) {

        if( $line =~ /^\s*Length = ([\d,]+)/ ) {
            my $raw_length = $1;
            # Everything collected so far is the hit description.
            $hit->_set_description(@desc);
            $set_desc = 1;
            $hit->_set_length($raw_length);
            next HIT_LINE;
        }

        if( !$set_desc ) {
            # Still in the description section: trim and collect.
            $line =~ s/^\s+|\s+$//g;
            push @desc, $line;
            next HIT_LINE;
        }

        if( $line =~ /^\s*Score/ ) {
            # A "Score" line closes any HSP in progress and opens a new one.
            $flush_hsp->();
            $in_hsp = 1;
            push @hspData, $line;
            next HIT_LINE;
        }

        # Lines outside of any HSP (before the first "Score" line or after
        # a terminator) carry nothing of interest here.
        next HIT_LINE unless $in_hsp;

        if( $line =~ /^(end|>|Parameters|CPU|Database:)/ ) {
            # Terminator: close the last HSP (which may be the first as
            # well) and stop collecting, so later trailer lines cannot
            # produce a duplicate HSP.
            $flush_hsp->();
            $in_hsp = 0;
        } else {
            push @hspData, $line;
        }
    }

    # Data that ends without a terminator line still holds a final HSP.
    $flush_hsp->();

    $hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");

    # Adjust logical length based on BLAST flavor.
    if($prog =~ /TBLAST[NX]/) {
        $hit->{'_logical_length'} = $hit->{'_length'} / 3;
    }

    $hit->{'_hsps'} = [ @hspList ];
}
|
|
246
|
|
247
|
|
248
|
|
249 1;
|