comparison variant_effect_predictor/Bio/Factory/BlastHitFactory.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #-----------------------------------------------------------------
2 # $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $
3 #
4 # BioPerl module for Bio::Factory::BlastHitFactory
5 #
6 # Cared for by Steve Chervitz <sac@bioperl.org>
7 #
8 # You may distribute this module under the same terms as perl itself
9 #-----------------------------------------------------------------
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects
16
17 =head1 SYNOPSIS
18
19 use Bio::Factory::BlastHitFactory;
20
21 my $hit_fact = Bio::Factory::BlastHitFactory->new();
22
23 my $hit = $hit_fact->create_hit( %parameters );
24
25 See documentation for create_hit() for information about C<%parameters>.
26
27 =head1 DESCRIPTION
28
29 This module encapsulates code for creating Bio::Search::Hit::BlastHit
30 and Bio::Search::HSP::BlastHSP objects from traditional BLAST report
31 data (i.e., non-XML formatted).
32
33 =head1 FEEDBACK
34
35 =head2 Mailing Lists
36
37 User feedback is an integral part of the evolution of this
38 and other Bioperl modules. Send your comments and suggestions preferably
39 to one of the Bioperl mailing lists.
40 Your participation is much appreciated.
41
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/MailList.html - About the mailing lists
44
45 =head2 Reporting Bugs
46
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 of the bugs and their resolution. Bug reports can be submitted via email
49 or the web:
50
51 bioperl-bugs@bio.perl.org
52 http://bugzilla.bioperl.org/
53
54 =head1 AUTHOR
55
56 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
57
58 See L<the FEEDBACK section|/FEEDBACK> for where to send bug reports and comments.
59
60 =head1 COPYRIGHT
61
62 Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
63
64 =head1 DISCLAIMER
65
66 This software is provided "as is" without warranty of any kind.
67
68 =head1 APPENDIX
69
70 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
71
72 =cut
73
74 #'
75
76 package Bio::Factory::BlastHitFactory;
77
78 use strict;
79 use Bio::Root::Root;
80 use Bio::Factory::HitFactoryI;
81 use Bio::Search::Hit::BlastHit;
82
83 use vars qw(@ISA);
84
85 @ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI);
86
# Constructor.
# Every argument is handed, unchanged, to the parent-class constructor
# (resolved through SUPER); the object it builds is returned as is.
sub new {
    my ( $class, @ctor_args ) = @_;

    # Scalar assignment keeps the parent constructor in scalar context.
    my $factory = $class->SUPER::new(@ctor_args);

    return $factory;
}
92
93 =head2 create_hit
94
95 Title : create_hit
96 Usage : $hit = $factory->create_hit( %params );
97 Function: Creates a new Bio::Search::Hit::BlastHit object given
98 raw BLAST report data, formatted in traditional BLAST report format.
99 Returns : A single Bio::Search::Hit::BlastHit object
100 Args : Named parameters to be passed to the BlastHit object.
101 Parameter keys are case-insensitive.
102 See Bio::Search::Hit::BlastHit::new() documentation for
103 details about these parameters.
104 The only additional parameter required is:
105 -RESULT => a Bio::Search::Result::BlastResult object.
106 From this result object, the program, query length,
107 and iteration are obtained and passed on to the BlastHit.
108
109 =cut
110
# Builds one Bio::Search::Hit::BlastHit object from named parameters.
#
#   -RESULT         (required) object that supplies analysis_method,
#                   query_length, iterations and query_name.
#   -RAW_DATA       array reference of report lines for this hit; it is
#                   dereferenced and passed to _add_hsps() unless
#                   -SHALLOW_PARSE is true.
#   -SHALLOW_PARSE  when true, _add_hsps() is not called.
#
# All named parameters, plus -PROGRAM, -QUERY_LEN and -ITERATION taken
# from the result object, are forwarded to Bio::Search::Hit::BlastHit->new().
#
# Returns the new hit object.
# Throws an exception if -RESULT is not supplied.
sub create_hit {
    my ( $self, @args ) = @_;

    my ( $blast, $raw_data, $shallow_parse ) =
      $self->_rearrange( [qw(RESULT RAW_DATA SHALLOW_PARSE)], @args );

    # Fail with a clear message instead of Perl's generic
    # "Can't call method ... on an undefined value".
    defined $blast
      or $self->throw(
        "create_hit() requires a -RESULT parameter (the BLAST result object).");

    my %args = @args;
    $args{'-PROGRAM'}   = $blast->analysis_method;
    $args{'-QUERY_LEN'} = $blast->query_length;
    $args{'-ITERATION'} = $blast->iterations;

    my $hit = Bio::Search::Hit::BlastHit->new(%args);

    unless ($shallow_parse) {

        # A missing -RAW_DATA is treated as an empty list of lines.
        $self->_add_hsps(
            $hit,
            $args{'-PROGRAM'},
            $args{'-QUERY_LEN'},
            $blast->query_name,
            @{ $raw_data || [] }
        );
    }

    return $hit;
}
136
#=head2 _add_hsps
#
# Usage     : Private method; called automatically by create_hit().
# Purpose   : Creates one Bio::Search::HSP::BlastHSP object for each HSP
#           : found in the raw alignment data of a single BLAST hit and
#           : stores the list in $hit->{'_hsps'}.
#           : Also collects the description of the hit from the lines that
#           : precede the "Length = N" line, and records the hit length.
# Returns   : n/a
# Argument  : ( $BlastHit_object, $program_name, $query_length,
#           :   $query_name, @raw_data )
#           : @raw_data is the traditional BLAST report text for one hit,
#           : supplied as a list of strings (one line each).
#           : $query_length is accepted but not used by this method.
# Throws    : Exception if no hit sequence length was recorded
#           : (i.e., no "Length = N" line was found).
#           : Anything thrown by the BlastHSP constructor propagates.
# Comments  : Loads Bio::Search::HSP::BlastHSP on demand.
#           : An HSP begins at a "Score" line and ends at the next "Score"
#           : line, at a terminator line (end, >, Parameters, CPU,
#           : Database:), or at the end of the data.
#=cut

#--------------
sub _add_hsps {
#--------------
    my ( $self, $hit, $prog, $qlen, $qname, @data ) = @_;

    require Bio::Search::HSP::BlastHSP;

    my $hit_name  = $hit->name;
    my $hsp_count = 0;
    my $have_desc = 0;    # true once the "Length = N" line has been seen
    my ( @desc, @hsp_lines, @hsp_list );

    # Turns the lines buffered so far into a BlastHSP object and empties the
    # buffer. Does nothing when the buffer is empty, so it is safe to call at
    # every possible HSP boundary.
    my $flush_hsp = sub {
        return unless @hsp_lines;
        $hsp_count++;

        # Progress marker on STDERR, with a line break after every 10th HSP.
        # Only for positive verbosity, so a negative (quiet) setting stays
        # silent.
        if ( ( $self->verbose || 0 ) > 0 ) {
            print STDERR +( $hsp_count % 10 ? "+" : "+\n" );
        }

        # Pass a copy of the buffer: the buffer is reused for the next HSP,
        # so the constructor must not end up sharing it.
        my $hsp = Bio::Search::HSP::BlastHSP->new(
            -RAW_DATA   => [@hsp_lines],
            -RANK       => $hsp_count,
            -PROGRAM    => $prog,
            -QUERY_NAME => $qname,
            -HIT_NAME   => $hit_name,
        );
        push @hsp_list, $hsp;
        @hsp_lines = ();
        return;
    };

  HSP_LINE:
    foreach my $line (@data) {

        if ( $line =~ /^\s*Length = ([\d,]+)/ ) {
            my $length_text = $1;
            $hit->_set_description(@desc);
            $hit->_set_length($length_text);
            $have_desc = 1;
            next HSP_LINE;
        }

        # Everything before the length line belongs to the description.
        if ( !$have_desc ) {
            $line =~ s/^\s+|\s+$//g;
            push @desc, $line;
            next HSP_LINE;
        }

        # A "Score" line closes the previous HSP (if any) and opens a new one.
        if ( $line =~ /^\s*Score/ ) {
            $flush_hsp->();
            push @hsp_lines, $line;
            next HSP_LINE;
        }

        # Lines outside any HSP (before the first "Score" line or after a
        # terminator) are ignored.
        next HSP_LINE unless @hsp_lines;

        if ( $line =~ /^(end|>|Parameters|CPU|Database:)/ ) {

            # Terminator: close the current HSP. The buffer is now empty, so
            # a further terminator cannot build the same HSP twice.
            $flush_hsp->();
        }
        else {
            push @hsp_lines, $line;
        }
    }

    # The data may end without a terminator line; do not lose the last HSP.
    $flush_hsp->();

    $hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");

    # Adjust logical length based on BLAST flavor: for program names
    # containing TBLASTN or TBLASTX it is one third of the hit length
    # (presumably because the hit is a translated nucleotide sequence).
    if ( $prog =~ /TBLAST[NX]/ ) {
        $hit->{'_logical_length'} = $hit->{'_length'} / 3;
    }

    $hit->{'_hsps'} = [@hsp_list];
}
246
247
248
249 1;