Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Factory/BlastHitFactory.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 #----------------------------------------------------------------- | |
2 # $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $ | |
3 # | |
4 # BioPerl module for Bio::Factory::BlastHitFactory | |
5 # | |
6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
7 # | |
8 # You may distribute this module under the same terms as perl itself | |
9 #----------------------------------------------------------------- | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Factory::BlastHitFactory; | |
20 | |
21 my $hit_fact = Bio::Factory::BlastHitFactory->new(); | |
22 | |
23 my $hit = $hit_fact->create_hit( %parameters ); | |
24 | |
25 See documentation for create_hit() for information about C<%parameters>. | |
26 | |
27 =head1 DESCRIPTION | |
28 | |
29 This module encapsulates code for creating Bio::Search::Hit::BlastHit | |
30 and Bio::Search::HSP::BlastHSP objects from traditional BLAST report | |
31 data (i.e., non-XML formatted). | |
32 | |
33 =head1 FEEDBACK | |
34 | |
35 =head2 Mailing Lists | |
36 | |
37 User feedback is an integral part of the evolution of this | |
38 and other Bioperl modules. Send your comments and suggestions preferably | |
39 to one of the Bioperl mailing lists. | |
40 Your participation is much appreciated. | |
41 | |
42 bioperl-l@bioperl.org - General discussion | |
43 http://bioperl.org/MailList.html - About the mailing lists | |
44 | |
45 =head2 Reporting Bugs | |
46 | |
47 Report bugs to the Bioperl bug tracking system to help us keep track | |
48 of the bugs and their resolution. Bug reports can be submitted via email | |
49 or the web: | |
50 | |
51 bioperl-bugs@bio.perl.org | |
52 http://bugzilla.bioperl.org/ | |
53 | |
54 =head1 AUTHOR | |
55 | |
56 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
57 | |
58 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments. | |
59 | |
60 =head1 COPYRIGHT | |
61 | |
62 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
63 | |
64 =head1 DISCLAIMER | |
65 | |
66 This software is provided "as is" without warranty of any kind. | |
67 | |
68 =head1 APPENDIX | |
69 | |
70 The rest of the documentation details each of the object methods. Internal methods are usually prefixed with an underscore (_). | |
71 | |
72 =cut | |
73 | |
74 #' | |
75 | |
76 package Bio::Factory::BlastHitFactory; | |
77 | |
78 use strict; | |
79 use Bio::Root::Root; | |
80 use Bio::Factory::HitFactoryI; | |
81 use Bio::Search::Hit::BlastHit; | |
82 | |
83 use vars qw(@ISA); | |
84 | |
85 @ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI); | |
86 | |
sub new {
    # Constructor: performs no factory-specific setup; every argument is
    # handed unchanged to the superclass constructor and the object it
    # produces is returned.
    my $class = shift;
    my $factory = $class->SUPER::new(@_);
    return $factory;
}
92 | |
93 =head2 create_hit | |
94 | |
95 Title : create_hit | |
96 Usage : $hit = $factory->create_hit( %params ); | |
97 Function: Creates a new Bio::Search::Hit::BlastHit object given | |
98 raw BLAST report data, formatted in traditional BLAST report format. | |
99 Returns : A single Bio::Search::Hit::BlastHit object | |
100 Args : Named parameters to be passed to the BlastHit object. | |
101 Parameter keys are case-insensitive. | |
102 See Bio::Search::Hit::BlastHit::new() documentation for | |
103 details about these parameters. | |
104 The only additional parameter required is: | |
105 -RESULT => a Bio::Search::Result::BlastResult object. | |
106 From this result object, the program, query length, | |
107 and iteration are obtained and passed on to the BlastHit. | |
108 | |
109 =cut | |
110 | |
sub create_hit {
    # Builds a Bio::Search::Hit::BlastHit from named parameters.
    #
    # Args    : Named parameters forwarded to Bio::Search::Hit::BlastHit->new,
    #           plus:
    #             -RESULT        (required) result object; it is asked for
    #                            analysis_method, query_length, iterations
    #                            and, unless shallow parsing, query_name.
    #             -RAW_DATA      array ref of raw report lines for this hit.
    #             -SHALLOW_PARSE true to skip construction of HSP objects.
    # Returns : The newly created hit object.
    # Throws  : Exception if -RESULT is not supplied.
    my ($self, @args) = @_;

    my ($blast, $raw_data, $shallow_parse) =
        $self->_rearrange([qw(RESULT RAW_DATA SHALLOW_PARSE)], @args);

    # Fail with a meaningful message instead of Perl's generic
    # "Can't call method ... on an undefined value".
    defined $blast
        or $self->throw("create_hit() requires a -RESULT parameter "
                        . "(a Bio::Search::Result::BlastResult object).");

    # Values derived from the result object are passed on to the hit
    # alongside the caller's own parameters.
    my %args = @args;
    $args{'-PROGRAM'}   = $blast->analysis_method;
    $args{'-QUERY_LEN'} = $blast->query_length;
    $args{'-ITERATION'} = $blast->iterations;

    my $hit = Bio::Search::Hit::BlastHit->new(%args);

    unless ($shallow_parse) {
        # A missing -RAW_DATA is treated as an empty list of lines; this
        # spells out what the former implicit autovivification did.
        $self->_add_hsps($hit,
                         $args{'-PROGRAM'},
                         $args{'-QUERY_LEN'},
                         $blast->query_name,
                         @{ $raw_data || [] });
    }

    return $hit;
}
136 | |
137 #=head2 _add_hsps | |
138 # | |
139 # Usage : Private method; called automatically by create_hit(). | |
140 # Purpose : Creates BlastHSP.pm objects for each HSP in a BLAST hit alignment. | |
141 # : Also collects the full description of the hit from the | |
142 # : HSP alignment section. | |
143 # Returns : n/a | |
144 # Argument : (<$BlastHit_object>, <$program_name>, <$query_length>, <$query_name>, <@raw_data>) | |
145 # 'raw data list' consists of traditional BLAST report | |
146 # format for a single hit (one or more HSPs), supplied as a list of strings. | |
147 # Throws : Warnings for each BlastHSP.pm object that fails to be constructed. | |
148 # : Exception if no BlastHSP.pm objects can be constructed. | |
149 # : Exception if can't parse length data for hit sequence. | |
150 # Comments : Requires Bio::Search::HSP::BlastHSP.pm. | |
151 # : Sets the description using the full string present in | |
152 # : the alignment data. | |
153 #=cut | |
154 | |
155 #-------------- | |
sub _add_hsps {
    # Purpose : Walks the raw report lines of one hit, sets the hit's
    #           description and length, and builds one
    #           Bio::Search::HSP::BlastHSP object per "Score" section.
    # Args    : ($hit, $prog, $qlen, $qname, @data)
    #             $hit   - hit object being populated
    #             $prog  - BLAST program name
    #             $qlen  - query length (accepted for interface
    #                      compatibility; not used here)
    #             $qname - query name, passed on to each HSP
    #             @data  - raw report lines for this hit
    # Throws  : Exception if the hit's sequence length is not set once
    #           all lines have been processed.
    # Effects : Sets $hit->{'_hsps'} and, for TBLASTN/TBLASTX,
    #           $hit->{'_logical_length'}.
    my ( $self, $hit, $prog, $qlen, $qname, @data ) = @_;

    require Bio::Search::HSP::BlastHSP;

    my $hname     = $hit->name;
    my $hsp_count = 0;
    my $desc_set  = 0;    # true once the "Length =" line has been seen
    my $in_hsp    = 0;    # true once the first "Score" line has been seen
    my ( @desc, @hsp_data, @hsp_list );

    # Turns the lines buffered so far into an HSP object and empties the
    # buffer. The constructor receives its own copy of the lines so that
    # reusing the buffer can never alter data the HSP may still refer to.
    my $flush_hsp = sub {
        $hsp_count++;
        if ( $self->verbose ) {
            my $tick = $hsp_count % 10 ? "+" : "+\n";
            print STDERR $tick;
        }
        push @hsp_list,
            Bio::Search::HSP::BlastHSP->new(
                -RAW_DATA   => [ @hsp_data ],
                -RANK       => $hsp_count,
                -PROGRAM    => $prog,
                -QUERY_NAME => $qname,
                -HIT_NAME   => $hname,
            );
        @hsp_data = ();
    };

  HIT_LINE:
    foreach my $line (@data) {

        # The "Length =" line ends the description section.
        if ( $line =~ /^\s*Length = ([\d,]+)/ ) {
            $hit->_set_description(@desc);
            $desc_set = 1;
            $hit->_set_length($1);
            next HIT_LINE;
        }

        # Everything before the length line belongs to the description.
        if ( !$desc_set ) {
            $line =~ s/^\s+|\s+$//g;
            push @desc, $line;
            next HIT_LINE;
        }

        # A "Score" line opens a new HSP; close the previous one, if any.
        if ( $line =~ /^\s*Score/ ) {
            $flush_hsp->() if @hsp_data;
            $in_hsp = 1;
            push @hsp_data, $line;
            next HIT_LINE;
        }

        # Lines between the length line and the first HSP are ignored.
        next HIT_LINE unless $in_hsp;

        # A terminator marks the end of this hit's alignments. Stop here so
        # that later terminator lines cannot produce duplicate HSPs.
        last HIT_LINE
            if $line =~ /^(?:end|>|Parameters|CPU|Database:)/;

        push @hsp_data, $line;
    }

    # Build the final HSP, whether the loop stopped at a terminator line or
    # simply ran out of data.
    $flush_hsp->() if @hsp_data;

    $hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");

    # Adjust logical length based on BLAST flavor.
    if ( $prog =~ /TBLAST[NX]/ ) {
        $hit->{'_logical_length'} = $hit->{'_length'} / 3;
    }

    $hit->{'_hsps'} = [ @hsp_list ];
}
246 | |
247 | |
248 | |
249 1; |