Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Factory/BlastHitFactory.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 #----------------------------------------------------------------- | |
| 2 # $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $ | |
| 3 # | |
| 4 # BioPerl module for Bio::Factory::BlastHitFactory | |
| 5 # | |
| 6 # Cared for by Steve Chervitz <sac@bioperl.org> | |
| 7 # | |
| 8 # You may distribute this module under the same terms as perl itself | |
| 9 #----------------------------------------------------------------- | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::Factory::BlastHitFactory; | |
| 20 | |
| 21 my $hit_fact = Bio::Factory::BlastHitFactory->new(); | |
| 22 | |
| 23 my $hit = $hit_fact->create_hit( %parameters ); | |
| 24 | |
| 25 See documentation for create_hit() for information about C<%parameters>. | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 This module encapsulates code for creating Bio::Search::Hit::BlastHit | |
| 30 and Bio::Search::HSP::BlastHSP objects from traditional BLAST report | |
| 31 data (i.e., non-XML formatted). | |
| 32 | |
| 33 =head1 FEEDBACK | |
| 34 | |
| 35 =head2 Mailing Lists | |
| 36 | |
| 37 User feedback is an integral part of the evolution of this | |
| 38 and other Bioperl modules. Send your comments and suggestions preferably | |
| 39 to one of the Bioperl mailing lists. | |
| 40 Your participation is much appreciated. | |
| 41 | |
| 42 bioperl-l@bioperl.org - General discussion | |
| 43 http://bioperl.org/MailList.html - About the mailing lists | |
| 44 | |
| 45 =head2 Reporting Bugs | |
| 46 | |
| 47 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 48 of the bugs and their resolution. Bug reports can be submitted via email | |
| 49 or the web: | |
| 50 | |
| 51 bioperl-bugs@bio.perl.org | |
| 52 http://bugzilla.bioperl.org/ | |
| 53 | |
| 54 =head1 AUTHOR | |
| 55 | |
| 56 Steve Chervitz E<lt>sac@bioperl.orgE<gt> | |
| 57 | |
| 58 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments. | |
| 59 | |
| 60 =head1 COPYRIGHT | |
| 61 | |
| 62 Copyright (c) 2001 Steve Chervitz. All Rights Reserved. | |
| 63 | |
| 64 =head1 DISCLAIMER | |
| 65 | |
| 66 This software is provided "as is" without warranty of any kind. | |
| 67 | |
| 68 =head1 APPENDIX | |
| 69 | |
| 70 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 71 | |
| 72 =cut | |
| 73 | |
| 74 #' | |
| 75 | |
| 76 package Bio::Factory::BlastHitFactory; | |
| 77 | |
| 78 use strict; | |
| 79 use Bio::Root::Root; | |
| 80 use Bio::Factory::HitFactoryI; | |
| 81 use Bio::Search::Hit::BlastHit; | |
| 82 | |
| 83 use vars qw(@ISA); | |
| 84 | |
| 85 @ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI); | |
| 86 | |
# Constructor.
#
# Usage   : my $factory = Bio::Factory::BlastHitFactory->new(@args);
# Function: Builds a factory object by delegating to the parent-class
#           constructor; this class adds no state of its own.
# Returns : The object returned by the parent constructor.
# Args    : Passed through to the parent constructor unchanged.
sub new {
    my $class = shift;

    # Evaluate the parent constructor in scalar context so that exactly one
    # value is handed back, whatever context new() itself was called in.
    return scalar $class->SUPER::new(@_);
}
| 92 | |
| 93 =head2 create_hit | |
| 94 | |
| 95 Title : create_hit | |
| 96 Usage : $hit = $factory->create_hit( %params ); | |
| 97 Function: Creates a new Bio::Search::Hit::BlastHit object given | |
| 98 raw BLAST report data, formatted in traditional BLAST report format. | |
| 99 Returns : A single Bio::Search::Hit::BlastHit object | |
| 100 Args : Named parameters to be passed to the BlastHit object. | |
| 101 Parameter keys are case-insensitive. | |
| 102 See Bio::Search::Hit::BlastHit::new() documentation for | |
| 103 details about these parameters. | |
| 104 The only additional parameter required is: | |
| 105 -RESULT => a Bio::Search::Result::BlastResult object. | |
| 106 From this result object, the program, query length, | |
| 107 and iteration are obtained and passed on to the BlastHit. | |
| 108 | |
| 109 =cut | |
| 110 | |
# Builds a hit object from traditional (non-XML) BLAST report data.
#
# Args    : Named parameters; all of them are forwarded to
#           Bio::Search::Hit::BlastHit->new(). Those read here are:
#             -RESULT        => result object (required); its analysis_method,
#                               query_length, iterations and query_name
#                               methods are called
#             -RAW_DATA      => array ref of report lines for this hit
#             -SHALLOW_PARSE => if true, HSPs are not built
# Returns : The newly created hit object.
# Throws  : Exception if -RESULT is not supplied.
sub create_hit {
    my ($self, @args) = @_;

    my ($blast, $raw_data, $shallow_parse) =
        $self->_rearrange([qw(RESULT RAW_DATA SHALLOW_PARSE)], @args);

    # Fail with a message that names the missing parameter instead of
    # "Can't call method ... on an undefined value" further down.
    defined $blast
        or $self->throw("create_hit() requires a -RESULT parameter "
                      . "(a Bio::Search::Result::BlastResult object).");

    # Forward everything the caller supplied, plus values taken from the result.
    my %args = @args;
    $args{'-PROGRAM'}   = $blast->analysis_method;
    $args{'-QUERY_LEN'} = $blast->query_length;
    $args{'-ITERATION'} = $blast->iterations;

    my $hit = Bio::Search::Hit::BlastHit->new(%args);

    unless ($shallow_parse) {
        # An absent -RAW_DATA is treated as an empty list of lines;
        # _add_hsps() then reports that no hit length could be found.
        $self->_add_hsps($hit,
                         $args{'-PROGRAM'},
                         $args{'-QUERY_LEN'},
                         $blast->query_name,
                         @{ $raw_data || [] });
    }

    return $hit;
}
| 136 | |
| 137 #=head2 _add_hsps | |
| 138 # | |
| 139 # Usage : Private method; called automatically by create_hit(). | |
| 140 # Purpose : Creates BlastHSP.pm objects for each HSP in a BLAST hit alignment. | |
| 141 # : Also collects the full description of the hit from the | |
| 142 # : HSP alignment section. | |
| 143 # Returns : n/a | |
| 144 # Argument : (<$BlastHit_object>, <$program_name>, <$query_length>, <$query_name>, <@raw_data>) | |
| 145 # 'raw data list' consists of traditional BLAST report | |
| 146 # format for a single hit (one or more HSPs), supplied as a list of strings. | |
| 147 # Throws : Warnings for each BlastHSP.pm object that fails to be constructed. | |
| 148 # : Exception if no BlastHSP.pm objects can be constructed. | |
| 149 # : Exception if can't parse length data for hit sequence. | |
| 150 # Comments : Requires Bio::Search::HSP::BlastHSP.pm. | |
| 151 # : Sets the description using the full string present in | |
| 152 # : the alignment data. | |
| 153 #=cut | |
| 154 | |
| 155 #-------------- | |
sub _add_hsps {
    # Private method; called by create_hit().
    #
    # Purpose  : Walks the raw report lines of one hit, sets the hit's
    #            description and length, and builds one
    #            Bio::Search::HSP::BlastHSP object per "Score" section.
    # Argument : ($hit, $prog, $qlen, $qname, @data)
    #            $qlen is accepted for interface compatibility but is not used.
    #            @data is the list of report lines for this hit.
    # Effects  : Sets $hit->{'_hsps'}, and $hit->{'_logical_length'} when the
    #            program name matches TBLAST[NX].
    # Throws   : Exception if no "Length = N" line set the hit length.
    my ( $self, $hit, $prog, $qlen, $qname, @data ) = @_;

    require Bio::Search::HSP::BlastHSP;

    my $hit_name  = $hit->name;
    my $hsp_count = 0;
    my $have_desc = 0;    # true once the "Length =" line has been seen
    my ( @desc, @hsp_lines, @hsps );

    # Turns the lines collected so far into an HSP object and clears the
    # buffer. Does nothing when no HSP is in progress, so it can be called
    # at every boundary without producing duplicate or empty HSPs.
    my $finish_hsp = sub {
        return unless @hsp_lines;
        $hsp_count++;
        if ( $self->verbose ) {
            my $tick = $hsp_count % 10 ? "+" : "+\n";
            print STDERR $tick;
        }

        # Pass a copy so that clearing the buffer below cannot affect
        # whatever the HSP object does with its raw data.
        my $hsp = Bio::Search::HSP::BlastHSP->new(
            -RAW_DATA   => [@hsp_lines],
            -RANK       => $hsp_count,
            -PROGRAM    => $prog,
            -QUERY_NAME => $qname,
            -HIT_NAME   => $hit_name,
        );
        push @hsps, $hsp;
        @hsp_lines = ();
        return;
    };

    LINE:
    foreach my $line (@data) {

        # The "Length = N" line ends the description section.
        if ( my ($raw_length) = $line =~ /^\s*Length = ([\d,]+)/ ) {
            $hit->_set_description(@desc);
            $have_desc = 1;
            $hit->_set_length($raw_length);
            next LINE;
        }

        # Everything before the length line belongs to the description.
        if ( !$have_desc ) {
            $line =~ s/^\s+|\s+$//g;
            push @desc, $line;
            next LINE;
        }

        # A "Score" line closes the previous HSP (if any) and opens a new one.
        if ( $line =~ /^\s*Score/ ) {
            $finish_hsp->();
            push @hsp_lines, $line;
            next LINE;
        }

        # Lines outside any HSP (before the first "Score" line or after a
        # terminator) carry no HSP data.
        next LINE unless @hsp_lines;

        if ( $line =~ /^(end|>|Parameters|CPU|Database:)/ ) {
            # Terminator: close the current HSP. The buffer is cleared, so
            # further terminator lines cannot build the same HSP twice.
            $finish_hsp->();
        }
        else {
            push @hsp_lines, $line;
        }
    }

    # Raw data that stops without a terminator line still holds a pending
    # HSP; build it rather than silently dropping it.
    $finish_hsp->();

    $hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");

    # Adjust logical length based on BLAST flavor.
    if ( $prog =~ /TBLAST[NX]/ ) {
        $hit->{'_logical_length'} = $hit->{'_length'} / 3;
    }

    $hit->{'_hsps'} = [ @hsps ];
}
| 246 | |
| 247 | |
| 248 | |
| 249 1; |
