Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Tools/BPbl2seq.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $ | |
| 2 # | |
| 3 # Bioperl module Bio::Tools::BPbl2seq | |
| 4 # based closely on the Bio::Tools::BPlite modules | |
| 5 # Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
| 6 # Lorenz Pollak (lorenz@ist.org, bioperl port) | |
| 7 # | |
| 8 # | |
| 9 # Copyright Peter Schattner | |
| 10 # | |
| 11 # You may distribute this module under the same terms as perl itself | |
| 12 # _history | |
| 13 # October 20, 2000 | |
| 14 # May 29, 2001 | |
| 15 # Fixed bug which prevented reading of more than one HSP / hit. | |
| 16 # This fix required changing calling syntax as described below. (PS) | |
| 17 # POD documentation - main docs before the code | |
| 18 | |
| 19 =head1 NAME | |
| 20 | |
| 21 Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence | |
| 22 alignment using the BLAST algorithm. | |
| 23 | |
| 24 =head1 SYNOPSIS | |
| 25 | |
| 26 use Bio::Tools::BPbl2seq; | |
| 27 my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out'); | |
| 28 $report->sbjctName; | |
| 29 $report->sbjctLength; | |
| 30 while(my $hsp = $report->next_feature) { | |
| 31 $hsp->score; | |
| 32 $hsp->bits; | |
| 33 $hsp->percent; | |
| 34 $hsp->P; | |
| 35 $hsp->match; | |
| 36 $hsp->positive; | |
| 37 $hsp->length; | |
| 38 $hsp->querySeq; | |
| 39 $hsp->sbjctSeq; | |
| 40 $hsp->homologySeq; | |
| 41 $hsp->query->start; | |
| 42 $hsp->query->end; | |
| 43 $hsp->sbjct->start; | |
| 44 $hsp->sbjct->end; | |
| 45 $hsp->sbjct->seq_id; | |
| 46 $hsp->sbjct->overlaps($exon); | |
| 47 } | |
| 48 | |
| 49 =head1 DESCRIPTION | |
| 50 | |
| 51 BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a | |
| 52 program for comparing and aligning two sequences using BLAST. Although | |
| 53 the report format is similar to that of a conventional BLAST, there are a | |
| 54 few differences so that BPlite is unable to read bl2seq reports directly. | |
| 55 | |
| 56 From the user's perspective, one difference between bl2seq and | |
| 57 other blast reports is that the bl2seq report does not print out the | |
| 58 name of the first of the two aligned sequences. (The second sequence | |
| 59 name is given in the report as the name of the "hit"). Consequently, | |
| 60 BPbl2seq has no way of identifying the name of the initial sequence | |
| 61 unless it is passed to constructor as a second argument as in: | |
| 62 | |
| 63 my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU"); | |
| 64 | |
| 65 If the name of the first sequence (the "query") is not passed to | |
| 66 BPbl2seq.pm in this manner, the name of the first sequence will be | |
| 67 left as "unknown". (Note that to preserve a common interface with the | |
| 68 other BLAST programs the two sequences being compared are referred to | |
| 69 in bl2seq as "query" and "subject" although this is perhaps a bit | |
| 70 misleading when simply comparing 2 sequences as opposed to querying a | |
| 71 database.) | |
| 72 | |
| 73 In addition, since there will only be (at most) one "subject" (hit) in | |
| 74 a bl2seq report, one should use the method $report-E<gt>next_feature, | |
| 75 rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next | |
| 76 high scoring pair. | |
| 77 | |
| 78 One should note that the previous (0.7) version of BPbl2seq used | |
| 79 slightly different syntax. That version had a bug and consequently the | |
| 80 old syntax has been eliminated. Attempts to use the old syntax will | |
| 81 return error messages explaining the (minor) recoding required to use | |
| 82 the current syntax. | |
| 83 | |
| 84 =head1 FEEDBACK | |
| 85 | |
| 86 =head2 Mailing Lists | |
| 87 | |
| 88 User feedback is an integral part of the evolution of this and other | |
| 89 Bioperl modules. Send your comments and suggestions preferably to one | |
| 90 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 91 | |
| 92 bioperl-l@bioperl.org - General discussion | |
| 93 http://bio.perl.org/MailList.html - About the mailing lists | |
| 94 | |
| 95 =head2 Reporting Bugs | |
| 96 | |
| 97 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 98 the bugs and their resolution. Bug reports can be submitted via | |
| 99 email or the web: | |
| 100 | |
| 101 bioperl-bugs@bio.perl.org | |
| 102 http://bugzilla.bioperl.org/ | |
| 103 | |
| 104 =head1 AUTHOR - Peter Schattner | |
| 105 | |
| 106 Email: schattner@alum.mit.edu | |
| 107 | |
| 108 =head1 ACKNOWLEDGEMENTS | |
| 109 | |
| 110 Based on work of: | |
| 111 Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), | |
| 112 Lorenz Pollak (lorenz@ist.org, bioperl port) | |
| 113 | |
| 114 =head1 CONTRIBUTORS | |
| 115 | |
| 116 Jason Stajich, jason@cgt.mc.duke.edu | |
| 117 | |
| 118 =cut | |
| 119 | |
| 120 #' | |
| 121 package Bio::Tools::BPbl2seq; | |
| 122 | |
| 123 use strict; | |
| 124 use vars qw(@ISA); | |
| 125 use Bio::Tools::BPlite; | |
| 126 use Bio::Root::Root; | |
| 127 use Bio::Root::IO; | |
| 128 use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct | |
| 129 use Bio::SeqAnalysisParserI; | |
| 130 use Symbol; | |
| 131 | |
| 132 @ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO); | |
| 133 | |
| 134 #@ISA = qw(Bio::Tools::BPlite); | |
| 135 | |
| 136 =head2 new | |
| 137 | |
| 138 Title : new | |
| 139 Function: Create a new Bio::Tools::BPbl2seq object | |
| 140 Returns : Bio::Tools::BPbl2seq | |
| 141 Args : -file input file (alternative to -fh) | |
| 142 -fh input stream (alternative to -file) | |
| 143 -queryname name of query sequence | |
| 144 | |
| 145 =cut | |
| 146 | |
| 147 sub new { | |
| 148 my ($class, @args) = @_; | |
| 149 my $self = $class->SUPER::new(@args); | |
| 150 # initialize IO | |
| 151 $self->_initialize_io(@args); | |
| 152 | |
| 153 my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME | |
| 154 REPORT_TYPE)], @args); | |
| 155 $queryname = 'unknown' if( ! defined $queryname ); | |
| 156 if( $rt && $rt =~ /BLAST/i ) { | |
| 157 $self->{'BLAST_TYPE'} = uc($rt); | |
| 158 } else { | |
| 159 $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects"); | |
| 160 } | |
| 161 my $sbjct = $self->getSbjct(); | |
| 162 $self->{'_current_sbjct'} = $sbjct; | |
| 163 | |
| 164 $self->{'_query'}->{'NAME'} = $queryname; | |
| 165 return $self; | |
| 166 } | |
| 167 | |
| 168 | |
| 169 =head2 getSbjct | |
| 170 | |
| 171 Title : | |
| 172 Usage : $sbjct = $obj->getSbjct(); | |
| 173 Function : Method of obtaining single "subject" of a bl2seq report | |
| 174 Example : my $sbjct = $obj->getSbjct ) {} | |
| 175 Returns : Sbjct object or null if finished | |
| 176 Args : | |
| 177 | |
| 178 =cut | |
| 179 | |
| 180 sub getSbjct { | |
| 181 my ($self) = @_; | |
| 182 # $self->_fastForward or return undef; | |
| 183 | |
| 184 ####################### | |
| 185 # get bl2seq "sbjct" name and length # | |
| 186 ####################### | |
| 187 my $length; | |
| 188 my $def; | |
| 189 READLOOP: while(defined ($_ = $self->_readline) ) { | |
| 190 if ($_ =~ /^>(.+)$/) { | |
| 191 $def = $1; | |
| 192 next READLOOP; | |
| 193 } | |
| 194 elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) { | |
| 195 $length = $1; | |
| 196 next READLOOP; | |
| 197 } | |
| 198 elsif ($_ =~ /^\s{0,2}Score/) { | |
| 199 $self->_pushback($_); | |
| 200 last READLOOP; | |
| 201 } | |
| 202 } | |
| 203 return undef if ! defined $def; | |
| 204 $def =~ s/\s+/ /g; | |
| 205 $def =~ s/\s+$//g; | |
| 206 | |
| 207 | |
| 208 #################### | |
| 209 # the Sbjct object # | |
| 210 #################### | |
| 211 my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, | |
| 212 '-length'=>$length, | |
| 213 '-parent'=>$self); | |
| 214 return $sbjct; | |
| 215 } | |
| 216 | |
| 217 | |
| 218 | |
| 219 | |
| 220 =head2 next_feature | |
| 221 | |
| 222 Title : next_feature | |
| 223 Usage : while( my $feat = $res->next_feature ) { # do something } | |
| 224 Function: calls next_feature function from BPlite. | |
| 225 Example : | |
| 226 Returns : A Bio::SeqFeatureI compliant object, in this case a | |
| 227 Bio::Tools::BPlite::HSP object, and FALSE if there are no more | |
| 228 HSPs. | |
| 229 Args : None | |
| 230 | |
| 231 =cut | |
| 232 | |
| 233 sub next_feature{ | |
| 234 my ($self) = @_; | |
| 235 my ($sbjct, $hsp); | |
| 236 $sbjct = $self->{'_current_sbjct'}; | |
| 237 unless( defined $sbjct ) { | |
| 238 $self->debug(" No hit object found for bl2seq report \n ") ; | |
| 239 return undef; | |
| 240 } | |
| 241 $hsp = $sbjct->nextHSP; | |
| 242 return $hsp || undef; | |
| 243 } | |
| 244 | |
| 245 =head2 queryName | |
| 246 | |
| 247 Title : | |
| 248 Usage : $name = $report->queryName(); | |
| 249 Function : get /set the name of the query | |
| 250 Example : | |
| 251 Returns : name of the query | |
| 252 Args : | |
| 253 | |
| 254 =cut | |
| 255 | |
| 256 sub queryName { | |
| 257 my ($self, $queryname) = @_; | |
| 258 if( $queryname ) { | |
| 259 $self->{'_query'}->{'NAME'} = $queryname; | |
| 260 } | |
| 261 $self->{'_query'}->{'NAME'}; | |
| 262 } | |
| 263 | |
| 264 =head2 sbjctName | |
| 265 | |
| 266 Title : | |
| 267 Usage : $name = $report->sbjctName(); | |
| 268 Function : returns the name of the Sbjct | |
| 269 Example : | |
| 270 Returns : name of the Sbjct | |
| 271 Args : | |
| 272 | |
| 273 =cut | |
| 274 | |
| 275 sub sbjctName { | |
| 276 my $self = shift; | |
| 277 # unless( defined $self->{'_current_sbjct'} ) { | |
| 278 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; | |
| 279 # return undef unless defined $sbjct; | |
| 280 # } | |
| 281 $self->{'_current_sbjct'}->{'NAME'} || ''; | |
| 282 } | |
| 283 | |
| 284 =head2 sbjctLength | |
| 285 | |
| 286 Title : sbjctLength | |
| 287 Usage : $length = $report->sbjctLength(); | |
| 288 Function : returns the length of the Sbjct | |
| 289 Example : | |
| 290 Returns : name of the Sbjct | |
| 291 Args : | |
| 292 | |
| 293 =cut | |
| 294 | |
| 295 sub sbjctLength { | |
| 296 my $self = shift; | |
| 297 # unless( defined $self->{'_current_sbjct'} ) { | |
| 298 # my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; | |
| 299 # return undef unless defined $sbjct; | |
| 300 # } | |
| 301 $self->{'_current_sbjct'}->{'LENGTH'}; | |
| 302 } | |
| 303 | |
| 304 =head2 P | |
| 305 | |
| 306 Title : P | |
| 307 Usage : | |
| 308 Function : Syntax no longer supported, error message only | |
| 309 | |
| 310 =cut | |
| 311 | |
| 312 sub P { | |
| 313 my $self = shift; | |
| 314 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
| 315 } | |
| 316 | |
| 317 =head2 percent | |
| 318 | |
| 319 Title : percent | |
| 320 Usage : $hsp->percent(); | |
| 321 Function : Syntax no longer supported, error message only | |
| 322 | |
| 323 =cut | |
| 324 | |
| 325 sub percent { | |
| 326 my $self = shift; | |
| 327 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
| 328 } | |
| 329 | |
| 330 =head2 match | |
| 331 | |
| 332 Title : match | |
| 333 Usage : $hsp->match(); | |
| 334 Function : Syntax no longer supported, error message only | |
| 335 | |
| 336 =cut | |
| 337 | |
| 338 sub match { | |
| 339 my $self = shift; | |
| 340 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n "); | |
| 341 } | |
| 342 | |
| 343 =head2 positive | |
| 344 | |
| 345 Title : positive | |
| 346 Usage : $hsp->positive(); | |
| 347 Function : Syntax no longer supported, error message only | |
| 348 | |
| 349 =cut | |
| 350 | |
| 351 sub positive { | |
| 352 my $self = shift; | |
| 353 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 354 } | |
| 355 | |
| 356 =head2 querySeq | |
| 357 | |
| 358 Title : querySeq | |
| 359 Usage : $hsp->querySeq(); | |
| 360 Function : Syntax no longer supported, error message only | |
| 361 | |
| 362 =cut | |
| 363 | |
| 364 sub querySeq { | |
| 365 my $self = shift; | |
| 366 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 367 } | |
| 368 | |
| 369 =head2 sbjctSeq | |
| 370 | |
| 371 Title : sbjctSeq | |
| 372 Usage : $hsp->sbjctSeq(); | |
| 373 Function : Syntax no longer supported, error message only | |
| 374 | |
| 375 =cut | |
| 376 | |
| 377 sub sbjctSeq { | |
| 378 my $self = shift; | |
| 379 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 380 } | |
| 381 | |
| 382 =head2 homologySeq | |
| 383 | |
| 384 Title : homologySeq | |
| 385 Usage : $hsp->homologySeq(); | |
| 386 Function : Syntax no longer supported, error message only | |
| 387 | |
| 388 =cut | |
| 389 | |
| 390 sub homologySeq { | |
| 391 my $self = shift; | |
| 392 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 393 } | |
| 394 | |
| 395 =head2 qs | |
| 396 | |
| 397 Title : qs | |
| 398 Usage : $hsp->qs(); | |
| 399 Function : Syntax no longer supported, error message only | |
| 400 | |
| 401 =cut | |
| 402 | |
| 403 sub qs { | |
| 404 my $self = shift; | |
| 405 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 406 } | |
| 407 | |
| 408 =head2 ss | |
| 409 | |
| 410 Title : ss | |
| 411 Usage : $hsp->ss(); | |
| 412 Function : Syntax no longer supported, error message only | |
| 413 | |
| 414 =cut | |
| 415 | |
| 416 sub ss { | |
| 417 my $self = shift; | |
| 418 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 419 } | |
| 420 | |
| 421 =head2 hs | |
| 422 | |
| 423 Title : hs | |
| 424 Usage : $hsp->hs(); | |
| 425 Function : Syntax no longer supported, error message only | |
| 426 | |
| 427 =cut | |
| 428 | |
| 429 sub hs { | |
| 430 my $self = shift; | |
| 431 $self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ; | |
| 432 } | |
| 433 | |
| 434 sub _fastForward { | |
| 435 my ($self) = @_; | |
| 436 return 0 if $self->{'REPORT_DONE'}; # empty report | |
| 437 while(defined( $_ = $self->_readline() ) ) { | |
| 438 if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) { | |
| 439 $self->_pushback($_); | |
| 440 return 1; | |
| 441 } | |
| 442 } | |
| 443 $self->warn("Possible error (1) while parsing BLAST report!"); | |
| 444 } | |
| 445 | |
| 446 1; | |
| 447 __END__ |
