Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SearchIO/fasta.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: fasta.pm,v 1.33.2.3 2003/08/28 16:01:03 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SearchIO::fasta | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SearchIO::fasta - A SearchIO parser for FASTA results | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # Do not use this object directly, use it through the SearchIO system | |
| 20 use Bio::SearchIO; | |
| 21 my $searchio = new Bio::SearchIO(-format => 'fasta', | |
| 22 -file => 'report.FASTA'); | |
| 23 while( my $result = $searchio->next_result ) { | |
| 24 # .... | |
| 25 } | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 This object contains the event based parsing code for FASTA format reports. | |
| 30 | |
| 31 =head1 FEEDBACK | |
| 32 | |
| 33 =head2 Mailing Lists | |
| 34 | |
| 35 User feedback is an integral part of the evolution of this and other | |
| 36 Bioperl modules. Send your comments and suggestions preferably to | |
| 37 the Bioperl mailing list. Your participation is much appreciated. | |
| 38 | |
| 39 bioperl-l@bioperl.org - General discussion | |
| 40 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 41 | |
| 42 =head2 Reporting Bugs | |
| 43 | |
| 44 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 45 of the bugs and their resolution. Bug reports can be submitted via | |
| 46 email or the web: | |
| 47 | |
| 48 bioperl-bugs@bioperl.org | |
| 49 http://bugzilla.bioperl.org/ | |
| 50 | |
| 51 =head1 AUTHOR - Jason Stajich | |
| 52 | |
| 53 Email jason@bioperl.org | |
| 54 | |
| 55 Describe contact details here | |
| 56 | |
| 57 =head1 CONTRIBUTORS | |
| 58 | |
| 59 Additional contributors names and emails here | |
| 60 | |
| 61 =head1 APPENDIX | |
| 62 | |
| 63 The rest of the documentation details each of the object methods. | |
| 64 Internal methods are usually preceded with a _ | |
| 65 | |
| 66 =cut | |
| 67 | |
| 68 | |
| 69 # Let the code begin... | |
| 70 | |
| 71 | |
| 72 package Bio::SearchIO::fasta; | |
| 73 use vars qw(@ISA %MODEMAP %MAPPING $IDLENGTH); | |
| 74 use strict; | |
| 75 | |
| 76 # Object preamble - inherits from Bio::Root::RootI | |
| 77 | |
| 78 use Bio::SearchIO; | |
| 79 use POSIX; | |
| 80 | |
| 81 BEGIN { | |
| 82 # Set IDLENGTH to a new value if you have | |
| 83 # compile FASTA with a different ID length | |
| 84 # (actually newest FASTA allows the setting of this | |
| 85 # via -C parameter, default is 6) | |
| 86 $IDLENGTH = 6; | |
| 87 | |
| 88 # mapping of NCBI Blast terms to Bioperl hash keys | |
| 89 %MODEMAP = ('FastaOutput' => 'result', | |
| 90 'Hit' => 'hit', | |
| 91 'Hsp' => 'hsp' | |
| 92 ); | |
| 93 | |
| 94 # This should really be done more intelligently, like with | |
| 95 # XSLT | |
| 96 | |
| 97 %MAPPING = | |
| 98 ( | |
| 99 'Hsp_bit-score' => 'HSP-bits', | |
| 100 'Hsp_score' => 'HSP-score', | |
| 101 'Hsp_sw-score' => 'HSP-swscore', | |
| 102 'Hsp_evalue' => 'HSP-evalue', | |
| 103 'Hsp_query-from'=> 'HSP-query_start', | |
| 104 'Hsp_query-to' => 'HSP-query_end', | |
| 105 'Hsp_hit-from' => 'HSP-hit_start', | |
| 106 'Hsp_hit-to' => 'HSP-hit_end', | |
| 107 'Hsp_positive' => 'HSP-conserved', | |
| 108 'Hsp_identity' => 'HSP-identical', | |
| 109 'Hsp_gaps' => 'HSP-hsp_gaps', | |
| 110 'Hsp_hitgaps' => 'HSP-hit_gaps', | |
| 111 'Hsp_querygaps' => 'HSP-query_gaps', | |
| 112 'Hsp_qseq' => 'HSP-query_seq', | |
| 113 'Hsp_hseq' => 'HSP-hit_seq', | |
| 114 'Hsp_midline' => 'HSP-homology_seq', | |
| 115 'Hsp_align-len' => 'HSP-hsp_length', | |
| 116 'Hsp_query-frame'=> 'HSP-query_frame', | |
| 117 'Hsp_hit-frame' => 'HSP-hit_frame', | |
| 118 | |
| 119 'Hit_id' => 'HIT-name', | |
| 120 'Hit_len' => 'HIT-length', | |
| 121 'Hit_accession' => 'HIT-accession', | |
| 122 'Hit_def' => 'HIT-description', | |
| 123 'Hit_signif' => 'HIT-significance', | |
| 124 'Hit_score' => 'HIT-score', | |
| 125 | |
| 126 'FastaOutput_program' => 'RESULT-algorithm_name', | |
| 127 'FastaOutput_version' => 'RESULT-algorithm_version', | |
| 128 'FastaOutput_query-def'=> 'RESULT-query_name', | |
| 129 'FastaOutput_querydesc'=> 'RESULT-query_description', | |
| 130 'FastaOutput_query-len'=> 'RESULT-query_length', | |
| 131 'FastaOutput_db' => 'RESULT-database_name', | |
| 132 'FastaOutput_db-len' => 'RESULT-database_entries', | |
| 133 'FastaOutput_db-let' => 'RESULT-database_letters', | |
| 134 | |
| 135 'Parameters_matrix' => { 'RESULT-parameters' => 'matrix'}, | |
| 136 'Parameters_expect' => { 'RESULT-parameters' => 'expect'}, | |
| 137 'Parameters_include' => { 'RESULT-parameters' => 'include'}, | |
| 138 'Parameters_sc-match' => { 'RESULT-parameters' => 'match'}, | |
| 139 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'}, | |
| 140 'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen'}, | |
| 141 'Parameters_gap-ext' => { 'RESULT-parameters' => 'gapext'}, | |
| 142 'Parameters_word-size' => { 'RESULT-parameters' => 'wordsize'}, | |
| 143 'Parameters_ktup' => { 'RESULT-parameters' => 'ktup'}, | |
| 144 'Parameters_filter' => {'RESULT-parameters' => 'filter'}, | |
| 145 'Statistics_db-num' => { 'RESULT-statistics' => 'dbentries'}, | |
| 146 'Statistics_db-len' => { 'RESULT-statistics' => 'dbletters'}, | |
| 147 'Statistics_hsp-len' => { 'RESULT-statistics' => 'hsplength'}, | |
| 148 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'}, | |
| 149 'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' }, | |
| 150 'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' }, | |
| 151 'Statistics_entropy' => { 'RESULT-statistics' => 'entropy'}, | |
| 152 ); | |
| 153 } | |
| 154 | |
| 155 | |
| 156 @ISA = qw(Bio::SearchIO ); | |
| 157 | |
| 158 =head2 new | |
| 159 | |
| 160 Title : new | |
| 161 Usage : my $obj = new Bio::SearchIO::fasta(); | |
| 162 Function: Builds a new Bio::SearchIO::fasta object | |
| 163 Returns : Bio::SearchIO::fasta | |
| 164 Args : -idlength - set ID length to something other | |
| 165 than the default (7), this is only | |
| 166 necessary if you have compiled FASTA | |
| 167 with a new default id length to display | |
| 168 in the HSP alignment blocks | |
| 169 | |
| 170 =cut | |
| 171 | |
| 172 sub _initialize { | |
| 173 my($self,@args) = @_; | |
| 174 $self->SUPER::_initialize(@args); | |
| 175 return unless @args; | |
| 176 my ($idlength) = $self->_rearrange([qw(IDLENGTH)],@args); | |
| 177 $self->idlength($idlength || $IDLENGTH); | |
| 178 $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::FastaHSP')); | |
| 179 | |
| 180 return 1; | |
| 181 } | |
| 182 | |
| 183 =head2 next_result | |
| 184 | |
| 185 Title : next_result | |
| 186 Usage : my $hit = $searchio->next_result; | |
| 187 Function: Returns the next Result from a search | |
| 188 Returns : Bio::Search::Result::ResultI object | |
| 189 Args : none | |
| 190 | |
| 191 =cut | |
| 192 | |
| 193 sub next_result{ | |
| 194 my ($self) = @_; | |
| 195 | |
| 196 my $data = ''; | |
| 197 my $seentop = 0; | |
| 198 my $current_hsp; | |
| 199 $self->start_document(); | |
| 200 my @hit_signifs; | |
| 201 while( defined ($_ = $self->_readline )) { | |
| 202 next if( ! $self->in_element('hsp') && | |
| 203 /^\s+$/); # skip empty lines | |
| 204 if( /(\S+)\s+searches\s+a\s+((protein\s+or\s+DNA\s+sequence)|(sequence\s+database))/i || /(\S+) compares a/ || | |
| 205 ( m/^# / && ($_ = $self->_readline) && | |
| 206 /(\S+)\s+searches\s+a\s+((protein\s+or\s+DNA\s+sequence)|(sequence\s+database))/i || /(\S+) compares a/ | |
| 207 ) | |
| 208 ) { | |
| 209 if( $seentop ) { | |
| 210 $self->_pushback($_); | |
| 211 $self->end_element({ 'Name' => 'FastaOutput'}); | |
| 212 return $self->end_document(); | |
| 213 } | |
| 214 $self->{'_reporttype'} = $1; | |
| 215 $self->start_element({ 'Name' => 'FastaOutput' } ); | |
| 216 $self->{'_result_count'}++; | |
| 217 $seentop = 1; | |
| 218 | |
| 219 $self->element({ 'Name' => 'FastaOutput_program', | |
| 220 'Data' => $self->{'_reporttype'}}); | |
| 221 $_ = $self->_readline(); | |
| 222 my ($version) = (/version\s+(\S+)/); | |
| 223 $version = '' unless defined $version; | |
| 224 $self->{'_version'} = $version; | |
| 225 $self->element({ 'Name' => 'FastaOutput_version', | |
| 226 'Data' => $version}); | |
| 227 | |
| 228 my ($last, $leadin, $type, $querylen, $querytype, $querydef); | |
| 229 | |
| 230 while( defined($_ = $self->_readline()) ) { | |
| 231 if( /^ ( | |
| 232 (?:\s+>) | # fa33 lead-in | |
| 233 (?:\s*\d+\s*>>>) # fa34 mlib lead-in | |
| 234 ) | |
| 235 (.*) | |
| 236 /x | |
| 237 ) { | |
| 238 ($leadin, $querydef) = ($1, $2); | |
| 239 if ($leadin =~ m/>>>/) { | |
| 240 if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) { | |
| 241 ($querydef, $querylen, $querytype) = ($1, $2, $3); | |
| 242 last; | |
| 243 } | |
| 244 } else { | |
| 245 if( $last =~ /(\S+)[:,]\s*(\d+)\s+(aa|nt)/ ) { | |
| 246 ($querylen, $querytype) = ($2, $3); | |
| 247 $querydef ||= $1; | |
| 248 last; | |
| 249 } | |
| 250 } | |
| 251 } elsif ( m/^\s*vs\s+\S+/o ) { | |
| 252 if ( $last =~ /(\S+)[,:]\s+(\d+)\s+(aa|nt)/o) { | |
| 253 ($querydef, $querylen, $querytype) = ($1, $2, $3); | |
| 254 last; | |
| 255 } | |
| 256 } | |
| 257 $last = $_; | |
| 258 } | |
| 259 | |
| 260 if( $self->{'_reporttype'} && | |
| 261 $self->{'_reporttype'} eq 'FASTA' | |
| 262 ) { | |
| 263 if( $querytype eq 'nt') { | |
| 264 $self->{'_reporttype'} = 'FASTN' ; | |
| 265 } elsif( $querytype eq 'aa' ) { | |
| 266 $self->{'_reporttype'} = 'FASTP' ; | |
| 267 } | |
| 268 } | |
| 269 my ($name, $descr) = $querydef =~ m/^(\S+)\s*(.*?)\s*$/o; | |
| 270 $self->element({'Name' => 'FastaOutput_query-def', | |
| 271 'Data' => $name}); | |
| 272 $self->element({'Name' => 'FastaOutput_querydesc', | |
| 273 'Data' => $descr}); | |
| 274 if ($querylen) { | |
| 275 $self->element({'Name' => 'FastaOutput_query-len', | |
| 276 'Data' => $querylen}); | |
| 277 } else { | |
| 278 $self->warn("unable to find and set query length"); | |
| 279 } | |
| 280 | |
| 281 if( $last =~ /^\s*vs\s+(\S+)/ || | |
| 282 ($last =~ /^searching\s+(\S+)\s+library/) || | |
| 283 (defined $_ && /^\s*vs\s+(\S+)/) || | |
| 284 (defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/) | |
| 285 ) { | |
| 286 $self->element({'Name' => 'FastaOutput_db', | |
| 287 'Data' => $1}); | |
| 288 } elsif (m/^\s+opt(?:\s+E\(\))?$/o) { | |
| 289 # histogram ... read over it more rapidly than the larger outer loop: | |
| 290 while (defined($_ = $self->_readline)) { | |
| 291 last if m/^>\d+/; | |
| 292 } | |
| 293 } | |
| 294 | |
| 295 } elsif( /(\d+) residues in\s+(\d+)\s+sequences/ ) { | |
| 296 $self->element({'Name' => 'FastaOutput_db-let', | |
| 297 'Data' => $1}); | |
| 298 $self->element({'Name' => 'FastaOutput_db-len', | |
| 299 'Data' => $2}); | |
| 300 $self->element({'Name' => 'Statistics_db-len', | |
| 301 'Data' => $1}); | |
| 302 $self->element({'Name' => 'Statistics_db-num', | |
| 303 'Data' => $2}); | |
| 304 } elsif( /Lambda=\s*(\S+)/ ) { | |
| 305 $self->element({'Name' => 'Statistics_lambda', | |
| 306 'Data' => $1}); | |
| 307 } elsif (/K=\s*(\S+)/) { | |
| 308 $self->element({'Name' => 'Statistics_kappa', | |
| 309 'Data' => $1}); | |
| 310 } elsif( /^\s*(Smith-Waterman).+(\S+)\s*matrix [^\]]*?(xS)?\]/ ) { | |
| 311 $self->element({'Name' => 'Parameters_matrix', | |
| 312 'Data' => $2}); | |
| 313 $self->element({'Name' => 'Parameters_filter', | |
| 314 'Data' => defined $3 ? 1 : 0, | |
| 315 }); | |
| 316 $self->{'_reporttype'} = $1; | |
| 317 | |
| 318 $self->element({ 'Name' => 'FastaOutput_program', | |
| 319 'Data' => $self->{'_reporttype'}}); | |
| 320 | |
| 321 } elsif( /The best( related| unrelated)? scores are:/ ) { | |
| 322 my $rel = $1; | |
| 323 my @labels = split; | |
| 324 @labels = map { | |
| 325 if ($_ =~ m/^E\((\d+)\)$/o) { | |
| 326 $self->element({'Name' => 'Statistics_eff-space', 'Data' => $1}); | |
| 327 "evalue"; | |
| 328 } else { | |
| 329 $_; | |
| 330 } | |
| 331 } @labels[$rel ? 5 : 4 .. $#labels]; | |
| 332 | |
| 333 while( defined ($_ = $self->_readline() ) && | |
| 334 ! /^\s+$/ ) { | |
| 335 my @line = split; | |
| 336 | |
| 337 if ($line[-1] =~ m/\=/o && $labels[-1] eq 'fs') { | |
| 338 # unlabelled alignment hit; | |
| 339 push @labels, "aln_code"; | |
| 340 } | |
| 341 | |
| 342 my %data; | |
| 343 @data{@labels} = splice(@line, @line - @labels); | |
| 344 if ($line[-1] =~ m/\[([1-6rf])\]/o) { | |
| 345 my $fr = $1; | |
| 346 $data{lframe} = ($fr =~ /\d/o ? | |
| 347 ($fr <= 3 ? "+$fr" : "-@{[$fr-3]}") : | |
| 348 ($fr eq 'f' ? '+1' : '-1') | |
| 349 ); | |
| 350 pop @line; | |
| 351 } else { | |
| 352 $data{lframe} = '0'; | |
| 353 } | |
| 354 | |
| 355 if ($line[-1] =~ m/^\(?(\d+)\)$/) { | |
| 356 $data{hit_len} = $1; | |
| 357 pop @line; | |
| 358 if ($line[-1] =~ m/^\($/) { | |
| 359 pop @line; | |
| 360 } | |
| 361 } else { | |
| 362 $data{hit_len} = 0; | |
| 363 } | |
| 364 | |
| 365 # rebuild the first part of the line, preserving spaces: | |
| 366 ($_) = m/^(\S+(?:\s+\S+){$#line})/; | |
| 367 | |
| 368 my ($id, $desc) = split(/\s+/,$_,2); | |
| 369 my @pieces = split(/\|/,$id); | |
| 370 my $acc = pop @pieces; | |
| 371 $acc =~ s/\.\d+$//; | |
| 372 | |
| 373 @data{qw(id desc acc)} = ($id, $desc, $acc); | |
| 374 | |
| 375 push @hit_signifs, \%data; | |
| 376 } | |
| 377 } elsif( /^\s*([T]?FAST[XYAF]).+,\s*(\S+)\s*matrix[^\]]+?(xS)?\]\s*ktup:\s*(\d+)/ ) { | |
| 378 $self->element({'Name' => 'Parameters_matrix', | |
| 379 'Data' => $2}); | |
| 380 $self->element({'Name' => 'Parameters_filter', | |
| 381 'Data' => defined $3 ? 1 : 0, | |
| 382 }); | |
| 383 $self->element({'Name' => 'Parameters_ktup', | |
| 384 'Data' => $4}); | |
| 385 $self->{'_reporttype'} = $1 if( $self->{'_reporttype'} !~ /FAST[PN]/i ) ; | |
| 386 | |
| 387 $self->element({ 'Name' => 'FastaOutput_program', | |
| 388 'Data' => $self->{'_reporttype'}}); | |
| 389 | |
| 390 } elsif( /(?:gap\-pen|open\/ext):\s+([\-\+]?\d+)\s*\/\s*([\-\+]?\d+).+width:\s+(\d+)/ ) { | |
| 391 $self->element({'Name' => 'Parameters_gap-open', | |
| 392 'Data' => $1}); | |
| 393 $self->element({'Name' => 'Parameters_gap-ext', | |
| 394 'Data' => $2}); | |
| 395 $self->element({'Name' => 'Parameters_word-size', | |
| 396 'Data' => $3}); | |
| 397 } elsif( /^>>(.+?)\s+\((\d+)\s*(aa|nt)\)$/ ) { | |
| 398 if( $self->in_element('hsp') ) { | |
| 399 $self->end_element({ 'Name' => 'Hsp'}); | |
| 400 } | |
| 401 if( $self->in_element('hit') ) { | |
| 402 $self->end_element({ 'Name' => 'Hit'}); | |
| 403 } | |
| 404 | |
| 405 $self->start_element({'Name' => 'Hit'}); | |
| 406 $self->element({ 'Name' => 'Hit_len', | |
| 407 'Data' => $2}); | |
| 408 my ($id,$desc) = split(/\s+/,$1,2); | |
| 409 $self->element({ 'Name' => 'Hit_id', | |
| 410 'Data' => $id}); | |
| 411 my @pieces = split(/\|/,$id); | |
| 412 my $acc = pop @pieces; | |
| 413 $acc =~ s/\.\d+$//; | |
| 414 $self->element({ 'Name' => 'Hit_accession', | |
| 415 'Data' => $acc}); | |
| 416 $self->element({ 'Name' => 'Hit_def', | |
| 417 'Data' => $desc}); | |
| 418 | |
| 419 $_ = $self->_readline(); | |
| 420 my ($score,$bits,$e) = /Z-score: \s* (\S+) \s* | |
| 421 (?: bits: \s* (\S+) \s+ )? | |
| 422 (?: E|expect ) \s* \(\) :? \s*(\S+)/x; | |
| 423 $bits = $score unless defined $bits; | |
| 424 | |
| 425 my $v = shift @hit_signifs; | |
| 426 if( defined $v ) { | |
| 427 @{$v}{qw(evalue bits z-sc)} = ($e, $bits, $score); | |
| 428 } | |
| 429 | |
| 430 $self->element({'Name' => 'Hit_signif', | |
| 431 'Data' => $v ? $v->{evalue} : $e }); | |
| 432 $self->element({'Name' => 'Hit_score', | |
| 433 'Data' => $v ? $v->{bits} : $bits }); | |
| 434 $self->start_element({'Name' => 'Hsp'}); | |
| 435 | |
| 436 $self->element({'Name' => 'Hsp_score', | |
| 437 'Data' => $v ? $v->{'z-sc'} : $score }); | |
| 438 $self->element({'Name' => 'Hsp_evalue', | |
| 439 'Data' => $v ? $v->{evalue} : $e }); | |
| 440 $self->element({'Name' => 'Hsp_bit-score', | |
| 441 'Data' => $v ? $v->{bits} : $bits }); | |
| 442 $_ = $self->_readline(); | |
| 443 if( /Smith-Waterman score:\s*(\d+)/ ) { | |
| 444 $self->element({'Name' => 'Hsp_sw-score', | |
| 445 'Data' => $1}); | |
| 446 } | |
| 447 if( / (\S+)\% \s* identity | |
| 448 (?:\s* \( \s* (\S+)\% \s* ungapped \) )? | |
| 449 \s* in \s* (\d+) \s+ (?:aa|nt) \s+ overlap \s* | |
| 450 \( (\d+) \- (\d+) : (\d+) \- (\d+) \) | |
| 451 /x ) { | |
| 452 my ($identper,$gapper,$len,$querystart, | |
| 453 $queryend,$hitstart,$hitend) = ($1,$2,$3,$4,$5,$6,$7); | |
| 454 my $ident = POSIX::ceil(($identper/100) * $len); | |
| 455 my $gaps = ( defined $gapper ) ? POSIX::ceil ( ($gapper/100) * $len) : undef; | |
| 456 | |
| 457 $self->element({'Name' => 'Hsp_gaps', | |
| 458 'Data' => $gaps}); | |
| 459 $self->element({'Name' => 'Hsp_identity', | |
| 460 'Data' => $ident}); | |
| 461 $self->element({'Name' => 'Hsp_positive', | |
| 462 'Data' => $ident}); | |
| 463 $self->element({'Name' => 'Hsp_align-len', | |
| 464 'Data' => $len}); | |
| 465 | |
| 466 $self->element({'Name' => 'Hsp_query-from', | |
| 467 'Data' => $querystart}); | |
| 468 $self->element({'Name' => 'Hsp_query-to', | |
| 469 'Data' => $queryend}); | |
| 470 $self->element({'Name' => 'Hsp_hit-from', | |
| 471 'Data' => $hitstart}); | |
| 472 $self->element({'Name' => 'Hsp_hit-to', | |
| 473 'Data' => $hitend}); | |
| 474 | |
| 475 } | |
| 476 | |
| 477 if ($v) { | |
| 478 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $v->{qgaps} }) if exists $v->{qgaps}; | |
| 479 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $v->{lgaps} }) if exists $v->{lgaps}; | |
| 480 | |
| 481 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) { | |
| 482 if( 8 == scalar grep { exists $v->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) { | |
| 483 if ($v->{ax0} < $v->{an0}) { | |
| 484 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($v->{px0} - $v->{ax0}) % 3) + 1]}" }); | |
| 485 } else { | |
| 486 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($v->{an0} - $v->{pn0}) % 3) + 1]}" }); | |
| 487 } | |
| 488 if ($v->{ax1} < $v->{an1}) { | |
| 489 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($v->{px1} - $v->{ax1}) % 3) + 1]}" }); | |
| 490 } else { | |
| 491 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($v->{an1} - $v->{pn1}) % 3) + 1]}" }); | |
| 492 } | |
| 493 } else { | |
| 494 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $v->{lframe} }); | |
| 495 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 }); | |
| 496 } | |
| 497 } else { | |
| 498 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 }); | |
| 499 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $v->{lframe} }); | |
| 500 } | |
| 501 | |
| 502 } else { | |
| 503 $self->warn( "unable to parse FASTA score line: $_"); | |
| 504 } | |
| 505 } elsif( /\d+\s*residues\s*in\s*\d+\s*query\s*sequences/ ) { | |
| 506 if( $self->in_element('hsp') ) { | |
| 507 $self->end_element({'Name' => 'Hsp'}); | |
| 508 } | |
| 509 if( $self->in_element('hit') ) { | |
| 510 $self->end_element({'Name' => 'Hit'}); | |
| 511 } | |
| 512 | |
| 513 # $_ = $self->_readline(); | |
| 514 # my ( $liblen,$libsize) = /(\d+)\s+residues\s*in(\d+)\s*library/; | |
| 515 # fast forward to the end of the file as there is | |
| 516 # nothing else left to do with this file and want to be sure and | |
| 517 # reset it | |
| 518 while(defined($_ = $self->_readline() ) ) { | |
| 519 last if( /^Function used was/); | |
| 520 if( /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+sequence)|(sequence\s+database)/ ) { | |
| 521 $self->_pushback($_); | |
| 522 } | |
| 523 } | |
| 524 | |
| 525 if (@hit_signifs) { | |
| 526 # process remaining best hits | |
| 527 for my $h (@hit_signifs) { | |
| 528 # Hsp_score Hsp_evalue Hsp_bit-score | |
| 529 # Hsp_sw-score Hsp_gaps Hsp_identity Hsp_positive | |
| 530 # Hsp_align-len Hsp_query-from Hsp_query-to | |
| 531 # Hsp_hit-from Hsp_hit-to Hsp_qseq Hsp_midline | |
| 532 | |
| 533 $self->start_element({'Name' => 'Hit'}); | |
| 534 $self->element({ 'Name' => 'Hit_len', | |
| 535 'Data' => $h->{hit_len} | |
| 536 }) if exists $h->{hit_len}; | |
| 537 $self->element({ 'Name' => 'Hit_id', | |
| 538 'Data' => $h->{id} | |
| 539 }) if exists $h->{id}; | |
| 540 $self->element({ 'Name' => 'Hit_accession', | |
| 541 'Data' => $h->{acc} | |
| 542 }) if exists $h->{acc}; | |
| 543 $self->element({ 'Name' => 'Hit_def', | |
| 544 'Data' => $h->{desc} | |
| 545 }) if exists $h->{desc}; | |
| 546 $self->element({'Name' => 'Hit_signif', | |
| 547 'Data' => $h->{evalue} | |
| 548 }) if exists $h->{evalue}; | |
| 549 $self->element({'Name' => 'Hit_score', | |
| 550 'Data' => $h->{bits} | |
| 551 }) if exists $h->{bits}; | |
| 552 | |
| 553 $self->start_element({'Name' => 'Hsp'}); | |
| 554 $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'}; | |
| 555 $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue}; | |
| 556 $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits}; | |
| 557 $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw}; | |
| 558 $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'}; | |
| 559 $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) }) | |
| 560 if (exists $h->{'%_id'} && exists $h->{alen}); | |
| 561 $self->element({'Name' => 'Hsp_positive', 'Data' => 100 * $h->{'%_id'} }) if exists $h->{'%_id'}; | |
| 562 $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen}; | |
| 563 $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0}; | |
| 564 $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0}; | |
| 565 $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1}; | |
| 566 $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1}; | |
| 567 | |
| 568 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps}; | |
| 569 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps}; | |
| 570 | |
| 571 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) { | |
| 572 if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) { | |
| 573 if ($h->{ax0} < $h->{an0}) { | |
| 574 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" }); | |
| 575 } else { | |
| 576 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" }); | |
| 577 } | |
| 578 if ($h->{ax1} < $h->{an1}) { | |
| 579 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" }); | |
| 580 } else { | |
| 581 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" }); | |
| 582 } | |
| 583 } else { | |
| 584 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} }); | |
| 585 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 }); | |
| 586 } | |
| 587 } else { | |
| 588 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 }); | |
| 589 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} }); | |
| 590 } | |
| 591 | |
| 592 $self->end_element({'Name' => 'Hsp'}); | |
| 593 $self->end_element({'Name' => 'Hit'}); | |
| 594 } | |
| 595 } | |
| 596 | |
| 597 $self->end_element({ 'Name' => 'FastaOutput'}); | |
| 598 return $self->end_document(); | |
| 599 } elsif( /^\s*\d+\s*>>>/) { | |
| 600 if ($self->within_element('FastaOutput')) { | |
| 601 if( $self->in_element('hsp') ) { | |
| 602 $self->end_element({'Name' => 'Hsp'}); | |
| 603 } | |
| 604 if( $self->in_element('hit') ) { | |
| 605 $self->end_element({'Name' => 'Hit'}); | |
| 606 } | |
| 607 | |
| 608 if (@hit_signifs) { | |
| 609 # process remaining best hits | |
| 610 for my $h (@hit_signifs) { | |
| 611 $self->start_element({'Name' => 'Hit'}); | |
| 612 $self->element({ 'Name' => 'Hit_len', | |
| 613 'Data' => $h->{hit_len} | |
| 614 }) if exists $h->{hit_len}; | |
| 615 $self->element({ 'Name' => 'Hit_id', | |
| 616 'Data' => $h->{id} | |
| 617 }) if exists $h->{id}; | |
| 618 $self->element({ 'Name' => 'Hit_accession', | |
| 619 'Data' => $h->{acc} | |
| 620 }) if exists $h->{acc}; | |
| 621 $self->element({ 'Name' => 'Hit_def', | |
| 622 'Data' => $h->{desc} | |
| 623 }) if exists $h->{desc}; | |
| 624 $self->element({'Name' => 'Hit_signif', | |
| 625 'Data' => $h->{evalue} | |
| 626 }) if exists $h->{evalue}; | |
| 627 $self->element({'Name' => 'Hit_score', | |
| 628 'Data' => $h->{bits} | |
| 629 }) if exists $h->{bits}; | |
| 630 | |
| 631 $self->start_element({'Name' => 'Hsp'}); | |
| 632 $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'}; | |
| 633 $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue}; | |
| 634 $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits}; | |
| 635 $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw}; | |
| 636 $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'}; | |
| 637 $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) }) | |
| 638 if (exists $h->{'%_id'} && exists $h->{alen}); | |
| 639 $self->element({'Name' => 'Hsp_positive', 'Data' => $h->{'%_id'} }) if exists $h->{'%_id'}; | |
| 640 $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen}; | |
| 641 $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0}; | |
| 642 $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0}; | |
| 643 $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1}; | |
| 644 $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1}; | |
| 645 | |
| 646 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps}; | |
| 647 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps}; | |
| 648 | |
| 649 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) { | |
| 650 if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) { | |
| 651 if ($h->{ax0} < $h->{an0}) { | |
| 652 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" }); | |
| 653 } else { | |
| 654 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" }); | |
| 655 } | |
| 656 if ($h->{ax1} < $h->{an1}) { | |
| 657 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" }); | |
| 658 } else { | |
| 659 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" }); | |
| 660 } | |
| 661 } else { | |
| 662 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} }); | |
| 663 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 }); | |
| 664 } | |
| 665 } else { | |
| 666 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 }); | |
| 667 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} }); | |
| 668 } | |
| 669 | |
| 670 $self->end_element({'Name' => 'Hsp'}); | |
| 671 $self->end_element({'Name' => 'Hit'}); | |
| 672 } | |
| 673 } | |
| 674 $self->end_element({ 'Name' => 'FastaOutput' }); | |
| 675 $self->_pushback($_); | |
| 676 return $self->end_document(); | |
| 677 } else { | |
| 678 $self->start_element({ 'Name' => 'FastaOutput' }); | |
| 679 $self->{'_result_count'}++; | |
| 680 $seentop = 1; | |
| 681 $self->element({ 'Name' => 'FastaOutput_program', | |
| 682 'Data' => $self->{'_reporttype'} }); | |
| 683 $self->element({ 'Name' => 'FastaOutput_version', | |
| 684 'Data' => $self->{'_version'} }); | |
| 685 | |
| 686 my ($type, $querylen, $querytype, $querydef); | |
| 687 | |
| 688 if( /^\s*\d+\s*>>>(.*)/ ) { | |
| 689 $querydef = $1; | |
| 690 if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) { | |
| 691 ($querydef, $querylen, $querytype) = ($1, $2, $3); | |
| 692 } | |
| 693 } | |
| 694 | |
| 695 if( $self->{'_reporttype'} && | |
| 696 $self->{'_reporttype'} eq 'FASTA' | |
| 697 ) { | |
| 698 if( $querytype eq 'nt') { | |
| 699 $self->{'_reporttype'} = 'FASTN' ; | |
| 700 } elsif( $querytype eq 'aa' ) { | |
| 701 $self->{'_reporttype'} = 'FASTP' ; | |
| 702 } | |
| 703 } | |
| 704 my ($name,$descr) = ($querydef =~ m/^(\S+)(?:\s+(.*))?\s*$/o); | |
| 705 $self->element({'Name' => 'FastaOutput_query-def', | |
| 706 'Data' => $name}); | |
| 707 $self->element({'Name' => 'FastaOutput_querydesc', | |
| 708 'Data' => $descr}); | |
| 709 if ($querylen) { | |
| 710 $self->element({'Name' => 'FastaOutput_query-len', | |
| 711 'Data' => $querylen}); | |
| 712 } else { | |
| 713 $self->warn("unable to find and set query length"); | |
| 714 } | |
| 715 | |
| 716 | |
| 717 if( defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/ ) { | |
| 718 $self->element({'Name' => 'FastaOutput_db', | |
| 719 'Data' => $1}); | |
| 720 } | |
| 721 } | |
| 722 } elsif( $self->in_element('hsp' ) ) { | |
| 723 | |
| 724 my @data = ( '','',''); | |
| 725 my $count = 0; | |
| 726 my $len = $self->idlength + 1; | |
| 727 my ($seq1_id); | |
| 728 while( defined($_ ) ) { | |
| 729 chomp; | |
| 730 $self->debug( "$count $_\n"); | |
| 731 | |
| 732 if( /residues in \d+\s+query\s+sequences/) { | |
| 733 $self->_pushback($_); | |
| 734 last; | |
| 735 } elsif( /^>>/ ) { | |
| 736 $self->_pushback($_); | |
| 737 last; | |
| 738 } elsif (/^\s*\d+\s*>>>/) { | |
| 739 $self->_pushback($_); | |
| 740 last; | |
| 741 } | |
| 742 if( $count == 0 ) { | |
| 743 unless( /^\s+\d+/ || /^\s+$/) { | |
| 744 $self->_pushback($_); | |
| 745 $count = 2; | |
| 746 } | |
| 747 } elsif( $count == 1 || $count == 3 ) { | |
| 748 if( /^(\S+)\s+/ ) { | |
| 749 $len = CORE::length($1) if $len < CORE::length($1); | |
| 750 s/\s+$//; # trim trailing spaces,we don't want them | |
| 751 $data[$count-1] = substr($_,$len); | |
| 752 } elsif( /^\s+(\d+)/ ) { | |
| 753 $count = -1; | |
| 754 $self->_pushback($_); | |
| 755 } elsif( /^\s+$/ || length($_) == 0) { | |
| 756 $count = 5; | |
| 757 # going to skip these | |
| 758 } else { | |
| 759 $self->warn("Unrecognized alignment line ($count) '$_'"); | |
| 760 } | |
| 761 } elsif( $count == 2 ) { | |
| 762 if( /^\s+\d+\s+/ ) { | |
| 763 $self->warn("$_\n"); | |
| 764 $count = 4; | |
| 765 } else { | |
| 766 # toss the first IDLENGTH characters of the line | |
| 767 if( length($_) >= $len ) { | |
| 768 $data[$count-1] = substr($_,$len); | |
| 769 } | |
| 770 } | |
| 771 } | |
| 772 last if( $count++ >= 5); | |
| 773 $_ = $self->_readline(); | |
| 774 } | |
| 775 if( length($data[0]) > 0 || length($data[2]) > 0 ) { | |
| 776 $self->characters({'Name' => 'Hsp_qseq', | |
| 777 'Data' => $data[0] }); | |
| 778 $self->characters({'Name' => 'Hsp_midline', | |
| 779 'Data' => $data[1]}); | |
| 780 $self->characters({'Name' => 'Hsp_hseq', | |
| 781 'Data' => $data[2]}); | |
| 782 } | |
| 783 } else { | |
| 784 if( ! $seentop ) { | |
| 785 $self->debug($_); | |
| 786 $self->warn("unrecognized FASTA Family report file!"); | |
| 787 return undef; | |
| 788 } | |
| 789 } | |
| 790 } | |
| 791 } | |
| 792 | |
| 793 | |
| 794 =head2 start_element | |
| 795 | |
| 796 Title : start_element | |
| 797 Usage : $eventgenerator->start_element | |
| 798 Function: Handles a start element event | |
| 799 Returns : none | |
| 800 Args : hashref with at least 2 keys 'Data' and 'Name' | |
| 801 | |
| 802 | |
| 803 =cut | |
| 804 | |
| 805 sub start_element{ | |
| 806 my ($self,$data) = @_; | |
| 807 # we currently don't care about attributes | |
| 808 my $nm = $data->{'Name'}; | |
| 809 if( my $type = $MODEMAP{$nm} ) { | |
| 810 $self->_mode($type); | |
| 811 if( $self->_eventHandler->will_handle($type) ) { | |
| 812 my $func = sprintf("start_%s",lc $type); | |
| 813 $self->_eventHandler->$func($data->{'Attributes'}); | |
| 814 } | |
| 815 unshift @{$self->{'_elements'}}, $type; | |
| 816 } | |
| 817 if($nm eq 'FastaOutput') { | |
| 818 $self->{'_values'} = {}; | |
| 819 $self->{'_result'}= undef; | |
| 820 $self->{'_mode'} = ''; | |
| 821 } | |
| 822 | |
| 823 } | |
| 824 | |
| 825 =head2 end_element | |
| 826 | |
| 827 Title : start_element | |
| 828 Usage : $eventgenerator->end_element | |
| 829 Function: Handles an end element event | |
| 830 Returns : none | |
| 831 Args : hashref with at least 2 keys 'Data' and 'Name' | |
| 832 | |
| 833 | |
| 834 =cut | |
| 835 | |
| 836 sub end_element { | |
| 837 my ($self,$data) = @_; | |
| 838 my $nm = $data->{'Name'}; | |
| 839 my $rc; | |
| 840 # Hsp are sort of weird, in that they end when another | |
| 841 # object begins so have to detect this in end_element for now | |
| 842 if( $nm eq 'Hsp' ) { | |
| 843 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) { | |
| 844 $self->element({'Name' => $_, | |
| 845 'Data' => $self->{'_last_hspdata'}->{$_}}); | |
| 846 } | |
| 847 $self->{'_last_hspdata'} = {} | |
| 848 } | |
| 849 | |
| 850 if( my $type = $MODEMAP{$nm} ) { | |
| 851 if( $self->_eventHandler->will_handle($type) ) { | |
| 852 my $func = sprintf("end_%s",lc $type); | |
| 853 $rc = $self->_eventHandler->$func($self->{'_reporttype'}, | |
| 854 $self->{'_values'}); | |
| 855 } | |
| 856 shift @{$self->{'_elements'}}; | |
| 857 | |
| 858 } elsif( $MAPPING{$nm} ) { | |
| 859 if ( ref($MAPPING{$nm}) =~ /hash/i ) { | |
| 860 my $key = (keys %{$MAPPING{$nm}})[0]; | |
| 861 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'}; | |
| 862 } else { | |
| 863 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'}; | |
| 864 } | |
| 865 } else { | |
| 866 $self->warn( "unknown nm $nm, ignoring\n"); | |
| 867 } | |
| 868 $self->{'_last_data'} = ''; # remove read data if we are at | |
| 869 # end of an element | |
| 870 $self->{'_result'} = $rc if( $nm eq 'FastaOutput' ); | |
| 871 return $rc; | |
| 872 | |
| 873 } | |
| 874 | |
| 875 =head2 element | |
| 876 | |
| 877 Title : element | |
| 878 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); | |
| 879 Function: Convience method that calls start_element, characters, end_element | |
| 880 Returns : none | |
| 881 Args : Hash ref with the keys 'Name' and 'Data' | |
| 882 | |
| 883 | |
| 884 =cut | |
| 885 | |
| 886 sub element{ | |
| 887 my ($self,$data) = @_; | |
| 888 $self->start_element($data); | |
| 889 $self->characters($data); | |
| 890 $self->end_element($data); | |
| 891 } | |
| 892 | |
| 893 | |
| 894 =head2 characters | |
| 895 | |
| 896 Title : characters | |
| 897 Usage : $eventgenerator->characters($str) | |
| 898 Function: Send a character events | |
| 899 Returns : none | |
| 900 Args : string | |
| 901 | |
| 902 | |
| 903 =cut | |
| 904 | |
| 905 sub characters{ | |
| 906 my ($self,$data) = @_; | |
| 907 | |
| 908 return unless ( defined $data->{'Data'} ); | |
| 909 if( $data->{'Data'} =~ /^\s+$/ ) { | |
| 910 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/; | |
| 911 } | |
| 912 | |
| 913 if( $self->in_element('hsp') && | |
| 914 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) { | |
| 915 | |
| 916 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'}; | |
| 917 } | |
| 918 | |
| 919 $self->{'_last_data'} = $data->{'Data'}; | |
| 920 } | |
| 921 | |
| 922 =head2 _mode | |
| 923 | |
| 924 Title : _mode | |
| 925 Usage : $obj->_mode($newval) | |
| 926 Function: | |
| 927 Example : | |
| 928 Returns : value of _mode | |
| 929 Args : newvalue (optional) | |
| 930 | |
| 931 | |
| 932 =cut | |
| 933 | |
| 934 sub _mode{ | |
| 935 my ($self,$value) = @_; | |
| 936 if( defined $value) { | |
| 937 $self->{'_mode'} = $value; | |
| 938 } | |
| 939 return $self->{'_mode'}; | |
| 940 } | |
| 941 | |
| 942 =head2 within_element | |
| 943 | |
| 944 Title : within_element | |
| 945 Usage : if( $eventgenerator->within_element($element) ) {} | |
| 946 Function: Test if we are within a particular element | |
| 947 This is different than 'in' because within can be tested | |
| 948 for a whole block. | |
| 949 Returns : boolean | |
| 950 Args : string element name | |
| 951 | |
| 952 | |
| 953 =cut | |
| 954 | |
| 955 sub within_element{ | |
| 956 my ($self,$name) = @_; | |
| 957 return 0 if ( ! defined $name && | |
| 958 ! defined $self->{'_elements'} || | |
| 959 scalar @{$self->{'_elements'}} == 0) ; | |
| 960 foreach ( @{$self->{'_elements'}} ) { | |
| 961 if( $_ eq $name || $_ eq $MODEMAP{$name} ) { | |
| 962 return 1; | |
| 963 } | |
| 964 } | |
| 965 return 0; | |
| 966 } | |
| 967 | |
| 968 =head2 in_element | |
| 969 | |
| 970 Title : in_element | |
| 971 Usage : if( $eventgenerator->in_element($element) ) {} | |
| 972 Function: Test if we are in a particular element | |
| 973 This is different than 'in' because within can be tested | |
| 974 for a whole block. | |
| 975 Returns : boolean | |
| 976 Args : string element name | |
| 977 | |
| 978 | |
| 979 =cut | |
| 980 | |
| 981 sub in_element{ | |
| 982 my ($self,$name) = @_; | |
| 983 return 0 if ! defined $self->{'_elements'}->[0]; | |
| 984 return ( $self->{'_elements'}->[0] eq $name || | |
| 985 (exists $MODEMAP{$name} && $self->{'_elements'}->[0] eq $MODEMAP{$name}) | |
| 986 ); | |
| 987 } | |
| 988 | |
| 989 | |
| 990 =head2 start_document | |
| 991 | |
| 992 Title : start_document | |
| 993 Usage : $eventgenerator->start_document | |
| 994 Function: Handles a start document event | |
| 995 Returns : none | |
| 996 Args : none | |
| 997 | |
| 998 | |
| 999 =cut | |
| 1000 | |
| 1001 sub start_document{ | |
| 1002 my ($self) = @_; | |
| 1003 $self->{'_lasttype'} = ''; | |
| 1004 $self->{'_values'} = {}; | |
| 1005 $self->{'_result'}= undef; | |
| 1006 $self->{'_mode'} = ''; | |
| 1007 $self->{'_elements'} = []; | |
| 1008 } | |
| 1009 | |
| 1010 | |
| 1011 =head2 end_document | |
| 1012 | |
| 1013 Title : end_document | |
| 1014 Usage : $eventgenerator->end_document | |
| 1015 Function: Handles an end document event | |
| 1016 Returns : Bio::Search::Result::ResultI object | |
| 1017 Args : none | |
| 1018 | |
| 1019 | |
| 1020 =cut | |
| 1021 | |
| 1022 sub end_document{ | |
| 1023 my ($self,@args) = @_; | |
| 1024 return $self->{'_result'}; | |
| 1025 } | |
| 1026 | |
| 1027 =head2 idlength | |
| 1028 | |
| 1029 Title : idlength | |
| 1030 Usage : $obj->idlength($newval) | |
| 1031 Function: Internal storage of the length of the ID desc | |
| 1032 in the HSP alignment blocks. Defaults to | |
| 1033 $IDLENGTH class variable value | |
| 1034 Returns : value of idlength | |
| 1035 Args : newvalue (optional) | |
| 1036 | |
| 1037 | |
| 1038 =cut | |
| 1039 | |
| 1040 sub idlength{ | |
| 1041 my ($self,$value) = @_; | |
| 1042 if( defined $value) { | |
| 1043 $self->{'_idlength'} = $value; | |
| 1044 } | |
| 1045 return $self->{'_idlength'} || $IDLENGTH; | |
| 1046 } | |
| 1047 | |
| 1048 | |
| 1049 =head2 result_count | |
| 1050 | |
| 1051 Title : result_count | |
| 1052 Usage : my $count = $searchio->result_count | |
| 1053 Function: Returns the number of results we have processed | |
| 1054 Returns : integer | |
| 1055 Args : none | |
| 1056 | |
| 1057 | |
| 1058 =cut | |
| 1059 | |
| 1060 sub result_count { | |
| 1061 my $self = shift; | |
| 1062 return $self->{'_result_count'}; | |
| 1063 } | |
| 1064 | |
| 1065 1; | |
| 1066 |
