0
|
1 # $Id: fasta.pm,v 1.33.2.3 2003/08/28 16:01:03 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::SearchIO::fasta
|
|
4 #
|
|
5 # Cared for by Jason Stajich <jason@bioperl.org>
|
|
6 #
|
|
7 # Copyright Jason Stajich
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SearchIO::fasta - A SearchIO parser for FASTA results
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # Do not use this object directly, use it through the SearchIO system
|
|
20 use Bio::SearchIO;
|
|
21 my $searchio = new Bio::SearchIO(-format => 'fasta',
|
|
22 -file => 'report.FASTA');
|
|
23 while( my $result = $searchio->next_result ) {
|
|
24 # ....
|
|
25 }
|
|
26
|
|
27 =head1 DESCRIPTION
|
|
28
|
|
29 This object contains the event based parsing code for FASTA format reports.
|
|
30
|
|
31 =head1 FEEDBACK
|
|
32
|
|
33 =head2 Mailing Lists
|
|
34
|
|
35 User feedback is an integral part of the evolution of this and other
|
|
36 Bioperl modules. Send your comments and suggestions preferably to
|
|
37 the Bioperl mailing list. Your participation is much appreciated.
|
|
38
|
|
39 bioperl-l@bioperl.org - General discussion
|
|
40 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
41
|
|
42 =head2 Reporting Bugs
|
|
43
|
|
44 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
45 of the bugs and their resolution. Bug reports can be submitted via
|
|
46 email or the web:
|
|
47
|
|
48 bioperl-bugs@bioperl.org
|
|
49 http://bugzilla.bioperl.org/
|
|
50
|
|
51 =head1 AUTHOR - Jason Stajich
|
|
52
|
|
53 Email jason@bioperl.org
|
|
54
|
|
55 Describe contact details here
|
|
56
|
|
57 =head1 CONTRIBUTORS
|
|
58
|
|
59 Additional contributors names and emails here
|
|
60
|
|
61 =head1 APPENDIX
|
|
62
|
|
63 The rest of the documentation details each of the object methods.
|
|
64 Internal methods are usually preceded with a _
|
|
65
|
|
66 =cut
|
|
67
|
|
68
|
|
69 # Let the code begin...
|
|
70
|
|
71
|
|
72 package Bio::SearchIO::fasta;
|
|
73 use vars qw(@ISA %MODEMAP %MAPPING $IDLENGTH);
|
|
74 use strict;
|
|
75
|
|
76 # Object preamble - inherits from Bio::Root::RootI
|
|
77
|
|
78 use Bio::SearchIO;
|
|
79 use POSIX;
|
|
80
|
|
81 BEGIN {
|
|
82 # Set IDLENGTH to a new value if you have
|
|
83 # compile FASTA with a different ID length
|
|
84 # (actually newest FASTA allows the setting of this
|
|
85 # via -C parameter, default is 6)
|
|
86 $IDLENGTH = 6;
|
|
87
|
|
88 # mapping of NCBI Blast terms to Bioperl hash keys
|
|
89 %MODEMAP = ('FastaOutput' => 'result',
|
|
90 'Hit' => 'hit',
|
|
91 'Hsp' => 'hsp'
|
|
92 );
|
|
93
|
|
94 # This should really be done more intelligently, like with
|
|
95 # XSLT
|
|
96
|
|
97 %MAPPING =
|
|
98 (
|
|
99 'Hsp_bit-score' => 'HSP-bits',
|
|
100 'Hsp_score' => 'HSP-score',
|
|
101 'Hsp_sw-score' => 'HSP-swscore',
|
|
102 'Hsp_evalue' => 'HSP-evalue',
|
|
103 'Hsp_query-from'=> 'HSP-query_start',
|
|
104 'Hsp_query-to' => 'HSP-query_end',
|
|
105 'Hsp_hit-from' => 'HSP-hit_start',
|
|
106 'Hsp_hit-to' => 'HSP-hit_end',
|
|
107 'Hsp_positive' => 'HSP-conserved',
|
|
108 'Hsp_identity' => 'HSP-identical',
|
|
109 'Hsp_gaps' => 'HSP-hsp_gaps',
|
|
110 'Hsp_hitgaps' => 'HSP-hit_gaps',
|
|
111 'Hsp_querygaps' => 'HSP-query_gaps',
|
|
112 'Hsp_qseq' => 'HSP-query_seq',
|
|
113 'Hsp_hseq' => 'HSP-hit_seq',
|
|
114 'Hsp_midline' => 'HSP-homology_seq',
|
|
115 'Hsp_align-len' => 'HSP-hsp_length',
|
|
116 'Hsp_query-frame'=> 'HSP-query_frame',
|
|
117 'Hsp_hit-frame' => 'HSP-hit_frame',
|
|
118
|
|
119 'Hit_id' => 'HIT-name',
|
|
120 'Hit_len' => 'HIT-length',
|
|
121 'Hit_accession' => 'HIT-accession',
|
|
122 'Hit_def' => 'HIT-description',
|
|
123 'Hit_signif' => 'HIT-significance',
|
|
124 'Hit_score' => 'HIT-score',
|
|
125
|
|
126 'FastaOutput_program' => 'RESULT-algorithm_name',
|
|
127 'FastaOutput_version' => 'RESULT-algorithm_version',
|
|
128 'FastaOutput_query-def'=> 'RESULT-query_name',
|
|
129 'FastaOutput_querydesc'=> 'RESULT-query_description',
|
|
130 'FastaOutput_query-len'=> 'RESULT-query_length',
|
|
131 'FastaOutput_db' => 'RESULT-database_name',
|
|
132 'FastaOutput_db-len' => 'RESULT-database_entries',
|
|
133 'FastaOutput_db-let' => 'RESULT-database_letters',
|
|
134
|
|
135 'Parameters_matrix' => { 'RESULT-parameters' => 'matrix'},
|
|
136 'Parameters_expect' => { 'RESULT-parameters' => 'expect'},
|
|
137 'Parameters_include' => { 'RESULT-parameters' => 'include'},
|
|
138 'Parameters_sc-match' => { 'RESULT-parameters' => 'match'},
|
|
139 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
|
|
140 'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen'},
|
|
141 'Parameters_gap-ext' => { 'RESULT-parameters' => 'gapext'},
|
|
142 'Parameters_word-size' => { 'RESULT-parameters' => 'wordsize'},
|
|
143 'Parameters_ktup' => { 'RESULT-parameters' => 'ktup'},
|
|
144 'Parameters_filter' => {'RESULT-parameters' => 'filter'},
|
|
145 'Statistics_db-num' => { 'RESULT-statistics' => 'dbentries'},
|
|
146 'Statistics_db-len' => { 'RESULT-statistics' => 'dbletters'},
|
|
147 'Statistics_hsp-len' => { 'RESULT-statistics' => 'hsplength'},
|
|
148 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
|
|
149 'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' },
|
|
150 'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' },
|
|
151 'Statistics_entropy' => { 'RESULT-statistics' => 'entropy'},
|
|
152 );
|
|
153 }
|
|
154
|
|
155
|
|
156 @ISA = qw(Bio::SearchIO );
|
|
157
|
|
158 =head2 new
|
|
159
|
|
160 Title : new
|
|
161 Usage : my $obj = new Bio::SearchIO::fasta();
|
|
162 Function: Builds a new Bio::SearchIO::fasta object
|
|
163 Returns : Bio::SearchIO::fasta
|
|
164 Args : -idlength - set ID length to something other
|
|
165 than the default (7), this is only
|
|
166 necessary if you have compiled FASTA
|
|
167 with a new default id length to display
|
|
168 in the HSP alignment blocks
|
|
169
|
|
170 =cut
|
|
171
|
|
172 sub _initialize {
|
|
173 my($self,@args) = @_;
|
|
174 $self->SUPER::_initialize(@args);
|
|
175 return unless @args;
|
|
176 my ($idlength) = $self->_rearrange([qw(IDLENGTH)],@args);
|
|
177 $self->idlength($idlength || $IDLENGTH);
|
|
178 $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::FastaHSP'));
|
|
179
|
|
180 return 1;
|
|
181 }
|
|
182
|
|
183 =head2 next_result
|
|
184
|
|
185 Title : next_result
|
|
186 Usage : my $hit = $searchio->next_result;
|
|
187 Function: Returns the next Result from a search
|
|
188 Returns : Bio::Search::Result::ResultI object
|
|
189 Args : none
|
|
190
|
|
191 =cut
|
|
192
|
|
193 sub next_result{
|
|
194 my ($self) = @_;
|
|
195
|
|
196 my $data = '';
|
|
197 my $seentop = 0;
|
|
198 my $current_hsp;
|
|
199 $self->start_document();
|
|
200 my @hit_signifs;
|
|
201 while( defined ($_ = $self->_readline )) {
|
|
202 next if( ! $self->in_element('hsp') &&
|
|
203 /^\s+$/); # skip empty lines
|
|
204 if( /(\S+)\s+searches\s+a\s+((protein\s+or\s+DNA\s+sequence)|(sequence\s+database))/i || /(\S+) compares a/ ||
|
|
205 ( m/^# / && ($_ = $self->_readline) &&
|
|
206 /(\S+)\s+searches\s+a\s+((protein\s+or\s+DNA\s+sequence)|(sequence\s+database))/i || /(\S+) compares a/
|
|
207 )
|
|
208 ) {
|
|
209 if( $seentop ) {
|
|
210 $self->_pushback($_);
|
|
211 $self->end_element({ 'Name' => 'FastaOutput'});
|
|
212 return $self->end_document();
|
|
213 }
|
|
214 $self->{'_reporttype'} = $1;
|
|
215 $self->start_element({ 'Name' => 'FastaOutput' } );
|
|
216 $self->{'_result_count'}++;
|
|
217 $seentop = 1;
|
|
218
|
|
219 $self->element({ 'Name' => 'FastaOutput_program',
|
|
220 'Data' => $self->{'_reporttype'}});
|
|
221 $_ = $self->_readline();
|
|
222 my ($version) = (/version\s+(\S+)/);
|
|
223 $version = '' unless defined $version;
|
|
224 $self->{'_version'} = $version;
|
|
225 $self->element({ 'Name' => 'FastaOutput_version',
|
|
226 'Data' => $version});
|
|
227
|
|
228 my ($last, $leadin, $type, $querylen, $querytype, $querydef);
|
|
229
|
|
230 while( defined($_ = $self->_readline()) ) {
|
|
231 if( /^ (
|
|
232 (?:\s+>) | # fa33 lead-in
|
|
233 (?:\s*\d+\s*>>>) # fa34 mlib lead-in
|
|
234 )
|
|
235 (.*)
|
|
236 /x
|
|
237 ) {
|
|
238 ($leadin, $querydef) = ($1, $2);
|
|
239 if ($leadin =~ m/>>>/) {
|
|
240 if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
|
|
241 ($querydef, $querylen, $querytype) = ($1, $2, $3);
|
|
242 last;
|
|
243 }
|
|
244 } else {
|
|
245 if( $last =~ /(\S+)[:,]\s*(\d+)\s+(aa|nt)/ ) {
|
|
246 ($querylen, $querytype) = ($2, $3);
|
|
247 $querydef ||= $1;
|
|
248 last;
|
|
249 }
|
|
250 }
|
|
251 } elsif ( m/^\s*vs\s+\S+/o ) {
|
|
252 if ( $last =~ /(\S+)[,:]\s+(\d+)\s+(aa|nt)/o) {
|
|
253 ($querydef, $querylen, $querytype) = ($1, $2, $3);
|
|
254 last;
|
|
255 }
|
|
256 }
|
|
257 $last = $_;
|
|
258 }
|
|
259
|
|
260 if( $self->{'_reporttype'} &&
|
|
261 $self->{'_reporttype'} eq 'FASTA'
|
|
262 ) {
|
|
263 if( $querytype eq 'nt') {
|
|
264 $self->{'_reporttype'} = 'FASTN' ;
|
|
265 } elsif( $querytype eq 'aa' ) {
|
|
266 $self->{'_reporttype'} = 'FASTP' ;
|
|
267 }
|
|
268 }
|
|
269 my ($name, $descr) = $querydef =~ m/^(\S+)\s*(.*?)\s*$/o;
|
|
270 $self->element({'Name' => 'FastaOutput_query-def',
|
|
271 'Data' => $name});
|
|
272 $self->element({'Name' => 'FastaOutput_querydesc',
|
|
273 'Data' => $descr});
|
|
274 if ($querylen) {
|
|
275 $self->element({'Name' => 'FastaOutput_query-len',
|
|
276 'Data' => $querylen});
|
|
277 } else {
|
|
278 $self->warn("unable to find and set query length");
|
|
279 }
|
|
280
|
|
281 if( $last =~ /^\s*vs\s+(\S+)/ ||
|
|
282 ($last =~ /^searching\s+(\S+)\s+library/) ||
|
|
283 (defined $_ && /^\s*vs\s+(\S+)/) ||
|
|
284 (defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/)
|
|
285 ) {
|
|
286 $self->element({'Name' => 'FastaOutput_db',
|
|
287 'Data' => $1});
|
|
288 } elsif (m/^\s+opt(?:\s+E\(\))?$/o) {
|
|
289 # histogram ... read over it more rapidly than the larger outer loop:
|
|
290 while (defined($_ = $self->_readline)) {
|
|
291 last if m/^>\d+/;
|
|
292 }
|
|
293 }
|
|
294
|
|
295 } elsif( /(\d+) residues in\s+(\d+)\s+sequences/ ) {
|
|
296 $self->element({'Name' => 'FastaOutput_db-let',
|
|
297 'Data' => $1});
|
|
298 $self->element({'Name' => 'FastaOutput_db-len',
|
|
299 'Data' => $2});
|
|
300 $self->element({'Name' => 'Statistics_db-len',
|
|
301 'Data' => $1});
|
|
302 $self->element({'Name' => 'Statistics_db-num',
|
|
303 'Data' => $2});
|
|
304 } elsif( /Lambda=\s*(\S+)/ ) {
|
|
305 $self->element({'Name' => 'Statistics_lambda',
|
|
306 'Data' => $1});
|
|
307 } elsif (/K=\s*(\S+)/) {
|
|
308 $self->element({'Name' => 'Statistics_kappa',
|
|
309 'Data' => $1});
|
|
310 } elsif( /^\s*(Smith-Waterman).+(\S+)\s*matrix [^\]]*?(xS)?\]/ ) {
|
|
311 $self->element({'Name' => 'Parameters_matrix',
|
|
312 'Data' => $2});
|
|
313 $self->element({'Name' => 'Parameters_filter',
|
|
314 'Data' => defined $3 ? 1 : 0,
|
|
315 });
|
|
316 $self->{'_reporttype'} = $1;
|
|
317
|
|
318 $self->element({ 'Name' => 'FastaOutput_program',
|
|
319 'Data' => $self->{'_reporttype'}});
|
|
320
|
|
321 } elsif( /The best( related| unrelated)? scores are:/ ) {
|
|
322 my $rel = $1;
|
|
323 my @labels = split;
|
|
324 @labels = map {
|
|
325 if ($_ =~ m/^E\((\d+)\)$/o) {
|
|
326 $self->element({'Name' => 'Statistics_eff-space', 'Data' => $1});
|
|
327 "evalue";
|
|
328 } else {
|
|
329 $_;
|
|
330 }
|
|
331 } @labels[$rel ? 5 : 4 .. $#labels];
|
|
332
|
|
333 while( defined ($_ = $self->_readline() ) &&
|
|
334 ! /^\s+$/ ) {
|
|
335 my @line = split;
|
|
336
|
|
337 if ($line[-1] =~ m/\=/o && $labels[-1] eq 'fs') {
|
|
338 # unlabelled alignment hit;
|
|
339 push @labels, "aln_code";
|
|
340 }
|
|
341
|
|
342 my %data;
|
|
343 @data{@labels} = splice(@line, @line - @labels);
|
|
344 if ($line[-1] =~ m/\[([1-6rf])\]/o) {
|
|
345 my $fr = $1;
|
|
346 $data{lframe} = ($fr =~ /\d/o ?
|
|
347 ($fr <= 3 ? "+$fr" : "-@{[$fr-3]}") :
|
|
348 ($fr eq 'f' ? '+1' : '-1')
|
|
349 );
|
|
350 pop @line;
|
|
351 } else {
|
|
352 $data{lframe} = '0';
|
|
353 }
|
|
354
|
|
355 if ($line[-1] =~ m/^\(?(\d+)\)$/) {
|
|
356 $data{hit_len} = $1;
|
|
357 pop @line;
|
|
358 if ($line[-1] =~ m/^\($/) {
|
|
359 pop @line;
|
|
360 }
|
|
361 } else {
|
|
362 $data{hit_len} = 0;
|
|
363 }
|
|
364
|
|
365 # rebuild the first part of the line, preserving spaces:
|
|
366 ($_) = m/^(\S+(?:\s+\S+){$#line})/;
|
|
367
|
|
368 my ($id, $desc) = split(/\s+/,$_,2);
|
|
369 my @pieces = split(/\|/,$id);
|
|
370 my $acc = pop @pieces;
|
|
371 $acc =~ s/\.\d+$//;
|
|
372
|
|
373 @data{qw(id desc acc)} = ($id, $desc, $acc);
|
|
374
|
|
375 push @hit_signifs, \%data;
|
|
376 }
|
|
377 } elsif( /^\s*([T]?FAST[XYAF]).+,\s*(\S+)\s*matrix[^\]]+?(xS)?\]\s*ktup:\s*(\d+)/ ) {
|
|
378 $self->element({'Name' => 'Parameters_matrix',
|
|
379 'Data' => $2});
|
|
380 $self->element({'Name' => 'Parameters_filter',
|
|
381 'Data' => defined $3 ? 1 : 0,
|
|
382 });
|
|
383 $self->element({'Name' => 'Parameters_ktup',
|
|
384 'Data' => $4});
|
|
385 $self->{'_reporttype'} = $1 if( $self->{'_reporttype'} !~ /FAST[PN]/i ) ;
|
|
386
|
|
387 $self->element({ 'Name' => 'FastaOutput_program',
|
|
388 'Data' => $self->{'_reporttype'}});
|
|
389
|
|
390 } elsif( /(?:gap\-pen|open\/ext):\s+([\-\+]?\d+)\s*\/\s*([\-\+]?\d+).+width:\s+(\d+)/ ) {
|
|
391 $self->element({'Name' => 'Parameters_gap-open',
|
|
392 'Data' => $1});
|
|
393 $self->element({'Name' => 'Parameters_gap-ext',
|
|
394 'Data' => $2});
|
|
395 $self->element({'Name' => 'Parameters_word-size',
|
|
396 'Data' => $3});
|
|
397 } elsif( /^>>(.+?)\s+\((\d+)\s*(aa|nt)\)$/ ) {
|
|
398 if( $self->in_element('hsp') ) {
|
|
399 $self->end_element({ 'Name' => 'Hsp'});
|
|
400 }
|
|
401 if( $self->in_element('hit') ) {
|
|
402 $self->end_element({ 'Name' => 'Hit'});
|
|
403 }
|
|
404
|
|
405 $self->start_element({'Name' => 'Hit'});
|
|
406 $self->element({ 'Name' => 'Hit_len',
|
|
407 'Data' => $2});
|
|
408 my ($id,$desc) = split(/\s+/,$1,2);
|
|
409 $self->element({ 'Name' => 'Hit_id',
|
|
410 'Data' => $id});
|
|
411 my @pieces = split(/\|/,$id);
|
|
412 my $acc = pop @pieces;
|
|
413 $acc =~ s/\.\d+$//;
|
|
414 $self->element({ 'Name' => 'Hit_accession',
|
|
415 'Data' => $acc});
|
|
416 $self->element({ 'Name' => 'Hit_def',
|
|
417 'Data' => $desc});
|
|
418
|
|
419 $_ = $self->_readline();
|
|
420 my ($score,$bits,$e) = /Z-score: \s* (\S+) \s*
|
|
421 (?: bits: \s* (\S+) \s+ )?
|
|
422 (?: E|expect ) \s* \(\) :? \s*(\S+)/x;
|
|
423 $bits = $score unless defined $bits;
|
|
424
|
|
425 my $v = shift @hit_signifs;
|
|
426 if( defined $v ) {
|
|
427 @{$v}{qw(evalue bits z-sc)} = ($e, $bits, $score);
|
|
428 }
|
|
429
|
|
430 $self->element({'Name' => 'Hit_signif',
|
|
431 'Data' => $v ? $v->{evalue} : $e });
|
|
432 $self->element({'Name' => 'Hit_score',
|
|
433 'Data' => $v ? $v->{bits} : $bits });
|
|
434 $self->start_element({'Name' => 'Hsp'});
|
|
435
|
|
436 $self->element({'Name' => 'Hsp_score',
|
|
437 'Data' => $v ? $v->{'z-sc'} : $score });
|
|
438 $self->element({'Name' => 'Hsp_evalue',
|
|
439 'Data' => $v ? $v->{evalue} : $e });
|
|
440 $self->element({'Name' => 'Hsp_bit-score',
|
|
441 'Data' => $v ? $v->{bits} : $bits });
|
|
442 $_ = $self->_readline();
|
|
443 if( /Smith-Waterman score:\s*(\d+)/ ) {
|
|
444 $self->element({'Name' => 'Hsp_sw-score',
|
|
445 'Data' => $1});
|
|
446 }
|
|
447 if( / (\S+)\% \s* identity
|
|
448 (?:\s* \( \s* (\S+)\% \s* ungapped \) )?
|
|
449 \s* in \s* (\d+) \s+ (?:aa|nt) \s+ overlap \s*
|
|
450 \( (\d+) \- (\d+) : (\d+) \- (\d+) \)
|
|
451 /x ) {
|
|
452 my ($identper,$gapper,$len,$querystart,
|
|
453 $queryend,$hitstart,$hitend) = ($1,$2,$3,$4,$5,$6,$7);
|
|
454 my $ident = POSIX::ceil(($identper/100) * $len);
|
|
455 my $gaps = ( defined $gapper ) ? POSIX::ceil ( ($gapper/100) * $len) : undef;
|
|
456
|
|
457 $self->element({'Name' => 'Hsp_gaps',
|
|
458 'Data' => $gaps});
|
|
459 $self->element({'Name' => 'Hsp_identity',
|
|
460 'Data' => $ident});
|
|
461 $self->element({'Name' => 'Hsp_positive',
|
|
462 'Data' => $ident});
|
|
463 $self->element({'Name' => 'Hsp_align-len',
|
|
464 'Data' => $len});
|
|
465
|
|
466 $self->element({'Name' => 'Hsp_query-from',
|
|
467 'Data' => $querystart});
|
|
468 $self->element({'Name' => 'Hsp_query-to',
|
|
469 'Data' => $queryend});
|
|
470 $self->element({'Name' => 'Hsp_hit-from',
|
|
471 'Data' => $hitstart});
|
|
472 $self->element({'Name' => 'Hsp_hit-to',
|
|
473 'Data' => $hitend});
|
|
474
|
|
475 }
|
|
476
|
|
477 if ($v) {
|
|
478 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $v->{qgaps} }) if exists $v->{qgaps};
|
|
479 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $v->{lgaps} }) if exists $v->{lgaps};
|
|
480
|
|
481 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
|
|
482 if( 8 == scalar grep { exists $v->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
|
|
483 if ($v->{ax0} < $v->{an0}) {
|
|
484 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($v->{px0} - $v->{ax0}) % 3) + 1]}" });
|
|
485 } else {
|
|
486 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($v->{an0} - $v->{pn0}) % 3) + 1]}" });
|
|
487 }
|
|
488 if ($v->{ax1} < $v->{an1}) {
|
|
489 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($v->{px1} - $v->{ax1}) % 3) + 1]}" });
|
|
490 } else {
|
|
491 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($v->{an1} - $v->{pn1}) % 3) + 1]}" });
|
|
492 }
|
|
493 } else {
|
|
494 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $v->{lframe} });
|
|
495 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
|
|
496 }
|
|
497 } else {
|
|
498 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
|
|
499 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $v->{lframe} });
|
|
500 }
|
|
501
|
|
502 } else {
|
|
503 $self->warn( "unable to parse FASTA score line: $_");
|
|
504 }
|
|
505 } elsif( /\d+\s*residues\s*in\s*\d+\s*query\s*sequences/ ) {
|
|
506 if( $self->in_element('hsp') ) {
|
|
507 $self->end_element({'Name' => 'Hsp'});
|
|
508 }
|
|
509 if( $self->in_element('hit') ) {
|
|
510 $self->end_element({'Name' => 'Hit'});
|
|
511 }
|
|
512
|
|
513 # $_ = $self->_readline();
|
|
514 # my ( $liblen,$libsize) = /(\d+)\s+residues\s*in(\d+)\s*library/;
|
|
515 # fast forward to the end of the file as there is
|
|
516 # nothing else left to do with this file and want to be sure and
|
|
517 # reset it
|
|
518 while(defined($_ = $self->_readline() ) ) {
|
|
519 last if( /^Function used was/);
|
|
520 if( /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+sequence)|(sequence\s+database)/ ) {
|
|
521 $self->_pushback($_);
|
|
522 }
|
|
523 }
|
|
524
|
|
525 if (@hit_signifs) {
|
|
526 # process remaining best hits
|
|
527 for my $h (@hit_signifs) {
|
|
528 # Hsp_score Hsp_evalue Hsp_bit-score
|
|
529 # Hsp_sw-score Hsp_gaps Hsp_identity Hsp_positive
|
|
530 # Hsp_align-len Hsp_query-from Hsp_query-to
|
|
531 # Hsp_hit-from Hsp_hit-to Hsp_qseq Hsp_midline
|
|
532
|
|
533 $self->start_element({'Name' => 'Hit'});
|
|
534 $self->element({ 'Name' => 'Hit_len',
|
|
535 'Data' => $h->{hit_len}
|
|
536 }) if exists $h->{hit_len};
|
|
537 $self->element({ 'Name' => 'Hit_id',
|
|
538 'Data' => $h->{id}
|
|
539 }) if exists $h->{id};
|
|
540 $self->element({ 'Name' => 'Hit_accession',
|
|
541 'Data' => $h->{acc}
|
|
542 }) if exists $h->{acc};
|
|
543 $self->element({ 'Name' => 'Hit_def',
|
|
544 'Data' => $h->{desc}
|
|
545 }) if exists $h->{desc};
|
|
546 $self->element({'Name' => 'Hit_signif',
|
|
547 'Data' => $h->{evalue}
|
|
548 }) if exists $h->{evalue};
|
|
549 $self->element({'Name' => 'Hit_score',
|
|
550 'Data' => $h->{bits}
|
|
551 }) if exists $h->{bits};
|
|
552
|
|
553 $self->start_element({'Name' => 'Hsp'});
|
|
554 $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
|
|
555 $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
|
|
556 $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
|
|
557 $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
|
|
558 $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
|
|
559 $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
|
|
560 if (exists $h->{'%_id'} && exists $h->{alen});
|
|
561 $self->element({'Name' => 'Hsp_positive', 'Data' => 100 * $h->{'%_id'} }) if exists $h->{'%_id'};
|
|
562 $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
|
|
563 $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
|
|
564 $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
|
|
565 $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
|
|
566 $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
|
|
567
|
|
568 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
|
|
569 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
|
|
570
|
|
571 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
|
|
572 if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
|
|
573 if ($h->{ax0} < $h->{an0}) {
|
|
574 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
|
|
575 } else {
|
|
576 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
|
|
577 }
|
|
578 if ($h->{ax1} < $h->{an1}) {
|
|
579 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
|
|
580 } else {
|
|
581 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
|
|
582 }
|
|
583 } else {
|
|
584 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
|
|
585 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
|
|
586 }
|
|
587 } else {
|
|
588 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
|
|
589 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
|
|
590 }
|
|
591
|
|
592 $self->end_element({'Name' => 'Hsp'});
|
|
593 $self->end_element({'Name' => 'Hit'});
|
|
594 }
|
|
595 }
|
|
596
|
|
597 $self->end_element({ 'Name' => 'FastaOutput'});
|
|
598 return $self->end_document();
|
|
599 } elsif( /^\s*\d+\s*>>>/) {
|
|
600 if ($self->within_element('FastaOutput')) {
|
|
601 if( $self->in_element('hsp') ) {
|
|
602 $self->end_element({'Name' => 'Hsp'});
|
|
603 }
|
|
604 if( $self->in_element('hit') ) {
|
|
605 $self->end_element({'Name' => 'Hit'});
|
|
606 }
|
|
607
|
|
608 if (@hit_signifs) {
|
|
609 # process remaining best hits
|
|
610 for my $h (@hit_signifs) {
|
|
611 $self->start_element({'Name' => 'Hit'});
|
|
612 $self->element({ 'Name' => 'Hit_len',
|
|
613 'Data' => $h->{hit_len}
|
|
614 }) if exists $h->{hit_len};
|
|
615 $self->element({ 'Name' => 'Hit_id',
|
|
616 'Data' => $h->{id}
|
|
617 }) if exists $h->{id};
|
|
618 $self->element({ 'Name' => 'Hit_accession',
|
|
619 'Data' => $h->{acc}
|
|
620 }) if exists $h->{acc};
|
|
621 $self->element({ 'Name' => 'Hit_def',
|
|
622 'Data' => $h->{desc}
|
|
623 }) if exists $h->{desc};
|
|
624 $self->element({'Name' => 'Hit_signif',
|
|
625 'Data' => $h->{evalue}
|
|
626 }) if exists $h->{evalue};
|
|
627 $self->element({'Name' => 'Hit_score',
|
|
628 'Data' => $h->{bits}
|
|
629 }) if exists $h->{bits};
|
|
630
|
|
631 $self->start_element({'Name' => 'Hsp'});
|
|
632 $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
|
|
633 $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
|
|
634 $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
|
|
635 $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
|
|
636 $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
|
|
637 $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
|
|
638 if (exists $h->{'%_id'} && exists $h->{alen});
|
|
639 $self->element({'Name' => 'Hsp_positive', 'Data' => $h->{'%_id'} }) if exists $h->{'%_id'};
|
|
640 $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
|
|
641 $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
|
|
642 $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
|
|
643 $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
|
|
644 $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
|
|
645
|
|
646 $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
|
|
647 $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
|
|
648
|
|
649 if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
|
|
650 if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
|
|
651 if ($h->{ax0} < $h->{an0}) {
|
|
652 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
|
|
653 } else {
|
|
654 $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
|
|
655 }
|
|
656 if ($h->{ax1} < $h->{an1}) {
|
|
657 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
|
|
658 } else {
|
|
659 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
|
|
660 }
|
|
661 } else {
|
|
662 $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
|
|
663 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
|
|
664 }
|
|
665 } else {
|
|
666 $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
|
|
667 $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
|
|
668 }
|
|
669
|
|
670 $self->end_element({'Name' => 'Hsp'});
|
|
671 $self->end_element({'Name' => 'Hit'});
|
|
672 }
|
|
673 }
|
|
674 $self->end_element({ 'Name' => 'FastaOutput' });
|
|
675 $self->_pushback($_);
|
|
676 return $self->end_document();
|
|
677 } else {
|
|
678 $self->start_element({ 'Name' => 'FastaOutput' });
|
|
679 $self->{'_result_count'}++;
|
|
680 $seentop = 1;
|
|
681 $self->element({ 'Name' => 'FastaOutput_program',
|
|
682 'Data' => $self->{'_reporttype'} });
|
|
683 $self->element({ 'Name' => 'FastaOutput_version',
|
|
684 'Data' => $self->{'_version'} });
|
|
685
|
|
686 my ($type, $querylen, $querytype, $querydef);
|
|
687
|
|
688 if( /^\s*\d+\s*>>>(.*)/ ) {
|
|
689 $querydef = $1;
|
|
690 if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
|
|
691 ($querydef, $querylen, $querytype) = ($1, $2, $3);
|
|
692 }
|
|
693 }
|
|
694
|
|
695 if( $self->{'_reporttype'} &&
|
|
696 $self->{'_reporttype'} eq 'FASTA'
|
|
697 ) {
|
|
698 if( $querytype eq 'nt') {
|
|
699 $self->{'_reporttype'} = 'FASTN' ;
|
|
700 } elsif( $querytype eq 'aa' ) {
|
|
701 $self->{'_reporttype'} = 'FASTP' ;
|
|
702 }
|
|
703 }
|
|
704 my ($name,$descr) = ($querydef =~ m/^(\S+)(?:\s+(.*))?\s*$/o);
|
|
705 $self->element({'Name' => 'FastaOutput_query-def',
|
|
706 'Data' => $name});
|
|
707 $self->element({'Name' => 'FastaOutput_querydesc',
|
|
708 'Data' => $descr});
|
|
709 if ($querylen) {
|
|
710 $self->element({'Name' => 'FastaOutput_query-len',
|
|
711 'Data' => $querylen});
|
|
712 } else {
|
|
713 $self->warn("unable to find and set query length");
|
|
714 }
|
|
715
|
|
716
|
|
717 if( defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/ ) {
|
|
718 $self->element({'Name' => 'FastaOutput_db',
|
|
719 'Data' => $1});
|
|
720 }
|
|
721 }
|
|
722 } elsif( $self->in_element('hsp' ) ) {
|
|
723
|
|
724 my @data = ( '','','');
|
|
725 my $count = 0;
|
|
726 my $len = $self->idlength + 1;
|
|
727 my ($seq1_id);
|
|
728 while( defined($_ ) ) {
|
|
729 chomp;
|
|
730 $self->debug( "$count $_\n");
|
|
731
|
|
732 if( /residues in \d+\s+query\s+sequences/) {
|
|
733 $self->_pushback($_);
|
|
734 last;
|
|
735 } elsif( /^>>/ ) {
|
|
736 $self->_pushback($_);
|
|
737 last;
|
|
738 } elsif (/^\s*\d+\s*>>>/) {
|
|
739 $self->_pushback($_);
|
|
740 last;
|
|
741 }
|
|
742 if( $count == 0 ) {
|
|
743 unless( /^\s+\d+/ || /^\s+$/) {
|
|
744 $self->_pushback($_);
|
|
745 $count = 2;
|
|
746 }
|
|
747 } elsif( $count == 1 || $count == 3 ) {
|
|
748 if( /^(\S+)\s+/ ) {
|
|
749 $len = CORE::length($1) if $len < CORE::length($1);
|
|
750 s/\s+$//; # trim trailing spaces,we don't want them
|
|
751 $data[$count-1] = substr($_,$len);
|
|
752 } elsif( /^\s+(\d+)/ ) {
|
|
753 $count = -1;
|
|
754 $self->_pushback($_);
|
|
755 } elsif( /^\s+$/ || length($_) == 0) {
|
|
756 $count = 5;
|
|
757 # going to skip these
|
|
758 } else {
|
|
759 $self->warn("Unrecognized alignment line ($count) '$_'");
|
|
760 }
|
|
761 } elsif( $count == 2 ) {
|
|
762 if( /^\s+\d+\s+/ ) {
|
|
763 $self->warn("$_\n");
|
|
764 $count = 4;
|
|
765 } else {
|
|
766 # toss the first IDLENGTH characters of the line
|
|
767 if( length($_) >= $len ) {
|
|
768 $data[$count-1] = substr($_,$len);
|
|
769 }
|
|
770 }
|
|
771 }
|
|
772 last if( $count++ >= 5);
|
|
773 $_ = $self->_readline();
|
|
774 }
|
|
775 if( length($data[0]) > 0 || length($data[2]) > 0 ) {
|
|
776 $self->characters({'Name' => 'Hsp_qseq',
|
|
777 'Data' => $data[0] });
|
|
778 $self->characters({'Name' => 'Hsp_midline',
|
|
779 'Data' => $data[1]});
|
|
780 $self->characters({'Name' => 'Hsp_hseq',
|
|
781 'Data' => $data[2]});
|
|
782 }
|
|
783 } else {
|
|
784 if( ! $seentop ) {
|
|
785 $self->debug($_);
|
|
786 $self->warn("unrecognized FASTA Family report file!");
|
|
787 return undef;
|
|
788 }
|
|
789 }
|
|
790 }
|
|
791 }
|
|
792
|
|
793
|
|
794 =head2 start_element
|
|
795
|
|
796 Title : start_element
|
|
797 Usage : $eventgenerator->start_element
|
|
798 Function: Handles a start element event
|
|
799 Returns : none
|
|
800 Args : hashref with at least 2 keys 'Data' and 'Name'
|
|
801
|
|
802
|
|
803 =cut
|
|
804
|
|
805 sub start_element{
|
|
806 my ($self,$data) = @_;
|
|
807 # we currently don't care about attributes
|
|
808 my $nm = $data->{'Name'};
|
|
809 if( my $type = $MODEMAP{$nm} ) {
|
|
810 $self->_mode($type);
|
|
811 if( $self->_eventHandler->will_handle($type) ) {
|
|
812 my $func = sprintf("start_%s",lc $type);
|
|
813 $self->_eventHandler->$func($data->{'Attributes'});
|
|
814 }
|
|
815 unshift @{$self->{'_elements'}}, $type;
|
|
816 }
|
|
817 if($nm eq 'FastaOutput') {
|
|
818 $self->{'_values'} = {};
|
|
819 $self->{'_result'}= undef;
|
|
820 $self->{'_mode'} = '';
|
|
821 }
|
|
822
|
|
823 }
|
|
824
|
|
825 =head2 end_element
|
|
826
|
|
827 Title : start_element
|
|
828 Usage : $eventgenerator->end_element
|
|
829 Function: Handles an end element event
|
|
830 Returns : none
|
|
831 Args : hashref with at least 2 keys 'Data' and 'Name'
|
|
832
|
|
833
|
|
834 =cut
|
|
835
|
|
836 sub end_element {
|
|
837 my ($self,$data) = @_;
|
|
838 my $nm = $data->{'Name'};
|
|
839 my $rc;
|
|
840 # Hsp are sort of weird, in that they end when another
|
|
841 # object begins so have to detect this in end_element for now
|
|
842 if( $nm eq 'Hsp' ) {
|
|
843 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
|
|
844 $self->element({'Name' => $_,
|
|
845 'Data' => $self->{'_last_hspdata'}->{$_}});
|
|
846 }
|
|
847 $self->{'_last_hspdata'} = {}
|
|
848 }
|
|
849
|
|
850 if( my $type = $MODEMAP{$nm} ) {
|
|
851 if( $self->_eventHandler->will_handle($type) ) {
|
|
852 my $func = sprintf("end_%s",lc $type);
|
|
853 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
|
|
854 $self->{'_values'});
|
|
855 }
|
|
856 shift @{$self->{'_elements'}};
|
|
857
|
|
858 } elsif( $MAPPING{$nm} ) {
|
|
859 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
|
|
860 my $key = (keys %{$MAPPING{$nm}})[0];
|
|
861 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
|
|
862 } else {
|
|
863 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
|
|
864 }
|
|
865 } else {
|
|
866 $self->warn( "unknown nm $nm, ignoring\n");
|
|
867 }
|
|
868 $self->{'_last_data'} = ''; # remove read data if we are at
|
|
869 # end of an element
|
|
870 $self->{'_result'} = $rc if( $nm eq 'FastaOutput' );
|
|
871 return $rc;
|
|
872
|
|
873 }
|
|
874
|
|
875 =head2 element
|
|
876
|
|
877 Title : element
|
|
878 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
|
|
879 Function: Convience method that calls start_element, characters, end_element
|
|
880 Returns : none
|
|
881 Args : Hash ref with the keys 'Name' and 'Data'
|
|
882
|
|
883
|
|
884 =cut
|
|
885
|
|
886 sub element{
|
|
887 my ($self,$data) = @_;
|
|
888 $self->start_element($data);
|
|
889 $self->characters($data);
|
|
890 $self->end_element($data);
|
|
891 }
|
|
892
|
|
893
|
|
894 =head2 characters
|
|
895
|
|
896 Title : characters
|
|
897 Usage : $eventgenerator->characters($str)
|
|
898 Function: Send a character events
|
|
899 Returns : none
|
|
900 Args : string
|
|
901
|
|
902
|
|
903 =cut
|
|
904
|
|
905 sub characters{
|
|
906 my ($self,$data) = @_;
|
|
907
|
|
908 return unless ( defined $data->{'Data'} );
|
|
909 if( $data->{'Data'} =~ /^\s+$/ ) {
|
|
910 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
|
|
911 }
|
|
912
|
|
913 if( $self->in_element('hsp') &&
|
|
914 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
|
|
915
|
|
916 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
|
|
917 }
|
|
918
|
|
919 $self->{'_last_data'} = $data->{'Data'};
|
|
920 }
|
|
921
|
|
922 =head2 _mode
|
|
923
|
|
924 Title : _mode
|
|
925 Usage : $obj->_mode($newval)
|
|
926 Function:
|
|
927 Example :
|
|
928 Returns : value of _mode
|
|
929 Args : newvalue (optional)
|
|
930
|
|
931
|
|
932 =cut
|
|
933
|
|
934 sub _mode{
|
|
935 my ($self,$value) = @_;
|
|
936 if( defined $value) {
|
|
937 $self->{'_mode'} = $value;
|
|
938 }
|
|
939 return $self->{'_mode'};
|
|
940 }
|
|
941
|
|
942 =head2 within_element
|
|
943
|
|
944 Title : within_element
|
|
945 Usage : if( $eventgenerator->within_element($element) ) {}
|
|
946 Function: Test if we are within a particular element
|
|
947 This is different than 'in' because within can be tested
|
|
948 for a whole block.
|
|
949 Returns : boolean
|
|
950 Args : string element name
|
|
951
|
|
952
|
|
953 =cut
|
|
954
|
|
955 sub within_element{
|
|
956 my ($self,$name) = @_;
|
|
957 return 0 if ( ! defined $name &&
|
|
958 ! defined $self->{'_elements'} ||
|
|
959 scalar @{$self->{'_elements'}} == 0) ;
|
|
960 foreach ( @{$self->{'_elements'}} ) {
|
|
961 if( $_ eq $name || $_ eq $MODEMAP{$name} ) {
|
|
962 return 1;
|
|
963 }
|
|
964 }
|
|
965 return 0;
|
|
966 }
|
|
967
|
|
968 =head2 in_element
|
|
969
|
|
970 Title : in_element
|
|
971 Usage : if( $eventgenerator->in_element($element) ) {}
|
|
972 Function: Test if we are in a particular element
|
|
973 This is different than 'in' because within can be tested
|
|
974 for a whole block.
|
|
975 Returns : boolean
|
|
976 Args : string element name
|
|
977
|
|
978
|
|
979 =cut
|
|
980
|
|
981 sub in_element{
|
|
982 my ($self,$name) = @_;
|
|
983 return 0 if ! defined $self->{'_elements'}->[0];
|
|
984 return ( $self->{'_elements'}->[0] eq $name ||
|
|
985 (exists $MODEMAP{$name} && $self->{'_elements'}->[0] eq $MODEMAP{$name})
|
|
986 );
|
|
987 }
|
|
988
|
|
989
|
|
990 =head2 start_document
|
|
991
|
|
992 Title : start_document
|
|
993 Usage : $eventgenerator->start_document
|
|
994 Function: Handles a start document event
|
|
995 Returns : none
|
|
996 Args : none
|
|
997
|
|
998
|
|
999 =cut
|
|
1000
|
|
1001 sub start_document{
|
|
1002 my ($self) = @_;
|
|
1003 $self->{'_lasttype'} = '';
|
|
1004 $self->{'_values'} = {};
|
|
1005 $self->{'_result'}= undef;
|
|
1006 $self->{'_mode'} = '';
|
|
1007 $self->{'_elements'} = [];
|
|
1008 }
|
|
1009
|
|
1010
|
|
1011 =head2 end_document
|
|
1012
|
|
1013 Title : end_document
|
|
1014 Usage : $eventgenerator->end_document
|
|
1015 Function: Handles an end document event
|
|
1016 Returns : Bio::Search::Result::ResultI object
|
|
1017 Args : none
|
|
1018
|
|
1019
|
|
1020 =cut
|
|
1021
|
|
1022 sub end_document{
|
|
1023 my ($self,@args) = @_;
|
|
1024 return $self->{'_result'};
|
|
1025 }
|
|
1026
|
|
1027 =head2 idlength
|
|
1028
|
|
1029 Title : idlength
|
|
1030 Usage : $obj->idlength($newval)
|
|
1031 Function: Internal storage of the length of the ID desc
|
|
1032 in the HSP alignment blocks. Defaults to
|
|
1033 $IDLENGTH class variable value
|
|
1034 Returns : value of idlength
|
|
1035 Args : newvalue (optional)
|
|
1036
|
|
1037
|
|
1038 =cut
|
|
1039
|
|
1040 sub idlength{
|
|
1041 my ($self,$value) = @_;
|
|
1042 if( defined $value) {
|
|
1043 $self->{'_idlength'} = $value;
|
|
1044 }
|
|
1045 return $self->{'_idlength'} || $IDLENGTH;
|
|
1046 }
|
|
1047
|
|
1048
|
|
1049 =head2 result_count
|
|
1050
|
|
1051 Title : result_count
|
|
1052 Usage : my $count = $searchio->result_count
|
|
1053 Function: Returns the number of results we have processed
|
|
1054 Returns : integer
|
|
1055 Args : none
|
|
1056
|
|
1057
|
|
1058 =cut
|
|
1059
|
|
1060 sub result_count {
|
|
1061 my $self = shift;
|
|
1062 return $self->{'_result_count'};
|
|
1063 }
|
|
1064
|
|
1065 1;
|
|
1066
|