comparison variant_effect_predictor/Bio/SearchIO/waba.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: waba.pm,v 1.8 2002/12/11 22:12:32 jason Exp $
2 #
3 # BioPerl module for Bio::SearchIO::waba
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SearchIO::waba - SearchIO parser for Jim Kent WABA program
16 alignment output
17
18 =head1 SYNOPSIS
19
20 # do not use this object directly, rather through Bio::SearchIO
21
22 use Bio::SearchIO;
23 my $in = new Bio::SearchIO(-format => 'waba',
24 -file => 'output.wab');
25 while( my $result = $in->next_result ) {
26 while( my $hit = $result->next_hit ) {
27 while( my $hsp = $result->next_hsp ) {
28
29 }
30 }
31 }
32
33 =head1 DESCRIPTION
34
35 This parser will process the waba output (NOT the human readable format).
36
37 =head1 FEEDBACK
38
39 =head2 Mailing Lists
40
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
44
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/MailList.shtml - About the mailing lists
47
48 =head2 Reporting Bugs
49
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 of the bugs and their resolution. Bug reports can be submitted via
52 email or the web:
53
54 bioperl-bugs@bioperl.org
55 http://bugzilla.bioperl.org/
56
57 =head1 AUTHOR - Jason Stajich
58
59 Email jason@bioperl.org
60
61 Describe contact details here
62
63 =head1 CONTRIBUTORS
64
65 Additional contributors names and emails here
66
67 =head1 APPENDIX
68
69 The rest of the documentation details each of the object methods.
70 Internal methods are usually preceded with a _
71
72 =cut
73
74
75 # Let the code begin...
76
77
78 package Bio::SearchIO::waba;
79 use vars qw(@ISA %MODEMAP %MAPPING @STATES);
80 use strict;
81
82 # Object preamble - inherits from Bio::Root::Root
83
84 use Bio::SearchIO;
85
86 use POSIX;
87
88 BEGIN {
89 # mapping of NCBI Blast terms to Bioperl hash keys
90 %MODEMAP = ('WABAOutput' => 'result',
91 'Hit' => 'hit',
92 'Hsp' => 'hsp'
93 );
94 @STATES = qw(Hsp_qseq Hsp_hseq Hsp_stateseq);
95 %MAPPING =
96 (
97 'Hsp_query-from'=> 'HSP-query_start',
98 'Hsp_query-to' => 'HSP-query_end',
99 'Hsp_hit-from' => 'HSP-hit_start',
100 'Hsp_hit-to' => 'HSP-hit_end',
101 'Hsp_qseq' => 'HSP-query_seq',
102 'Hsp_hseq' => 'HSP-hit_seq',
103 'Hsp_midline' => 'HSP-homology_seq',
104 'Hsp_stateseq' => 'HSP-hmmstate_seq',
105 'Hsp_align-len' => 'HSP-hsp_length',
106
107 'Hit_id' => 'HIT-name',
108 'Hit_accession' => 'HIT-accession',
109
110 'WABAOutput_program' => 'RESULT-algorithm_name',
111 'WABAOutput_version' => 'RESULT-algorithm_version',
112 'WABAOutput_query-def'=> 'RESULT-query_name',
113 'WABAOutput_query-db' => 'RESULT-query_database',
114 'WABAOutput_db' => 'RESULT-database_name',
115 );
116 }
117
118
119 @ISA = qw(Bio::SearchIO );
120
121 =head2 new
122
123 Title : new
124 Usage : my $obj = new Bio::SearchIO::waba();
125 Function: Builds a new Bio::SearchIO::waba object
126 Returns : Bio::SearchIO::waba
127 Args : see Bio::SearchIO
128
129 =cut
130
131 sub _initialize {
132 my ($self,@args) = @_;
133 $self->SUPER::_initialize(@args);
134 $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::WABAResult'));
135
136 $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::WABAHSP'));
137 }
138
139
140 =head2 next_result
141
142 Title : next_result
143 Usage : my $hit = $searchio->next_result;
144 Function: Returns the next Result from a search
145 Returns : Bio::Search::Result::ResultI object
146 Args : none
147
148 =cut
149
150 sub next_result{
151 my ($self) = @_;
152
153 my ($curquery,$curhit);
154 my $state = -1;
155 $self->start_document();
156 my @hit_signifs;
157 while( defined ($_ = $self->_readline )) {
158
159 if( $state == -1 ) {
160 my ($qid, $qhspid,$qpercent, $junk,
161 $alnlen,$qdb,$qacc,$qstart,$qend,$qstrand,
162 $hitdb,$hacc,$hstart,$hend,
163 $hstrand) =
164 ( /^(\S+)\.(\S+)\s+align\s+ # get the queryid
165 (\d+(\.\d+)?)\%\s+ # get the percentage
166 of\s+(\d+)\s+ # get the length of the alignment
167 (\S+)\s+ # this is the query database
168 (\S+):(\d+)\-(\d+) # The accession:start-end for query
169 \s+([\-\+]) # query strand
170 \s+(\S+)\. # hit db
171 (\S+):(\d+)\-(\d+) # The accession:start-end for hit
172 \s+([\-\+])\s*$ # hit strand
173 /ox );
174
175 # Curses. Jim's code is 0 based, the following is to readjust
176 $hstart++; $hend++; $qstart++; $qend++;
177
178 if( ! defined $alnlen ) {
179 $self->warn("Unable to parse the rest of the WABA alignment info for: $_");
180 last;
181 }
182 $self->{'_reporttype'} = 'WABA'; # hardcoded - only
183 # one type of WABA AFAIK
184 if( defined $curquery &&
185 $curquery ne $qid ) {
186 $self->end_element({'Name' => 'Hit'});
187 $self->_pushback($_);
188 $self->end_element({'Name' => 'WABAOutput'});
189 return $self->end_document();
190 }
191
192 if( defined $curhit &&
193 $curhit ne $hacc) {
194 # slight duplication here -- keep these in SYNC
195 $self->end_element({'Name' => 'Hit'});
196 $self->start_element({'Name' => 'Hit'});
197 $self->element({'Name' => 'Hit_id',
198 'Data' => $hacc});
199 $self->element({'Name' => 'Hit_accession',
200 'Data' => $hacc});
201
202 } elsif ( ! defined $curquery ) {
203 $self->start_element({'Name' => 'WABAOutput'});
204 $self->{'_result_count'}++;
205 $self->element({'Name' => 'WABAOutput_query-def',
206 'Data' => $qid });
207 $self->element({'Name' => 'WABAOutput_program',
208 'Data' => 'WABA'});
209 $self->element({'Name' => 'WABAOutput_query-db',
210 'Data' => $qdb});
211 $self->element({'Name' => 'WABAOutput_db',
212 'Data' => $hitdb});
213
214 # slight duplication here -- keep these N'SYNC ;-)
215 $self->start_element({'Name' => 'Hit'});
216 $self->element({'Name' => 'Hit_id',
217 'Data' => $hacc});
218 $self->element({'Name' => 'Hit_accession',
219 'Data' => $hacc});
220 }
221
222
223 # strand is inferred by start,end values
224 # in the Result Builder
225 if( $qstrand eq '-' ) {
226 ($qstart,$qend) = ($qend,$qstart);
227 }
228 if( $hstrand eq '-' ) {
229 ($hstart,$hend) = ($hend,$hstart);
230 }
231
232 $self->start_element({'Name' => 'Hsp'});
233 $self->element({'Name' => 'Hsp_query-from',
234 'Data' => $qstart});
235 $self->element({'Name' => 'Hsp_query-to',
236 'Data' => $qend});
237 $self->element({'Name' => 'Hsp_hit-from',
238 'Data' => $hstart});
239 $self->element({'Name' => 'Hsp_hit-to',
240 'Data' => $hend});
241 $self->element({'Name' => 'Hsp_align-len',
242 'Data' => $alnlen});
243
244 $curquery = $qid;
245 $curhit = $hacc;
246 $state = 0;
247 } elsif( ! defined $curquery ) {
248 $self->warn("skipping because no Hit begin line was recognized\n$_") if( $_ !~ /^\s+$/ );
249 next;
250 } else {
251 chomp;
252 $self->element({'Name' => $STATES[$state++],
253 'Data' => $_});
254 if( $state >= scalar @STATES ) {
255 $state = -1;
256 $self->end_element({'Name' => 'Hsp'});
257 }
258 }
259 }
260 if( defined $curquery ) {
261 $self->end_element({'Name' => 'Hit'});
262 $self->end_element({'Name' => 'WABAOutput'});
263 return $self->end_document();
264 }
265 return undef;
266 }
267
268 =head2 start_element
269
270 Title : start_element
271 Usage : $eventgenerator->start_element
272 Function: Handles a start element event
273 Returns : none
274 Args : hashref with at least 2 keys 'Data' and 'Name'
275
276
277 =cut
278
279 sub start_element{
280 my ($self,$data) = @_;
281 # we currently don't care about attributes
282 my $nm = $data->{'Name'};
283 if( my $type = $MODEMAP{$nm} ) {
284 $self->_mode($type);
285 if( $self->_eventHandler->will_handle($type) ) {
286 my $func = sprintf("start_%s",lc $type);
287 $self->_eventHandler->$func($data->{'Attributes'});
288 }
289 unshift @{$self->{'_elements'}}, $type;
290 }
291 if($nm eq 'WABAOutput') {
292 $self->{'_values'} = {};
293 $self->{'_result'}= undef;
294 $self->{'_mode'} = '';
295 }
296
297 }
298
299 =head2 end_element
300
301 Title : start_element
302 Usage : $eventgenerator->end_element
303 Function: Handles an end element event
304 Returns : none
305 Args : hashref with at least 2 keys 'Data' and 'Name'
306
307
308 =cut
309
310 sub end_element {
311 my ($self,$data) = @_;
312 my $nm = $data->{'Name'};
313 my $rc;
314 # Hsp are sort of weird, in that they end when another
315 # object begins so have to detect this in end_element for now
316 if( $nm eq 'Hsp' ) {
317 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
318 $self->element({'Name' => $_,
319 'Data' => $self->{'_last_hspdata'}->{$_}});
320 }
321 $self->{'_last_hspdata'} = {}
322 }
323
324 if( my $type = $MODEMAP{$nm} ) {
325 if( $self->_eventHandler->will_handle($type) ) {
326 my $func = sprintf("end_%s",lc $type);
327 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
328 $self->{'_values'});
329 }
330 shift @{$self->{'_elements'}};
331
332 } elsif( $MAPPING{$nm} ) {
333 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
334 my $key = (keys %{$MAPPING{$nm}})[0];
335 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
336 } else {
337 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
338 }
339 } else {
340 $self->warn( "unknown nm $nm ignoring\n");
341 }
342 $self->{'_last_data'} = ''; # remove read data if we are at
343 # end of an element
344 $self->{'_result'} = $rc if( $nm eq 'WABAOutput' );
345 return $rc;
346
347 }
348
349 =head2 element
350
351 Title : element
352 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
353 Function: Convience method that calls start_element, characters, end_element
354 Returns : none
355 Args : Hash ref with the keys 'Name' and 'Data'
356
357
358 =cut
359
360 sub element{
361 my ($self,$data) = @_;
362 $self->start_element($data);
363 $self->characters($data);
364 $self->end_element($data);
365 }
366
367
368 =head2 characters
369
370 Title : characters
371 Usage : $eventgenerator->characters($str)
372 Function: Send a character events
373 Returns : none
374 Args : string
375
376
377 =cut
378
379 sub characters{
380 my ($self,$data) = @_;
381
382 return unless ( defined $data->{'Data'} );
383 if( $data->{'Data'} =~ /^\s+$/ ) {
384 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
385 }
386
387 if( $self->in_element('hsp') &&
388 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
389
390 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
391 }
392
393 $self->{'_last_data'} = $data->{'Data'};
394 }
395
396 =head2 _mode
397
398 Title : _mode
399 Usage : $obj->_mode($newval)
400 Function:
401 Example :
402 Returns : value of _mode
403 Args : newvalue (optional)
404
405
406 =cut
407
408 sub _mode{
409 my ($self,$value) = @_;
410 if( defined $value) {
411 $self->{'_mode'} = $value;
412 }
413 return $self->{'_mode'};
414 }
415
416 =head2 within_element
417
418 Title : within_element
419 Usage : if( $eventgenerator->within_element($element) ) {}
420 Function: Test if we are within a particular element
421 This is different than 'in' because within can be tested
422 for a whole block.
423 Returns : boolean
424 Args : string element name
425
426
427 =cut
428
429 sub within_element{
430 my ($self,$name) = @_;
431 return 0 if ( ! defined $name &&
432 ! defined $self->{'_elements'} ||
433 scalar @{$self->{'_elements'}} == 0) ;
434 foreach ( @{$self->{'_elements'}} ) {
435 if( $_ eq $name ) {
436 return 1;
437 }
438 }
439 return 0;
440 }
441
442 =head2 in_element
443
444 Title : in_element
445 Usage : if( $eventgenerator->in_element($element) ) {}
446 Function: Test if we are in a particular element
447 This is different than 'in' because within can be tested
448 for a whole block.
449 Returns : boolean
450 Args : string element name
451
452
453 =cut
454
455 sub in_element{
456 my ($self,$name) = @_;
457 return 0 if ! defined $self->{'_elements'}->[0];
458 return ( $self->{'_elements'}->[0] eq $name)
459 }
460
461
462 =head2 start_document
463
464 Title : start_document
465 Usage : $eventgenerator->start_document
466 Function: Handles a start document event
467 Returns : none
468 Args : none
469
470
471 =cut
472
473 sub start_document{
474 my ($self) = @_;
475 $self->{'_lasttype'} = '';
476 $self->{'_values'} = {};
477 $self->{'_result'}= undef;
478 $self->{'_mode'} = '';
479 $self->{'_elements'} = [];
480 }
481
482
483 =head2 end_document
484
485 Title : end_document
486 Usage : $eventgenerator->end_document
487 Function: Handles an end document event
488 Returns : Bio::Search::Result::ResultI object
489 Args : none
490
491
492 =cut
493
494 sub end_document{
495 my ($self,@args) = @_;
496 return $self->{'_result'};
497 }
498
499 =head2 result_count
500
501 Title : result_count
502 Usage : my $count = $searchio->result_count
503 Function: Returns the number of results we have processed
504 Returns : integer
505 Args : none
506
507
508 =cut
509
510 sub result_count {
511 my $self = shift;
512 return $self->{'_result_count'};
513 }
514
515 sub report_count { shift->result_count }
516
517 1;