comparison variant_effect_predictor/Bio/SearchIO/exonerate.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: exonerate.pm,v 1.3.2.3 2003/03/29 20:30:54 jason Exp $
2 #
3 # BioPerl module for Bio::SearchIO::exonerate
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SearchIO::exonerate - parser for Exonerate
16
17 =head1 SYNOPSIS
18
19 # do not use this module directly, it is a driver for SearchIO
20
21 use Bio::SearchIO;
22 my $searchio = new Bio::SearchIO(-file => 'file.exonerate',
23 -format => 'exonerate');
24
25
26 while( my $r = $searchio->next_result ) {
27 print $r->query_name, "\n";
28 }
29
30 =head1 DESCRIPTION
31
32 This is a driver for the SearchIO system for parsing Exonerate (Guy
33 Slater) output. You can get Exonerate at
34 http://cvsweb.sanger.ac.uk/cgi-bin/cvsweb.cgi/exonerate/?cvsroot=Ensembl
35 [until Guy puts up a Web reference,publication for it.]).
36
37 An optional parameter -min_intron is supported by the L<new>
38 initialization method. This is if you run Exonerate with a different
39 minimum intron length (default is 30) the parser will be able to
40 detect the difference between standard deletions and an intron. Still
41 some room to play with there that might cause this to get
42 misinterpreted that has not been fully tested or explored.
43
44 =head1 FEEDBACK
45
46 =head2 Mailing Lists
47
48 User feedback is an integral part of the evolution of this and other
49 Bioperl modules. Send your comments and suggestions preferably to
50 the Bioperl mailing list. Your participation is much appreciated.
51
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/MailList.shtml - About the mailing lists
54
55 =head2 Reporting Bugs
56
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via
59 email or the web:
60
61 bioperl-bugs@bioperl.org
62 http://bioperl.org/bioperl-bugs/
63
64 =head1 AUTHOR - Jason Stajich
65
66 Email jason@bioperl.org
67
68 Describe contact details here
69
70 =head1 CONTRIBUTORS
71
72 Additional contributors names and emails here
73
74 =head1 APPENDIX
75
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
78
79 =cut
80
81
82 # Let the code begin...
83
84
85 package Bio::SearchIO::exonerate;
86 use strict;
87 use vars qw(@ISA @STATES %MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $MIN_INTRON);
88 use Bio::SearchIO;
89
90 @ISA = qw(Bio::SearchIO );
91
92 use POSIX;
93
94
95 %MODEMAP = ('ExonerateOutput' => 'result',
96 'Hit' => 'hit',
97 'Hsp' => 'hsp'
98 );
99 %MAPPING =
100 (
101 'Hsp_query-from'=> 'HSP-query_start',
102 'Hsp_query-to' => 'HSP-query_end',
103 'Hsp_hit-from' => 'HSP-hit_start',
104 'Hsp_hit-to' => 'HSP-hit_end',
105 'Hsp_qseq' => 'HSP-query_seq',
106 'Hsp_hseq' => 'HSP-hit_seq',
107 'Hsp_midline' => 'HSP-homology_seq',
108 'Hsp_score' => 'HSP-score',
109 'Hsp_qlength' => 'HSP-query_length',
110 'Hsp_hlength' => 'HSP-hit_length',
111 'Hsp_align-len' => 'HSP-hsp_length',
112 'Hsp_identity' => 'HSP-identical',
113 'Hsp_gaps' => 'HSP-hsp_gaps',
114 'Hsp_hitgaps' => 'HSP-hit_gaps',
115 'Hsp_querygaps' => 'HSP-query_gaps',
116
117 'Hit_id' => 'HIT-name',
118 'Hit_desc' => 'HIT-description',
119 'Hit_len' => 'HIT-length',
120 'Hit_score' => 'HIT-score',
121
122 'ExonerateOutput_program' => 'RESULT-algorithm_name',
123 'ExonerateOutput_query-def' => 'RESULT-query_name',
124 'ExonerateOutput_query-desc'=> 'RESULT-query_description',
125 'ExonerateOutput_query-len' => 'RESULT-query_length',
126 );
127
128 $DEFAULT_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
129
130 $MIN_INTRON=30; # This is the minimum intron size
131
132 =head2 new
133
134 Title : new
135 Usage : my $obj = new Bio::SearchIO::exonerate();
136 Function: Builds a new Bio::SearchIO::exonerate object
137 Returns : an instance of Bio::SearchIO::exonerate
138 Args :
139
140
141 =cut
142
143 sub new {
144 my ($class) = shift;
145 my $self = $class->SUPER::new(@_);
146
147 my ($min_intron) = $self->_rearrange([qw(MIN_INTRON)], @_);
148 if( $min_intron ) {
149 $MIN_INTRON = $min_intron;
150 }
151 $self;
152 }
153
154 =head2 next_result
155
156 Title : next_result
157 Usage : my $hit = $searchio->next_result;
158 Function: Returns the next Result from a search
159 Returns : Bio::Search::Result::ResultI object
160 Args : none
161
162 =cut
163
164 sub next_result{
165 my ($self) = @_;
166 $self->{'_last_data'} = '';
167 my ($reporttype,$seenquery,$reportline);
168 $self->start_document();
169 my @hit_signifs;
170 my $seentop;
171 my (@q_ex, @m_ex, @h_ex); ## gc addition
172 while( defined($_ = $self->_readline) ) {
173 if( /^Query:\s+(\S+)(\s+(.+))?/ ) {
174 if( $seentop ) {
175 $self->end_element({'Name' => 'ExonerateOutput'});
176 $self->_pushback($_);
177 return $self->end_document();
178 }
179 $seentop = 1;
180 my ($nm,$desc) = ($1,$2);
181 chomp($desc) if defined $desc;
182 $self->{'_result_count'}++;
183 $self->start_element({'Name' => 'ExonerateOutput'});
184 $self->element({'Name' => 'ExonerateOutput_query-def',
185 'Data' => $nm });
186 $self->element({'Name' => 'ExonerateOutput_query-desc',
187 'Data' => $desc });
188 $self->element({'Name' => 'ExonerateOutput_program',
189 'Data' => 'Exonerate' });
190
191 } elsif ( /^Target:\s+(\S+)(\s+(.+))?/ ) {
192 my ($nm,$desc) = ($1,$2);
193 chomp($desc) if defined $desc;
194 $self->start_element({'Name' => 'Hit'});
195 $self->element({'Name' => 'Hit_id',
196 'Data' => $nm});
197 $self->element({'Name' => 'Hit_desc',
198 'Data' => $desc});
199 } elsif( s/^cigar:\s+(\S+)\s+ # query sequence id
200 (\d+)\s+(\d+)\s+([\-\+])\s+ # query start-end-strand
201 (\S+)\s+ # target sequence id
202 (\d+)\s+(\d+)\s+([\-\+])\s+ # target start-end-strand
203 (\d+)\s+ # score
204 //ox ) {
205
206 ## gc note:
207 ## $qe and $he are no longer used for calculating the ends,
208 ## just the $qs and $hs values and the alignment and insert lenghts
209 my ($qs,$qe,$qstrand) = ($2,$3,$4);
210 my ($hs,$he,$hstrand) = ($6,$7,$8);
211 my $score = $9;
212 # $self->element({'Name' => 'ExonerateOutput_query-len',
213 # 'Data' => $qe});
214 # $self->element({'Name' => 'Hit_len',
215 # 'Data' => $he});
216
217 my @rest = split;
218 if( $qstrand eq '-' ) {
219 $qstrand = -1;
220 ($qs,$qe) = ($qe,$qs); # flip-flop if we're on opp strand
221 $qs--; $qe++;
222 } else { $qstrand = 1; }
223 if( $hstrand eq '-' ) {
224 $hstrand = -1;
225 ($hs,$he) = ($he,$hs); # flip-flop if we're on opp strand
226 $hs--; $he++;
227 } else { $hstrand = 1; }
228 # okay let's do this right and generate a set of HSPs
229 # from the cigar line
230
231 ## gc note:
232 ## add one because these values are zero-based
233 ## this calculation was originally done lower in the code,
234 ## but it's clearer to do it just once at the start
235 $qs++;
236 $hs++;
237
238 my ($aln_len,$inserts,$deletes) = (0,0,0);
239 while( @rest >= 2 ) {
240 my ($state,$len) = (shift @rest, shift @rest);
241 if( $state eq 'I' ) {
242 $inserts+=$len;
243 } elsif( $state eq 'D' ) {
244 if( $len >= $MIN_INTRON ) {
245 $self->start_element({'Name' => 'Hsp'});
246
247 $self->element({'Name' => 'Hsp_score',
248 'Data' => $score});
249 $self->element({'Name' => 'Hsp_align-len',
250 'Data' => $aln_len});
251 $self->element({'Name' => 'Hsp_identity',
252 'Data' => $aln_len -
253 ($inserts + $deletes)});
254
255 # HSP ends where the other begins
256 $self->element({'Name' => 'Hsp_query-from',
257 'Data' => $qs});
258 ## gc note:
259 ## $qs is now the start of the next hsp
260 ## the end of this hsp is 1 before this position
261 ## (or 1 after in case of reverse strand)
262 $qs += $aln_len*$qstrand;
263 $self->element({'Name' => 'Hsp_query-to',
264 'Data' => $qs - ($qstrand*1)});
265
266 $hs += $deletes*$hstrand;
267 $self->element({'Name' => 'Hsp_hit-from',
268 'Data' => $hs});
269 $hs += $aln_len*$hstrand;
270 $self->element({'Name' => 'Hsp_hit-to',
271 'Data' => $hs-($hstrand*1)});
272
273 $self->element({'Name' => 'Hsp_align-len',
274 'Data' => $aln_len + $inserts
275 + $deletes});
276 $self->element({'Name' => 'Hsp_identity',
277 'Data' => $aln_len });
278
279 $self->element({'Name' => 'Hsp_gaps',
280 'Data' => $inserts + $deletes});
281 $self->element({'Name' => 'Hsp_querygaps',
282 'Data' => $inserts});
283 $self->element({'Name' => 'Hsp_hitgaps',
284 'Data' => $deletes});
285
286 ## gc addition start
287
288 $self->element({'Name' => 'Hsp_qseq',
289 'Data' => shift @q_ex,
290 });
291 $self->element({'Name' => 'Hsp_hseq',
292 'Data' => shift @h_ex,
293 });
294 $self->element({'Name' => 'Hsp_midline',
295 'Data' => shift @m_ex,
296 });
297 ## gc addition end
298 $self->end_element({'Name' => 'Hsp'});
299
300 $aln_len = $inserts = $deletes = 0;
301 }
302 $deletes+=$len;
303 } else {
304 $aln_len += $len;
305 }
306 }
307 $self->start_element({'Name' => 'Hsp'});
308
309 ## gc addition start
310
311 $self->element({'Name' => 'Hsp_qseq',
312 'Data' => shift @q_ex,
313 });
314 $self->element({'Name' => 'Hsp_hseq',
315 'Data' => shift @h_ex,
316 });
317 $self->element({'Name' => 'Hsp_midline',
318 'Data' => shift @m_ex,
319 });
320 ## gc addition end
321
322 $self->element({'Name' => 'Hsp_score',
323 'Data' => $score});
324
325 $self->element({'Name' => 'Hsp_query-from',
326 'Data' => $qs});
327
328 $qs += $aln_len*$qstrand;
329 $self->element({'Name' => 'Hsp_query-to',
330 'Data' => $qs - ($qstrand*1)});
331
332 $hs += $deletes*$hstrand;
333 $self->element({'Name' => 'Hsp_hit-from',
334 'Data' => $hs});
335 $hs += $aln_len*$hstrand;
336 $self->element({'Name' => 'Hsp_hit-to',
337 'Data' => $hs -($hstrand*1)});
338
339 $self->element({'Name' => 'Hsp_align-len',
340 'Data' => $aln_len});
341
342 $self->element({'Name' => 'Hsp_identity',
343 'Data' => $aln_len - ($inserts + $deletes)});
344
345 $self->element({'Name' => 'Hsp_gaps',
346 'Data' => $inserts + $deletes});
347
348 $self->element({'Name' => 'Hsp_querygaps',
349 'Data' => $inserts});
350 $self->element({'Name' => 'Hsp_hitgaps',
351 'Data' => $deletes});
352 $self->end_element({'Name' => 'Hsp'});
353 $self->element({'Name' => 'Hit_score',
354 'Data' => $score});
355 $self->end_element({'Name' => 'Hit'});
356 $self->end_element({'Name' => 'ExonerateOutput'});
357
358 return $self->end_document();
359 } else {
360 }
361 }
362 return $self->end_document() if( $seentop );
363 }
364
365 =head2 start_element
366
367 Title : start_element
368 Usage : $eventgenerator->start_element
369 Function: Handles a start element event
370 Returns : none
371 Args : hashref with at least 2 keys 'Data' and 'Name'
372
373
374 =cut
375
376 sub start_element{
377 my ($self,$data) = @_;
378 # we currently don't care about attributes
379 my $nm = $data->{'Name'};
380 my $type = $MODEMAP{$nm};
381
382 if( $type ) {
383 if( $self->_eventHandler->will_handle($type) ) {
384 my $func = sprintf("start_%s",lc $type);
385 $self->_eventHandler->$func($data->{'Attributes'});
386 }
387 unshift @{$self->{'_elements'}}, $type;
388
389 if($type eq 'result') {
390 $self->{'_values'} = {};
391 $self->{'_result'}= undef;
392 }
393 }
394
395 }
396
397 =head2 end_element
398
399 Title : start_element
400 Usage : $eventgenerator->end_element
401 Function: Handles an end element event
402 Returns : none
403 Args : hashref with at least 2 keys 'Data' and 'Name'
404
405
406 =cut
407
408 sub end_element {
409 my ($self,$data) = @_;
410 my $nm = $data->{'Name'};
411 my $type = $MODEMAP{$nm};
412 my $rc;
413
414 if( $type = $MODEMAP{$nm} ) {
415 if( $self->_eventHandler->will_handle($type) ) {
416 my $func = sprintf("end_%s",lc $type);
417 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
418 $self->{'_values'});
419 }
420 shift @{$self->{'_elements'}};
421
422 } elsif( $MAPPING{$nm} ) {
423
424 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
425 my $key = (keys %{$MAPPING{$nm}})[0];
426 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
427 } else {
428 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
429 }
430 } else {
431 $self->debug( "unknown nm $nm, ignoring\n");
432 }
433 $self->{'_last_data'} = ''; # remove read data if we are at
434 # end of an element
435 $self->{'_result'} = $rc if( defined $type && $type eq 'result' );
436 return $rc;
437 }
438
439 =head2 element
440
441 Title : element
442 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
443 Function: Convience method that calls start_element, characters, end_element
444 Returns : none
445 Args : Hash ref with the keys 'Name' and 'Data'
446
447
448 =cut
449
450 sub element{
451 my ($self,$data) = @_;
452 $self->start_element($data);
453 $self->characters($data);
454 $self->end_element($data);
455 }
456
457 =head2 characters
458
459 Title : characters
460 Usage : $eventgenerator->characters($str)
461 Function: Send a character events
462 Returns : none
463 Args : string
464
465
466 =cut
467
468 sub characters{
469 my ($self,$data) = @_;
470
471 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
472
473 $self->{'_last_data'} = $data->{'Data'};
474 }
475
476 =head2 within_element
477
478 Title : within_element
479 Usage : if( $eventgenerator->within_element($element) ) {}
480 Function: Test if we are within a particular element
481 This is different than 'in' because within can be tested
482 for a whole block.
483 Returns : boolean
484 Args : string element name
485
486
487 =cut
488
489 sub within_element{
490 my ($self,$name) = @_;
491 return 0 if ( ! defined $name &&
492 ! defined $self->{'_elements'} ||
493 scalar @{$self->{'_elements'}} == 0) ;
494 foreach ( @{$self->{'_elements'}} ) {
495 if( $_ eq $name ) {
496 return 1;
497 }
498 }
499 return 0;
500 }
501
502
503 =head2 in_element
504
505 Title : in_element
506 Usage : if( $eventgenerator->in_element($element) ) {}
507 Function: Test if we are in a particular element
508 This is different than 'in' because within can be tested
509 for a whole block.
510 Returns : boolean
511 Args : string element name
512
513
514 =cut
515
516 sub in_element{
517 my ($self,$name) = @_;
518 return 0 if ! defined $self->{'_elements'}->[0];
519 return ( $self->{'_elements'}->[0] eq $name)
520 }
521
522 =head2 start_document
523
524 Title : start_document
525 Usage : $eventgenerator->start_document
526 Function: Handle a start document event
527 Returns : none
528 Args : none
529
530
531 =cut
532
533 sub start_document{
534 my ($self) = @_;
535 $self->{'_lasttype'} = '';
536 $self->{'_values'} = {};
537 $self->{'_result'}= undef;
538 $self->{'_elements'} = [];
539 $self->{'_reporttype'} = 'exonerate';
540 }
541
542
543 =head2 end_document
544
545 Title : end_document
546 Usage : $eventgenerator->end_document
547 Function: Handles an end document event
548 Returns : Bio::Search::Result::ResultI object
549 Args : none
550
551
552 =cut
553
554 sub end_document{
555 my ($self,@args) = @_;
556 return $self->{'_result'};
557 }
558
559
560 sub write_result {
561 my ($self, $blast, @args) = @_;
562
563 if( not defined($self->writer) ) {
564 $self->warn("Writer not defined. Using a $DEFAULT_WRITER_CLASS");
565 $self->writer( $DEFAULT_WRITER_CLASS->new() );
566 }
567 $self->SUPER::write_result( $blast, @args );
568 }
569
570 sub result_count {
571 my $self = shift;
572 return $self->{'_result_count'};
573 }
574
575 sub report_count { shift->result_count }
576
577 1;
578