comparison variant_effect_predictor/Bio/EnsEMBL/BaseAlignFeature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::BaseAlignFeature - Baseclass providing a common abstract
24 implmentation for alignment features
25
26 =head1 SYNOPSIS
27
28 my $feat = new Bio::EnsEMBL::DnaPepAlignFeature(
29 -slice => $slice,
30 -start => 100,
31 -end => 120,
32 -strand => 1,
33 -hseqname => 'SP:RF1231',
34 -hstart => 200,
35 -hend => 220,
36 -analysis => $analysis,
37 -cigar_string => '10M3D5M2I'
38 );
39
40 Alternatively if you have an array of ungapped features
41
42 my $feat =
43 new Bio::EnsEMBL::DnaPepAlignFeature( -features => \@features );
44
45 Where @features is an array of Bio::EnsEMBL::FeaturePair
46
47 There is a method to manipulate the cigar_string into ungapped features
48
49 my @ungapped_features = $feat->ungapped_features();
50
51 This converts the cigar string into an array of Bio::EnsEMBL::FeaturePair
52
53 $analysis is a Bio::EnsEMBL::Analysis object
54
55 Bio::EnsEMBL::Feature methods can be used
56 Bio::EnsEMBL::FeaturePair methods can be used
57
58 The cigar_string contains the ungapped pieces that make up the gapped
59 alignment
60
61 It looks like: n Matches [ x Deletes or Inserts m Matches ]*
62 but a bit more condensed like "23M4I12M2D1M"
63 and evenmore condensed as you can ommit 1s "23M4I12M2DM"
64
65
66 To make things clearer this is how a blast HSP would be parsed
67
68 >AK014066
69 Length = 146
70
71 Minus Strand HSPs:
72
73 Score = 76 (26.8 bits), Expect = 1.4, P = 0.74
74 Identities = 20/71 (28%), Positives = 29/71 (40%), Frame = -1
75
76 Query: 479 GLQAPPPTPQGCRLIPPPPLGLQAPLPTLRAVGSSHHHP*GRQGSSLSSFRSSLASKASA 300
77 G APPP PQG R P P G + P L + + ++ R +A +
78 Sbjct: 7 GALAPPPAPQG-RWAFPRPTG-KRPATPLHGTARQDRQVRRSEAAKVTGCRGRVAPHVAP 64
79
80 Query: 299 SSPHNPSPLPS 267
81 H P+P P+
82 Sbjct: 65 PLTHTPTPTPT 75
83
84 The alignment goes from 267 to 479 in sequence 1 and 7 to 75 in sequence 2
85 and the strand is -1.
86
87 The alignment is made up of the following ungapped pieces :
88
89 sequence 1 start 447 , sequence 2 start 7 , match length 33 , strand -1
90 sequence 1 start 417 , sequence 2 start 18 , match length 27 , strand -1
91 sequence 1 start 267 , sequence 2 start 27 , match length 137 , strand -1
92
93 These ungapped pieces are made up into the following string (called a cigar
94 string) "33M3I27M3I137M" with start 267 end 479 strand -1 hstart 7 hend 75
95 hstrand 1 and feature type would be DnaPepAlignFeature
96
97 =cut
98
99
100 package Bio::EnsEMBL::BaseAlignFeature;
101
102 use Bio::EnsEMBL::FeaturePair;
103 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
104 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
105 use Scalar::Util qw(weaken isweak);
106
107 use vars qw(@ISA);
108 use strict;
109
110 @ISA = qw(Bio::EnsEMBL::FeaturePair);
111
112
113 =head2 new
114
115 Arg [..] : List of named arguments. (-cigar_string , -features) defined
116 in this constructor, others defined in FeaturePair and
117 SeqFeature superclasses. Either cigar_string or a list
118 of ungapped features should be provided - not both.
119 Example : $baf = new BaseAlignFeatureSubclass(-cigar_string => '3M3I12M');
120 Description: Creates a new BaseAlignFeature using either a cigarstring or
121 a list of ungapped features. BaseAlignFeature is an abstract
122 baseclass and should not actually be instantiated - rather its
123 subclasses should be.
124 Returntype : Bio::EnsEMBL::BaseAlignFeature
125 Exceptions : thrown if both feature and cigar string args are provided
126 thrown if neither feature nor cigar string args are provided
127 Caller : general
128 Status : Stable
129
130 =cut
131
132 sub new {
133
134 my $caller = shift;
135
136 my $class = ref($caller) || $caller;
137
138 my $self = $class->SUPER::new(@_);
139
140 my ($cigar_string,$features) = rearrange([qw(CIGAR_STRING FEATURES)], @_);
141
142 if (defined($cigar_string) && defined($features)) {
143 throw("CIGAR_STRING or FEATURES argument is required - not both.");
144 } elsif (defined($features)) {
145 $self->_parse_features($features);
146
147 } elsif (defined($cigar_string)) {
148 $self->{'cigar_string'} = $cigar_string;
149 } else {
150 throw("CIGAR_STRING or FEATURES argument is required");
151 }
152
153 return $self;
154 }
155
156
157 =head2 new_fast
158
159 Arg [1] : hashref $hashref
160 A hashref which will be blessed into a PepDnaAlignFeature.
161 Example : none
162 Description: This allows for very fast object creation when a large number
163 of PepDnaAlignFeatures needs to be created. This is a bit of
164 a hack but necessary when thousands of features need to be
165 generated within a couple of seconds for web display. It is
166 not recommended that this method be called unless you know what
167 you are doing. It requires knowledge of the internals of this
168 class and its superclasses.
169 Returntype : Bio::EnsEMBL::BaseAlignFeature
170 Exceptions : none
171 Caller : none currently
172 Status : Stable
173
174 =cut
175
176 sub new_fast {
177 my ($class, $hashref) = @_;
178 my $self = bless $hashref, $class;
179 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
180 return $self;
181 }
182
183
184 =head2 cigar_string
185
186 Arg [1] : string $cigar_string
187 Example : $feature->cigar_string( "12MI3M" );
188 Description: get/set for attribute cigar_string
189 cigar_string describes the alignment. "xM" stands for
190 x matches (mismatches), "xI" for inserts into query sequence
191 (thats the ensembl sequence), "xD" for deletions
192 (inserts in the subject). an "x" that is 1 can be omitted.
193 Returntype : string
194 Exceptions : none
195 Caller : general
196 Status : Stable
197
198 =cut
199
200 sub cigar_string {
201 my $self = shift;
202 $self->{'cigar_string'} = shift if(@_);
203 return $self->{'cigar_string'};
204 }
205
206
207 =head2 alignment_length
208
209 Arg [1] : None
210 Description: return the alignment length (including indels) based on the cigar_string
211 Returntype : int
212 Exceptions :
213 Caller :
214 Status : Stable
215
216 =cut
217
218 sub alignment_length {
219 my $self = shift;
220
221 if (! defined $self->{'_alignment_length'} && defined $self->cigar_string) {
222
223 my @pieces = ( $self->cigar_string =~ /(\d*[MDI])/g );
224 unless (@pieces) {
225 print STDERR "Error parsing cigar_string\n";
226 }
227 my $alignment_length = 0;
228 foreach my $piece (@pieces) {
229 my ($length) = ( $piece =~ /^(\d*)/ );
230 if (! defined $length || $length eq "") {
231 $length = 1;
232 }
233 $alignment_length += $length;
234 }
235 $self->{'_alignment_length'} = $alignment_length;
236 }
237 return $self->{'_alignment_length'};
238 }
239
240 =head2 ungapped_features
241
242 Args : none
243 Example : @ungapped_features = $align_feature->get_feature
244 Description: converts the internal cigar_string into an array of
245 ungapped feature pairs
246 Returntype : list of Bio::EnsEMBL::FeaturePair
247 Exceptions : cigar_string not set internally
248 Caller : general
249 Status : Stable
250
251 =cut
252
253 sub ungapped_features {
254 my ($self) = @_;
255
256 if (!defined($self->{'cigar_string'})) {
257 throw("No cigar_string defined. Can't return ungapped features");
258 }
259
260 return @{$self->_parse_cigar()};
261 }
262
263 =head2 strands_reversed
264
265 Arg [1] : int $strands_reversed
266 Description: get/set for attribute strands_reversed
267 0 means that strand and hstrand are the original strands obtained
268 from the alignment program used
269 1 means that strand and hstrand have been flipped as compared to
270 the original result provided by the alignment program used.
271 You may want to use the reverse_complement method to restore the
272 original strandness.
273 Returntype : int
274 Exceptions : none
275 Caller : general
276 Status : Stable
277
278 =cut
279
280 sub strands_reversed {
281 my ($self, $arg) = @_;
282
283 if ( defined $arg ) {
284 $self->{'strands_reversed'} = $arg ;
285 }
286
287 $self->{'strands_reversed'} = 0 unless (defined $self->{'strands_reversed'});
288
289 return $self->{'strands_reversed'};
290 }
291
292 =head2 reverse_complement
293
294 Args : none
295 Description: reverse complement the FeaturePair,
296 modifing strand, hstrand and cigar_string in consequence
297 Returntype : none
298 Exceptions : none
299 Caller : general
300 Status : Stable
301
302 =cut
303
304 sub reverse_complement {
305 my ($self) = @_;
306
307 # reverse strand in both sequences
308 $self->strand($self->strand * -1);
309 $self->hstrand($self->hstrand * -1);
310
311 # reverse cigar_string as consequence
312 my $cigar_string = $self->cigar_string;
313 $cigar_string =~ s/(D|I|M)/$1 /g;
314 my @cigar_pieces = split / /,$cigar_string;
315 $cigar_string = "";
316 while (my $piece = pop @cigar_pieces) {
317 $cigar_string .= $piece;
318 }
319
320 $self->{'strands_reversed'} = 0 unless (defined $self->{'strands_reversed'});
321
322 if ($self->strands_reversed) {
323 $self->strands_reversed(0)
324 } else {
325 $self->strands_reversed(1);
326 }
327
328 $self->cigar_string($cigar_string);
329 }
330
331
332
333 =head2 transform
334
335 Arg 1 : String $coordinate_system_name
336 Arg [2] : String $coordinate_system_version
337 Example : $feature = $feature->transform('contig');
338 $feature = $feature->transform('chromosome', 'NCBI33');
339 Description: Moves this AlignFeature to the given coordinate system.
340 If the feature cannot be transformed to the destination
341 coordinate system undef is returned instead.
342 Returntype : Bio::EnsEMBL::BaseAlignFeature;
343 Exceptions : wrong parameters
344 Caller : general
345 Status : Medium Risk
346 : deprecation needs to be removed at some time
347
348 =cut
349
350 sub transform {
351 my $self = shift;
352
353 # catch for old style transform calls
354 if( ref $_[0] eq 'HASH') {
355 deprecate("Calling transform with a hashref is deprecate.\n" .
356 'Use $feat->transfer($slice) or ' .
357 '$feat->transform("coordsysname") instead.');
358 my (undef, $new_feat) = each(%{$_[0]});
359 return $self->transfer($new_feat->slice);
360 }
361
362 my $new_feature = $self->SUPER::transform(@_);
363 if ( !defined($new_feature)
364 || $new_feature->length() != $self->length() )
365 {
366 my @segments = @{ $self->project(@_) };
367
368 if ( !@segments ) {
369 return undef;
370 }
371
372 my @ungapped;
373 foreach my $f ( $self->ungapped_features() ) {
374 $f = $f->transform(@_);
375 if ( defined($f) ) {
376 push( @ungapped, $f );
377 } else {
378 warning( "Failed to transform alignment feature; "
379 . "ungapped component could not be transformed" );
380 return undef;
381 }
382 }
383
384 eval { $new_feature = $self->new( -features => \@ungapped ); };
385
386 if ($@) {
387 warning($@);
388 return undef;
389 }
390 } ## end if ( !defined($new_feature...))
391
392 return $new_feature;
393 }
394
395
396 =head2 _parse_cigar
397
398 Args : none
399 Description: PRIVATE (internal) method - creates ungapped features from
400 internally stored cigar line
401 Returntype : list of Bio::EnsEMBL::FeaturePair
402 Exceptions : none
403 Caller : ungapped_features
404 Status : Stable
405
406 =cut
407
408 sub _parse_cigar {
409 my ( $self ) = @_;
410
411 my $query_unit = $self->_query_unit();
412 my $hit_unit = $self->_hit_unit();
413
414 my $string = $self->{'cigar_string'};
415
416 throw("No cigar string defined in object") if(!defined($string));
417
418 my @pieces = ( $string =~ /(\d*[MDI])/g );
419 #print "cigar: ",join ( ",", @pieces ),"\n";
420
421 my @features;
422 my $strand1 = $self->{'strand'} || 1;
423 my $strand2 = $self->{'hstrand'}|| 1;
424
425 my ( $start1, $start2 );
426
427 if( $strand1 == 1 ) {
428 $start1 = $self->{'start'};
429 } else {
430 $start1 = $self->{'end'};
431 }
432
433 if( $strand2 == 1 ) {
434 $start2 = $self->{'hstart'};
435 } else {
436 $start2 = $self->{'hend'};
437 }
438
439 #
440 # Construct ungapped blocks as FeaturePairs objects for each MATCH
441 #
442 foreach my $piece (@pieces) {
443
444 my ($length) = ( $piece =~ /^(\d*)/ );
445 if( $length eq "" ) { $length = 1 }
446 my $mapped_length;
447
448 # explicit if statements to avoid rounding problems
449 # and make sure we have sane coordinate systems
450 if( $query_unit == 1 && $hit_unit == 3 ) {
451 $mapped_length = $length*3;
452 } elsif( $query_unit == 3 && $hit_unit == 1 ) {
453 $mapped_length = $length / 3;
454 } elsif ( $query_unit == 1 && $hit_unit == 1 ) {
455 $mapped_length = $length;
456 } else {
457 throw("Internal error $query_unit $hit_unit, currently only " .
458 "allowing 1 or 3 ");
459 }
460
461 if( int($mapped_length) != $mapped_length and
462 ($piece =~ /M$/ or $piece =~ /D$/)) {
463 throw("Internal error with mismapped length of hit, query " .
464 "$query_unit, hit $hit_unit, length $length");
465 }
466
467 if( $piece =~ /M$/ ) {
468 #
469 # MATCH
470 #
471 my ( $qstart, $qend);
472 if( $strand1 == 1 ) {
473 $qstart = $start1;
474 $qend = $start1 + $length - 1;
475 $start1 = $qend + 1;
476 } else {
477 $qend = $start1;
478 $qstart = $start1 - $length + 1;
479 $start1 = $qstart - 1;
480 }
481
482 my ($hstart, $hend);
483 if( $strand2 == 1 ) {
484 $hstart = $start2;
485 $hend = $start2 + $mapped_length - 1;
486 $start2 = $hend + 1;
487 } else {
488 $hend = $start2;
489 $hstart = $start2 - $mapped_length + 1;
490 $start2 = $hstart - 1;
491 }
492
493
494 push @features, Bio::EnsEMBL::FeaturePair->new
495 (-SLICE => $self->{'slice'},
496 -SEQNAME => $self->{'seqname'},
497 -START => $qstart,
498 -END => $qend,
499 -STRAND => $strand1,
500 -HSLICE => $self->{'hslice'},
501 -HSEQNAME => $self->{'hseqname'},
502 -HSTART => $hstart,
503 -HEND => $hend,
504 -HSTRAND => $strand2,
505 -SCORE => $self->{'score'},
506 -PERCENT_ID => $self->{'percent_id'},
507 -ANALYSIS => $self->{'analysis'},
508 -P_VALUE => $self->{'p_value'},
509 -EXTERNAL_DB_ID => $self->{'external_db_id'},
510 -HCOVERAGE => $self->{'hcoverage'},
511 -GROUP_ID => $self->{'group_id'},
512 -LEVEL_ID => $self->{'level_id'});
513
514
515 # end M cigar bits
516 } elsif( $piece =~ /I$/ ) {
517 #
518 # INSERT
519 #
520 if( $strand1 == 1 ) {
521 $start1 += $length;
522 } else {
523 $start1 -= $length;
524 }
525 } elsif( $piece =~ /D$/ ) {
526 #
527 # DELETION
528 #
529 if( $strand2 == 1 ) {
530 $start2 += $mapped_length;
531 } else {
532 $start2 -= $mapped_length;
533 }
534 } else {
535 throw( "Illegal cigar line $string!" );
536 }
537 }
538
539 return \@features;
540 }
541
542
543
544
545 =head2 _parse_features
546
547 Arg [1] : listref Bio::EnsEMBL::FeaturePair $ungapped_features
548 Description: creates internal cigarstring and start,end hstart,hend
549 entries.
550 Returntype : none, fills in values of self
551 Exceptions : argument list undergoes many sanity checks - throws under many
552 invalid conditions
553 Caller : new
554 Status : Stable
555
556 =cut
557
558 my $message_only_once = 1;
559
560 sub _parse_features {
561 my ($self,$features ) = @_;
562
563 my $query_unit = $self->_query_unit();
564 my $hit_unit = $self->_hit_unit();
565
566 if (ref($features) ne "ARRAY") {
567 throw("features must be an array reference not a [".ref($features)."]");
568 }
569
570 my $strand = $features->[0]->strand;
571
572 throw ('FeaturePair needs to have strand == 1 or strand == -1') if(!$strand);
573
574 my @f;
575
576 #
577 # Sort the features on their start position
578 # Ascending order on positive strand, descending on negative strand
579 #
580 if( $strand == 1 ) {
581 @f = sort {$a->start <=> $b->start} @$features;
582 } else {
583 @f = sort { $b->start <=> $a->start} @$features;
584 }
585
586 my $hstrand = $f[0]->hstrand;
587 my $slice = $f[0]->slice();
588 my $hslice = $f[0]->hslice();
589 my $name = $slice->name() if($slice);
590 my $hname = $f[0]->hseqname;
591 my $score = $f[0]->score;
592 my $percent = $f[0]->percent_id;
593 my $analysis = $f[0]->analysis;
594 my $pvalue = $f[0]->p_value();
595 my $external_db_id = $f[0]->external_db_id;
596 my $hcoverage = $f[0]->hcoverage;
597 my $group_id = $f[0]->group_id;
598 my $level_id = $f[0]->level_id;
599
600 my $seqname = $f[0]->seqname;
601 # implicit strand 1 for peptide sequences
602 $strand ||= 1;
603 $hstrand ||= 1;
604 my $ori = $strand * $hstrand;
605
606 throw("No features in the array to parse") if(scalar(@f) == 0);
607
608 my $prev1; # where last feature q part ended
609 my $prev2; # where last feature s part ended
610
611 my $string;
612
613 # Use strandedness info of query and hit to make sure both sets of
614 # start and end coordinates are oriented the right way around.
615 my $f1start;
616 my $f1end;
617 my $f2end;
618 my $f2start;
619
620 if ($strand == 1) {
621 $f1start = $f[0]->start;
622 $f1end = $f[-1]->end;
623 } else {
624 $f1start = $f[-1]->start;
625 $f1end = $f[0]->end;
626 }
627
628 if ($hstrand == 1) {
629 $f2start = $f[0]->hstart;
630 $f2end = $f[-1]->hend;
631 } else {
632 $f2start = $f[-1]->hstart;
633 $f2end = $f[0]->hend;
634 }
635
636 #
637 # Loop through each portion of alignment and construct cigar string
638 #
639
640 foreach my $f (@f) {
641 #
642 # Sanity checks
643 #
644
645 if (!$f->isa("Bio::EnsEMBL::FeaturePair")) {
646 throw("Array element [$f] is not a Bio::EnsEMBL::FeaturePair");
647 }
648 if( defined($f->hstrand()) && $f->hstrand() != $hstrand ) {
649 throw("Inconsistent hstrands in feature array");
650 }
651 if( defined($f->strand()) && ($f->strand != $strand)) {
652 throw("Inconsistent strands in feature array");
653 }
654 if ( defined($name) && $name ne $f->slice->name()) {
655 throw("Inconsistent names in feature array [$name - ".
656 $f->slice->name()."]");
657 }
658 if ( defined($hname) && $hname ne $f->hseqname) {
659 throw("Inconsistent hit names in feature array [$hname - ".
660 $f->hseqname . "]");
661 }
662 if ( defined($score) && $score ne $f->score) {
663 throw("Inconsisent scores in feature array [$score - " .
664 $f->score . "]");
665 }
666 if (defined($f->percent_id) && $percent ne $f->percent_id) {
667 throw("Inconsistent pids in feature array [$percent - " .
668 $f->percent_id . "]");
669 }
670 if(defined($pvalue) && $pvalue != $f->p_value()) {
671 throw("Inconsistant p_values in feature arraw [$pvalue " .
672 $f->p_value() . "]");
673 }
674 if($seqname && $seqname ne $f->seqname){
675 throw("Inconsistent seqname in feature array [$seqname - ".
676 $f->seqname . "]");
677 }
678 my $start1 = $f->start; #source sequence alignment start
679 my $start2 = $f->hstart(); #hit sequence alignment start
680
681 #
682 # More sanity checking
683 #
684 if (defined($prev1)) {
685 if ( $strand == 1 ) {
686 if ($f->start < $prev1) {
687 throw("Inconsistent coords in feature array (forward strand).\n" .
688 "Start [".$f->start()."] in current feature should be greater " .
689 "than previous feature end [$prev1].");
690 }
691 } else {
692 if ($f->end > $prev1) {
693 throw("Inconsistent coords in feature array (reverse strand).\n" .
694 "End [".$f->end() ."] should be less than previous feature " .
695 "start [$prev1].");
696 }
697 }
698 }
699
700 my $length = ($f->end - $f->start + 1); #length of source seq alignment
701 my $hlength = ($f->hend - $f->hstart + 1); #length of hit seq alignment
702
703 # using multiplication to avoid rounding errors, hence the
704 # switch from query to hit for the ratios
705
706 #
707 # Yet more sanity checking
708 #
709 if($query_unit > $hit_unit){
710 # I am going to make the assumption here that this situation will
711 # only occur with DnaPepAlignFeatures, this may not be true
712 my $query_p_length = sprintf "%.0f", ($length/$query_unit);
713 my $hit_p_length = sprintf "%.0f", ($hlength * $hit_unit);
714 if( $query_p_length != $hit_p_length) {
715 throw( "Feature lengths not comparable Lengths:" .$length .
716 " " . $hlength . " Ratios:" . $query_unit . " " .
717 $hit_unit );
718 }
719 } else{
720 my $query_d_length = sprintf "%.0f", ($length*$hit_unit);
721 my $hit_d_length = sprintf "%.0f", ($hlength * $query_unit);
722 if( $length * $hit_unit != $hlength * $query_unit ) {
723 throw( "Feature lengths not comparable Lengths:" . $length .
724 " " . $hlength . " Ratios:" . $query_unit . " " .
725 $hit_unit );
726 }
727 }
728
729 my $hlengthfactor = ($query_unit/$hit_unit);
730
731 #
732 # Check to see if there is an I type (insertion) gap:
733 # If there is a space between the end of the last source sequence
734 # alignment and the start of this one, then this is an insertion
735 #
736
737 my $insertion_flag = 0;
738 if( $strand == 1 ) {
739 if( ( defined $prev1 ) && ( $f->start > $prev1 + 1 )) {
740
741 #there is an insertion
742 $insertion_flag = 1;
743 my $gap = $f->start - $prev1 - 1;
744 if( $gap == 1 ) {
745 $gap = ""; # no need for a number if gap length is 1
746 }
747 $string .= "$gap"."I";
748
749 }
750
751 #shift our position in the source seq alignment
752 $prev1 = $f->end();
753 } else {
754
755 if(( defined $prev1 ) && ($f->end + 1 < $prev1 )) {
756
757 #there is an insertion
758 $insertion_flag = 1;
759 my $gap = $prev1 - $f->end() - 1;
760 if( $gap == 1 ) {
761 $gap = ""; # no need for a number if gap length is 1
762 }
763 $string .= "$gap"."I";
764 }
765
766 #shift our position in the source seq alignment
767 $prev1 = $f->start();
768 }
769
770 #
771 # Check to see if there is a D type (deletion) gap
772 # There is a deletion gap if there is a space between the end of the
773 # last portion of the hit sequence alignment and this one
774 #
775 if( $hstrand == 1 ) {
776 if(( defined $prev2 ) && ( $f->hstart() > $prev2 + 1 )) {
777
778 #there is a deletion
779 my $gap = $f->hstart - $prev2 - 1;
780 my $gap2 = int( $gap * $hlengthfactor + 0.5 );
781
782 if( $gap2 == 1 ) {
783 $gap2 = ""; # no need for a number if gap length is 1
784 }
785 $string .= "$gap2"."D";
786
787 #sanity check, Should not be an insertion and deletion
788 if($insertion_flag) {
789 if ($message_only_once) {
790 warning("Should not be an deletion and insertion on the " .
791 "same alignment region. cigar_line=$string\n");
792 $message_only_once = 0;
793 }
794 }
795 }
796 #shift our position in the hit seq alignment
797 $prev2 = $f->hend();
798
799 } else {
800 if( ( defined $prev2 ) && ( $f->hend() + 1 < $prev2 )) {
801
802 #there is a deletion
803 my $gap = $prev2 - $f->hend - 1;
804 my $gap2 = int( $gap * $hlengthfactor + 0.5 );
805
806 if( $gap2 == 1 ) {
807 $gap2 = ""; # no need for a number if gap length is 1
808 }
809 $string .= "$gap2"."D";
810
811 #sanity check, Should not be an insertion and deletion
812 if($insertion_flag) {
813 if ($message_only_once) {
814 warning("Should not be an deletion and insertion on the " .
815 "same alignment region. prev2 = $prev2; f->hend() = " .
816 $f->hend() . "; cigar_line = $string;\n");
817 $message_only_once = 0;
818 }
819 }
820 }
821 #shift our position in the hit seq alignment
822 $prev2 = $f->hstart();
823 }
824
825 my $matchlength = $f->end() - $f->start() + 1;
826 if( $matchlength == 1 ) {
827 $matchlength = "";
828 }
829 $string .= $matchlength."M";
830 }
831
832 $self->{'start'} = $f1start;
833 $self->{'end'} = $f1end;
834 $self->{'seqname'} = $seqname;
835 $self->{'strand'} = $strand;
836 $self->{'score'} = $score;
837 $self->{'percent_id'} = $percent;
838 $self->{'analysis'} = $analysis;
839 $self->{'slice'} = $slice;
840 $self->{'hslice'} = $hslice;
841 $self->{'hstart'} = $f2start;
842 $self->{'hend'} = $f2end;
843 $self->{'hstrand'} = $hstrand;
844 $self->{'hseqname'} = $hname;
845 $self->{'cigar_string'} = $string;
846 $self->{'p_value'} = $pvalue;
847 $self->{'external_db_id'} = $external_db_id;
848 $self->{'hcoverage'} = $hcoverage;
849 $self->{'group_id'} = $group_id;
850 $self->{'level_id'} = $level_id;
851 }
852
853
854
855
856
857
858 =head2 _hit_unit
859
860 Args : none
861 Description: abstract method, overwrite with something that returns
862 one or three
863 Returntype : int 1,3
864 Exceptions : none
865 Caller : internal
866 Status : Stable
867
868 =cut
869
870 sub _hit_unit {
871 my $self = shift;
872 throw( "Abstract method call!" );
873 }
874
875
876
877 =head2 _query_unit
878
879 Args : none
880 Description: abstract method, overwrite with something that returns
881 one or three
882 Returntype : int 1,3
883 Exceptions : none
884 Caller : internal
885 Status : Stable
886
887 =cut
888
889 sub _query_unit {
890 my $self = shift;
891 throw( "Abstract method call!" );
892 }
893
894
895
896
897 1;