Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Analysis/PairAlign.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Analysis/PairAlign.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,383 @@ +=head1 LICENSE + + Copyright (c) 1999-2012 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <dev@ensembl.org>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +=cut + +=head1 NAME + +PairAlign - Dna pairwise alignment module + +=head1 SYNOPSIS + +#To convert between coordinates: + + my $cdna_coord = $pair->genomic2cDNA($gen_coord); + my $gen_coord = $pair->cDNA2genomic($cdna_coord); + +=head1 DESCRIPTION + +Contains list of sub alignments making up a dna-dna alignment + +Creation: + + my $pair = new Bio::EnsEMBL::FeaturePair( + -start => $qstart, + -end => $qend, + -strand => $qstrand, + -hstart => $hstart, + -hend => $hend, + -hend => $hstrand, + ); + + my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign; + $pairaln->addFeaturePair($pair); + +Any number of pair alignments can be added to the PairAlign object + +=cut + +package Bio::EnsEMBL::Analysis::PairAlign; + +use vars qw(@ISA); +use strict; + + +@ISA = qw(Bio::EnsEMBL::Root); + +sub new { + my($class,@args) = @_; + my $self = {}; + bless $self, $class; + + $self->{'_homol'} = []; + + return $self; # success - we hope! +} + +sub addFeaturePair { + my ($self,$pair) = @_; + + $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair")); + + push(@{$self->{'_pairs'}},$pair); + +} + + +=head2 eachFeaturePair + + Title : eachFeaturePair + Example : my @pairs = $pair->eachFeaturePair + Returns : Array of Bio::SeqFeature::FeaturePair + Args : none +=cut + +sub eachFeaturePair { + my ($self) = @_; + + if (defined($self->{'_pairs'})) { + return @{$self->{'_pairs'}}; + } +} + +sub get_hstrand { + my ($self) = @_; + + my @features = $self->eachFeaturePair; + + return $features[0]->hstrand; +} + +=head2 genomic2cDNA + + Title : genomic2cDNA + Usage : my $cdna_coord = $pair->genomic2cDNA($gen_coord) + Function: Converts a genomic coordinate to a cdna coordinate + Returns : int + Args : int +=cut + +sub genomic2cDNA { + my ($self,$coord) = @_; + my @pairs = $self->eachFeaturePair; + + @pairs = sort {$a->start <=> $b->start} @pairs; + + my $newcoord; + + HOMOL: while (my $sf1 = shift(@pairs)) { + next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end); + + if ($sf1->strand == 1 && $sf1->hstrand == 1) { + $newcoord = $sf1->hstart + ($coord - $sf1->start); + last HOMOL; + } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) { + $newcoord = $sf1->hend - ($coord - $sf1->start); + last HOMOL; + } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) { + $newcoord = $sf1->hstart + ($sf1->end - $coord); + last HOMOL; + } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) { + $newcoord = $sf1->hend - ($sf1->end - $coord); + last HOMOL; + } else { + $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n"); + } + } + + if (defined($newcoord)) { + + return $newcoord; + } else { + $self->throw("Couldn't convert $coord"); + } +} + +=head2 cDNA2genomic + + Title : cDNA2genomic + Usage : my $gen_coord = $pair->genomic2cDNA($cdna_coord) + Function: Converts a cdna coordinate to a genomic coordinate + Example : + Returns : int + Args : int + + +=cut + +sub cDNA2genomic { + my ($self,$coord) = @_; + + my @pairs = $self->eachFeaturePair; + + my $newcoord; + + HOMOL: while (my $sf1 = shift(@pairs)) { + next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend); + + if ($sf1->strand == 1 && $sf1->hstrand == 1) { + $newcoord = $sf1->start + ($coord - $sf1->hstart); + last HOMOL; + } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) { + $newcoord = $sf1->start +($sf1->hend - $coord); + last HOMOL; + } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) { + $newcoord = $sf1->end - ($coord - $sf1->hstart); + last HOMOL; + } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) { + $newcoord = $sf1->end - ($sf1->hend - $coord); + last HOMOL; + } else { + $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n"); + } + } + + if (defined ($newcoord)) { + return $newcoord; + } else { + $self->throw("Couldn't convert $coord\n"); + } +} + +sub find_Pair { + my ($self,$coord) = @_; + + foreach my $p ($self->eachFeaturePair) { + if ($coord >= $p->hstart && $coord <= $p->hend) { + return $p; + } + } +} + +=head2 convert_cDNA_feature + + Title : convert_cDNA_feature + Usage : my @newfeatures = $self->convert_cDNA_feature($f); + Function: Converts a feature on the cDNA into an array of + features on the genomic (for features that span across introns); + Returns : Array of Bio::EnsEMBL::FeaturePair + Args : Bio::EnsEMBL::FeaturePair + +=cut + +sub convert_cDNA_feature { + my ($self,$feature) = @_; + + my $foundstart = 0; + my $foundend = 0; + + my @pairs = $self->eachFeaturePair; + my @newfeatures; + + HOMOL: while (my $sf1 = shift(@pairs)) { + my $skip = 0; + #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n"; + + $skip = 1 unless ($feature->start >= $sf1->hstart + && $feature->start <= $sf1->hend); + + if($skip){ + #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n"; + next HOMOL; + } + if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) { + $foundend = 1; + } + + my $startcoord = $self->cDNA2genomic($feature->start); + my $endcoord; + + if ($sf1->hstrand == 1) { + $endcoord = $sf1->end; + } else { + $endcoord = $sf1->start; + } + + if ($foundend) { + $endcoord = $self->cDNA2genomic($feature->end); + } + + #print STDERR "Making new genomic feature $startcoord\t$endcoord\n"; + + my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname, + -start => $startcoord, + -end => $endcoord, + -strand => $feature->strand); + push(@newfeatures,$tmpf); + last; + } + + # Now the rest of the pairs until we find the endcoord + + while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) { + + if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) { + $foundend = 1; + } + + my $startcoord; + my $endcoord; + + if ($sf1->hstrand == 1) { + $startcoord = $sf1->start; + $endcoord = $sf1->end; + } else { + $startcoord = $sf1->end; + $endcoord = $sf1->start; + } + + if ($foundend) { + $endcoord = $self->cDNA2genomic($feature->end); + } + + # #print STDERR "Making new genomic feature $startcoord\t$endcoord\n"; + + my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname, + -start => $startcoord, + -end => $endcoord, + -strand => $feature->strand); + push(@newfeatures,$tmpf); + } + #print STDERR "Have ".@newfeatures." features from ".$feature."\n"; + return @newfeatures; +} + + +sub convert_FeaturePair { + my ($self,$pair) = @_; + + my $hstrand = $self->get_hstrand; + + my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand, + $pair->slice); + my @newfeatures = $self->convert_cDNA_feature($feat); + my @newpairs; + + my $hitpairaln = new Bio::EnsEMBL::Analysis::PairAlign; + $hitpairaln->addFeaturePair($pair); + + foreach my $new (@newfeatures) { + + # Now we want to convert these cDNA coords into hit coords + + my $hstart1 = $self->genomic2cDNA($new->start); + my $hend1 = $self->genomic2cDNA($new->end); + + my $hstart2 = $hitpairaln->genomic2cDNA($hstart1); + my $hend2 = $hitpairaln->genomic2cDNA($hend1); + + # We can now put the final feature together + + my $finalstrand = $hstrand * $pair->strand * $pair->hstrand; + + if ($hstart2 > $hend2) { + my $tmp = $hstart2; + $hstart2 = $hend2; + $hend2 = $tmp; + } + + my $finalpair = $self->create_FeaturePair($new->start, $new->end, + $new->strand, + $hstart2, $hend2, + $finalstrand, $pair->score); + + push(@newpairs,$finalpair); + + } + + return @newpairs; +} + +sub create_FeaturePair { + my ($self, $start, $end, $strand, $hstart, $hend, + $hstrand, $score) = @_; + + my $fp = Bio::EnsEMBL::FeaturePair->new( + -start => $start, + -end => $end, + -strand => $strand, + -hstart => $hstart, + -hend => $hend, + -hstrand => $hstrand, + -score => $score, + ); + + + return $fp; +} + +sub create_Feature{ + my ($self, $start, $end, $strand, $slice) = @_; + + my $feat = new Bio::EnsEMBL::Feature(-start => $start, + -end => $end, + -strand => $strand, + -slice => $slice, + ); + return $feat; +} + +1; + + + + + + + +