Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Compara/PeptideAlignFeature.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/PeptideAlignFeature.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,451 @@ +=head1 NAME - Bio::EnsEMBL::Compara::PeptideAlignFeature + +=head1 SYNOPSIS + + # Get an $homology object somehow + + # For Homology PeptideAlignFeatures, you normally get 2 pafs, + # one for each member used alternatively as query and database + # (hit) in the blast run + + my $pafs = $homology->get_all_PeptideAlignFeature; + + foreach my $paf (@{$pafs}) { + print $paf->query_member->stable_id," ",$self->hit_member->stable_id," ",$paf->evalue,"\n"; + } + + # Other stuff in the object: + # $paf->qstart + # $paf->qend + # $paf->hstart + # $paf->hend + # $paf->score + # $paf->alignment_length + # $paf->identical_matches + # $paf->perc_ident + # $paf->positive_matches + # $paf->perc_pos + # $paf->hit_rank + # $paf->cigar_line + +=head1 DESCRIPTION + +=head1 CONTACT + +Describe contact details here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ + +=cut + +my $_paf_build_homology_idx = time(); #global index counter + +package Bio::EnsEMBL::Compara::PeptideAlignFeature; + +use strict; +use Bio::EnsEMBL::Compara::Homology; +use Bio::EnsEMBL::Utils::Exception; + +#se overload '<=>' => "sort_by_score_evalue_and_pid"; # named method + +sub new { + my ($class) = @_; + my $self = {}; + + bless $self,$class; + + $self->query_member(new Bio::EnsEMBL::Compara::Member); + $self->hit_member(new Bio::EnsEMBL::Compara::Member); + return $self; +} + + +sub create_homology +{ + my $self = shift; + + # create an Homology object + my $homology = new Bio::EnsEMBL::Compara::Homology; + + my $stable_id; + if($self->query_member->taxon_id < $self->hit_member->taxon_id) { + $stable_id = $self->query_member->taxon_id() . "_" . $self->hit_member->taxon_id . "_"; + } else { + $stable_id = $self->hit_member->taxon_id . "_" . $self->query_member->taxon_id . "_"; + } + $stable_id .= sprintf ("%011.0d",$_paf_build_homology_idx++); + $homology->stable_id($stable_id); + $homology->method_link_type("ENSEMBL_ORTHOLOGUES"); + + # + # QUERY member + # + my $qlen = ($self->qend - $self->qstart + 1); + $self->query_member->perc_cov(int($qlen*100/$self->query_member->seq_length)); + $self->query_member->perc_id(int($self->identical_matches*100.0/$qlen)); + $self->query_member->perc_pos(int($self->positive_matches*100/$qlen)); + + my $cigar_line = $self->cigar_line; + #print("original cigar_line '$cigar_line'\n"); + $cigar_line =~ s/I/M/g; + $cigar_line = compact_cigar_line($cigar_line); + $self->query_member->cigar_line($cigar_line); + #print(" '$cigar_line'\n"); + + $homology->add_Member($self->query_member); + + # HIT member + # + # $self->hstart and $self->hend could be stored + my $hlen = ($self->hend - $self->hstart + 1); + $self->hit_member->perc_cov(int($hlen*100/$self->hit_member->seq_length)); + $self->hit_member->perc_id(int($self->identical_matches*100.0/$hlen)); + $self->hit_member->perc_pos(int($self->positive_matches*100/$hlen)); + + $cigar_line = $self->cigar_line; + #print("original cigar_line\n '$cigar_line'\n"); + $cigar_line =~ s/D/M/g; + $cigar_line =~ s/I/D/g; + $cigar_line = compact_cigar_line($cigar_line); + $self->hit_member->cigar_line($cigar_line); + #print(" '$cigar_line'\n"); + + $homology->add_Member($self->hit_member); + + return $homology; +} + + +sub compact_cigar_line +{ + my $cigar_line = shift; + + #print("cigar_line '$cigar_line' => "); + my @pieces = ( $cigar_line =~ /(\d*[MDI])/g ); + my @new_pieces = (); + foreach my $piece (@pieces) { + $piece =~ s/I/M/; + if (! scalar @new_pieces || $piece =~ /D/) { + push @new_pieces, $piece; + next; + } + if ($piece =~ /\d*M/ && $new_pieces[-1] =~ /\d*M/) { + my ($matches1) = ($piece =~ /(\d*)M/); + my ($matches2) = ($new_pieces[-1] =~ /(\d*)M/); + if (! defined $matches1 || $matches1 eq "") { + $matches1 = 1; + } + if (! defined $matches2 || $matches2 eq "") { + $matches2 = 1; + } + $new_pieces[-1] = $matches1 + $matches2 . "M"; + } else { + push @new_pieces, $piece; + } + } + my $new_cigar_line = join("", @new_pieces); + #print(" '$new_cigar_line'\n"); + return $new_cigar_line; +} + + +########################## +# +# getter/setter methods +# +########################## + +sub query_member { + my ($self,$arg) = @_; + + if (defined($arg)) { + throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]") + unless($arg->isa('Bio::EnsEMBL::Compara::Member')); + $self->{'_query_member'} = $arg; + } + return $self->{'_query_member'}; +} + +sub query_member_id { + my $self = shift; + $self->{'_query_member_id'} = shift if (@_); + if ($self->{'_query_member_id'}) { + return $self->{'_query_member_id'}; + } elsif ($self->{'_query_member'} and $self->{'_query_member'}->dbID) { + return $self->{'_query_member'}->dbID; + } + return undef; +} + +sub query_genome_db_id { + my $self = shift; + $self->{'_query_genome_db_id'} = shift if (@_); + if ($self->{'_query_genome_db_id'}) { + return $self->{'_query_genome_db_id'}; + } elsif ($self->{'_query_member'} and $self->{'_query_member'}->genome_db + and $self->{'_query_member'}->genome_db->dbID) { + return $self->{'_query_member'}->genome_db->dbID; + } + return undef; +} + +sub hit_member { + my ($self,$arg) = @_; + + if (defined($arg)) { + throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]") + unless($arg->isa('Bio::EnsEMBL::Compara::Member')); + $self->{'_hit_member'} = $arg; + } + return $self->{'_hit_member'}; +} + +sub hit_member_id { + my $self = shift; + $self->{'_hit_member_id'} = shift if (@_); + if ($self->{'_hit_member_id'}) { + return $self->{'_hit_member_id'}; + } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->dbID) { + return $self->{'_hit_member'}->dbID; + } + return undef; +} + +sub hit_genome_db_id { + my $self = shift; + $self->{'_hit_genome_db_id'} = shift if (@_); + if ($self->{'_hit_genome_db_id'}) { + return $self->{'_hit_genome_db_id'}; + } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->genome_db + and $self->{'_hit_member'}->genome_db->dbID) { + return $self->{'_hit_member'}->genome_db->dbID; + } + return undef; +} + +sub qstart { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_qstart} = $arg; + } + return $self->{_qstart}; +} + +sub hstart { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_hstart} = $arg; + } + return $self->{_hstart}; +} + +sub qend { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_qend} = $arg; + } + return $self->{_qend}; +} + +sub qlength { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_qlength} = $arg; + } + return $self->{_qlength}; +} + +sub hend { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_hend} = $arg; + } + return $self->{_hend}; +} + +sub hlength{ + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_hlength} = $arg; + } + return $self->{_hlength}; +} + +sub score{ + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_score} = $arg; + } + return $self->{_score}; +} + +sub evalue { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_evalue} = $arg; + } + return $self->{_evalue}; +} + +sub perc_ident { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_perc_ident} = $arg; + } + return $self->{_perc_ident}; +} + +sub perc_pos { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_perc_pos} = $arg; + } + return $self->{_perc_pos}; +} + +sub identical_matches { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_identical_matches} = $arg; + if(defined($self->alignment_length)) { + $self->perc_ident(int($arg*100/$self->alignment_length)); + } + } + return $self->{_identical_matches}; +} + +sub positive_matches { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_positive_matches} = $arg; + if(defined($self->alignment_length)) { + $self->perc_pos(int($arg*100/$self->alignment_length)); + } + } + return $self->{_positive_matches}; +} + +sub alignment_length { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_alignment_length} = $arg; + } + return $self->{_alignment_length}; +} + +sub cigar_line { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_cigar_line} = $arg; + } + return $self->{_cigar_line}; +} + +sub hit_rank { + my ($self,$arg) = @_; + + if (defined($arg)) { + $self->{_hit_rank} = $arg; + } + return $self->{_hit_rank}; +} + +sub dbID { + my ( $self, $dbID ) = @_; + $self->{'_dbID'} = $dbID if defined $dbID; + return $self->{'_dbID'}; +} + +sub rhit_dbID { + my ( $self, $dbID ) = @_; + $self->{'_rhit_dbID'} = $dbID if defined $dbID; + return $self->{'_rhit_dbID'}; +} + +sub display_short { + my $self = shift; + print($self->get_description(), "\n"); +} + +sub get_description { + my($self) = @_; + + unless(defined($self)) { + print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\thit_rank\n"); + return; + } + + my $qm = $self->query_member; + my $hm = $self->hit_member; + my $dbID = $self->dbID; $dbID = '' unless($dbID); + + my $header = "PAF(".$dbID.")"; + $header .= "(".$self->rhit_dbID.")" if($self->rhit_dbID); + while(length($header)<17) { $header .= ' '; } + + my $qmem = sprintf("%s(%d,%d)(%s:%d)", + $qm->stable_id, $self->qstart, $self->qend, $qm->chr_name, $qm->chr_start); + my $hmem = sprintf("%s(%d,%d)(%s:%d)", + $hm->stable_id, $self->hstart, $self->hend, $hm->chr_name, $hm->chr_start); + while(length($qmem)<50) { $qmem .= ' '; } + while(length($hmem)<50) { $hmem .= ' '; } + + + my $desc_string = sprintf("%s%s%s%7.3f%7d%7d%7d%7d", + $header, $qmem, $hmem, + $self->score, + $self->alignment_length, + $self->perc_ident, + $self->perc_pos, + $self->hit_rank); + + return $desc_string; +} + + +=head2 hash_key + Args : none + Example : $somehash->{$paf->hash_key} = $someValue; + Description: used for keeping track of known/stored gene/gene relationships + Returntype : string $key + Exceptions : none + Caller : general +=cut + +sub hash_key +{ + my $self = shift; + my $key = '1'; + + return $key unless($self->query_member); + return $key unless($self->hit_member); + my $gene1 = $self->query_member->gene_member; + my $gene2 = $self->hit_member->gene_member; + $gene1 = $self->query_member unless($gene1); + $gene2 = $self->hit_member unless($gene2); + if($gene1->genome_db_id > $gene2->genome_db_id) { + my $temp = $gene1; + $gene1 = $gene2; + $gene2 = $temp; + } + $key = $gene1->stable_id . '_' . $gene2->stable_id; + return $key; +} + +1;