diff variant_effect_predictor/Bio/EnsEMBL/Compara/ConstrainedElement.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Compara/ConstrainedElement.pm	Fri Aug 03 10:04:48 2012 -0400
@@ -0,0 +1,622 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=head1 NAME
+
+Bio::EnsEMBL::Compara::ConstrainedElement - constrained element data produced by Gerp
+
+=head1 SYNOPSIS
+  
+  use Bio::EnsEMBL::Compara::ConstrainedElement;
+  
+  my $constrained_element = new Bio::EnsEMBL::Compara::ConstrainedElement(
+          -adaptor => $constrained_element_adaptor,
+          -method_link_species_set_id => $method_link_species_set_id,
+	  -reference_dnafrag_id => $dnafrag_id,
+          -score => 56.2,
+          -p_value => '1.203e-6',
+          -alignment_segments => [ [$dnafrag1_id, $start, $end, $genome_db_id, $dnafrag1_name ], [$dnafrag2_id, ... ], ... ],
+      );
+
+GET / SET VALUES
+  $constrained_element->adaptor($constrained_element_adaptor);
+  $constrained_element->dbID($constrained_element_id);
+  $constrained_element->method_link_species_set_id($method_link_species_set_id);
+  $constrained_element->score(56.2);
+  $constrained_element->p_value('5.62e-9');
+  $constrained_element->alignment_segments([ [$dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], ... ]);
+  $constrained_element->slice($slice);
+  $constrained_element->start($constrained_element_start - $slice_start + 1);
+  $constrained_element->end($constrained_element_end - $slice_start + 1);
+  $constrained_element->seq_region_start($self->slice->start + $self->{'start'} - 1);
+  $constrained_element->seq_region_end($self->slice->start + $self->{'end'} - 1);
+  $constrained_element->strand($strand);
+  $constrained_element->reference_dnafrag_id($dnafrag_id);
+
+=head1 OBJECT ATTRIBUTES
+
+=over
+
+=item dbID
+
+corresponds to constrained_element.constrained_element_id
+
+=item adaptor
+
+Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object to access DB
+
+=item method_link_species_set_id
+
+corresponds to method_link_species_set.method_link_species_set_id (external ref.)
+
+=item score
+
+corresponds to constrained_element.score
+
+=item p_value
+
+corresponds to constrained_element.p_value
+
+=item slice
+
+corresponds to a Bio::EnsEMBL::Slice 
+
+=item start
+
+corresponds to a constrained_element.dnafrag_start (in slice coordinates)
+
+=item end
+
+corresponds to a constrained_element.dnafrag_end (in slice coordinates)
+
+=item seq_region_start
+
+corresponds to a constrained_element.dnafrag_start (in genomic (absolute) coordinates)
+
+=item seq_region_end
+
+corresponds to a constrained_element.dnafrag_end (in genomic (absolute) coordinates)
+
+=item strand
+
+corresponds to a constrained_element.strand
+
+=item $alignment_segments
+
+listref of listrefs (each of which contain 5 strings (dnafrag.dnafrag_id, constrained_element.dnafrag_start, 
+constrained_element.dnafrag_end, constrained_element.strand, genome_db.genome_db_id, dnafrag.dnafrag_name) 
+   [ [ $dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], .. ]
+Each inner listref contains information about one of the species sequences which make up the constarained 
+element block from the alignment. 
+
+=back
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::EnsEMBL::Compara::ConstrainedElement;
+use strict;
+
+# Object preamble
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning info deprecate verbose);
+use Bio::EnsEMBL::Compara::DnaFrag;
+use Bio::SimpleAlign;
+use Data::Dumper;
+
+
+=head2 new (CONSTRUCTOR)
+
+  Arg [-dbID] : int $dbID (the database ID for 
+		the constrained element block for this object)
+  Arg [-ADAPTOR]
+              : (opt.) Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor $adaptor
+                (the adaptor for connecting to the database)
+  Arg [-METHOD_LINK_SPECIES_SET_ID]
+              : int $mlss_id (the database internal ID for the $mlss)
+  Arg [-SCORE]
+              : float $score (the score of this alignment)
+  Arg [-ALIGNMENT_SEGMENTS]
+              : (opt.) listref of listrefs which each contain 5 values 
+		[ [ $dnafrag_id, $dnafrag_start, $dnafrag_end, $genome_db_id, $dnafrag_name ], ... ]
+		corresponding to the all the species in the constrained element block.
+  Arg [-P_VALUE]
+              : (opt.) string $p_value (the p_value of this constrained element)
+  Arg [-SLICE]
+	     : (opt.) Bio::EnsEMBL::Slice object
+  Arg [-START]
+	     : (opt.) int ($dnafrag_start - Bio::EnsEMBL::Slice->start + 1).
+  Arg [-END]
+	     : (opt.) int ($dnafrag_end - Bio::EnsEMBL::Slice->start + 1).
+  Arg [-STRAND]
+	     : (opt.) int (the strand from the genomic_align).
+  Arg [-REFERENCE_DNAFRAG_ID]
+	     : (opt.) int $dnafrag_id of the slice or dnafrag 
+
+  Example    : my $constrained_element =
+                   new Bio::EnsEMBL::Compara::ConstrainedElement(
+		       -dbID => $constrained_element_id,
+                       -adaptor => $adaptor,
+                       -method_link_species_set_id => $method_link_species_set_id,
+                       -score => 28.2,
+                       -alignment_segments => [ [ $dnafrag_id, $dnafrag_start, $dnafrag_end, $genome_db_id, $dnafrag_name ], .. ], 
+									#danfarg_[start|end|id] from constrained_element table
+                       -p_value => '5.023e-6',
+		       -slice => $slice_obj,
+		       -start => ( $dnafrag_start - $slice_obj->start + 1),
+		       -end => ( $dnafrag_end - $slice_obj->start + 1),
+		       -strand => $strand,
+		       -reference_dnafrag_id => $dnafrag_id,
+                   );
+  Description: Creates a new ConstrainedElement object
+  Returntype : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElement
+  Exceptions : none
+  Caller     : general
+
+=cut
+
+sub new {
+  my($class, @args) = @_;
+  
+  my $self = {};
+  bless $self,$class;
+    
+  my ($adaptor, $dbID, $alignment_segments, 
+	$method_link_species_set_id, $score, $p_value, 
+	$slice, $start, $end, $strand, $reference_dnafrag_id) = 
+    rearrange([qw(
+        ADAPTOR DBID ALIGNMENT_SEGMENTS 
+  METHOD_LINK_SPECIES_SET_ID SCORE P_VALUE 
+  SLICE START END STRAND REFERENCE_DNAFRAG_ID 
+	)],
+            @args);
+
+  $self->adaptor($adaptor) if (defined ($adaptor));
+  $self->dbID($dbID) 
+	if (defined ($dbID));
+  $self->method_link_species_set_id($method_link_species_set_id)
+      if (defined ($method_link_species_set_id));
+  $self->alignment_segments($alignment_segments) 
+      if (defined ($alignment_segments));
+  $self->score($score) if (defined ($score));
+  $self->p_value($p_value) if (defined ($p_value));
+  $self->slice($slice) if (defined ($slice));
+  $self->start($start) if (defined ($start));
+  $self->end($end) if (defined ($end));
+  $self->strand($strand) if (defined ($strand));
+  $self->reference_dnafrag_id($reference_dnafrag_id)
+      if (defined($reference_dnafrag_id));
+  return $self;
+}
+
+sub new_fast {
+  my $class = shift;
+  my $hashref = shift;
+
+  return bless $hashref, $class;
+}
+
+=head2 adaptor
+
+  Arg [1]    : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor
+  Example    : my $cons_ele_adaptor = $constrained_element->adaptor();
+  Example    : $cons_ele_adaptor->adaptor($cons_ele_adaptor);
+  Description: Getter/Setter for the adaptor this object uses for database
+               interaction.
+  Returntype : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object
+  Exceptions : thrown if $adaptor is not a
+               Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object
+  Caller     : general
+
+=cut
+
+sub adaptor {
+  my ($self, $adaptor) = @_;
+
+  if (defined($adaptor)) {
+    throw("$adaptor is not a Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object")
+        unless ($adaptor->isa("Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor"));
+    $self->{'adaptor'} = $adaptor;
+  }
+
+  return $self->{'adaptor'};
+}
+
+=head2 dbID
+
+  Arg [1]    : integer $dbID
+  Example    : my $dbID = $constrained_element->dbID();
+  Example    : $constrained_element->dbID(2);
+  Description: Getter/Setter for the attribute dbID 
+  Returntype : integer
+  Exceptions : returns undef if no ref.dbID
+  Caller     : general
+
+=cut
+
+sub dbID {
+  my ($self, $dbID) = @_;
+
+  if (defined($dbID)) {
+    $self->{'dbID'} = $dbID;
+  }
+
+  return $self->{'dbID'};
+}
+
+
+=head2 p_value 
+
+  Arg [1]    : float $p_value
+  Example    : my $p_value = $constrained_element->p_value();
+  Example    : $constrained_element->p_value('5.35242e-105');
+  Description: Getter/Setter for the attribute p_value
+  Returntype : float 
+  Exceptions : returns undef if no ref.p_value
+  Caller     : general
+
+=cut
+
+sub p_value {
+  my ($self, $p_value) = @_;
+
+  if (defined($p_value)) {
+    $self->{'p_value'} = $p_value;
+  }
+
+  return $self->{'p_value'};
+}
+
+
+=head2 score
+
+  Arg [1]    : float $score
+  Example    : my $score = $constrained_element->score();
+  Example    : $constrained_element->score(16.8);
+  Description: Getter/Setter for the attribute score 
+  Returntype : float
+  Exceptions : returns undef if no ref.score
+  Caller     : general
+
+=cut
+
+sub score {
+  my ($self, $score) = @_;
+
+  if (defined($score)) {
+    $self->{'score'} = $score;
+  } 
+  return $self->{'score'};
+}
+
+=head2 method_link_species_set_id
+
+  Arg [1]    : integer $method_link_species_set_id
+  Example    : $method_link_species_set_id = $constrained_element->method_link_species_set_id;
+  Example    : $constrained_element->method_link_species_set_id(3);
+  Description: Getter/Setter for the attribute method_link_species_set_id.
+  Returntype : integer
+  Exceptions : returns undef if no ref.method_link_species_set_id
+  Caller     : object::methodname
+
+=cut
+
+sub method_link_species_set_id {
+  my ($self, $method_link_species_set_id) = @_;
+
+  if (defined($method_link_species_set_id)) {
+    $self->{'method_link_species_set_id'} = $method_link_species_set_id;
+  } 
+
+  return $self->{'method_link_species_set_id'};
+}
+
+=head2 alignment_segments
+ 
+  Arg [1]    : listref $alignment_segments [ [ $dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], .. ]
+  Example    : my $alignment_segments = $constrained_element->alignment_segments();
+               $constrained_element->alignment_segments($alignment_segments);
+  Description: Getter/Setter for the attribute alignment_segments 
+  Returntype : listref  
+  Exceptions : returns undef if no ref.alignment_segments
+  Caller     : general
+
+=cut
+
+sub alignment_segments {
+  my ($self, $alignment_segments) = @_;
+
+  if (defined($alignment_segments)) {
+    $self->{'alignment_segments'} = $alignment_segments;
+  } 
+
+  return $self->{'alignment_segments'};
+}
+
+
+=head2 slice
+
+  Arg [1]    : Bio::EnsEMBL::Slice $slice
+  Example    : $slice = $constrained_element->slice;
+  Example    : $constrained_element->slice($slice);
+  Description: Getter/Setter for the attribute slice.
+  Returntype : Bio::EnsEMBL::Slice object
+  Exceptions : returns undef if no ref.slice
+  Caller     : object::methodname
+
+=cut
+
+sub slice {
+  my ($self, $slice) = @_;
+
+  if (defined($slice)) {
+    $self->{'slice'} = $slice;
+  } 
+
+  return $self->{'slice'};
+}
+
+=head2 start
+
+  Arg [1]    : (optional) int $start
+  Example    : $start = $constrained_element->start;
+  Example    : $constrained_element->start($start);
+  Description: Getter/Setter for the attribute start.
+  Returntype : int
+  Exceptions : returns undef if no ref.start
+  Caller     : object::methodname
+
+=cut
+
+sub start {
+  my ($self, $start) = @_;
+
+  if (defined($start)) {
+    $self->{'start'} = $start;
+  }
+
+  return $self->{'start'};
+}
+
+=head2 end
+
+  Arg [1]    : (optional) int $end
+  Example    : $end = $constrained_element->end;
+  Example    : $constrained_element->end($end);
+  Description: Getter/Setter for the attribute end relative to the begining of the slice.
+  Returntype : int
+  Exceptions : returns undef if no ref.end
+  Caller     : object::methodname
+
+=cut
+
+sub end {
+  my ($self, $end) = @_;
+
+  if (defined($end)) {
+    $self->{'end'} = $end;
+  }
+
+  return $self->{'end'};
+}
+
+
+=head2 seq_region_start
+
+  Arg [1]    : (optional) int $seq_region_start
+  Example    : $seq_region_start = $constrained_element->seq_region_start;
+  Example    : $constrained_element->seq_region_start($seq_region_start);
+  Description: Getter/Setter for the attribute start relative to the begining of the dnafrag (genomic coords).
+  Returntype : int
+  Exceptions : returns undef if no ref.seq_region_start
+  Caller     : object::methodname
+
+=cut
+sub seq_region_start {
+	my ($self, $seq_region_start) = @_;
+	
+	if(defined($seq_region_start)) {
+		$self->{'seq_region_start'} = $seq_region_start;
+	} else {
+		$self->{'seq_region_start'} = $self->slice->start + $self->{'start'} - 1;
+	}
+	return $self->{'seq_region_start'};
+}
+
+
+=head2 seq_region_end
+
+  Arg [1]    : (optional) int $seq_region_end
+  Example    : $seq_region_end = $constrained_element->seq_region_end
+  Example    : $constrained_element->seq_region_end($seq_region_end);
+  Description: Getter/Setter for the attribute end relative to the begining of the dnafrag (genomic coords).
+  Returntype : int
+  Exceptions : returns undef if no ref.seq_region_end
+  Caller     : object::methodname
+
+=cut
+sub seq_region_end {
+	my ($self, $seq_region_end) = @_;
+	
+	if(defined($seq_region_end)) {
+		$self->{'seq_region_end'} = $seq_region_end;
+	} else {
+		$self->{'seq_region_end'} = $self->slice->start + $self->{'end'} - 1;
+	}
+	return $self->{'seq_region_end'};
+}
+
+
+
+=head2 strand
+
+  Arg [1]    : (optional) int $stand$
+  Example    : $end = $constrained_element->strand;
+  Example    : $constrained_element->end($strand);
+  Description: Getter/Setter for the attribute genomic_align strand.
+  Returntype : int
+  Exceptions : returns undef if no ref.strand
+  Caller     : object::methodname
+
+=cut
+
+sub strand {
+  my ($self, $strand) = @_;
+
+  if (defined($strand)) {
+    $self->{'strand'} = $strand;
+  }
+
+  return $self->{'strand'};
+}
+
+=head2 reference_dnafrag_id
+
+  Arg [1]    : (optional) int $reference_dnafrag_id
+  Example    : $dnafrag_id = $constrained_element->reference_dnafrag_id;
+  Example    : $constrained_element->reference_dnafrag_id($dnafrag_id);
+  Description: Getter/Setter for the attribute end.
+  Returntype : int
+  Exceptions : returns undef if no ref.reference_dnafrag_id 
+  Caller     : object::methodname
+
+=cut
+
+sub reference_dnafrag_id {
+  my ($self, $reference_dnafrag_id) = @_;
+
+  if (defined($reference_dnafrag_id)) {
+    $self->{'reference_dnafrag_id'} = $reference_dnafrag_id;
+  }
+
+  return $self->{'reference_dnafrag_id'};
+}
+
+=head2 get_SimpleAlign
+
+  Arg [1]    : Optional flags for formatting displayed MSA  
+  Example    : my $out = Bio::AlignIO->newFh(-fh=>\*STDOUT, -format=> "clustalw");
+	       my $cons = $ce_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $slice);
+               foreach my $constrained_element(@{ $cons }) {
+			my $simple_align = $constrained_element->get_SimpleAlign("uc");
+			print $out $simple_align;
+	       }
+  Description: Rebuilds the constrained element alignment
+  Returntype : Bio::SimpleAlign object
+  Exceptions : throw if you can not get a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object from the constrained element
+  Caller     : object::methodname
+
+=cut
+
+sub get_SimpleAlign {
+	my ($self, @flags) = @_;
+
+	my $mlss_adaptor = $self->adaptor->db->get_MethodLinkSpeciesSet;
+
+	my $cons_eles_mlss = $mlss_adaptor->fetch_by_dbID($self->method_link_species_set_id());
+
+	if (defined($cons_eles_mlss)) {
+		throw("$cons_eles_mlss is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
+		unless ($cons_eles_mlss->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
+	} else {
+		throw("unable to get a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object from this constrained element");
+	}
+
+	my $msa_mlss_id = $cons_eles_mlss->get_tagvalue("msa_mlss_id"); # The mlss_id of the alignments from which the constrained elements were generated
+
+	my $msa_mlss = $mlss_adaptor->fetch_by_dbID( $msa_mlss_id );
+	
+	# setting the flags
+	my $skip_empty_GenomicAligns = 1;
+	my $uc = 0;
+	my $translated = 0;
+
+	for my $flag ( @flags ) {
+		$uc = 1 if ($flag =~ /^uc$/i);
+		$translated = 1 if ($flag =~ /^translated$/i);
+	}		
+
+	my $genomic_align_block_adaptor = $self->adaptor->db->get_GenomicAlignBlock;
+	$self->start(1) if $self->start <= 0;
+	my $gabs = $genomic_align_block_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice(
+		$msa_mlss, $self->slice->sub_Slice($self->start, $self->end, $self->slice->strand));
+
+	my $sa = Bio::SimpleAlign->new();
+
+	warn "should be only one genomic_align_block associated with each constrained element\n" if @$gabs > 1;
+
+	my $this_genomic_align_block = $gabs->[0];
+	my $bio07 = 0; 
+	if(!$sa->can('add_seq')) {
+		$bio07 = 1; 
+	}
+	my $reference_genomic_align = $this_genomic_align_block->reference_genomic_align();
+
+	my $restricted_gab = $this_genomic_align_block->restrict_between_reference_positions(
+		($self->slice->start + $self->start - 1),
+		($self->slice->start + $self->end - 1),
+		$reference_genomic_align,
+		$skip_empty_GenomicAligns);
+	print "dbID: ", $this_genomic_align_block->dbID, ". "; 
+	foreach my $genomic_align( @{ $restricted_gab->get_all_GenomicAligns } ) {
+		my $alignSeq = $genomic_align->aligned_sequence;
+		my $loc_seq = Bio::LocatableSeq->new(
+			-SEQ    => $uc ? uc $alignSeq : lc $alignSeq,
+			-START  => $genomic_align->dnafrag_start,
+			-END    => $genomic_align->dnafrag_end,
+			-ID     => $genomic_align->dnafrag->genome_db->name . "/" . $genomic_align->dnafrag->name,
+			-STRAND => $genomic_align->dnafrag_strand);
+
+		if($bio07) { 
+			$sa->addSeq($loc_seq); 
+		}else{ 
+			$sa->add_seq($loc_seq); 
+		}				
+	}
+	return $sa;
+}
+
+=head2 summary_as_hash
+
+  Example       : $constrained_summary = $constrained_element->summary_as_hash();
+  Description   : Retrieves a textual summary of this ConstrainedElement object.
+	              Sadly not descended from Feature, so certain attributes must be explicitly requested
+  Returns       : hashref of descriptive strings
+
+=cut
+
+sub summary_as_hash {
+  my $self = shift;
+  my $summary_ref;
+  $summary_ref->{'ID'} = $self->dbID;
+  $summary_ref->{'start'} = $self->seq_region_start;
+  $summary_ref->{'end'} = $self->seq_region_end;
+  $summary_ref->{'strand'} = $self->strand;
+  $summary_ref->{'seq_region_name'} = $self->slice->seq_region_name;
+  $summary_ref->{'score'} = $self->score;
+  return $summary_ref;
+}
+
+1;