diff variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyAdaptor.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,234 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::DBSQL::AssemblyAdaptor - Retrieves meta information
+related to the assembly, density features/counts per chromosome or if none
+provided, all top level seq regions
+
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+=head1 METHODS
+
+=cut
+
+package Bio::EnsEMBL::DBSQL::AssemblyAdaptor;
+
+use strict;
+use warnings;
+
+use Bio::EnsEMBL::DBSQL::BaseAdaptor;
+use Bio::EnsEMBL::DBSQL::MetaContainer;
+use Bio::EnsEMBL::Attribute;
+
+use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
+
+use vars qw(@ISA);
+
+@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
+
+
+
+=head2 new
+
+  Arg [1]    : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
+               the database this assembly info adaptor is using.
+  Example    : my $aia = new Bio::EnsEMBL::AssemblyAdaptor($dbadaptor);
+  Description: Creates a new AssemblyAdaptor object
+  Returntype : Bio::EnsEMBL::DBSQL::AssemblyAdaptor
+  Exceptions : none
+  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
+  Status     : Stable
+
+=cut
+
+sub new {
+  my($class, $dbadaptor) = @_;
+
+  my $self = $class->SUPER::new($dbadaptor);
+
+  return $self;
+}
+
+=head2 fetch_info
+    
+  Description: Returns a hash containing information about the assembly
+               stored in the meta table, such as assembly name, date etc., 
+               a reference to array of top level seq_region names and a
+               reference to array of all coordinate system versions found
+  Returntype : reference to a hash with assembly info key and value pairs
+  Exceptions : none
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub fetch_info {
+  my $self = shift;
+
+  #fetch assembly information stored in the meta table
+
+  my $meta_container = $self->db()->get_adaptor('MetaContainer');
+
+  my @meta_keys = qw(assembly.name       assembly.date                    genebuild.start_date
+                     genebuild.method    genebuild.initial_release_date   genebuild.last_geneset_update);
+  my %assembly_info;
+
+  foreach my $meta_key (@meta_keys) {
+      my @values = @{ $meta_container->list_value_by_key($meta_key) };     
+      if (@values) {
+	  $assembly_info{$meta_key} = $values[0];
+      }
+  }
+ 
+  my $schema_build = $self->db()->_get_schema_build();
+  if ($schema_build) {
+      $assembly_info{'schema_build'} = $schema_build;
+  }
+  
+ #fetch available coordinate systems
+
+  my $csa = $self->db()->get_adaptor('CoordSystem');
+  my %versions;
+  foreach my $cs (@{$csa->fetch_all()}) {
+    $versions{$cs->version()} = 1;
+  }
+  my @coord_system_versions = keys %versions;
+
+  $assembly_info{'coord_system_versions'} = \@coord_system_versions;
+
+  #fetch top level seq_region names
+
+  my $sa = $self->db()->get_adaptor('Slice');
+
+  my $slices = $sa->fetch_all('toplevel');
+
+  my @top_level_seq_region_names;
+
+  if ($slices) {
+      @top_level_seq_region_names = sort(map { $_->seq_region_name() } @$slices);
+  }
+
+  $assembly_info{'top_level_seq_region_names'} = \@top_level_seq_region_names;
+
+  return \%assembly_info;
+}
+
+
+=head2 fetch_stats
+
+  Arg [1]    : string $seq_region_name (optional)
+               The name of the toplevel seq_region for which statistics should be fetched
+
+  Description: Returns a reference to a hash containing density features/ density related 
+               seq_region attributes for a toplevel seq_region provided or if none
+               provided - all top level seq regions
+  Returntype : hashref
+  Exceptions : throw if the toplevel slice with seq_region_name provided does not exist
+  Caller     : general
+  Status     : Stable
+
+=cut
+
+
+sub fetch_stats {
+  my $self = shift;
+
+  my $seq_region_name = shift;
+
+  my @slices;
+
+  my %assembly_stats;
+
+  my $sa = $self->db()->get_adaptor('Slice');
+  
+  if ($seq_region_name) {
+      my $slice = $sa->fetch_by_region('toplevel',$seq_region_name);
+      if (!$slice) {
+	  throw("Top level slice $seq_region_name not found");
+      }
+      push(@slices, $slice);
+      $assembly_stats{'seq_region_name'} = $seq_region_name;
+  } else {
+      @slices = @{$sa->fetch_all('toplevel')};
+  }
+
+  my @density_types  = qw(genedensity knowngenedensity snpdensity percentgc);
+
+  my @attrib_types = qw(GeneNo% SNPCount);
+
+  my $aa = $self->db()->get_adaptor('Attribute');
+
+  my $dfa = $self->db()->get_adaptor('DensityFeature');
+
+  #used to calculate the average density value for density types represented as ratios
+ 
+  my %density_ft_count = ();
+
+  foreach my $slice (@slices) {
+
+     $assembly_stats{'Length (bps)'} += $slice->length();
+
+     foreach my $density_type (@density_types) {
+	      
+	  my $density_features = $dfa->fetch_all_by_Slice($slice,$density_type);
+	  
+	  foreach my $density_feature (@$density_features) {
+
+	      if ($density_feature->density_type()->value_type() eq 'ratio') {
+		  $density_ft_count{$density_feature->density_type()->analysis()->display_label()} += 1;
+	      }
+
+	       $assembly_stats{$density_feature->density_type()->analysis()->display_label()} += $density_feature->density_value(); 
+	  }
+     }
+
+     foreach my $attrib_type (@attrib_types) {
+
+	  my $attribs = $aa->fetch_all_by_Slice($slice,$attrib_type);
+	      
+	  foreach my $attrib (@$attribs) {
+		 $assembly_stats{$attrib->description()} += $attrib->value(); 
+	  }
+     }
+  }
+
+  foreach my $density_analysis (keys %density_ft_count) {
+
+      if ($density_ft_count{$density_analysis} > 1) {
+	  $assembly_stats{$density_analysis} /= $density_ft_count{$density_analysis};
+	  $assembly_stats{$density_analysis} = sprintf "%.2f", $assembly_stats{$density_analysis}; 
+	  $assembly_stats{$density_analysis} .= '%';
+      }
+  }
+
+  return \%assembly_stats;
+}
+
+
+
+1;
+