comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::AssemblyAdaptor - Retrieves meta information
24 related to the assembly, density features/counts per chromosome or if none
25 provided, all top level seq regions
26
27
28 =head1 SYNOPSIS
29
30
31 =head1 DESCRIPTION
32
33 =head1 METHODS
34
35 =cut
36
37 package Bio::EnsEMBL::DBSQL::AssemblyAdaptor;
38
39 use strict;
40 use warnings;
41
42 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
43 use Bio::EnsEMBL::DBSQL::MetaContainer;
44 use Bio::EnsEMBL::Attribute;
45
46 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
47
48 use vars qw(@ISA);
49
50 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
51
52
53
=head2 new

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBAdaptor $dbadaptor
               The database adaptor this assembly adaptor will use.
  Example    : my $aa = Bio::EnsEMBL::DBSQL::AssemblyAdaptor->new($dbadaptor);
  Description: Creates a new AssemblyAdaptor. All construction work is
               delegated to the parent class constructor, which receives
               $dbadaptor as its only argument.
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyAdaptor
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Status     : Stable

=cut

sub new {
  my $class     = shift;
  my $dbadaptor = shift;

  # Only the database adaptor is forwarded to the parent constructor; the
  # scalar assignment keeps that call in scalar context.
  my $adaptor = $class->SUPER::new($dbadaptor);

  return $adaptor;
}
74
=head2 fetch_info

  Description: Returns a hash describing the assembly. The hash holds:
                 - one entry per meta key found in the meta table
                   (assembly.name, assembly.date, genebuild.start_date,
                   genebuild.method, genebuild.initial_release_date,
                   genebuild.last_geneset_update); only the first value
                   of each key is kept and absent keys are omitted
                 - 'schema_build', when the database reports a true value
                 - 'coord_system_versions', a reference to a sorted array
                   of the distinct coordinate system versions found
                 - 'top_level_seq_region_names', a reference to a sorted
                   array of top level seq_region names
  Returntype : reference to a hash with assembly info key and value pairs
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut


sub fetch_info {
  my $self = shift;

  my $db = $self->db();

  my %assembly_info;

  # fetch assembly information stored in the meta table

  my $meta_container = $db->get_adaptor('MetaContainer');

  my @meta_keys = qw(assembly.name assembly.date genebuild.start_date
    genebuild.method genebuild.initial_release_date genebuild.last_geneset_update);

  foreach my $meta_key (@meta_keys) {
    my @values = @{ $meta_container->list_value_by_key($meta_key) };

    # a key may carry several values; only the first one is reported
    if (@values) {
      $assembly_info{$meta_key} = $values[0];
    }
  }

  my $schema_build = $db->_get_schema_build();
  if ($schema_build) {
    $assembly_info{'schema_build'} = $schema_build;
  }

  # fetch available coordinate systems

  my $csa = $db->get_adaptor('CoordSystem');

  # the hash is only used to collapse duplicate versions
  my %seen_version;
  foreach my $cs (@{ $csa->fetch_all() }) {
    $seen_version{ $cs->version() } = 1;
  }

  # Hash key order is not stable between runs, so sort to give callers a
  # deterministic list (the seq_region names below are sorted as well).
  my @coord_system_versions = sort keys %seen_version;

  $assembly_info{'coord_system_versions'} = \@coord_system_versions;

  # fetch top level seq_region names

  my $sa = $db->get_adaptor('Slice');

  my $slices = $sa->fetch_all('toplevel');

  my @top_level_seq_region_names;

  if ($slices) {
    @top_level_seq_region_names = sort map { $_->seq_region_name() } @$slices;
  }

  $assembly_info{'top_level_seq_region_names'} = \@top_level_seq_region_names;

  return \%assembly_info;
}
139
140
=head2 fetch_stats

  Arg [1]    : string $seq_region_name (optional)
               The name of the toplevel seq_region for which statistics
               should be fetched. An undefined or empty name selects all
               toplevel seq_regions.

  Description: Returns a reference to a hash of statistics for the given
               toplevel seq_region or, if none is given, summed over all
               toplevel seq_regions. The hash holds:
                 - 'seq_region_name', only when a name was passed in
                 - 'Length (bps)', the summed slice length
                 - one entry per density analysis display label, holding
                   the summed density values; for density types whose
                   value type is 'ratio' and that contributed more than
                   one feature, the mean formatted as "NN.NN%" instead
                 - one entry per attribute description, holding the
                   summed attribute values
  Returntype : hashref
  Exceptions : throw if the toplevel slice with seq_region_name provided
               does not exist
  Caller     : general
  Status     : Stable

=cut


sub fetch_stats {
  my $self            = shift;
  my $seq_region_name = shift;

  my @slices;

  my %assembly_stats;

  my $sa = $self->db()->get_adaptor('Slice');

  # Test for a defined, non-empty name rather than for truth: "0" is false in
  # Perl, so a plain truth test would ignore a seq_region called "0" and
  # silently report statistics for every toplevel region instead.
  if (defined $seq_region_name && $seq_region_name ne '') {
    my $slice = $sa->fetch_by_region('toplevel', $seq_region_name);
    if (!$slice) {
      throw("Top level slice $seq_region_name not found");
    }
    push(@slices, $slice);
    $assembly_stats{'seq_region_name'} = $seq_region_name;
  } else {
    @slices = @{ $sa->fetch_all('toplevel') };
  }

  my @density_types = qw(genedensity knowngenedensity snpdensity percentgc);

  my @attrib_types = qw(GeneNo% SNPCount);

  my $aa = $self->db()->get_adaptor('Attribute');

  my $dfa = $self->db()->get_adaptor('DensityFeature');

  # number of features seen per 'ratio' density analysis; used below to turn
  # the summed values into an average

  my %density_ft_count = ();

  foreach my $slice (@slices) {

    $assembly_stats{'Length (bps)'} += $slice->length();

    foreach my $density_type (@density_types) {

      my $density_features = $dfa->fetch_all_by_Slice($slice, $density_type);

      foreach my $density_feature (@$density_features) {

        my $type  = $density_feature->density_type();
        my $label = $type->analysis()->display_label();

        if ($type->value_type() eq 'ratio') {
          $density_ft_count{$label} += 1;
        }

        $assembly_stats{$label} += $density_feature->density_value();
      }
    }

    foreach my $attrib_type (@attrib_types) {

      my $attribs = $aa->fetch_all_by_Slice($slice, $attrib_type);

      foreach my $attrib (@$attribs) {
        $assembly_stats{ $attrib->description() } += $attrib->value();
      }
    }
  }

  # NOTE(review): a ratio analysis with exactly one feature is left as the raw
  # number, without the two-decimal formatting or the '%' suffix. This looks
  # unintended, but it is kept because callers may depend on it - confirm.
  foreach my $density_analysis (keys %density_ft_count) {

    my $feature_count = $density_ft_count{$density_analysis};

    if ($feature_count > 1) {
      my $mean = $assembly_stats{$density_analysis} / $feature_count;
      $assembly_stats{$density_analysis} = sprintf("%.2f", $mean) . '%';
    }
  }

  return \%assembly_stats;
}
230
231
232
233 1;
234