0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
Bio::EnsEMBL::DBSQL::AssemblyAdaptor - Retrieves meta information about
the assembly, plus density features and counts for a given top-level
seq_region or, if none is provided, for all top-level seq_regions
|
|
26
|
|
27
|
|
28 =head1 SYNOPSIS
|
|
29
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 =head1 METHODS
|
|
34
|
|
35 =cut
|
|
36
|
|
37 package Bio::EnsEMBL::DBSQL::AssemblyAdaptor;
|
|
38
|
|
39 use strict;
|
|
40 use warnings;
|
|
41
|
|
42 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
|
|
43 use Bio::EnsEMBL::DBSQL::MetaContainer;
|
|
44 use Bio::EnsEMBL::Attribute;
|
|
45
|
|
46 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
|
|
47
|
|
48 use vars qw(@ISA);
|
|
49
|
|
50 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
|
|
51
|
|
52
|
|
53
|
|
=head2 new

  Arg [1]    : Bio::EnsEMBL::DBAdaptor $dbadaptor
               The adaptor for the database this assembly adaptor reads from.
  Example    : my $aia = Bio::EnsEMBL::DBSQL::AssemblyAdaptor->new($dbadaptor);
  Description: Creates a new AssemblyAdaptor object. All construction work is
               delegated to the parent class constructor.
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyAdaptor
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Status     : Stable

=cut

sub new {
  my $class     = shift;
  my $dbadaptor = shift;

  # Nothing adaptor-specific to set up: hand the database adaptor straight
  # to the parent constructor and return what it built.
  my $adaptor = $class->SUPER::new($dbadaptor);

  return $adaptor;
}
|
|
74
|
|
=head2 fetch_info

  Example    : my $info = $assembly_adaptor->fetch_info();
               print $info->{'assembly.name'}, "\n";
  Description: Returns a hash containing information about the assembly
               stored in the meta table, such as assembly name, date etc.,
               a reference to an array of top level seq_region names and a
               reference to an array of all coordinate system versions found.

               Keys of the returned hash:
                 assembly.name, assembly.date, genebuild.start_date,
                 genebuild.method, genebuild.initial_release_date,
                 genebuild.last_geneset_update
                     - first meta value stored for the key; the key is
                       absent when the meta table has no value for it
                 schema_build
                     - present only when the database reports a true value
                 coord_system_versions
                     - arrayref of distinct versions, sorted as strings
                 top_level_seq_region_names
                     - arrayref of names, sorted as strings
  Returntype : reference to a hash with assembly info key and value pairs
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_info {
  my $self = shift;

  my $db = $self->db();
  my %assembly_info;

  # Assembly information stored in the meta table. A key may hold several
  # values; only the first one is reported.
  my $meta_container = $db->get_adaptor('MetaContainer');

  my @meta_keys = qw(assembly.name assembly.date genebuild.start_date
    genebuild.method genebuild.initial_release_date genebuild.last_geneset_update);

  foreach my $meta_key (@meta_keys) {
    my $values = $meta_container->list_value_by_key($meta_key);
    next unless $values && @{$values};
    $assembly_info{$meta_key} = $values->[0];
  }

  my $schema_build = $db->_get_schema_build();
  if ($schema_build) {
    $assembly_info{'schema_build'} = $schema_build;
  }

  # Distinct coordinate system versions. The hash is only used to remove
  # duplicates; an undefined version is recorded as the empty string so that
  # it does not trigger an "uninitialized value" warning as a hash key.
  my $csa = $db->get_adaptor('CoordSystem');
  my %seen_version;
  foreach my $cs (@{ $csa->fetch_all() || [] }) {
    my $version = $cs->version();
    $version = '' unless defined $version;
    $seen_version{$version} = 1;
  }

  # Sorted so that the list is the same on every call (hash key order is
  # not), matching the sorted top level seq_region names below.
  my @coord_system_versions = sort(keys %seen_version);
  $assembly_info{'coord_system_versions'} = \@coord_system_versions;

  # Top level seq_region names.
  my $slices = $db->get_adaptor('Slice')->fetch_all('toplevel');

  my @top_level_seq_region_names;
  if ($slices) {
    @top_level_seq_region_names =
      sort(map { $_->seq_region_name() } @{$slices});
  }
  $assembly_info{'top_level_seq_region_names'} = \@top_level_seq_region_names;

  return \%assembly_info;
}
|
|
139
|
|
140
|
|
=head2 fetch_stats

  Arg [1]    : string $seq_region_name (optional)
               The name of the toplevel seq_region for which statistics
               should be fetched. When undefined or empty, statistics are
               accumulated over all toplevel seq_regions.
  Example    : my $stats = $assembly_adaptor->fetch_stats('X');
  Description: Returns a reference to a hash containing density features and
               density related seq_region attributes for the toplevel
               seq_region provided or, if none is provided, for all top
               level seq regions.

               Keys of the returned hash:
                 seq_region_name
                     - the name passed in (only when one was provided)
                 Length (bps)
                     - summed length of the slices examined
                 <analysis display label>
                     - summed density value of every density feature with
                       that label; for density types whose value type is
                       'ratio' the mean value, formatted as "NN.NN%"
                 <attribute description>
                     - summed value of the matching seq_region attributes
  Returntype : hashref
  Exceptions : throw if the toplevel slice with seq_region_name provided does
               not exist
  Caller     : general
  Status     : Stable

=cut

sub fetch_stats {
  my ($self, $seq_region_name) = @_;

  my $db = $self->db();
  my $sa = $db->get_adaptor('Slice');

  my %assembly_stats;
  my @slices;

  # Test for "was a name given" rather than truth, so that a seq_region
  # literally named "0" is looked up instead of being ignored.
  if (defined($seq_region_name) && $seq_region_name ne '') {
    my $slice = $sa->fetch_by_region('toplevel', $seq_region_name);
    if (!$slice) {
      throw("Top level slice $seq_region_name not found");
    }
    @slices = ($slice);
    $assembly_stats{'seq_region_name'} = $seq_region_name;
  } else {
    @slices = @{ $sa->fetch_all('toplevel') || [] };
  }

  my @density_types = qw(genedensity knowngenedensity snpdensity percentgc);

  my @attrib_types = qw(GeneNo% SNPCount);

  my $aa  = $db->get_adaptor('Attribute');
  my $dfa = $db->get_adaptor('DensityFeature');

  # Number of features seen per analysis label, kept only for density types
  # expressed as ratios: those are reported as a mean rather than a sum.
  my %ratio_feature_count;

  foreach my $slice (@slices) {

    $assembly_stats{'Length (bps)'} += $slice->length();

    foreach my $density_type (@density_types) {
      my $density_features = $dfa->fetch_all_by_Slice($slice, $density_type);

      foreach my $density_feature (@{ $density_features || [] }) {
        my $type  = $density_feature->density_type();
        my $label = $type->analysis()->display_label();

        if ($type->value_type() eq 'ratio') {
          $ratio_feature_count{$label} += 1;
        }

        $assembly_stats{$label} += $density_feature->density_value();
      }
    }

    foreach my $attrib_type (@attrib_types) {
      my $attribs = $aa->fetch_all_by_Slice($slice, $attrib_type);

      foreach my $attrib (@{ $attribs || [] }) {
        $assembly_stats{ $attrib->description() } += $attrib->value();
      }
    }
  }

  # Every label in %ratio_feature_count has a count of at least one, so the
  # division is always safe. A ratio backed by a single feature is formatted
  # the same way as one backed by many, so callers always get "NN.NN%".
  foreach my $label (keys %ratio_feature_count) {
    my $mean = $assembly_stats{$label} / $ratio_feature_count{$label};
    $assembly_stats{$label} = sprintf('%.2f', $mean) . '%';
  }

  return \%assembly_stats;
}
|
|
230
|
|
231
|
|
232
|
|
233 1;
|
|
234
|