Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/AssemblyAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::AssemblyAdaptor - Retrieves meta information | |
24 related to the assembly, and density features/counts for a given chromosome or, | |
25 if none is provided, for all top level seq regions | |
26 | |
27 | |
28 =head1 SYNOPSIS | |
29 | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 =head1 METHODS | |
34 | |
35 =cut | |
36 | |
37 package Bio::EnsEMBL::DBSQL::AssemblyAdaptor; | |
38 | |
39 use strict; | |
40 use warnings; | |
41 | |
42 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
43 use Bio::EnsEMBL::DBSQL::MetaContainer; | |
44 use Bio::EnsEMBL::Attribute; | |
45 | |
46 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); | |
47 | |
48 use vars qw(@ISA); | |
49 | |
50 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); | |
51 | |
52 | |
53 | |
=head2 new

  Arg [1]    : Bio::EnsEMBL::DBSQL::DBAdaptor $dbadaptor
               The adaptor for the database this assembly adaptor is using.
  Example    : my $aia = Bio::EnsEMBL::DBSQL::AssemblyAdaptor->new($dbadaptor);
  Description: Creates a new AssemblyAdaptor object. Construction is handed
               over unchanged to the parent class constructor.
  Returntype : Bio::EnsEMBL::DBSQL::AssemblyAdaptor
  Exceptions : none raised by this method itself
  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Status     : Stable

=cut

sub new {
  my $class     = shift;
  my $dbadaptor = shift;

  # Nothing adaptor-specific to initialise: the parent constructor does
  # all of the work and its result is returned as-is.
  my $adaptor = $class->SUPER::new($dbadaptor);

  return $adaptor;
}
74 | |
=head2 fetch_info

  Description: Returns a hash containing information about the assembly
               stored in the meta table (assembly name and date, genebuild
               details), the schema build when one is available, a
               reference to a sorted array of top level seq_region names
               and a reference to a sorted array of the distinct coordinate
               system versions found.
  Returntype : reference to a hash with assembly info key and value pairs
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_info {
  my $self = shift;

  my %assembly_info;

  # Assembly information stored in the meta table. Only the first value
  # listed for a key is reported; keys without any value are left out of
  # the result altogether.
  my $meta_container = $self->db()->get_adaptor('MetaContainer');

  my @meta_keys = qw(
    assembly.name
    assembly.date
    genebuild.start_date
    genebuild.method
    genebuild.initial_release_date
    genebuild.last_geneset_update
  );

  foreach my $meta_key (@meta_keys) {
    my @values = @{ $meta_container->list_value_by_key($meta_key) };
    if (@values) {
      $assembly_info{$meta_key} = $values[0];
    }
  }

  my $schema_build = $self->db()->_get_schema_build();
  if ($schema_build) {
    $assembly_info{'schema_build'} = $schema_build;
  }

  # Available coordinate system versions, de-duplicated through a hash.
  my $csa = $self->db()->get_adaptor('CoordSystem');
  my %versions;
  foreach my $cs (@{ $csa->fetch_all() }) {
    $versions{ $cs->version() } = 1;
  }

  # Hash key order is not stable between runs, so sort the versions to give
  # callers a reproducible list (the seq_region names below are sorted too).
  my @coord_system_versions = sort keys %versions;

  $assembly_info{'coord_system_versions'} = \@coord_system_versions;

  # Top level seq_region names, sorted as strings.
  my $sa     = $self->db()->get_adaptor('Slice');
  my $slices = $sa->fetch_all('toplevel');

  my @top_level_seq_region_names;
  if ($slices) {
    @top_level_seq_region_names = sort(map { $_->seq_region_name() } @$slices);
  }

  $assembly_info{'top_level_seq_region_names'} = \@top_level_seq_region_names;

  return \%assembly_info;
}
139 | |
140 | |
=head2 fetch_stats

  Arg [1]    : string $seq_region_name (optional)
               The name of the toplevel seq_region for which statistics
               should be fetched
  Description: Returns a reference to a hash containing density features /
               density related seq_region attributes for the toplevel
               seq_region provided or, if none is provided, summed over all
               top level seq regions. The hash holds the total length under
               'Length (bps)', one entry per density analysis display label
               and one entry per attribute description. Density types whose
               value type is 'ratio' are averaged over the features seen and
               reported as a string with two decimals and a trailing '%'.
               When a seq_region name is given it is echoed back under
               'seq_region_name'.
  Returntype : hashref
  Exceptions : throw if the toplevel slice with seq_region_name provided
               does not exist
  Caller     : general
  Status     : Stable

=cut

sub fetch_stats {
  my $self            = shift;
  my $seq_region_name = shift;

  my @slices;
  my %assembly_stats;

  my $sa = $self->db()->get_adaptor('Slice');

  # Test for a defined, non-empty name rather than plain truthiness so that
  # a seq_region called "0" is not mistaken for "no region requested".
  if (defined $seq_region_name && $seq_region_name ne '') {
    my $slice = $sa->fetch_by_region('toplevel', $seq_region_name);
    if (!$slice) {
      throw("Top level slice $seq_region_name not found");
    }
    push(@slices, $slice);
    $assembly_stats{'seq_region_name'} = $seq_region_name;
  } else {
    # Tolerate an undefined result the same way fetch_info does.
    @slices = @{ $sa->fetch_all('toplevel') || [] };
  }

  my @density_types = qw(genedensity knowngenedensity snpdensity percentgc);
  my @attrib_types  = qw(GeneNo% SNPCount);

  my $aa  = $self->db()->get_adaptor('Attribute');
  my $dfa = $self->db()->get_adaptor('DensityFeature');

  # Number of features seen per display label for density types expressed
  # as ratios; used afterwards to turn the summed values into an average.
  my %density_ft_count = ();

  foreach my $slice (@slices) {

    $assembly_stats{'Length (bps)'} += $slice->length();

    foreach my $density_type (@density_types) {

      my $density_features = $dfa->fetch_all_by_Slice($slice, $density_type);

      foreach my $density_feature (@$density_features) {

        my $type  = $density_feature->density_type();
        my $label = $type->analysis()->display_label();

        if ($type->value_type() eq 'ratio') {
          $density_ft_count{$label} += 1;
        }

        $assembly_stats{$label} += $density_feature->density_value();
      }
    }

    foreach my $attrib_type (@attrib_types) {

      my $attribs = $aa->fetch_all_by_Slice($slice, $attrib_type);

      foreach my $attrib (@$attribs) {
        $assembly_stats{ $attrib->description() } += $attrib->value();
      }
    }
  }

  # Every label recorded here has at least one feature, so the division is
  # always safe. Formatting is applied to all ratio statistics, including
  # those backed by a single feature, so callers always get "NN.NN%".
  foreach my $density_analysis (keys %density_ft_count) {
    my $average =
      $assembly_stats{$density_analysis} / $density_ft_count{$density_analysis};
    $assembly_stats{$density_analysis} = sprintf("%.2f", $average) . '%';
  }

  return \%assembly_stats;
}
230 | |
231 | |
232 | |
233 1; | |
234 |