comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/SimpleFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor
24
25 =head1 SYNOPSIS
26
27 my $reg = 'Bio::EnsEMBL::Registry';
28
29 $reg->
30 load_registry_from_db( ...
31
32 my $sfa =
33 $reg->get_adaptor('homo sapiens', 'core', 'SimpleFeature');
34
35 print ref($sfa), "\n";
36
37 my $sf_aref =
38 $sfa->fetch_all;
39
40 print scalar @$sf_aref, "\n";
41
42 =head1 DESCRIPTION
43
44 Simple Feature Adaptor - database access for simple features
45
46 =head1 METHODS
47
48 =cut
49
50 package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;
51 use vars qw(@ISA);
52 use strict;
53
54 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
55 use Bio::EnsEMBL::SimpleFeature;
56 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
57
58 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
59
60
=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SimpleFeatures @sf
               the simple features to store in the database
  Example    : $simple_feature_adaptor->store(@simple_feats);
  Description: Stores a list of simple feature objects in the database.
               A feature that is already stored in this database is skipped
               with a warning.  An attached analysis that has not been
               stored yet is stored first.  After a successful insert the
               feature that was passed in receives its new dbID and this
               adaptor.
  Returntype : none
  Exceptions : thrown if @sf is empty, if any element of @sf is not a
               Bio::EnsEMBL::SimpleFeature (this includes plain scalars and
               unblessed references), if a feature has no analysis
               attached, or if any of the features do not have an attached
               slice.
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ($self, @sf) = @_;

  if (scalar(@sf) == 0) {
    throw("Must call store with list of SimpleFeatures");
  }

  # Core module, loaded here so that the blessed() check below does not rely
  # on some other module having pulled it in already.
  require Scalar::Util;

  my $sth = $self->prepare(
    "INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
    "seq_region_end, seq_region_strand, " .
    "display_label, analysis_id, score) " .
    "VALUES (?,?,?,?,?,?,?)");

  my $db               = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

  FEATURE: foreach my $sf (@sf) {

    # Test blessed() before ->isa(): calling a method on an unblessed
    # reference (e.g. a plain hashref) would die with a raw Perl error
    # instead of the exception raised below.
    if (!Scalar::Util::blessed($sf)
        || !$sf->isa("Bio::EnsEMBL::SimpleFeature")) {
      throw("SimpleFeature must be an Ensembl SimpleFeature, " .
            "not a [" . ref($sf) . "]");
    }

    if ($sf->is_stored($db)) {
      warning("SimpleFeature [" . $sf->dbID . "] is already stored" .
              " in this database.");
      next FEATURE;
    }

    if (!defined($sf->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    # store the analysis if it has not been stored yet
    if (!$sf->analysis->is_stored($db)) {
      $analysis_adaptor->store($sf->analysis());
    }

    # _pre_store hands back the feature to write together with its
    # seq_region_id; $sf is rebound to that feature, so keep a handle on the
    # caller's object, which is the one that receives the dbID and adaptor.
    my $original = $sf;
    my $seq_region_id;
    ($sf, $seq_region_id) = $self->_pre_store($sf);

    $sth->bind_param(1, $seq_region_id,      SQL_INTEGER);
    $sth->bind_param(2, $sf->start,          SQL_INTEGER);
    $sth->bind_param(3, $sf->end,            SQL_INTEGER);
    $sth->bind_param(4, $sf->strand,         SQL_TINYINT);
    $sth->bind_param(5, $sf->display_label,  SQL_VARCHAR);
    $sth->bind_param(6, $sf->analysis->dbID, SQL_INTEGER);
    $sth->bind_param(7, $sf->score,          SQL_DOUBLE);

    $sth->execute();

    # The new row id is read from the statement handle's 'mysql_insertid'
    # attribute.
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }
}
132
133
=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               Returns the table name and the alias under which it is
               referred to in queries.
  Returntype : list of listrefs of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _tables {
  my ($self) = @_;

  # One [ table name, alias ] pair; the alias is the prefix used by _columns.
  my @table_and_alias = ('simple_feature', 'sf');

  return \@table_and_alias;
}
152
153
=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               Returns the columns to select, each qualified with the
               table alias 'sf'.  _objs_from_sth binds its variables in
               this same order.
  Returntype : list of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _columns {
  my ($self) = @_;

  return (
    'sf.simple_feature_id',
    'sf.seq_region_id',
    'sf.seq_region_start',
    'sf.seq_region_end',
    'sf.seq_region_strand',
    'sf.display_label',
    'sf.analysis_id',
    'sf.score',
  );
}
174
175
=head2 _objs_from_sth

  Arg [1]    : DBI statement handle $sth
               an executed statement selecting the columns of _columns()
  Arg [2]    : (optional) mapper $mapper
               if given, every feature is remapped with it; features for
               which the mapper returns no seq_region_id are skipped
  Arg [3]    : (optional) destination slice $dest_slice
               if given, coordinates are made relative to this slice and
               features outside it or on another seq_region are skipped
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               creates SimpleFeatures from an executed DBI statement handle.
  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db()->get_AnalysisAdaptor();

  my @features;
  my %analysis_hash;    # analysis_id          => analysis object
  my %slice_hash;       # "ID:<seq_region_id>" => slice
  my %sr_name_hash;     # seq_region_id        => seq_region name
  my %sr_cs_hash;       # seq_region_id        => coord system

  my ($simple_feature_id, $seq_region_id, $seq_region_start,
      $seq_region_end, $seq_region_strand, $display_label,
      $analysis_id, $score);

  # Same order as the list returned by _columns().
  $sth->bind_columns(\$simple_feature_id, \$seq_region_id,
                     \$seq_region_start,  \$seq_region_end,
                     \$seq_region_strand, \$display_label,
                     \$analysis_id,       \$score);

  # Read the destination slice's properties once, outside the row loop.
  my ($dest_slice_start, $dest_slice_end, $dest_slice_strand,
      $dest_slice_length, $dest_slice_seq_region_id);
  if ($dest_slice) {
    $dest_slice_start         = $dest_slice->start();
    $dest_slice_end           = $dest_slice->end();
    $dest_slice_strand        = $dest_slice->strand();
    $dest_slice_length        = $dest_slice->length();
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
  }

  FEATURE: while ($sth->fetch()) {

    # get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    # need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);

    # get the slice object
    my $slice = $slice_hash{"ID:" . $seq_region_id};

    if (!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:" . $seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id}       = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id}         = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if ($mapper) {

      if (defined $dest_slice
          && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')) {
        ($seq_region_id, $seq_region_start,
         $seq_region_end, $seq_region_strand) =
          $mapper->map($sr_name, $seq_region_start, $seq_region_end,
                       $seq_region_strand, $sr_cs, 1, $dest_slice);
      }
      else {
        ($seq_region_id, $seq_region_start,
         $seq_region_end, $seq_region_strand) =
          $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                           $seq_region_strand, $sr_cs);
      }

      # skip features that map to gaps or coord system boundaries
      next FEATURE if (!defined($seq_region_id));

      # get a slice in the coord system we just mapped to
      $slice = $slice_hash{"ID:" . $seq_region_id} ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is forward strand, nothing needs doing
    #
    if ($dest_slice) {
      if ($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if ($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end - $dest_slice_start + 1;
        }
        else {
          # Reverse-strand slice: measure from the slice end, which swaps
          # the roles of start and end and flips the strand.
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }

      # throw away features off the end of the requested slice
      if ($seq_region_end < 1
          || $seq_region_start > $dest_slice_length
          || ($dest_slice_seq_region_id != $seq_region_id)) {
        next FEATURE;
      }

      $slice = $dest_slice;
    }

    push(@features,
         $self->_create_feature_fast(
           'Bio::EnsEMBL::SimpleFeature', {
             'start'         => $seq_region_start,
             'end'           => $seq_region_end,
             'strand'        => $seq_region_strand,
             'slice'         => $slice,
             'analysis'      => $analysis,
             'adaptor'       => $self,
             'dbID'          => $simple_feature_id,
             'display_label' => $display_label,
             'score'         => $score
           }));
  }

  return \@features;
}
345
346
=head2 list_dbIDs

  Arg [1]    : (optional) int $ordered
               not 0 for the ids to be sorted by the seq_region
  Example    : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
  Description: Gets the internal ids of all simple features in the current
               db by delegating to _list_dbIDs for the simple_feature table.
  Returntype : reference to a list of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # The second argument of _list_dbIDs is deliberately left undefined.
  return $self->_list_dbIDs('simple_feature', undef, $ordered);
}
365
366 1;