=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor

=head1 SYNOPSIS

  my $reg = 'Bio::EnsEMBL::Registry';

  $reg->
    load_registry_from_db( ...

  my $sfa =
    $reg->get_adaptor('homo sapiens', 'core', 'SimpleFeature');

  print ref($sfa), "\n";

  my $sf_aref =
    $sfa->fetch_all;

  print scalar @$sf_aref, "\n";

=head1 DESCRIPTION

Simple Feature Adaptor - database access for simple features

=head1 METHODS

=cut

package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;

use vars qw(@ISA);
use strict;
use warnings;

# NOTE(review): store() uses the bare SQL_INTEGER / SQL_TINYINT / SQL_VARCHAR /
# SQL_DOUBLE constants, which are not imported explicitly in this file;
# presumably they are exported by BaseFeatureAdaptor - confirm.
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use Bio::EnsEMBL::SimpleFeature;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);

@ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);


=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SimpleFeatures @sf
               the simple features to store in the database
  Example    : $simple_feature_adaptor->store(@simple_feats);
  Description: Stores a list of simple feature objects in the database.
               Features that are already stored in this database are
               skipped with a warning.  An analysis that is attached to a
               feature but not yet stored is stored first.  After the
               insert, the dbID and adaptor of the feature object that was
               passed in are set.
  Returntype : none
  Exceptions : thrown if @sf is empty, if any of the features do not
               have an attached slice, if any elements of @sf are not
               Bio::EnsEMBL::SimpleFeatures, or if a feature has no
               analysis attached
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ($self, @sf) = @_;

  if (scalar(@sf) == 0) {
    throw("Must call store with list of SimpleFeatures");
  }

  my $sth = $self->prepare(
    "INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
    "seq_region_end, seq_region_strand, " .
    "display_label, analysis_id, score) " .
    "VALUES (?,?,?,?,?,?,?)");

  my $db               = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

 FEATURE:
  foreach my $sf (@sf) {

    # The isa() call is wrapped in eval so that an unblessed reference
    # (e.g. a plain hashref) reaches the descriptive throw() below instead
    # of dying with "Can't call method "isa" on unblessed reference".
    my $is_simple_feature = ref($sf) && do {
      local $@;
      eval { $sf->isa("Bio::EnsEMBL::SimpleFeature") };
    };

    if (!$is_simple_feature) {
      throw("SimpleFeature must be an Ensembl SimpleFeature, " .
            "not a [" . ref($sf) . "]");
    }

    if ($sf->is_stored($db)) {
      warning("SimpleFeature [" . $sf->dbID . "] is already stored" .
              " in this database.");
      next FEATURE;
    }

    if (!defined($sf->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    # store the analysis if it has not been stored yet
    if (!$sf->analysis->is_stored($db)) {
      $analysis_adaptor->store($sf->analysis());
    }

    # _pre_store() hands back a feature plus the seq_region_id to insert;
    # $sf is overwritten with that feature, so keep the caller's object in
    # $original to receive the new dbID and adaptor afterwards.
    my $original = $sf;
    my $seq_region_id;
    ($sf, $seq_region_id) = $self->_pre_store($sf);

    $sth->bind_param(1, $seq_region_id,      SQL_INTEGER);
    $sth->bind_param(2, $sf->start,          SQL_INTEGER);
    $sth->bind_param(3, $sf->end,            SQL_INTEGER);
    $sth->bind_param(4, $sf->strand,         SQL_TINYINT);
    $sth->bind_param(5, $sf->display_label,  SQL_VARCHAR);
    $sth->bind_param(6, $sf->analysis->dbID, SQL_INTEGER);
    $sth->bind_param(7, $sf->score,          SQL_DOUBLE);

    $sth->execute();

    # NOTE(review): 'mysql_insertid' is a driver-specific statement handle
    # attribute, so this only yields the new row id on a MySQL connection.
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }

  return;
}


=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method
               returns the names, aliases of the tables to use for queries.
               Here that is the single pair 'simple_feature' / 'sf'; the
               'sf' alias is the prefix used by _columns().
  Returntype : list of listrefs of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _tables {
  my $self = shift;

  return ['simple_feature', 'sf'];
}


=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of superclass abstract method
               returns a list of columns to use for queries.  The order
               matters: _objs_from_sth() binds its result variables to
               these columns positionally.
  Returntype : list of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _columns {
  my $self = shift;

  return qw( sf.simple_feature_id
             sf.seq_region_id sf.seq_region_start sf.seq_region_end
             sf.seq_region_strand sf.display_label sf.analysis_id sf.score );
}


=head2 _objs_from_sth

  Arg [1]    : executed statement handle $sth whose rows carry the
               columns returned by _columns(), in that order
  Arg [2]    : (optional) assembly mapper $mapper
               if given, feature coordinates are remapped with it;
               features that do not map are skipped
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $dest_slice
               if given, coordinates are made relative to this slice and
               features outside it, or on another seq_region, are skipped
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               creates SimpleFeatures from an executed DBI statement handle.
  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db()->get_AnalysisAdaptor();

  my @features;

  # per-call caches, keyed by analysis dbID / seq_region_id
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my ($simple_feature_id, $seq_region_id, $seq_region_start,
      $seq_region_end, $seq_region_strand, $display_label,
      $analysis_id, $score);

  $sth->bind_columns(\$simple_feature_id, \$seq_region_id,
                     \$seq_region_start,  \$seq_region_end,
                     \$seq_region_strand, \$display_label,
                     \$analysis_id,       \$score);

  # read the destination slice properties once, outside the row loop
  my ($dest_slice_start, $dest_slice_end, $dest_slice_strand,
      $dest_slice_length, $dest_slice_seq_region_id);

  if ($dest_slice) {
    $dest_slice_start         = $dest_slice->start();
    $dest_slice_end           = $dest_slice->end();
    $dest_slice_strand        = $dest_slice->strand();
    $dest_slice_length        = $dest_slice->length();
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
  }

 FEATURE:
  while ($sth->fetch()) {

    # get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    # need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);

    # get the slice object, its name and its coord system.  Name and coord
    # system are filled in lazily so they are also available for a slice
    # that was put into %slice_hash by the mapping step further down.
    my $slice = $slice_hash{"ID:" . $seq_region_id} ||=
      $sa->fetch_by_seq_region_id($seq_region_id);
    my $sr_name = $sr_name_hash{$seq_region_id} ||=
      $slice->seq_region_name();
    my $sr_cs = $sr_cs_hash{$seq_region_id} ||=
      $slice->coord_system();

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if ($mapper) {

      if (defined $dest_slice
          && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper'))
      {
        ($seq_region_id, $seq_region_start,
         $seq_region_end, $seq_region_strand) =
          $mapper->map($sr_name, $seq_region_start, $seq_region_end,
                       $seq_region_strand, $sr_cs, 1, $dest_slice);
      }
      else {
        ($seq_region_id, $seq_region_start,
         $seq_region_end, $seq_region_strand) =
          $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
                           $seq_region_strand, $sr_cs);
      }

      # skip features that map to gaps or coord system boundaries
      next FEATURE if (!defined($seq_region_id));

      # get a slice in the coord system we just mapped to
      $slice = $slice_hash{"ID:" . $seq_region_id} ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is forward strand, nothing needs doing
    #
    if ($dest_slice) {
      if ($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if ($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end - $dest_slice_start + 1;
        }
        else {
          # reverse strand slice: flip the coordinates around the slice
          # end and invert the feature strand
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }

      # throw away features off the end of the requested slice
      # or on a different seq_region
      if ($seq_region_end < 1
          || $seq_region_start > $dest_slice_length
          || ($dest_slice_seq_region_id != $seq_region_id))
      {
        next FEATURE;
      }

      $slice = $dest_slice;
    }

    push(@features,
         $self->_create_feature_fast(
           'Bio::EnsEMBL::SimpleFeature', {
             'start'         => $seq_region_start,
             'end'           => $seq_region_end,
             'strand'        => $seq_region_strand,
             'slice'         => $slice,
             'analysis'      => $analysis,
             'adaptor'       => $self,
             'dbID'          => $simple_feature_id,
             'display_label' => $display_label,
             'score'         => $score
           }));
  }

  return \@features;
}


=head2 list_dbIDs

  Arg [1]    : (optional) int $ordered
               not 0 for the ids to be sorted by the seq_region.
  Example    : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
  Description: Gets an array of internal ids for all simple features in
               the current db.  The work is delegated to _list_dbIDs()
               for the 'simple_feature' table.
  Returntype : whatever _list_dbIDs() returns; the example above
               dereferences it as a listref of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
  my ($self, $ordered) = @_;

  return $self->_list_dbIDs("simple_feature", undef, $ordered);
}

# a module file must end with a true value
1;