Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/SimpleFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 my $reg = 'Bio::EnsEMBL::Registry'; | |
28 | |
29 $reg-> | |
30 load_registry_from_db( ... | |
31 | |
32 my $sfa = | |
33 $reg->get_adaptor('homo sapiens', 'core', 'SimpleFeature'); | |
34 | |
35 print ref($sfa), "\n"; | |
36 | |
37 my $sf_aref = | |
38 $sfa->fetch_all; | |
39 | |
40 print scalar @$sf_aref, "\n"; | |
41 | |
42 =head1 DESCRIPTION | |
43 | |
44 Simple Feature Adaptor - database access for simple features | |
45 | |
46 =head1 METHODS | |
47 | |
48 =cut | |
49 | |
50 package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor; | |
51 use vars qw(@ISA); | |
52 use strict; | |
53 | |
54 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; | |
55 use Bio::EnsEMBL::SimpleFeature; | |
56 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
57 | |
58 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); | |
59 | |
60 | |
=head2 store

  Arg [1]    : list of Bio::EnsEMBL::SimpleFeatures @sf
               the simple features to store in the database
  Example    : $simple_feature_adaptor->store(@simple_feats);
  Description: Inserts each feature into the simple_feature table.
               A feature that is already stored in this database is
               skipped with a warning.  An attached analysis that is not
               yet stored is stored first.  After a successful insert the
               caller's feature object is given its new dbID and this
               adaptor.
  Returntype : none
  Exceptions : thrown if @sf is empty,
               if any element of @sf is not a Bio::EnsEMBL::SimpleFeature
               (plain scalars and unblessed references included),
               or if a feature has no analysis attached.
               Features without an attached slice are presumably rejected
               by _pre_store(), which is not defined in this file.
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ($self, @sf) = @_;

  if( scalar(@sf) == 0 ) {
    throw("Must call store with list of SimpleFeatures");
  }

  # Core module, loaded locally so this sub does not rely on a file-level
  # import being present.
  require Scalar::Util;

  my $sth = $self->prepare
    ("INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
                                 "seq_region_end, seq_region_strand, " .
                                 "display_label, analysis_id, score) " .
     "VALUES (?,?,?,?,?,?,?)");

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

 FEATURE: foreach my $sf ( @sf ) {

    # blessed() rather than ref(): calling ->isa on an unblessed reference
    # (e.g. a plain hashref) would die with a Perl-level error instead of
    # the documented exception.
    if( !Scalar::Util::blessed($sf)
        || !$sf->isa("Bio::EnsEMBL::SimpleFeature") ) {
      throw("SimpleFeature must be an Ensembl SimpleFeature, " .
            "not a [".ref($sf)."]");
    }

    if($sf->is_stored($db)) {
      warning("SimpleFeature [".$sf->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    if(!defined($sf->analysis)) {
      throw("An analysis must be attached to the features to be stored.");
    }

    #store the analysis if it has not been stored yet
    if(!$sf->analysis->is_stored($db)) {
      $analysis_adaptor->store($sf->analysis());
    }

    # _pre_store() hands back the feature to insert together with its
    # seq_region_id; $sf is reassigned from it, so keep a handle on the
    # caller's object in order to update that one afterwards.
    my $original = $sf;
    my $seq_region_id;
    ($sf, $seq_region_id) = $self->_pre_store($sf);

    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$sf->start,SQL_INTEGER);
    $sth->bind_param(3,$sf->end,SQL_INTEGER);
    $sth->bind_param(4,$sf->strand,SQL_TINYINT);
    $sth->bind_param(5,$sf->display_label,SQL_VARCHAR);
    $sth->bind_param(6,$sf->analysis->dbID,SQL_INTEGER);
    $sth->bind_param(7,$sf->score,SQL_DOUBLE);

    $sth->execute();

    # 'mysql_insertid' is a DBD::mysql statement-handle attribute, so this
    # step is specific to the MySQL driver.
    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }

  return;
}
132 | |
133 | |
=head2 _tables

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of the superclass abstract method.
               Names the table this adaptor queries together with the
               alias used for it in generated SQL.
  Returntype : list of listrefs of strings; here a single
               [ table name, alias ] pair
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _tables {
  my ($self) = @_;

  # One [ table, alias ] pair: simple features live in a single table.
  my $simple_feature_table = [ 'simple_feature', 'sf' ];

  return $simple_feature_table;
}
152 | |
153 | |
=head2 _columns

  Arg [1]    : none
  Example    : none
  Description: PROTECTED implementation of the superclass abstract method.
               Lists the alias-qualified simple_feature columns to select;
               the order matches the variables bound in _objs_from_sth().
  Returntype : list of strings
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _columns {
  my ($self) = @_;

  return (
    'sf.simple_feature_id',
    'sf.seq_region_id',
    'sf.seq_region_start',
    'sf.seq_region_end',
    'sf.seq_region_strand',
    'sf.display_label',
    'sf.analysis_id',
    'sf.score',
  );
}
174 | |
175 | |
=head2 _objs_from_sth

  Arg [1]    : executed DBI statement handle $sth
               its result columns must be in the order given by _columns()
  Arg [2]    : (optional) assembly mapper $mapper
               when given, feature coordinates are remapped through its
               fastmap() method, or through map() when the mapper is a
               Bio::EnsEMBL::ChainedAssemblyMapper and a destination
               slice was supplied
  Arg [3]    : (optional) slice $dest_slice
               when given, coordinates are made relative to this slice,
               features falling outside it or lying on a different
               seq_region are dropped, and the remaining features are
               attached to it
  Example    : none
  Description: PROTECTED implementation of superclass abstract method.
               Creates SimpleFeatures from an executed DBI statement handle.
  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db()->get_AnalysisAdaptor();

  my @features;

  # Per-call caches so each analysis and slice is fetched at most once.
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($simple_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
     $seq_region_strand, $display_label, $analysis_id, $score);

  $sth->bind_columns(\$simple_feature_id, \$seq_region_id, \$seq_region_start,
                     \$seq_region_end, \$seq_region_strand, \$display_label,
                     \$analysis_id, \$score);

  # Read the destination slice properties once, outside the row loop.
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_seq_region_id;
  if($dest_slice) {
    $dest_slice_start         = $dest_slice->start();
    $dest_slice_end           = $dest_slice->end();
    $dest_slice_strand        = $dest_slice->strand();
    $dest_slice_length        = $dest_slice->length();
    $dest_slice_seq_region_id = $dest_slice->get_seq_region_id();
  }

 FEATURE: while($sth->fetch()) {
    #get the analysis object
    my $analysis = $analysis_hash{$analysis_id} ||=
      $aa->fetch_by_dbID($analysis_id);

    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);

    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};

    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id}     = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id}       = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

      if(defined $dest_slice
         && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')) {
        ( $seq_region_id, $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                          $seq_region_strand, $sr_cs, 1, $dest_slice );
      } else {
        ( $seq_region_id, $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
                              $seq_region_strand, $sr_cs );
      }

      #skip features that map to gaps or coord system boundaries
      next FEATURE if(!defined($seq_region_id));

      # Get a slice for the seq_region we just mapped to.  The mapper
      # returns a seq_region_id, so the lookup is the same whichever
      # coord system the feature came from.
      $slice = $slice_hash{"ID:".$seq_region_id} ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is forward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          # Reverse-strand slice: coordinates are counted back from the
          # slice end and the feature strand flips.
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }

      #throw away features off the end of the requested slice
      #or lying on a different seq_region
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
         ( $dest_slice_seq_region_id != $seq_region_id )) {
        next FEATURE;
      }
      $slice = $dest_slice;
    }

    push( @features,
          $self->_create_feature_fast(
                                    'Bio::EnsEMBL::SimpleFeature', {
                                      'start'    => $seq_region_start,
                                      'end'      => $seq_region_end,
                                      'strand'   => $seq_region_strand,
                                      'slice'    => $slice,
                                      'analysis' => $analysis,
                                      'adaptor'  => $self,
                                      'dbID'     => $simple_feature_id,
                                      'display_label' => $display_label,
                                      'score'         => $score
                                    } ) );

  }

  return \@features;
}
345 | |
346 | |
=head2 list_dbIDs

  Arg [1]    : (optional) int $ordered
               passed through to _list_dbIDs(); non-zero asks for the ids
               to be sorted by seq_region
  Example    : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
  Description: Gets the internal ids of all simple features in the
               current db by delegating to _list_dbIDs() for the
               simple_feature table.
  Returntype : list of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  my $table = "simple_feature";

  # The second argument is deliberately left undefined, as in the
  # superclass call convention used throughout this adaptor.
  return $self->_list_dbIDs($table, undef, $ordered);
}
365 | |
366 1; |