Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/RepeatFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 $rfa = $database_adaptor->get_RepeatFeatureAdaptor(); | |
28 | |
29 my $repeat = $rfa->fetch_by_dbID(1234); | |
30 my @repeats = @{ $rfa->fetch_all_by_Slice($slice) }; | |
31 | |
32 =head1 DESCRIPTION | |
33 | |
34 This is an adaptor for the retrieval and storage of RepeatFeature | |
35 objects from the database. Most of the implementation is in the | |
36 superclass BaseFeatureAdaptor. | |
37 | |
38 =head1 METHODS | |
39 | |
40 =cut | |
41 | |
42 package Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor; | |
43 | |
44 use strict; | |
45 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; | |
46 use Bio::EnsEMBL::RepeatFeature; | |
47 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
48 use Bio::EnsEMBL::Utils::Scalar qw/wrap_array/; | |
49 | |
50 use vars qw(@ISA); | |
51 | |
52 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor); | |
53 | |
54 | |
55 =head2 fetch_all_by_Slice | |
56 | |
57 Arg [1] : Bio::EnsEMBL::Slice $slice | |
58 Arg [2] : (optional) string $logic_name | |
59 Limits RepeatFeatures obtained to those having an Analysis with | |
60 of the specified logic_name. If no logic name is specified | |
61 Repeats of all analysis types are retrieved. | |
62 Arg [3] : (optional) string/array $repeat_type | |
63 Limits RepeatFeatures obtained to those of specified | |
64 repeat_type | |
65 Example : @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, 'Type II Transposons')}; | |
66 @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, ['Type II Transposons', 'RNA repeats'])}; | |
67 Description: Retrieves repeat features overlapping the area designated by | |
68 the provided slice argument. Returned features will be in | |
69 in the same coordinate system as the provided slice and will | |
70 have coordinates relative to the slice start. | |
71 Returntype : reference to a list of Bio::EnsEMBL::RepeatFeatures. | |
72 Exceptions : throw on bad argument | |
73 Caller : Slice::get_all_RepeatFeatures | |
74 Status : Stable | |
75 | |
76 =cut | |
77 | |
78 sub fetch_all_by_Slice { | |
79 my $self = shift; | |
80 my $slice = shift; | |
81 my $logic_name = shift; | |
82 my $repeat_type = shift; | |
83 | |
84 my $constraint = ''; | |
85 | |
86 # MySQL was optimising the query the incorrect way when joining to | |
87 # the repeat_consensus table on type | |
88 $self->_straight_join(1); | |
89 | |
90 if($repeat_type) { | |
91 my $rta = wrap_array($repeat_type); | |
92 if(scalar(@{$rta}) > 1) { | |
93 $constraint .= sprintf('rc.repeat_type IN (%s)', join(q{,}, map {"'${_}'"} @{$rta})); | |
94 } | |
95 else { | |
96 $constraint .= "rc.repeat_type = '${repeat_type}'"; | |
97 } | |
98 } | |
99 | |
100 my $result = | |
101 $self->fetch_all_by_Slice_constraint($slice,$constraint,$logic_name); | |
102 | |
103 | |
104 $self->_straight_join(0); | |
105 | |
106 return $result; | |
107 } | |
108 | |
109 | |
110 # _tablename | |
111 # | |
112 # Arg [1] : none | |
113 # Example : none | |
114 # Description: PROTECTED Implementation of abstract superclass method to | |
115 # provide the name of the tables to query | |
116 # Returntype : string | |
117 # Exceptions : none | |
118 # Caller : internal | |
119 | |
120 | |
121 sub _tables { | |
122 my $self = shift; | |
123 | |
124 return (['repeat_feature', 'r'], ['repeat_consensus', 'rc']); | |
125 } | |
126 | |
127 | |
128 # _columns | |
129 # | |
130 # Arg [1] : none | |
131 # Example : none | |
132 # Description: PROTECTED Implementation of abstract superclass method to | |
133 # provide the name of the columns to query | |
134 # Returntype : list of strings | |
135 # Exceptions : none | |
136 # Caller : internal | |
137 | |
138 sub _columns { | |
139 my $self = shift; | |
140 | |
141 return qw (r.repeat_feature_id | |
142 r.seq_region_id | |
143 r.seq_region_start | |
144 r.seq_region_end | |
145 r.seq_region_strand | |
146 r.repeat_consensus_id | |
147 r.repeat_start | |
148 r.repeat_end | |
149 r.analysis_id | |
150 r.score | |
151 rc.repeat_name | |
152 rc.repeat_class | |
153 rc.repeat_type | |
154 rc.repeat_consensus); | |
155 } | |
156 | |
157 | |
158 # _default_where_clause | |
159 # Arg [1] : none | |
160 # Example : none | |
161 # Description: Overrides superclass method to provide an additional | |
162 # table joining constraint before the SQL query is performed. | |
163 # Returntype : string | |
164 # Exceptions : none | |
165 # Caller : generic_fetch | |
166 # | |
167 | |
168 sub _default_where_clause { | |
169 my $self = shift; | |
170 | |
171 return 'r.repeat_consensus_id = rc.repeat_consensus_id'; | |
172 } | |
173 | |
174 | |
175 | |
176 # Description: PROTECTED implementation of abstract superclass method. | |
177 # responsible for the creation of RepeatFeatures from a | |
178 # hashref generated from an SQL query | |
179 | |
180 sub _objs_from_sth { | |
181 my ($self, $sth, $mapper, $dest_slice) = @_; | |
182 | |
183 # | |
184 # This code is ugly because an attempt has been made to remove as many | |
185 # function calls as possible for speed purposes. Thus many caches and | |
186 # a fair bit of gymnastics is used. | |
187 # | |
188 | |
189 my $rca = $self->db()->get_RepeatConsensusAdaptor(); | |
190 my $sa = $self->db()->get_SliceAdaptor(); | |
191 my $aa = $self->db->get_AnalysisAdaptor(); | |
192 | |
193 my @features; | |
194 my %rc_hash; | |
195 my %analysis_hash; | |
196 my %slice_hash; | |
197 my %sr_name_hash; | |
198 my %sr_cs_hash; | |
199 | |
200 my($repeat_feature_id, $seq_region_id, $seq_region_start, $seq_region_end, | |
201 $seq_region_strand, $repeat_consensus_id, $repeat_start, $repeat_end, | |
202 $analysis_id, $score, $repeat_name, $repeat_class, $repeat_type, | |
203 $repeat_consensus); | |
204 | |
205 $sth->bind_columns( \$repeat_feature_id, \$seq_region_id, \$seq_region_start, | |
206 \$seq_region_end, \$seq_region_strand, | |
207 \$repeat_consensus_id, \$repeat_start,\$repeat_end, | |
208 \$analysis_id, \$score, \$repeat_name, \$repeat_class, | |
209 \$repeat_type, \$repeat_consensus ); | |
210 | |
211 my $asm_cs; | |
212 my $cmp_cs; | |
213 my $asm_cs_vers; | |
214 my $asm_cs_name; | |
215 my $cmp_cs_vers; | |
216 my $cmp_cs_name; | |
217 if($mapper) { | |
218 $asm_cs = $mapper->assembled_CoordSystem(); | |
219 $cmp_cs = $mapper->component_CoordSystem(); | |
220 $asm_cs_name = $asm_cs->name(); | |
221 $asm_cs_vers = $asm_cs->version(); | |
222 $cmp_cs_name = $cmp_cs->name(); | |
223 $cmp_cs_vers = $cmp_cs->version(); | |
224 } | |
225 | |
226 my $dest_slice_start; | |
227 my $dest_slice_end; | |
228 my $dest_slice_strand; | |
229 my $dest_slice_length; | |
230 my $dest_slice_sr_name; | |
231 my $dest_slice_sr_id; | |
232 if($dest_slice) { | |
233 $dest_slice_start = $dest_slice->start(); | |
234 $dest_slice_end = $dest_slice->end(); | |
235 $dest_slice_strand = $dest_slice->strand(); | |
236 $dest_slice_length = $dest_slice->length(); | |
237 $dest_slice_sr_name = $dest_slice->seq_region_name(); | |
238 $dest_slice_sr_id = $dest_slice->get_seq_region_id(); | |
239 } | |
240 | |
241 FEATURE: while($sth->fetch()) { | |
242 #create a repeat consensus object | |
243 | |
244 my $rc = $rc_hash{$repeat_consensus_id} ||= | |
245 Bio::EnsEMBL::RepeatConsensus->new_fast | |
246 ({'dbID' => $repeat_consensus_id, | |
247 'adaptor' => $rca, | |
248 'name' => $repeat_name, | |
249 'repeat_class' => $repeat_class, | |
250 'repeat_type' => $repeat_type, | |
251 'repeat_consensus' => $repeat_consensus, | |
252 'length' => length($repeat_consensus)}); | |
253 | |
254 #get the analysis object | |
255 my $analysis = $analysis_hash{$analysis_id} ||= | |
256 $aa->fetch_by_dbID($analysis_id); | |
257 #need to get the internal_seq_region, if present | |
258 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); | |
259 my $slice = $slice_hash{"ID:".$seq_region_id}; | |
260 | |
261 if(!$slice) { | |
262 $slice = $sa->fetch_by_seq_region_id($seq_region_id); | |
263 $slice_hash{"ID:".$seq_region_id} = $slice; | |
264 $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); | |
265 $sr_cs_hash{$seq_region_id} = $slice->coord_system(); | |
266 } | |
267 | |
268 my $sr_name = $sr_name_hash{$seq_region_id}; | |
269 my $sr_cs = $sr_cs_hash{$seq_region_id}; | |
270 # | |
271 # remap the feature coordinates to another coord system | |
272 # if a mapper was provided | |
273 # | |
274 if($mapper) { | |
275 | |
276 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { | |
277 ( $seq_region_id, $seq_region_start, | |
278 $seq_region_end, $seq_region_strand ) | |
279 = | |
280 $mapper->map( $sr_name, $seq_region_start, $seq_region_end, | |
281 $seq_region_strand, $sr_cs, 1, $dest_slice); | |
282 | |
283 } else { | |
284 | |
285 ( $seq_region_id, $seq_region_start, | |
286 $seq_region_end, $seq_region_strand ) | |
287 = | |
288 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, | |
289 $seq_region_strand, $sr_cs ); | |
290 } | |
291 | |
292 #skip features that map to gaps or coord system boundaries | |
293 next FEATURE if(!defined($seq_region_id)); | |
294 | |
295 #get a slice in the coord system we just mapped to | |
296 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { | |
297 $slice = $slice_hash{"ID:".$seq_region_id} ||= | |
298 $sa->fetch_by_seq_region_id($seq_region_id); | |
299 # } else { | |
300 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= | |
301 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, | |
302 # $asm_cs_vers); | |
303 # } | |
304 } | |
305 | |
306 # | |
307 # If a destination slice was provided convert the coords | |
308 # If the dest_slice starts at 1 and is foward strand, nothing needs doing | |
309 # | |
310 if($dest_slice) { | |
311 if($dest_slice_start != 1 || $dest_slice_strand != 1) { | |
312 if($dest_slice_strand == 1) { | |
313 $seq_region_start = $seq_region_start - $dest_slice_start + 1; | |
314 $seq_region_end = $seq_region_end - $dest_slice_start + 1; | |
315 } else { | |
316 my $tmp_seq_region_start = $seq_region_start; | |
317 $seq_region_start = $dest_slice_end - $seq_region_end + 1; | |
318 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; | |
319 $seq_region_strand *= -1; | |
320 } | |
321 } | |
322 | |
323 #throw away features off the end of the requested slice | |
324 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || | |
325 ( $dest_slice_sr_id ne $seq_region_id )) { | |
326 next FEATURE; | |
327 } | |
328 $slice = $dest_slice; | |
329 } | |
330 | |
331 # Finally, create the new RepeatFeature. | |
332 push( @features, | |
333 $self->_create_feature_fast( 'Bio::EnsEMBL::RepeatFeature', { | |
334 'dbID' => $repeat_feature_id, | |
335 'analysis' => $analysis, | |
336 'start' => $seq_region_start, | |
337 'end' => $seq_region_end, | |
338 'strand' => $seq_region_strand, | |
339 'score' => $score, | |
340 'hstart' => $repeat_start, | |
341 'hend' => $repeat_end, | |
342 'repeat_consensus' => $rc, | |
343 'adaptor' => $self, | |
344 'slice' => $slice | |
345 } ) ); | |
346 | |
347 } | |
348 | |
349 return \@features; | |
350 } | |
351 | |
352 | |
353 =head2 store | |
354 | |
355 Arg [1] : list of Bio::EnsEMBL::RepeatFeatures $repeat_feature_id | |
356 the list of repeat features to store in the database | |
357 Example : $repeat_feature_adaptor->store(@repeat_features); | |
358 Description: stores a repeat feature in the database | |
359 Returntype : none | |
360 Exceptions : if the repeat features do not have attached sequences | |
361 or if repeat_consensus are not present | |
362 Caller : general | |
363 Status : Stable | |
364 | |
365 =cut | |
366 | |
367 sub store { | |
368 my( $self, @repeats ) = @_; | |
369 | |
370 my $db = $self->db(); | |
371 my $rca = $db->get_RepeatConsensusAdaptor(); | |
372 my $sa = $db->get_SliceAdaptor(); | |
373 my ($cons, $db_id); | |
374 | |
375 my $sth = $self->prepare(qq{ | |
376 INSERT into repeat_feature( repeat_feature_id | |
377 , seq_region_id | |
378 , seq_region_start | |
379 , seq_region_end | |
380 , seq_region_strand | |
381 , repeat_consensus_id | |
382 , repeat_start | |
383 , repeat_end | |
384 , score | |
385 , analysis_id ) | |
386 VALUES(NULL, ?,?,?,?,?,?,?,?,?) | |
387 }); | |
388 | |
389 FEATURE: foreach my $rf (@repeats) { | |
390 if(!ref($rf) || !$rf->isa('Bio::EnsEMBL::RepeatFeature')) { | |
391 throw('Expected RepeatFeature argument not [' . ref($rf) .'].'); | |
392 } | |
393 | |
394 if($rf->is_stored($db)) { | |
395 warning("RepeatFeature [".$rf->dbID."] is already stored in this DB."); | |
396 next FEATURE; | |
397 } | |
398 | |
399 my $cons = $rf->repeat_consensus(); | |
400 throw("Must have a RepeatConsensus attached") if(!defined($cons)); | |
401 | |
402 # for tandem repeats - simply store consensus and repeat | |
403 # one pair per hit. don't need to check consensi stored | |
404 # already. consensus has name and class set to 'trf' | |
405 | |
406 if ($cons->repeat_class eq 'trf') { | |
407 | |
408 # Look for matches already stored | |
409 my @match = @{$rca->fetch_all_by_class_seq('trf', $cons->repeat_consensus)}; | |
410 if (@match) { | |
411 $cons->dbID($match[0]->dbID()); | |
412 } | |
413 else { | |
414 $rca->store($cons); | |
415 } | |
416 | |
417 } elsif ($cons->repeat_class eq 'Simple_repeat') { | |
418 | |
419 my $rcon = $cons->name; | |
420 $rcon =~ s/\((\S+)\)n/$1/; # get repeat element | |
421 $cons->repeat_consensus($rcon); | |
422 | |
423 # Look for matches already stored | |
424 my $match = $rca->fetch_by_name_class($cons->name, 'Simple_repeat'); | |
425 if ($match) { | |
426 $cons->dbID($match->dbID()); | |
427 } | |
428 else { | |
429 $rca->store($cons); | |
430 } | |
431 | |
432 } else { | |
433 | |
434 # for other repeats - need to see if a consensus is stored already | |
435 if(!$cons->dbID) { | |
436 my $match = ($rca->fetch_by_name($cons->name)); | |
437 | |
438 if($match) { | |
439 #set the consensus dbID to be the same as the database one | |
440 $cons->dbID($match->dbID()); | |
441 } else { | |
442 # if we don't match a consensus already stored create a fake one | |
443 # and set consensus to 'N' as null seq not allowed | |
444 # FIXME: not happy with this, but ho hum ... | |
445 warning("Can't find " . $cons->name . "\n"); | |
446 $cons->repeat_consensus("N"); | |
447 $rca->store($cons); | |
448 } | |
449 } | |
450 | |
451 #if (@match > 1) { | |
452 #multiple consensi were matched | |
453 # $self->warn(@match . " consensi for " . $cons->name . "\n"); | |
454 #} | |
455 } | |
456 | |
457 my $slice = $rf->slice(); | |
458 if(!ref($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice'))) { | |
459 throw("RepeatFeature cannot be stored without an associated slice."); | |
460 } | |
461 | |
462 my $original = $rf; | |
463 my $seq_region_id; | |
464 ($rf, $seq_region_id) = $self->_pre_store($rf); | |
465 | |
466 $sth->bind_param(1,$seq_region_id,SQL_INTEGER); | |
467 $sth->bind_param(2,$rf->start,SQL_INTEGER); | |
468 $sth->bind_param(3,$rf->end,SQL_INTEGER); | |
469 $sth->bind_param(4,$rf->strand,SQL_TINYINT); | |
470 $sth->bind_param(5,$rf->repeat_consensus->dbID,SQL_INTEGER); | |
471 $sth->bind_param(6,$rf->hstart,SQL_INTEGER); | |
472 $sth->bind_param(7,$rf->hend,SQL_INTEGER); | |
473 $sth->bind_param(8,$rf->score,SQL_DOUBLE); | |
474 $sth->bind_param(9,$rf->analysis->dbID,SQL_INTEGER); | |
475 | |
476 $sth->execute(); | |
477 | |
478 my $db_id = $sth->{'mysql_insertid'} | |
479 or throw("Didn't get an insertid from the INSERT statement"); | |
480 | |
481 $original->dbID($db_id); | |
482 $original->adaptor($self); | |
483 } | |
484 } | |
485 | |
486 | |
487 =head2 list_dbIDs | |
488 | |
489 Arg [1] : none | |
490 Example : @feature_ids = @{$repeat_feature_adaptor->list_dbIDs()}; | |
491 Description: Gets an array of internal ids for all repeat features in the current db | |
492 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. | |
493 Returntype : list of ints | |
494 Exceptions : none | |
495 Caller : ? | |
496 Status : Stable | |
497 | |
498 =cut | |
499 | |
500 sub list_dbIDs { | |
501 my ($self, $ordered) = @_; | |
502 | |
503 return $self->_list_dbIDs("repeat_feature", undef, $ordered); | |
504 } | |
505 | |
506 1; | |
507 | |
508 | |
509 | |
510 | |
511 |