comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/RepeatFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor
24
25 =head1 SYNOPSIS
26
27 $rfa = $database_adaptor->get_RepeatFeatureAdaptor();
28
29 my $repeat = $rfa->fetch_by_dbID(1234);
30 my @repeats = @{ $rfa->fetch_all_by_Slice($slice) };
31
32 =head1 DESCRIPTION
33
34 This is an adaptor for the retrieval and storage of RepeatFeature
35 objects from the database. Most of the implementation is in the
36 superclass BaseFeatureAdaptor.
37
38 =head1 METHODS
39
40 =cut
41
42 package Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor;
43
44 use strict;
45 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
46 use Bio::EnsEMBL::RepeatFeature;
47 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
48 use Bio::EnsEMBL::Utils::Scalar qw/wrap_array/;
49
50 use vars qw(@ISA);
51
52 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor);
53
54
55 =head2 fetch_all_by_Slice
56
57 Arg [1] : Bio::EnsEMBL::Slice $slice
58 Arg [2] : (optional) string $logic_name
59 Limits RepeatFeatures obtained to those having an Analysis with
60 of the specified logic_name. If no logic name is specified
61 Repeats of all analysis types are retrieved.
62 Arg [3] : (optional) string/array $repeat_type
63 Limits RepeatFeatures obtained to those of specified
64 repeat_type
65 Example : @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, 'Type II Transposons')};
66 @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, ['Type II Transposons', 'RNA repeats'])};
67 Description: Retrieves repeat features overlapping the area designated by
68 the provided slice argument. Returned features will be in
69 in the same coordinate system as the provided slice and will
70 have coordinates relative to the slice start.
71 Returntype : reference to a list of Bio::EnsEMBL::RepeatFeatures.
72 Exceptions : throw on bad argument
73 Caller : Slice::get_all_RepeatFeatures
74 Status : Stable
75
76 =cut
77
78 sub fetch_all_by_Slice {
79 my $self = shift;
80 my $slice = shift;
81 my $logic_name = shift;
82 my $repeat_type = shift;
83
84 my $constraint = '';
85
86 # MySQL was optimising the query the incorrect way when joining to
87 # the repeat_consensus table on type
88 $self->_straight_join(1);
89
90 if($repeat_type) {
91 my $rta = wrap_array($repeat_type);
92 if(scalar(@{$rta}) > 1) {
93 $constraint .= sprintf('rc.repeat_type IN (%s)', join(q{,}, map {"'${_}'"} @{$rta}));
94 }
95 else {
96 $constraint .= "rc.repeat_type = '${repeat_type}'";
97 }
98 }
99
100 my $result =
101 $self->fetch_all_by_Slice_constraint($slice,$constraint,$logic_name);
102
103
104 $self->_straight_join(0);
105
106 return $result;
107 }
108
109
110 # _tablename
111 #
112 # Arg [1] : none
113 # Example : none
114 # Description: PROTECTED Implementation of abstract superclass method to
115 # provide the name of the tables to query
116 # Returntype : string
117 # Exceptions : none
118 # Caller : internal
119
120
121 sub _tables {
122 my $self = shift;
123
124 return (['repeat_feature', 'r'], ['repeat_consensus', 'rc']);
125 }
126
127
128 # _columns
129 #
130 # Arg [1] : none
131 # Example : none
132 # Description: PROTECTED Implementation of abstract superclass method to
133 # provide the name of the columns to query
134 # Returntype : list of strings
135 # Exceptions : none
136 # Caller : internal
137
138 sub _columns {
139 my $self = shift;
140
141 return qw (r.repeat_feature_id
142 r.seq_region_id
143 r.seq_region_start
144 r.seq_region_end
145 r.seq_region_strand
146 r.repeat_consensus_id
147 r.repeat_start
148 r.repeat_end
149 r.analysis_id
150 r.score
151 rc.repeat_name
152 rc.repeat_class
153 rc.repeat_type
154 rc.repeat_consensus);
155 }
156
157
158 # _default_where_clause
159 # Arg [1] : none
160 # Example : none
161 # Description: Overrides superclass method to provide an additional
162 # table joining constraint before the SQL query is performed.
163 # Returntype : string
164 # Exceptions : none
165 # Caller : generic_fetch
166 #
167
168 sub _default_where_clause {
169 my $self = shift;
170
171 return 'r.repeat_consensus_id = rc.repeat_consensus_id';
172 }
173
174
175
176 # Description: PROTECTED implementation of abstract superclass method.
177 # responsible for the creation of RepeatFeatures from a
178 # hashref generated from an SQL query
179
180 sub _objs_from_sth {
181 my ($self, $sth, $mapper, $dest_slice) = @_;
182
183 #
184 # This code is ugly because an attempt has been made to remove as many
185 # function calls as possible for speed purposes. Thus many caches and
186 # a fair bit of gymnastics is used.
187 #
188
189 my $rca = $self->db()->get_RepeatConsensusAdaptor();
190 my $sa = $self->db()->get_SliceAdaptor();
191 my $aa = $self->db->get_AnalysisAdaptor();
192
193 my @features;
194 my %rc_hash;
195 my %analysis_hash;
196 my %slice_hash;
197 my %sr_name_hash;
198 my %sr_cs_hash;
199
200 my($repeat_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
201 $seq_region_strand, $repeat_consensus_id, $repeat_start, $repeat_end,
202 $analysis_id, $score, $repeat_name, $repeat_class, $repeat_type,
203 $repeat_consensus);
204
205 $sth->bind_columns( \$repeat_feature_id, \$seq_region_id, \$seq_region_start,
206 \$seq_region_end, \$seq_region_strand,
207 \$repeat_consensus_id, \$repeat_start,\$repeat_end,
208 \$analysis_id, \$score, \$repeat_name, \$repeat_class,
209 \$repeat_type, \$repeat_consensus );
210
211 my $asm_cs;
212 my $cmp_cs;
213 my $asm_cs_vers;
214 my $asm_cs_name;
215 my $cmp_cs_vers;
216 my $cmp_cs_name;
217 if($mapper) {
218 $asm_cs = $mapper->assembled_CoordSystem();
219 $cmp_cs = $mapper->component_CoordSystem();
220 $asm_cs_name = $asm_cs->name();
221 $asm_cs_vers = $asm_cs->version();
222 $cmp_cs_name = $cmp_cs->name();
223 $cmp_cs_vers = $cmp_cs->version();
224 }
225
226 my $dest_slice_start;
227 my $dest_slice_end;
228 my $dest_slice_strand;
229 my $dest_slice_length;
230 my $dest_slice_sr_name;
231 my $dest_slice_sr_id;
232 if($dest_slice) {
233 $dest_slice_start = $dest_slice->start();
234 $dest_slice_end = $dest_slice->end();
235 $dest_slice_strand = $dest_slice->strand();
236 $dest_slice_length = $dest_slice->length();
237 $dest_slice_sr_name = $dest_slice->seq_region_name();
238 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
239 }
240
241 FEATURE: while($sth->fetch()) {
242 #create a repeat consensus object
243
244 my $rc = $rc_hash{$repeat_consensus_id} ||=
245 Bio::EnsEMBL::RepeatConsensus->new_fast
246 ({'dbID' => $repeat_consensus_id,
247 'adaptor' => $rca,
248 'name' => $repeat_name,
249 'repeat_class' => $repeat_class,
250 'repeat_type' => $repeat_type,
251 'repeat_consensus' => $repeat_consensus,
252 'length' => length($repeat_consensus)});
253
254 #get the analysis object
255 my $analysis = $analysis_hash{$analysis_id} ||=
256 $aa->fetch_by_dbID($analysis_id);
257 #need to get the internal_seq_region, if present
258 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
259 my $slice = $slice_hash{"ID:".$seq_region_id};
260
261 if(!$slice) {
262 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
263 $slice_hash{"ID:".$seq_region_id} = $slice;
264 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
265 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
266 }
267
268 my $sr_name = $sr_name_hash{$seq_region_id};
269 my $sr_cs = $sr_cs_hash{$seq_region_id};
270 #
271 # remap the feature coordinates to another coord system
272 # if a mapper was provided
273 #
274 if($mapper) {
275
276 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
277 ( $seq_region_id, $seq_region_start,
278 $seq_region_end, $seq_region_strand )
279 =
280 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
281 $seq_region_strand, $sr_cs, 1, $dest_slice);
282
283 } else {
284
285 ( $seq_region_id, $seq_region_start,
286 $seq_region_end, $seq_region_strand )
287 =
288 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
289 $seq_region_strand, $sr_cs );
290 }
291
292 #skip features that map to gaps or coord system boundaries
293 next FEATURE if(!defined($seq_region_id));
294
295 #get a slice in the coord system we just mapped to
296 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
297 $slice = $slice_hash{"ID:".$seq_region_id} ||=
298 $sa->fetch_by_seq_region_id($seq_region_id);
299 # } else {
300 # $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
301 # $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
302 # $asm_cs_vers);
303 # }
304 }
305
306 #
307 # If a destination slice was provided convert the coords
308 # If the dest_slice starts at 1 and is foward strand, nothing needs doing
309 #
310 if($dest_slice) {
311 if($dest_slice_start != 1 || $dest_slice_strand != 1) {
312 if($dest_slice_strand == 1) {
313 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
314 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
315 } else {
316 my $tmp_seq_region_start = $seq_region_start;
317 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
318 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
319 $seq_region_strand *= -1;
320 }
321 }
322
323 #throw away features off the end of the requested slice
324 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
325 ( $dest_slice_sr_id ne $seq_region_id )) {
326 next FEATURE;
327 }
328 $slice = $dest_slice;
329 }
330
331 # Finally, create the new RepeatFeature.
332 push( @features,
333 $self->_create_feature_fast( 'Bio::EnsEMBL::RepeatFeature', {
334 'dbID' => $repeat_feature_id,
335 'analysis' => $analysis,
336 'start' => $seq_region_start,
337 'end' => $seq_region_end,
338 'strand' => $seq_region_strand,
339 'score' => $score,
340 'hstart' => $repeat_start,
341 'hend' => $repeat_end,
342 'repeat_consensus' => $rc,
343 'adaptor' => $self,
344 'slice' => $slice
345 } ) );
346
347 }
348
349 return \@features;
350 }
351
352
353 =head2 store
354
355 Arg [1] : list of Bio::EnsEMBL::RepeatFeatures $repeat_feature_id
356 the list of repeat features to store in the database
357 Example : $repeat_feature_adaptor->store(@repeat_features);
358 Description: stores a repeat feature in the database
359 Returntype : none
360 Exceptions : if the repeat features do not have attached sequences
361 or if repeat_consensus are not present
362 Caller : general
363 Status : Stable
364
365 =cut
366
367 sub store {
368 my( $self, @repeats ) = @_;
369
370 my $db = $self->db();
371 my $rca = $db->get_RepeatConsensusAdaptor();
372 my $sa = $db->get_SliceAdaptor();
373 my ($cons, $db_id);
374
375 my $sth = $self->prepare(qq{
376 INSERT into repeat_feature( repeat_feature_id
377 , seq_region_id
378 , seq_region_start
379 , seq_region_end
380 , seq_region_strand
381 , repeat_consensus_id
382 , repeat_start
383 , repeat_end
384 , score
385 , analysis_id )
386 VALUES(NULL, ?,?,?,?,?,?,?,?,?)
387 });
388
389 FEATURE: foreach my $rf (@repeats) {
390 if(!ref($rf) || !$rf->isa('Bio::EnsEMBL::RepeatFeature')) {
391 throw('Expected RepeatFeature argument not [' . ref($rf) .'].');
392 }
393
394 if($rf->is_stored($db)) {
395 warning("RepeatFeature [".$rf->dbID."] is already stored in this DB.");
396 next FEATURE;
397 }
398
399 my $cons = $rf->repeat_consensus();
400 throw("Must have a RepeatConsensus attached") if(!defined($cons));
401
402 # for tandem repeats - simply store consensus and repeat
403 # one pair per hit. don't need to check consensi stored
404 # already. consensus has name and class set to 'trf'
405
406 if ($cons->repeat_class eq 'trf') {
407
408 # Look for matches already stored
409 my @match = @{$rca->fetch_all_by_class_seq('trf', $cons->repeat_consensus)};
410 if (@match) {
411 $cons->dbID($match[0]->dbID());
412 }
413 else {
414 $rca->store($cons);
415 }
416
417 } elsif ($cons->repeat_class eq 'Simple_repeat') {
418
419 my $rcon = $cons->name;
420 $rcon =~ s/\((\S+)\)n/$1/; # get repeat element
421 $cons->repeat_consensus($rcon);
422
423 # Look for matches already stored
424 my $match = $rca->fetch_by_name_class($cons->name, 'Simple_repeat');
425 if ($match) {
426 $cons->dbID($match->dbID());
427 }
428 else {
429 $rca->store($cons);
430 }
431
432 } else {
433
434 # for other repeats - need to see if a consensus is stored already
435 if(!$cons->dbID) {
436 my $match = ($rca->fetch_by_name($cons->name));
437
438 if($match) {
439 #set the consensus dbID to be the same as the database one
440 $cons->dbID($match->dbID());
441 } else {
442 # if we don't match a consensus already stored create a fake one
443 # and set consensus to 'N' as null seq not allowed
444 # FIXME: not happy with this, but ho hum ...
445 warning("Can't find " . $cons->name . "\n");
446 $cons->repeat_consensus("N");
447 $rca->store($cons);
448 }
449 }
450
451 #if (@match > 1) {
452 #multiple consensi were matched
453 # $self->warn(@match . " consensi for " . $cons->name . "\n");
454 #}
455 }
456
457 my $slice = $rf->slice();
458 if(!ref($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
459 throw("RepeatFeature cannot be stored without an associated slice.");
460 }
461
462 my $original = $rf;
463 my $seq_region_id;
464 ($rf, $seq_region_id) = $self->_pre_store($rf);
465
466 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
467 $sth->bind_param(2,$rf->start,SQL_INTEGER);
468 $sth->bind_param(3,$rf->end,SQL_INTEGER);
469 $sth->bind_param(4,$rf->strand,SQL_TINYINT);
470 $sth->bind_param(5,$rf->repeat_consensus->dbID,SQL_INTEGER);
471 $sth->bind_param(6,$rf->hstart,SQL_INTEGER);
472 $sth->bind_param(7,$rf->hend,SQL_INTEGER);
473 $sth->bind_param(8,$rf->score,SQL_DOUBLE);
474 $sth->bind_param(9,$rf->analysis->dbID,SQL_INTEGER);
475
476 $sth->execute();
477
478 my $db_id = $sth->{'mysql_insertid'}
479 or throw("Didn't get an insertid from the INSERT statement");
480
481 $original->dbID($db_id);
482 $original->adaptor($self);
483 }
484 }
485
486
487 =head2 list_dbIDs
488
489 Arg [1] : none
490 Example : @feature_ids = @{$repeat_feature_adaptor->list_dbIDs()};
491 Description: Gets an array of internal ids for all repeat features in the current db
492 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
493 Returntype : list of ints
494 Exceptions : none
495 Caller : ?
496 Status : Stable
497
498 =cut
499
500 sub list_dbIDs {
501 my ($self, $ordered) = @_;
502
503 return $self->_list_dbIDs("repeat_feature", undef, $ordered);
504 }
505
506 1;
507
508
509
510
511