0
|
=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut
|
|
20
|
|
=head1 NAME

Bio::EnsEMBL::DBSQL::PredictionExonAdaptor - Performs database interaction for
PredictionExons.

=head1 SYNOPSIS

  $pea = $database_adaptor->get_PredictionExonAdaptor();
  $pexon = $pea->fetch_by_dbID();

  my $slice =
    $database_adaptor->get_SliceAdaptor->fetch_by_region( 'X', 1, 1e6 );

  my @pexons = @{ $pea->fetch_all_by_Slice($slice) };

=head1 METHODS

=cut
|
|
39
|
|
40 package Bio::EnsEMBL::DBSQL::PredictionExonAdaptor;
|
|
41
|
|
42 use vars qw( @ISA );
|
|
43 use strict;
|
|
44
|
|
45
|
|
46 use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
|
|
47 use Bio::EnsEMBL::PredictionExon;
|
|
48 use Bio::EnsEMBL::Utils::Exception qw( warning throw deprecate );
|
|
49
|
|
50
|
|
51 @ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
|
|
52
|
|
53
|
|
# _tables
#
#  Arg [1]    : none
#  Example    : none
#  Description: PROTECTED implementation of the superclass abstract method.
#               Names the table this adaptor queries together with the alias
#               used for that table in the generated SQL.
#  Returntype : list holding one listref of two strings: [ table, alias ]
#  Exceptions : none
#  Caller     : internal
#

sub _tables {
  my $table_and_alias = [ 'prediction_exon', 'pe' ];

  return ($table_and_alias);
}
|
|
68
|
|
69
|
|
70
|
|
# _columns
#
#  Arg [1]    : none
#  Example    : none
#  Description: PROTECTED implementation of the superclass abstract method.
#               Lists the columns selected for each prediction exon; every
#               name is qualified with the 'pe' table alias.
#  Returntype : list of strings
#  Exceptions : none
#  Caller     : internal
#

sub _columns {
  # The invocant is not needed: the column list is fixed.
  return (
    'pe.prediction_exon_id',
    'pe.seq_region_id',
    'pe.seq_region_start',
    'pe.seq_region_end',
    'pe.seq_region_strand',
    'pe.start_phase',
    'pe.score',
    'pe.p_value',
  );
}
|
|
93
|
|
94
|
|
# _final_clause
#
#  Arg [1]    : none
#  Example    : none
#  Description: PROTECTED implementation of the superclass abstract method.
#               Supplies the trailing ORDER BY clause: rows are ordered by
#               prediction transcript id and then by exon rank.
#  Returntype : string
#  Exceptions : none
#  Caller     : internal
#

sub _final_clause {
  my $order_by = "ORDER BY pe.prediction_transcript_id, pe.exon_rank";

  return $order_by;
}
|
|
108
|
|
109
|
|
=head2 fetch_all_by_PredictionTranscript

  Arg [1]    : Bio::EnsEMBL::PredictionTranscript $transcript
  Example    : none
  Description: Retrieves the PredictionExons that belong to the given
               prediction transcript.  The rows come back in the order
               defined by _final_clause() (transcript id, then exon rank).
               The exons are fetched on a slice spanning the transcript and
               transferred to the transcript's own slice when the two
               slices have different names.
  Returntype : listref of Bio::EnsEMBL::PredictionExon on the transcript slice
  Exceptions : throws if the transcript does not have a slice
  Caller     : PredictionTranscript->get_all_Exons()
  Status     : Stable

=cut

sub fetch_all_by_PredictionTranscript {
  my $self       = shift;
  my $transcript = shift;

  my $transcript_id = $transcript->dbID();
  my $where         = "pe.prediction_transcript_id = " . $transcript_id;

  my $transcript_slice = $transcript->slice()
    or throw("Transcript must have attached slice to retrieve exons.");

  # Query on a small slice that covers just the prediction transcript.
  my $feature_slice =
    $self->db->get_SliceAdaptor->fetch_by_Feature($transcript);

  my $exons = $self->fetch_all_by_Slice_constraint( $feature_slice, $where );

  # Exons already sit on the transcript's slice: nothing to remap.
  return $exons if $feature_slice->name() eq $transcript_slice->name();

  # Otherwise express every exon in the transcript slice's coordinates.
  return [ map { $_->transfer($transcript_slice) } @{$exons} ];
}
|
|
151
|
|
152
|
|
153
|
|
=head2 store

  Arg [1]    : Bio::EnsEMBL::PredictionExon $exon
               The exon to store in this database
  Arg [2]    : int $prediction_transcript_id
               The internal identifier of the prediction transcript that
               this exon is associated with.
  Arg [3]    : int $rank
               The rank of the exon in the transcript (starting at 1)
  Example    : $pexon_adaptor->store($pexon, 1211, 2);
  Description: Stores a PredictionExon in the database.  An exon that is
               already stored in this database is not written again; a
               warning is issued and its existing dbID is returned.
  Returntype : int - the dbID of the exon
  Exceptions : thrown if exon does not have a slice attached
               or if $exon->start, $exon->end, $exon->strand, or $exon->phase
               are not defined or if $exon is not a Bio::EnsEMBL::PredictionExon
               or if the prediction transcript id or the rank is missing
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ( $self, $pexon, $pt_id, $rank ) = @_;

  unless ( ref($pexon) && $pexon->isa('Bio::EnsEMBL::PredictionExon') ) {
    throw("Expected PredictionExon argument");
  }

  if ( !$pt_id ) {
    throw("Expected PredictionTranscript id argument.");
  }

  if ( !$rank ) {
    throw("Expected rank argument.");
  }

  my $db = $self->db();

  if ( $pexon->is_stored($db) ) {
    warning('PredictionExon is already stored in this DB.');
    return $pexon->dbID();
  }

  unless ( $pexon->start
    && $pexon->end
    && $pexon->strand
    && defined $pexon->phase )
  {
    throw("PredictionExon does not have all attributes to store.\n" .
          "start, end, strand and phase attributes must be set.");
  }

  # _pre_store() hands back the exon to write plus its seq_region_id; the
  # caller's object ($pexon) is kept separately so that it, and not the
  # returned exon, receives the new dbID and adaptor below.
  my ( $storable, $seq_region_id ) = $self->_pre_store($pexon);

  my $insert_sql =
      "INSERT into prediction_exon (prediction_transcript_id, exon_rank, "
    . "seq_region_id, seq_region_start, seq_region_end, "
    . "seq_region_strand, start_phase, score, p_value) "
    . "VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ? )";

  my $sth = $db->dbc->prepare($insert_sql);

  # One [ value, SQL type ] entry per placeholder, in placeholder order.
  my @bindings = (
    [ $pt_id,             SQL_INTEGER ],
    [ $rank,              SQL_SMALLINT ],
    [ $seq_region_id,     SQL_INTEGER ],
    [ $storable->start,   SQL_INTEGER ],
    [ $storable->end,     SQL_INTEGER ],
    [ $storable->strand,  SQL_TINYINT ],
    [ $storable->phase,   SQL_TINYINT ],
    [ $storable->score,   SQL_DOUBLE ],
    [ $storable->p_value, SQL_DOUBLE ],
  );

  my $position = 0;
  foreach my $binding (@bindings) {
    $sth->bind_param( ++$position, @{$binding} );
  }

  $sth->execute();

  my $dbID = $sth->{'mysql_insertid'};

  # Mark the object that was passed in as stored by this adaptor.
  $pexon->dbID($dbID);
  $pexon->adaptor($self);

  return $dbID;
}
|
|
228
|
|
229
|
|
230
|
|
=head2 remove

  Arg [1]    : Bio::EnsEMBL::PredictionExon $exon
               the exon to remove from the database
  Example    : $exon_adaptor->remove($exon);
  Description: Removes an exon from the database.  The row with the exon's
               dbID is deleted from the prediction_exon table and the
               exon's dbID and adaptor are then reset to undef.  An exon
               that is not stored in this database is left untouched and a
               warning is issued.
  Returntype : none
  Exceptions : thrown if $exon is not a Bio::EnsEMBL::PredictionExon
  Caller     : general
  Status     : Stable

=cut

sub remove {
  my $self = shift;
  my $pexon = shift;

  # Same argument check as store().  Without it, any stored object of
  # another class that shares a dbID with a prediction exon would cause
  # that unrelated prediction_exon row to be deleted.
  if(!ref($pexon) || !$pexon->isa('Bio::EnsEMBL::PredictionExon') ) {
    throw("Expected PredictionExon argument");
  }

  my $db = $self->db();

  if(!$pexon->is_stored($db)) {
    warning('PredictionExon is not in this DB - not removing');
    # Bare return: an empty list in list context, undef in scalar context.
    return;
  }

  my $sth = $self->prepare(
            "DELETE FROM prediction_exon WHERE prediction_exon_id = ?");
  $sth->bind_param( 1, $pexon->dbID, SQL_INTEGER );
  $sth->execute();

  # The object no longer corresponds to a database row.
  $pexon->dbID(undef);
  $pexon->adaptor(undef);

  # Documented as returning nothing; do not leak the result of the last call.
  return;
}
|
|
263
|
|
264
|
|
265
|
|
=head2 list_dbIDs

  Arg [1]    : (optional) int $ordered
               Not 0 for the ids to be sorted by the seq_region.
  Example    : @exon_ids = @{$exon_adaptor->list_dbIDs()};
  Description: Gets the internal ids for all exons in the current db.  The
               work is delegated to _list_dbIDs() for the prediction_exon
               table.
  Returntype : whatever _list_dbIDs() returns; the Example dereferences it
               as a listref of ints
  Exceptions : none
  Caller     : ?
  Status     : Stable

=cut

sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  my $table = "prediction_exon";

  # The second argument of _list_dbIDs() is deliberately left undefined.
  return $self->_list_dbIDs( $table, undef, $ordered );
}
|
|
284
|
|
285
|
|
286
|
|
# _objs_from_sth
#
#  Arg [1]    : statement handle $sth
#               Rows are read through bind_columns()/fetch(); the eight
#               bound columns are expected in the order listed by
#               _columns().  Presumably already executed by the caller -
#               nothing here executes it.
#  Arg [2]    : (optional) assembly mapper $mapper
#               When given, every feature is mapped through it.
#  Arg [3]    : (optional) Bio::EnsEMBL::Slice $dest_slice
#               When given, coordinates are made relative to this slice and
#               features that fall outside it, or on another seq_region,
#               are dropped.
#  Example    : none
#  Description: PROTECTED implementation of abstract superclass method.
#               Responsible for the creation of PredictionExons from the
#               rows of the statement handle.
#  Returntype : listref of the objects built by _create_feature() for
#               class Bio::EnsEMBL::PredictionExon
#  Exceptions : thrown if a mapper is needed to reach the destination slice
#               but fetch_by_CoordSystems() does not return one
#  Caller     : internal
#

sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches and
  # a fair bit of gymnastics is used.
  #
  my $sa = $self->db()->get_SliceAdaptor();

  my @exons;
  my %slice_hash;      # "ID:<seq_region_id>" => slice
  my %sr_name_hash;    # seq_region_id => seq_region name
  my %sr_cs_hash;      # seq_region_id => coord system

  my($prediction_exon_id,$seq_region_id,
     $seq_region_start, $seq_region_end, $seq_region_strand,
     $start_phase, $score, $p_value);

  $sth->bind_columns(\$prediction_exon_id,\$seq_region_id,
                     \$seq_region_start, \$seq_region_end, \$seq_region_strand,
                     \$start_phase, \$score, \$p_value);

  # Properties of the destination slice are read once, outside the loop.
  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_cs;
  my $dest_slice_sr_id;
  my $asma;

  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_cs     = $dest_slice->coord_system;
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
    $asma              = $self->db->get_AssemblyMapperAdaptor();
  }

  FEATURE: while($sth->fetch()) {
    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    my $slice = $slice_hash{"ID:".$seq_region_id};
    my $dest_mapper = $mapper;

    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
    }

    # The slice cache is also filled by the post-mapping lookup further
    # down, which records neither name nor coord system.  Key this fill on
    # the name cache itself so both are always set for the current id.
    if(!exists $sr_name_hash{$seq_region_id}) {
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id}   = $slice->coord_system();
    }

    #obtain a mapper if none was defined, but a dest_seq_region was
    if(!$dest_mapper && $dest_slice &&
       !$dest_slice_cs->equals($slice->coord_system)) {
      $dest_mapper = $asma->fetch_by_CoordSystems($dest_slice_cs,
                                                  $slice->coord_system);

      # Fail with a readable message instead of a method call on undef.
      if(!$dest_mapper) {
        throw("No assembly mapper available between the coordinate system " .
              "of the destination slice and that of seq_region " .
              $seq_region_id);
      }
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($dest_mapper) {

      if (defined $dest_slice && $dest_mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
        ( $seq_region_id,  $seq_region_start,
          $seq_region_end, $seq_region_strand )
          =
          $dest_mapper->map( $sr_name, $seq_region_start, $seq_region_end,
                             $seq_region_strand, $sr_cs, 1, $dest_slice);

      } else {

        ( $seq_region_id,  $seq_region_start,
          $seq_region_end, $seq_region_strand )
          = $dest_mapper->fastmap( $sr_name, $seq_region_start,
                                   $seq_region_end, $seq_region_strand,
                                   $sr_cs );
      }

      #skip features that map to gaps or coord system boundaries
      next FEATURE if(!defined($seq_region_id));

      #get a slice in the coord system we just mapped to
      $slice = $slice_hash{"ID:".$seq_region_id} ||=
        $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is forward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          # Reverse-strand slice: flip the interval around the slice end.
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
          $seq_region_strand *= -1;
        }
      }

      #throw away features off the end of the requested slice
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
         ( $dest_slice_sr_id ne $seq_region_id )) {
        next FEATURE;
      }

      $slice = $dest_slice;
    }

    # Finally, create the new PredictionExon.
    push( @exons,
          $self->_create_feature( 'Bio::EnsEMBL::PredictionExon', {
                                    '-start'   => $seq_region_start,
                                    '-end'     => $seq_region_end,
                                    '-strand'  => $seq_region_strand,
                                    '-adaptor' => $self,
                                    '-slice'   => $slice,
                                    '-dbID'    => $prediction_exon_id,
                                    '-phase'   => $start_phase,
                                    '-score'   => $score,
                                    '-p_value' => $p_value
                                  } ) );

  }

  return \@exons;
}
|
|
466
|
|
467
|
|
468 1;
|