0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor -
|
|
24 Adaptor for ProteinAlignFeatures
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 $pafa =
|
|
29 $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
|
|
30
|
|
31 my @features = @{ $pafa->fetch_all_by_Slice($slice) };
|
|
32
|
|
33 $pafa->store(@features);
|
|
34
|
|
35 =head1 METHODS
|
|
36
|
|
37 =cut
|
|
38
|
|
39
|
|
40 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
|
|
41 use vars qw(@ISA);
|
|
42 use strict;
|
|
43
|
|
44 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
|
|
45 use Bio::EnsEMBL::DnaPepAlignFeature;
|
|
46 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
47 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
|
|
48
|
|
49
|
|
50 =head2 store
|
|
51
|
|
52 Arg [1] : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
|
|
53 Example : $protein_align_feature_adaptor->store(@feats);
|
|
54 Description: stores a list of ProteinAlignFeatures in the database
|
|
55 Returntype : none
|
|
56 Exceptions : throw if any of the provided features cannot be stored
|
|
57 which may occur if:
|
|
58 * The feature does not have an associated Slice
|
|
59 * The feature does not have an associated analysis
|
|
60 * The Slice the feature is associated with is on a seq_region
|
|
61 unknown to this database
|
|
62 A warning is given if:
|
|
63 * The feature has already been stored in this db
|
|
64 Caller : Pipeline
|
|
65 Status : Stable
|
|
66
|
|
67 =cut
|
|
68
|
|
69
|
|
70 sub store{
|
|
71 my ($self, @feats) = @_;
|
|
72
|
|
73 throw("Must call store with features") if( scalar(@feats) == 0 );
|
|
74
|
|
75 my @tabs = $self->_tables;
|
|
76 my ($tablename) = @{$tabs[0]};
|
|
77
|
|
78 my $db = $self->db();
|
|
79 my $slice_adaptor = $db->get_SliceAdaptor();
|
|
80 my $analysis_adaptor = $db->get_AnalysisAdaptor();
|
|
81
|
|
82 my $sth = $self->prepare(
|
|
83 "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
|
|
84 seq_region_strand, hit_start, hit_end,
|
|
85 hit_name, cigar_line,
|
|
86 analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
|
|
87 VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
|
|
88
|
|
89 FEATURE: foreach my $feat ( @feats ) {
|
|
90 if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
|
|
91 throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
|
|
92 . " not a [".ref($feat)."].");
|
|
93 }
|
|
94
|
|
95 if($feat->is_stored($db)) {
|
|
96 warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
|
|
97 " in this database.");
|
|
98 next FEATURE;
|
|
99 }
|
|
100
|
|
101 #sanity check the hstart and hend
|
|
102 my $hstart = $feat->hstart();
|
|
103 my $hend = $feat->hend();
|
|
104 $self->_check_start_end_strand($hstart,$hend,1);
|
|
105
|
|
106 my $cigar_string = $feat->cigar_string();
|
|
107 if(!$cigar_string) {
|
|
108 $cigar_string = $feat->length() . 'M';
|
|
109 warning("DnaPepAlignFeature does not define a cigar_string.\n" .
|
|
110 "Assuming ungapped block with cigar_string=$cigar_string\n");
|
|
111 }
|
|
112
|
|
113 my $hseqname = $feat->hseqname();
|
|
114 if(!$hseqname) {
|
|
115 throw("DnaPepAlignFeature must define an hseqname.");
|
|
116 }
|
|
117
|
|
118 if(!defined($feat->analysis)) {
|
|
119 throw("An analysis must be attached to the features to be stored.");
|
|
120 }
|
|
121
|
|
122 #store the analysis if it has not been stored yet
|
|
123 if(!$feat->analysis->is_stored($db)) {
|
|
124 $analysis_adaptor->store($feat->analysis());
|
|
125 }
|
|
126
|
|
127 my $slice = $feat->slice();
|
|
128 if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
|
|
129 throw("A slice must be attached to the features to be stored.");
|
|
130 }
|
|
131
|
|
132 my $original = $feat;
|
|
133 my $seq_region_id;
|
|
134 ($feat, $seq_region_id) = $self->_pre_store($feat);
|
|
135
|
|
136 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
|
|
137 $sth->bind_param(2,$feat->start,SQL_INTEGER);
|
|
138 $sth->bind_param(3,$feat->end,SQL_INTEGER);
|
|
139 $sth->bind_param(4,$feat->strand,SQL_TINYINT);
|
|
140 $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
|
|
141 $sth->bind_param(6,$feat->hend,SQL_INTEGER);
|
|
142 $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
|
|
143 $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
|
|
144 $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
|
|
145 $sth->bind_param(10,$feat->score,SQL_DOUBLE);
|
|
146 $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
|
|
147 $sth->bind_param(12,$feat->percent_id,SQL_REAL);
|
|
148 $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
|
|
149 $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
|
|
150
|
|
151 $sth->execute();
|
|
152 $original->dbID($sth->{'mysql_insertid'});
|
|
153 $original->adaptor($self);
|
|
154 }
|
|
155
|
|
156 $sth->finish();
|
|
157 }
|
|
158
|
|
159
|
|
160 =head2 _objs_from_sth
|
|
161
|
|
162 Arg [1] : DBI statement handle $sth
|
|
163 an exectuted DBI statement handle generated by selecting
|
|
164 the columns specified by _columns() from the table specified
|
|
165 by _table()
|
|
166 Example : @dna_dna_align_feats = $self->_obj_from_hashref
|
|
167 Description: PROTECTED implementation of superclass abstract method.
|
|
168 Creates DnaDnaAlignFeature objects from a DBI hashref
|
|
169 Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
|
|
170 Exceptions : none
|
|
171 Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
|
|
172 Status : Stable
|
|
173
|
|
174 =cut
|
|
175
|
|
176 sub _objs_from_sth {
|
|
177 my ($self, $sth, $mapper, $dest_slice) = @_;
|
|
178
|
|
179 #
|
|
180 # This code is ugly because an attempt has been made to remove as many
|
|
181 # function calls as possible for speed purposes. Thus many caches and
|
|
182 # a fair bit of gymnastics is used.
|
|
183 #
|
|
184
|
|
185 my $sa = $self->db()->get_SliceAdaptor();
|
|
186 my $aa = $self->db->get_AnalysisAdaptor();
|
|
187
|
|
188 my @features;
|
|
189 my %analysis_hash;
|
|
190 my %slice_hash;
|
|
191 my %sr_name_hash;
|
|
192 my %sr_cs_hash;
|
|
193
|
|
194 my ($protein_align_feature_id, $seq_region_id, $seq_region_start,
|
|
195 $seq_region_end, $analysis_id, $seq_region_strand, $hit_start,
|
|
196 $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score,
|
|
197 $external_db_id, $hcoverage, $external_db_name, $external_display_db_name );
|
|
198
|
|
199 $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id,
|
|
200 \$seq_region_start,\$seq_region_end, \$analysis_id,
|
|
201 \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name,
|
|
202 \$cigar_line, \$evalue, \$perc_ident, \$score,
|
|
203 \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name );
|
|
204
|
|
205 my $asm_cs;
|
|
206 my $cmp_cs;
|
|
207 my $asm_cs_vers;
|
|
208 my $asm_cs_name;
|
|
209 my $cmp_cs_vers;
|
|
210 my $cmp_cs_name;
|
|
211 if($mapper) {
|
|
212 $asm_cs = $mapper->assembled_CoordSystem();
|
|
213 $cmp_cs = $mapper->component_CoordSystem();
|
|
214 $asm_cs_name = $asm_cs->name();
|
|
215 $asm_cs_vers = $asm_cs->version();
|
|
216 $cmp_cs_name = $cmp_cs->name();
|
|
217 $cmp_cs_vers = $cmp_cs->version();
|
|
218 }
|
|
219
|
|
220 my $dest_slice_start;
|
|
221 my $dest_slice_end;
|
|
222 my $dest_slice_strand;
|
|
223 my $dest_slice_length;
|
|
224 my $dest_slice_sr_name;
|
|
225 my $dest_slice_sr_id;
|
|
226 if($dest_slice) {
|
|
227 $dest_slice_start = $dest_slice->start();
|
|
228 $dest_slice_end = $dest_slice->end();
|
|
229 $dest_slice_strand = $dest_slice->strand();
|
|
230 $dest_slice_length = $dest_slice->length();
|
|
231 $dest_slice_sr_name = $dest_slice->seq_region_name();
|
|
232 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
|
|
233 }
|
|
234
|
|
235 FEATURE: while($sth->fetch()) {
|
|
236 #get the analysis object
|
|
237 my $analysis = $analysis_hash{$analysis_id} ||=
|
|
238 $aa->fetch_by_dbID($analysis_id);
|
|
239
|
|
240 #get the slice object
|
|
241 my $slice = $slice_hash{"ID:".$seq_region_id};
|
|
242
|
|
243 if(!$slice) {
|
|
244 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
|
|
245 $slice_hash{"ID:".$seq_region_id} = $slice;
|
|
246 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
|
|
247 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
|
|
248 }
|
|
249
|
|
250 my $sr_name = $sr_name_hash{$seq_region_id};
|
|
251 my $sr_cs = $sr_cs_hash{$seq_region_id};
|
|
252 #
|
|
253 # remap the feature coordinates to another coord system
|
|
254 # if a mapper was provided
|
|
255 #
|
|
256 if($mapper) {
|
|
257
|
|
258 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
|
|
259 ( $seq_region_id, $seq_region_start,
|
|
260 $seq_region_end, $seq_region_strand )
|
|
261 =
|
|
262 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
|
|
263 $seq_region_strand, $sr_cs, 1, $dest_slice);
|
|
264
|
|
265 } else {
|
|
266
|
|
267 ( $seq_region_id, $seq_region_start,
|
|
268 $seq_region_end, $seq_region_strand )
|
|
269 =
|
|
270 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
|
|
271 $seq_region_strand, $sr_cs );
|
|
272 }
|
|
273
|
|
274 #skip features that map to gaps or coord system boundaries
|
|
275 next FEATURE if(!defined($seq_region_id));
|
|
276
|
|
277 # #get a slice in the coord system we just mapped to
|
|
278 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
|
|
279 $slice = $slice_hash{"ID:".$seq_region_id} ||=
|
|
280 $sa->fetch_by_seq_region_id($seq_region_id);
|
|
281 # } else {
|
|
282 # $slice = $slice_hash{"ID:".$seq_region_id} ||=
|
|
283 # $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef,
|
|
284 # $asm_cs_vers);
|
|
285 # }
|
|
286 }
|
|
287
|
|
288 #
|
|
289 # If a destination slice was provided convert the coords
|
|
290 # If the dest_slice starts at 1 and is foward strand, nothing needs doing
|
|
291 #
|
|
292 if($dest_slice) {
|
|
293 if($dest_slice_start != 1 || $dest_slice_strand != 1) {
|
|
294 if($dest_slice_strand == 1) {
|
|
295 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
|
|
296 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
|
|
297 } else {
|
|
298 my $tmp_seq_region_start = $seq_region_start;
|
|
299 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
|
|
300 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
|
|
301 $seq_region_strand *= -1;
|
|
302 }
|
|
303 }
|
|
304
|
|
305 #throw away features off the end of the requested slice
|
|
306 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
|
|
307 ( $dest_slice_sr_id ne $seq_region_id )) {
|
|
308 next FEATURE;
|
|
309 }
|
|
310 $slice = $dest_slice;
|
|
311 }
|
|
312
|
|
313 # Finally, create the new ProteinAlignFeature.
|
|
314 push(
|
|
315 @features,
|
|
316 $self->_create_feature_fast(
|
|
317 'Bio::EnsEMBL::DnaPepAlignFeature', {
|
|
318 'slice' => $slice,
|
|
319 'start' => $seq_region_start,
|
|
320 'end' => $seq_region_end,
|
|
321 'strand' => $seq_region_strand,
|
|
322 'hseqname' => $hit_name,
|
|
323 'hstart' => $hit_start,
|
|
324 'hend' => $hit_end,
|
|
325 'hstrand' => 1, # dna_pep_align features
|
|
326 # are always hstrand 1
|
|
327 'score' => $score,
|
|
328 'p_value' => $evalue,
|
|
329 'percent_id' => $perc_ident,
|
|
330 'cigar_string' => $cigar_line,
|
|
331 'analysis' => $analysis,
|
|
332 'adaptor' => $self,
|
|
333 'dbID' => $protein_align_feature_id,
|
|
334 'external_db_id' => $external_db_id,
|
|
335 'hcoverage' => $hcoverage,
|
|
336 'dbname' => $external_db_name,
|
|
337 'db_display_name' => $external_display_db_name
|
|
338 } ) );
|
|
339
|
|
340 }
|
|
341
|
|
342 return \@features;
|
|
343 }
|
|
344
|
|
345
|
|
346
|
|
347 sub _tables {
|
|
348 my $self = shift;
|
|
349
|
|
350 return (['protein_align_feature', 'paf'], ['external_db','exdb']);
|
|
351 }
|
|
352
|
|
353
|
|
354 sub _columns {
|
|
355 my $self = shift;
|
|
356
|
|
357 #warning _objs_from_hashref method depends on ordering of this list
|
|
358 return qw( paf.protein_align_feature_id
|
|
359 paf.seq_region_id
|
|
360 paf.seq_region_start
|
|
361 paf.seq_region_end
|
|
362 paf.analysis_id
|
|
363 paf.seq_region_strand
|
|
364 paf.hit_start
|
|
365 paf.hit_end
|
|
366 paf.hit_name
|
|
367 paf.cigar_line
|
|
368 paf.evalue
|
|
369 paf.perc_ident
|
|
370 paf.score
|
|
371 paf.external_db_id
|
|
372 paf.hcoverage
|
|
373 exdb.db_name
|
|
374 exdb.db_display_name);
|
|
375 }
|
|
376
|
|
377 sub _left_join{
|
|
378 return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
|
|
379 }
|
|
380
|
|
381 =head2 list_dbIDs
|
|
382
|
|
383 Arg [1] : none
|
|
384 Example : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
|
|
385 Description: Gets an array of internal ids for all protein align
|
|
386 features in the current db
|
|
387 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
|
|
388 Returntype : listref of ints
|
|
389 Exceptions : none
|
|
390 Caller : ?
|
|
391 Status : Stable
|
|
392
|
|
393 =cut
|
|
394
|
|
395 sub list_dbIDs {
|
|
396 my ($self,$ordered) = @_;
|
|
397
|
|
398 return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
|
|
399 }
|
|
400
|
|
401
|
|
402
|
|
403
|
|
404
|
|
405
|
|
406 1;
|