comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/ProteinAlignFeatureAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor -
24 Adaptor for ProteinAlignFeatures
25
26 =head1 SYNOPSIS
27
28 $pafa =
29 $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
30
31 my @features = @{ $pafa->fetch_all_by_Slice($slice) };
32
33 $pafa->store(@features);
34
35 =head1 METHODS
36
37 =cut
38
39
40 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
41 use vars qw(@ISA);
42 use strict;
43
44 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor;
45 use Bio::EnsEMBL::DnaPepAlignFeature;
46 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
47 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor);
48
49
50 =head2 store
51
52 Arg [1] : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
53 Example : $protein_align_feature_adaptor->store(@feats);
54 Description: stores a list of ProteinAlignFeatures in the database
55 Returntype : none
56 Exceptions : throw if any of the provided features cannot be stored
57 which may occur if:
58 * The feature does not have an associated Slice
59 * The feature does not have an associated analysis
60 * The Slice the feature is associated with is on a seq_region
61 unknown to this database
62 A warning is given if:
63 * The feature has already been stored in this db
64 Caller : Pipeline
65 Status : Stable
66
67 =cut
68
69
70 sub store{
71 my ($self, @feats) = @_;
72
73 throw("Must call store with features") if( scalar(@feats) == 0 );
74
75 my @tabs = $self->_tables;
76 my ($tablename) = @{$tabs[0]};
77
78 my $db = $self->db();
79 my $slice_adaptor = $db->get_SliceAdaptor();
80 my $analysis_adaptor = $db->get_AnalysisAdaptor();
81
82 my $sth = $self->prepare(
83 "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
84 seq_region_strand, hit_start, hit_end,
85 hit_name, cigar_line,
86 analysis_id, score, evalue, perc_ident, external_db_id, hcoverage)
87 VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)");
88
89 FEATURE: foreach my $feat ( @feats ) {
90 if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
91 throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
92 . " not a [".ref($feat)."].");
93 }
94
95 if($feat->is_stored($db)) {
96 warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
97 " in this database.");
98 next FEATURE;
99 }
100
101 #sanity check the hstart and hend
102 my $hstart = $feat->hstart();
103 my $hend = $feat->hend();
104 $self->_check_start_end_strand($hstart,$hend,1);
105
106 my $cigar_string = $feat->cigar_string();
107 if(!$cigar_string) {
108 $cigar_string = $feat->length() . 'M';
109 warning("DnaPepAlignFeature does not define a cigar_string.\n" .
110 "Assuming ungapped block with cigar_string=$cigar_string\n");
111 }
112
113 my $hseqname = $feat->hseqname();
114 if(!$hseqname) {
115 throw("DnaPepAlignFeature must define an hseqname.");
116 }
117
118 if(!defined($feat->analysis)) {
119 throw("An analysis must be attached to the features to be stored.");
120 }
121
122 #store the analysis if it has not been stored yet
123 if(!$feat->analysis->is_stored($db)) {
124 $analysis_adaptor->store($feat->analysis());
125 }
126
127 my $slice = $feat->slice();
128 if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
129 throw("A slice must be attached to the features to be stored.");
130 }
131
132 my $original = $feat;
133 my $seq_region_id;
134 ($feat, $seq_region_id) = $self->_pre_store($feat);
135
136 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
137 $sth->bind_param(2,$feat->start,SQL_INTEGER);
138 $sth->bind_param(3,$feat->end,SQL_INTEGER);
139 $sth->bind_param(4,$feat->strand,SQL_TINYINT);
140 $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
141 $sth->bind_param(6,$feat->hend,SQL_INTEGER);
142 $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
143 $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
144 $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
145 $sth->bind_param(10,$feat->score,SQL_DOUBLE);
146 $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
147 $sth->bind_param(12,$feat->percent_id,SQL_REAL);
148 $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
149 $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
150
151 $sth->execute();
152 $original->dbID($sth->{'mysql_insertid'});
153 $original->adaptor($self);
154 }
155
156 $sth->finish();
157 }
158
159
160 =head2 _objs_from_sth
161
162 Arg [1] : DBI statement handle $sth
163 an exectuted DBI statement handle generated by selecting
164 the columns specified by _columns() from the table specified
165 by _table()
166 Example : @dna_dna_align_feats = $self->_obj_from_hashref
167 Description: PROTECTED implementation of superclass abstract method.
168 Creates DnaDnaAlignFeature objects from a DBI hashref
169 Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
170 Exceptions : none
171 Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
172 Status : Stable
173
174 =cut
175
176 sub _objs_from_sth {
177 my ($self, $sth, $mapper, $dest_slice) = @_;
178
179 #
180 # This code is ugly because an attempt has been made to remove as many
181 # function calls as possible for speed purposes. Thus many caches and
182 # a fair bit of gymnastics is used.
183 #
184
185 my $sa = $self->db()->get_SliceAdaptor();
186 my $aa = $self->db->get_AnalysisAdaptor();
187
188 my @features;
189 my %analysis_hash;
190 my %slice_hash;
191 my %sr_name_hash;
192 my %sr_cs_hash;
193
194 my ($protein_align_feature_id, $seq_region_id, $seq_region_start,
195 $seq_region_end, $analysis_id, $seq_region_strand, $hit_start,
196 $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score,
197 $external_db_id, $hcoverage, $external_db_name, $external_display_db_name );
198
199 $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id,
200 \$seq_region_start,\$seq_region_end, \$analysis_id,
201 \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name,
202 \$cigar_line, \$evalue, \$perc_ident, \$score,
203 \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name );
204
205 my $asm_cs;
206 my $cmp_cs;
207 my $asm_cs_vers;
208 my $asm_cs_name;
209 my $cmp_cs_vers;
210 my $cmp_cs_name;
211 if($mapper) {
212 $asm_cs = $mapper->assembled_CoordSystem();
213 $cmp_cs = $mapper->component_CoordSystem();
214 $asm_cs_name = $asm_cs->name();
215 $asm_cs_vers = $asm_cs->version();
216 $cmp_cs_name = $cmp_cs->name();
217 $cmp_cs_vers = $cmp_cs->version();
218 }
219
220 my $dest_slice_start;
221 my $dest_slice_end;
222 my $dest_slice_strand;
223 my $dest_slice_length;
224 my $dest_slice_sr_name;
225 my $dest_slice_sr_id;
226 if($dest_slice) {
227 $dest_slice_start = $dest_slice->start();
228 $dest_slice_end = $dest_slice->end();
229 $dest_slice_strand = $dest_slice->strand();
230 $dest_slice_length = $dest_slice->length();
231 $dest_slice_sr_name = $dest_slice->seq_region_name();
232 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
233 }
234
235 FEATURE: while($sth->fetch()) {
236 #get the analysis object
237 my $analysis = $analysis_hash{$analysis_id} ||=
238 $aa->fetch_by_dbID($analysis_id);
239
240 #get the slice object
241 my $slice = $slice_hash{"ID:".$seq_region_id};
242
243 if(!$slice) {
244 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
245 $slice_hash{"ID:".$seq_region_id} = $slice;
246 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
247 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
248 }
249
250 my $sr_name = $sr_name_hash{$seq_region_id};
251 my $sr_cs = $sr_cs_hash{$seq_region_id};
252 #
253 # remap the feature coordinates to another coord system
254 # if a mapper was provided
255 #
256 if($mapper) {
257
258 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
259 ( $seq_region_id, $seq_region_start,
260 $seq_region_end, $seq_region_strand )
261 =
262 $mapper->map( $sr_name, $seq_region_start, $seq_region_end,
263 $seq_region_strand, $sr_cs, 1, $dest_slice);
264
265 } else {
266
267 ( $seq_region_id, $seq_region_start,
268 $seq_region_end, $seq_region_strand )
269 =
270 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end,
271 $seq_region_strand, $sr_cs );
272 }
273
274 #skip features that map to gaps or coord system boundaries
275 next FEATURE if(!defined($seq_region_id));
276
277 # #get a slice in the coord system we just mapped to
278 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
279 $slice = $slice_hash{"ID:".$seq_region_id} ||=
280 $sa->fetch_by_seq_region_id($seq_region_id);
281 # } else {
282 # $slice = $slice_hash{"ID:".$seq_region_id} ||=
283 # $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef,
284 # $asm_cs_vers);
285 # }
286 }
287
288 #
289 # If a destination slice was provided convert the coords
290 # If the dest_slice starts at 1 and is foward strand, nothing needs doing
291 #
292 if($dest_slice) {
293 if($dest_slice_start != 1 || $dest_slice_strand != 1) {
294 if($dest_slice_strand == 1) {
295 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
296 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
297 } else {
298 my $tmp_seq_region_start = $seq_region_start;
299 $seq_region_start = $dest_slice_end - $seq_region_end + 1;
300 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
301 $seq_region_strand *= -1;
302 }
303 }
304
305 #throw away features off the end of the requested slice
306 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
307 ( $dest_slice_sr_id ne $seq_region_id )) {
308 next FEATURE;
309 }
310 $slice = $dest_slice;
311 }
312
313 # Finally, create the new ProteinAlignFeature.
314 push(
315 @features,
316 $self->_create_feature_fast(
317 'Bio::EnsEMBL::DnaPepAlignFeature', {
318 'slice' => $slice,
319 'start' => $seq_region_start,
320 'end' => $seq_region_end,
321 'strand' => $seq_region_strand,
322 'hseqname' => $hit_name,
323 'hstart' => $hit_start,
324 'hend' => $hit_end,
325 'hstrand' => 1, # dna_pep_align features
326 # are always hstrand 1
327 'score' => $score,
328 'p_value' => $evalue,
329 'percent_id' => $perc_ident,
330 'cigar_string' => $cigar_line,
331 'analysis' => $analysis,
332 'adaptor' => $self,
333 'dbID' => $protein_align_feature_id,
334 'external_db_id' => $external_db_id,
335 'hcoverage' => $hcoverage,
336 'dbname' => $external_db_name,
337 'db_display_name' => $external_display_db_name
338 } ) );
339
340 }
341
342 return \@features;
343 }
344
345
346
347 sub _tables {
348 my $self = shift;
349
350 return (['protein_align_feature', 'paf'], ['external_db','exdb']);
351 }
352
353
354 sub _columns {
355 my $self = shift;
356
357 #warning _objs_from_hashref method depends on ordering of this list
358 return qw( paf.protein_align_feature_id
359 paf.seq_region_id
360 paf.seq_region_start
361 paf.seq_region_end
362 paf.analysis_id
363 paf.seq_region_strand
364 paf.hit_start
365 paf.hit_end
366 paf.hit_name
367 paf.cigar_line
368 paf.evalue
369 paf.perc_ident
370 paf.score
371 paf.external_db_id
372 paf.hcoverage
373 exdb.db_name
374 exdb.db_display_name);
375 }
376
377 sub _left_join{
378 return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
379 }
380
381 =head2 list_dbIDs
382
383 Arg [1] : none
384 Example : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
385 Description: Gets an array of internal ids for all protein align
386 features in the current db
387 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
388 Returntype : listref of ints
389 Exceptions : none
390 Caller : ?
391 Status : Stable
392
393 =cut
394
395 sub list_dbIDs {
396 my ($self,$ordered) = @_;
397
398 return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
399 }
400
401
402
403
404
405
406 1;