Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/ProteinAlignFeatureAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor - | |
24 Adaptor for ProteinAlignFeatures | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 $pafa = | |
29 $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' ); | |
30 | |
31 my @features = @{ $pafa->fetch_all_by_Slice($slice) }; | |
32 | |
33 $pafa->store(@features); | |
34 | |
35 =head1 METHODS | |
36 | |
37 =cut | |
38 | |
39 | |
40 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor; | |
41 use vars qw(@ISA); | |
42 use strict; | |
43 | |
44 use Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor; | |
45 use Bio::EnsEMBL::DnaPepAlignFeature; | |
46 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
47 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor); | |
48 | |
49 | |
50 =head2 store | |
51 | |
52 Arg [1] : list of Bio::EnsEMBL::DnaPepAlignFeature @feats | |
53 Example : $protein_align_feature_adaptor->store(@feats); | |
54 Description: stores a list of ProteinAlignFeatures in the database | |
55 Returntype : none | |
56 Exceptions : throw if any of the provided features cannot be stored | |
57 which may occur if: | |
58 * The feature does not have an associated Slice | |
59 * The feature does not have an associated analysis | |
60 * The Slice the feature is associated with is on a seq_region | |
61 unknown to this database | |
62 A warning is given if: | |
63 * The feature has already been stored in this db | |
64 Caller : Pipeline | |
65 Status : Stable | |
66 | |
67 =cut | |
68 | |
69 | |
70 sub store{ | |
71 my ($self, @feats) = @_; | |
72 | |
73 throw("Must call store with features") if( scalar(@feats) == 0 ); | |
74 | |
75 my @tabs = $self->_tables; | |
76 my ($tablename) = @{$tabs[0]}; | |
77 | |
78 my $db = $self->db(); | |
79 my $slice_adaptor = $db->get_SliceAdaptor(); | |
80 my $analysis_adaptor = $db->get_AnalysisAdaptor(); | |
81 | |
82 my $sth = $self->prepare( | |
83 "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end, | |
84 seq_region_strand, hit_start, hit_end, | |
85 hit_name, cigar_line, | |
86 analysis_id, score, evalue, perc_ident, external_db_id, hcoverage) | |
87 VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?)"); | |
88 | |
89 FEATURE: foreach my $feat ( @feats ) { | |
90 if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) { | |
91 throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature," | |
92 . " not a [".ref($feat)."]."); | |
93 } | |
94 | |
95 if($feat->is_stored($db)) { | |
96 warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" . | |
97 " in this database."); | |
98 next FEATURE; | |
99 } | |
100 | |
101 #sanity check the hstart and hend | |
102 my $hstart = $feat->hstart(); | |
103 my $hend = $feat->hend(); | |
104 $self->_check_start_end_strand($hstart,$hend,1); | |
105 | |
106 my $cigar_string = $feat->cigar_string(); | |
107 if(!$cigar_string) { | |
108 $cigar_string = $feat->length() . 'M'; | |
109 warning("DnaPepAlignFeature does not define a cigar_string.\n" . | |
110 "Assuming ungapped block with cigar_string=$cigar_string\n"); | |
111 } | |
112 | |
113 my $hseqname = $feat->hseqname(); | |
114 if(!$hseqname) { | |
115 throw("DnaPepAlignFeature must define an hseqname."); | |
116 } | |
117 | |
118 if(!defined($feat->analysis)) { | |
119 throw("An analysis must be attached to the features to be stored."); | |
120 } | |
121 | |
122 #store the analysis if it has not been stored yet | |
123 if(!$feat->analysis->is_stored($db)) { | |
124 $analysis_adaptor->store($feat->analysis()); | |
125 } | |
126 | |
127 my $slice = $feat->slice(); | |
128 if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) { | |
129 throw("A slice must be attached to the features to be stored."); | |
130 } | |
131 | |
132 my $original = $feat; | |
133 my $seq_region_id; | |
134 ($feat, $seq_region_id) = $self->_pre_store($feat); | |
135 | |
136 $sth->bind_param(1,$seq_region_id,SQL_INTEGER); | |
137 $sth->bind_param(2,$feat->start,SQL_INTEGER); | |
138 $sth->bind_param(3,$feat->end,SQL_INTEGER); | |
139 $sth->bind_param(4,$feat->strand,SQL_TINYINT); | |
140 $sth->bind_param(5,$feat->hstart,SQL_INTEGER); | |
141 $sth->bind_param(6,$feat->hend,SQL_INTEGER); | |
142 $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR); | |
143 $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR); | |
144 $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER); | |
145 $sth->bind_param(10,$feat->score,SQL_DOUBLE); | |
146 $sth->bind_param(11,$feat->p_value,SQL_DOUBLE); | |
147 $sth->bind_param(12,$feat->percent_id,SQL_REAL); | |
148 $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER); | |
149 $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE); | |
150 | |
151 $sth->execute(); | |
152 $original->dbID($sth->{'mysql_insertid'}); | |
153 $original->adaptor($self); | |
154 } | |
155 | |
156 $sth->finish(); | |
157 } | |
158 | |
159 | |
160 =head2 _objs_from_sth | |
161 | |
162 Arg [1] : DBI statement handle $sth | |
163 an exectuted DBI statement handle generated by selecting | |
164 the columns specified by _columns() from the table specified | |
165 by _table() | |
166 Example : @dna_dna_align_feats = $self->_obj_from_hashref | |
167 Description: PROTECTED implementation of superclass abstract method. | |
168 Creates DnaDnaAlignFeature objects from a DBI hashref | |
169 Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures | |
170 Exceptions : none | |
171 Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch | |
172 Status : Stable | |
173 | |
174 =cut | |
175 | |
176 sub _objs_from_sth { | |
177 my ($self, $sth, $mapper, $dest_slice) = @_; | |
178 | |
179 # | |
180 # This code is ugly because an attempt has been made to remove as many | |
181 # function calls as possible for speed purposes. Thus many caches and | |
182 # a fair bit of gymnastics is used. | |
183 # | |
184 | |
185 my $sa = $self->db()->get_SliceAdaptor(); | |
186 my $aa = $self->db->get_AnalysisAdaptor(); | |
187 | |
188 my @features; | |
189 my %analysis_hash; | |
190 my %slice_hash; | |
191 my %sr_name_hash; | |
192 my %sr_cs_hash; | |
193 | |
194 my ($protein_align_feature_id, $seq_region_id, $seq_region_start, | |
195 $seq_region_end, $analysis_id, $seq_region_strand, $hit_start, | |
196 $hit_end, $hit_name, $cigar_line, $evalue, $perc_ident, $score, | |
197 $external_db_id, $hcoverage, $external_db_name, $external_display_db_name ); | |
198 | |
199 $sth->bind_columns(\$protein_align_feature_id, \$seq_region_id, | |
200 \$seq_region_start,\$seq_region_end, \$analysis_id, | |
201 \$seq_region_strand, \$hit_start,\$hit_end, \$hit_name, | |
202 \$cigar_line, \$evalue, \$perc_ident, \$score, | |
203 \$external_db_id, \$hcoverage, \$external_db_name, \$external_display_db_name ); | |
204 | |
205 my $asm_cs; | |
206 my $cmp_cs; | |
207 my $asm_cs_vers; | |
208 my $asm_cs_name; | |
209 my $cmp_cs_vers; | |
210 my $cmp_cs_name; | |
211 if($mapper) { | |
212 $asm_cs = $mapper->assembled_CoordSystem(); | |
213 $cmp_cs = $mapper->component_CoordSystem(); | |
214 $asm_cs_name = $asm_cs->name(); | |
215 $asm_cs_vers = $asm_cs->version(); | |
216 $cmp_cs_name = $cmp_cs->name(); | |
217 $cmp_cs_vers = $cmp_cs->version(); | |
218 } | |
219 | |
220 my $dest_slice_start; | |
221 my $dest_slice_end; | |
222 my $dest_slice_strand; | |
223 my $dest_slice_length; | |
224 my $dest_slice_sr_name; | |
225 my $dest_slice_sr_id; | |
226 if($dest_slice) { | |
227 $dest_slice_start = $dest_slice->start(); | |
228 $dest_slice_end = $dest_slice->end(); | |
229 $dest_slice_strand = $dest_slice->strand(); | |
230 $dest_slice_length = $dest_slice->length(); | |
231 $dest_slice_sr_name = $dest_slice->seq_region_name(); | |
232 $dest_slice_sr_id = $dest_slice->get_seq_region_id(); | |
233 } | |
234 | |
235 FEATURE: while($sth->fetch()) { | |
236 #get the analysis object | |
237 my $analysis = $analysis_hash{$analysis_id} ||= | |
238 $aa->fetch_by_dbID($analysis_id); | |
239 | |
240 #get the slice object | |
241 my $slice = $slice_hash{"ID:".$seq_region_id}; | |
242 | |
243 if(!$slice) { | |
244 $slice = $sa->fetch_by_seq_region_id($seq_region_id); | |
245 $slice_hash{"ID:".$seq_region_id} = $slice; | |
246 $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); | |
247 $sr_cs_hash{$seq_region_id} = $slice->coord_system(); | |
248 } | |
249 | |
250 my $sr_name = $sr_name_hash{$seq_region_id}; | |
251 my $sr_cs = $sr_cs_hash{$seq_region_id}; | |
252 # | |
253 # remap the feature coordinates to another coord system | |
254 # if a mapper was provided | |
255 # | |
256 if($mapper) { | |
257 | |
258 if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) { | |
259 ( $seq_region_id, $seq_region_start, | |
260 $seq_region_end, $seq_region_strand ) | |
261 = | |
262 $mapper->map( $sr_name, $seq_region_start, $seq_region_end, | |
263 $seq_region_strand, $sr_cs, 1, $dest_slice); | |
264 | |
265 } else { | |
266 | |
267 ( $seq_region_id, $seq_region_start, | |
268 $seq_region_end, $seq_region_strand ) | |
269 = | |
270 $mapper->fastmap( $sr_name, $seq_region_start, $seq_region_end, | |
271 $seq_region_strand, $sr_cs ); | |
272 } | |
273 | |
274 #skip features that map to gaps or coord system boundaries | |
275 next FEATURE if(!defined($seq_region_id)); | |
276 | |
277 # #get a slice in the coord system we just mapped to | |
278 # if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { | |
279 $slice = $slice_hash{"ID:".$seq_region_id} ||= | |
280 $sa->fetch_by_seq_region_id($seq_region_id); | |
281 # } else { | |
282 # $slice = $slice_hash{"ID:".$seq_region_id} ||= | |
283 # $sa->fetch_by_seq_region_id($asm_cs_name, $sr_name, undef, undef, undef, | |
284 # $asm_cs_vers); | |
285 # } | |
286 } | |
287 | |
288 # | |
289 # If a destination slice was provided convert the coords | |
290 # If the dest_slice starts at 1 and is foward strand, nothing needs doing | |
291 # | |
292 if($dest_slice) { | |
293 if($dest_slice_start != 1 || $dest_slice_strand != 1) { | |
294 if($dest_slice_strand == 1) { | |
295 $seq_region_start = $seq_region_start - $dest_slice_start + 1; | |
296 $seq_region_end = $seq_region_end - $dest_slice_start + 1; | |
297 } else { | |
298 my $tmp_seq_region_start = $seq_region_start; | |
299 $seq_region_start = $dest_slice_end - $seq_region_end + 1; | |
300 $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; | |
301 $seq_region_strand *= -1; | |
302 } | |
303 } | |
304 | |
305 #throw away features off the end of the requested slice | |
306 if($seq_region_end < 1 || $seq_region_start > $dest_slice_length || | |
307 ( $dest_slice_sr_id ne $seq_region_id )) { | |
308 next FEATURE; | |
309 } | |
310 $slice = $dest_slice; | |
311 } | |
312 | |
313 # Finally, create the new ProteinAlignFeature. | |
314 push( | |
315 @features, | |
316 $self->_create_feature_fast( | |
317 'Bio::EnsEMBL::DnaPepAlignFeature', { | |
318 'slice' => $slice, | |
319 'start' => $seq_region_start, | |
320 'end' => $seq_region_end, | |
321 'strand' => $seq_region_strand, | |
322 'hseqname' => $hit_name, | |
323 'hstart' => $hit_start, | |
324 'hend' => $hit_end, | |
325 'hstrand' => 1, # dna_pep_align features | |
326 # are always hstrand 1 | |
327 'score' => $score, | |
328 'p_value' => $evalue, | |
329 'percent_id' => $perc_ident, | |
330 'cigar_string' => $cigar_line, | |
331 'analysis' => $analysis, | |
332 'adaptor' => $self, | |
333 'dbID' => $protein_align_feature_id, | |
334 'external_db_id' => $external_db_id, | |
335 'hcoverage' => $hcoverage, | |
336 'dbname' => $external_db_name, | |
337 'db_display_name' => $external_display_db_name | |
338 } ) ); | |
339 | |
340 } | |
341 | |
342 return \@features; | |
343 } | |
344 | |
345 | |
346 | |
347 sub _tables { | |
348 my $self = shift; | |
349 | |
350 return (['protein_align_feature', 'paf'], ['external_db','exdb']); | |
351 } | |
352 | |
353 | |
354 sub _columns { | |
355 my $self = shift; | |
356 | |
357 #warning _objs_from_hashref method depends on ordering of this list | |
358 return qw( paf.protein_align_feature_id | |
359 paf.seq_region_id | |
360 paf.seq_region_start | |
361 paf.seq_region_end | |
362 paf.analysis_id | |
363 paf.seq_region_strand | |
364 paf.hit_start | |
365 paf.hit_end | |
366 paf.hit_name | |
367 paf.cigar_line | |
368 paf.evalue | |
369 paf.perc_ident | |
370 paf.score | |
371 paf.external_db_id | |
372 paf.hcoverage | |
373 exdb.db_name | |
374 exdb.db_display_name); | |
375 } | |
376 | |
377 sub _left_join{ | |
378 return (['external_db',"exdb.external_db_id = paf.external_db_id"]); | |
379 } | |
380 | |
381 =head2 list_dbIDs | |
382 | |
383 Arg [1] : none | |
384 Example : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()}; | |
385 Description: Gets an array of internal ids for all protein align | |
386 features in the current db | |
387 Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region. | |
388 Returntype : listref of ints | |
389 Exceptions : none | |
390 Caller : ? | |
391 Status : Stable | |
392 | |
393 =cut | |
394 | |
395 sub list_dbIDs { | |
396 my ($self,$ordered) = @_; | |
397 | |
398 return $self->_list_dbIDs("protein_align_feature", undef, $ordered); | |
399 } | |
400 | |
401 | |
402 | |
403 | |
404 | |
405 | |
406 1; |