0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DB::ExternalFeatureFactoryI -
|
|
24 Legacy Abstract interface for External Feature
|
|
25 Factories. Bio::EnsEMBL::External::ExternalFeatureAdaptor should be used
|
|
26 instead if possible.
|
|
27
|
|
28 =head1 SYNOPSIS
|
|
29
|
|
30 $external_ff = new ImplementingExternalFeatureFactoryClass;
|
|
31
|
|
32 $database_adaptor = new Bio::EnsEMBL::DBSQL::DBAdaptor(
|
|
33 -host => 'blah',
|
|
34 -dbname => 'other',
|
|
35 -pass => 'pass'
|
|
36 );
|
|
37
|
|
38 # alternatively, you can add external databases to an obj once made
|
|
39 $database_adaptor->add_ExternalFeatureFactory($external_ff);
|
|
40
|
|
41 # now the ExternalFeatureFactory has been added, Ensembl RawContigs
|
|
42 # and Slices will now have ExternalFeatures on them
|
|
43 $contig =
|
|
44 $database_adaptor->get_RawContigAdaptor->fetch_by_name('AC00056.00001');
|
|
45 @external = @{ $contig->get_all_ExternalFeatures() };
|
|
46
|
|
47 # this works on Slices as well
|
|
48 $slice =
|
|
49 $database_adaptor->get_SliceAdaptor->fetch_by_chr_start_end( '12', 10000,
|
|
50 30000 );
|
|
51 @external = @{ $slice->get_all_ExternalFeatures() };
|
|
52
|
|
53 =head1 DESCRIPTION
|
|
54
|
|
55 This is a legacy class. It is included only for backwards
|
|
56 compatibility with ExternalFeatureFactories which are presumably
|
|
57 still used to place data into ensembl. It is recommended that if
|
|
58 you wish to create EnsEMBL features externally that you use the
|
|
59 Bio::EnsEMBL::External::ExternalFeatureAdaptor instead.
|
|
60
|
|
61 This object defines the abstract interface for External Database access
|
|
62 inside Ensembl. The aim is that one can attach an External Database
|
|
63 which will generate Sequence Features and these Sequence Features will
|
|
64 be accessible along side all the internal Ensembl sequence features, for
|
|
65 drawing, EMBL dumping etc. In particular, the external database does not
|
|
66 have to worry about the transformation of the Sequence Feature objects
|
|
67 into VirtualContigs.
|
|
68
|
|
69 Sequence Features have to be defined in one of two coordinate systems:
|
|
70 Original EMBL/GenBank coordinates of a particular sequence version or
|
|
71 the Ensembl contig coordinates. This means you have to calculate your
|
|
72 sequence features in one of these two coordinate systems.
|
|
73
|
|
74 The methods that have to be implemented are:
|
|
75
|
|
76 get_Ensembl_SeqFeatures_contig( $ensembl_contig_identifier,
|
|
77 $sequence_version, $start, $end );
|
|
78
|
|
79 get_Ensembl_SeqFeatures_clone( $embl_accession_number,
|
|
80 $sequence_version, $start, $end );
|
|
81
|
|
82 The semantics of the arguments are as follows:
|
|
83
|
|
84 $ensembl_contig_identifier - the ensembl contig id (external id).
|
|
85 $sequence_version - embl/genbank sequence version
|
|
86 $embl_accession_number - the embl/genbank accession number
|
|
87
|
|
88 The $start/$end can be ignored, but methods can take advantage of it.
|
|
89 This is so that ensembl can ask for features only on a region of DNA,
|
|
90 and if desired, the external database can respond with features only in
|
|
91 this region, rather than the entire sequence.
|
|
92
|
|
93 The hope is that the second method could potentially have a very complex
|
|
94 set of mappings of other embl_accession numbers to one embl_accession
|
|
95 number and provide the complex mapping.
|
|
96
|
|
97 The methods should return Sequence Features with the following spec:
|
|
98
|
|
99 a) must implement the Bio::SeqFeatureI interface.
|
|
100
|
|
101 b) must accept "set" calls on
|
|
102
|
|
103 start,end,strand
|
|
104
|
|
105 to provide coordinate transformation of the feature.
|
|
106
|
|
107 c) must be unique in-memory objects, ie, the implementation is not
|
|
108 allowed to cache the sequence feature in its entirety. Two separate
|
|
109 calls to get_Ensembl_SeqFeatures_contig must be able to separately
|
|
110 set start,end,strand information without clobbering each other. The
|
|
111 other information, if so wished, can be cached by each SeqFeature
|
|
112 holding onto another object, but this is left to the implementor to
|
|
113 decide on the correct strategy.
|
|
114
|
|
115 d) must return a unique identifier when called with method id.
|
|
116
|
|
117 You must implement both functions. In most cases, one function will
|
|
118 always return an empty list, whereas the other function will actually
|
|
119 query the external database.
|
|
120
|
|
121 The second way of accessing the External Database from Ensembl is using
|
|
122 unique internal identifiers in that database. The method is:
|
|
123
|
|
124 get_SeqFeature_by_id($id);
|
|
125
|
|
126 It should return exactly one Sequence Feature object of the same type as
|
|
127 above.
|
|
128
|
|
129 =head1 METHODS
|
|
130
|
|
131 =cut
|
|
132
|
|
package Bio::EnsEMBL::DB::ExternalFeatureFactoryI;

# Strictures: every variable in this file is already lexical, so these
# only add compile-time checking.
use strict;
use warnings;

use Bio::EnsEMBL::External::ExternalFeatureAdaptor;
use vars qw(@ISA);

# This legacy interface is a subclass of ExternalFeatureAdaptor; methods
# such as db, throw and warn used below are not defined in this file and
# are expected to come from that parent.
@ISA = ( 'Bio::EnsEMBL::External::ExternalFeatureAdaptor' );
|
|
138
|
|
139
|
|
=head2 coordinate_systems

  Arg [1]    : none
  Example    : my @systems = $factory->coordinate_systems();
  Description: Names the coordinate systems this factory serves. It is
               present so that a legacy ExternalFeatureFactory behaves
               as an ExternalFeatureAdaptor (backwards compatibility).
               The only system reported is 'CONTIG'.
  Returntype : list of strings
  Exceptions : none
  Caller     : internal

=cut

sub coordinate_systems {
  my ($self) = @_;

  # The only fetch method this interface provides is
  # fetch_all_by_contig_name, hence the single entry.
  return ('CONTIG');
}
|
|
157
|
|
158
|
|
=head2 fetch_all_by_contig_name

  Arg [1]    : string $contig_name
               Name of the raw contig whose external features are wanted.
  Example    : my $features =
                 $factory->fetch_all_by_contig_name('AC00056.00001');
  Description: Makes the legacy ExternalFeatureFactory interface behave
               as an ExternalFeatureAdaptor (backwards compatibility).
               The contig is fetched by name through the RawContigAdaptor
               of the attached db. Features returned by
               get_Ensembl_SeqFeatures_contig are taken as they are.
               Features returned by get_Ensembl_SeqFeatures_clone are
               shifted from clone to contig coordinates; those lying
               entirely outside the contig are dropped. Clone features
               that only partly overlap the contig are kept, so their
               coordinates may extend past the contig ends.
  Returntype : reference to a list of feature objects
  Exceptions : thrown if the db attribute is not set
               thrown if no contig is found for $contig_name
  Caller     : internal

=cut

sub fetch_all_by_contig_name {
  my ($self, $contig_name) = @_;

  my $db = $self->db;
  unless($db) {
    $self->throw('DB attribute not set. This value must be set for the ' .
                 'ExternalFeatureFactory to function correctly');
  }

  my $ctg = $db->get_RawContigAdaptor->fetch_by_name($contig_name);

  # Fail with a readable message instead of the "Can't call method
  # "clone" on an undefined value" the next statement would produce.
  unless(defined $ctg) {
    $self->throw('Could not fetch a contig named [' .
                 (defined $contig_name ? $contig_name : '') . ']');
  }

  my $clone      = $ctg->clone;
  my $version    = $clone->version;
  my $ctg_length = $ctg->length;

  #get contig features; these are used without any coordinate change
  my @features = $self->get_Ensembl_SeqFeatures_contig($ctg->name,
                                                       $version,
                                                       1,
                                                       $ctg_length);

  #get clone features for the stretch of the clone covered by the contig
  my $clone_start = $ctg->embl_offset;
  my $clone_end   = $clone_start + $ctg_length - 1;
  my @clone_features = $self->get_Ensembl_SeqFeatures_clone($clone->id,
                                                            $version,
                                                            $clone_start,
                                                            $clone_end);

  #change clone coordinates to contig coordinates
  foreach my $f (@clone_features) {
    my $start = $f->start - $clone_start + 1;
    my $end   = $f->end   - $clone_start + 1;

    #skip features outside the contig (they are left unmodified)
    next if($end < 1 || $start > $ctg_length);

    $f->start($start);
    $f->end($end);

    push @features, $f;
  }

  return \@features;
}
|
|
218
|
|
=head2 get_Ensembl_SeqFeatures_contig

 Title   : get_Ensembl_SeqFeatures_contig (Abstract)
 Usage   : my @features = $factory->get_Ensembl_SeqFeatures_contig(
             $contig_name, $sequence_version, $start, $end );
 Function: Abstract method, to be overridden by implementing classes.
           This base implementation only issues a warning.
 Example :
 Returns : nothing in this base class (empty list)
 Args    : contig name, sequence version, start and end, as passed by
           fetch_all_by_contig_name; all are ignored here

=cut

sub get_Ensembl_SeqFeatures_contig{
  my ($self) = @_;

  # The message names this method, matching the sub actually called.
  $self->warn("Abstract method get_Ensembl_SeqFeatures_contig " .
              "encountered in base class. Implementation failed to complete it");

  # Bare return: without it the sub would hand back whatever warn()
  # returned, and fetch_all_by_contig_name pushes this sub's return
  # list straight onto its list of features.
  return;
}
|
|
238
|
|
=head2 get_Ensembl_SeqFeatures_clone

 Title   : get_Ensembl_SeqFeatures_clone (Abstract)
 Usage   : my @features = $factory->get_Ensembl_SeqFeatures_clone(
             $clone_id, $sequence_version, $start, $end );
 Function: Abstract method, to be overridden by implementing classes.
           This base implementation only issues a warning.
 Example :
 Returns : nothing in this base class (empty list)
 Args    : clone id, sequence version, start and end, as passed by
           fetch_all_by_contig_name; all are ignored here

=cut

sub get_Ensembl_SeqFeatures_clone{
  my ($self) = @_;

  $self->warn("Abstract method get_Ensembl_SeqFeatures_clone " .
              "encountered in base class. Implementation failed to complete it");

  # Bare return: without it the sub would hand back whatever warn()
  # returned, and fetch_all_by_contig_name calls ->start and ->end on
  # every element this sub returns.
  return;
}
|
|
258
|
|
=head2 get_Ensembl_Genes_clone

 Title   : get_Ensembl_Genes_clone
 Function: Placeholder for returning Gene objects in clone coordinates.
           This base implementation ignores its arguments and returns
           nothing.
 Returns : nothing in this base class (empty list, or undef in scalar
           context)
 Args    : clone id

=cut

sub get_Ensembl_Genes_clone {
  # List assignment, so that $self holds the invocant; a scalar
  # assignment from @_ would store the argument count instead.
  my ($self) = @_;

  return;
}
|
|
273
|
|
=head2 get_SeqFeature_by_id

 Title   : get_SeqFeature_by_id (Abstract)
 Usage   : my $feature = $factory->get_SeqFeature_by_id($id);
 Function: Return SeqFeature object for any valid unique id. Abstract
           method, to be overridden by implementing classes; this base
           implementation only issues a warning.
 Example :
 Returns : nothing in this base class (undef in scalar context)
 Args    : id as determined by the External Database

=cut

sub get_SeqFeature_by_id {
  my ($self) = @_;

  $self->warn("Abstract method get_SeqFeature_by_id encountered " .
              "in base class. Implementation failed to complete it");

  # Bare return: without it the caller would receive whatever warn()
  # returned in place of a feature.
  return;
}
|
|
292
|
|
293
|
|
294 1;
|
|
295
|
|
296
|
|
297
|
|
298
|
|
299
|
|
300
|
|
301
|