0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2009 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
|
|
24 representing alternative assemblies
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 my $slice =
|
|
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
|
|
30
|
|
31 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice);
|
|
32
|
|
33 # create a new strain slice adaptor and attach it to the MSC
|
|
34 my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db);
|
|
35 $msc->set_StrainSliceAdaptor($ssa);
|
|
36
|
|
37 # now attach strain
|
|
38 $msc->attach_StrainSlice('Watson');
|
|
39
|
|
40 =head1 DESCRIPTION
|
|
41
|
|
42 NOTE: this code is under development and not fully functional nor tested
|
|
43 yet. Use only for development.
|
|
44
|
|
45 This adaptor is a factory for creating MappedSlices representing
|
|
46 strains and attaching them to a MappedSliceContainer. A mapper will be created
|
|
47 to map between the reference slice and the common container slice coordinate
|
|
48 system.
|
|
49
|
|
50 =head1 METHODS
|
|
51
|
|
52 new
|
|
53 fetch_by_name
|
|
54
|
|
55 =head1 REALTED MODULES
|
|
56
|
|
57 Bio::EnsEMBL::MappedSlice
|
|
58 Bio::EnsEMBL::MappedSliceContainer
|
|
59 Bio::EnsEMBL::AlignStrainSlice
|
|
60 Bio::EnsEMBL::StrainSlice
|
|
61
|
|
62 =cut
|
|
63
|
|
64 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor;
|
|
65
|
|
66 use strict;
|
|
67 use warnings;
|
|
68 no warnings 'uninitialized';
|
|
69
|
|
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
72 use Bio::EnsEMBL::MappedSlice;
|
|
73 use Bio::EnsEMBL::Mapper;
|
|
74 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
|
|
75
|
|
76 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
|
|
77
|
|
78
|
|
79 =head2 new
|
|
80
|
|
81 Example : my $strain_slice_adaptor =
|
|
82 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
|
|
83 Description : Constructor.
|
|
84 Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
|
|
85 Exceptions : none
|
|
86 Caller : general
|
|
87 Status : At Risk
|
|
88 : under development
|
|
89
|
|
90 =cut
|
|
91
|
|
92 sub new {
|
|
93 my $caller = shift;
|
|
94
|
|
95 my $class = ref($caller) || $caller;
|
|
96 my $self = $class->SUPER::new(@_);
|
|
97
|
|
98 return $self;
|
|
99 }
|
|
100
|
|
101
|
|
102 =head2 fetch_by_name
|
|
103
|
|
104 Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container
|
|
105 to attach MappedSlices to
|
|
106 Arg[2] : String $name - the name of the strain to fetch
|
|
107 Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
|
|
108 Description : Creates a MappedSlice representing a version of the container's
|
|
109 reference slice with variant alleles from the named strain
|
|
110 Return type : listref of Bio::EnsEMBL::MappedSlice
|
|
111 Exceptions : thrown on wrong or missing arguments
|
|
112 Caller : general, Bio::EnsEMBL::MappedSliceContainer
|
|
113 Status : At Risk
|
|
114 : under development
|
|
115
|
|
116 =cut
|
|
117
|
|
118 sub fetch_by_name {
|
|
119 my $self = shift;
|
|
120 my $container = shift;
|
|
121 my $name = shift;
|
|
122
|
|
123 # argueent check
|
|
124 unless ($container and ref($container) and
|
|
125 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
|
|
126 throw("Need a MappedSliceContainer.");
|
|
127 }
|
|
128
|
|
129 unless ($name) {
|
|
130 throw("Need a strain name.");
|
|
131 }
|
|
132
|
|
133 my $slice = $container->ref_slice;
|
|
134
|
|
135 # get a connection to the variation DB
|
|
136 my $variation_db = $self->db->get_db_adaptor('variation');
|
|
137
|
|
138 unless($variation_db) {
|
|
139 warning("Variation database must be attached to core database to retrieve variation information");
|
|
140 return '';
|
|
141 }
|
|
142
|
|
143 # now get an allele feature adaptor
|
|
144 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
|
|
145
|
|
146 # check we got it
|
|
147 unless(defined $af_adaptor) {
|
|
148 warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
|
|
149 return '';
|
|
150 }
|
|
151
|
|
152 # now get an individual adaptor
|
|
153 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
|
|
154
|
|
155 # check we got it
|
|
156 unless(defined $ind_adaptor) {
|
|
157 warning("Not possible to retrieve IndividualAdaptor from variation database");
|
|
158 return '';
|
|
159 }
|
|
160
|
|
161 # fetch individual object for this strain name
|
|
162 my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)};
|
|
163
|
|
164 # check we got a result
|
|
165 unless(defined $ind) {
|
|
166 warn("Strain ".$name." not found in the database");
|
|
167 return '';
|
|
168 }
|
|
169
|
|
170
|
|
171 ## MAP STRAIN SLICE TO REF SLICE
|
|
172 ################################
|
|
173
|
|
174 # create a mapper
|
|
175 my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice');
|
|
176
|
|
177 # create a mapped_slice object
|
|
178 my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
|
|
179 -ADAPTOR => $self,
|
|
180 -CONTAINER => $container,
|
|
181 -NAME => $slice->name."\#strain_$name",
|
|
182 );
|
|
183
|
|
184 # get the strain slice
|
|
185 my $strain_slice = $slice->get_by_strain($ind->name);
|
|
186
|
|
187 # get all allele features for this slice and individual
|
|
188 #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)};
|
|
189
|
|
190 # get allele features with coverage info
|
|
191 my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1);
|
|
192
|
|
193 # check we got some data
|
|
194 #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0];
|
|
195
|
|
196
|
|
197 my $start_slice = $slice->start;
|
|
198 my $start_strain = 1;
|
|
199 my $sr_name = $slice->seq_region_name;
|
|
200 #my $sr_name = 'ref_slice';
|
|
201 my ($end_slice, $end_strain, $allele_length);
|
|
202
|
|
203 my $indel_flag = 0;
|
|
204 my $total_length_diff = 0;
|
|
205
|
|
206 # check for AFs
|
|
207 if(defined($afs) && scalar @$afs) {
|
|
208
|
|
209 # go through each AF
|
|
210 foreach my $af(@$afs) {
|
|
211
|
|
212 # find out if it changes the length
|
|
213 if($af->length_diff != 0) {
|
|
214
|
|
215 $indel_flag = 1;
|
|
216 $total_length_diff += $af->length_diff;
|
|
217
|
|
218 # get the allele length
|
|
219 $allele_length = $af->length + $af->length_diff();
|
|
220
|
|
221 $end_slice = $slice->start + $af->start() - 2;
|
|
222
|
|
223 if ($end_slice >= $start_slice){
|
|
224 $end_strain = $end_slice - $start_slice + $start_strain;
|
|
225
|
|
226 #add the sequence that maps
|
|
227 $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice);
|
|
228
|
|
229 #add the indel
|
|
230 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
|
|
231
|
|
232 $start_strain = $end_strain + $allele_length + 1;
|
|
233 }
|
|
234
|
|
235 else{
|
|
236
|
|
237 #add the indel
|
|
238 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
|
|
239
|
|
240 $start_strain += $allele_length;
|
|
241 }
|
|
242
|
|
243 $start_slice = $end_slice + $af->length+ 1;
|
|
244 }
|
|
245 }
|
|
246 }
|
|
247
|
|
248 # add the remaining coordinates (or the whole length if no indels found)
|
|
249 $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end);
|
|
250
|
|
251 # add the slice/mapper pair
|
|
252 $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper);
|
|
253
|
|
254
|
|
255
|
|
256 ## MAP REF_SLICE TO CONTAINER SLICE
|
|
257 ###################################
|
|
258
|
|
259 if($total_length_diff > 0) {
|
|
260
|
|
261 # create a new mapper
|
|
262 my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
|
|
263
|
|
264 # get existing pairs
|
|
265 my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container');
|
|
266 my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice');
|
|
267
|
|
268 # we need a list of indels (specifically inserts)
|
|
269 my @indels;
|
|
270
|
|
271 # go through existing first
|
|
272 foreach my $pair(@existing_pairs) {
|
|
273
|
|
274 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
|
|
275 my $indel;
|
|
276 $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start);
|
|
277
|
|
278 # we're only interested in inserts here, not deletions
|
|
279 next unless $indel->{'length_diff'} > 0;
|
|
280
|
|
281 $indel->{'ref_start'} = $pair->from->start;
|
|
282 $indel->{'ref_end'} = $pair->from->end;
|
|
283 $indel->{'length'} = $pair->to->end - $pair->to->start;
|
|
284
|
|
285 push @indels, $indel;
|
|
286 }
|
|
287 }
|
|
288
|
|
289 # now new ones
|
|
290 foreach my $pair(@new_pairs) {
|
|
291
|
|
292 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
|
|
293 my $indel;
|
|
294 $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start);
|
|
295
|
|
296 # we're only interested in inserts here, not deletions
|
|
297 next unless $indel->{'length_diff'} > 0;
|
|
298
|
|
299 $indel->{'ref_start'} = $pair->to->start;
|
|
300 $indel->{'ref_end'} = $pair->to->end;
|
|
301 $indel->{'length'} = $pair->from->end - $pair->from->start;
|
|
302
|
|
303 push @indels, $indel;
|
|
304 }
|
|
305 }
|
|
306
|
|
307 # sort them
|
|
308 @indels = sort {
|
|
309 $a->{'ref_start'} <=> $b->{'ref_start'} || # by position
|
|
310 $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest
|
|
311 } @indels;
|
|
312
|
|
313 # clean them
|
|
314 my @new_indels = ();
|
|
315 my $p = $indels[0];
|
|
316 push @new_indels, $indels[0] if scalar @indels;
|
|
317
|
|
318 for my $i(1..$#indels) {
|
|
319 my $c = $indels[$i];
|
|
320
|
|
321 if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) {
|
|
322 push @new_indels, $c;
|
|
323 $p = $c;
|
|
324 }
|
|
325 }
|
|
326
|
|
327 $start_slice = $slice->start;
|
|
328 $start_strain = 1;
|
|
329 $sr_name = $slice->seq_region_name;
|
|
330 #$sr_name = 'ref_slice';
|
|
331
|
|
332 foreach my $indel(@new_indels) {
|
|
333
|
|
334 $end_slice = $indel->{'ref_end'};
|
|
335 $end_strain = $start_strain + ($end_slice - $start_slice);
|
|
336
|
|
337 $allele_length = $indel->{'length'} + $indel->{'length_diff'};
|
|
338
|
|
339 $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain);
|
|
340
|
|
341 $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length);
|
|
342
|
|
343 $start_strain = $end_strain + $allele_length + 1;
|
|
344 $start_slice = $end_slice + $indel->{'length'} + 1;
|
|
345 }
|
|
346
|
|
347 $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice));
|
|
348
|
|
349 # replace the mapper with the new mapper
|
|
350 $container->mapper($new_mapper);
|
|
351
|
|
352 # change the container slice's length according to length diff
|
|
353 $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1));
|
|
354 }
|
|
355
|
|
356 return [$mapped_slice];
|
|
357 }
|
|
358
|
|
359
|
|
360 1;
|
|
361
|