comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2009 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
24 representing alternative assemblies
25
26 =head1 SYNOPSIS
27
28 my $slice =
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
30
31 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice);
32
33 # create a new strain slice adaptor and attach it to the MSC
34 my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db);
35 $msc->set_StrainSliceAdaptor($ssa);
36
37 # now attach strain
38 $msc->attach_StrainSlice('Watson');
39
40 =head1 DESCRIPTION
41
42 NOTE: this code is under development and not fully functional nor tested
43 yet. Use only for development.
44
45 This adaptor is a factory for creating MappedSlices representing
46 strains and attaching them to a MappedSliceContainer. A mapper will be created
47 to map between the reference slice and the common container slice coordinate
48 system.
49
50 =head1 METHODS
51
52 new
53 fetch_by_name
54
55 =head1 REALTED MODULES
56
57 Bio::EnsEMBL::MappedSlice
58 Bio::EnsEMBL::MappedSliceContainer
59 Bio::EnsEMBL::AlignStrainSlice
60 Bio::EnsEMBL::StrainSlice
61
62 =cut
63
64 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor;
65
66 use strict;
67 use warnings;
68 no warnings 'uninitialized';
69
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
72 use Bio::EnsEMBL::MappedSlice;
73 use Bio::EnsEMBL::Mapper;
74 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
75
76 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
77
78
79 =head2 new
80
81 Example : my $strain_slice_adaptor =
82 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
83 Description : Constructor.
84 Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
85 Exceptions : none
86 Caller : general
87 Status : At Risk
88 : under development
89
90 =cut
91
92 sub new {
93 my $caller = shift;
94
95 my $class = ref($caller) || $caller;
96 my $self = $class->SUPER::new(@_);
97
98 return $self;
99 }
100
101
102 =head2 fetch_by_name
103
104 Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container
105 to attach MappedSlices to
106 Arg[2] : String $name - the name of the strain to fetch
107 Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
108 Description : Creates a MappedSlice representing a version of the container's
109 reference slice with variant alleles from the named strain
110 Return type : listref of Bio::EnsEMBL::MappedSlice
111 Exceptions : thrown on wrong or missing arguments
112 Caller : general, Bio::EnsEMBL::MappedSliceContainer
113 Status : At Risk
114 : under development
115
116 =cut
117
118 sub fetch_by_name {
119 my $self = shift;
120 my $container = shift;
121 my $name = shift;
122
123 # argueent check
124 unless ($container and ref($container) and
125 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
126 throw("Need a MappedSliceContainer.");
127 }
128
129 unless ($name) {
130 throw("Need a strain name.");
131 }
132
133 my $slice = $container->ref_slice;
134
135 # get a connection to the variation DB
136 my $variation_db = $self->db->get_db_adaptor('variation');
137
138 unless($variation_db) {
139 warning("Variation database must be attached to core database to retrieve variation information");
140 return '';
141 }
142
143 # now get an allele feature adaptor
144 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
145
146 # check we got it
147 unless(defined $af_adaptor) {
148 warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
149 return '';
150 }
151
152 # now get an individual adaptor
153 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
154
155 # check we got it
156 unless(defined $ind_adaptor) {
157 warning("Not possible to retrieve IndividualAdaptor from variation database");
158 return '';
159 }
160
161 # fetch individual object for this strain name
162 my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)};
163
164 # check we got a result
165 unless(defined $ind) {
166 warn("Strain ".$name." not found in the database");
167 return '';
168 }
169
170
171 ## MAP STRAIN SLICE TO REF SLICE
172 ################################
173
174 # create a mapper
175 my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice');
176
177 # create a mapped_slice object
178 my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
179 -ADAPTOR => $self,
180 -CONTAINER => $container,
181 -NAME => $slice->name."\#strain_$name",
182 );
183
184 # get the strain slice
185 my $strain_slice = $slice->get_by_strain($ind->name);
186
187 # get all allele features for this slice and individual
188 #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)};
189
190 # get allele features with coverage info
191 my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1);
192
193 # check we got some data
194 #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0];
195
196
197 my $start_slice = $slice->start;
198 my $start_strain = 1;
199 my $sr_name = $slice->seq_region_name;
200 #my $sr_name = 'ref_slice';
201 my ($end_slice, $end_strain, $allele_length);
202
203 my $indel_flag = 0;
204 my $total_length_diff = 0;
205
206 # check for AFs
207 if(defined($afs) && scalar @$afs) {
208
209 # go through each AF
210 foreach my $af(@$afs) {
211
212 # find out if it changes the length
213 if($af->length_diff != 0) {
214
215 $indel_flag = 1;
216 $total_length_diff += $af->length_diff;
217
218 # get the allele length
219 $allele_length = $af->length + $af->length_diff();
220
221 $end_slice = $slice->start + $af->start() - 2;
222
223 if ($end_slice >= $start_slice){
224 $end_strain = $end_slice - $start_slice + $start_strain;
225
226 #add the sequence that maps
227 $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice);
228
229 #add the indel
230 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
231
232 $start_strain = $end_strain + $allele_length + 1;
233 }
234
235 else{
236
237 #add the indel
238 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
239
240 $start_strain += $allele_length;
241 }
242
243 $start_slice = $end_slice + $af->length+ 1;
244 }
245 }
246 }
247
248 # add the remaining coordinates (or the whole length if no indels found)
249 $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end);
250
251 # add the slice/mapper pair
252 $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper);
253
254
255
256 ## MAP REF_SLICE TO CONTAINER SLICE
257 ###################################
258
259 if($total_length_diff > 0) {
260
261 # create a new mapper
262 my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
263
264 # get existing pairs
265 my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container');
266 my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice');
267
268 # we need a list of indels (specifically inserts)
269 my @indels;
270
271 # go through existing first
272 foreach my $pair(@existing_pairs) {
273
274 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
275 my $indel;
276 $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start);
277
278 # we're only interested in inserts here, not deletions
279 next unless $indel->{'length_diff'} > 0;
280
281 $indel->{'ref_start'} = $pair->from->start;
282 $indel->{'ref_end'} = $pair->from->end;
283 $indel->{'length'} = $pair->to->end - $pair->to->start;
284
285 push @indels, $indel;
286 }
287 }
288
289 # now new ones
290 foreach my $pair(@new_pairs) {
291
292 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
293 my $indel;
294 $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start);
295
296 # we're only interested in inserts here, not deletions
297 next unless $indel->{'length_diff'} > 0;
298
299 $indel->{'ref_start'} = $pair->to->start;
300 $indel->{'ref_end'} = $pair->to->end;
301 $indel->{'length'} = $pair->from->end - $pair->from->start;
302
303 push @indels, $indel;
304 }
305 }
306
307 # sort them
308 @indels = sort {
309 $a->{'ref_start'} <=> $b->{'ref_start'} || # by position
310 $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest
311 } @indels;
312
313 # clean them
314 my @new_indels = ();
315 my $p = $indels[0];
316 push @new_indels, $indels[0] if scalar @indels;
317
318 for my $i(1..$#indels) {
319 my $c = $indels[$i];
320
321 if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) {
322 push @new_indels, $c;
323 $p = $c;
324 }
325 }
326
327 $start_slice = $slice->start;
328 $start_strain = 1;
329 $sr_name = $slice->seq_region_name;
330 #$sr_name = 'ref_slice';
331
332 foreach my $indel(@new_indels) {
333
334 $end_slice = $indel->{'ref_end'};
335 $end_strain = $start_strain + ($end_slice - $start_slice);
336
337 $allele_length = $indel->{'length'} + $indel->{'length_diff'};
338
339 $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain);
340
341 $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length);
342
343 $start_strain = $end_strain + $allele_length + 1;
344 $start_slice = $end_slice + $indel->{'length'} + 1;
345 }
346
347 $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice));
348
349 # replace the mapper with the new mapper
350 $container->mapper($new_mapper);
351
352 # change the container slice's length according to length diff
353 $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1));
354 }
355
356 return [$mapped_slice];
357 }
358
359
360 1;
361