Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2009 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices | |
24 representing alternative assemblies | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 my $slice = | |
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); | |
30 | |
31 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice); | |
32 | |
33 # create a new strain slice adaptor and attach it to the MSC | |
34 my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db); | |
35 $msc->set_StrainSliceAdaptor($ssa); | |
36 | |
37 # now attach strain | |
38 $msc->attach_StrainSlice('Watson'); | |
39 | |
40 =head1 DESCRIPTION | |
41 | |
42 NOTE: this code is under development and not fully functional nor tested | |
43 yet. Use only for development. | |
44 | |
45 This adaptor is a factory for creating MappedSlices representing | |
46 strains and attaching them to a MappedSliceContainer. A mapper will be created | |
47 to map between the reference slice and the common container slice coordinate | |
48 system. | |
49 | |
50 =head1 METHODS | |
51 | |
52 new | |
53 fetch_by_name | |
54 | |
55 =head1 REALTED MODULES | |
56 | |
57 Bio::EnsEMBL::MappedSlice | |
58 Bio::EnsEMBL::MappedSliceContainer | |
59 Bio::EnsEMBL::AlignStrainSlice | |
60 Bio::EnsEMBL::StrainSlice | |
61 | |
62 =cut | |
63 | |
64 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor; | |
65 | |
66 use strict; | |
67 use warnings; | |
68 no warnings 'uninitialized'; | |
69 | |
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
72 use Bio::EnsEMBL::MappedSlice; | |
73 use Bio::EnsEMBL::Mapper; | |
74 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
75 | |
76 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); | |
77 | |
78 | |
79 =head2 new | |
80 | |
81 Example : my $strain_slice_adaptor = | |
82 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new; | |
83 Description : Constructor. | |
84 Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor | |
85 Exceptions : none | |
86 Caller : general | |
87 Status : At Risk | |
88 : under development | |
89 | |
90 =cut | |
91 | |
92 sub new { | |
93 my $caller = shift; | |
94 | |
95 my $class = ref($caller) || $caller; | |
96 my $self = $class->SUPER::new(@_); | |
97 | |
98 return $self; | |
99 } | |
100 | |
101 | |
102 =head2 fetch_by_name | |
103 | |
104 Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container | |
105 to attach MappedSlices to | |
106 Arg[2] : String $name - the name of the strain to fetch | |
107 Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') }; | |
108 Description : Creates a MappedSlice representing a version of the container's | |
109 reference slice with variant alleles from the named strain | |
110 Return type : listref of Bio::EnsEMBL::MappedSlice | |
111 Exceptions : thrown on wrong or missing arguments | |
112 Caller : general, Bio::EnsEMBL::MappedSliceContainer | |
113 Status : At Risk | |
114 : under development | |
115 | |
116 =cut | |
117 | |
118 sub fetch_by_name { | |
119 my $self = shift; | |
120 my $container = shift; | |
121 my $name = shift; | |
122 | |
123 # argueent check | |
124 unless ($container and ref($container) and | |
125 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) { | |
126 throw("Need a MappedSliceContainer."); | |
127 } | |
128 | |
129 unless ($name) { | |
130 throw("Need a strain name."); | |
131 } | |
132 | |
133 my $slice = $container->ref_slice; | |
134 | |
135 # get a connection to the variation DB | |
136 my $variation_db = $self->db->get_db_adaptor('variation'); | |
137 | |
138 unless($variation_db) { | |
139 warning("Variation database must be attached to core database to retrieve variation information"); | |
140 return ''; | |
141 } | |
142 | |
143 # now get an allele feature adaptor | |
144 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor; | |
145 | |
146 # check we got it | |
147 unless(defined $af_adaptor) { | |
148 warning("Not possible to retrieve AlleleFeatureAdaptor from variation database"); | |
149 return ''; | |
150 } | |
151 | |
152 # now get an individual adaptor | |
153 my $ind_adaptor = $variation_db->get_IndividualAdaptor; | |
154 | |
155 # check we got it | |
156 unless(defined $ind_adaptor) { | |
157 warning("Not possible to retrieve IndividualAdaptor from variation database"); | |
158 return ''; | |
159 } | |
160 | |
161 # fetch individual object for this strain name | |
162 my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)}; | |
163 | |
164 # check we got a result | |
165 unless(defined $ind) { | |
166 warn("Strain ".$name." not found in the database"); | |
167 return ''; | |
168 } | |
169 | |
170 | |
171 ## MAP STRAIN SLICE TO REF SLICE | |
172 ################################ | |
173 | |
174 # create a mapper | |
175 my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice'); | |
176 | |
177 # create a mapped_slice object | |
178 my $mapped_slice = Bio::EnsEMBL::MappedSlice->new( | |
179 -ADAPTOR => $self, | |
180 -CONTAINER => $container, | |
181 -NAME => $slice->name."\#strain_$name", | |
182 ); | |
183 | |
184 # get the strain slice | |
185 my $strain_slice = $slice->get_by_strain($ind->name); | |
186 | |
187 # get all allele features for this slice and individual | |
188 #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)}; | |
189 | |
190 # get allele features with coverage info | |
191 my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1); | |
192 | |
193 # check we got some data | |
194 #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0]; | |
195 | |
196 | |
197 my $start_slice = $slice->start; | |
198 my $start_strain = 1; | |
199 my $sr_name = $slice->seq_region_name; | |
200 #my $sr_name = 'ref_slice'; | |
201 my ($end_slice, $end_strain, $allele_length); | |
202 | |
203 my $indel_flag = 0; | |
204 my $total_length_diff = 0; | |
205 | |
206 # check for AFs | |
207 if(defined($afs) && scalar @$afs) { | |
208 | |
209 # go through each AF | |
210 foreach my $af(@$afs) { | |
211 | |
212 # find out if it changes the length | |
213 if($af->length_diff != 0) { | |
214 | |
215 $indel_flag = 1; | |
216 $total_length_diff += $af->length_diff; | |
217 | |
218 # get the allele length | |
219 $allele_length = $af->length + $af->length_diff(); | |
220 | |
221 $end_slice = $slice->start + $af->start() - 2; | |
222 | |
223 if ($end_slice >= $start_slice){ | |
224 $end_strain = $end_slice - $start_slice + $start_strain; | |
225 | |
226 #add the sequence that maps | |
227 $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice); | |
228 | |
229 #add the indel | |
230 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); | |
231 | |
232 $start_strain = $end_strain + $allele_length + 1; | |
233 } | |
234 | |
235 else{ | |
236 | |
237 #add the indel | |
238 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); | |
239 | |
240 $start_strain += $allele_length; | |
241 } | |
242 | |
243 $start_slice = $end_slice + $af->length+ 1; | |
244 } | |
245 } | |
246 } | |
247 | |
248 # add the remaining coordinates (or the whole length if no indels found) | |
249 $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end); | |
250 | |
251 # add the slice/mapper pair | |
252 $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper); | |
253 | |
254 | |
255 | |
256 ## MAP REF_SLICE TO CONTAINER SLICE | |
257 ################################### | |
258 | |
259 if($total_length_diff > 0) { | |
260 | |
261 # create a new mapper | |
262 my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container'); | |
263 | |
264 # get existing pairs | |
265 my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container'); | |
266 my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice'); | |
267 | |
268 # we need a list of indels (specifically inserts) | |
269 my @indels; | |
270 | |
271 # go through existing first | |
272 foreach my $pair(@existing_pairs) { | |
273 | |
274 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { | |
275 my $indel; | |
276 $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start); | |
277 | |
278 # we're only interested in inserts here, not deletions | |
279 next unless $indel->{'length_diff'} > 0; | |
280 | |
281 $indel->{'ref_start'} = $pair->from->start; | |
282 $indel->{'ref_end'} = $pair->from->end; | |
283 $indel->{'length'} = $pair->to->end - $pair->to->start; | |
284 | |
285 push @indels, $indel; | |
286 } | |
287 } | |
288 | |
289 # now new ones | |
290 foreach my $pair(@new_pairs) { | |
291 | |
292 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { | |
293 my $indel; | |
294 $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start); | |
295 | |
296 # we're only interested in inserts here, not deletions | |
297 next unless $indel->{'length_diff'} > 0; | |
298 | |
299 $indel->{'ref_start'} = $pair->to->start; | |
300 $indel->{'ref_end'} = $pair->to->end; | |
301 $indel->{'length'} = $pair->from->end - $pair->from->start; | |
302 | |
303 push @indels, $indel; | |
304 } | |
305 } | |
306 | |
307 # sort them | |
308 @indels = sort { | |
309 $a->{'ref_start'} <=> $b->{'ref_start'} || # by position | |
310 $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest | |
311 } @indels; | |
312 | |
313 # clean them | |
314 my @new_indels = (); | |
315 my $p = $indels[0]; | |
316 push @new_indels, $indels[0] if scalar @indels; | |
317 | |
318 for my $i(1..$#indels) { | |
319 my $c = $indels[$i]; | |
320 | |
321 if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) { | |
322 push @new_indels, $c; | |
323 $p = $c; | |
324 } | |
325 } | |
326 | |
327 $start_slice = $slice->start; | |
328 $start_strain = 1; | |
329 $sr_name = $slice->seq_region_name; | |
330 #$sr_name = 'ref_slice'; | |
331 | |
332 foreach my $indel(@new_indels) { | |
333 | |
334 $end_slice = $indel->{'ref_end'}; | |
335 $end_strain = $start_strain + ($end_slice - $start_slice); | |
336 | |
337 $allele_length = $indel->{'length'} + $indel->{'length_diff'}; | |
338 | |
339 $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain); | |
340 | |
341 $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length); | |
342 | |
343 $start_strain = $end_strain + $allele_length + 1; | |
344 $start_slice = $end_slice + $indel->{'length'} + 1; | |
345 } | |
346 | |
347 $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice)); | |
348 | |
349 # replace the mapper with the new mapper | |
350 $container->mapper($new_mapper); | |
351 | |
352 # change the container slice's length according to length diff | |
353 $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1)); | |
354 } | |
355 | |
356 return [$mapped_slice]; | |
357 } | |
358 | |
359 | |
360 1; | |
361 |