Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2009 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices | |
| 24 representing alternative assemblies | |
| 25 | |
| 26 =head1 SYNOPSIS | |
| 27 | |
| 28 my $slice = | |
| 29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 ); | |
| 30 | |
| 31 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice); | |
| 32 | |
| 33 # create a new strain slice adaptor and attach it to the MSC | |
| 34 my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db); | |
| 35 $msc->set_StrainSliceAdaptor($ssa); | |
| 36 | |
| 37 # now attach strain | |
| 38 $msc->attach_StrainSlice('Watson'); | |
| 39 | |
| 40 =head1 DESCRIPTION | |
| 41 | |
| 42 NOTE: this code is under development and not fully functional nor tested | |
| 43 yet. Use only for development. | |
| 44 | |
| 45 This adaptor is a factory for creating MappedSlices representing | |
| 46 strains and attaching them to a MappedSliceContainer. A mapper will be created | |
| 47 to map between the reference slice and the common container slice coordinate | |
| 48 system. | |
| 49 | |
| 50 =head1 METHODS | |
| 51 | |
| 52 new | |
| 53 fetch_by_name | |
| 54 | |
| 55 =head1 REALTED MODULES | |
| 56 | |
| 57 Bio::EnsEMBL::MappedSlice | |
| 58 Bio::EnsEMBL::MappedSliceContainer | |
| 59 Bio::EnsEMBL::AlignStrainSlice | |
| 60 Bio::EnsEMBL::StrainSlice | |
| 61 | |
| 62 =cut | |
| 63 | |
| 64 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor; | |
| 65 | |
| 66 use strict; | |
| 67 use warnings; | |
| 68 no warnings 'uninitialized'; | |
| 69 | |
| 70 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 71 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
| 72 use Bio::EnsEMBL::MappedSlice; | |
| 73 use Bio::EnsEMBL::Mapper; | |
| 74 use Bio::EnsEMBL::DBSQL::BaseAdaptor; | |
| 75 | |
| 76 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); | |
| 77 | |
| 78 | |
| 79 =head2 new | |
| 80 | |
| 81 Example : my $strain_slice_adaptor = | |
| 82 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new; | |
| 83 Description : Constructor. | |
| 84 Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor | |
| 85 Exceptions : none | |
| 86 Caller : general | |
| 87 Status : At Risk | |
| 88 : under development | |
| 89 | |
| 90 =cut | |
| 91 | |
| 92 sub new { | |
| 93 my $caller = shift; | |
| 94 | |
| 95 my $class = ref($caller) || $caller; | |
| 96 my $self = $class->SUPER::new(@_); | |
| 97 | |
| 98 return $self; | |
| 99 } | |
| 100 | |
| 101 | |
| 102 =head2 fetch_by_name | |
| 103 | |
| 104 Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container | |
| 105 to attach MappedSlices to | |
| 106 Arg[2] : String $name - the name of the strain to fetch | |
| 107 Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') }; | |
| 108 Description : Creates a MappedSlice representing a version of the container's | |
| 109 reference slice with variant alleles from the named strain | |
| 110 Return type : listref of Bio::EnsEMBL::MappedSlice | |
| 111 Exceptions : thrown on wrong or missing arguments | |
| 112 Caller : general, Bio::EnsEMBL::MappedSliceContainer | |
| 113 Status : At Risk | |
| 114 : under development | |
| 115 | |
| 116 =cut | |
| 117 | |
| 118 sub fetch_by_name { | |
| 119 my $self = shift; | |
| 120 my $container = shift; | |
| 121 my $name = shift; | |
| 122 | |
| 123 # argueent check | |
| 124 unless ($container and ref($container) and | |
| 125 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) { | |
| 126 throw("Need a MappedSliceContainer."); | |
| 127 } | |
| 128 | |
| 129 unless ($name) { | |
| 130 throw("Need a strain name."); | |
| 131 } | |
| 132 | |
| 133 my $slice = $container->ref_slice; | |
| 134 | |
| 135 # get a connection to the variation DB | |
| 136 my $variation_db = $self->db->get_db_adaptor('variation'); | |
| 137 | |
| 138 unless($variation_db) { | |
| 139 warning("Variation database must be attached to core database to retrieve variation information"); | |
| 140 return ''; | |
| 141 } | |
| 142 | |
| 143 # now get an allele feature adaptor | |
| 144 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor; | |
| 145 | |
| 146 # check we got it | |
| 147 unless(defined $af_adaptor) { | |
| 148 warning("Not possible to retrieve AlleleFeatureAdaptor from variation database"); | |
| 149 return ''; | |
| 150 } | |
| 151 | |
| 152 # now get an individual adaptor | |
| 153 my $ind_adaptor = $variation_db->get_IndividualAdaptor; | |
| 154 | |
| 155 # check we got it | |
| 156 unless(defined $ind_adaptor) { | |
| 157 warning("Not possible to retrieve IndividualAdaptor from variation database"); | |
| 158 return ''; | |
| 159 } | |
| 160 | |
| 161 # fetch individual object for this strain name | |
| 162 my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)}; | |
| 163 | |
| 164 # check we got a result | |
| 165 unless(defined $ind) { | |
| 166 warn("Strain ".$name." not found in the database"); | |
| 167 return ''; | |
| 168 } | |
| 169 | |
| 170 | |
| 171 ## MAP STRAIN SLICE TO REF SLICE | |
| 172 ################################ | |
| 173 | |
| 174 # create a mapper | |
| 175 my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice'); | |
| 176 | |
| 177 # create a mapped_slice object | |
| 178 my $mapped_slice = Bio::EnsEMBL::MappedSlice->new( | |
| 179 -ADAPTOR => $self, | |
| 180 -CONTAINER => $container, | |
| 181 -NAME => $slice->name."\#strain_$name", | |
| 182 ); | |
| 183 | |
| 184 # get the strain slice | |
| 185 my $strain_slice = $slice->get_by_strain($ind->name); | |
| 186 | |
| 187 # get all allele features for this slice and individual | |
| 188 #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)}; | |
| 189 | |
| 190 # get allele features with coverage info | |
| 191 my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1); | |
| 192 | |
| 193 # check we got some data | |
| 194 #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0]; | |
| 195 | |
| 196 | |
| 197 my $start_slice = $slice->start; | |
| 198 my $start_strain = 1; | |
| 199 my $sr_name = $slice->seq_region_name; | |
| 200 #my $sr_name = 'ref_slice'; | |
| 201 my ($end_slice, $end_strain, $allele_length); | |
| 202 | |
| 203 my $indel_flag = 0; | |
| 204 my $total_length_diff = 0; | |
| 205 | |
| 206 # check for AFs | |
| 207 if(defined($afs) && scalar @$afs) { | |
| 208 | |
| 209 # go through each AF | |
| 210 foreach my $af(@$afs) { | |
| 211 | |
| 212 # find out if it changes the length | |
| 213 if($af->length_diff != 0) { | |
| 214 | |
| 215 $indel_flag = 1; | |
| 216 $total_length_diff += $af->length_diff; | |
| 217 | |
| 218 # get the allele length | |
| 219 $allele_length = $af->length + $af->length_diff(); | |
| 220 | |
| 221 $end_slice = $slice->start + $af->start() - 2; | |
| 222 | |
| 223 if ($end_slice >= $start_slice){ | |
| 224 $end_strain = $end_slice - $start_slice + $start_strain; | |
| 225 | |
| 226 #add the sequence that maps | |
| 227 $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice); | |
| 228 | |
| 229 #add the indel | |
| 230 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); | |
| 231 | |
| 232 $start_strain = $end_strain + $allele_length + 1; | |
| 233 } | |
| 234 | |
| 235 else{ | |
| 236 | |
| 237 #add the indel | |
| 238 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length); | |
| 239 | |
| 240 $start_strain += $allele_length; | |
| 241 } | |
| 242 | |
| 243 $start_slice = $end_slice + $af->length+ 1; | |
| 244 } | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 # add the remaining coordinates (or the whole length if no indels found) | |
| 249 $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end); | |
| 250 | |
| 251 # add the slice/mapper pair | |
| 252 $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper); | |
| 253 | |
| 254 | |
| 255 | |
| 256 ## MAP REF_SLICE TO CONTAINER SLICE | |
| 257 ################################### | |
| 258 | |
| 259 if($total_length_diff > 0) { | |
| 260 | |
| 261 # create a new mapper | |
| 262 my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container'); | |
| 263 | |
| 264 # get existing pairs | |
| 265 my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container'); | |
| 266 my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice'); | |
| 267 | |
| 268 # we need a list of indels (specifically inserts) | |
| 269 my @indels; | |
| 270 | |
| 271 # go through existing first | |
| 272 foreach my $pair(@existing_pairs) { | |
| 273 | |
| 274 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { | |
| 275 my $indel; | |
| 276 $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start); | |
| 277 | |
| 278 # we're only interested in inserts here, not deletions | |
| 279 next unless $indel->{'length_diff'} > 0; | |
| 280 | |
| 281 $indel->{'ref_start'} = $pair->from->start; | |
| 282 $indel->{'ref_end'} = $pair->from->end; | |
| 283 $indel->{'length'} = $pair->to->end - $pair->to->start; | |
| 284 | |
| 285 push @indels, $indel; | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 # now new ones | |
| 290 foreach my $pair(@new_pairs) { | |
| 291 | |
| 292 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) { | |
| 293 my $indel; | |
| 294 $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start); | |
| 295 | |
| 296 # we're only interested in inserts here, not deletions | |
| 297 next unless $indel->{'length_diff'} > 0; | |
| 298 | |
| 299 $indel->{'ref_start'} = $pair->to->start; | |
| 300 $indel->{'ref_end'} = $pair->to->end; | |
| 301 $indel->{'length'} = $pair->from->end - $pair->from->start; | |
| 302 | |
| 303 push @indels, $indel; | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 # sort them | |
| 308 @indels = sort { | |
| 309 $a->{'ref_start'} <=> $b->{'ref_start'} || # by position | |
| 310 $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest | |
| 311 } @indels; | |
| 312 | |
| 313 # clean them | |
| 314 my @new_indels = (); | |
| 315 my $p = $indels[0]; | |
| 316 push @new_indels, $indels[0] if scalar @indels; | |
| 317 | |
| 318 for my $i(1..$#indels) { | |
| 319 my $c = $indels[$i]; | |
| 320 | |
| 321 if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) { | |
| 322 push @new_indels, $c; | |
| 323 $p = $c; | |
| 324 } | |
| 325 } | |
| 326 | |
| 327 $start_slice = $slice->start; | |
| 328 $start_strain = 1; | |
| 329 $sr_name = $slice->seq_region_name; | |
| 330 #$sr_name = 'ref_slice'; | |
| 331 | |
| 332 foreach my $indel(@new_indels) { | |
| 333 | |
| 334 $end_slice = $indel->{'ref_end'}; | |
| 335 $end_strain = $start_strain + ($end_slice - $start_slice); | |
| 336 | |
| 337 $allele_length = $indel->{'length'} + $indel->{'length_diff'}; | |
| 338 | |
| 339 $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain); | |
| 340 | |
| 341 $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length); | |
| 342 | |
| 343 $start_strain = $end_strain + $allele_length + 1; | |
| 344 $start_slice = $end_slice + $indel->{'length'} + 1; | |
| 345 } | |
| 346 | |
| 347 $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice)); | |
| 348 | |
| 349 # replace the mapper with the new mapper | |
| 350 $container->mapper($new_mapper); | |
| 351 | |
| 352 # change the container slice's length according to length diff | |
| 353 $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1)); | |
| 354 } | |
| 355 | |
| 356 return [$mapped_slice]; | |
| 357 } | |
| 358 | |
| 359 | |
| 360 1; | |
| 361 |
