Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/AssemblyMapper.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::AssemblyMapper - | |
| 24 Handles mapping between two coordinate systems using the information | |
| 25 stored in the assembly table. | |
| 26 | |
| 27 =head1 SYNOPSIS | |
| 28 | |
| 29 $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); | |
| 30 $asma = $db->get_AssemblyMapperAdaptor(); | |
| 31 $csa = $db->get_CoordSystemAdaptor(); | |
| 32 | |
| 33 my $chr_cs = $csa->fetch_by_name( 'chromosome', 'NCBI33' ); | |
| 34 my $ctg_cs = $csa->fetch_by_name('contig'); | |
| 35 | |
| 36 $asm_mapper = $asma->fetch_by_CoordSystems( $chr_cs, $ctg_cs ); | |
| 37 | |
| 38 # Map to contig coordinate system from chromosomal. | |
| 39 @ctg_coords = | |
| 40 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, $chr_cs ); | |
| 41 | |
| 42 # Map to chromosome coordinate system from contig. | |
| 43 @chr_coords = | |
| 44 $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1, | |
| 45 $ctg_cs ); | |
| 46 | |
| 47 # List contig names for a region of chromosome. | |
| 48 @ctg_ids = $asm_mapper->list_ids( '13', 1_000_000, 1, $chr_cs ); | |
| 49 | |
| 50 # List chromosome names for a contig region. | |
| 51 @chr_ids = | |
| 52 $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, | |
| 53 $ctg_cs ); | |
| 54 | |
| 55 =head1 DESCRIPTION | |
| 56 | |
| 57 The AssemblyMapper is a database aware mapper which facilitates | |
| 58 conversion of coordinates between any two coordinate systems with a | |
| 59 relationship explicitly defined in the assembly table. In the future | |
| 60 it may be possible to perform multiple step (implicit) mapping between | |
| 61 coordinate systems. | |
| 62 | |
| 63 It is implemented using the Bio::EnsEMBL::Mapper object, which is a | |
| 64 generic mapper object between disjoint coordinate systems. | |
| 65 | |
| 66 =head1 METHODS | |
| 67 | |
| 68 =cut | |
| 69 | |
| 70 | |
| 71 package Bio::EnsEMBL::AssemblyMapper; | |
| 72 | |
| 73 use strict; | |
| 74 use warnings; | |
| 75 | |
| 76 use Bio::EnsEMBL::Mapper; | |
| 77 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate); | |
| 78 use Scalar::Util qw(weaken); | |
| 79 | |
# Labels handed to Bio::EnsEMBL::Mapper->new() by the constructor to name
# the 'from' (assembled) and 'to' (component) sides of the internal mapper.
my ( $ASSEMBLED, $COMPONENT ) = ( 'assembled', 'component' );

# Value the constructor stores as the initial 'max_pair_count'.
my $DEFAULT_MAX_PAIR_COUNT = 1_000;
| 84 | |
| 85 | |
| 86 =head2 new | |
| 87 | |
| 88 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
| 89 Arg [2] : Bio::EnsEMBL::CoordSystem $asm_cs | |
| 90 Arg [3] : Bio::EnsEMBL::CoordSystem $cmp_cs | |
| 91 Example : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems() | |
| 92 Description: Creates a new AssemblyMapper | |
| 93 Returntype : Bio::EnsEMBL::AssemblyMapper | |
| 94 Exceptions : Throws unless exactly two coord_systems are provided | |
| 95 Caller : AssemblyMapperAdaptor | |
| 96 Status : Stable | |
| 97 | |
| 98 =cut | |
| 99 | |
# Constructor.  Takes the owning adaptor followed by exactly two coordinate
# systems: the assembled one first, the component one second.
sub new {
  my ( $proto, $adaptor, @coord_systems ) = @_;

  my $class = ref($proto) || $proto;
  my $self  = bless( {}, $class );

  # Store the adaptor through the accessor, then let the adaptor prime
  # its cache before anything else happens.
  $self->adaptor($adaptor);
  $adaptor->cache_seq_ids_with_mult_assemblys();

  my $cs_count = scalar(@coord_systems);
  if ( $cs_count != 2 ) {
    throw(   'Can only map between two coordinate systems. '
           . $cs_count
           . ' were provided' );
  }

  # First coordinate system is the assembled one, second the component.
  my ( $asm_cs, $cmp_cs ) = @coord_systems;
  $self->{'asm_cs'} = $asm_cs;
  $self->{'cmp_cs'} = $cmp_cs;

  # The internal mapper treats the assembled side as 'from' and the
  # component side as 'to'.
  $self->{'mapper'} =
    Bio::EnsEMBL::Mapper->new( $ASSEMBLED, $COMPONENT, $asm_cs, $cmp_cs );

  $self->{'max_pair_count'} = $DEFAULT_MAX_PAIR_COUNT;

  return $self;
}
| 131 | |
| 132 =head2 max_pair_count | |
| 133 | |
| 134 Arg [1] : (optional) int $max_pair_count | |
| 135 Example : $mapper->max_pair_count(100000) | |
| 136 Description: Getter/Setter for the number of mapping pairs allowed | |
| 137 in the internal cache. This can be used to override | |
| 138 the default value (1000) to tune the performance and | |
| 139 memory usage for certain scenarios. Higher value | |
| 140 means bigger cache, more memory used. | |
| 141 Return type: int | |
| 142 Exceptions : None | |
| 143 Caller : General | |
| 144 Status : Stable | |
| 145 | |
| 146 =cut | |
| 147 | |
# Combined getter/setter for the 'max_pair_count' slot.  A defined argument
# (including 0) replaces the stored value; the current value is returned.
sub max_pair_count {
  my $self  = shift;
  my $value = shift;

  $self->{'max_pair_count'} = $value if defined $value;

  return $self->{'max_pair_count'};
}
| 157 | |
| 158 =head2 register_all | |
| 159 | |
| 160 Arg [1] : None | |
| 161 Example : $mapper->max_pair_count(10e6); | |
| 162 $mapper->register_all(); | |
| 163 Description: Pre-registers all assembly information in this | |
| 164 mapper. The cache size should be set to a | |
| 165 sufficiently large value so that all of the | |
| 166 information can be stored. This method is useful | |
| 167 when *a lot* of mapping will be done in regions | |
| 168 which are distributed around the genome. After | |
| 169 registration the mapper will consume a lot of memory | |
| 170 but will not have to perform any SQL and will be | |
| 171 faster. | |
| 172 Return type: None | |
| 173 Exceptions : None | |
| 174 Caller : Specialised programs doing a lot of mapping. | |
| 175 Status : Stable | |
| 176 | |
| 177 =cut | |
| 178 | |
# Delegates to the adaptor's register_all(), passing this mapper along.
# Whatever the adaptor call yields is passed back to the caller.
sub register_all {
  my $self    = shift;
  my $adaptor = $self->adaptor();

  return $adaptor->register_all($self);
}
| 184 | |
| 185 =head2 map | |
| 186 | |
| 187 Arg [1] : string $frm_seq_region | |
| 188 The name of the sequence region to transform FROM. | |
| 189 Arg [2] : int $frm_start | |
| 190 The start of the region to transform FROM. | |
| 191 Arg [3] : int $frm_end | |
| 192 The end of the region to transform FROM. | |
| 193 Arg [4] : int $strand | |
| 194 The strand of the region to transform FROM. | |
| 195 Arg [5] : Bio::EnsEMBL::CoordSystem | |
| 196 The coordinate system to transform FROM | |
| 197 Example : @coords = | |
| 198 $asm_mapper->map( 'X', 1_000_000, 2_000_000, 1, | |
| 199 $chr_cs ); | |
| 200 Description: Transforms coordinates from one coordinate system to | |
| 201 another. | |
| 202 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or | |
| 203 Bio::EnsEMBL::Mapper::Gap objects. | |
| 204 Exceptions : Throws if the specified FROM coordinate system is not | |
| 205 one of the coordinate systems associated with this | |
| 206 assembly mapper. | |
| 207 Caller : General | |
| 208 Status : Stable | |
| 209 | |
| 210 =cut | |
| 211 | |
# Maps a region given in one of this mapper's two coordinate systems onto
# the other one, registering the needed assembly information with the
# adaptor first.  Throws when fewer than six entries are in @_ or when the
# supplied coordinate system is neither the assembled nor the component one.
sub map {
  if ( @_ < 6 ) {
    throw('Incorrect number of arguments.');
  }

  # Anything passed after the coordinate system is accepted but not used.
  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
       $frm_cs )
    = @_;

  my $pair_mapper = $self->{'mapper'};
  my $assembled   = $self->{'asm_cs'};
  my $component   = $self->{'cmp_cs'};
  my $db_adaptor  = $self->{'adaptor'};

  # Translate the seq_region name into the ID the internal mapper works on.
  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  # Speed critical: reference (pointer) equality is tried before the more
  # expensive equals() method call.
  my $from_side;

  if ( $frm_cs == $component
       || ( $frm_cs != $assembled && $frm_cs->equals($component) ) )
  {
    # Component regions are registered once and then remembered.
    $db_adaptor->register_component( $self, $seq_region_id )
      unless $self->{'cmp_register'}->{$seq_region_id};

    $from_side = $COMPONENT;
  }
  elsif ( $frm_cs == $assembled || $frm_cs->equals($assembled) ) {
    # Always delegated; the adaptor is handed the requested range.
    $db_adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                     $frm_end );

    $from_side = $ASSEMBLED;
  }
  else {
    throw(
      sprintf(
        "Coordinate system %s %s is neither the assembled nor the "
          . "component coordinate system of this AssemblyMapper",
        $frm_cs->name(), $frm_cs->version() ) );
  }

  return
    $pair_mapper->map_coordinates( $seq_region_id, $frm_start, $frm_end,
                                   $frm_strand, $from_side );
}
| 265 | |
| 266 | |
| 267 =head2 flush | |
| 268 | |
| 269 Args : None | |
| 270 Example : None | |
| 271 Description: Remove all cached items from this AssemblyMapper. | |
| 272 Return type: None | |
| 273 Exceptions : None | |
| 274 Caller : AssemblyMapperAdaptor | |
| 275 Status : Stable | |
| 276 | |
| 277 =cut | |
| 278 | |
# Empties the internal mapper and forgets which component and assembled
# regions have been registered.
sub flush {
  my $self = shift;

  my $pair_mapper = $self->{'mapper'};
  $pair_mapper->flush();

  # Fresh, empty registries for both sides.
  $self->{'cmp_register'} = {};
  $self->{'asm_register'} = {};
}
| 286 | |
| 287 =head2 size | |
| 288 | |
| 289 Args : None | |
| 290 Example : $num_of_pairs = $mapper->size(); | |
| 291 Description: Returns the number of pairs currently stored. | |
| 292 Return type: int | |
| 293 Exceptions : None | |
| 294 Caller : General | |
| 295 Status : Stable | |
| 296 | |
| 297 =cut | |
| 298 | |
# Reports the 'pair_count' value held by the internal mapper.
sub size {
  my $self        = shift;
  my $pair_mapper = $self->{'mapper'};

  return $pair_mapper->{'pair_count'};
}
| 304 | |
| 305 =head2 fastmap | |
| 306 | |
| 307 Arg [1] : string $frm_seq_region | |
| 308 The name of the sequence region to transform FROM. | |
| 309 Arg [2] : int $frm_start | |
| 310 The start of the region to transform FROM. | |
| 311 Arg [3] : int $frm_end | |
| 312 The end of the region to transform FROM. | |
| 313 Arg [4] : int $strand | |
| 314 The strand of the region to transform FROM. | |
| 315 Arg [5] : Bio::EnsEMBL::CoordSystem | |
| 316 The coordinate system to transform FROM. | |
| 317 Example : @coords = | |
| 318 $asm_mapper->fastmap( 'X', 1_000_000, 2_000_000, 1, | |
| 319 $chr_cs ); | |
| 320 Description: Transforms coordinates from one coordinate system to | |
| 321 another. | |
| 322 Return type: List of Bio::EnsEMBL::Mapper::Coordinate and/or | |
| 323 Bio::EnsEMBL::Mapper::Gap objects. | |
| 324 Exceptions : Throws if the specified FROM coordinate system is not | |
| 325 one of the coordinate systems associated with this | |
| 326 assembly mapper. | |
| 327 Caller : General | |
| 328 Status : Stable | |
| 329 | |
| 330 =cut | |
| 331 | |
# Same registration logic as map(), but the final lookup goes through the
# internal mapper's fastmap().  Requires exactly the invocant plus five
# arguments; throws otherwise, and also when the supplied coordinate system
# is neither the assembled nor the component one.
sub fastmap {
  throw('Incorrect number of arguments.') if ( @_ != 6 );

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
       $frm_cs )
    = @_;

  my $pair_mapper = $self->{'mapper'};
  my $assembled   = $self->{'asm_cs'};
  my $component   = $self->{'cmp_cs'};
  my $db_adaptor  = $self->adaptor();

  # Translate the seq_region name into the ID the internal mapper works on.
  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  # Speed critical: reference (pointer) equality is tried before the more
  # expensive equals() method call.
  my $from_side;

  if ( $frm_cs == $component
       || ( $frm_cs != $assembled && $frm_cs->equals($component) ) )
  {
    # Component regions are registered once and then remembered.
    $db_adaptor->register_component( $self, $seq_region_id )
      unless $self->{'cmp_register'}->{$seq_region_id};

    $from_side = $COMPONENT;
  }
  elsif ( $frm_cs == $assembled || $frm_cs->equals($assembled) ) {
    # Always delegated; the adaptor is handed the requested range.
    $db_adaptor->register_assembled( $self, $seq_region_id, $frm_start,
                                     $frm_end );

    $from_side = $ASSEMBLED;
  }
  else {
    throw(
      sprintf(
        "Coordinate system %s %s is neither the assembled nor the "
          . "component coordinate system of this AssemblyMapper",
        $frm_cs->name(), $frm_cs->version() ) );
  }

  return
    $pair_mapper->fastmap( $seq_region_id, $frm_start, $frm_end,
                           $frm_strand, $from_side );
}
| 389 | |
| 390 =head2 list_ids | |
| 391 | |
| 392 Arg [1] : string $frm_seq_region | |
| 393 The name of the sequence region of interest. | |
| 394 Arg [2] : int $frm_start | |
| 395 The start of the region of interest. | |
| 396 Arg [3] : int $frm_end | |
| 397 The end of the region of interest. | |
| 398 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs | |
| 399 The coordinate system to obtain overlapping IDs of. | |
| 400 Example : foreach my $id ( | |
| 401 $asm_mapper->list_ids( 'X', 1, 1000, $ctg_cs ) ) | |
| 402 { ... } | |
| 403 Description: Retrieves a list of overlapping seq_region internal | |
| 404 identifiers of another coordinate system. This is | |
| 405 the same as the list_seq_regions method but uses | |
| 406 internal identifiers rather than seq_region strings. | |
| 407 Return type: List of ints. | |
| 408 Exceptions : None | |
| 409 Caller : General | |
| 410 Status : Stable | |
| 411 | |
| 412 =cut | |
| 413 | |
# Lists the identifiers found on the opposite side of every mapper pair
# that overlaps the given region.  Requires exactly the invocant plus four
# arguments; throws otherwise, and also when the coordinate system is
# neither the component nor the assembled one.
sub list_ids {
  throw('Incorrect number of arguments.') if ( @_ != 5 );

  my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs ) = @_;

  # Translate the seq_region name into the ID the internal mapper works on.
  my $seq_region_id =
    $self->adaptor()
    ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];

  if ( $frm_cs->equals( $self->component_CoordSystem() ) ) {

    unless ( $self->have_registered_component($seq_region_id) ) {
      $self->adaptor->register_component( $self, $seq_region_id );
    }

    # The constructor loaded the assembled side as 'from', so the 'from'
    # end of each pair is the overlapping assembled region.
    return
      map { $_->from()->id() }
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $COMPONENT );
  }

  if ( $frm_cs->equals( $self->assembled_CoordSystem() ) ) {

    $self->adaptor->register_assembled( $self, $seq_region_id,
                                        $frm_start, $frm_end );

    # The constructor loaded the component side as 'to', so the 'to' end
    # of each pair is the overlapping component region.
    return
      map { $_->to->id() }
      $self->mapper()
      ->list_pairs( $seq_region_id, $frm_start, $frm_end, $ASSEMBLED );
  }

  throw(
    sprintf(
      "Coordinate system %s %s is neither the assembled nor the "
        . "component coordinate system of this AssemblyMapper",
      $frm_cs->name(), $frm_cs->version() ) );
}
| 466 | |
| 467 #sub list_seq_regions { | |
| 468 # throw('Incorrect number of arguments.') if(@_ != 5); | |
| 469 # my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_; | |
| 470 | |
| 471 # if($frm_cs->equals($self->component_CoordSystem())) { | |
| 472 | |
| 473 # if(!$self->have_registered_component($seq_region_id)) { | |
| 474 # $self->adaptor->register_component($self,$seq_region_id); | |
| 475 # } | |
| 476 | |
| 477 # #pull out the 'from' identifiers of the mapper pairs. The | |
| 478 # #we loaded the assembled side as the 'from' side in the constructor | |
| 479 # return | |
| 480 # map {$_->from()->id()} | |
| 481 # $self->mapper()->list_pairs($seq_region_id, $frm_start, | |
| 482 # $frm_end, $COMPONENT); | |
| 483 | |
| 484 # } elsif($frm_cs->equals($self->assembled_CoordSystem())) { | |
| 485 | |
| 486 # $self->adaptor->register_assembled($self, | |
| 487 # $frm_seq_region,$frm_start,$frm_end); | |
| 488 | |
| 489 # #pull out the 'to' identifiers of the mapper pairs | |
| 490 # #we loaded the component side as the 'to' coord system in the constructor | |
| 491 # return | |
| 492 # map {$_->to->id()} | |
| 493 # $self->mapper()->list_pairs($frm_seq_region, $frm_start, | |
| 494 # $frm_end, $ASSEMBLED); | |
| 495 # } else { | |
| 496 # throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version . | |
| 497 # " is neither the assembled nor the component coordinate system " . | |
| 498 # " of this AssemblyMapper"); | |
| 499 # } | |
| 500 #} | |
| 501 | |
| 502 | |
| 503 =head2 list_seq_regions | |
| 504 | |
| 505 Arg [1] : string $frm_seq_region | |
| 506 The name of the sequence region of interest. | |
| 507 Arg [2] : int $frm_start | |
| 508 The start of the region of interest. | |
| 509 Arg [3] : int $frm_end | |
| 510 The end of the region of interest. | |
| 511 Arg [4] : Bio::EnsEMBL::CoordSystem $frm_cs | |
| 512 The coordinate system to obtain overlapping IDs of. | |
| 513 Example : foreach my $id ( | |
| 514 $asm_mapper->list_seq_regions( | |
| 515 'X', 1, 1000, $chr_cs | |
| 516 ) ) { ... } | |
| 517 Description: Retrieves a list of overlapping seq_region names of | |
| 518 another coordinate system. This is the same as the | |
| 519 list_ids method but uses seq_region names rather | |
| 520 than internal IDs. | |
| 521 Return type: List of strings. | |
| 522 Exceptions : None | |
| 523 Caller : General | |
| 524 Status : Stable | |
| 525 | |
| 526 =cut | |
| 527 | |
# Returns the overlapping regions of the other coordinate system: the IDs
# obtained from list_ids() are run through the adaptor's
# seq_ids_to_regions() and the resulting list is returned.
#
# Takes the same four arguments as list_ids() (seq_region name, start, end,
# coordinate system) and throws on any other argument count.  Any exception
# raised by list_ids() propagates unchanged.
sub list_seq_regions {
  if ( @_ != 5 ) {
    throw('Incorrect number of arguments.');
  }

  my ( $self, $frm_seq_region, $frm_start, $frm_end, $frm_cs ) = @_;

  # Collect the IDs of the overlapping regions on the other side.
  my @seq_ids =
    $self->list_ids( $frm_seq_region, $frm_start, $frm_end, $frm_cs );

  # The conversion takes only the IDs, so the target coordinate system
  # does not have to be worked out here.
  return @{ $self->adaptor()->seq_ids_to_regions( \@seq_ids ) };
}
| 553 | |
| 554 #sub list_ids { | |
| 555 # throw('Incorrect number of arguments.') if(@_ != 5); | |
| 556 # my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_; | |
| 557 | |
| 558 # #retrieve the seq_region names | |
| 559 # my @seq_regs = | |
| 560 # $self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs); | |
| 561 | |
| 562 # #The seq_regions are from the 'to' coordinate system not the | |
| 563 # #from coordinate system we used to obtain them | |
| 564 # my $to_cs; | |
| 565 # if($frm_cs->equals($self->assembled_CoordSystem())) { | |
| 566 # $to_cs = $self->component_CoordSystem(); | |
| 567 # } else { | |
| 568 # $to_cs = $self->assembled_CoordSystem(); | |
| 569 # } | |
| 570 | |
| 571 # #convert them to ids | |
| 572 # return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)}; | |
| 573 #} | |
| 574 | |
| 575 =head2 have_registered_component | |
| 576 | |
| 577 Arg [1] : string $cmp_seq_region | |
| 578 The name of the sequence region to check for | |
| 579 registration. | |
| 580 Example : if ( $asm_mapper->have_registered_component('AL240214.1') ) {} | |
| 581 Description: Returns true if a given component region has | |
| 582 been registered with this assembly mapper. This | |
| 583 should only be called by this class or the | |
| 584 AssemblyMapperAdaptor. In other words, do not use | |
| 585 this method unless you really know what you are | |
| 586 doing. | |
| 587 Return type: Boolean (0 or 1) | |
| 588 Exceptions : Throws on incorrect arguments. | |
| 589 Caller : Internal, AssemblyMapperAdaptor | |
| 590 Status : Stable | |
| 591 | |
| 592 =cut | |
| 593 | |
# Answers 1 when the given key is present in the 'cmp_register' table and
# 0 otherwise.  Throws when the argument is undefined.
sub have_registered_component {
  my $self           = shift;
  my $cmp_seq_region = shift;

  throw('cmp_seq_region argument is required')
    unless defined($cmp_seq_region);

  # Presence of the key is what counts, not the stored value.
  return exists( $self->{'cmp_register'}->{$cmp_seq_region} ) ? 1 : 0;
}
| 607 | |
| 608 =head2 have_registered_assembled | |
| 609 | |
| 610 Arg [1] : string $asm_seq_region | |
| 611 The name of the sequence region to check for | |
| 612 registration. | |
| 613 Arg [2] : int $chunk_id | |
| 614 The chunk number of the provided seq_region to check | |
| 615 for registration. | |
| 616 Example : if ( $asm_mapper->have_registered_assembled( 'X', 9 ) ) { } | |
| 617 Description: Returns true if a given assembled region chunk | |
| 618 has been registered with this assembly mapper. | |
| 619 This should only be called by this class or the | |
| 620 AssemblyMapperAdaptor. In other words, do not use | |
| 621 this method unless you really know what you are | |
| 622 doing. | |
| 623 Return type: Boolean (0 or 1) | |
| 624 Exceptions : Throws on incorrect arguments | |
| 625 Caller : Internal, AssemblyMapperAdaptor | |
| 626 Status : Stable | |
| 627 | |
| 628 =cut | |
| 629 | |
# Answers 1 when the given chunk of the given region is present in the
# 'asm_register' table and 0 otherwise.  Throws when either argument is
# undefined.
sub have_registered_assembled {
  my $self = shift;
  my ( $asm_seq_region, $chunk_id ) = @_;

  throw('asm_seq_region argument is required')
    unless defined($asm_seq_region);
  throw('chunk_id is required')
    unless defined($chunk_id);

  # Presence of the chunk key is what counts, not the stored value.
  return
    exists( $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} )
    ? 1
    : 0;
}
| 648 | |
| 649 | |
| 650 =head2 register_component | |
| 651 | |
| 652 Arg [1] : integer $cmp_seq_region | |
| 653 The dbID of the component sequence region to | |
| 654 register. | |
| 655 Example : $asm_mapper->register_component('AL312341.1'); | |
| 656 Description: Flags a given component sequence region as registered | |
| 657 in this assembly mapper. This should only be called | |
| 658 by this class or the AssemblyMapperAdaptor. | |
| 659 Return type: None | |
| 660 Exceptions : Throws on incorrect arguments | |
| 661 Caller : Internal, AssemblyMapperAdaptor | |
| 662 Status : Stable | |
| 663 | |
| 664 =cut | |
| 665 | |
# Marks the given key as registered by storing 1 under it in the
# 'cmp_register' table.  Throws when the argument is undefined.
sub register_component {
  my $self           = shift;
  my $cmp_seq_region = shift;

  throw('cmp_seq_region argument is required')
    unless defined($cmp_seq_region);

  $self->{'cmp_register'}->{$cmp_seq_region} = 1;
}
| 675 | |
| 676 =head2 register_assembled | |
| 677 | |
| 678 Arg [1] : integer $asm_seq_region | |
| 679 The dbID of the sequence region to register. | |
| 680 Arg [2] : int $chunk_id | |
| 681 The chunk number of the provided seq_region to register. | |
| 682 Example : $asm_mapper->register_assembled( 'X', 4 ); | |
| 683 Description: Flags a given assembled region as registered in this | |
| 684 assembly mapper. This should only be called by this | |
| 685 class or the AssemblyMapperAdaptor. Do not call this | |
| 686 method unless you really know what you are doing. | |
| 687 Return type: None | |
| 688 Exceptions : Throws on incorrect arguments | |
| 689 Caller : Internal, AssemblyMapperAdaptor | |
| 690 Status : Stable | |
| 691 | |
| 692 =cut | |
| 693 | |
# Marks one chunk of an assembled region as registered by storing 1 under
# {'asm_register'}{$asm_seq_region}{$chunk_id}.
#
# Throws when either argument is undefined.
sub register_assembled {
  my ( $self, $asm_seq_region, $chunk_id ) = @_;

  if ( !defined($asm_seq_region) ) {
    throw('asm_seq_region argument is required');
  }
  if ( !defined($chunk_id) ) {
    # Message previously read 'srgument'.
    throw('chunk_id argument is required');
  }

  $self->{'asm_register'}->{$asm_seq_region}->{$chunk_id} = 1;
}
| 706 | |
| 707 =head2 mapper | |
| 708 | |
| 709 Arg [1] : None | |
| 710 Example : $mapper = $asm_mapper->mapper(); | |
| 711 Description: Retrieves the internal mapper used by this Assembly | |
| 712 Mapper. This is unlikely to be useful unless you | |
| 713 _really_ know what you are doing. | |
| 714 Return type: Bio::EnsEMBL::Mapper | |
| 715 Exceptions : None | |
| 716 Caller : Internal, AssemblyMapperAdaptor | |
| 717 Status : Stable | |
| 718 | |
| 719 =cut | |
| 720 | |
# Read-only accessor for the internal mapper stored under 'mapper'.
sub mapper {
  my $self = shift;

  return $self->{'mapper'};
}
| 726 | |
| 727 =head2 assembled_CoordSystem | |
| 728 | |
| 729 Arg [1] : None | |
| 730 Example : $cs = $asm_mapper->assembled_CoordSystem(); | |
| 731 Description: Retrieves the assembled CoordSystem from this | |
| 732 assembly mapper. | |
| 733 Return type: Bio::EnsEMBL::CoordSystem | |
| 734 Exceptions : None | |
| 735 Caller : Internal, AssemblyMapperAdaptor | |
| 736 Status : Stable | |
| 737 | |
| 738 =cut | |
| 739 | |
# Read-only accessor for the assembled coordinate system ('asm_cs').
sub assembled_CoordSystem {
  my $self = shift;

  return $self->{'asm_cs'};
}
| 745 | |
| 746 =head2 component_CoordSystem | |
| 747 | |
| 748 Arg [1] : None | |
| 749 Example : $cs = $asm_mapper->component_CoordSystem(); | |
| 750 Description: Retrieves the component CoordSystem from this | |
| 751 assembly mapper. | |
| 752 Return type: Bio::EnsEMBL::CoordSystem | |
| 753 Exceptions : None | |
| 754 Caller : Internal, AssemblyMapperAdaptor | |
| 755 Status : Stable | |
| 756 | |
| 757 =cut | |
| 758 | |
# Read-only accessor for the component coordinate system ('cmp_cs').
sub component_CoordSystem {
  my $self = shift;

  return $self->{'cmp_cs'};
}
| 764 | |
| 765 =head2 adaptor | |
| 766 | |
| 767 Arg [1] : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor $adaptor | |
| 768 Description: Getter/setter for this object's database adaptor. | |
| 769 Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor | |
| 770 Exceptions : None | |
| 771 Caller : General | |
| 772 Status : Stable | |
| 773 | |
| 774 =cut | |
| 775 | |
# Combined getter/setter for the adaptor.  A defined argument is stored
# under 'adaptor' and the stored reference is weakened, so this object
# does not keep the adaptor alive.  Returns the stored adaptor.
sub adaptor {
  my $self  = shift;
  my $value = shift;

  if ( defined($value) ) {
    $self->{'adaptor'} = $value;
    weaken( $self->{'adaptor'} );
  }

  return $self->{'adaptor'};
}
| 785 | |
| 786 =head2 in_assembly | |
| 787 | |
| 788 Description: DEPRECATED, use map() or list_ids() instead. | |
| 789 | |
| 790 =cut | |
| 791 | |
# DEPRECATED - use map() or list_ids() instead.
#
# Returns true when list_ids() on the mapper between the object's
# coordinate system and the top-level coordinate system yields at least
# one identifier for the object's region.
#
# Arg [1]: an object providing coord_system(), seq_region(), start() and
#          end().
sub in_assembly {
  my ( $self, $object ) = @_;

  deprecate('Use map() or list_ids() instead.');

  # This class defines no db() method, so the database handle has to be
  # reached through the adaptor.
  # NOTE(review): assumes the adaptor provides db(), as Ensembl
  # BaseAdaptor subclasses do - confirm.
  my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();

  my $top_level = $csa->fetch_top_level();

  my $asm_mapper =
    $self->adaptor()
    ->fetch_by_CoordSystems( $object->coord_system(), $top_level );

  my @ids = $asm_mapper->list_ids( $object->seq_region(),
                                   $object->start(),
                                   $object->end(),
                                   $object->coord_system() );

  return ( @ids > 0 );
}
| 812 | |
| 813 =head2 map_coordinates_to_assembly | |
| 814 | |
| 815 Description: DEPRECATED, use map() instead. | |
| 816 | |
| 817 =cut | |
| 818 | |
# DEPRECATED - use map() instead.
#
# Forwards to map() with the component coordinate system as the FROM
# system.  The previous call to contig_CoordSystem() named a method this
# class does not define.
sub map_coordinates_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );
}
| 830 | |
| 831 =head2 fast_to_assembly | |
| 832 | |
| 833 Description: DEPRECATED, use map() instead. | |
| 834 | |
| 835 =cut | |
| 836 | |
# DEPRECATED - use map() instead.
#
# Forwards to map() with the component coordinate system as the FROM
# system.  The previous call to contig_CoordSystem() named a method this
# class does not define.
sub fast_to_assembly {
  my ( $self, $contig_id, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  # Not sure if contig_id is seq_region_id or name...
  return
    $self->map( $contig_id, $start, $end, $strand,
                $self->component_CoordSystem() );
}
| 847 | |
| 848 =head2 map_coordinates_to_rawcontig | |
| 849 | |
| 850 Description: DEPRECATED, use map() instead. | |
| 851 | |
| 852 =cut | |
| 853 | |
# DEPRECATED - use map() instead.
#
# Forwards to map() with the assembled coordinate system as the FROM
# system.
sub map_coordinates_to_rawcontig {
  my $self = shift;
  my ( $chr_name, $start, $end, $strand ) = @_;

  deprecate('Use map() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->map( $chr_name, $start, $end, $strand, $asm_cs );
}
| 863 | |
| 864 =head2 list_contig_ids | |
| 865 | |
| 866 Description: DEPRECATED, use list_ids() instead. | |
| 867 | |
| 868 =cut | |
| 869 | |
# DEPRECATED - use list_ids() instead.
#
# Forwards to list_ids() with the assembled coordinate system as the FROM
# system.
sub list_contig_ids {
  my $self = shift;
  my ( $chr_name, $start, $end ) = @_;

  deprecate('Use list_ids() instead.');

  my $asm_cs = $self->assembled_CoordSystem();

  return $self->list_ids( $chr_name, $start, $end, $asm_cs );
}
| 879 | |
| 880 1; |
