Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Variation/VariationFeature.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 # Ensembl module for Bio::EnsEMBL::Variation::VariationFeature | |
| 22 # | |
| 23 # Copyright (c) 2004 Ensembl | |
| 24 # | |
| 25 | |
| 26 | |
| 27 =head1 NAME | |
| 28 | |
| 29 Bio::EnsEMBL::Variation::VariationFeature - A genomic position for a nucleotide variation. | |
| 30 | |
| 31 =head1 SYNOPSIS | |
| 32 | |
| 33 # Variation feature representing a single nucleotide polymorphism | |
| 34 $vf = Bio::EnsEMBL::Variation::VariationFeature->new | |
| 35 (-start => 100, | |
| 36 -end => 100, | |
| 37 -strand => 1, | |
| 38 -slice => $slice, | |
| 39 -allele_string => 'A/T', | |
| 40 -variation_name => 'rs635421', | |
| 41 -map_weight => 1, | |
| 42 -variation => $v); | |
| 43 | |
| 44 # Variation feature representing a 2bp insertion | |
| 45 $vf = Bio::EnsEMBL::Variation::VariationFeature->new | |
| 46 (-start => 1522, | |
| 47 -end => 1521, # end = start-1 for insert | |
| 48 -strand => -1, | |
| 49 -slice => $slice, | |
| 50 -allele_string => '-/AA', | |
| 51 -variation_name => 'rs12111', | |
| 52 -map_weight => 1, | |
| 53 -variation => $v2); | |
| 54 | |
| 55 ... | |
| 56 | |
| 57 # a variation feature is like any other ensembl feature, can be | |
| 58 # transformed etc. | |
| 59 $vf = $vf->transform('supercontig'); | |
| 60 | |
| 61 print $vf->start(), "-", $vf->end(), '(', $vf->strand(), ')', "\n"; | |
| 62 | |
| 63 print $vf->name(), ":", $vf->allele_string(); | |
| 64 | |
| 65 # Get the Variation object which this feature represents the genomic | |
| 66 # position of. If not already retrieved from the DB, this will be | |
| 67 # transparently lazy-loaded | |
| 68 my $v = $vf->variation(); | |
| 69 | |
| 70 =head1 DESCRIPTION | |
| 71 | |
| 72 This is a class representing the genomic position of a nucleotide variation | |
| 73 from the ensembl-variation database. The actual variation information is | |
| 74 represented by an associated Bio::EnsEMBL::Variation::Variation object. Some | |
| 75 of the information has been denormalized and is available on the feature for | |
| 76 speed purposes. A VariationFeature behaves as any other Ensembl feature. | |
| 77 See B<Bio::EnsEMBL::Feature> and B<Bio::EnsEMBL::Variation::Variation>. | |
| 78 | |
| 79 =head1 METHODS | |
| 80 | |
| 81 =cut | |
| 82 | |
| 83 use strict; | |
| 84 use warnings; | |
| 85 | |
| 86 package Bio::EnsEMBL::Variation::VariationFeature; | |
| 87 | |
| 88 use Scalar::Util qw(weaken isweak); | |
| 89 | |
| 90 use Bio::EnsEMBL::Variation::BaseVariationFeature; | |
| 91 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
| 92 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); | |
| 93 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 94 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
| 95 use Bio::EnsEMBL::Variation::Utils::Sequence qw(ambiguity_code hgvs_variant_notation SO_variation_class format_hgvs_string); | |
| 96 use Bio::EnsEMBL::Variation::Utils::Sequence; | |
| 97 use Bio::EnsEMBL::Variation::Variation; | |
| 98 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(MAX_DISTANCE_FROM_TRANSCRIPT); | |
| 99 use Bio::EnsEMBL::Variation::Utils::Constants qw($DEFAULT_OVERLAP_CONSEQUENCE %VARIATION_CLASSES); | |
| 100 use Bio::EnsEMBL::Variation::RegulatoryFeatureVariation; | |
| 101 use Bio::EnsEMBL::Variation::MotifFeatureVariation; | |
| 102 use Bio::EnsEMBL::Variation::ExternalFeatureVariation; | |
| 103 use Bio::EnsEMBL::Variation::IntergenicVariation; | |
| 104 use Bio::EnsEMBL::Slice; | |
| 105 use Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor; | |
| 106 use Bio::PrimarySeq; | |
| 107 use Bio::SeqUtils; | |
| 108 | |
| 109 our @ISA = ('Bio::EnsEMBL::Variation::BaseVariationFeature'); | |
| 110 | |
| 111 our $DEBUG = 0; | |
| 112 =head2 new | |
| 113 | |
| 114 Arg [-dbID] : | |
| 115 see superclass constructor | |
| 116 | |
| 117 Arg [-ADAPTOR] : | |
| 118 see superclass constructor | |
| 119 | |
| 120 Arg [-START] : | |
| 121 see superclass constructor | |
| 122 Arg [-END] : | |
| 123 see superclass constructor | |
| 124 | |
| 125 Arg [-STRAND] : | |
| 126 see superclass constructor | |
| 127 | |
| 128 Arg [-SLICE] : | |
| 129 see superclass constructor | |
| 130 | |
| 131 Arg [-VARIATION_NAME] : | |
| 132 string - the name of the variation this feature is for (denormalisation | |
| 133 from Variation object). | |
| 134 | |
| 135 Arg [-MAP_WEIGHT] : | |
| 136 int - the number of times that the variation associated with this feature | |
| 137 has hit the genome. If this was the only feature associated with this | |
| 138 variation_feature the map_weight would be 1. | |
| 139 | |
| 140 Arg [-VARIATION] : | |
| 141 int - the variation object associated with this feature. | |
| 142 | |
| 143 Arg [-SOURCE] : | |
| 144 string - the name of the source where the SNP comes from | |
| 145 | |
| 146 Arg [-SOURCE_VERSION] : | |
| 147 number - the version of the source where the SNP comes from | |
| 148 | |
| 149 Arg [-VALIDATION_CODE] : | |
| 150 reference to list of strings | |
| 151 | |
| 152 Arg [-OVERLAP_CONSEQUENCES] : | |
| 153 listref of Bio::EnsEMBL::Variation::OverlapConsequences - all the consequences of this VariationFeature | |
| 154 | |
| 155 Arg [-VARIATION_ID] : | |
| 156 int - the internal id of the variation object associated with this | |
| 157 identifier. This may be provided instead of a variation object so that | |
| 158 the variation may be lazy-loaded from the database on demand. | |
| 159 | |
| 160 Example : | |
| 161 $vf = Bio::EnsEMBL::Variation::VariationFeature->new( | |
| 162 -start => 100, | |
| 163 -end => 100, | |
| 164 -strand => 1, | |
| 165 -slice => $slice, | |
| 166 -allele_string => 'A/T', | |
| 167 -variation_name => 'rs635421', | |
| 168 -map_weight => 1, | |
| 169 -source => 'dbSNP', | |
| 170 -validation_code => ['cluster','doublehit'], | |
| 171 -variation => $v | |
| 172 ); | |
| 173 | |
| 174 Description: Constructor. Instantiates a new VariationFeature object. | |
| 175 Returntype : Bio::EnsEMBL::Variation::Variation | |
| 176 Exceptions : none | |
| 177 Caller : general | |
| 178 Status : At Risk | |
| 179 | |
| 180 =cut | |
| 181 | |
| 182 sub new { | |
| 183 my $caller = shift; | |
| 184 my $class = ref($caller) || $caller; | |
| 185 | |
| 186 my $self = $class->SUPER::new(@_); | |
| 187 | |
| 188 my ( | |
| 189 $allele_str, | |
| 190 $var_name, | |
| 191 $map_weight, | |
| 192 $variation, | |
| 193 $variation_id, | |
| 194 $source, | |
| 195 $source_version, | |
| 196 $is_somatic, | |
| 197 $validation_code, | |
| 198 $overlap_consequences, | |
| 199 $class_so_term, | |
| 200 $minor_allele, | |
| 201 $minor_allele_freq, | |
| 202 $minor_allele_count | |
| 203 ) = rearrange([qw( | |
| 204 ALLELE_STRING | |
| 205 VARIATION_NAME | |
| 206 MAP_WEIGHT | |
| 207 VARIATION | |
| 208 _VARIATION_ID | |
| 209 SOURCE | |
| 210 SOURCE_VERSION | |
| 211 IS_SOMATIC | |
| 212 VALIDATION_CODE | |
| 213 OVERLAP_CONSEQUENCES | |
| 214 CLASS_SO_TERM | |
| 215 MINOR_ALLELE | |
| 216 MINOR_ALLELE_FREQUENCY | |
| 217 MINOR_ALLELE_COUNT | |
| 218 )], @_); | |
| 219 | |
| 220 $self->{'allele_string'} = $allele_str; | |
| 221 $self->{'variation_name'} = $var_name; | |
| 222 $self->{'map_weight'} = $map_weight; | |
| 223 $self->{'variation'} = $variation; | |
| 224 $self->{'_variation_id'} = $variation_id; | |
| 225 $self->{'source'} = $source; | |
| 226 $self->{'source_version'} = $source_version; | |
| 227 $self->{'is_somatic'} = $is_somatic; | |
| 228 $self->{'validation_code'} = $validation_code; | |
| 229 $self->{'overlap_consequences'} = $overlap_consequences; | |
| 230 $self->{'class_SO_term'} = $class_so_term; | |
| 231 $self->{'minor_allele'} = $minor_allele; | |
| 232 $self->{'minor_allele_frequency'} = $minor_allele_freq; | |
| 233 $self->{'minor_allele_count'} = $minor_allele_count; | |
| 234 | |
| 235 return $self; | |
| 236 } | |
| 237 | |
| 238 | |
| 239 | |
| 240 sub new_fast { | |
| 241 | |
| 242 my $class = shift; | |
| 243 my $hashref = shift; | |
| 244 my $self = bless $hashref, $class; | |
| 245 weaken($self->{'adaptor'}) if ( ! isweak($self->{'adaptor'}) ); | |
| 246 return $self; | |
| 247 | |
| 248 } | |
| 249 | |
| 250 | |
| 251 =head2 allele_string | |
| 252 | |
| 253 Arg [1] : string $newval (optional) | |
| 254 The new value to set the allele_string attribute to | |
| 255 Example : $allele_string = $obj->allele_string() | |
| 256 Description: Getter/Setter for the allele_string attribute. | |
| 257 The allele_string is a '/' demimited string representing the | |
| 258 alleles associated with this features variation. | |
| 259 Returntype : string | |
| 260 Exceptions : none | |
| 261 Caller : general | |
| 262 Status : Stable | |
| 263 | |
| 264 =cut | |
| 265 | |
| 266 sub allele_string{ | |
| 267 my $self = shift; | |
| 268 return $self->{'allele_string'} = shift if(@_); | |
| 269 return $self->{'allele_string'}; | |
| 270 } | |
| 271 | |
| 272 =head2 display_id | |
| 273 | |
| 274 Arg [1] : none | |
| 275 Example : print $vf->display_id(), "\n"; | |
| 276 Description: Returns the 'display' identifier for this feature. For | |
| 277 VariationFeatures this is simply the name of the variation | |
| 278 it is associated with. | |
| 279 Returntype : string | |
| 280 Exceptions : none | |
| 281 Caller : webcode | |
| 282 Status : At Risk | |
| 283 | |
| 284 =cut | |
| 285 | |
| 286 sub display_id { | |
| 287 my $self = shift; | |
| 288 return $self->{'variation_name'} || ''; | |
| 289 } | |
| 290 | |
| 291 | |
| 292 | |
| 293 =head2 variation_name | |
| 294 | |
| 295 Arg [1] : string $newval (optional) | |
| 296 The new value to set the variation_name attribute to | |
| 297 Example : $variation_name = $obj->variation_name() | |
| 298 Description: Getter/Setter for the variation_name attribute. This is the | |
| 299 name of the variation associated with this feature. | |
| 300 Returntype : string | |
| 301 Exceptions : none | |
| 302 Caller : general | |
| 303 Status : Stable | |
| 304 | |
| 305 =cut | |
| 306 | |
| 307 sub variation_name{ | |
| 308 my $self = shift; | |
| 309 return $self->{'variation_name'} = shift if(@_); | |
| 310 return $self->{'variation_name'}; | |
| 311 } | |
| 312 | |
| 313 | |
| 314 | |
| 315 =head2 map_weight | |
| 316 | |
| 317 Arg [1] : int $newval (optional) | |
| 318 The new value to set the map_weight attribute to | |
| 319 Example : $map_weight = $obj->map_weight() | |
| 320 Description: Getter/Setter for the map_weight attribute. The map_weight | |
| 321 is the number of times this features variation was mapped to | |
| 322 the genome. | |
| 323 Returntype : int | |
| 324 Exceptions : none | |
| 325 Caller : general | |
| 326 Status : At Risk | |
| 327 | |
| 328 =cut | |
| 329 | |
| 330 sub map_weight{ | |
| 331 my $self = shift; | |
| 332 return $self->{'map_weight'} = shift if(@_); | |
| 333 return $self->{'map_weight'}; | |
| 334 } | |
| 335 | |
| 336 =head2 minor_allele | |
| 337 | |
| 338 Arg [1] : string $minor_allele (optional) | |
| 339 The new minor allele string | |
| 340 Example : $ma = $obj->minor_allele() | |
| 341 Description: Get/set the minor allele of this variation, as reported by dbSNP | |
| 342 Returntype : string | |
| 343 Exceptions : none | |
| 344 Caller : general | |
| 345 Status : Stable | |
| 346 | |
| 347 =cut | |
| 348 | |
| 349 sub minor_allele { | |
| 350 my ($self, $minor_allele) = @_; | |
| 351 $self->{minor_allele} = $minor_allele if defined $minor_allele; | |
| 352 return $self->{minor_allele} | |
| 353 } | |
| 354 | |
| 355 =head2 minor_allele_frequency | |
| 356 | |
| 357 Arg [1] : float $minor_allele_frequency (optional) | |
| 358 The new minor allele frequency | |
| 359 Example : $maf = $obj->minor_allele_frequency() | |
| 360 Description: Get/set the frequency of the minor allele of this variation, as reported by dbSNP | |
| 361 Returntype : float | |
| 362 Exceptions : none | |
| 363 Caller : general | |
| 364 Status : Stable | |
| 365 | |
| 366 =cut | |
| 367 | |
| 368 sub minor_allele_frequency { | |
| 369 my ($self, $minor_allele_frequency) = @_; | |
| 370 $self->{minor_allele_frequency} = $minor_allele_frequency if defined $minor_allele_frequency; | |
| 371 return $self->{minor_allele_frequency} | |
| 372 } | |
| 373 | |
| 374 =head2 minor_allele_count | |
| 375 | |
| 376 Arg [1] : int $minor_allele_count (optional) | |
| 377 The new minor allele count | |
| 378 Example : $maf_count = $obj->minor_allele_count() | |
| 379 Description: Get/set the sample count of the minor allele of this variation, as reported by dbSNP | |
| 380 Returntype : int | |
| 381 Exceptions : none | |
| 382 Caller : general | |
| 383 Status : Stable | |
| 384 | |
| 385 =cut | |
| 386 | |
| 387 sub minor_allele_count { | |
| 388 my ($self, $minor_allele_count) = @_; | |
| 389 $self->{minor_allele_count} = $minor_allele_count if defined $minor_allele_count; | |
| 390 return $self->{minor_allele_count} | |
| 391 } | |
| 392 | |
| 393 | |
| 394 | |
| 395 =head2 get_all_TranscriptVariations | |
| 396 | |
| 397 Arg [1] : (optional) listref of Bio::EnsEMBL::Transcript objects | |
| 398 Example : $vf->get_all_TranscriptVariations; | |
| 399 Description : Get all the TranscriptVariations associated with this VariationFeature. | |
| 400 If the optional list of Transcripts is supplied, get only TranscriptVariations | |
| 401 associated with those Transcripts. | |
| 402 Returntype : listref of Bio::EnsEMBL::Variation::TranscriptVariation objects | |
| 403 Exceptions : Thrown on wrong argument type | |
| 404 Caller : general | |
| 405 Status : At Risk | |
| 406 | |
| 407 =cut | |
| 408 | |
| 409 sub get_all_TranscriptVariations { | |
| 410 | |
| 411 my ($self, $transcripts) = @_; | |
| 412 | |
| 413 if ($transcripts) { | |
| 414 assert_ref($transcripts, 'ARRAY'); | |
| 415 map { assert_ref($_, 'Bio::EnsEMBL::Transcript') } @$transcripts; | |
| 416 } | |
| 417 | |
| 418 #die unless $self->{transcript_variations}; | |
| 419 | |
| 420 if ($self->dbID && not defined $self->{transcript_variations}) { | |
| 421 # this VariationFeature is from the database, so we can just fetch the | |
| 422 # TranscriptVariations from the database as well | |
| 423 | |
| 424 if (my $db = $self->adaptor->db) { | |
| 425 my $tva = $db->get_TranscriptVariationAdaptor; | |
| 426 | |
| 427 # just fetch TVs for all Transcripts because that's more efficient, | |
| 428 # we'll only return those asked for later on | |
| 429 | |
| 430 my $tvs = $tva->fetch_all_by_VariationFeatures([$self]); | |
| 431 | |
| 432 map { $self->add_TranscriptVariation($_) } @$tvs; | |
| 433 } | |
| 434 } | |
| 435 elsif (not defined $self->{transcript_variations}) { | |
| 436 # this VariationFeature is not in the database so we have to build the | |
| 437 # TranscriptVariations ourselves | |
| 438 | |
| 439 unless ($transcripts) { | |
| 440 # if the caller didn't supply some transcripts fetch those around this VariationFeature | |
| 441 | |
| 442 # get a slice around this transcript including the maximum distance up and down-stream | |
| 443 # that we still call consequences for | |
| 444 | |
| 445 my $slice = $self->feature_Slice->expand( | |
| 446 MAX_DISTANCE_FROM_TRANSCRIPT, | |
| 447 MAX_DISTANCE_FROM_TRANSCRIPT | |
| 448 ); | |
| 449 | |
| 450 # fetch all transcripts on this slice | |
| 451 | |
| 452 $transcripts = $slice->get_all_Transcripts(1); | |
| 453 } | |
| 454 | |
| 455 my @unfetched_transcripts = grep { | |
| 456 not exists $self->{transcript_variations}->{$_->stable_id} | |
| 457 } @$transcripts; | |
| 458 | |
| 459 for my $transcript (@unfetched_transcripts) { | |
| 460 $self->add_TranscriptVariation( | |
| 461 Bio::EnsEMBL::Variation::TranscriptVariation->new( | |
| 462 -variation_feature => $self, | |
| 463 -transcript => $transcript, | |
| 464 -adaptor => ($self->adaptor->db ? $self->adaptor->db->get_TranscriptVariationAdaptor : undef), | |
| 465 ) | |
| 466 ); | |
| 467 } | |
| 468 } | |
| 469 | |
| 470 if ($transcripts) { | |
| 471 # just return TranscriptVariations for the requested Transcripts | |
| 472 return [ map { $self->{transcript_variations}->{$_->stable_id} } @$transcripts ]; | |
| 473 } | |
| 474 else { | |
| 475 # return all TranscriptVariations | |
| 476 return [ values %{ $self->{transcript_variations} } ]; | |
| 477 } | |
| 478 } | |
| 479 | |
| 480 =head2 get_all_RegulatoryFeatureVariations | |
| 481 | |
| 482 Description : Get all the RegulatoryFeatureVariations associated with this VariationFeature. | |
| 483 Returntype : listref of Bio::EnsEMBL::Variation::RegulatoryFeatureVariation objects | |
| 484 Exceptions : none | |
| 485 Status : At Risk | |
| 486 | |
| 487 =cut | |
| 488 | |
| 489 sub get_all_RegulatoryFeatureVariations { | |
| 490 my $self = shift; | |
| 491 return $self->_get_all_RegulationVariations('RegulatoryFeature', @_); | |
| 492 } | |
| 493 | |
| 494 =head2 get_all_MotifFeatureVariations | |
| 495 | |
| 496 Description : Get all the MotifFeatureVariations associated with this VariationFeature. | |
| 497 Returntype : listref of Bio::EnsEMBL::Variation::MotifFeatureVariation objects | |
| 498 Exceptions : none | |
| 499 Status : At Risk | |
| 500 | |
| 501 =cut | |
| 502 | |
| 503 sub get_all_MotifFeatureVariations { | |
| 504 my $self = shift; | |
| 505 return $self->_get_all_RegulationVariations('MotifFeature', @_); | |
| 506 } | |
| 507 | |
| 508 =head2 get_all_ExternalFeatureVariations | |
| 509 | |
| 510 Description : Get all the ExternalFeatureVariations associated with this VariationFeature. | |
| 511 Returntype : listref of Bio::EnsEMBL::Variation::ExternalFeatureVariation objects | |
| 512 Exceptions : none | |
| 513 Status : At Risk | |
| 514 | |
| 515 =cut | |
| 516 | |
| 517 sub get_all_ExternalFeatureVariations { | |
| 518 my $self = shift; | |
| 519 return $self->_get_all_RegulationVariations('ExternalFeature', @_); | |
| 520 } | |
| 521 | |
| 522 sub _get_all_RegulationVariations { | |
| 523 my ($self, $type) = @_; | |
| 524 | |
| 525 unless ($type && ($type eq 'RegulatoryFeature' || $type eq 'MotifFeature' || $type eq 'ExternalFeature')) { | |
| 526 throw("Invalid Ensembl Regulation type '$type'"); | |
| 527 } | |
| 528 | |
| 529 unless ($self->{regulation_variations}->{$type}) { | |
| 530 | |
| 531 my $fg_adaptor; | |
| 532 | |
| 533 if (my $adap = $self->adaptor) { | |
| 534 if(my $db = $adap->db) { | |
| 535 $fg_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
| 536 -species => $adap->db->species, | |
| 537 -type => $type, | |
| 538 ); | |
| 539 } | |
| 540 | |
| 541 unless ($fg_adaptor) { | |
| 542 warning("Failed to get adaptor for $type"); | |
| 543 return []; | |
| 544 } | |
| 545 } | |
| 546 else { | |
| 547 warning('Cannot get variation features without attached adaptor'); | |
| 548 return []; | |
| 549 } | |
| 550 | |
| 551 my $slice = $self->feature_Slice; | |
| 552 | |
| 553 my $constructor = 'Bio::EnsEMBL::Variation::'.$type.'Variation'; | |
| 554 | |
| 555 eval { | |
| 556 $self->{regulation_variations}->{$type} = [ | |
| 557 map { | |
| 558 $constructor->new( | |
| 559 -variation_feature => $self, | |
| 560 -feature => $_, | |
| 561 ); | |
| 562 } map { $_->transfer($self->slice) } @{ $fg_adaptor->fetch_all_by_Slice($slice) } | |
| 563 ]; | |
| 564 }; | |
| 565 | |
| 566 $self->{regulation_variations}->{$type} ||= []; | |
| 567 } | |
| 568 | |
| 569 return $self->{regulation_variations}->{$type}; | |
| 570 } | |
| 571 | |
| 572 sub get_IntergenicVariation { | |
| 573 my $self = shift; | |
| 574 my $no_ref_check = shift; | |
| 575 | |
| 576 unless (exists $self->{intergenic_variation}) { | |
| 577 if (scalar(@{ $self->get_all_TranscriptVariations }) == 0) { | |
| 578 $self->{intergenic_variation} = Bio::EnsEMBL::Variation::IntergenicVariation->new( | |
| 579 -variation_feature => $self, | |
| 580 -no_ref_check => $no_ref_check, | |
| 581 ); | |
| 582 } | |
| 583 else { | |
| 584 $self->{intergenic_variation} = undef; | |
| 585 } | |
| 586 } | |
| 587 | |
| 588 return $self->{intergenic_variation}; | |
| 589 } | |
| 590 | |
| 591 =head2 get_all_VariationFeatureOverlaps | |
| 592 | |
| 593 Description : Get all the VariationFeatureOverlaps associated with this VariationFeature, this | |
| 594 includes TranscriptVariations and regulatory feature overlap object. | |
| 595 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeatureOverlap objects | |
| 596 Exceptions : none | |
| 597 Status : At Risk | |
| 598 | |
| 599 =cut | |
| 600 | |
| 601 sub get_all_VariationFeatureOverlaps { | |
| 602 my $self = shift; | |
| 603 | |
| 604 my $vfos = [ | |
| 605 @{ $self->get_all_TranscriptVariations }, | |
| 606 @{ $self->get_all_RegulatoryFeatureVariations }, | |
| 607 @{ $self->get_all_MotifFeatureVariations }, | |
| 608 @{ $self->get_all_ExternalFeatureVariations }, | |
| 609 ]; | |
| 610 | |
| 611 if (my $iv = $self->get_IntergenicVariation) { | |
| 612 push @$vfos, $iv; | |
| 613 } | |
| 614 | |
| 615 return $vfos; | |
| 616 } | |
| 617 | |
| 618 =head2 add_TranscriptVariation | |
| 619 | |
| 620 Arg [1] : Bio::EnsEMBL::Variation::TranscriptVariation | |
| 621 Example : $vf->add_TranscriptVariation($tv); | |
| 622 Description : Adds a TranscriptVariation to the variation feature object. | |
| 623 Exceptions : thrown on bad argument | |
| 624 Caller : Bio::EnsEMBL::Variation::TranscriptVariationAdaptor | |
| 625 Status : At Risk | |
| 626 | |
| 627 =cut | |
| 628 | |
| 629 sub add_TranscriptVariation { | |
| 630 my ($self, $tv) = @_; | |
| 631 assert_ref($tv, 'Bio::EnsEMBL::Variation::TranscriptVariation'); | |
| 632 # we need to weaken the reference back to us to avoid a circular reference | |
| 633 weaken($tv->{base_variation_feature}); | |
| 634 $self->{transcript_variations}->{$tv->transcript_stable_id} = $tv; | |
| 635 } | |
| 636 | |
| 637 =head2 variation | |
| 638 | |
| 639 Arg [1] : (optional) Bio::EnsEMBL::Variation::Variation $variation | |
| 640 Example : $v = $vf->variation(); | |
| 641 Description: Getter/Setter for the variation associated with this feature. | |
| 642 If not set, and this VariationFeature has an associated adaptor | |
| 643 an attempt will be made to lazy-load the variation from the | |
| 644 database. | |
| 645 Returntype : Bio::EnsEMBL::Variation::Variation | |
| 646 Exceptions : throw on incorrect argument | |
| 647 Caller : general | |
| 648 Status : Stable | |
| 649 | |
| 650 =cut | |
| 651 | |
| 652 sub variation { | |
| 653 my $self = shift; | |
| 654 | |
| 655 if(@_) { | |
| 656 if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::Variation')) { | |
| 657 throw("Bio::EnsEMBL::Variation::Variation argument expected"); | |
| 658 } | |
| 659 $self->{'variation'} = shift; | |
| 660 } | |
| 661 elsif(!defined($self->{'variation'}) && $self->adaptor() && | |
| 662 defined($self->{'_variation_id'})) { | |
| 663 # lazy-load from database on demand | |
| 664 my $va = $self->adaptor->db()->get_VariationAdaptor(); | |
| 665 $self->{'variation'} = $va->fetch_by_dbID($self->{'_variation_id'}); | |
| 666 } | |
| 667 | |
| 668 return $self->{'variation'}; | |
| 669 } | |
| 670 | |
| 671 =head2 consequence_type | |
| 672 | |
| 673 Arg [1] : (optional) String $term_type | |
| 674 Description: Get a list of all the unique consequence terms of this | |
| 675 VariationFeature. By default returns Ensembl display terms | |
| 676 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
| 677 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
| 678 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
| 679 Returntype : listref of strings | |
| 680 Exceptions : none | |
| 681 Status : At Risk | |
| 682 | |
| 683 =cut | |
| 684 | |
| 685 sub consequence_type { | |
| 686 | |
| 687 my $self = shift; | |
| 688 my $term_type = shift; | |
| 689 | |
| 690 my $method_name; | |
| 691 | |
| 692 # delete cached term | |
| 693 if(defined($term_type)) { | |
| 694 delete $self->{consequence_types}; | |
| 695 $method_name = $term_type.($term_type eq 'label' ? '' : '_term'); | |
| 696 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name); | |
| 697 } | |
| 698 | |
| 699 $method_name ||= 'SO_term'; | |
| 700 | |
| 701 if (exists($self->{current_consequence_method}) && $self->{current_consequence_method} ne $method_name) { | |
| 702 delete $self->{consequence_type}; | |
| 703 } | |
| 704 | |
| 705 unless ($self->{consequence_types}) { | |
| 706 | |
| 707 # work out the terms from the OverlapConsequence objects | |
| 708 | |
| 709 $self->{consequence_types} = | |
| 710 [ map { $_->$method_name } @{ $self->get_all_OverlapConsequences } ]; | |
| 711 } | |
| 712 | |
| 713 $self->{current_consequence_method} = $method_name; | |
| 714 | |
| 715 return $self->{consequence_types}; | |
| 716 } | |
| 717 | |
| 718 =head2 get_all_OverlapConsequences | |
| 719 | |
| 720 Description: Get a list of all the unique OverlapConsequences of this VariationFeature, | |
| 721 calculating them on the fly from the TranscriptVariations if necessary | |
| 722 Returntype : listref of Bio::EnsEMBL::Variation::OverlapConsequence objects | |
| 723 Exceptions : none | |
| 724 Status : At Risk | |
| 725 | |
| 726 =cut | |
| 727 | |
| 728 sub get_all_OverlapConsequences { | |
| 729 my $self = shift; | |
| 730 | |
| 731 unless ($self->{overlap_consequences}) { | |
| 732 | |
| 733 # work them out and store them in a hash keyed by SO_term as we don't | |
| 734 # want duplicates from different VFOs | |
| 735 | |
| 736 my %overlap_cons; | |
| 737 | |
| 738 for my $vfo (@{ $self->get_all_TranscriptVariations }) { | |
| 739 for my $allele (@{ $vfo->get_all_alternate_VariationFeatureOverlapAlleles }) { | |
| 740 for my $cons (@{ $allele->get_all_OverlapConsequences }) { | |
| 741 $overlap_cons{$cons->SO_term} = $cons; | |
| 742 } | |
| 743 } | |
| 744 } | |
| 745 | |
| 746 # if we don't have any consequences we use a default from Constants.pm | |
| 747 # (currently set to the intergenic consequence) | |
| 748 | |
| 749 $self->{overlap_consequences} = [ | |
| 750 %overlap_cons ? values %overlap_cons : $DEFAULT_OVERLAP_CONSEQUENCE | |
| 751 ]; | |
| 752 } | |
| 753 | |
| 754 return $self->{overlap_consequences}; | |
| 755 } | |
| 756 | |
| 757 =head2 add_OverlapConsequence | |
| 758 | |
| 759 Arg [1] : Bio::EnsEMBL::Variation::OverlapConsequence instance | |
| 760 Description: Add an OverlapConsequence to this VariationFeature's list | |
| 761 Returntype : none | |
| 762 Exceptions : throws if the argument is the wrong type | |
| 763 Status : At Risk | |
| 764 | |
| 765 =cut | |
| 766 | |
| 767 sub add_OverlapConsequence { | |
| 768 my ($self, $oc) = @_; | |
| 769 assert_ref($oc, 'Bio::EnsEMBL::Variation::OverlapConsequence'); | |
| 770 push @{ $self->{overlap_consequences} ||= [] }, $oc; | |
| 771 } | |
| 772 | |
| 773 =head2 most_severe_OverlapConsequence | |
| 774 | |
| 775 Description: Get the OverlapConsequence considered (by Ensembl) to be the most severe | |
| 776 consequence of all the alleles of this VariationFeature | |
| 777 Returntype : Bio::EnsEMBL::Variation::OverlapConsequence | |
| 778 Exceptions : none | |
| 779 Status : At Risk | |
| 780 | |
| 781 =cut | |
| 782 | |
| 783 sub most_severe_OverlapConsequence { | |
| 784 my $self = shift; | |
| 785 | |
| 786 unless ($self->{_most_severe_consequence}) { | |
| 787 | |
| 788 my $highest; | |
| 789 | |
| 790 for my $cons (@{ $self->get_all_OverlapConsequences }) { | |
| 791 $highest ||= $cons; | |
| 792 if ($cons->rank < $highest->rank) { | |
| 793 $highest = $cons; | |
| 794 } | |
| 795 } | |
| 796 | |
| 797 $self->{_most_severe_consequence} = $highest; | |
| 798 } | |
| 799 | |
| 800 return $self->{_most_severe_consequence}; | |
| 801 } | |
| 802 | |
| 803 =head2 display_consequence | |
| 804 | |
| 805 Arg [1] : (optional) String $term_type | |
| 806 Description: Get the term for the most severe consequence of this | |
| 807 VariationFeature. By default returns Ensembl display terms | |
| 808 (e.g. 'NON_SYNONYMOUS_CODING'). $term_type can also be 'label' | |
| 809 (e.g. 'Non-synonymous coding'), 'SO' (Sequence Ontology, e.g. | |
| 810 'non_synonymous_codon') or 'NCBI' (e.g. 'missense') | |
| 811 Returntype : string | |
| 812 Exceptions : none | |
| 813 Status : At Risk | |
| 814 | |
| 815 =cut | |
| 816 | |
| 817 sub display_consequence { | |
| 818 my $self = shift; | |
| 819 my $term_type = shift; | |
| 820 | |
| 821 my $method_name; | |
| 822 | |
| 823 # delete cached term | |
| 824 if(defined($term_type)) { | |
| 825 $method_name = $term_type.($term_type eq 'label' ? '' : '_term'); | |
| 826 $method_name = 'SO_term' unless @{$self->get_all_OverlapConsequences} && $self->get_all_OverlapConsequences->[0]->can($method_name); | |
| 827 } | |
| 828 | |
| 829 $method_name ||= 'SO_term'; | |
| 830 | |
| 831 return $self->most_severe_OverlapConsequence->$method_name; | |
| 832 } | |
| 833 | |
| 834 =head2 add_consequence_type | |
| 835 | |
| 836 Status : Deprecated, use add_OverlapConsequence instead | |
| 837 | |
| 838 =cut | |
| 839 | |
| 840 sub add_consequence_type{ | |
| 841 my $self = shift; | |
| 842 warning('Deprecated method, use add_OverlapConsequence instead'); | |
| 843 return $self->add_OverlapConsequence(@_); | |
| 844 } | |
| 845 | |
| 846 =head2 get_consequence_type | |
| 847 | |
| 848 Status : Deprecated, use consequence_type instead | |
| 849 | |
| 850 =cut | |
| 851 | |
| 852 sub get_consequence_type { | |
| 853 my $self = shift; | |
| 854 warning('Deprecated method, use consequence_type instead'); | |
| 855 return $self->consequence_type; | |
| 856 } | |
| 857 | |
| 858 =head2 ambig_code | |
| 859 | |
| 860 Args : None | |
| 861 Example : my $ambiguity_code = $vf->ambig_code() | |
| 862 Description : Returns the ambigutiy code for the alleles in the VariationFeature | |
| 863 ReturnType : String $ambiguity_code | |
| 864 Exceptions : none | |
| 865 Caller : General | |
| 866 Status : At Risk | |
| 867 | |
| 868 =cut | |
| 869 | |
| 870 sub ambig_code{ | |
| 871 my $self = shift; | |
| 872 | |
| 873 return &ambiguity_code($self->allele_string()); | |
| 874 } | |
| 875 | |
| 876 =head2 var_class | |
| 877 | |
| 878 Args[1] : (optional) no_db - don't use the term from the database, always calculate it from the allele string | |
| 879 (used by the ensembl variation pipeline) | |
| 880 Example : my $variation_class = $vf->var_class | |
| 881 Description : returns the Ensembl term for the class of this variation | |
| 882 ReturnType : string | |
| 883 Exceptions : throws if we can't find a corresponding display term for an SO term | |
| 884 Caller : General | |
| 885 Status : At Risk | |
| 886 | |
| 887 =cut | |
| 888 | |
| 889 sub var_class { | |
| 890 | |
| 891 my $self = shift; | |
| 892 my $no_db = shift; | |
| 893 | |
| 894 unless ($self->{class_display_term}) { | |
| 895 | |
| 896 my $so_term = $self->class_SO_term(undef, $no_db); | |
| 897 | |
| 898 # convert the SO term to the ensembl display term | |
| 899 | |
| 900 $self->{class_display_term} = $self->is_somatic ? | |
| 901 $VARIATION_CLASSES{$so_term}->{somatic_display_term} : | |
| 902 $VARIATION_CLASSES{$so_term}->{display_term}; | |
| 903 } | |
| 904 | |
| 905 return $self->{class_display_term}; | |
| 906 } | |
| 907 | |
| 908 =head2 class_SO_term | |
| 909 | |
| 910 Args[1] : (optional) class_SO_term - the SO term for the class of this variation feature | |
| 911 Args[2] : (optional) no_db - don't use the term from the database, always calculate it from the allele string | |
| 912 (used by the ensembl variation pipeline) | |
| 913 Example : my $SO_variation_class = $vf->class_SO_term() | |
| 914 Description : Get/set the SO term for the class of this variation | |
| 915 ReturnType : string | |
| 916 Exceptions : none | |
| 917 Caller : General | |
| 918 Status : At Risk | |
| 919 | |
| 920 =cut | |
| 921 | |
| 922 sub class_SO_term { | |
| 923 my ($self, $class_SO_term, $no_db) = @_; | |
| 924 | |
| 925 $self->{class_SO_term} = $class_SO_term if $class_SO_term; | |
| 926 | |
| 927 if ($no_db || !$self->{class_SO_term}) { | |
| 928 $self->{class_SO_term} = SO_variation_class($self->allele_string); | |
| 929 } | |
| 930 | |
| 931 return $self->{class_SO_term}; | |
| 932 } | |
| 933 | |
| 934 =head2 get_all_validation_states | |
| 935 | |
| 936 Arg [1] : none | |
| 937 Example : my @vstates = @{$vf->get_all_validation_states()}; | |
| 938 Description: Retrieves all validation states for this variationFeature. Current | |
| 939 possible validation statuses are 'cluster','freq','submitter', | |
| 940 'doublehit', 'hapmap' | |
| 941 Returntype : reference to list of strings | |
| 942 Exceptions : none | |
| 943 Caller : general | |
| 944 Status : At Risk | |
| 945 | |
| 946 =cut | |
| 947 | |
| 948 sub get_all_validation_states { | |
| 949 my $self = shift; | |
| 950 return Bio::EnsEMBL::Variation::Utils::Sequence::get_all_validation_states($self->{'validation_code'}); | |
| 951 } | |
| 952 | |
| 953 | |
| 954 =head2 add_validation_state | |
| 955 | |
| 956 Arg [1] : string $state | |
| 957 Example : $vf->add_validation_state('cluster'); | |
| 958 Description: Adds a validation state to this variation. | |
| 959 Returntype : none | |
| 960 Exceptions : warning if validation state is not a recognised type | |
| 961 Caller : general | |
| 962 Status : At Risk | |
| 963 | |
| 964 =cut | |
| 965 | |
| 966 sub add_validation_state { | |
| 967 Bio::EnsEMBL::Variation::Utils::Sequence::add_validation_state(@_); | |
| 968 } | |
| 969 | |
| 970 =head2 source | |
| 971 | |
| 972 Arg [1] : string $source_name (optional) - the new value to set the source attribute to | |
| 973 Example : $source = $vf->source; | |
| 974 Description: Getter/Setter for the source attribute | |
| 975 Returntype : the source name as a string, | |
| 976 Exceptions : none | |
| 977 Caller : general | |
| 978 Status : At Risk | |
| 979 | |
| 980 =cut | |
| 981 | |
| 982 sub source { | |
| 983 my ($self, $source) = @_; | |
| 984 $self->{source} = $source if $source; | |
| 985 return $self->{source}; | |
| 986 } | |
| 987 | |
| 988 =head2 source_version | |
| 989 | |
| 990 Arg [1] : number $source_version (optional) - the new value to set the source version attribute to | |
| 991 Example : $source_version = $vf->source_version; | |
| 992 Description: Getter/Setter for the source version attribute | |
| 993 Returntype : the source version as a number | |
| 994 Exceptions : none | |
| 995 Caller : general | |
| 996 Status : At Risk | |
| 997 | |
| 998 =cut | |
| 999 | |
| 1000 sub source_version { | |
| 1001 my ($self, $source_version) = @_; | |
| 1002 $self->{source_version} = $source_version if $source_version; | |
| 1003 return $self->{source_version}; | |
| 1004 } | |
| 1005 | |
| 1006 =head2 is_somatic | |
| 1007 | |
| 1008 Arg [1] : boolean $is_somatic (optional) | |
| 1009 The new value to set the is_somatic flag to | |
| 1010 Example : $is_somatic = $vf->is_somatic | |
| 1011 Description: Getter/Setter for the is_somatic flag, which identifies this variation feature as either somatic or germline | |
| 1012 Returntype : boolean | |
| 1013 Exceptions : none | |
| 1014 Caller : general | |
| 1015 Status : Stable | |
| 1016 | |
| 1017 =cut | |
| 1018 | |
| 1019 sub is_somatic { | |
| 1020 my ($self, $is_somatic) = @_; | |
| 1021 $self->{'is_somatic'} = $is_somatic if defined $is_somatic; | |
| 1022 return $self->{'is_somatic'}; | |
| 1023 } | |
| 1024 | |
| 1025 =head2 is_tagged | |
| 1026 | |
| 1027 Args : None | |
| 1028 Example : my $populations = $vf->is_tagged(); | |
| 1029 Description : If the variation is tagged in any population, returns an array with the populations where the variation_feature | |
| 1030 is tagged (using a criteria of r2 > 0.99). Otherwise, returns null | |
| 1031 ReturnType : list of Bio::EnsEMBL::Variation::Population | |
| 1032 Exceptions : none | |
| 1033 Caller : general | |
| 1034 Status : At Risk | |
| 1035 | |
| 1036 =cut | |
| 1037 | |
| 1038 sub is_tagged{ | |
| 1039 my $self = shift; | |
| 1040 | |
| 1041 if ($self->adaptor()){ | |
| 1042 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor(); | |
| 1043 return $population_adaptor->fetch_tagged_Population($self); | |
| 1044 } | |
| 1045 } | |
| 1046 | |
| 1047 =head2 is_tag | |
| 1048 | |
| 1049 Args : None | |
| 1050 Example : my $populations = $vf->is_tag(); | |
| 1051 Description : Returns an array of populations in which this variation feature | |
| 1052 is a tag SNP. | |
| 1053 ReturnType : list of Bio::EnsEMBL::Variation::Population | |
| 1054 Exceptions : none | |
| 1055 Caller : general | |
| 1056 Status : At Risk | |
| 1057 | |
| 1058 =cut | |
| 1059 | |
| 1060 sub is_tag{ | |
| 1061 my $self = shift; | |
| 1062 | |
| 1063 if ($self->adaptor()){ | |
| 1064 my $population_adaptor = $self->adaptor()->db()->get_PopulationAdaptor(); | |
| 1065 return $population_adaptor->fetch_tag_Population($self); | |
| 1066 } | |
| 1067 } | |
| 1068 | |
| 1069 =head2 get_all_tagged_VariationFeatures | |
| 1070 | |
| 1071 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
| 1072 Example : my $vfs = $vf->get_all_tagged_VariationFeatures(); | |
| 1073 Description : Returns an arrayref of variation features that are tagged by | |
| 1074 this variation feature, in the population $pop if specified. | |
| 1075 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
| 1076 Exceptions : none | |
| 1077 Caller : general | |
| 1078 Status : At Risk | |
| 1079 | |
| 1080 =cut | |
| 1081 | |
| 1082 sub get_all_tagged_VariationFeatures { | |
| 1083 return $_[0]->adaptor->fetch_all_tagged_by_VariationFeature(@_); | |
| 1084 } | |
| 1085 | |
| 1086 =head2 get_all_tag_VariationFeatures | |
| 1087 | |
| 1088 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
| 1089 Example : my $vfs = $vf->get_all_tag_VariationFeatures(); | |
| 1090 Description : Returns an arrayref of variation features that tag this | |
| 1091 variation feature, in the population $pop if specified. | |
| 1092 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
| 1093 Exceptions : none | |
| 1094 Caller : general | |
| 1095 Status : At Risk | |
| 1096 | |
| 1097 =cut | |
| 1098 | |
| 1099 sub get_all_tag_VariationFeatures { | |
| 1100 return $_[0]->adaptor->fetch_all_tags_by_VariationFeature(@_); | |
| 1101 } | |
| 1102 | |
| 1103 =head2 get_all_tag_and_tagged_VariationFeatures | |
| 1104 | |
| 1105 Args : Bio::EnsEMBL::Variation::Population $pop (optional) | |
| 1106 Example : my $vfs = $vf->get_all_tag_and_tagged_VariationFeatures(); | |
| 1107 Description : Returns an arrayref of variation features that either tag or are | |
| 1108 tagged by this variation feature, in the population $pop if | |
| 1109 specified. | |
| 1110 ReturnType : list of Bio::EnsEMBL::Variation::VariationFeature | |
| 1111 Exceptions : none | |
| 1112 Caller : general | |
| 1113 Status : At Risk | |
| 1114 | |
| 1115 =cut | |
| 1116 | |
| 1117 sub get_all_tag_and_tagged_VariationFeatures { | |
| 1118 return $_[0]->adaptor->fetch_all_tags_and_tagged_by_VariationFeature(@_); | |
| 1119 } | |
| 1120 | |
| 1121 | |
| 1122 | |
| 1123 =head2 is_reference | |
| 1124 Arg : none | |
| 1125 Example : my $reference = $vf->is_reference() | |
| 1126 Description: Returns 1 if VF's slice is a reference slice else 0 | |
| 1127 Returntype : int | |
| 1128 Caller : general | |
| 1129 Status : At Risk | |
| 1130 | |
| 1131 =cut | |
| 1132 | |
| 1133 sub is_reference { | |
| 1134 my ($self) = @_; | |
| 1135 my $slice = $self->slice; | |
| 1136 | |
| 1137 if ( !defined( $self->{'is_reference'} ) ) { | |
| 1138 $self->{'is_reference'} = $slice->is_reference(); | |
| 1139 } | |
| 1140 | |
| 1141 return $self->{'is_reference'}; | |
| 1142 } | |
| 1143 | |
| 1144 =head2 convert_to_SNP | |
| 1145 | |
| 1146 Args : None | |
| 1147 Example : my $snp = $vf->convert_to_SNP() | |
| 1148 Description : Creates a Bio::EnsEMBL::SNP object from Bio::EnsEMBL::VariationFeature. Mainly used for | |
| 1149 backwards compatibility | |
| 1150 ReturnType : Bio::EnsEMBL::SNP | |
| 1151 Exceptions : None | |
| 1152 Caller : general | |
| 1153 Status : At Risk | |
| 1154 | |
| 1155 =cut | |
| 1156 | |
| 1157 sub convert_to_SNP{ | |
| 1158 my $self = shift; | |
| 1159 | |
| 1160 require Bio::EnsEMBL::SNP; #for backwards compatibility. It will only be loaded if the function is called | |
| 1161 | |
| 1162 my $snp = Bio::EnsEMBL::SNP->new_fast({ | |
| 1163 'dbID' => $self->variation()->dbID(), | |
| 1164 '_gsf_start' => $self->start, | |
| 1165 '_gsf_end' => $self->end, | |
| 1166 '_snp_strand' => $self->strand, | |
| 1167 '_gsf_score' => 1, | |
| 1168 '_type' => $self->var_class, | |
| 1169 '_validated' => $self->get_all_validation_states(), | |
| 1170 'alleles' => $self->allele_string, | |
| 1171 '_ambiguity_code' => $self->ambig_code, | |
| 1172 '_mapweight' => $self->map_weight, | |
| 1173 '_source' => $self->source | |
| 1174 }); | |
| 1175 return $snp; | |
| 1176 } | |
| 1177 | |
| 1178 =head2 get_all_LD_values | |
| 1179 | |
| 1180 Args : none | |
| 1181 Description : returns all LD values for this variation feature. This function will only work correctly if the variation | |
| 1182 database has been attached to the core database. | |
| 1183 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer | |
| 1184 Exceptions : none | |
| 1185 Caller : snpview | |
| 1186 Status : At Risk | |
| 1187 : Variation database is under development. | |
| 1188 | |
| 1189 =cut | |
| 1190 | |
| 1191 sub get_all_LD_values{ | |
| 1192 my $self = shift; | |
| 1193 | |
| 1194 if ($self->adaptor()){ | |
| 1195 my $ld_adaptor = $self->adaptor()->db()->get_LDFeatureContainerAdaptor(); | |
| 1196 return $ld_adaptor->fetch_by_VariationFeature($self); | |
| 1197 } | |
| 1198 return {}; | |
| 1199 } | |
| 1200 | |
| 1201 =head2 get_all_LD_Populations | |
| 1202 | |
| 1203 Args : none | |
| 1204 Description : returns a list of populations that could produces LD values | |
| 1205 for this VariationFeature | |
| 1206 ReturnType : listref of Bio::EnsEMBL::Variation::Population objects | |
| 1207 Exceptions : none | |
| 1208 Caller : snpview | |
| 1209 Status : At Risk | |
| 1210 | |
| 1211 =cut | |
| 1212 | |
| 1213 sub get_all_LD_Populations{ | |
| 1214 my $self = shift; | |
| 1215 | |
| 1216 my $pa = $self->adaptor->db->get_PopulationAdaptor; | |
| 1217 return [] unless $pa; | |
| 1218 | |
| 1219 my $ld_pops = $pa->fetch_all_LD_Populations; | |
| 1220 return [] unless $ld_pops; | |
| 1221 | |
| 1222 my $sth = $self->adaptor->db->prepare(qq{ | |
| 1223 SELECT ip.population_sample_id, c.seq_region_start, c.genotypes | |
| 1224 FROM compressed_genotype_region c, individual_population ip | |
| 1225 WHERE c.sample_id = ip.individual_sample_id | |
| 1226 AND c.seq_region_id = ? | |
| 1227 AND c.seq_region_start < ? | |
| 1228 AND c.seq_region_end > ? | |
| 1229 }); | |
| 1230 | |
| 1231 my $this_vf_start = $self->seq_region_start; | |
| 1232 | |
| 1233 $sth->bind_param(1, $self->feature_Slice->get_seq_region_id); | |
| 1234 $sth->bind_param(2, $self->seq_region_end); | |
| 1235 $sth->bind_param(3, $this_vf_start); | |
| 1236 | |
| 1237 $sth->execute; | |
| 1238 | |
| 1239 my ($sample_id, $seq_region_start, $genotypes); | |
| 1240 $sth->bind_columns(\$sample_id, \$seq_region_start, \$genotypes); | |
| 1241 | |
| 1242 my %have_genotypes = (); | |
| 1243 | |
| 1244 while($sth->fetch()) { | |
| 1245 | |
| 1246 next if $have_genotypes{$sample_id}; | |
| 1247 | |
| 1248 if($seq_region_start == $this_vf_start) { | |
| 1249 $have_genotypes{$sample_id} = 1; | |
| 1250 next; | |
| 1251 } | |
| 1252 | |
| 1253 my @genotypes = unpack '(www)*', $genotypes; | |
| 1254 my $gt_start = $seq_region_start; | |
| 1255 | |
| 1256 while(my( $var_id, $gt_code, $gap ) = splice @genotypes, 0, 3 ) { | |
| 1257 if($gt_start == $this_vf_start) { | |
| 1258 $have_genotypes{$sample_id} = 1; | |
| 1259 last; | |
| 1260 } | |
| 1261 $gt_start += $gap + 1 if defined $gap; | |
| 1262 } | |
| 1263 } | |
| 1264 | |
| 1265 my @final_list = grep {$have_genotypes{$_->dbID}} @$ld_pops; | |
| 1266 | |
| 1267 return \@final_list; | |
| 1268 } | |
| 1269 | |
| 1270 =head2 get_all_sources | |
| 1271 | |
| 1272 Args : none | |
| 1273 Example : my @sources = @{$vf->get_all_sources()}; | |
| 1274 Description : returns a list of all the sources for this | |
| 1275 VariationFeature | |
| 1276 ReturnType : reference to list of strings | |
| 1277 Exceptions : none | |
| 1278 Caller : general | |
| 1279 Status : At Risk | |
| 1280 : Variation database is under development. | |
| 1281 =cut | |
| 1282 | |
| 1283 sub get_all_sources{ | |
| 1284 my $self = shift; | |
| 1285 | |
| 1286 my @sources; | |
| 1287 my %sources; | |
| 1288 if ($self->adaptor()){ | |
| 1289 map {$sources{$_}++} @{$self->adaptor()->get_all_synonym_sources($self)}; | |
| 1290 $sources{$self->source}++; | |
| 1291 @sources = keys %sources; | |
| 1292 return \@sources; | |
| 1293 } | |
| 1294 return \@sources; | |
| 1295 } | |
| 1296 | |
| 1297 =head2 ref_allele_string | |
| 1298 | |
| 1299 Args : none | |
| 1300 Example : $reference_allele_string = $self->ref_allele_string() | |
| 1301 Description: Getter for the reference allele_string, always the first. | |
| 1302 Returntype : string | |
| 1303 Exceptions : none | |
| 1304 Caller : general | |
| 1305 Status : Stable | |
| 1306 | |
| 1307 =cut | |
| 1308 | |
| 1309 sub ref_allele_string{ | |
| 1310 my $self = shift; | |
| 1311 | |
| 1312 my @alleles = split /[\|\\\/]/,$self->allele_string; | |
| 1313 return $alleles[0]; | |
| 1314 } | |
| 1315 | |
| 1316 | |
| 1317 =head2 get_all_VariationSets | |
| 1318 | |
| 1319 Args : none | |
| 1320 Example : my @vs = @{$vf->get_all_VariationSets()}; | |
| 1321 Description : returns a reference to a list of all the VariationSets this | |
| 1322 VariationFeature is a member of | |
| 1323 ReturnType : reference to list of Bio::EnsEMBL::Variation::VariationSets | |
| 1324 Exceptions : if no adaptor is attached to this object | |
| 1325 Caller : general | |
| 1326 Status : At Risk | |
| 1327 =cut | |
| 1328 | |
| 1329 sub get_all_VariationSets { | |
| 1330 my $self = shift; | |
| 1331 | |
| 1332 if (!$self->adaptor()) { | |
| 1333 throw('An adaptor must be attached in order to get all variation sets'); | |
| 1334 } | |
| 1335 my $vs_adaptor = $self->adaptor()->db()->get_VariationSetAdaptor(); | |
| 1336 my $variation_sets = $vs_adaptor->fetch_all_by_Variation($self->variation()); | |
| 1337 | |
| 1338 return $variation_sets; | |
| 1339 } | |
| 1340 | |
| 1341 | |
| 1342 =head2 get_all_Alleles | |
| 1343 | |
| 1344 Args : none | |
| 1345 Example : @alleles = @{$vf->get_all_Alleles} | |
| 1346 Description: Gets all Allele objects from the underlying variation object, | |
| 1347 with reference alleles first. | |
| 1348 Returntype : listref of Bio::EnsEMBL::Variation::Allele objects | |
| 1349 Exceptions : none | |
| 1350 Caller : general | |
| 1351 Status : Stable | |
| 1352 | |
| 1353 =cut | |
| 1354 | |
| 1355 sub get_all_Alleles{ | |
| 1356 my $self = shift; | |
| 1357 | |
| 1358 my @alleles = @{$self->variation->get_all_Alleles}; | |
| 1359 | |
| 1360 # put all alleles in a hash | |
| 1361 my %order = (); | |
| 1362 foreach my $allele(@alleles) { | |
| 1363 $order{$allele->allele} = 1; | |
| 1364 } | |
| 1365 | |
| 1366 $order{$self->ref_allele_string} = 2; | |
| 1367 | |
| 1368 # now sort them by population, submitter, allele | |
| 1369 my @new_alleles = sort { | |
| 1370 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") || | |
| 1371 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") || | |
| 1372 $order{$b->allele} <=> $order{$a->allele} | |
| 1373 } @alleles; | |
| 1374 | |
| 1375 return \@new_alleles; | |
| 1376 } | |
| 1377 | |
| 1378 | |
| 1379 =head2 get_all_PopulationGenotypes | |
| 1380 | |
| 1381 Args : none | |
| 1382 Example : @pop_gens = @{$vf->get_all_PopulationGenotypes} | |
| 1383 Description: Gets all PopulationGenotype objects from the underlying variation | |
| 1384 object, with reference genotypes first. | |
| 1385 Returntype : listref of Bio::EnsEMBL::Variation::PopulationGenotype objects | |
| 1386 Exceptions : none | |
| 1387 Caller : general | |
| 1388 Status : Stable | |
| 1389 | |
| 1390 =cut | |
| 1391 | |
| 1392 sub get_all_PopulationGenotypes{ | |
| 1393 my $self = shift; | |
| 1394 | |
| 1395 my @gens = @{$self->variation->get_all_PopulationGenotypes}; | |
| 1396 | |
| 1397 # put all alleles in a hash | |
| 1398 my %order = (); | |
| 1399 foreach my $gen(@gens) { | |
| 1400 # homs low priority, hets higher | |
| 1401 $order{$gen->allele1.$gen->allele2} = ($gen->allele1 eq $gen->allele2 ? 1 : 2); | |
| 1402 } | |
| 1403 | |
| 1404 # ref hom highest priority | |
| 1405 $order{$self->ref_allele_string x 2} = 3; | |
| 1406 | |
| 1407 # now sort them by population, submitter, genotype | |
| 1408 my @new_gens = sort { | |
| 1409 ($a->population ? $a->population->name : "") cmp ($b->population ? $b->population->name : "") || | |
| 1410 ($a->subsnp ? $a->subsnp : "") cmp ($b->subsnp ? $b->subsnp : "") || | |
| 1411 $order{$b->allele1.$b->allele2} <=> $order{$a->allele1.$a->allele2} | |
| 1412 } @gens; | |
| 1413 | |
| 1414 return \@new_gens; | |
| 1415 } | |
| 1416 | |
| 1417 =head2 get_all_hgvs_notations | |
| 1418 | |
| 1419 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional) | |
| 1420 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates | |
| 1421 relative to this feature. | |
| 1422 Arg [2] : string (Optional) | |
| 1423 Indicate whether the HGVS notation should be reported in genomic coordinates or cDNA coordinates. | |
| 1424 'g' -> Genomic position numbering | |
| 1425 'c' -> cDNA position numbering | |
| 1426 'p' -> protein position numbering | |
| 1427 Arg [3] : string (Optional) | |
| 1428 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used. | |
| 1429 Arg [4] : string (Optional) | |
| 1430 Return just the HGVS notation corresponding to this allele | |
| 1431 | |
| 1432 Example : my $vf = $variation_feature_adaptor->fetch_by_dbID(565770); | |
| 1433 my $tr = $transcript_adaptor->fetch_by_stable_id('ENST00000335295'); | |
| 1434 my $hgvs = $vf->get_all_hgvs_notations($tr,'p'); | |
| 1435 while (my ($allele,$hgvs_str) = each(%{$hgvs})) { | |
| 1436 print "Allele $allele :\t$hgvs_str\n"; # Will print 'Allele - : ENSP00000333994.3:p.Val34_Tyr36delinsAsp' | |
| 1437 } | |
| 1438 | |
| 1439 Description: Returns a reference to a hash with the allele as key and a string with the HGVS notation of this VariationFeature as value. By default uses the | |
| 1440 slice it is plcaed on as reference but a different reference feature can be supplied. | |
| 1441 Returntype : Hash reference | |
| 1442 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature | |
| 1443 Caller : general | |
| 1444 Status : Experimental | |
| 1445 | |
| 1446 =cut | |
| 1447 sub get_all_hgvs_notations { | |
| 1448 | |
| 1449 my $self = shift; | |
| 1450 my $ref_feature = shift; | |
| 1451 my $numbering = shift; ## HGVS system g=genomic, c=coding, p=protein | |
| 1452 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written | |
| 1453 my $use_allele = shift; ## optional single allele to check | |
| 1454 my $transcript_variation = shift; ## optional transcript variation - looked up for c|p if not supplied | |
| 1455 | |
| 1456 my %hgvs; | |
| 1457 | |
| 1458 ##### don't get them for HGMD mutations or CNV probes | |
| 1459 return {} if ($self->allele_string =~ /INS|DEL|HGMD|CNV/ig || $self->var_class() =~ /microsat/i); | |
| 1460 ##### By default, use genomic position numbering | |
| 1461 $numbering ||= 'g'; | |
| 1462 | |
| 1463 # If no reference feature is supplied, set it to the slice underlying this VariationFeature | |
| 1464 $ref_feature ||= $self->slice(); | |
| 1465 | |
| 1466 # Special parsing for LRG | |
| 1467 if (defined $reference_name && $reference_name =~ /^LRG_/) { | |
| 1468 # Remove version | |
| 1469 if ($reference_name =~ /(.+)\.\d+$/) { | |
| 1470 $reference_name = $1; | |
| 1471 } | |
| 1472 } | |
| 1473 | |
| 1474 ### Check/get transcript variation available for protein & coding | |
| 1475 if ($ref_feature->isa('Bio::EnsEMBL::Transcript')) { | |
| 1476 | |
| 1477 # Get a TranscriptVariation object for this VariationFeature and the supplied Transcript if it wasn't passed in the call | |
| 1478 $transcript_variation = $self->get_all_TranscriptVariations([$ref_feature])->[0] if (!defined($transcript_variation)); | |
| 1479 | |
| 1480 ##### call new TranscriptVariationAllele method for each allele | |
| 1481 } | |
| 1482 | |
| 1483 | |
| 1484 if ($numbering eq 'p') { | |
| 1485 | |
| 1486 #### If there is no transcript variation supplied and the variant | |
| 1487 #### is not in the translated region there is no protein change | |
| 1488 return {} if (!defined($transcript_variation) || | |
| 1489 !defined($transcript_variation->translation_start()) || | |
| 1490 !defined($transcript_variation->translation_end())); | |
| 1491 | |
| 1492 ##### call TranscriptVariationAllele method for each allele | |
| 1493 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){ | |
| 1494 | |
| 1495 my $allele_string = $transcriptVariationAllele->feature_seq(); | |
| 1496 my $hgvs_full_string = $transcriptVariationAllele->hgvs_protein(); | |
| 1497 | |
| 1498 if($allele_string ne (split/\//,$self->allele_string())[1] ){ | |
| 1499 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand | |
| 1500 } | |
| 1501 $hgvs{$allele_string} = $hgvs_full_string ; | |
| 1502 } | |
| 1503 return \%hgvs; | |
| 1504 } | |
| 1505 | |
| 1506 elsif ( $numbering =~ m/c|n/) { ### coding or non- coding transcript | |
| 1507 | |
| 1508 return {} if (!defined $transcript_variation); | |
| 1509 | |
| 1510 foreach my $transcriptVariationAllele (@{$transcript_variation->get_all_alternate_TranscriptVariationAlleles()} ){ | |
| 1511 | |
| 1512 my $allele_string = $transcriptVariationAllele->feature_seq(); | |
| 1513 my $hgvs_full_string = $transcriptVariationAllele->hgvs_transcript(); | |
| 1514 | |
| 1515 if($allele_string ne (split/\//,$self->allele_string())[1] ){ | |
| 1516 reverse_comp(\$allele_string); ### hash returned relative to input variation feature strand regardless of transcript strand | |
| 1517 } | |
| 1518 $hgvs{$allele_string} = $hgvs_full_string ; | |
| 1519 } | |
| 1520 return \%hgvs; | |
| 1521 } | |
| 1522 | |
| 1523 elsif( $numbering =~ m/g/ ) { | |
| 1524 #### handling both alleles together locally for genomic class | |
| 1525 my $hgvs = $self->hgvs_genomic($ref_feature, $reference_name, $use_allele ); | |
| 1526 return $hgvs; | |
| 1527 } | |
| 1528 else{ | |
| 1529 warn("HGVS notation is not available for coordinate system: $numbering"); | |
| 1530 return {}; | |
| 1531 } | |
| 1532 } | |
| 1533 | |
| 1534 ### HGVS short flanking sequence required to check if HGVS variant class should be dup rather than ins | |
| 1535 sub _get_flank_seq{ | |
| 1536 | |
| 1537 my $self = shift; | |
| 1538 | |
| 1539 # Get the underlying slice and sequence | |
| 1540 my $ref_slice = $self->slice(); | |
| 1541 | |
| 1542 my @allele = split(/\//,$self->allele_string()); | |
| 1543 #### add flank at least as long as longest allele to allow checking | |
| 1544 my $add_length = 0; | |
| 1545 | |
| 1546 foreach my $al(@allele){ ## may have >2 alleles | |
| 1547 if(length($al) > $add_length){ | |
| 1548 $add_length = length $al ; | |
| 1549 } | |
| 1550 } | |
| 1551 $add_length++; | |
| 1552 | |
| 1553 my $ref_start = $add_length ; | |
| 1554 my $ref_end = $add_length + ($self->end() - $self->start()); | |
| 1555 my $seq_start = ($self->start() - $ref_start); | |
| 1556 | |
| 1557 # Should we be at the beginning of the sequence, adjust the coordinates to not cause an exception | |
| 1558 if ($seq_start < 0) { | |
| 1559 $ref_start += $seq_start; | |
| 1560 $ref_end += $seq_start; | |
| 1561 $seq_start = 0; | |
| 1562 } | |
| 1563 | |
| 1564 my $flank_seq = $ref_slice->subseq($seq_start +1 , $seq_start + $ref_end, 1); | |
| 1565 | |
| 1566 | |
| 1567 return ($flank_seq, $ref_start, $ref_end ); | |
| 1568 } | |
| 1569 | |
| 1570 #### format HGVS hash for genomic reference sequence | |
| 1571 ### Input: Variation feature | |
| 1572 ### Output: $hgvs_notation hash | |
| 1573 | |
| 1574 | |
| 1575 | |
| 1576 =head2 hgvs_genomic | |
| 1577 | |
| 1578 Arg [1] : Bio::EnsEMBL::Feature $ref_feature (optional) | |
| 1579 Get the HGVS notation of this VariationFeature relative to the slice it is on. If an optional reference feature is supplied, returns the coordinates | |
| 1580 relative to this feature. | |
| 1581 Arg [2] : string (Optional) | |
| 1582 A name to use for the reference can be supplied. By default the name returned by the display_id() method of the reference feature will be used. | |
| 1583 Arg [4] : string (Optional) | |
| 1584 Return just the HGVS notation corresponding to this allele | |
| 1585 | |
| 1586 | |
| 1587 | |
| 1588 Description: Returns a reference to a hash with the allele as key and a string with the genomic HGVS notation of this VariationFeature as value. By default uses the | |
| 1589 slice it is placed on as reference but a different reference feature can be supplied. | |
| 1590 Returntype : Hash reference | |
| 1591 Exceptions : Throws exception if VariationFeature can not be described relative to the feature_Slice of the supplied reference feature | |
| 1592 Caller : general | |
| 1593 Status : Experimental | |
| 1594 | |
| 1595 =cut | |
| 1596 sub hgvs_genomic{ | |
| 1597 | |
| 1598 my $self = shift; | |
| 1599 my $ref_feature = shift; ## can be a transcript | |
| 1600 my $reference_name = shift; ## If the ref_feature is a slice, this is over-written | |
| 1601 my $use_allele = shift; ## optional single allele to check | |
| 1602 | |
| 1603 my %hgvs; | |
| 1604 ########set up sequence reference | |
| 1605 my $ref_slice; #### need this for genomic notation | |
| 1606 | |
| 1607 if ($ref_feature->isa('Bio::EnsEMBL::Slice')) { | |
| 1608 $ref_slice = $ref_feature; | |
| 1609 } | |
| 1610 else { | |
| 1611 # get slice if not supplied | |
| 1612 $ref_slice = $ref_feature->feature_Slice; | |
| 1613 } | |
| 1614 | |
| 1615 # Create new VariationFeature on the slice of the reference feature (unless the reference feature is the slice the VF is on) | |
| 1616 my $tr_vf; | |
| 1617 if ( $self->slice->coord_system->name() eq "chromosome") { | |
| 1618 $tr_vf = $self; | |
| 1619 } | |
| 1620 else { | |
| 1621 $tr_vf = $self->transfer($ref_slice); | |
| 1622 } | |
| 1623 # Return undef if this VariationFeature could not be transferred | |
| 1624 return {} if (!defined($tr_vf)); | |
| 1625 | |
| 1626 | |
| 1627 # Return undef if this VariationFeature does not fall within the supplied feature. | |
| 1628 return {} if ($tr_vf->start < 1 || | |
| 1629 $tr_vf->end < 1 || | |
| 1630 $tr_vf->start > ($ref_feature->end - $ref_feature->start + 1) || | |
| 1631 $tr_vf->end > ($ref_feature->end - $ref_feature->start + 1)); | |
| 1632 | |
| 1633 ######### define reference sequence name ################################### | |
| 1634 | |
| 1635 # If the reference is a slice, use the seq_region_name as identifier | |
| 1636 $reference_name ||= $ref_feature->seq_region_name if ($ref_feature->isa('Bio::EnsEMBL::Slice')); | |
| 1637 | |
| 1638 # Use the feature's display id as reference name unless specified otherwise. | |
| 1639 # If the feature is a transcript or translation, append the version number as well | |
| 1640 $reference_name ||= $ref_feature->display_id() . ($ref_feature->isa('Bio::EnsEMBL::Transcript') && | |
| 1641 $ref_feature->display_id !~ /\.\d+$/ ? '.' . $ref_feature->version() : ''); | |
| 1642 | |
| 1643 | |
| 1644 ##### get short flank sequence for duplication checking & adjusted variation coordinates | |
| 1645 my ($ref_seq, $ref_start, $ref_end) = _get_flank_seq($tr_vf);; | |
| 1646 | |
| 1647 foreach my $allele ( split(/\//,$tr_vf->allele_string()) ) { | |
| 1648 | |
| 1649 ## If a particular allele was requested, ignore others | |
| 1650 next if (defined($use_allele) && $allele ne $use_allele); | |
| 1651 | |
| 1652 # Skip if the allele contains weird characters | |
| 1653 next if $allele =~ m/[^ACGT\-]/ig; | |
| 1654 | |
| 1655 ##### vf strand is relative to slice - if transcript feature slice, may need complimenting | |
| 1656 my $check_allele = $allele; | |
| 1657 if( $self->strand() <0 && $ref_slice->strand >0 || | |
| 1658 $self->strand() >0 && $ref_slice->strand < 0 | |
| 1659 ){ | |
| 1660 reverse_comp(\$check_allele); | |
| 1661 if($DEBUG ==1){print "***************Flipping alt allele $allele => $check_allele to match coding strand\n";} | |
| 1662 } | |
| 1663 | |
| 1664 ## work out chrom coord for hgvs string if transcript slice supplied | |
| 1665 my ($chr_start,$chr_end); | |
| 1666 if ( $tr_vf->slice->is_toplevel() == 1) { | |
| 1667 $chr_start = $tr_vf->seq_region_start(); | |
| 1668 $chr_end = $tr_vf->seq_region_end(); | |
| 1669 } | |
| 1670 else{ | |
| 1671 ## add feature start to start of var-in-feature | |
| 1672 if( $tr_vf->seq_region_start() < $tr_vf->seq_region_end()){ | |
| 1673 $chr_start = $tr_vf->start() + $tr_vf->seq_region_start() ; | |
| 1674 $chr_end = $tr_vf->end() + $tr_vf->seq_region_start() ; | |
| 1675 } | |
| 1676 else{ | |
| 1677 $chr_start = $tr_vf->seq_region_start() - $tr_vf->start() ; | |
| 1678 $chr_end = $tr_vf->seq_region_start() - $tr_vf->end() ; | |
| 1679 } | |
| 1680 } | |
| 1681 | |
| 1682 | |
| 1683 my $hgvs_notation = hgvs_variant_notation( $check_allele, ## alt allele in refseq strand orientation | |
| 1684 $ref_seq, ## substring of slice for ref allele extraction | |
| 1685 $ref_start, ## start on substring of slice for ref allele extraction | |
| 1686 $ref_end, | |
| 1687 $chr_start, ## start wrt seq region slice is on (eg. chrom) | |
| 1688 $chr_end, | |
| 1689 ""); | |
| 1690 | |
| 1691 | |
| 1692 # Skip if e.g. allele is identical to the reference slice | |
| 1693 next if (!defined($hgvs_notation)); | |
| 1694 | |
| 1695 # Add the name of the reference | |
| 1696 $hgvs_notation->{'ref_name'} = $reference_name; | |
| 1697 # Add the position_numbering scheme | |
| 1698 $hgvs_notation->{'numbering'} = 'g'; | |
| 1699 | |
| 1700 # Construct the HGVS notation from the data in the hash | |
| 1701 $hgvs_notation->{'hgvs'} = format_hgvs_string( $hgvs_notation); | |
| 1702 | |
| 1703 $hgvs{$allele} = $hgvs_notation->{'hgvs'}; | |
| 1704 } | |
| 1705 return \%hgvs; | |
| 1706 | |
| 1707 } | |
| 1708 | |
| 1709 | |
| 1710 | |
| 1711 sub length { | |
| 1712 my $self = shift; | |
| 1713 return $self->{'end'} - $self->{'start'} + 1; | |
| 1714 } | |
| 1715 | |
| 1716 =head2 summary_as_hash | |
| 1717 | |
| 1718 Example : $feature_summary = $feature->summary_as_hash(); | |
| 1719 Description : Extends Feature::summary_as_hash | |
| 1720 Retrieves a summary of this VariationFeature object. | |
| 1721 | |
| 1722 Returns : hashref of descriptive strings | |
| 1723 | |
| 1724 =cut | |
| 1725 | |
| 1726 sub summary_as_hash { | |
| 1727 my $self = shift; | |
| 1728 my $summary_ref = $self->SUPER::summary_as_hash; | |
| 1729 $summary_ref->{'consequence_type'} = $self->display_consequence; | |
| 1730 my @allele_list = split(/\//,$self->allele_string); | |
| 1731 $summary_ref->{'alt_alleles'} = \@allele_list; | |
| 1732 return $summary_ref; | |
| 1733 } | |
| 1734 | |
| 1735 1; |
