Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/SeqFeature.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature. | |
| 24 | |
| 25 =head1 DESCRIPTION | |
| 26 | |
| 27 Do not use this module if you can avoid it. It has been replaced by | |
| 28 Bio::EnsEMBL::Feature. This module has a long history of usage but has | |
| 29 become very bloated, and quite unweildy. It was decided to replace | |
| 30 it completely with a smaller, light-weight feature class rather than | |
| 31 attempting to refactor this class, and maintain strict backwards | |
| 32 compatibility. | |
| 33 | |
| 34 Part of the complexity of this class was in its extensive | |
| 35 inheritance. As an example the following is a simplified inheritance | |
| 36 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature: | |
| 37 | |
| 38 Bio::EnsEMBL::DnaAlignFeature | |
| 39 Bio::EnsEMBL::BaseAlignFeature | |
| 40 Bio::EnsEMBL::FeaturePair | |
| 41 Bio::EnsEMBL::SeqFeature | |
| 42 Bio::EnsEMBL::SeqFeatureI | |
| 43 Bio::SeqFeatureI | |
| 44 Bio::RangeI | |
| 45 Bio::Root::RootI | |
| 46 | |
| 47 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much | |
| 48 easier to understand and maintain. | |
| 49 | |
| 50 =head1 METHODS | |
| 51 | |
| 52 =cut | |
| 53 | |
| 54 | |
| 55 # Let the code begin... | |
| 56 | |
| 57 | |
| 58 package Bio::EnsEMBL::SeqFeature; | |
| 59 | |
| 60 use vars qw(@ISA); | |
| 61 use strict; | |
| 62 | |
| 63 | |
| 64 use Bio::EnsEMBL::SeqFeatureI; | |
| 65 use Bio::EnsEMBL::Analysis; | |
| 66 use Bio::EnsEMBL::Root; | |
| 67 | |
| 68 @ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI); | |
| 69 | |
| 70 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 71 | |
| 72 sub new { | |
| 73 my($caller,@args) = @_; | |
| 74 | |
| 75 my $self = {}; | |
| 76 | |
| 77 if(ref $caller) { | |
| 78 bless $self, ref $caller; | |
| 79 } else { | |
| 80 bless $self, $caller; | |
| 81 } | |
| 82 | |
| 83 $self->{'_gsf_tag_hash'} = {}; | |
| 84 $self->{'_gsf_sub_array'} = []; | |
| 85 $self->{'_parse_h'} = {}; | |
| 86 $self->{'_is_splittable'} = 0; | |
| 87 | |
| 88 my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag, | |
| 89 $primary_tag, $percent_id, $p_value, $phase, $end_phase) = | |
| 90 | |
| 91 &rearrange([qw(START | |
| 92 END | |
| 93 STRAND | |
| 94 FRAME | |
| 95 SCORE | |
| 96 ANALYSIS | |
| 97 SEQNAME | |
| 98 SOURCE_TAG | |
| 99 PRIMARY_TAG | |
| 100 PERCENT_ID | |
| 101 P_VALUE | |
| 102 PHASE | |
| 103 END_PHASE | |
| 104 )],@args); | |
| 105 | |
| 106 # $gff_string && $self->_from_gff_string($gff_string); | |
| 107 | |
| 108 if ( defined $analysis && $analysis ne "") { $self->analysis($analysis)}; | |
| 109 if ( defined ($start) && $start ne "" ) { $self->start($start)}; | |
| 110 if ( defined ($end ) && $end ne "" ) { $self->end($end)} | |
| 111 if ( defined $strand && $strand ne "") { $self->strand($strand)} | |
| 112 if ( defined $frame && $frame ne "") { $self->frame($frame)} | |
| 113 if ( defined $score && $score ne "") { $self->score($score)} | |
| 114 if ( defined $seqname && $seqname ne "") { $self->seqname($seqname)}; | |
| 115 if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)}; | |
| 116 if ( defined $p_value && $p_value ne "") { $self->p_value($p_value)}; | |
| 117 if ( defined $phase && $phase ne "") { $self->phase($phase)}; | |
| 118 if ( defined $end_phase && $end_phase ne "") { $self->end_phase($end_phase)}; | |
| 119 | |
| 120 return $self; # success - we hope! | |
| 121 | |
| 122 } | |
| 123 | |
| 124 | |
| 125 | |
| 126 | |
| 127 | |
| 128 | |
| 129 =head2 start | |
| 130 | |
| 131 Title : start | |
| 132 Usage : $start = $feat->start | |
| 133 $feat->start(20) | |
| 134 Function: Get/set on the start coordinate of the feature | |
| 135 Returns : integer | |
| 136 Args : none | |
| 137 | |
| 138 | |
| 139 =cut | |
| 140 | |
| 141 sub start{ | |
| 142 my ($self,$value) = @_; | |
| 143 | |
| 144 if (defined($value)) { | |
| 145 if ($value !~ /^\-?\d+/ ) { | |
| 146 $self->throw("$value is not a valid start"); | |
| 147 } | |
| 148 | |
| 149 $self->{'_gsf_start'} = $value | |
| 150 } | |
| 151 | |
| 152 return $self->{'_gsf_start'}; | |
| 153 | |
| 154 } | |
| 155 | |
| 156 =head2 end | |
| 157 | |
| 158 Title : end | |
| 159 Usage : $end = $feat->end | |
| 160 $feat->end($end) | |
| 161 Function: get/set on the end coordinate of the feature | |
| 162 Returns : integer | |
| 163 Args : none | |
| 164 | |
| 165 | |
| 166 =cut | |
| 167 | |
| 168 sub end{ | |
| 169 my ($self,$value) = @_; | |
| 170 | |
| 171 if (defined($value)) { | |
| 172 if( $value !~ /^\-?\d+/ ) { | |
| 173 $self->throw("[$value] is not a valid end"); | |
| 174 } | |
| 175 | |
| 176 $self->{'_gsf_end'} = $value; | |
| 177 } | |
| 178 | |
| 179 return $self->{'_gsf_end'}; | |
| 180 } | |
| 181 | |
| 182 =head2 length | |
| 183 | |
| 184 Title : length | |
| 185 Usage : | |
| 186 Function: | |
| 187 Example : | |
| 188 Returns : | |
| 189 Args : | |
| 190 | |
| 191 | |
| 192 =cut | |
| 193 | |
| 194 sub length{ | |
| 195 my ($self,@args) = @_; | |
| 196 | |
| 197 return $self->end - $self->start +1; | |
| 198 } | |
| 199 | |
| 200 | |
| 201 =head2 strand | |
| 202 | |
| 203 Title : strand | |
| 204 Usage : $strand = $feat->strand() | |
| 205 $feat->strand($strand) | |
| 206 Function: get/set on strand information, being 1,-1 or 0 | |
| 207 Returns : -1,1 or 0 | |
| 208 Args : none | |
| 209 | |
| 210 | |
| 211 =cut | |
| 212 | |
| 213 sub strand { | |
| 214 my ($self,$value) = @_; | |
| 215 | |
| 216 if (defined($value)) { | |
| 217 if( $value eq '+' ) { $value = 1; } | |
| 218 if( $value eq '-' ) { $value = -1; } | |
| 219 if( $value eq '.' ) { $value = 0; } | |
| 220 | |
| 221 if( $value != -1 && $value != 1 && $value != 0 ) { | |
| 222 $self->throw("$value is not a valid strand info"); | |
| 223 } | |
| 224 $self->{'_gsf_strand'} = $value; | |
| 225 } | |
| 226 | |
| 227 return $self->{'_gsf_strand'}; | |
| 228 } | |
| 229 | |
| 230 | |
| 231 =head2 move | |
| 232 | |
| 233 Arg [1] : int $start | |
| 234 Arg [2] : int $end | |
| 235 Arg [3] : (optional) int $strand | |
| 236 Example : $feature->move(100, 200, -1); | |
| 237 Description: Moves a feature to a different location. This is faster | |
| 238 then calling 3 seperate accesors in a large loop. | |
| 239 Returntype : none | |
| 240 Exceptions : none | |
| 241 Caller : BaseFeatureAdaptor | |
| 242 | |
| 243 =cut | |
| 244 | |
| 245 sub move { | |
| 246 my ($self, $start, $end, $strand) = @_; | |
| 247 | |
| 248 $self->{'_gsf_start'} = $start; | |
| 249 $self->{'_gsf_end'} = $end; | |
| 250 if(defined $strand) { | |
| 251 $self->{'_gsf_strand'} = $strand; | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 | |
| 256 =head2 score | |
| 257 | |
| 258 Title : score | |
| 259 Usage : $score = $feat->score() | |
| 260 $feat->score($score) | |
| 261 Function: get/set on score information | |
| 262 Returns : float | |
| 263 Args : none if get, the new value if set | |
| 264 | |
| 265 | |
| 266 =cut | |
| 267 | |
| 268 sub score { | |
| 269 my ($self,$value) = @_; | |
| 270 | |
| 271 if(defined ($value) ) { | |
| 272 if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) { | |
| 273 $self->throw("'$value' is not a valid score"); | |
| 274 } | |
| 275 $self->{'_gsf_score'} = $value; | |
| 276 } | |
| 277 | |
| 278 return $self->{'_gsf_score'}; | |
| 279 } | |
| 280 | |
| 281 =head2 frame | |
| 282 | |
| 283 Title : frame | |
| 284 Usage : $frame = $feat->frame() | |
| 285 $feat->frame($frame) | |
| 286 Function: get/set on frame information | |
| 287 Returns : 0,1,2 | |
| 288 Args : none if get, the new value if set | |
| 289 | |
| 290 | |
| 291 =cut | |
| 292 | |
| 293 sub frame { | |
| 294 my ($self,$value) = @_; | |
| 295 | |
| 296 if (defined($value)) { | |
| 297 if( $value != 1 && $value != 2 && $value != 3 ) { | |
| 298 $self->throw("'$value' is not a valid frame"); | |
| 299 } | |
| 300 $self->{'_gsf_frame'} = $value; | |
| 301 } | |
| 302 | |
| 303 return $self->{'_gsf_frame'}; | |
| 304 } | |
| 305 | |
| 306 =head2 primary_tag | |
| 307 | |
| 308 Title : primary_tag | |
| 309 Usage : $tag = $feat->primary_tag() | |
| 310 $feat->primary_tag('exon') | |
| 311 Function: get/set on the primary tag for a feature, | |
| 312 eg 'exon' | |
| 313 Returns : a string | |
| 314 Args : none | |
| 315 | |
| 316 | |
| 317 =cut | |
| 318 | |
| 319 sub primary_tag{ | |
| 320 my ($self,$arg) = @_; | |
| 321 | |
| 322 if (defined($arg)) { | |
| 323 # throw warnings about setting primary tag | |
| 324 my ($p,$f,$l) = caller; | |
| 325 $self->warn("$f:$l setting primary_tag now deprecated." . | |
| 326 "Primary tag is delegated to analysis object"); | |
| 327 } | |
| 328 | |
| 329 unless($self->analysis) { | |
| 330 return ''; | |
| 331 } | |
| 332 | |
| 333 return $self->analysis->gff_feature(); | |
| 334 } | |
| 335 | |
| 336 =head2 source_tag | |
| 337 | |
| 338 Title : source_tag | |
| 339 Usage : $tag = $feat->source_tag() | |
| 340 $feat->source_tag('genscan'); | |
| 341 Function: Returns the source tag for a feature, | |
| 342 eg, 'genscan' | |
| 343 Returns : a string | |
| 344 Args : none | |
| 345 | |
| 346 | |
| 347 =cut | |
| 348 | |
| 349 sub source_tag{ | |
| 350 my ($self,$arg) = @_; | |
| 351 | |
| 352 if (defined($arg)) { | |
| 353 # throw warnings about setting primary tag | |
| 354 my ($p,$f,$l) = caller; | |
| 355 $self->warn("$f:$l setting source_tag now deprecated. " . | |
| 356 "Source tag is delegated to analysis object"); | |
| 357 } | |
| 358 | |
| 359 unless($self->analysis) { | |
| 360 return ""; | |
| 361 } | |
| 362 | |
| 363 return $self->analysis->gff_source(); | |
| 364 } | |
| 365 | |
| 366 | |
| 367 =head2 analysis | |
| 368 | |
| 369 Title : analysis | |
| 370 Usage : $sf->analysis(); | |
| 371 Function: Store details of the program/database | |
| 372 and versions used to create this feature. | |
| 373 | |
| 374 Example : | |
| 375 Returns : | |
| 376 Args : | |
| 377 | |
| 378 | |
| 379 =cut | |
| 380 | |
| 381 sub analysis { | |
| 382 my ($self,$value) = @_; | |
| 383 | |
| 384 if (defined($value)) { | |
| 385 unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) { | |
| 386 $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object " | |
| 387 . "but a $value object"); | |
| 388 } | |
| 389 | |
| 390 $self->{_analysis} = $value; | |
| 391 } else { | |
| 392 #if _analysis is not defined, create a new analysis object | |
| 393 unless(defined $self->{_analysis}) { | |
| 394 $self->{_analysis} = new Bio::EnsEMBL::Analysis(); | |
| 395 } | |
| 396 } | |
| 397 | |
| 398 return $self->{_analysis}; | |
| 399 } | |
| 400 | |
| 401 =head2 validate | |
| 402 | |
| 403 Title : validate | |
| 404 Usage : $sf->validate; | |
| 405 Function: Checks whether all the data is present in the | |
| 406 object. | |
| 407 Example : | |
| 408 Returns : | |
| 409 Args : | |
| 410 | |
| 411 | |
| 412 =cut | |
| 413 | |
| 414 sub validate { | |
| 415 my ($self) = @_; | |
| 416 | |
| 417 $self->vthrow("Seqname not defined in feature") unless defined($self->seqname); | |
| 418 $self->vthrow("start not defined in feature") unless defined($self->start); | |
| 419 $self->vthrow("end not defined in feature") unless defined($self->end); | |
| 420 $self->vthrow("strand not defined in feature") unless defined($self->strand); | |
| 421 $self->vthrow("score not defined in feature") unless defined($self->score); | |
| 422 $self->vthrow("analysis not defined in feature") unless defined($self->analysis); | |
| 423 | |
| 424 if ($self->end < $self->start) { | |
| 425 $self->vthrow("End coordinate < start coordinate"); | |
| 426 } | |
| 427 | |
| 428 } | |
| 429 | |
| 430 | |
| 431 | |
| 432 sub vthrow { | |
| 433 my ($self,$message) = @_; | |
| 434 | |
| 435 print(STDERR "Error validating feature [$message]\n"); | |
| 436 print(STDERR " Seqname : [" . $self->{_seqname} . "]\n"); | |
| 437 print(STDERR " Start : [" . $self->{_gsf_start} . "]\n"); | |
| 438 print(STDERR " End : [" . $self->{_gsf_end} . "]\n"); | |
| 439 print(STDERR " Strand : [" . | |
| 440 ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n"); | |
| 441 | |
| 442 print(STDERR " Score : [" . $self->{_gsf_score} . "]\n"); | |
| 443 | |
| 444 print(STDERR " Analysis : [" . $self->{_analysis}->dbID . "]\n"); | |
| 445 | |
| 446 $self->throw("Invalid feature - see dump on STDERR"); | |
| 447 } | |
| 448 | |
| 449 | |
| 450 =head2 validate_prot_feature | |
| 451 | |
| 452 Title : validate_prot_feature | |
| 453 Usage : | |
| 454 Function: | |
| 455 Example : | |
| 456 Returns : | |
| 457 Args : | |
| 458 | |
| 459 | |
| 460 =cut | |
| 461 | |
| 462 # Shouldn't this go as "validate" into Pro_SeqFeature? | |
| 463 sub validate_prot_feature{ | |
| 464 my ($self,$num) = @_; | |
| 465 $self->throw("Seqname not defined in feature") unless defined($self->seqname); | |
| 466 $self->throw("start not defined in feature") unless defined($self->start); | |
| 467 $self->throw("end not defined in feature") unless defined($self->end); | |
| 468 if ($num == 1) { | |
| 469 $self->throw("score not defined in feature") unless defined($self->score); | |
| 470 $self->throw("percent_id not defined in feature") unless defined($self->percent_id); | |
| 471 $self->throw("evalue not defined in feature") unless defined($self->p_value); | |
| 472 } | |
| 473 $self->throw("analysis not defined in feature") unless defined($self->analysis); | |
| 474 } | |
| 475 | |
| 476 | |
| 477 | |
| 478 # These methods are specified in the SeqFeatureI interface but we don't want | |
| 479 # people to store data in them. These are just here in order to keep | |
| 480 # existing code working | |
| 481 | |
| 482 | |
| 483 =head2 has_tag | |
| 484 | |
| 485 Title : has_tag | |
| 486 Usage : $value = $self->has_tag('some_tag') | |
| 487 Function: Returns the value of the tag (undef if | |
| 488 none) | |
| 489 Returns : | |
| 490 Args : | |
| 491 | |
| 492 | |
| 493 =cut | |
| 494 | |
| 495 sub has_tag{ | |
| 496 my ($self,$tag) = (shift, shift); | |
| 497 | |
| 498 return exists $self->{'_gsf_tag_hash'}->{$tag}; | |
| 499 } | |
| 500 | |
| 501 =head2 add_tag_value | |
| 502 | |
| 503 Title : add_tag_value | |
| 504 Usage : $self->add_tag_value('note',"this is a note"); | |
| 505 Returns : nothing | |
| 506 Args : tag (string) and value (any scalar) | |
| 507 | |
| 508 | |
| 509 =cut | |
| 510 | |
| 511 sub add_tag_value{ | |
| 512 my ($self,$tag,$value) = @_; | |
| 513 | |
| 514 if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) { | |
| 515 $self->{'_gsf_tag_hash'}->{$tag} = []; | |
| 516 } | |
| 517 | |
| 518 push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value); | |
| 519 } | |
| 520 | |
| 521 =head2 each_tag_value | |
| 522 | |
| 523 Title : each_tag_value | |
| 524 Usage : | |
| 525 Function: | |
| 526 Example : | |
| 527 Returns : | |
| 528 Args : | |
| 529 | |
| 530 | |
| 531 =cut | |
| 532 | |
| 533 sub each_tag_value { | |
| 534 my ($self,$tag) = @_; | |
| 535 if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) { | |
| 536 $self->throw("asking for tag value that does not exist $tag"); | |
| 537 } | |
| 538 | |
| 539 return @{$self->{'_gsf_tag_hash'}->{$tag}}; | |
| 540 } | |
| 541 | |
| 542 | |
| 543 =head2 all_tags | |
| 544 | |
| 545 Title : all_tags | |
| 546 Usage : @tags = $feat->all_tags() | |
| 547 Function: gives all tags for this feature | |
| 548 Returns : an array of strings | |
| 549 Args : none | |
| 550 | |
| 551 | |
| 552 =cut | |
| 553 | |
| 554 sub all_tags{ | |
| 555 my ($self,@args) = @_; | |
| 556 | |
| 557 return keys %{$self->{'_gsf_tag_hash'}}; | |
| 558 } | |
| 559 | |
| 560 | |
| 561 | |
| 562 =head2 seqname | |
| 563 | |
| 564 Arg [1] : string $seqname | |
| 565 Example : $seqname = $self->seqname(); | |
| 566 Description: Obtains the seqname of this features sequence. This is set | |
| 567 automatically when a sequence with a name is attached, or may | |
| 568 be set manually. | |
| 569 Returntype : string | |
| 570 Exceptions : none | |
| 571 Caller : general, attach_seq | |
| 572 | |
| 573 =cut | |
| 574 | |
| 575 sub seqname{ | |
| 576 my ($self,$seqname) = @_; | |
| 577 | |
| 578 my $seq = $self->contig(); | |
| 579 | |
| 580 if(defined $seqname) { | |
| 581 $self->{_seqname} = $seqname; | |
| 582 } else { | |
| 583 if($seq && ref $seq && $seq->can('name')) { | |
| 584 $self->{_seqname} = $seq->name(); | |
| 585 } | |
| 586 } | |
| 587 | |
| 588 return $self->{_seqname}; | |
| 589 } | |
| 590 | |
| 591 | |
| 592 =head2 attach_seq | |
| 593 | |
| 594 Title : attach_seq | |
| 595 Usage : $sf->attach_seq($seq) | |
| 596 Function: Attaches a Bio::PrimarySeqI object to this feature. This | |
| 597 Bio::PrimarySeqI object is for the *entire* sequence: ie | |
| 598 from 1 to 10000 | |
| 599 Example : | |
| 600 Returns : | |
| 601 Args : | |
| 602 | |
| 603 | |
| 604 =cut | |
| 605 | |
| 606 sub attach_seq{ | |
| 607 my ($self, $seq) = @_; | |
| 608 | |
| 609 $self->contig($seq); | |
| 610 } | |
| 611 | |
| 612 =head2 seq | |
| 613 | |
| 614 Example : $tseq = $sf->seq() | |
| 615 Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature | |
| 616 Returns : a Bio::PrimarySeq object (I reckon) | |
| 617 | |
| 618 =cut | |
| 619 | |
| 620 sub seq{ | |
| 621 my ($self,$arg) = @_; | |
| 622 | |
| 623 if( defined $arg ) { | |
| 624 $self->throw("Calling SeqFeature::Generic->seq with an argument. " . | |
| 625 "You probably want attach_seq"); | |
| 626 } | |
| 627 | |
| 628 if( ! exists $self->{'_gsf_seq'} ) { | |
| 629 return undef; | |
| 630 } | |
| 631 | |
| 632 # assumming our seq object is sensible, it should not have to yank | |
| 633 # the entire sequence out here. | |
| 634 | |
| 635 my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end()); | |
| 636 | |
| 637 | |
| 638 if( $self->strand == -1 ) { | |
| 639 | |
| 640 # ok. this does not work well (?) | |
| 641 #print STDERR "Before revcom", $seq->str, "\n"; | |
| 642 $seq = $seq->revcom; | |
| 643 #print STDERR "After revcom", $seq->str, "\n"; | |
| 644 } | |
| 645 | |
| 646 return $seq; | |
| 647 } | |
| 648 | |
| 649 =head2 entire_seq | |
| 650 | |
| 651 Title : entire_seq | |
| 652 Usage : $whole_seq = $sf->entire_seq() | |
| 653 Function: gives the entire sequence that this seqfeature is attached to | |
| 654 Example : | |
| 655 Returns : | |
| 656 Args : | |
| 657 | |
| 658 | |
| 659 =cut | |
| 660 | |
| 661 sub entire_seq{ | |
| 662 my ($self) = @_; | |
| 663 | |
| 664 return $self->contig; | |
| 665 } | |
| 666 | |
| 667 | |
| 668 =head2 sub_SeqFeature | |
| 669 | |
| 670 Title : sub_SeqFeature | |
| 671 Usage : @feats = $feat->sub_SeqFeature(); | |
| 672 Function: Returns an array of sub Sequence Features | |
| 673 Returns : An array | |
| 674 Args : none | |
| 675 | |
| 676 | |
| 677 =cut | |
| 678 | |
| 679 sub sub_SeqFeature { | |
| 680 my ($self) = @_; | |
| 681 | |
| 682 if ( $self->{'_gsf_sub_array'} ) { | |
| 683 return @{ $self->{'_gsf_sub_array'} }; | |
| 684 } else { | |
| 685 return (); | |
| 686 } | |
| 687 } | |
| 688 | |
| 689 =head2 add_sub_SeqFeature | |
| 690 | |
| 691 Title : add_sub_SeqFeature | |
| 692 Usage : $feat->add_sub_SeqFeature($subfeat); | |
| 693 $feat->add_sub_SeqFeature($subfeat,'EXPAND') | |
| 694 Function: adds a SeqFeature into the subSeqFeature array. | |
| 695 with no 'EXPAND' qualifer, subfeat will be tested | |
| 696 as to whether it lies inside the parent, and throw | |
| 697 an exception if not. | |
| 698 | |
| 699 If EXPAND is used, the parents start/end/strand will | |
| 700 be adjusted so that it grows to accommodate the new | |
| 701 subFeature | |
| 702 Returns : nothing | |
| 703 Args : An object which has the SeqFeatureI interface | |
| 704 | |
| 705 | |
| 706 =cut | |
| 707 | |
| 708 sub add_sub_SeqFeature{ | |
| 709 my ($self,$feat,$expand) = @_; | |
| 710 | |
| 711 if( !$feat->isa('Bio::SeqFeatureI') ) { | |
| 712 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware..."); | |
| 713 } | |
| 714 | |
| 715 if( $expand eq 'EXPAND' ) { | |
| 716 # if this doesn't have start/end set - forget it! | |
| 717 if( !defined $self->start && !defined $self->end ) { | |
| 718 $self->start($feat->start()); | |
| 719 $self->end($feat->end()); | |
| 720 $self->strand($feat->strand); | |
| 721 } else { | |
| 722 my ($start,$end); | |
| 723 if( $feat->start < $self->start ) { | |
| 724 $start = $feat->start; | |
| 725 } | |
| 726 | |
| 727 if( $feat->end > $self->end ) { | |
| 728 $end = $feat->end; | |
| 729 } | |
| 730 | |
| 731 $self->start($start); | |
| 732 $self->end($end); | |
| 733 | |
| 734 } | |
| 735 } else { | |
| 736 if( !defined($feat->start()) || !defined($feat->end()) || | |
| 737 !defined($self->start()) || !defined($self->end())) { | |
| 738 $self->throw( "This SeqFeature and the sub_SeqFeature must define". | |
| 739 " start and end."); | |
| 740 } | |
| 741 if($feat->start() > $feat->end() || $self->start() > $self->end()) { | |
| 742 $self->throw("This SeqFeature and the sub_SeqFeature must have " . | |
| 743 "start that is less than or equal to end."); | |
| 744 } | |
| 745 if($feat->start() < $self->start() || $feat->end() > $self->end() ) { | |
| 746 $self->throw("$feat is not contained within parent feature, " . | |
| 747 "and expansion is not valid"); | |
| 748 } | |
| 749 } | |
| 750 | |
| 751 push(@{$self->{'_gsf_sub_array'}},$feat); | |
| 752 | |
| 753 } | |
| 754 | |
| 755 =head2 flush_sub_SeqFeature | |
| 756 | |
| 757 Title : flush_sub_SeqFeature | |
| 758 Usage : $sf->flush_sub_SeqFeature | |
| 759 Function: Removes all sub SeqFeature | |
| 760 (if you want to remove only a subset, take | |
| 761 an array of them all, flush them, and add | |
| 762 back only the guys you want) | |
| 763 Example : | |
| 764 Returns : none | |
| 765 Args : none | |
| 766 | |
| 767 | |
| 768 =cut | |
| 769 | |
| 770 sub flush_sub_SeqFeature { | |
| 771 my ($self) = @_; | |
| 772 | |
| 773 $self->{'_gsf_sub_array'} = []; # zap the array implicitly. | |
| 774 } | |
| 775 | |
| 776 | |
| 777 sub id { | |
| 778 my ($self,$value) = @_; | |
| 779 | |
| 780 if (defined($value)) { | |
| 781 $self->{_id} = $value; | |
| 782 } | |
| 783 | |
| 784 return $self->{_id}; | |
| 785 | |
| 786 } | |
| 787 | |
| 788 =head2 percent_id | |
| 789 | |
| 790 Title : percent_id | |
| 791 Usage : $pid = $feat->percent_id() | |
| 792 $feat->percent_id($pid) | |
| 793 Function: get/set on percentage identity information | |
| 794 Returns : float | |
| 795 Args : none if get, the new value if set | |
| 796 | |
| 797 =cut | |
| 798 | |
| 799 sub percent_id { | |
| 800 my ($self,$value) = @_; | |
| 801 | |
| 802 if (defined($value)) | |
| 803 { | |
| 804 $self->{_percent_id} = $value; | |
| 805 } | |
| 806 | |
| 807 return $self->{_percent_id}; | |
| 808 } | |
| 809 | |
| 810 =head2 p_value | |
| 811 | |
| 812 Title : p_value | |
| 813 Usage : $p_val = $feat->p_value() | |
| 814 $feat->p_value($p_val) | |
| 815 Function: get/set on p value information | |
| 816 Returns : float | |
| 817 Args : none if get, the new value if set | |
| 818 | |
| 819 =cut | |
| 820 | |
| 821 sub p_value { | |
| 822 my ($self,$value) = @_; | |
| 823 | |
| 824 if (defined($value)) | |
| 825 { | |
| 826 $self->{_p_value} = $value; | |
| 827 } | |
| 828 | |
| 829 return $self->{_p_value}; | |
| 830 } | |
| 831 | |
| 832 =head2 phase | |
| 833 | |
| 834 Title : phase | |
| 835 Usage : $phase = $feat->phase() | |
| 836 $feat->phase($phase) | |
| 837 Function: get/set on start phase of predicted exon feature | |
| 838 Returns : [0,1,2] | |
| 839 Args : none if get, 0,1 or 2 if set. | |
| 840 | |
| 841 =cut | |
| 842 | |
| 843 sub phase { | |
| 844 my ($self, $value) = @_; | |
| 845 | |
| 846 if (defined($value) ) | |
| 847 { | |
| 848 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2); | |
| 849 $self->{_phase} = $value; | |
| 850 } | |
| 851 | |
| 852 return $self->{_phase}; | |
| 853 } | |
| 854 | |
| 855 =head2 end_phase | |
| 856 | |
| 857 Title : end_phase | |
| 858 Usage : $end_phase = $feat->end_phase() | |
| 859 $feat->end_phase($end_phase) | |
| 860 Function: returns end_phase based on phase and length of feature | |
| 861 Returns : [0,1,2] | |
| 862 Args : none if get, 0,1 or 2 if set. | |
| 863 | |
| 864 =cut | |
| 865 | |
| 866 sub end_phase { | |
| 867 my ($self, $value) = @_; | |
| 868 | |
| 869 if (defined($value)) | |
| 870 { | |
| 871 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2); | |
| 872 $self->{_end_phase} = $value; | |
| 873 } | |
| 874 | |
| 875 return $self->{_end_phase}; | |
| 876 } | |
| 877 | |
| 878 | |
| 879 sub gffstring { | |
| 880 my ($self) = @_; | |
| 881 | |
| 882 my $str; | |
| 883 | |
| 884 my $strand = "+"; | |
| 885 | |
| 886 if ((defined $self->strand)&&($self->strand == -1)) { | |
| 887 $strand = "-"; | |
| 888 } | |
| 889 | |
| 890 $str .= (defined $self->seqname) ? $self->seqname."\t" : "\t"; | |
| 891 $str .= (defined $self->source_tag) ? $self->source_tag."\t" : "\t"; | |
| 892 $str .= (defined $self->primary_tag) ? $self->primary_tag."\t" : "\t"; | |
| 893 $str .= (defined $self->start) ? $self->start."\t" : "\t"; | |
| 894 $str .= (defined $self->end) ? $self->end."\t" : "\t"; | |
| 895 $str .= (defined $self->score) ? $self->score."\t" : "\t"; | |
| 896 $str .= (defined $self->strand) ? $strand."\t" : ".\t"; | |
| 897 $str .= (defined $self->phase) ? $self->phase."\t" : ".\t"; | |
| 898 eval{ | |
| 899 $str .= (defined $self->end_phase) ? $self->end_phase."\t" : ".\t"; | |
| 900 }; | |
| 901 | |
| 902 return $str; | |
| 903 } | |
| 904 | |
| 905 | |
| 906 =head2 external_db | |
| 907 | |
| 908 Title : external_db | |
| 909 Usage : $pid = $feat->external_db() | |
| 910 $feat->external_db($dbid) | |
| 911 Function: get/set for an external db accession number (e.g.: Interpro) | |
| 912 Returns : | |
| 913 Args : none if get, the new value if set | |
| 914 | |
| 915 =cut | |
| 916 | |
| 917 sub external_db { | |
| 918 my ($self,$value) = @_; | |
| 919 | |
| 920 if (defined($value)) | |
| 921 { | |
| 922 $self->{'_external_db'} = $value; | |
| 923 } | |
| 924 | |
| 925 return $self->{'_external_db'}; | |
| 926 } | |
| 927 | |
| 928 | |
| 929 | |
| 930 =head2 contig | |
| 931 | |
| 932 Arg [1] : Bio::PrimarySeqI $seq | |
| 933 Example : $seq = $self->contig; | |
| 934 Description: Accessor to attach/retrieve a sequence to/from a feature | |
| 935 Returntype : Bio::PrimarySeqI | |
| 936 Exceptions : none | |
| 937 Caller : general | |
| 938 | |
| 939 =cut | |
| 940 | |
| 941 sub contig { | |
| 942 my ($self, $arg) = @_; | |
| 943 | |
| 944 if ($arg) { | |
| 945 unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) { | |
| 946 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures"); | |
| 947 } | |
| 948 | |
| 949 $self->{'_gsf_seq'} = $arg; | |
| 950 | |
| 951 # attach to sub features if they want it | |
| 952 | |
| 953 foreach my $sf ($self->sub_SeqFeature) { | |
| 954 if ($sf->can("attach_seq")) { | |
| 955 $sf->attach_seq($arg); | |
| 956 } | |
| 957 } | |
| 958 } | |
| 959 #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'}); | |
| 960 # my ($p, $f, $l) = caller; | |
| 961 # print STDERR "Caller = ".$f." ".$l."\n"; | |
| 962 return $self->{'_gsf_seq'}; | |
| 963 } | |
| 964 | |
| 965 | |
| 966 | |
| 967 sub is_splittable { | |
| 968 my ($self, $arg) = @_; | |
| 969 | |
| 970 if (defined $arg) { | |
| 971 $self->{'_is_splittable'} = $arg; | |
| 972 } | |
| 973 return $self->{'_is_splittable'}; | |
| 974 } | |
| 975 | |
| 976 | |
| 977 sub transform { | |
| 978 my ($self, $slice) = @_; | |
| 979 | |
| 980 unless (defined $slice) { | |
| 981 | |
| 982 if ((defined $self->contig) && | |
| 983 ($self->contig->isa("Bio::EnsEMBL::RawContig"))) { | |
| 984 | |
| 985 # we are already in rawcontig coords, nothing needs to be done | |
| 986 return $self; | |
| 987 | |
| 988 } | |
| 989 else { | |
| 990 # transform to raw_contig coords from Slice coords | |
| 991 return $self->_transform_to_RawContig(); | |
| 992 } | |
| 993 } | |
| 994 | |
| 995 if (defined $self->contig) { | |
| 996 | |
| 997 if ($self->contig->isa("Bio::EnsEMBL::RawContig")) { | |
| 998 | |
| 999 # transform to slice coords from raw contig coords | |
| 1000 return $self->_transform_to_Slice($slice); | |
| 1001 } | |
| 1002 elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) { | |
| 1003 | |
| 1004 # transform to slice coords from other slice coords | |
| 1005 return $self->_transform_between_Slices($slice); | |
| 1006 } | |
| 1007 else { | |
| 1008 | |
| 1009 # Unknown contig type | |
| 1010 $self->throw("Cannot transform unknown contig type @{[$self->contig]}"); | |
| 1011 } | |
| 1012 } | |
| 1013 else { | |
| 1014 | |
| 1015 #Can't convert to slice coords without a contig to work with | |
| 1016 return $self->throw("Object's contig is not defined - cannot transform"); | |
| 1017 } | |
| 1018 | |
| 1019 } | |
| 1020 | |
| 1021 | |
| 1022 sub _transform_to_Slice { | |
| 1023 my ($self, $slice) = @_; | |
| 1024 | |
| 1025 $self->throw("can't transform coordinates of $self without a contig defined") | |
| 1026 unless $self->contig; | |
| 1027 | |
| 1028 unless($self->contig->adaptor) { | |
| 1029 $self->throw("cannot transform coordinates of $self without adaptor " . | |
| 1030 "attached to contig"); | |
| 1031 } | |
| 1032 | |
| 1033 my $dbh = $self->contig->adaptor->db; | |
| 1034 | |
| 1035 my $mapper = | |
| 1036 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type); | |
| 1037 my $rca = $dbh->get_RawContigAdaptor; | |
| 1038 | |
| 1039 my @mapped = $mapper->map_coordinates_to_assembly( | |
| 1040 $self->contig->dbID, | |
| 1041 $self->start, | |
| 1042 $self->end, | |
| 1043 $self->strand | |
| 1044 ); | |
| 1045 | |
| 1046 unless (@mapped) { | |
| 1047 $self->throw("couldn't map $self to Slice"); | |
| 1048 } | |
| 1049 | |
| 1050 unless (@mapped == 1) { | |
| 1051 $self->throw("$self should only map to one chromosome - " . | |
| 1052 "something bad has happened ..."); | |
| 1053 } | |
| 1054 | |
| 1055 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
| 1056 $self->warn("feature lies on gap\n"); | |
| 1057 return; | |
| 1058 } | |
| 1059 | |
| 1060 if( ! defined $slice->chr_name() ) { | |
| 1061 my $slice_adaptor = $slice->adaptor(); | |
| 1062 %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )}; | |
| 1063 } | |
| 1064 | |
| 1065 # mapped coords are on chromosome - need to convert to slice | |
| 1066 if($slice->strand == 1) { | |
| 1067 $self->start ($mapped[0]->start - $slice->chr_start + 1); | |
| 1068 $self->end ($mapped[0]->end - $slice->chr_start + 1); | |
| 1069 $self->strand ($mapped[0]->strand); | |
| 1070 } else { | |
| 1071 $self->start ($slice->chr_end - $mapped[0]->end + 1); | |
| 1072 $self->end ($slice->chr_end - $mapped[0]->start + 1); | |
| 1073 $self->strand ($mapped[0]->strand * -1); | |
| 1074 } | |
| 1075 | |
| 1076 $self->seqname($mapped[0]->id); | |
| 1077 | |
| 1078 #set the contig to the slice | |
| 1079 $self->contig($slice); | |
| 1080 | |
| 1081 return $self; | |
| 1082 } | |
| 1083 | |
| 1084 | |
| 1085 sub _transform_between_Slices { | |
| 1086 my ($self, $to_slice) = @_; | |
| 1087 | |
| 1088 my $from_slice = $self->contig; | |
| 1089 | |
| 1090 $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice") | |
| 1091 unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") ); | |
| 1092 | |
| 1093 if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) { | |
| 1094 $self->warn("Can't transform between chromosomes: $c1 and $c2"); | |
| 1095 return; | |
| 1096 } | |
| 1097 | |
| 1098 my($start, $end, $strand); | |
| 1099 | |
| 1100 #first convert to assembly coords | |
| 1101 if($from_slice->strand == 1) { | |
| 1102 $start = $from_slice->chr_start + $self->start - 1; | |
| 1103 $end = $from_slice->chr_start + $self->end - 1; | |
| 1104 $strand = $self->strand; | |
| 1105 } else { | |
| 1106 $start = $from_slice->chr_end - $self->end + 1; | |
| 1107 $end = $from_slice->chr_end - $self->start + 1; | |
| 1108 $strand = $self->strand; | |
| 1109 } | |
| 1110 | |
| 1111 #now convert to the other slice's coords | |
| 1112 if($to_slice->strand == 1) { | |
| 1113 $self->start ($start - $to_slice->chr_start + 1); | |
| 1114 $self->end ($end - $to_slice->chr_start + 1); | |
| 1115 $self->strand($strand); | |
| 1116 } else { | |
| 1117 $self->start ($to_slice->chr_end - $end + 1); | |
| 1118 $self->end ($to_slice->chr_end - $start + 1); | |
| 1119 $self->strand($strand * -1); | |
| 1120 } | |
| 1121 | |
| 1122 $self->contig($to_slice); | |
| 1123 | |
| 1124 return $self; | |
| 1125 } | |
| 1126 | |
| 1127 | |
| 1128 sub _transform_to_RawContig { | |
| 1129 my($self) = @_; | |
| 1130 | |
| 1131 #print STDERR "transforming ".$self." to raw contig coords\n"; | |
| 1132 $self->throw("can't transform coordinates of $self without a contig defined") | |
| 1133 unless $self->contig; | |
| 1134 | |
| 1135 my $slice = $self->contig; | |
| 1136 | |
| 1137 unless($slice->adaptor) { | |
| 1138 $self->throw("can't transform coordinates of $self without an adaptor " . | |
| 1139 "attached to the feature's slice"); | |
| 1140 } | |
| 1141 | |
| 1142 my $dbh = $slice->adaptor->db; | |
| 1143 | |
| 1144 my $mapper = | |
| 1145 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type); | |
| 1146 my $rca = $dbh->get_RawContigAdaptor; | |
| 1147 | |
| 1148 #first convert the features coordinates to assembly coordinates | |
| 1149 my($start, $end, $strand); | |
| 1150 if($slice->strand == 1) { | |
| 1151 $start = $slice->chr_start + $self->start - 1; | |
| 1152 $end = $slice->chr_start + $self->end - 1; | |
| 1153 $strand = $self->strand; | |
| 1154 } else { | |
| 1155 $start = $slice->chr_end - $self->end + 1; | |
| 1156 $end = $slice->chr_end - $self->start + 1; | |
| 1157 $strand = $self->strand * -1; | |
| 1158 } | |
| 1159 | |
| 1160 #convert the assembly coordinates to RawContig coordinates | |
| 1161 my @mapped = $mapper->map_coordinates_to_rawcontig( | |
| 1162 $slice->chr_name, | |
| 1163 $start, | |
| 1164 $end, | |
| 1165 $strand | |
| 1166 ); | |
| 1167 | |
| 1168 unless (@mapped) { | |
| 1169 $self->throw("couldn't map $self"); | |
| 1170 return $self; | |
| 1171 } | |
| 1172 | |
| 1173 if (@mapped == 1) { | |
| 1174 | |
| 1175 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
| 1176 $self->warn("feature lies on gap\n"); | |
| 1177 return; | |
| 1178 } | |
| 1179 | |
| 1180 my $rc = $rca->fetch_by_dbID($mapped[0]->id); | |
| 1181 | |
| 1182 $self->start ($mapped[0]->start); | |
| 1183 $self->end ($mapped[0]->end); | |
| 1184 $self->strand ($mapped[0]->strand); | |
| 1185 $self->seqname ($mapped[0]->id); | |
| 1186 #print STDERR "setting contig to be ".$mapped[0]->id."\n"; | |
| 1187 $self->contig($rca->fetch_by_dbID($mapped[0]->id)); | |
| 1188 | |
| 1189 return $self; | |
| 1190 } | |
| 1191 else { | |
| 1192 | |
| 1193 # more than one object returned from mapper | |
| 1194 # possibly more than one RawContig in region | |
| 1195 | |
| 1196 my (@gaps, @coords); | |
| 1197 | |
| 1198 foreach my $m (@mapped) { | |
| 1199 | |
| 1200 if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
| 1201 push @gaps, $m; | |
| 1202 } | |
| 1203 elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) { | |
| 1204 push @coords, $m; | |
| 1205 } | |
| 1206 } | |
| 1207 | |
| 1208 # case where only one RawContig maps | |
| 1209 if (@coords == 1) { | |
| 1210 | |
| 1211 $self->start ($coords[0]->start); | |
| 1212 $self->end ($coords[0]->end); | |
| 1213 $self->strand ($coords[0]->strand); | |
| 1214 $self->seqname($coords[0]->id); | |
| 1215 #print STDERR "2 setting contig to be ".$coords[0]->id."\n"; | |
| 1216 $self->contig ($rca->fetch_by_dbID($coords[0]->id)); | |
| 1217 | |
| 1218 $self->warn("Feature [$self] truncated as lies partially on a gap"); | |
| 1219 return $self; | |
| 1220 } | |
| 1221 | |
| 1222 unless ($self->is_splittable) { | |
| 1223 $self->warn("Feature spans >1 raw contig - can't split\n"); | |
| 1224 return; | |
| 1225 } | |
| 1226 | |
| 1227 my @out; | |
| 1228 my $obj = ref $self; | |
| 1229 | |
| 1230 SPLIT: foreach my $map (@mapped) { | |
| 1231 | |
| 1232 if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
| 1233 $self->warn("piece of evidence lies on gap\n"); | |
| 1234 next SPLIT; | |
| 1235 } | |
| 1236 | |
| 1237 my $feat = $obj->new; | |
| 1238 | |
| 1239 $feat->start ($map->start); | |
| 1240 $feat->end ($map->end); | |
| 1241 $feat->strand ($map->strand); | |
| 1242 #print STDERR "3 setting contig to be ".$mapped[0]->id."\n"; | |
| 1243 $feat->contig ($rca->fetch_by_dbID($map->id)); | |
| 1244 $feat->adaptor($self->adaptor) if $self->adaptor(); | |
| 1245 $feat->display_label($self->display_label) if($self->can('display_label')); | |
| 1246 $feat->analysis($self->analysis); | |
| 1247 push @out, $feat; | |
| 1248 } | |
| 1249 | |
| 1250 return @out; | |
| 1251 } | |
| 1252 } | |
| 1253 | |
| 1254 | |
| 1255 1; |
