Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/SeqFeature.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature. | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 Do not use this module if you can avoid it. It has been replaced by | |
28 Bio::EnsEMBL::Feature. This module has a long history of usage but has | |
29 become very bloated, and quite unweildy. It was decided to replace | |
30 it completely with a smaller, light-weight feature class rather than | |
31 attempting to refactor this class, and maintain strict backwards | |
32 compatibility. | |
33 | |
34 Part of the complexity of this class was in its extensive | |
35 inheritance. As an example the following is a simplified inheritance | |
36 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature: | |
37 | |
38 Bio::EnsEMBL::DnaAlignFeature | |
39 Bio::EnsEMBL::BaseAlignFeature | |
40 Bio::EnsEMBL::FeaturePair | |
41 Bio::EnsEMBL::SeqFeature | |
42 Bio::EnsEMBL::SeqFeatureI | |
43 Bio::SeqFeatureI | |
44 Bio::RangeI | |
45 Bio::Root::RootI | |
46 | |
47 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much | |
48 easier to understand and maintain. | |
49 | |
50 =head1 METHODS | |
51 | |
52 =cut | |
53 | |
54 | |
55 # Let the code begin... | |
56 | |
57 | |
58 package Bio::EnsEMBL::SeqFeature; | |
59 | |
60 use vars qw(@ISA); | |
61 use strict; | |
62 | |
63 | |
64 use Bio::EnsEMBL::SeqFeatureI; | |
65 use Bio::EnsEMBL::Analysis; | |
66 use Bio::EnsEMBL::Root; | |
67 | |
68 @ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI); | |
69 | |
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
71 | |
72 sub new { | |
73 my($caller,@args) = @_; | |
74 | |
75 my $self = {}; | |
76 | |
77 if(ref $caller) { | |
78 bless $self, ref $caller; | |
79 } else { | |
80 bless $self, $caller; | |
81 } | |
82 | |
83 $self->{'_gsf_tag_hash'} = {}; | |
84 $self->{'_gsf_sub_array'} = []; | |
85 $self->{'_parse_h'} = {}; | |
86 $self->{'_is_splittable'} = 0; | |
87 | |
88 my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag, | |
89 $primary_tag, $percent_id, $p_value, $phase, $end_phase) = | |
90 | |
91 &rearrange([qw(START | |
92 END | |
93 STRAND | |
94 FRAME | |
95 SCORE | |
96 ANALYSIS | |
97 SEQNAME | |
98 SOURCE_TAG | |
99 PRIMARY_TAG | |
100 PERCENT_ID | |
101 P_VALUE | |
102 PHASE | |
103 END_PHASE | |
104 )],@args); | |
105 | |
106 # $gff_string && $self->_from_gff_string($gff_string); | |
107 | |
108 if ( defined $analysis && $analysis ne "") { $self->analysis($analysis)}; | |
109 if ( defined ($start) && $start ne "" ) { $self->start($start)}; | |
110 if ( defined ($end ) && $end ne "" ) { $self->end($end)} | |
111 if ( defined $strand && $strand ne "") { $self->strand($strand)} | |
112 if ( defined $frame && $frame ne "") { $self->frame($frame)} | |
113 if ( defined $score && $score ne "") { $self->score($score)} | |
114 if ( defined $seqname && $seqname ne "") { $self->seqname($seqname)}; | |
115 if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)}; | |
116 if ( defined $p_value && $p_value ne "") { $self->p_value($p_value)}; | |
117 if ( defined $phase && $phase ne "") { $self->phase($phase)}; | |
118 if ( defined $end_phase && $end_phase ne "") { $self->end_phase($end_phase)}; | |
119 | |
120 return $self; # success - we hope! | |
121 | |
122 } | |
123 | |
124 | |
125 | |
126 | |
127 | |
128 | |
129 =head2 start | |
130 | |
131 Title : start | |
132 Usage : $start = $feat->start | |
133 $feat->start(20) | |
134 Function: Get/set on the start coordinate of the feature | |
135 Returns : integer | |
136 Args : none | |
137 | |
138 | |
139 =cut | |
140 | |
141 sub start{ | |
142 my ($self,$value) = @_; | |
143 | |
144 if (defined($value)) { | |
145 if ($value !~ /^\-?\d+/ ) { | |
146 $self->throw("$value is not a valid start"); | |
147 } | |
148 | |
149 $self->{'_gsf_start'} = $value | |
150 } | |
151 | |
152 return $self->{'_gsf_start'}; | |
153 | |
154 } | |
155 | |
156 =head2 end | |
157 | |
158 Title : end | |
159 Usage : $end = $feat->end | |
160 $feat->end($end) | |
161 Function: get/set on the end coordinate of the feature | |
162 Returns : integer | |
163 Args : none | |
164 | |
165 | |
166 =cut | |
167 | |
168 sub end{ | |
169 my ($self,$value) = @_; | |
170 | |
171 if (defined($value)) { | |
172 if( $value !~ /^\-?\d+/ ) { | |
173 $self->throw("[$value] is not a valid end"); | |
174 } | |
175 | |
176 $self->{'_gsf_end'} = $value; | |
177 } | |
178 | |
179 return $self->{'_gsf_end'}; | |
180 } | |
181 | |
182 =head2 length | |
183 | |
184 Title : length | |
185 Usage : | |
186 Function: | |
187 Example : | |
188 Returns : | |
189 Args : | |
190 | |
191 | |
192 =cut | |
193 | |
194 sub length{ | |
195 my ($self,@args) = @_; | |
196 | |
197 return $self->end - $self->start +1; | |
198 } | |
199 | |
200 | |
201 =head2 strand | |
202 | |
203 Title : strand | |
204 Usage : $strand = $feat->strand() | |
205 $feat->strand($strand) | |
206 Function: get/set on strand information, being 1,-1 or 0 | |
207 Returns : -1,1 or 0 | |
208 Args : none | |
209 | |
210 | |
211 =cut | |
212 | |
213 sub strand { | |
214 my ($self,$value) = @_; | |
215 | |
216 if (defined($value)) { | |
217 if( $value eq '+' ) { $value = 1; } | |
218 if( $value eq '-' ) { $value = -1; } | |
219 if( $value eq '.' ) { $value = 0; } | |
220 | |
221 if( $value != -1 && $value != 1 && $value != 0 ) { | |
222 $self->throw("$value is not a valid strand info"); | |
223 } | |
224 $self->{'_gsf_strand'} = $value; | |
225 } | |
226 | |
227 return $self->{'_gsf_strand'}; | |
228 } | |
229 | |
230 | |
231 =head2 move | |
232 | |
233 Arg [1] : int $start | |
234 Arg [2] : int $end | |
235 Arg [3] : (optional) int $strand | |
236 Example : $feature->move(100, 200, -1); | |
237 Description: Moves a feature to a different location. This is faster | |
238 then calling 3 seperate accesors in a large loop. | |
239 Returntype : none | |
240 Exceptions : none | |
241 Caller : BaseFeatureAdaptor | |
242 | |
243 =cut | |
244 | |
245 sub move { | |
246 my ($self, $start, $end, $strand) = @_; | |
247 | |
248 $self->{'_gsf_start'} = $start; | |
249 $self->{'_gsf_end'} = $end; | |
250 if(defined $strand) { | |
251 $self->{'_gsf_strand'} = $strand; | |
252 } | |
253 } | |
254 | |
255 | |
256 =head2 score | |
257 | |
258 Title : score | |
259 Usage : $score = $feat->score() | |
260 $feat->score($score) | |
261 Function: get/set on score information | |
262 Returns : float | |
263 Args : none if get, the new value if set | |
264 | |
265 | |
266 =cut | |
267 | |
268 sub score { | |
269 my ($self,$value) = @_; | |
270 | |
271 if(defined ($value) ) { | |
272 if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) { | |
273 $self->throw("'$value' is not a valid score"); | |
274 } | |
275 $self->{'_gsf_score'} = $value; | |
276 } | |
277 | |
278 return $self->{'_gsf_score'}; | |
279 } | |
280 | |
281 =head2 frame | |
282 | |
283 Title : frame | |
284 Usage : $frame = $feat->frame() | |
285 $feat->frame($frame) | |
286 Function: get/set on frame information | |
287 Returns : 0,1,2 | |
288 Args : none if get, the new value if set | |
289 | |
290 | |
291 =cut | |
292 | |
293 sub frame { | |
294 my ($self,$value) = @_; | |
295 | |
296 if (defined($value)) { | |
297 if( $value != 1 && $value != 2 && $value != 3 ) { | |
298 $self->throw("'$value' is not a valid frame"); | |
299 } | |
300 $self->{'_gsf_frame'} = $value; | |
301 } | |
302 | |
303 return $self->{'_gsf_frame'}; | |
304 } | |
305 | |
306 =head2 primary_tag | |
307 | |
308 Title : primary_tag | |
309 Usage : $tag = $feat->primary_tag() | |
310 $feat->primary_tag('exon') | |
311 Function: get/set on the primary tag for a feature, | |
312 eg 'exon' | |
313 Returns : a string | |
314 Args : none | |
315 | |
316 | |
317 =cut | |
318 | |
319 sub primary_tag{ | |
320 my ($self,$arg) = @_; | |
321 | |
322 if (defined($arg)) { | |
323 # throw warnings about setting primary tag | |
324 my ($p,$f,$l) = caller; | |
325 $self->warn("$f:$l setting primary_tag now deprecated." . | |
326 "Primary tag is delegated to analysis object"); | |
327 } | |
328 | |
329 unless($self->analysis) { | |
330 return ''; | |
331 } | |
332 | |
333 return $self->analysis->gff_feature(); | |
334 } | |
335 | |
336 =head2 source_tag | |
337 | |
338 Title : source_tag | |
339 Usage : $tag = $feat->source_tag() | |
340 $feat->source_tag('genscan'); | |
341 Function: Returns the source tag for a feature, | |
342 eg, 'genscan' | |
343 Returns : a string | |
344 Args : none | |
345 | |
346 | |
347 =cut | |
348 | |
349 sub source_tag{ | |
350 my ($self,$arg) = @_; | |
351 | |
352 if (defined($arg)) { | |
353 # throw warnings about setting primary tag | |
354 my ($p,$f,$l) = caller; | |
355 $self->warn("$f:$l setting source_tag now deprecated. " . | |
356 "Source tag is delegated to analysis object"); | |
357 } | |
358 | |
359 unless($self->analysis) { | |
360 return ""; | |
361 } | |
362 | |
363 return $self->analysis->gff_source(); | |
364 } | |
365 | |
366 | |
367 =head2 analysis | |
368 | |
369 Title : analysis | |
370 Usage : $sf->analysis(); | |
371 Function: Store details of the program/database | |
372 and versions used to create this feature. | |
373 | |
374 Example : | |
375 Returns : | |
376 Args : | |
377 | |
378 | |
379 =cut | |
380 | |
381 sub analysis { | |
382 my ($self,$value) = @_; | |
383 | |
384 if (defined($value)) { | |
385 unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) { | |
386 $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object " | |
387 . "but a $value object"); | |
388 } | |
389 | |
390 $self->{_analysis} = $value; | |
391 } else { | |
392 #if _analysis is not defined, create a new analysis object | |
393 unless(defined $self->{_analysis}) { | |
394 $self->{_analysis} = new Bio::EnsEMBL::Analysis(); | |
395 } | |
396 } | |
397 | |
398 return $self->{_analysis}; | |
399 } | |
400 | |
401 =head2 validate | |
402 | |
403 Title : validate | |
404 Usage : $sf->validate; | |
405 Function: Checks whether all the data is present in the | |
406 object. | |
407 Example : | |
408 Returns : | |
409 Args : | |
410 | |
411 | |
412 =cut | |
413 | |
414 sub validate { | |
415 my ($self) = @_; | |
416 | |
417 $self->vthrow("Seqname not defined in feature") unless defined($self->seqname); | |
418 $self->vthrow("start not defined in feature") unless defined($self->start); | |
419 $self->vthrow("end not defined in feature") unless defined($self->end); | |
420 $self->vthrow("strand not defined in feature") unless defined($self->strand); | |
421 $self->vthrow("score not defined in feature") unless defined($self->score); | |
422 $self->vthrow("analysis not defined in feature") unless defined($self->analysis); | |
423 | |
424 if ($self->end < $self->start) { | |
425 $self->vthrow("End coordinate < start coordinate"); | |
426 } | |
427 | |
428 } | |
429 | |
430 | |
431 | |
432 sub vthrow { | |
433 my ($self,$message) = @_; | |
434 | |
435 print(STDERR "Error validating feature [$message]\n"); | |
436 print(STDERR " Seqname : [" . $self->{_seqname} . "]\n"); | |
437 print(STDERR " Start : [" . $self->{_gsf_start} . "]\n"); | |
438 print(STDERR " End : [" . $self->{_gsf_end} . "]\n"); | |
439 print(STDERR " Strand : [" . | |
440 ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n"); | |
441 | |
442 print(STDERR " Score : [" . $self->{_gsf_score} . "]\n"); | |
443 | |
444 print(STDERR " Analysis : [" . $self->{_analysis}->dbID . "]\n"); | |
445 | |
446 $self->throw("Invalid feature - see dump on STDERR"); | |
447 } | |
448 | |
449 | |
450 =head2 validate_prot_feature | |
451 | |
452 Title : validate_prot_feature | |
453 Usage : | |
454 Function: | |
455 Example : | |
456 Returns : | |
457 Args : | |
458 | |
459 | |
460 =cut | |
461 | |
462 # Shouldn't this go as "validate" into Pro_SeqFeature? | |
463 sub validate_prot_feature{ | |
464 my ($self,$num) = @_; | |
465 $self->throw("Seqname not defined in feature") unless defined($self->seqname); | |
466 $self->throw("start not defined in feature") unless defined($self->start); | |
467 $self->throw("end not defined in feature") unless defined($self->end); | |
468 if ($num == 1) { | |
469 $self->throw("score not defined in feature") unless defined($self->score); | |
470 $self->throw("percent_id not defined in feature") unless defined($self->percent_id); | |
471 $self->throw("evalue not defined in feature") unless defined($self->p_value); | |
472 } | |
473 $self->throw("analysis not defined in feature") unless defined($self->analysis); | |
474 } | |
475 | |
476 | |
477 | |
478 # These methods are specified in the SeqFeatureI interface but we don't want | |
479 # people to store data in them. These are just here in order to keep | |
480 # existing code working | |
481 | |
482 | |
483 =head2 has_tag | |
484 | |
485 Title : has_tag | |
486 Usage : $value = $self->has_tag('some_tag') | |
487 Function: Returns the value of the tag (undef if | |
488 none) | |
489 Returns : | |
490 Args : | |
491 | |
492 | |
493 =cut | |
494 | |
495 sub has_tag{ | |
496 my ($self,$tag) = (shift, shift); | |
497 | |
498 return exists $self->{'_gsf_tag_hash'}->{$tag}; | |
499 } | |
500 | |
501 =head2 add_tag_value | |
502 | |
503 Title : add_tag_value | |
504 Usage : $self->add_tag_value('note',"this is a note"); | |
505 Returns : nothing | |
506 Args : tag (string) and value (any scalar) | |
507 | |
508 | |
509 =cut | |
510 | |
511 sub add_tag_value{ | |
512 my ($self,$tag,$value) = @_; | |
513 | |
514 if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) { | |
515 $self->{'_gsf_tag_hash'}->{$tag} = []; | |
516 } | |
517 | |
518 push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value); | |
519 } | |
520 | |
521 =head2 each_tag_value | |
522 | |
523 Title : each_tag_value | |
524 Usage : | |
525 Function: | |
526 Example : | |
527 Returns : | |
528 Args : | |
529 | |
530 | |
531 =cut | |
532 | |
533 sub each_tag_value { | |
534 my ($self,$tag) = @_; | |
535 if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) { | |
536 $self->throw("asking for tag value that does not exist $tag"); | |
537 } | |
538 | |
539 return @{$self->{'_gsf_tag_hash'}->{$tag}}; | |
540 } | |
541 | |
542 | |
543 =head2 all_tags | |
544 | |
545 Title : all_tags | |
546 Usage : @tags = $feat->all_tags() | |
547 Function: gives all tags for this feature | |
548 Returns : an array of strings | |
549 Args : none | |
550 | |
551 | |
552 =cut | |
553 | |
554 sub all_tags{ | |
555 my ($self,@args) = @_; | |
556 | |
557 return keys %{$self->{'_gsf_tag_hash'}}; | |
558 } | |
559 | |
560 | |
561 | |
562 =head2 seqname | |
563 | |
564 Arg [1] : string $seqname | |
565 Example : $seqname = $self->seqname(); | |
566 Description: Obtains the seqname of this features sequence. This is set | |
567 automatically when a sequence with a name is attached, or may | |
568 be set manually. | |
569 Returntype : string | |
570 Exceptions : none | |
571 Caller : general, attach_seq | |
572 | |
573 =cut | |
574 | |
575 sub seqname{ | |
576 my ($self,$seqname) = @_; | |
577 | |
578 my $seq = $self->contig(); | |
579 | |
580 if(defined $seqname) { | |
581 $self->{_seqname} = $seqname; | |
582 } else { | |
583 if($seq && ref $seq && $seq->can('name')) { | |
584 $self->{_seqname} = $seq->name(); | |
585 } | |
586 } | |
587 | |
588 return $self->{_seqname}; | |
589 } | |
590 | |
591 | |
592 =head2 attach_seq | |
593 | |
594 Title : attach_seq | |
595 Usage : $sf->attach_seq($seq) | |
596 Function: Attaches a Bio::PrimarySeqI object to this feature. This | |
597 Bio::PrimarySeqI object is for the *entire* sequence: ie | |
598 from 1 to 10000 | |
599 Example : | |
600 Returns : | |
601 Args : | |
602 | |
603 | |
604 =cut | |
605 | |
606 sub attach_seq{ | |
607 my ($self, $seq) = @_; | |
608 | |
609 $self->contig($seq); | |
610 } | |
611 | |
612 =head2 seq | |
613 | |
614 Example : $tseq = $sf->seq() | |
615 Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature | |
616 Returns : a Bio::PrimarySeq object (I reckon) | |
617 | |
618 =cut | |
619 | |
620 sub seq{ | |
621 my ($self,$arg) = @_; | |
622 | |
623 if( defined $arg ) { | |
624 $self->throw("Calling SeqFeature::Generic->seq with an argument. " . | |
625 "You probably want attach_seq"); | |
626 } | |
627 | |
628 if( ! exists $self->{'_gsf_seq'} ) { | |
629 return undef; | |
630 } | |
631 | |
632 # assumming our seq object is sensible, it should not have to yank | |
633 # the entire sequence out here. | |
634 | |
635 my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end()); | |
636 | |
637 | |
638 if( $self->strand == -1 ) { | |
639 | |
640 # ok. this does not work well (?) | |
641 #print STDERR "Before revcom", $seq->str, "\n"; | |
642 $seq = $seq->revcom; | |
643 #print STDERR "After revcom", $seq->str, "\n"; | |
644 } | |
645 | |
646 return $seq; | |
647 } | |
648 | |
649 =head2 entire_seq | |
650 | |
651 Title : entire_seq | |
652 Usage : $whole_seq = $sf->entire_seq() | |
653 Function: gives the entire sequence that this seqfeature is attached to | |
654 Example : | |
655 Returns : | |
656 Args : | |
657 | |
658 | |
659 =cut | |
660 | |
661 sub entire_seq{ | |
662 my ($self) = @_; | |
663 | |
664 return $self->contig; | |
665 } | |
666 | |
667 | |
668 =head2 sub_SeqFeature | |
669 | |
670 Title : sub_SeqFeature | |
671 Usage : @feats = $feat->sub_SeqFeature(); | |
672 Function: Returns an array of sub Sequence Features | |
673 Returns : An array | |
674 Args : none | |
675 | |
676 | |
677 =cut | |
678 | |
679 sub sub_SeqFeature { | |
680 my ($self) = @_; | |
681 | |
682 if ( $self->{'_gsf_sub_array'} ) { | |
683 return @{ $self->{'_gsf_sub_array'} }; | |
684 } else { | |
685 return (); | |
686 } | |
687 } | |
688 | |
689 =head2 add_sub_SeqFeature | |
690 | |
691 Title : add_sub_SeqFeature | |
692 Usage : $feat->add_sub_SeqFeature($subfeat); | |
693 $feat->add_sub_SeqFeature($subfeat,'EXPAND') | |
694 Function: adds a SeqFeature into the subSeqFeature array. | |
695 with no 'EXPAND' qualifer, subfeat will be tested | |
696 as to whether it lies inside the parent, and throw | |
697 an exception if not. | |
698 | |
699 If EXPAND is used, the parents start/end/strand will | |
700 be adjusted so that it grows to accommodate the new | |
701 subFeature | |
702 Returns : nothing | |
703 Args : An object which has the SeqFeatureI interface | |
704 | |
705 | |
706 =cut | |
707 | |
708 sub add_sub_SeqFeature{ | |
709 my ($self,$feat,$expand) = @_; | |
710 | |
711 if( !$feat->isa('Bio::SeqFeatureI') ) { | |
712 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware..."); | |
713 } | |
714 | |
715 if( $expand eq 'EXPAND' ) { | |
716 # if this doesn't have start/end set - forget it! | |
717 if( !defined $self->start && !defined $self->end ) { | |
718 $self->start($feat->start()); | |
719 $self->end($feat->end()); | |
720 $self->strand($feat->strand); | |
721 } else { | |
722 my ($start,$end); | |
723 if( $feat->start < $self->start ) { | |
724 $start = $feat->start; | |
725 } | |
726 | |
727 if( $feat->end > $self->end ) { | |
728 $end = $feat->end; | |
729 } | |
730 | |
731 $self->start($start); | |
732 $self->end($end); | |
733 | |
734 } | |
735 } else { | |
736 if( !defined($feat->start()) || !defined($feat->end()) || | |
737 !defined($self->start()) || !defined($self->end())) { | |
738 $self->throw( "This SeqFeature and the sub_SeqFeature must define". | |
739 " start and end."); | |
740 } | |
741 if($feat->start() > $feat->end() || $self->start() > $self->end()) { | |
742 $self->throw("This SeqFeature and the sub_SeqFeature must have " . | |
743 "start that is less than or equal to end."); | |
744 } | |
745 if($feat->start() < $self->start() || $feat->end() > $self->end() ) { | |
746 $self->throw("$feat is not contained within parent feature, " . | |
747 "and expansion is not valid"); | |
748 } | |
749 } | |
750 | |
751 push(@{$self->{'_gsf_sub_array'}},$feat); | |
752 | |
753 } | |
754 | |
755 =head2 flush_sub_SeqFeature | |
756 | |
757 Title : flush_sub_SeqFeature | |
758 Usage : $sf->flush_sub_SeqFeature | |
759 Function: Removes all sub SeqFeature | |
760 (if you want to remove only a subset, take | |
761 an array of them all, flush them, and add | |
762 back only the guys you want) | |
763 Example : | |
764 Returns : none | |
765 Args : none | |
766 | |
767 | |
768 =cut | |
769 | |
770 sub flush_sub_SeqFeature { | |
771 my ($self) = @_; | |
772 | |
773 $self->{'_gsf_sub_array'} = []; # zap the array implicitly. | |
774 } | |
775 | |
776 | |
777 sub id { | |
778 my ($self,$value) = @_; | |
779 | |
780 if (defined($value)) { | |
781 $self->{_id} = $value; | |
782 } | |
783 | |
784 return $self->{_id}; | |
785 | |
786 } | |
787 | |
788 =head2 percent_id | |
789 | |
790 Title : percent_id | |
791 Usage : $pid = $feat->percent_id() | |
792 $feat->percent_id($pid) | |
793 Function: get/set on percentage identity information | |
794 Returns : float | |
795 Args : none if get, the new value if set | |
796 | |
797 =cut | |
798 | |
799 sub percent_id { | |
800 my ($self,$value) = @_; | |
801 | |
802 if (defined($value)) | |
803 { | |
804 $self->{_percent_id} = $value; | |
805 } | |
806 | |
807 return $self->{_percent_id}; | |
808 } | |
809 | |
810 =head2 p_value | |
811 | |
812 Title : p_value | |
813 Usage : $p_val = $feat->p_value() | |
814 $feat->p_value($p_val) | |
815 Function: get/set on p value information | |
816 Returns : float | |
817 Args : none if get, the new value if set | |
818 | |
819 =cut | |
820 | |
821 sub p_value { | |
822 my ($self,$value) = @_; | |
823 | |
824 if (defined($value)) | |
825 { | |
826 $self->{_p_value} = $value; | |
827 } | |
828 | |
829 return $self->{_p_value}; | |
830 } | |
831 | |
832 =head2 phase | |
833 | |
834 Title : phase | |
835 Usage : $phase = $feat->phase() | |
836 $feat->phase($phase) | |
837 Function: get/set on start phase of predicted exon feature | |
838 Returns : [0,1,2] | |
839 Args : none if get, 0,1 or 2 if set. | |
840 | |
841 =cut | |
842 | |
843 sub phase { | |
844 my ($self, $value) = @_; | |
845 | |
846 if (defined($value) ) | |
847 { | |
848 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2); | |
849 $self->{_phase} = $value; | |
850 } | |
851 | |
852 return $self->{_phase}; | |
853 } | |
854 | |
855 =head2 end_phase | |
856 | |
857 Title : end_phase | |
858 Usage : $end_phase = $feat->end_phase() | |
859 $feat->end_phase($end_phase) | |
860 Function: returns end_phase based on phase and length of feature | |
861 Returns : [0,1,2] | |
862 Args : none if get, 0,1 or 2 if set. | |
863 | |
864 =cut | |
865 | |
866 sub end_phase { | |
867 my ($self, $value) = @_; | |
868 | |
869 if (defined($value)) | |
870 { | |
871 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2); | |
872 $self->{_end_phase} = $value; | |
873 } | |
874 | |
875 return $self->{_end_phase}; | |
876 } | |
877 | |
878 | |
879 sub gffstring { | |
880 my ($self) = @_; | |
881 | |
882 my $str; | |
883 | |
884 my $strand = "+"; | |
885 | |
886 if ((defined $self->strand)&&($self->strand == -1)) { | |
887 $strand = "-"; | |
888 } | |
889 | |
890 $str .= (defined $self->seqname) ? $self->seqname."\t" : "\t"; | |
891 $str .= (defined $self->source_tag) ? $self->source_tag."\t" : "\t"; | |
892 $str .= (defined $self->primary_tag) ? $self->primary_tag."\t" : "\t"; | |
893 $str .= (defined $self->start) ? $self->start."\t" : "\t"; | |
894 $str .= (defined $self->end) ? $self->end."\t" : "\t"; | |
895 $str .= (defined $self->score) ? $self->score."\t" : "\t"; | |
896 $str .= (defined $self->strand) ? $strand."\t" : ".\t"; | |
897 $str .= (defined $self->phase) ? $self->phase."\t" : ".\t"; | |
898 eval{ | |
899 $str .= (defined $self->end_phase) ? $self->end_phase."\t" : ".\t"; | |
900 }; | |
901 | |
902 return $str; | |
903 } | |
904 | |
905 | |
906 =head2 external_db | |
907 | |
908 Title : external_db | |
909 Usage : $pid = $feat->external_db() | |
910 $feat->external_db($dbid) | |
911 Function: get/set for an external db accession number (e.g.: Interpro) | |
912 Returns : | |
913 Args : none if get, the new value if set | |
914 | |
915 =cut | |
916 | |
917 sub external_db { | |
918 my ($self,$value) = @_; | |
919 | |
920 if (defined($value)) | |
921 { | |
922 $self->{'_external_db'} = $value; | |
923 } | |
924 | |
925 return $self->{'_external_db'}; | |
926 } | |
927 | |
928 | |
929 | |
930 =head2 contig | |
931 | |
932 Arg [1] : Bio::PrimarySeqI $seq | |
933 Example : $seq = $self->contig; | |
934 Description: Accessor to attach/retrieve a sequence to/from a feature | |
935 Returntype : Bio::PrimarySeqI | |
936 Exceptions : none | |
937 Caller : general | |
938 | |
939 =cut | |
940 | |
941 sub contig { | |
942 my ($self, $arg) = @_; | |
943 | |
944 if ($arg) { | |
945 unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) { | |
946 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures"); | |
947 } | |
948 | |
949 $self->{'_gsf_seq'} = $arg; | |
950 | |
951 # attach to sub features if they want it | |
952 | |
953 foreach my $sf ($self->sub_SeqFeature) { | |
954 if ($sf->can("attach_seq")) { | |
955 $sf->attach_seq($arg); | |
956 } | |
957 } | |
958 } | |
959 #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'}); | |
960 # my ($p, $f, $l) = caller; | |
961 # print STDERR "Caller = ".$f." ".$l."\n"; | |
962 return $self->{'_gsf_seq'}; | |
963 } | |
964 | |
965 | |
966 | |
967 sub is_splittable { | |
968 my ($self, $arg) = @_; | |
969 | |
970 if (defined $arg) { | |
971 $self->{'_is_splittable'} = $arg; | |
972 } | |
973 return $self->{'_is_splittable'}; | |
974 } | |
975 | |
976 | |
977 sub transform { | |
978 my ($self, $slice) = @_; | |
979 | |
980 unless (defined $slice) { | |
981 | |
982 if ((defined $self->contig) && | |
983 ($self->contig->isa("Bio::EnsEMBL::RawContig"))) { | |
984 | |
985 # we are already in rawcontig coords, nothing needs to be done | |
986 return $self; | |
987 | |
988 } | |
989 else { | |
990 # transform to raw_contig coords from Slice coords | |
991 return $self->_transform_to_RawContig(); | |
992 } | |
993 } | |
994 | |
995 if (defined $self->contig) { | |
996 | |
997 if ($self->contig->isa("Bio::EnsEMBL::RawContig")) { | |
998 | |
999 # transform to slice coords from raw contig coords | |
1000 return $self->_transform_to_Slice($slice); | |
1001 } | |
1002 elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) { | |
1003 | |
1004 # transform to slice coords from other slice coords | |
1005 return $self->_transform_between_Slices($slice); | |
1006 } | |
1007 else { | |
1008 | |
1009 # Unknown contig type | |
1010 $self->throw("Cannot transform unknown contig type @{[$self->contig]}"); | |
1011 } | |
1012 } | |
1013 else { | |
1014 | |
1015 #Can't convert to slice coords without a contig to work with | |
1016 return $self->throw("Object's contig is not defined - cannot transform"); | |
1017 } | |
1018 | |
1019 } | |
1020 | |
1021 | |
1022 sub _transform_to_Slice { | |
1023 my ($self, $slice) = @_; | |
1024 | |
1025 $self->throw("can't transform coordinates of $self without a contig defined") | |
1026 unless $self->contig; | |
1027 | |
1028 unless($self->contig->adaptor) { | |
1029 $self->throw("cannot transform coordinates of $self without adaptor " . | |
1030 "attached to contig"); | |
1031 } | |
1032 | |
1033 my $dbh = $self->contig->adaptor->db; | |
1034 | |
1035 my $mapper = | |
1036 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type); | |
1037 my $rca = $dbh->get_RawContigAdaptor; | |
1038 | |
1039 my @mapped = $mapper->map_coordinates_to_assembly( | |
1040 $self->contig->dbID, | |
1041 $self->start, | |
1042 $self->end, | |
1043 $self->strand | |
1044 ); | |
1045 | |
1046 unless (@mapped) { | |
1047 $self->throw("couldn't map $self to Slice"); | |
1048 } | |
1049 | |
1050 unless (@mapped == 1) { | |
1051 $self->throw("$self should only map to one chromosome - " . | |
1052 "something bad has happened ..."); | |
1053 } | |
1054 | |
1055 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
1056 $self->warn("feature lies on gap\n"); | |
1057 return; | |
1058 } | |
1059 | |
1060 if( ! defined $slice->chr_name() ) { | |
1061 my $slice_adaptor = $slice->adaptor(); | |
1062 %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )}; | |
1063 } | |
1064 | |
1065 # mapped coords are on chromosome - need to convert to slice | |
1066 if($slice->strand == 1) { | |
1067 $self->start ($mapped[0]->start - $slice->chr_start + 1); | |
1068 $self->end ($mapped[0]->end - $slice->chr_start + 1); | |
1069 $self->strand ($mapped[0]->strand); | |
1070 } else { | |
1071 $self->start ($slice->chr_end - $mapped[0]->end + 1); | |
1072 $self->end ($slice->chr_end - $mapped[0]->start + 1); | |
1073 $self->strand ($mapped[0]->strand * -1); | |
1074 } | |
1075 | |
1076 $self->seqname($mapped[0]->id); | |
1077 | |
1078 #set the contig to the slice | |
1079 $self->contig($slice); | |
1080 | |
1081 return $self; | |
1082 } | |
1083 | |
1084 | |
1085 sub _transform_between_Slices { | |
1086 my ($self, $to_slice) = @_; | |
1087 | |
1088 my $from_slice = $self->contig; | |
1089 | |
1090 $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice") | |
1091 unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") ); | |
1092 | |
1093 if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) { | |
1094 $self->warn("Can't transform between chromosomes: $c1 and $c2"); | |
1095 return; | |
1096 } | |
1097 | |
1098 my($start, $end, $strand); | |
1099 | |
1100 #first convert to assembly coords | |
1101 if($from_slice->strand == 1) { | |
1102 $start = $from_slice->chr_start + $self->start - 1; | |
1103 $end = $from_slice->chr_start + $self->end - 1; | |
1104 $strand = $self->strand; | |
1105 } else { | |
1106 $start = $from_slice->chr_end - $self->end + 1; | |
1107 $end = $from_slice->chr_end - $self->start + 1; | |
1108 $strand = $self->strand; | |
1109 } | |
1110 | |
1111 #now convert to the other slice's coords | |
1112 if($to_slice->strand == 1) { | |
1113 $self->start ($start - $to_slice->chr_start + 1); | |
1114 $self->end ($end - $to_slice->chr_start + 1); | |
1115 $self->strand($strand); | |
1116 } else { | |
1117 $self->start ($to_slice->chr_end - $end + 1); | |
1118 $self->end ($to_slice->chr_end - $start + 1); | |
1119 $self->strand($strand * -1); | |
1120 } | |
1121 | |
1122 $self->contig($to_slice); | |
1123 | |
1124 return $self; | |
1125 } | |
1126 | |
1127 | |
1128 sub _transform_to_RawContig { | |
1129 my($self) = @_; | |
1130 | |
1131 #print STDERR "transforming ".$self." to raw contig coords\n"; | |
1132 $self->throw("can't transform coordinates of $self without a contig defined") | |
1133 unless $self->contig; | |
1134 | |
1135 my $slice = $self->contig; | |
1136 | |
1137 unless($slice->adaptor) { | |
1138 $self->throw("can't transform coordinates of $self without an adaptor " . | |
1139 "attached to the feature's slice"); | |
1140 } | |
1141 | |
1142 my $dbh = $slice->adaptor->db; | |
1143 | |
1144 my $mapper = | |
1145 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type); | |
1146 my $rca = $dbh->get_RawContigAdaptor; | |
1147 | |
1148 #first convert the features coordinates to assembly coordinates | |
1149 my($start, $end, $strand); | |
1150 if($slice->strand == 1) { | |
1151 $start = $slice->chr_start + $self->start - 1; | |
1152 $end = $slice->chr_start + $self->end - 1; | |
1153 $strand = $self->strand; | |
1154 } else { | |
1155 $start = $slice->chr_end - $self->end + 1; | |
1156 $end = $slice->chr_end - $self->start + 1; | |
1157 $strand = $self->strand * -1; | |
1158 } | |
1159 | |
1160 #convert the assembly coordinates to RawContig coordinates | |
1161 my @mapped = $mapper->map_coordinates_to_rawcontig( | |
1162 $slice->chr_name, | |
1163 $start, | |
1164 $end, | |
1165 $strand | |
1166 ); | |
1167 | |
1168 unless (@mapped) { | |
1169 $self->throw("couldn't map $self"); | |
1170 return $self; | |
1171 } | |
1172 | |
1173 if (@mapped == 1) { | |
1174 | |
1175 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
1176 $self->warn("feature lies on gap\n"); | |
1177 return; | |
1178 } | |
1179 | |
1180 my $rc = $rca->fetch_by_dbID($mapped[0]->id); | |
1181 | |
1182 $self->start ($mapped[0]->start); | |
1183 $self->end ($mapped[0]->end); | |
1184 $self->strand ($mapped[0]->strand); | |
1185 $self->seqname ($mapped[0]->id); | |
1186 #print STDERR "setting contig to be ".$mapped[0]->id."\n"; | |
1187 $self->contig($rca->fetch_by_dbID($mapped[0]->id)); | |
1188 | |
1189 return $self; | |
1190 } | |
1191 else { | |
1192 | |
1193 # more than one object returned from mapper | |
1194 # possibly more than one RawContig in region | |
1195 | |
1196 my (@gaps, @coords); | |
1197 | |
1198 foreach my $m (@mapped) { | |
1199 | |
1200 if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
1201 push @gaps, $m; | |
1202 } | |
1203 elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) { | |
1204 push @coords, $m; | |
1205 } | |
1206 } | |
1207 | |
1208 # case where only one RawContig maps | |
1209 if (@coords == 1) { | |
1210 | |
1211 $self->start ($coords[0]->start); | |
1212 $self->end ($coords[0]->end); | |
1213 $self->strand ($coords[0]->strand); | |
1214 $self->seqname($coords[0]->id); | |
1215 #print STDERR "2 setting contig to be ".$coords[0]->id."\n"; | |
1216 $self->contig ($rca->fetch_by_dbID($coords[0]->id)); | |
1217 | |
1218 $self->warn("Feature [$self] truncated as lies partially on a gap"); | |
1219 return $self; | |
1220 } | |
1221 | |
1222 unless ($self->is_splittable) { | |
1223 $self->warn("Feature spans >1 raw contig - can't split\n"); | |
1224 return; | |
1225 } | |
1226 | |
1227 my @out; | |
1228 my $obj = ref $self; | |
1229 | |
1230 SPLIT: foreach my $map (@mapped) { | |
1231 | |
1232 if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) { | |
1233 $self->warn("piece of evidence lies on gap\n"); | |
1234 next SPLIT; | |
1235 } | |
1236 | |
1237 my $feat = $obj->new; | |
1238 | |
1239 $feat->start ($map->start); | |
1240 $feat->end ($map->end); | |
1241 $feat->strand ($map->strand); | |
1242 #print STDERR "3 setting contig to be ".$mapped[0]->id."\n"; | |
1243 $feat->contig ($rca->fetch_by_dbID($map->id)); | |
1244 $feat->adaptor($self->adaptor) if $self->adaptor(); | |
1245 $feat->display_label($self->display_label) if($self->can('display_label')); | |
1246 $feat->analysis($self->analysis); | |
1247 push @out, $feat; | |
1248 } | |
1249 | |
1250 return @out; | |
1251 } | |
1252 } | |
1253 | |
1254 | |
1255 1; |