comparison variant_effect_predictor/Bio/EnsEMBL/SeqFeature.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature.
24
25 =head1 DESCRIPTION
26
27 Do not use this module if you can avoid it. It has been replaced by
28 Bio::EnsEMBL::Feature. This module has a long history of usage but has
29 become very bloated, and quite unweildy. It was decided to replace
30 it completely with a smaller, light-weight feature class rather than
31 attempting to refactor this class, and maintain strict backwards
32 compatibility.
33
34 Part of the complexity of this class was in its extensive
35 inheritance. As an example the following is a simplified inheritance
36 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature:
37
38 Bio::EnsEMBL::DnaAlignFeature
39 Bio::EnsEMBL::BaseAlignFeature
40 Bio::EnsEMBL::FeaturePair
41 Bio::EnsEMBL::SeqFeature
42 Bio::EnsEMBL::SeqFeatureI
43 Bio::SeqFeatureI
44 Bio::RangeI
45 Bio::Root::RootI
46
47 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much
48 easier to understand and maintain.
49
50 =head1 METHODS
51
52 =cut
53
54
55 # Let the code begin...
56
57
58 package Bio::EnsEMBL::SeqFeature;
59
60 use vars qw(@ISA);
61 use strict;
62
63
64 use Bio::EnsEMBL::SeqFeatureI;
65 use Bio::EnsEMBL::Analysis;
66 use Bio::EnsEMBL::Root;
67
68 @ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI);
69
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
71
72 sub new {
73 my($caller,@args) = @_;
74
75 my $self = {};
76
77 if(ref $caller) {
78 bless $self, ref $caller;
79 } else {
80 bless $self, $caller;
81 }
82
83 $self->{'_gsf_tag_hash'} = {};
84 $self->{'_gsf_sub_array'} = [];
85 $self->{'_parse_h'} = {};
86 $self->{'_is_splittable'} = 0;
87
88 my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag,
89 $primary_tag, $percent_id, $p_value, $phase, $end_phase) =
90
91 &rearrange([qw(START
92 END
93 STRAND
94 FRAME
95 SCORE
96 ANALYSIS
97 SEQNAME
98 SOURCE_TAG
99 PRIMARY_TAG
100 PERCENT_ID
101 P_VALUE
102 PHASE
103 END_PHASE
104 )],@args);
105
106 # $gff_string && $self->_from_gff_string($gff_string);
107
108 if ( defined $analysis && $analysis ne "") { $self->analysis($analysis)};
109 if ( defined ($start) && $start ne "" ) { $self->start($start)};
110 if ( defined ($end ) && $end ne "" ) { $self->end($end)}
111 if ( defined $strand && $strand ne "") { $self->strand($strand)}
112 if ( defined $frame && $frame ne "") { $self->frame($frame)}
113 if ( defined $score && $score ne "") { $self->score($score)}
114 if ( defined $seqname && $seqname ne "") { $self->seqname($seqname)};
115 if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)};
116 if ( defined $p_value && $p_value ne "") { $self->p_value($p_value)};
117 if ( defined $phase && $phase ne "") { $self->phase($phase)};
118 if ( defined $end_phase && $end_phase ne "") { $self->end_phase($end_phase)};
119
120 return $self; # success - we hope!
121
122 }
123
124
125
126
127
128
129 =head2 start
130
131 Title : start
132 Usage : $start = $feat->start
133 $feat->start(20)
134 Function: Get/set on the start coordinate of the feature
135 Returns : integer
136 Args : none
137
138
139 =cut
140
141 sub start{
142 my ($self,$value) = @_;
143
144 if (defined($value)) {
145 if ($value !~ /^\-?\d+/ ) {
146 $self->throw("$value is not a valid start");
147 }
148
149 $self->{'_gsf_start'} = $value
150 }
151
152 return $self->{'_gsf_start'};
153
154 }
155
156 =head2 end
157
158 Title : end
159 Usage : $end = $feat->end
160 $feat->end($end)
161 Function: get/set on the end coordinate of the feature
162 Returns : integer
163 Args : none
164
165
166 =cut
167
168 sub end{
169 my ($self,$value) = @_;
170
171 if (defined($value)) {
172 if( $value !~ /^\-?\d+/ ) {
173 $self->throw("[$value] is not a valid end");
174 }
175
176 $self->{'_gsf_end'} = $value;
177 }
178
179 return $self->{'_gsf_end'};
180 }
181
182 =head2 length
183
184 Title : length
185 Usage :
186 Function:
187 Example :
188 Returns :
189 Args :
190
191
192 =cut
193
194 sub length{
195 my ($self,@args) = @_;
196
197 return $self->end - $self->start +1;
198 }
199
200
201 =head2 strand
202
203 Title : strand
204 Usage : $strand = $feat->strand()
205 $feat->strand($strand)
206 Function: get/set on strand information, being 1,-1 or 0
207 Returns : -1,1 or 0
208 Args : none
209
210
211 =cut
212
213 sub strand {
214 my ($self,$value) = @_;
215
216 if (defined($value)) {
217 if( $value eq '+' ) { $value = 1; }
218 if( $value eq '-' ) { $value = -1; }
219 if( $value eq '.' ) { $value = 0; }
220
221 if( $value != -1 && $value != 1 && $value != 0 ) {
222 $self->throw("$value is not a valid strand info");
223 }
224 $self->{'_gsf_strand'} = $value;
225 }
226
227 return $self->{'_gsf_strand'};
228 }
229
230
231 =head2 move
232
233 Arg [1] : int $start
234 Arg [2] : int $end
235 Arg [3] : (optional) int $strand
236 Example : $feature->move(100, 200, -1);
237 Description: Moves a feature to a different location. This is faster
238 then calling 3 seperate accesors in a large loop.
239 Returntype : none
240 Exceptions : none
241 Caller : BaseFeatureAdaptor
242
243 =cut
244
245 sub move {
246 my ($self, $start, $end, $strand) = @_;
247
248 $self->{'_gsf_start'} = $start;
249 $self->{'_gsf_end'} = $end;
250 if(defined $strand) {
251 $self->{'_gsf_strand'} = $strand;
252 }
253 }
254
255
256 =head2 score
257
258 Title : score
259 Usage : $score = $feat->score()
260 $feat->score($score)
261 Function: get/set on score information
262 Returns : float
263 Args : none if get, the new value if set
264
265
266 =cut
267
268 sub score {
269 my ($self,$value) = @_;
270
271 if(defined ($value) ) {
272 if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
273 $self->throw("'$value' is not a valid score");
274 }
275 $self->{'_gsf_score'} = $value;
276 }
277
278 return $self->{'_gsf_score'};
279 }
280
281 =head2 frame
282
283 Title : frame
284 Usage : $frame = $feat->frame()
285 $feat->frame($frame)
286 Function: get/set on frame information
287 Returns : 0,1,2
288 Args : none if get, the new value if set
289
290
291 =cut
292
293 sub frame {
294 my ($self,$value) = @_;
295
296 if (defined($value)) {
297 if( $value != 1 && $value != 2 && $value != 3 ) {
298 $self->throw("'$value' is not a valid frame");
299 }
300 $self->{'_gsf_frame'} = $value;
301 }
302
303 return $self->{'_gsf_frame'};
304 }
305
306 =head2 primary_tag
307
308 Title : primary_tag
309 Usage : $tag = $feat->primary_tag()
310 $feat->primary_tag('exon')
311 Function: get/set on the primary tag for a feature,
312 eg 'exon'
313 Returns : a string
314 Args : none
315
316
317 =cut
318
319 sub primary_tag{
320 my ($self,$arg) = @_;
321
322 if (defined($arg)) {
323 # throw warnings about setting primary tag
324 my ($p,$f,$l) = caller;
325 $self->warn("$f:$l setting primary_tag now deprecated." .
326 "Primary tag is delegated to analysis object");
327 }
328
329 unless($self->analysis) {
330 return '';
331 }
332
333 return $self->analysis->gff_feature();
334 }
335
336 =head2 source_tag
337
338 Title : source_tag
339 Usage : $tag = $feat->source_tag()
340 $feat->source_tag('genscan');
341 Function: Returns the source tag for a feature,
342 eg, 'genscan'
343 Returns : a string
344 Args : none
345
346
347 =cut
348
349 sub source_tag{
350 my ($self,$arg) = @_;
351
352 if (defined($arg)) {
353 # throw warnings about setting primary tag
354 my ($p,$f,$l) = caller;
355 $self->warn("$f:$l setting source_tag now deprecated. " .
356 "Source tag is delegated to analysis object");
357 }
358
359 unless($self->analysis) {
360 return "";
361 }
362
363 return $self->analysis->gff_source();
364 }
365
366
367 =head2 analysis
368
369 Title : analysis
370 Usage : $sf->analysis();
371 Function: Store details of the program/database
372 and versions used to create this feature.
373
374 Example :
375 Returns :
376 Args :
377
378
379 =cut
380
381 sub analysis {
382 my ($self,$value) = @_;
383
384 if (defined($value)) {
385 unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) {
386 $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object "
387 . "but a $value object");
388 }
389
390 $self->{_analysis} = $value;
391 } else {
392 #if _analysis is not defined, create a new analysis object
393 unless(defined $self->{_analysis}) {
394 $self->{_analysis} = new Bio::EnsEMBL::Analysis();
395 }
396 }
397
398 return $self->{_analysis};
399 }
400
401 =head2 validate
402
403 Title : validate
404 Usage : $sf->validate;
405 Function: Checks whether all the data is present in the
406 object.
407 Example :
408 Returns :
409 Args :
410
411
412 =cut
413
414 sub validate {
415 my ($self) = @_;
416
417 $self->vthrow("Seqname not defined in feature") unless defined($self->seqname);
418 $self->vthrow("start not defined in feature") unless defined($self->start);
419 $self->vthrow("end not defined in feature") unless defined($self->end);
420 $self->vthrow("strand not defined in feature") unless defined($self->strand);
421 $self->vthrow("score not defined in feature") unless defined($self->score);
422 $self->vthrow("analysis not defined in feature") unless defined($self->analysis);
423
424 if ($self->end < $self->start) {
425 $self->vthrow("End coordinate < start coordinate");
426 }
427
428 }
429
430
431
432 sub vthrow {
433 my ($self,$message) = @_;
434
435 print(STDERR "Error validating feature [$message]\n");
436 print(STDERR " Seqname : [" . $self->{_seqname} . "]\n");
437 print(STDERR " Start : [" . $self->{_gsf_start} . "]\n");
438 print(STDERR " End : [" . $self->{_gsf_end} . "]\n");
439 print(STDERR " Strand : [" .
440 ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n");
441
442 print(STDERR " Score : [" . $self->{_gsf_score} . "]\n");
443
444 print(STDERR " Analysis : [" . $self->{_analysis}->dbID . "]\n");
445
446 $self->throw("Invalid feature - see dump on STDERR");
447 }
448
449
450 =head2 validate_prot_feature
451
452 Title : validate_prot_feature
453 Usage :
454 Function:
455 Example :
456 Returns :
457 Args :
458
459
460 =cut
461
462 # Shouldn't this go as "validate" into Pro_SeqFeature?
463 sub validate_prot_feature{
464 my ($self,$num) = @_;
465 $self->throw("Seqname not defined in feature") unless defined($self->seqname);
466 $self->throw("start not defined in feature") unless defined($self->start);
467 $self->throw("end not defined in feature") unless defined($self->end);
468 if ($num == 1) {
469 $self->throw("score not defined in feature") unless defined($self->score);
470 $self->throw("percent_id not defined in feature") unless defined($self->percent_id);
471 $self->throw("evalue not defined in feature") unless defined($self->p_value);
472 }
473 $self->throw("analysis not defined in feature") unless defined($self->analysis);
474 }
475
476
477
478 # These methods are specified in the SeqFeatureI interface but we don't want
479 # people to store data in them. These are just here in order to keep
480 # existing code working
481
482
483 =head2 has_tag
484
485 Title : has_tag
486 Usage : $value = $self->has_tag('some_tag')
487 Function: Returns the value of the tag (undef if
488 none)
489 Returns :
490 Args :
491
492
493 =cut
494
495 sub has_tag{
496 my ($self,$tag) = (shift, shift);
497
498 return exists $self->{'_gsf_tag_hash'}->{$tag};
499 }
500
501 =head2 add_tag_value
502
503 Title : add_tag_value
504 Usage : $self->add_tag_value('note',"this is a note");
505 Returns : nothing
506 Args : tag (string) and value (any scalar)
507
508
509 =cut
510
511 sub add_tag_value{
512 my ($self,$tag,$value) = @_;
513
514 if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) {
515 $self->{'_gsf_tag_hash'}->{$tag} = [];
516 }
517
518 push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
519 }
520
521 =head2 each_tag_value
522
523 Title : each_tag_value
524 Usage :
525 Function:
526 Example :
527 Returns :
528 Args :
529
530
531 =cut
532
533 sub each_tag_value {
534 my ($self,$tag) = @_;
535 if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
536 $self->throw("asking for tag value that does not exist $tag");
537 }
538
539 return @{$self->{'_gsf_tag_hash'}->{$tag}};
540 }
541
542
543 =head2 all_tags
544
545 Title : all_tags
546 Usage : @tags = $feat->all_tags()
547 Function: gives all tags for this feature
548 Returns : an array of strings
549 Args : none
550
551
552 =cut
553
554 sub all_tags{
555 my ($self,@args) = @_;
556
557 return keys %{$self->{'_gsf_tag_hash'}};
558 }
559
560
561
562 =head2 seqname
563
564 Arg [1] : string $seqname
565 Example : $seqname = $self->seqname();
566 Description: Obtains the seqname of this features sequence. This is set
567 automatically when a sequence with a name is attached, or may
568 be set manually.
569 Returntype : string
570 Exceptions : none
571 Caller : general, attach_seq
572
573 =cut
574
575 sub seqname{
576 my ($self,$seqname) = @_;
577
578 my $seq = $self->contig();
579
580 if(defined $seqname) {
581 $self->{_seqname} = $seqname;
582 } else {
583 if($seq && ref $seq && $seq->can('name')) {
584 $self->{_seqname} = $seq->name();
585 }
586 }
587
588 return $self->{_seqname};
589 }
590
591
592 =head2 attach_seq
593
594 Title : attach_seq
595 Usage : $sf->attach_seq($seq)
596 Function: Attaches a Bio::PrimarySeqI object to this feature. This
597 Bio::PrimarySeqI object is for the *entire* sequence: ie
598 from 1 to 10000
599 Example :
600 Returns :
601 Args :
602
603
604 =cut
605
606 sub attach_seq{
607 my ($self, $seq) = @_;
608
609 $self->contig($seq);
610 }
611
612 =head2 seq
613
614 Example : $tseq = $sf->seq()
615 Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature
616 Returns : a Bio::PrimarySeq object (I reckon)
617
618 =cut
619
620 sub seq{
621 my ($self,$arg) = @_;
622
623 if( defined $arg ) {
624 $self->throw("Calling SeqFeature::Generic->seq with an argument. " .
625 "You probably want attach_seq");
626 }
627
628 if( ! exists $self->{'_gsf_seq'} ) {
629 return undef;
630 }
631
632 # assumming our seq object is sensible, it should not have to yank
633 # the entire sequence out here.
634
635 my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end());
636
637
638 if( $self->strand == -1 ) {
639
640 # ok. this does not work well (?)
641 #print STDERR "Before revcom", $seq->str, "\n";
642 $seq = $seq->revcom;
643 #print STDERR "After revcom", $seq->str, "\n";
644 }
645
646 return $seq;
647 }
648
649 =head2 entire_seq
650
651 Title : entire_seq
652 Usage : $whole_seq = $sf->entire_seq()
653 Function: gives the entire sequence that this seqfeature is attached to
654 Example :
655 Returns :
656 Args :
657
658
659 =cut
660
661 sub entire_seq{
662 my ($self) = @_;
663
664 return $self->contig;
665 }
666
667
668 =head2 sub_SeqFeature
669
670 Title : sub_SeqFeature
671 Usage : @feats = $feat->sub_SeqFeature();
672 Function: Returns an array of sub Sequence Features
673 Returns : An array
674 Args : none
675
676
677 =cut
678
679 sub sub_SeqFeature {
680 my ($self) = @_;
681
682 if ( $self->{'_gsf_sub_array'} ) {
683 return @{ $self->{'_gsf_sub_array'} };
684 } else {
685 return ();
686 }
687 }
688
689 =head2 add_sub_SeqFeature
690
691 Title : add_sub_SeqFeature
692 Usage : $feat->add_sub_SeqFeature($subfeat);
693 $feat->add_sub_SeqFeature($subfeat,'EXPAND')
694 Function: adds a SeqFeature into the subSeqFeature array.
695 with no 'EXPAND' qualifer, subfeat will be tested
696 as to whether it lies inside the parent, and throw
697 an exception if not.
698
699 If EXPAND is used, the parents start/end/strand will
700 be adjusted so that it grows to accommodate the new
701 subFeature
702 Returns : nothing
703 Args : An object which has the SeqFeatureI interface
704
705
706 =cut
707
708 sub add_sub_SeqFeature{
709 my ($self,$feat,$expand) = @_;
710
711 if( !$feat->isa('Bio::SeqFeatureI') ) {
712 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
713 }
714
715 if( $expand eq 'EXPAND' ) {
716 # if this doesn't have start/end set - forget it!
717 if( !defined $self->start && !defined $self->end ) {
718 $self->start($feat->start());
719 $self->end($feat->end());
720 $self->strand($feat->strand);
721 } else {
722 my ($start,$end);
723 if( $feat->start < $self->start ) {
724 $start = $feat->start;
725 }
726
727 if( $feat->end > $self->end ) {
728 $end = $feat->end;
729 }
730
731 $self->start($start);
732 $self->end($end);
733
734 }
735 } else {
736 if( !defined($feat->start()) || !defined($feat->end()) ||
737 !defined($self->start()) || !defined($self->end())) {
738 $self->throw( "This SeqFeature and the sub_SeqFeature must define".
739 " start and end.");
740 }
741 if($feat->start() > $feat->end() || $self->start() > $self->end()) {
742 $self->throw("This SeqFeature and the sub_SeqFeature must have " .
743 "start that is less than or equal to end.");
744 }
745 if($feat->start() < $self->start() || $feat->end() > $self->end() ) {
746 $self->throw("$feat is not contained within parent feature, " .
747 "and expansion is not valid");
748 }
749 }
750
751 push(@{$self->{'_gsf_sub_array'}},$feat);
752
753 }
754
755 =head2 flush_sub_SeqFeature
756
757 Title : flush_sub_SeqFeature
758 Usage : $sf->flush_sub_SeqFeature
759 Function: Removes all sub SeqFeature
760 (if you want to remove only a subset, take
761 an array of them all, flush them, and add
762 back only the guys you want)
763 Example :
764 Returns : none
765 Args : none
766
767
768 =cut
769
770 sub flush_sub_SeqFeature {
771 my ($self) = @_;
772
773 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
774 }
775
776
777 sub id {
778 my ($self,$value) = @_;
779
780 if (defined($value)) {
781 $self->{_id} = $value;
782 }
783
784 return $self->{_id};
785
786 }
787
788 =head2 percent_id
789
790 Title : percent_id
791 Usage : $pid = $feat->percent_id()
792 $feat->percent_id($pid)
793 Function: get/set on percentage identity information
794 Returns : float
795 Args : none if get, the new value if set
796
797 =cut
798
799 sub percent_id {
800 my ($self,$value) = @_;
801
802 if (defined($value))
803 {
804 $self->{_percent_id} = $value;
805 }
806
807 return $self->{_percent_id};
808 }
809
810 =head2 p_value
811
812 Title : p_value
813 Usage : $p_val = $feat->p_value()
814 $feat->p_value($p_val)
815 Function: get/set on p value information
816 Returns : float
817 Args : none if get, the new value if set
818
819 =cut
820
821 sub p_value {
822 my ($self,$value) = @_;
823
824 if (defined($value))
825 {
826 $self->{_p_value} = $value;
827 }
828
829 return $self->{_p_value};
830 }
831
832 =head2 phase
833
834 Title : phase
835 Usage : $phase = $feat->phase()
836 $feat->phase($phase)
837 Function: get/set on start phase of predicted exon feature
838 Returns : [0,1,2]
839 Args : none if get, 0,1 or 2 if set.
840
841 =cut
842
843 sub phase {
844 my ($self, $value) = @_;
845
846 if (defined($value) )
847 {
848 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
849 $self->{_phase} = $value;
850 }
851
852 return $self->{_phase};
853 }
854
855 =head2 end_phase
856
857 Title : end_phase
858 Usage : $end_phase = $feat->end_phase()
859 $feat->end_phase($end_phase)
860 Function: returns end_phase based on phase and length of feature
861 Returns : [0,1,2]
862 Args : none if get, 0,1 or 2 if set.
863
864 =cut
865
866 sub end_phase {
867 my ($self, $value) = @_;
868
869 if (defined($value))
870 {
871 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
872 $self->{_end_phase} = $value;
873 }
874
875 return $self->{_end_phase};
876 }
877
878
879 sub gffstring {
880 my ($self) = @_;
881
882 my $str;
883
884 my $strand = "+";
885
886 if ((defined $self->strand)&&($self->strand == -1)) {
887 $strand = "-";
888 }
889
890 $str .= (defined $self->seqname) ? $self->seqname."\t" : "\t";
891 $str .= (defined $self->source_tag) ? $self->source_tag."\t" : "\t";
892 $str .= (defined $self->primary_tag) ? $self->primary_tag."\t" : "\t";
893 $str .= (defined $self->start) ? $self->start."\t" : "\t";
894 $str .= (defined $self->end) ? $self->end."\t" : "\t";
895 $str .= (defined $self->score) ? $self->score."\t" : "\t";
896 $str .= (defined $self->strand) ? $strand."\t" : ".\t";
897 $str .= (defined $self->phase) ? $self->phase."\t" : ".\t";
898 eval{
899 $str .= (defined $self->end_phase) ? $self->end_phase."\t" : ".\t";
900 };
901
902 return $str;
903 }
904
905
906 =head2 external_db
907
908 Title : external_db
909 Usage : $pid = $feat->external_db()
910 $feat->external_db($dbid)
911 Function: get/set for an external db accession number (e.g.: Interpro)
912 Returns :
913 Args : none if get, the new value if set
914
915 =cut
916
917 sub external_db {
918 my ($self,$value) = @_;
919
920 if (defined($value))
921 {
922 $self->{'_external_db'} = $value;
923 }
924
925 return $self->{'_external_db'};
926 }
927
928
929
930 =head2 contig
931
932 Arg [1] : Bio::PrimarySeqI $seq
933 Example : $seq = $self->contig;
934 Description: Accessor to attach/retrieve a sequence to/from a feature
935 Returntype : Bio::PrimarySeqI
936 Exceptions : none
937 Caller : general
938
939 =cut
940
941 sub contig {
942 my ($self, $arg) = @_;
943
944 if ($arg) {
945 unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) {
946 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
947 }
948
949 $self->{'_gsf_seq'} = $arg;
950
951 # attach to sub features if they want it
952
953 foreach my $sf ($self->sub_SeqFeature) {
954 if ($sf->can("attach_seq")) {
955 $sf->attach_seq($arg);
956 }
957 }
958 }
959 #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'});
960 # my ($p, $f, $l) = caller;
961 # print STDERR "Caller = ".$f." ".$l."\n";
962 return $self->{'_gsf_seq'};
963 }
964
965
966
967 sub is_splittable {
968 my ($self, $arg) = @_;
969
970 if (defined $arg) {
971 $self->{'_is_splittable'} = $arg;
972 }
973 return $self->{'_is_splittable'};
974 }
975
976
977 sub transform {
978 my ($self, $slice) = @_;
979
980 unless (defined $slice) {
981
982 if ((defined $self->contig) &&
983 ($self->contig->isa("Bio::EnsEMBL::RawContig"))) {
984
985 # we are already in rawcontig coords, nothing needs to be done
986 return $self;
987
988 }
989 else {
990 # transform to raw_contig coords from Slice coords
991 return $self->_transform_to_RawContig();
992 }
993 }
994
995 if (defined $self->contig) {
996
997 if ($self->contig->isa("Bio::EnsEMBL::RawContig")) {
998
999 # transform to slice coords from raw contig coords
1000 return $self->_transform_to_Slice($slice);
1001 }
1002 elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) {
1003
1004 # transform to slice coords from other slice coords
1005 return $self->_transform_between_Slices($slice);
1006 }
1007 else {
1008
1009 # Unknown contig type
1010 $self->throw("Cannot transform unknown contig type @{[$self->contig]}");
1011 }
1012 }
1013 else {
1014
1015 #Can't convert to slice coords without a contig to work with
1016 return $self->throw("Object's contig is not defined - cannot transform");
1017 }
1018
1019 }
1020
1021
1022 sub _transform_to_Slice {
1023 my ($self, $slice) = @_;
1024
1025 $self->throw("can't transform coordinates of $self without a contig defined")
1026 unless $self->contig;
1027
1028 unless($self->contig->adaptor) {
1029 $self->throw("cannot transform coordinates of $self without adaptor " .
1030 "attached to contig");
1031 }
1032
1033 my $dbh = $self->contig->adaptor->db;
1034
1035 my $mapper =
1036 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
1037 my $rca = $dbh->get_RawContigAdaptor;
1038
1039 my @mapped = $mapper->map_coordinates_to_assembly(
1040 $self->contig->dbID,
1041 $self->start,
1042 $self->end,
1043 $self->strand
1044 );
1045
1046 unless (@mapped) {
1047 $self->throw("couldn't map $self to Slice");
1048 }
1049
1050 unless (@mapped == 1) {
1051 $self->throw("$self should only map to one chromosome - " .
1052 "something bad has happened ...");
1053 }
1054
1055 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
1056 $self->warn("feature lies on gap\n");
1057 return;
1058 }
1059
1060 if( ! defined $slice->chr_name() ) {
1061 my $slice_adaptor = $slice->adaptor();
1062 %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )};
1063 }
1064
1065 # mapped coords are on chromosome - need to convert to slice
1066 if($slice->strand == 1) {
1067 $self->start ($mapped[0]->start - $slice->chr_start + 1);
1068 $self->end ($mapped[0]->end - $slice->chr_start + 1);
1069 $self->strand ($mapped[0]->strand);
1070 } else {
1071 $self->start ($slice->chr_end - $mapped[0]->end + 1);
1072 $self->end ($slice->chr_end - $mapped[0]->start + 1);
1073 $self->strand ($mapped[0]->strand * -1);
1074 }
1075
1076 $self->seqname($mapped[0]->id);
1077
1078 #set the contig to the slice
1079 $self->contig($slice);
1080
1081 return $self;
1082 }
1083
1084
1085 sub _transform_between_Slices {
1086 my ($self, $to_slice) = @_;
1087
1088 my $from_slice = $self->contig;
1089
1090 $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
1091 unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") );
1092
1093 if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
1094 $self->warn("Can't transform between chromosomes: $c1 and $c2");
1095 return;
1096 }
1097
1098 my($start, $end, $strand);
1099
1100 #first convert to assembly coords
1101 if($from_slice->strand == 1) {
1102 $start = $from_slice->chr_start + $self->start - 1;
1103 $end = $from_slice->chr_start + $self->end - 1;
1104 $strand = $self->strand;
1105 } else {
1106 $start = $from_slice->chr_end - $self->end + 1;
1107 $end = $from_slice->chr_end - $self->start + 1;
1108 $strand = $self->strand;
1109 }
1110
1111 #now convert to the other slice's coords
1112 if($to_slice->strand == 1) {
1113 $self->start ($start - $to_slice->chr_start + 1);
1114 $self->end ($end - $to_slice->chr_start + 1);
1115 $self->strand($strand);
1116 } else {
1117 $self->start ($to_slice->chr_end - $end + 1);
1118 $self->end ($to_slice->chr_end - $start + 1);
1119 $self->strand($strand * -1);
1120 }
1121
1122 $self->contig($to_slice);
1123
1124 return $self;
1125 }
1126
1127
1128 sub _transform_to_RawContig {
1129 my($self) = @_;
1130
1131 #print STDERR "transforming ".$self." to raw contig coords\n";
1132 $self->throw("can't transform coordinates of $self without a contig defined")
1133 unless $self->contig;
1134
1135 my $slice = $self->contig;
1136
1137 unless($slice->adaptor) {
1138 $self->throw("can't transform coordinates of $self without an adaptor " .
1139 "attached to the feature's slice");
1140 }
1141
1142 my $dbh = $slice->adaptor->db;
1143
1144 my $mapper =
1145 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
1146 my $rca = $dbh->get_RawContigAdaptor;
1147
1148 #first convert the features coordinates to assembly coordinates
1149 my($start, $end, $strand);
1150 if($slice->strand == 1) {
1151 $start = $slice->chr_start + $self->start - 1;
1152 $end = $slice->chr_start + $self->end - 1;
1153 $strand = $self->strand;
1154 } else {
1155 $start = $slice->chr_end - $self->end + 1;
1156 $end = $slice->chr_end - $self->start + 1;
1157 $strand = $self->strand * -1;
1158 }
1159
1160 #convert the assembly coordinates to RawContig coordinates
1161 my @mapped = $mapper->map_coordinates_to_rawcontig(
1162 $slice->chr_name,
1163 $start,
1164 $end,
1165 $strand
1166 );
1167
1168 unless (@mapped) {
1169 $self->throw("couldn't map $self");
1170 return $self;
1171 }
1172
1173 if (@mapped == 1) {
1174
1175 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
1176 $self->warn("feature lies on gap\n");
1177 return;
1178 }
1179
1180 my $rc = $rca->fetch_by_dbID($mapped[0]->id);
1181
1182 $self->start ($mapped[0]->start);
1183 $self->end ($mapped[0]->end);
1184 $self->strand ($mapped[0]->strand);
1185 $self->seqname ($mapped[0]->id);
1186 #print STDERR "setting contig to be ".$mapped[0]->id."\n";
1187 $self->contig($rca->fetch_by_dbID($mapped[0]->id));
1188
1189 return $self;
1190 }
1191 else {
1192
1193 # more than one object returned from mapper
1194 # possibly more than one RawContig in region
1195
1196 my (@gaps, @coords);
1197
1198 foreach my $m (@mapped) {
1199
1200 if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) {
1201 push @gaps, $m;
1202 }
1203 elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
1204 push @coords, $m;
1205 }
1206 }
1207
1208 # case where only one RawContig maps
1209 if (@coords == 1) {
1210
1211 $self->start ($coords[0]->start);
1212 $self->end ($coords[0]->end);
1213 $self->strand ($coords[0]->strand);
1214 $self->seqname($coords[0]->id);
1215 #print STDERR "2 setting contig to be ".$coords[0]->id."\n";
1216 $self->contig ($rca->fetch_by_dbID($coords[0]->id));
1217
1218 $self->warn("Feature [$self] truncated as lies partially on a gap");
1219 return $self;
1220 }
1221
1222 unless ($self->is_splittable) {
1223 $self->warn("Feature spans >1 raw contig - can't split\n");
1224 return;
1225 }
1226
1227 my @out;
1228 my $obj = ref $self;
1229
1230 SPLIT: foreach my $map (@mapped) {
1231
1232 if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) {
1233 $self->warn("piece of evidence lies on gap\n");
1234 next SPLIT;
1235 }
1236
1237 my $feat = $obj->new;
1238
1239 $feat->start ($map->start);
1240 $feat->end ($map->end);
1241 $feat->strand ($map->strand);
1242 #print STDERR "3 setting contig to be ".$mapped[0]->id."\n";
1243 $feat->contig ($rca->fetch_by_dbID($map->id));
1244 $feat->adaptor($self->adaptor) if $self->adaptor();
1245 $feat->display_label($self->display_label) if($self->can('display_label'));
1246 $feat->analysis($self->analysis);
1247 push @out, $feat;
1248 }
1249
1250 return @out;
1251 }
1252 }
1253
1254
1255 1;