0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature.
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 Do not use this module if you can avoid it. It has been replaced by
|
|
28 Bio::EnsEMBL::Feature. This module has a long history of usage but has
|
|
29 become very bloated, and quite unweildy. It was decided to replace
|
|
30 it completely with a smaller, light-weight feature class rather than
|
|
31 attempting to refactor this class, and maintain strict backwards
|
|
32 compatibility.
|
|
33
|
|
34 Part of the complexity of this class was in its extensive
|
|
35 inheritance. As an example the following is a simplified inheritance
|
|
36 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature:
|
|
37
|
|
38 Bio::EnsEMBL::DnaAlignFeature
|
|
39 Bio::EnsEMBL::BaseAlignFeature
|
|
40 Bio::EnsEMBL::FeaturePair
|
|
41 Bio::EnsEMBL::SeqFeature
|
|
42 Bio::EnsEMBL::SeqFeatureI
|
|
43 Bio::SeqFeatureI
|
|
44 Bio::RangeI
|
|
45 Bio::Root::RootI
|
|
46
|
|
47 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much
|
|
48 easier to understand and maintain.
|
|
49
|
|
50 =head1 METHODS
|
|
51
|
|
52 =cut
|
|
53
|
|
54
|
|
55 # Let the code begin...
|
|
56
|
|
57
|
|
58 package Bio::EnsEMBL::SeqFeature;
|
|
59
|
|
60 use vars qw(@ISA);
|
|
61 use strict;
|
|
62
|
|
63
|
|
64 use Bio::EnsEMBL::SeqFeatureI;
|
|
65 use Bio::EnsEMBL::Analysis;
|
|
66 use Bio::EnsEMBL::Root;
|
|
67
|
|
68 @ISA = qw(Bio::EnsEMBL::Root Bio::EnsEMBL::SeqFeatureI);
|
|
69
|
|
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
71
|
|
72 sub new {
|
|
73 my($caller,@args) = @_;
|
|
74
|
|
75 my $self = {};
|
|
76
|
|
77 if(ref $caller) {
|
|
78 bless $self, ref $caller;
|
|
79 } else {
|
|
80 bless $self, $caller;
|
|
81 }
|
|
82
|
|
83 $self->{'_gsf_tag_hash'} = {};
|
|
84 $self->{'_gsf_sub_array'} = [];
|
|
85 $self->{'_parse_h'} = {};
|
|
86 $self->{'_is_splittable'} = 0;
|
|
87
|
|
88 my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag,
|
|
89 $primary_tag, $percent_id, $p_value, $phase, $end_phase) =
|
|
90
|
|
91 &rearrange([qw(START
|
|
92 END
|
|
93 STRAND
|
|
94 FRAME
|
|
95 SCORE
|
|
96 ANALYSIS
|
|
97 SEQNAME
|
|
98 SOURCE_TAG
|
|
99 PRIMARY_TAG
|
|
100 PERCENT_ID
|
|
101 P_VALUE
|
|
102 PHASE
|
|
103 END_PHASE
|
|
104 )],@args);
|
|
105
|
|
106 # $gff_string && $self->_from_gff_string($gff_string);
|
|
107
|
|
108 if ( defined $analysis && $analysis ne "") { $self->analysis($analysis)};
|
|
109 if ( defined ($start) && $start ne "" ) { $self->start($start)};
|
|
110 if ( defined ($end ) && $end ne "" ) { $self->end($end)}
|
|
111 if ( defined $strand && $strand ne "") { $self->strand($strand)}
|
|
112 if ( defined $frame && $frame ne "") { $self->frame($frame)}
|
|
113 if ( defined $score && $score ne "") { $self->score($score)}
|
|
114 if ( defined $seqname && $seqname ne "") { $self->seqname($seqname)};
|
|
115 if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)};
|
|
116 if ( defined $p_value && $p_value ne "") { $self->p_value($p_value)};
|
|
117 if ( defined $phase && $phase ne "") { $self->phase($phase)};
|
|
118 if ( defined $end_phase && $end_phase ne "") { $self->end_phase($end_phase)};
|
|
119
|
|
120 return $self; # success - we hope!
|
|
121
|
|
122 }
|
|
123
|
|
124
|
|
125
|
|
126
|
|
127
|
|
128
|
|
129 =head2 start
|
|
130
|
|
131 Title : start
|
|
132 Usage : $start = $feat->start
|
|
133 $feat->start(20)
|
|
134 Function: Get/set on the start coordinate of the feature
|
|
135 Returns : integer
|
|
136 Args : none
|
|
137
|
|
138
|
|
139 =cut
|
|
140
|
|
141 sub start{
|
|
142 my ($self,$value) = @_;
|
|
143
|
|
144 if (defined($value)) {
|
|
145 if ($value !~ /^\-?\d+/ ) {
|
|
146 $self->throw("$value is not a valid start");
|
|
147 }
|
|
148
|
|
149 $self->{'_gsf_start'} = $value
|
|
150 }
|
|
151
|
|
152 return $self->{'_gsf_start'};
|
|
153
|
|
154 }
|
|
155
|
|
156 =head2 end
|
|
157
|
|
158 Title : end
|
|
159 Usage : $end = $feat->end
|
|
160 $feat->end($end)
|
|
161 Function: get/set on the end coordinate of the feature
|
|
162 Returns : integer
|
|
163 Args : none
|
|
164
|
|
165
|
|
166 =cut
|
|
167
|
|
168 sub end{
|
|
169 my ($self,$value) = @_;
|
|
170
|
|
171 if (defined($value)) {
|
|
172 if( $value !~ /^\-?\d+/ ) {
|
|
173 $self->throw("[$value] is not a valid end");
|
|
174 }
|
|
175
|
|
176 $self->{'_gsf_end'} = $value;
|
|
177 }
|
|
178
|
|
179 return $self->{'_gsf_end'};
|
|
180 }
|
|
181
|
|
182 =head2 length
|
|
183
|
|
184 Title : length
|
|
185 Usage :
|
|
186 Function:
|
|
187 Example :
|
|
188 Returns :
|
|
189 Args :
|
|
190
|
|
191
|
|
192 =cut
|
|
193
|
|
194 sub length{
|
|
195 my ($self,@args) = @_;
|
|
196
|
|
197 return $self->end - $self->start +1;
|
|
198 }
|
|
199
|
|
200
|
|
201 =head2 strand
|
|
202
|
|
203 Title : strand
|
|
204 Usage : $strand = $feat->strand()
|
|
205 $feat->strand($strand)
|
|
206 Function: get/set on strand information, being 1,-1 or 0
|
|
207 Returns : -1,1 or 0
|
|
208 Args : none
|
|
209
|
|
210
|
|
211 =cut
|
|
212
|
|
213 sub strand {
|
|
214 my ($self,$value) = @_;
|
|
215
|
|
216 if (defined($value)) {
|
|
217 if( $value eq '+' ) { $value = 1; }
|
|
218 if( $value eq '-' ) { $value = -1; }
|
|
219 if( $value eq '.' ) { $value = 0; }
|
|
220
|
|
221 if( $value != -1 && $value != 1 && $value != 0 ) {
|
|
222 $self->throw("$value is not a valid strand info");
|
|
223 }
|
|
224 $self->{'_gsf_strand'} = $value;
|
|
225 }
|
|
226
|
|
227 return $self->{'_gsf_strand'};
|
|
228 }
|
|
229
|
|
230
|
|
231 =head2 move
|
|
232
|
|
233 Arg [1] : int $start
|
|
234 Arg [2] : int $end
|
|
235 Arg [3] : (optional) int $strand
|
|
236 Example : $feature->move(100, 200, -1);
|
|
237 Description: Moves a feature to a different location. This is faster
|
|
238 then calling 3 seperate accesors in a large loop.
|
|
239 Returntype : none
|
|
240 Exceptions : none
|
|
241 Caller : BaseFeatureAdaptor
|
|
242
|
|
243 =cut
|
|
244
|
|
245 sub move {
|
|
246 my ($self, $start, $end, $strand) = @_;
|
|
247
|
|
248 $self->{'_gsf_start'} = $start;
|
|
249 $self->{'_gsf_end'} = $end;
|
|
250 if(defined $strand) {
|
|
251 $self->{'_gsf_strand'} = $strand;
|
|
252 }
|
|
253 }
|
|
254
|
|
255
|
|
256 =head2 score
|
|
257
|
|
258 Title : score
|
|
259 Usage : $score = $feat->score()
|
|
260 $feat->score($score)
|
|
261 Function: get/set on score information
|
|
262 Returns : float
|
|
263 Args : none if get, the new value if set
|
|
264
|
|
265
|
|
266 =cut
|
|
267
|
|
268 sub score {
|
|
269 my ($self,$value) = @_;
|
|
270
|
|
271 if(defined ($value) ) {
|
|
272 if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
|
|
273 $self->throw("'$value' is not a valid score");
|
|
274 }
|
|
275 $self->{'_gsf_score'} = $value;
|
|
276 }
|
|
277
|
|
278 return $self->{'_gsf_score'};
|
|
279 }
|
|
280
|
|
281 =head2 frame
|
|
282
|
|
283 Title : frame
|
|
284 Usage : $frame = $feat->frame()
|
|
285 $feat->frame($frame)
|
|
286 Function: get/set on frame information
|
|
287 Returns : 0,1,2
|
|
288 Args : none if get, the new value if set
|
|
289
|
|
290
|
|
291 =cut
|
|
292
|
|
293 sub frame {
|
|
294 my ($self,$value) = @_;
|
|
295
|
|
296 if (defined($value)) {
|
|
297 if( $value != 1 && $value != 2 && $value != 3 ) {
|
|
298 $self->throw("'$value' is not a valid frame");
|
|
299 }
|
|
300 $self->{'_gsf_frame'} = $value;
|
|
301 }
|
|
302
|
|
303 return $self->{'_gsf_frame'};
|
|
304 }
|
|
305
|
|
306 =head2 primary_tag
|
|
307
|
|
308 Title : primary_tag
|
|
309 Usage : $tag = $feat->primary_tag()
|
|
310 $feat->primary_tag('exon')
|
|
311 Function: get/set on the primary tag for a feature,
|
|
312 eg 'exon'
|
|
313 Returns : a string
|
|
314 Args : none
|
|
315
|
|
316
|
|
317 =cut
|
|
318
|
|
319 sub primary_tag{
|
|
320 my ($self,$arg) = @_;
|
|
321
|
|
322 if (defined($arg)) {
|
|
323 # throw warnings about setting primary tag
|
|
324 my ($p,$f,$l) = caller;
|
|
325 $self->warn("$f:$l setting primary_tag now deprecated." .
|
|
326 "Primary tag is delegated to analysis object");
|
|
327 }
|
|
328
|
|
329 unless($self->analysis) {
|
|
330 return '';
|
|
331 }
|
|
332
|
|
333 return $self->analysis->gff_feature();
|
|
334 }
|
|
335
|
|
336 =head2 source_tag
|
|
337
|
|
338 Title : source_tag
|
|
339 Usage : $tag = $feat->source_tag()
|
|
340 $feat->source_tag('genscan');
|
|
341 Function: Returns the source tag for a feature,
|
|
342 eg, 'genscan'
|
|
343 Returns : a string
|
|
344 Args : none
|
|
345
|
|
346
|
|
347 =cut
|
|
348
|
|
349 sub source_tag{
|
|
350 my ($self,$arg) = @_;
|
|
351
|
|
352 if (defined($arg)) {
|
|
353 # throw warnings about setting primary tag
|
|
354 my ($p,$f,$l) = caller;
|
|
355 $self->warn("$f:$l setting source_tag now deprecated. " .
|
|
356 "Source tag is delegated to analysis object");
|
|
357 }
|
|
358
|
|
359 unless($self->analysis) {
|
|
360 return "";
|
|
361 }
|
|
362
|
|
363 return $self->analysis->gff_source();
|
|
364 }
|
|
365
|
|
366
|
|
367 =head2 analysis
|
|
368
|
|
369 Title : analysis
|
|
370 Usage : $sf->analysis();
|
|
371 Function: Store details of the program/database
|
|
372 and versions used to create this feature.
|
|
373
|
|
374 Example :
|
|
375 Returns :
|
|
376 Args :
|
|
377
|
|
378
|
|
379 =cut
|
|
380
|
|
381 sub analysis {
|
|
382 my ($self,$value) = @_;
|
|
383
|
|
384 if (defined($value)) {
|
|
385 unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) {
|
|
386 $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object "
|
|
387 . "but a $value object");
|
|
388 }
|
|
389
|
|
390 $self->{_analysis} = $value;
|
|
391 } else {
|
|
392 #if _analysis is not defined, create a new analysis object
|
|
393 unless(defined $self->{_analysis}) {
|
|
394 $self->{_analysis} = new Bio::EnsEMBL::Analysis();
|
|
395 }
|
|
396 }
|
|
397
|
|
398 return $self->{_analysis};
|
|
399 }
|
|
400
|
|
401 =head2 validate
|
|
402
|
|
403 Title : validate
|
|
404 Usage : $sf->validate;
|
|
405 Function: Checks whether all the data is present in the
|
|
406 object.
|
|
407 Example :
|
|
408 Returns :
|
|
409 Args :
|
|
410
|
|
411
|
|
412 =cut
|
|
413
|
|
414 sub validate {
|
|
415 my ($self) = @_;
|
|
416
|
|
417 $self->vthrow("Seqname not defined in feature") unless defined($self->seqname);
|
|
418 $self->vthrow("start not defined in feature") unless defined($self->start);
|
|
419 $self->vthrow("end not defined in feature") unless defined($self->end);
|
|
420 $self->vthrow("strand not defined in feature") unless defined($self->strand);
|
|
421 $self->vthrow("score not defined in feature") unless defined($self->score);
|
|
422 $self->vthrow("analysis not defined in feature") unless defined($self->analysis);
|
|
423
|
|
424 if ($self->end < $self->start) {
|
|
425 $self->vthrow("End coordinate < start coordinate");
|
|
426 }
|
|
427
|
|
428 }
|
|
429
|
|
430
|
|
431
|
|
432 sub vthrow {
|
|
433 my ($self,$message) = @_;
|
|
434
|
|
435 print(STDERR "Error validating feature [$message]\n");
|
|
436 print(STDERR " Seqname : [" . $self->{_seqname} . "]\n");
|
|
437 print(STDERR " Start : [" . $self->{_gsf_start} . "]\n");
|
|
438 print(STDERR " End : [" . $self->{_gsf_end} . "]\n");
|
|
439 print(STDERR " Strand : [" .
|
|
440 ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n");
|
|
441
|
|
442 print(STDERR " Score : [" . $self->{_gsf_score} . "]\n");
|
|
443
|
|
444 print(STDERR " Analysis : [" . $self->{_analysis}->dbID . "]\n");
|
|
445
|
|
446 $self->throw("Invalid feature - see dump on STDERR");
|
|
447 }
|
|
448
|
|
449
|
|
450 =head2 validate_prot_feature
|
|
451
|
|
452 Title : validate_prot_feature
|
|
453 Usage :
|
|
454 Function:
|
|
455 Example :
|
|
456 Returns :
|
|
457 Args :
|
|
458
|
|
459
|
|
460 =cut
|
|
461
|
|
462 # Shouldn't this go as "validate" into Pro_SeqFeature?
|
|
463 sub validate_prot_feature{
|
|
464 my ($self,$num) = @_;
|
|
465 $self->throw("Seqname not defined in feature") unless defined($self->seqname);
|
|
466 $self->throw("start not defined in feature") unless defined($self->start);
|
|
467 $self->throw("end not defined in feature") unless defined($self->end);
|
|
468 if ($num == 1) {
|
|
469 $self->throw("score not defined in feature") unless defined($self->score);
|
|
470 $self->throw("percent_id not defined in feature") unless defined($self->percent_id);
|
|
471 $self->throw("evalue not defined in feature") unless defined($self->p_value);
|
|
472 }
|
|
473 $self->throw("analysis not defined in feature") unless defined($self->analysis);
|
|
474 }
|
|
475
|
|
476
|
|
477
|
|
478 # These methods are specified in the SeqFeatureI interface but we don't want
|
|
479 # people to store data in them. These are just here in order to keep
|
|
480 # existing code working
|
|
481
|
|
482
|
|
483 =head2 has_tag
|
|
484
|
|
485 Title : has_tag
|
|
486 Usage : $value = $self->has_tag('some_tag')
|
|
487 Function: Returns the value of the tag (undef if
|
|
488 none)
|
|
489 Returns :
|
|
490 Args :
|
|
491
|
|
492
|
|
493 =cut
|
|
494
|
|
495 sub has_tag{
|
|
496 my ($self,$tag) = (shift, shift);
|
|
497
|
|
498 return exists $self->{'_gsf_tag_hash'}->{$tag};
|
|
499 }
|
|
500
|
|
501 =head2 add_tag_value
|
|
502
|
|
503 Title : add_tag_value
|
|
504 Usage : $self->add_tag_value('note',"this is a note");
|
|
505 Returns : nothing
|
|
506 Args : tag (string) and value (any scalar)
|
|
507
|
|
508
|
|
509 =cut
|
|
510
|
|
511 sub add_tag_value{
|
|
512 my ($self,$tag,$value) = @_;
|
|
513
|
|
514 if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) {
|
|
515 $self->{'_gsf_tag_hash'}->{$tag} = [];
|
|
516 }
|
|
517
|
|
518 push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
|
|
519 }
|
|
520
|
|
521 =head2 each_tag_value
|
|
522
|
|
523 Title : each_tag_value
|
|
524 Usage :
|
|
525 Function:
|
|
526 Example :
|
|
527 Returns :
|
|
528 Args :
|
|
529
|
|
530
|
|
531 =cut
|
|
532
|
|
533 sub each_tag_value {
|
|
534 my ($self,$tag) = @_;
|
|
535 if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
|
|
536 $self->throw("asking for tag value that does not exist $tag");
|
|
537 }
|
|
538
|
|
539 return @{$self->{'_gsf_tag_hash'}->{$tag}};
|
|
540 }
|
|
541
|
|
542
|
|
543 =head2 all_tags
|
|
544
|
|
545 Title : all_tags
|
|
546 Usage : @tags = $feat->all_tags()
|
|
547 Function: gives all tags for this feature
|
|
548 Returns : an array of strings
|
|
549 Args : none
|
|
550
|
|
551
|
|
552 =cut
|
|
553
|
|
554 sub all_tags{
|
|
555 my ($self,@args) = @_;
|
|
556
|
|
557 return keys %{$self->{'_gsf_tag_hash'}};
|
|
558 }
|
|
559
|
|
560
|
|
561
|
|
562 =head2 seqname
|
|
563
|
|
564 Arg [1] : string $seqname
|
|
565 Example : $seqname = $self->seqname();
|
|
566 Description: Obtains the seqname of this features sequence. This is set
|
|
567 automatically when a sequence with a name is attached, or may
|
|
568 be set manually.
|
|
569 Returntype : string
|
|
570 Exceptions : none
|
|
571 Caller : general, attach_seq
|
|
572
|
|
573 =cut
|
|
574
|
|
575 sub seqname{
|
|
576 my ($self,$seqname) = @_;
|
|
577
|
|
578 my $seq = $self->contig();
|
|
579
|
|
580 if(defined $seqname) {
|
|
581 $self->{_seqname} = $seqname;
|
|
582 } else {
|
|
583 if($seq && ref $seq && $seq->can('name')) {
|
|
584 $self->{_seqname} = $seq->name();
|
|
585 }
|
|
586 }
|
|
587
|
|
588 return $self->{_seqname};
|
|
589 }
|
|
590
|
|
591
|
|
592 =head2 attach_seq
|
|
593
|
|
594 Title : attach_seq
|
|
595 Usage : $sf->attach_seq($seq)
|
|
596 Function: Attaches a Bio::PrimarySeqI object to this feature. This
|
|
597 Bio::PrimarySeqI object is for the *entire* sequence: ie
|
|
598 from 1 to 10000
|
|
599 Example :
|
|
600 Returns :
|
|
601 Args :
|
|
602
|
|
603
|
|
604 =cut
|
|
605
|
|
606 sub attach_seq{
|
|
607 my ($self, $seq) = @_;
|
|
608
|
|
609 $self->contig($seq);
|
|
610 }
|
|
611
|
|
612 =head2 seq
|
|
613
|
|
614 Example : $tseq = $sf->seq()
|
|
615 Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature
|
|
616 Returns : a Bio::PrimarySeq object (I reckon)
|
|
617
|
|
618 =cut
|
|
619
|
|
620 sub seq{
|
|
621 my ($self,$arg) = @_;
|
|
622
|
|
623 if( defined $arg ) {
|
|
624 $self->throw("Calling SeqFeature::Generic->seq with an argument. " .
|
|
625 "You probably want attach_seq");
|
|
626 }
|
|
627
|
|
628 if( ! exists $self->{'_gsf_seq'} ) {
|
|
629 return undef;
|
|
630 }
|
|
631
|
|
632 # assumming our seq object is sensible, it should not have to yank
|
|
633 # the entire sequence out here.
|
|
634
|
|
635 my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end());
|
|
636
|
|
637
|
|
638 if( $self->strand == -1 ) {
|
|
639
|
|
640 # ok. this does not work well (?)
|
|
641 #print STDERR "Before revcom", $seq->str, "\n";
|
|
642 $seq = $seq->revcom;
|
|
643 #print STDERR "After revcom", $seq->str, "\n";
|
|
644 }
|
|
645
|
|
646 return $seq;
|
|
647 }
|
|
648
|
|
649 =head2 entire_seq
|
|
650
|
|
651 Title : entire_seq
|
|
652 Usage : $whole_seq = $sf->entire_seq()
|
|
653 Function: gives the entire sequence that this seqfeature is attached to
|
|
654 Example :
|
|
655 Returns :
|
|
656 Args :
|
|
657
|
|
658
|
|
659 =cut
|
|
660
|
|
661 sub entire_seq{
|
|
662 my ($self) = @_;
|
|
663
|
|
664 return $self->contig;
|
|
665 }
|
|
666
|
|
667
|
|
668 =head2 sub_SeqFeature
|
|
669
|
|
670 Title : sub_SeqFeature
|
|
671 Usage : @feats = $feat->sub_SeqFeature();
|
|
672 Function: Returns an array of sub Sequence Features
|
|
673 Returns : An array
|
|
674 Args : none
|
|
675
|
|
676
|
|
677 =cut
|
|
678
|
|
679 sub sub_SeqFeature {
|
|
680 my ($self) = @_;
|
|
681
|
|
682 if ( $self->{'_gsf_sub_array'} ) {
|
|
683 return @{ $self->{'_gsf_sub_array'} };
|
|
684 } else {
|
|
685 return ();
|
|
686 }
|
|
687 }
|
|
688
|
|
689 =head2 add_sub_SeqFeature
|
|
690
|
|
691 Title : add_sub_SeqFeature
|
|
692 Usage : $feat->add_sub_SeqFeature($subfeat);
|
|
693 $feat->add_sub_SeqFeature($subfeat,'EXPAND')
|
|
694 Function: adds a SeqFeature into the subSeqFeature array.
|
|
695 with no 'EXPAND' qualifer, subfeat will be tested
|
|
696 as to whether it lies inside the parent, and throw
|
|
697 an exception if not.
|
|
698
|
|
699 If EXPAND is used, the parents start/end/strand will
|
|
700 be adjusted so that it grows to accommodate the new
|
|
701 subFeature
|
|
702 Returns : nothing
|
|
703 Args : An object which has the SeqFeatureI interface
|
|
704
|
|
705
|
|
706 =cut
|
|
707
|
|
708 sub add_sub_SeqFeature{
|
|
709 my ($self,$feat,$expand) = @_;
|
|
710
|
|
711 if( !$feat->isa('Bio::SeqFeatureI') ) {
|
|
712 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
|
|
713 }
|
|
714
|
|
715 if( $expand eq 'EXPAND' ) {
|
|
716 # if this doesn't have start/end set - forget it!
|
|
717 if( !defined $self->start && !defined $self->end ) {
|
|
718 $self->start($feat->start());
|
|
719 $self->end($feat->end());
|
|
720 $self->strand($feat->strand);
|
|
721 } else {
|
|
722 my ($start,$end);
|
|
723 if( $feat->start < $self->start ) {
|
|
724 $start = $feat->start;
|
|
725 }
|
|
726
|
|
727 if( $feat->end > $self->end ) {
|
|
728 $end = $feat->end;
|
|
729 }
|
|
730
|
|
731 $self->start($start);
|
|
732 $self->end($end);
|
|
733
|
|
734 }
|
|
735 } else {
|
|
736 if( !defined($feat->start()) || !defined($feat->end()) ||
|
|
737 !defined($self->start()) || !defined($self->end())) {
|
|
738 $self->throw( "This SeqFeature and the sub_SeqFeature must define".
|
|
739 " start and end.");
|
|
740 }
|
|
741 if($feat->start() > $feat->end() || $self->start() > $self->end()) {
|
|
742 $self->throw("This SeqFeature and the sub_SeqFeature must have " .
|
|
743 "start that is less than or equal to end.");
|
|
744 }
|
|
745 if($feat->start() < $self->start() || $feat->end() > $self->end() ) {
|
|
746 $self->throw("$feat is not contained within parent feature, " .
|
|
747 "and expansion is not valid");
|
|
748 }
|
|
749 }
|
|
750
|
|
751 push(@{$self->{'_gsf_sub_array'}},$feat);
|
|
752
|
|
753 }
|
|
754
|
|
755 =head2 flush_sub_SeqFeature
|
|
756
|
|
757 Title : flush_sub_SeqFeature
|
|
758 Usage : $sf->flush_sub_SeqFeature
|
|
759 Function: Removes all sub SeqFeature
|
|
760 (if you want to remove only a subset, take
|
|
761 an array of them all, flush them, and add
|
|
762 back only the guys you want)
|
|
763 Example :
|
|
764 Returns : none
|
|
765 Args : none
|
|
766
|
|
767
|
|
768 =cut
|
|
769
|
|
770 sub flush_sub_SeqFeature {
|
|
771 my ($self) = @_;
|
|
772
|
|
773 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
|
|
774 }
|
|
775
|
|
776
|
|
777 sub id {
|
|
778 my ($self,$value) = @_;
|
|
779
|
|
780 if (defined($value)) {
|
|
781 $self->{_id} = $value;
|
|
782 }
|
|
783
|
|
784 return $self->{_id};
|
|
785
|
|
786 }
|
|
787
|
|
788 =head2 percent_id
|
|
789
|
|
790 Title : percent_id
|
|
791 Usage : $pid = $feat->percent_id()
|
|
792 $feat->percent_id($pid)
|
|
793 Function: get/set on percentage identity information
|
|
794 Returns : float
|
|
795 Args : none if get, the new value if set
|
|
796
|
|
797 =cut
|
|
798
|
|
799 sub percent_id {
|
|
800 my ($self,$value) = @_;
|
|
801
|
|
802 if (defined($value))
|
|
803 {
|
|
804 $self->{_percent_id} = $value;
|
|
805 }
|
|
806
|
|
807 return $self->{_percent_id};
|
|
808 }
|
|
809
|
|
810 =head2 p_value
|
|
811
|
|
812 Title : p_value
|
|
813 Usage : $p_val = $feat->p_value()
|
|
814 $feat->p_value($p_val)
|
|
815 Function: get/set on p value information
|
|
816 Returns : float
|
|
817 Args : none if get, the new value if set
|
|
818
|
|
819 =cut
|
|
820
|
|
821 sub p_value {
|
|
822 my ($self,$value) = @_;
|
|
823
|
|
824 if (defined($value))
|
|
825 {
|
|
826 $self->{_p_value} = $value;
|
|
827 }
|
|
828
|
|
829 return $self->{_p_value};
|
|
830 }
|
|
831
|
|
832 =head2 phase
|
|
833
|
|
834 Title : phase
|
|
835 Usage : $phase = $feat->phase()
|
|
836 $feat->phase($phase)
|
|
837 Function: get/set on start phase of predicted exon feature
|
|
838 Returns : [0,1,2]
|
|
839 Args : none if get, 0,1 or 2 if set.
|
|
840
|
|
841 =cut
|
|
842
|
|
843 sub phase {
|
|
844 my ($self, $value) = @_;
|
|
845
|
|
846 if (defined($value) )
|
|
847 {
|
|
848 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
|
|
849 $self->{_phase} = $value;
|
|
850 }
|
|
851
|
|
852 return $self->{_phase};
|
|
853 }
|
|
854
|
|
855 =head2 end_phase
|
|
856
|
|
857 Title : end_phase
|
|
858 Usage : $end_phase = $feat->end_phase()
|
|
859 $feat->end_phase($end_phase)
|
|
860 Function: returns end_phase based on phase and length of feature
|
|
861 Returns : [0,1,2]
|
|
862 Args : none if get, 0,1 or 2 if set.
|
|
863
|
|
864 =cut
|
|
865
|
|
866 sub end_phase {
|
|
867 my ($self, $value) = @_;
|
|
868
|
|
869 if (defined($value))
|
|
870 {
|
|
871 $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
|
|
872 $self->{_end_phase} = $value;
|
|
873 }
|
|
874
|
|
875 return $self->{_end_phase};
|
|
876 }
|
|
877
|
|
878
|
|
879 sub gffstring {
|
|
880 my ($self) = @_;
|
|
881
|
|
882 my $str;
|
|
883
|
|
884 my $strand = "+";
|
|
885
|
|
886 if ((defined $self->strand)&&($self->strand == -1)) {
|
|
887 $strand = "-";
|
|
888 }
|
|
889
|
|
890 $str .= (defined $self->seqname) ? $self->seqname."\t" : "\t";
|
|
891 $str .= (defined $self->source_tag) ? $self->source_tag."\t" : "\t";
|
|
892 $str .= (defined $self->primary_tag) ? $self->primary_tag."\t" : "\t";
|
|
893 $str .= (defined $self->start) ? $self->start."\t" : "\t";
|
|
894 $str .= (defined $self->end) ? $self->end."\t" : "\t";
|
|
895 $str .= (defined $self->score) ? $self->score."\t" : "\t";
|
|
896 $str .= (defined $self->strand) ? $strand."\t" : ".\t";
|
|
897 $str .= (defined $self->phase) ? $self->phase."\t" : ".\t";
|
|
898 eval{
|
|
899 $str .= (defined $self->end_phase) ? $self->end_phase."\t" : ".\t";
|
|
900 };
|
|
901
|
|
902 return $str;
|
|
903 }
|
|
904
|
|
905
|
|
906 =head2 external_db
|
|
907
|
|
908 Title : external_db
|
|
909 Usage : $pid = $feat->external_db()
|
|
910 $feat->external_db($dbid)
|
|
911 Function: get/set for an external db accession number (e.g.: Interpro)
|
|
912 Returns :
|
|
913 Args : none if get, the new value if set
|
|
914
|
|
915 =cut
|
|
916
|
|
917 sub external_db {
|
|
918 my ($self,$value) = @_;
|
|
919
|
|
920 if (defined($value))
|
|
921 {
|
|
922 $self->{'_external_db'} = $value;
|
|
923 }
|
|
924
|
|
925 return $self->{'_external_db'};
|
|
926 }
|
|
927
|
|
928
|
|
929
|
|
930 =head2 contig
|
|
931
|
|
932 Arg [1] : Bio::PrimarySeqI $seq
|
|
933 Example : $seq = $self->contig;
|
|
934 Description: Accessor to attach/retrieve a sequence to/from a feature
|
|
935 Returntype : Bio::PrimarySeqI
|
|
936 Exceptions : none
|
|
937 Caller : general
|
|
938
|
|
939 =cut
|
|
940
|
|
941 sub contig {
|
|
942 my ($self, $arg) = @_;
|
|
943
|
|
944 if ($arg) {
|
|
945 unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) {
|
|
946 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
|
|
947 }
|
|
948
|
|
949 $self->{'_gsf_seq'} = $arg;
|
|
950
|
|
951 # attach to sub features if they want it
|
|
952
|
|
953 foreach my $sf ($self->sub_SeqFeature) {
|
|
954 if ($sf->can("attach_seq")) {
|
|
955 $sf->attach_seq($arg);
|
|
956 }
|
|
957 }
|
|
958 }
|
|
959 #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'});
|
|
960 # my ($p, $f, $l) = caller;
|
|
961 # print STDERR "Caller = ".$f." ".$l."\n";
|
|
962 return $self->{'_gsf_seq'};
|
|
963 }
|
|
964
|
|
965
|
|
966
|
|
967 sub is_splittable {
|
|
968 my ($self, $arg) = @_;
|
|
969
|
|
970 if (defined $arg) {
|
|
971 $self->{'_is_splittable'} = $arg;
|
|
972 }
|
|
973 return $self->{'_is_splittable'};
|
|
974 }
|
|
975
|
|
976
|
|
977 sub transform {
|
|
978 my ($self, $slice) = @_;
|
|
979
|
|
980 unless (defined $slice) {
|
|
981
|
|
982 if ((defined $self->contig) &&
|
|
983 ($self->contig->isa("Bio::EnsEMBL::RawContig"))) {
|
|
984
|
|
985 # we are already in rawcontig coords, nothing needs to be done
|
|
986 return $self;
|
|
987
|
|
988 }
|
|
989 else {
|
|
990 # transform to raw_contig coords from Slice coords
|
|
991 return $self->_transform_to_RawContig();
|
|
992 }
|
|
993 }
|
|
994
|
|
995 if (defined $self->contig) {
|
|
996
|
|
997 if ($self->contig->isa("Bio::EnsEMBL::RawContig")) {
|
|
998
|
|
999 # transform to slice coords from raw contig coords
|
|
1000 return $self->_transform_to_Slice($slice);
|
|
1001 }
|
|
1002 elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) {
|
|
1003
|
|
1004 # transform to slice coords from other slice coords
|
|
1005 return $self->_transform_between_Slices($slice);
|
|
1006 }
|
|
1007 else {
|
|
1008
|
|
1009 # Unknown contig type
|
|
1010 $self->throw("Cannot transform unknown contig type @{[$self->contig]}");
|
|
1011 }
|
|
1012 }
|
|
1013 else {
|
|
1014
|
|
1015 #Can't convert to slice coords without a contig to work with
|
|
1016 return $self->throw("Object's contig is not defined - cannot transform");
|
|
1017 }
|
|
1018
|
|
1019 }
|
|
1020
|
|
1021
|
|
1022 sub _transform_to_Slice {
|
|
1023 my ($self, $slice) = @_;
|
|
1024
|
|
1025 $self->throw("can't transform coordinates of $self without a contig defined")
|
|
1026 unless $self->contig;
|
|
1027
|
|
1028 unless($self->contig->adaptor) {
|
|
1029 $self->throw("cannot transform coordinates of $self without adaptor " .
|
|
1030 "attached to contig");
|
|
1031 }
|
|
1032
|
|
1033 my $dbh = $self->contig->adaptor->db;
|
|
1034
|
|
1035 my $mapper =
|
|
1036 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
|
|
1037 my $rca = $dbh->get_RawContigAdaptor;
|
|
1038
|
|
1039 my @mapped = $mapper->map_coordinates_to_assembly(
|
|
1040 $self->contig->dbID,
|
|
1041 $self->start,
|
|
1042 $self->end,
|
|
1043 $self->strand
|
|
1044 );
|
|
1045
|
|
1046 unless (@mapped) {
|
|
1047 $self->throw("couldn't map $self to Slice");
|
|
1048 }
|
|
1049
|
|
1050 unless (@mapped == 1) {
|
|
1051 $self->throw("$self should only map to one chromosome - " .
|
|
1052 "something bad has happened ...");
|
|
1053 }
|
|
1054
|
|
1055 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
|
|
1056 $self->warn("feature lies on gap\n");
|
|
1057 return;
|
|
1058 }
|
|
1059
|
|
1060 if( ! defined $slice->chr_name() ) {
|
|
1061 my $slice_adaptor = $slice->adaptor();
|
|
1062 %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )};
|
|
1063 }
|
|
1064
|
|
1065 # mapped coords are on chromosome - need to convert to slice
|
|
1066 if($slice->strand == 1) {
|
|
1067 $self->start ($mapped[0]->start - $slice->chr_start + 1);
|
|
1068 $self->end ($mapped[0]->end - $slice->chr_start + 1);
|
|
1069 $self->strand ($mapped[0]->strand);
|
|
1070 } else {
|
|
1071 $self->start ($slice->chr_end - $mapped[0]->end + 1);
|
|
1072 $self->end ($slice->chr_end - $mapped[0]->start + 1);
|
|
1073 $self->strand ($mapped[0]->strand * -1);
|
|
1074 }
|
|
1075
|
|
1076 $self->seqname($mapped[0]->id);
|
|
1077
|
|
1078 #set the contig to the slice
|
|
1079 $self->contig($slice);
|
|
1080
|
|
1081 return $self;
|
|
1082 }
|
|
1083
|
|
1084
|
|
1085 sub _transform_between_Slices {
|
|
1086 my ($self, $to_slice) = @_;
|
|
1087
|
|
1088 my $from_slice = $self->contig;
|
|
1089
|
|
1090 $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
|
|
1091 unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") );
|
|
1092
|
|
1093 if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
|
|
1094 $self->warn("Can't transform between chromosomes: $c1 and $c2");
|
|
1095 return;
|
|
1096 }
|
|
1097
|
|
1098 my($start, $end, $strand);
|
|
1099
|
|
1100 #first convert to assembly coords
|
|
1101 if($from_slice->strand == 1) {
|
|
1102 $start = $from_slice->chr_start + $self->start - 1;
|
|
1103 $end = $from_slice->chr_start + $self->end - 1;
|
|
1104 $strand = $self->strand;
|
|
1105 } else {
|
|
1106 $start = $from_slice->chr_end - $self->end + 1;
|
|
1107 $end = $from_slice->chr_end - $self->start + 1;
|
|
1108 $strand = $self->strand;
|
|
1109 }
|
|
1110
|
|
1111 #now convert to the other slice's coords
|
|
1112 if($to_slice->strand == 1) {
|
|
1113 $self->start ($start - $to_slice->chr_start + 1);
|
|
1114 $self->end ($end - $to_slice->chr_start + 1);
|
|
1115 $self->strand($strand);
|
|
1116 } else {
|
|
1117 $self->start ($to_slice->chr_end - $end + 1);
|
|
1118 $self->end ($to_slice->chr_end - $start + 1);
|
|
1119 $self->strand($strand * -1);
|
|
1120 }
|
|
1121
|
|
1122 $self->contig($to_slice);
|
|
1123
|
|
1124 return $self;
|
|
1125 }
|
|
1126
|
|
1127
|
|
1128 sub _transform_to_RawContig {
|
|
1129 my($self) = @_;
|
|
1130
|
|
1131 #print STDERR "transforming ".$self." to raw contig coords\n";
|
|
1132 $self->throw("can't transform coordinates of $self without a contig defined")
|
|
1133 unless $self->contig;
|
|
1134
|
|
1135 my $slice = $self->contig;
|
|
1136
|
|
1137 unless($slice->adaptor) {
|
|
1138 $self->throw("can't transform coordinates of $self without an adaptor " .
|
|
1139 "attached to the feature's slice");
|
|
1140 }
|
|
1141
|
|
1142 my $dbh = $slice->adaptor->db;
|
|
1143
|
|
1144 my $mapper =
|
|
1145 $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
|
|
1146 my $rca = $dbh->get_RawContigAdaptor;
|
|
1147
|
|
1148 #first convert the features coordinates to assembly coordinates
|
|
1149 my($start, $end, $strand);
|
|
1150 if($slice->strand == 1) {
|
|
1151 $start = $slice->chr_start + $self->start - 1;
|
|
1152 $end = $slice->chr_start + $self->end - 1;
|
|
1153 $strand = $self->strand;
|
|
1154 } else {
|
|
1155 $start = $slice->chr_end - $self->end + 1;
|
|
1156 $end = $slice->chr_end - $self->start + 1;
|
|
1157 $strand = $self->strand * -1;
|
|
1158 }
|
|
1159
|
|
1160 #convert the assembly coordinates to RawContig coordinates
|
|
1161 my @mapped = $mapper->map_coordinates_to_rawcontig(
|
|
1162 $slice->chr_name,
|
|
1163 $start,
|
|
1164 $end,
|
|
1165 $strand
|
|
1166 );
|
|
1167
|
|
1168 unless (@mapped) {
|
|
1169 $self->throw("couldn't map $self");
|
|
1170 return $self;
|
|
1171 }
|
|
1172
|
|
1173 if (@mapped == 1) {
|
|
1174
|
|
1175 if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
|
|
1176 $self->warn("feature lies on gap\n");
|
|
1177 return;
|
|
1178 }
|
|
1179
|
|
1180 my $rc = $rca->fetch_by_dbID($mapped[0]->id);
|
|
1181
|
|
1182 $self->start ($mapped[0]->start);
|
|
1183 $self->end ($mapped[0]->end);
|
|
1184 $self->strand ($mapped[0]->strand);
|
|
1185 $self->seqname ($mapped[0]->id);
|
|
1186 #print STDERR "setting contig to be ".$mapped[0]->id."\n";
|
|
1187 $self->contig($rca->fetch_by_dbID($mapped[0]->id));
|
|
1188
|
|
1189 return $self;
|
|
1190 }
|
|
1191 else {
|
|
1192
|
|
1193 # more than one object returned from mapper
|
|
1194 # possibly more than one RawContig in region
|
|
1195
|
|
1196 my (@gaps, @coords);
|
|
1197
|
|
1198 foreach my $m (@mapped) {
|
|
1199
|
|
1200 if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) {
|
|
1201 push @gaps, $m;
|
|
1202 }
|
|
1203 elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
|
|
1204 push @coords, $m;
|
|
1205 }
|
|
1206 }
|
|
1207
|
|
1208 # case where only one RawContig maps
|
|
1209 if (@coords == 1) {
|
|
1210
|
|
1211 $self->start ($coords[0]->start);
|
|
1212 $self->end ($coords[0]->end);
|
|
1213 $self->strand ($coords[0]->strand);
|
|
1214 $self->seqname($coords[0]->id);
|
|
1215 #print STDERR "2 setting contig to be ".$coords[0]->id."\n";
|
|
1216 $self->contig ($rca->fetch_by_dbID($coords[0]->id));
|
|
1217
|
|
1218 $self->warn("Feature [$self] truncated as lies partially on a gap");
|
|
1219 return $self;
|
|
1220 }
|
|
1221
|
|
1222 unless ($self->is_splittable) {
|
|
1223 $self->warn("Feature spans >1 raw contig - can't split\n");
|
|
1224 return;
|
|
1225 }
|
|
1226
|
|
1227 my @out;
|
|
1228 my $obj = ref $self;
|
|
1229
|
|
1230 SPLIT: foreach my $map (@mapped) {
|
|
1231
|
|
1232 if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) {
|
|
1233 $self->warn("piece of evidence lies on gap\n");
|
|
1234 next SPLIT;
|
|
1235 }
|
|
1236
|
|
1237 my $feat = $obj->new;
|
|
1238
|
|
1239 $feat->start ($map->start);
|
|
1240 $feat->end ($map->end);
|
|
1241 $feat->strand ($map->strand);
|
|
1242 #print STDERR "3 setting contig to be ".$mapped[0]->id."\n";
|
|
1243 $feat->contig ($rca->fetch_by_dbID($map->id));
|
|
1244 $feat->adaptor($self->adaptor) if $self->adaptor();
|
|
1245 $feat->display_label($self->display_label) if($self->can('display_label'));
|
|
1246 $feat->analysis($self->analysis);
|
|
1247 push @out, $feat;
|
|
1248 }
|
|
1249
|
|
1250 return @out;
|
|
1251 }
|
|
1252 }
|
|
1253
|
|
1254
|
|
1255 1;
|