comparison variant_effect_predictor/Bio/EnsEMBL/Slice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::Slice - Arbitary Slice of a genome
24
25 =head1 SYNOPSIS
26
27 $sa = $db->get_SliceAdaptor;
28
29 $slice =
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
31
32 # get some attributes of the slice
33 my $seqname = $slice->seq_region_name();
34 my $start = $slice->start();
35 my $end = $slice->end();
36
37 # get the sequence from the slice
38 my $seq = $slice->seq();
39
40 # get some features from the slice
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
42 # do something with a gene
43 }
44
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
46 # do something with dna-dna alignments
47 }
48
49 =head1 DESCRIPTION
50
51 A Slice object represents a region of a genome. It can be used to retrieve
52 sequence or features from an area of interest.
53
54 =head1 METHODS
55
56 =cut
57
58 package Bio::EnsEMBL::Slice;
59 use vars qw(@ISA);
60 use strict;
61
62 use Bio::PrimarySeqI;
63
64
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
66 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
67 use Bio::EnsEMBL::RepeatMaskedSlice;
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
69 use Bio::EnsEMBL::ProjectionSegment;
70 use Bio::EnsEMBL::Registry;
71 use Bio::EnsEMBL::Utils::Iterator;
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor;
73
74 use Bio::EnsEMBL::StrainSlice;
75 #use Bio::EnsEMBL::IndividualSlice;
76 #use Bio::EnsEMBL::IndividualSliceFactory;
77 use Bio::EnsEMBL::Mapper::RangeRegistry;
78 use Bio::EnsEMBL::SeqRegionSynonym;
79 use Scalar::Util qw(weaken isweak);
80
81 # use Data::Dumper;
82
83 my $reg = "Bio::EnsEMBL::Registry";
84
85 @ISA = qw(Bio::PrimarySeqI);
86
87
88 =head2 new
89
90 Arg [...] : List of named arguments
91 Bio::EnsEMBL::CoordSystem COORD_SYSTEM
92 string SEQ_REGION_NAME,
93 int START,
94 int END,
95 int SEQ_REGION_LENGTH, (optional)
96 string SEQ (optional)
97 int STRAND, (optional, defaults to 1)
98 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
99 Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
100 -start => 1,
101 -end => 10000,
102 -strand => 1,
103 -seq_region_name => 'X',
104 -seq_region_length => 12e6,
105 -adaptor => $slice_adaptor);
106 Description: Creates a new slice object. A slice represents a region
107 of sequence in a particular coordinate system. Slices can be
108 used to retrieve sequence and features from an area of
109 interest in a genome.
110
111 Coordinates start at 1 and are inclusive. Negative
112 coordinates or coordinates exceeding the length of the
113 seq_region are permitted. Start must be less than or equal.
114 to end regardless of the strand.
115
116 Slice objects are immutable. Once instantiated their attributes
117 (with the exception of the adaptor) may not be altered. To
118 change the attributes a new slice must be created.
119 Returntype : Bio::EnsEMBL::Slice
120 Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
121 Caller : general, Bio::EnsEMBL::SliceAdaptor
122 Status : Stable
123
124 =cut
125
126 sub new {
127 my $caller = shift;
128
129 #new can be called as a class or object method
130 my $class = ref($caller) || $caller;
131
132 my ($seq, $coord_system, $seq_region_name, $seq_region_length,
133 $start, $end, $strand, $adaptor, $empty) =
134 rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
135 START END STRAND ADAPTOR EMPTY)], @_);
136
137 #empty is only for backwards compatibility
138 if ($empty) {
139 deprecate( "Creation of empty slices is no longer needed"
140 . "and is deprecated" );
141 my $self = bless( { 'empty' => 1 }, $class );
142 $self->adaptor($adaptor);
143 return $self;
144 }
145
146 if ( !defined($seq_region_name) ) {
147 throw('SEQ_REGION_NAME argument is required');
148 }
149 if ( !defined($start) ) { throw('START argument is required') }
150 if ( !defined($end) ) { throw('END argument is required') }
151
152 ## if ( $start > $end + 1 ) {
153 ## throw('start must be less than or equal to end+1');
154 ## }
155
156 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
157
158 if ( $seq_region_length <= 0 ) {
159 throw('SEQ_REGION_LENGTH must be > 0');
160 }
161
162 if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
163 throw('SEQ must be the same length as the defined LENGTH not '
164 . CORE::length($seq)
165 . ' compared to '
166 . ( $end - $start + 1 ) );
167 }
168
169 if(defined($coord_system)) {
170 if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
171 throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
172 }
173 if($coord_system->is_top_level()) {
174 throw('Cannot create slice on toplevel CoordSystem.');
175 }
176 } else {
177 warning("Slice without coordinate system");
178 #warn(stack_trace_dump());
179 }
180
181 $strand ||= 1;
182
183 if($strand != 1 && $strand != -1) {
184 throw('STRAND argument must be -1 or 1');
185 }
186
187 if(defined($adaptor)) {
188 if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
189 throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
190 }
191 }
192
193 my $self = bless {'coord_system' => $coord_system,
194 'seq' => $seq,
195 'seq_region_name' => $seq_region_name,
196 'seq_region_length' => $seq_region_length,
197 'start' => int($start),
198 'end' => int($end),
199 'strand' => $strand}, $class;
200
201 $self->adaptor($adaptor);
202
203 return $self;
204
205 }
206
207 =head2 new_fast
208
209 Arg [1] : hashref to be blessed
210 Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
211 Exceptions : none
212 Returntype : Bio::EnsEMBL::Slice
213 Caller : general
214 Status : Stable
215
216 =cut
217
218
219 sub new_fast {
220 my $class = shift;
221 my $hashref = shift;
222 my $self = bless $hashref, $class;
223 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
224 return $self;
225 }
226
227 =head2 adaptor
228
229 Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
230 Example : $adaptor = $slice->adaptor();
231 Description: Getter/Setter for the slice object adaptor used
232 by this slice for database interaction.
233 Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor
234 Exceptions : thorws if argument passed is not a SliceAdaptor
235 Caller : general
236 Status : Stable
237
238 =cut
239
240 sub adaptor{
241 my $self = shift;
242
243 if(@_) {
244 my $ad = shift;
245 if(defined($ad)) {
246 if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
247 throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
248 }
249 }
250 weaken($self->{'adaptor'} = $ad);
251 }
252
253 return $self->{'adaptor'};
254 }
255
256
257
258 =head2 seq_region_name
259
260 Arg [1] : none
261 Example : $seq_region = $slice->seq_region_name();
262 Description: Returns the name of the seq_region that this slice is on. For
263 example if this slice is in chromosomal coordinates the
264 seq_region_name might be 'X' or '10'.
265
266 This function was formerly named chr_name, but since slices can
267 now be on coordinate systems other than chromosomal it has been
268 changed.
269 Returntype : string
270 Exceptions : none
271 Caller : general
272 Status : Stable
273
274 =cut
275
276 sub seq_region_name {
277 my $self = shift;
278 return $self->{'seq_region_name'};
279 }
280
281
282
283 =head2 seq_region_length
284
285 Arg [1] : none
286 Example : $seq_region_length = $slice->seq_region_length();
287 Description: Returns the length of the entire seq_region that this slice is
288 on. For example if this slice is on a chromosome this will be
289 the length (in basepairs) of the entire chromosome.
290 Returntype : int
291 Exceptions : none
292 Caller : general
293 Status : Stable
294
295 =cut
296
297 sub seq_region_length {
298 my $self = shift;
299 return $self->{'seq_region_length'};
300 }
301
302
303 =head2 coord_system
304
305 Arg [1] : none
306 Example : print $slice->coord_system->name();
307 Description: Returns the coordinate system that this slice is on.
308 Returntype : Bio::EnsEMBL::CoordSystem
309 Exceptions : none
310 Caller : general
311 Status : Stable
312
313 =cut
314
315 sub coord_system {
316 my $self = shift;
317 return $self->{'coord_system'};
318 }
319
320 =head2 coord_system_name
321
322 Arg [1] : none
323 Example : print $slice->coord_system_name()
324 Description: Convenience method. Gets the name of the coord_system which
325 this slice is on.
326 Returns undef if this Slice does not have an attached
327 CoordSystem.
328 Returntype: string or undef
329 Exceptions: none
330 Caller : general
331 Status : Stable
332
333 =cut
334
335 sub coord_system_name {
336 my $self = shift;
337 my $csystem = $self->{'coord_system'};
338 return ($csystem) ? $csystem->name() : undef;
339 }
340
341
342 =head2 centrepoint
343
344 Arg [1] : none
345 Example : $cp = $slice->centrepoint();
346 Description: Returns the mid position of this slice relative to the
347 start of the sequence region that it was created on.
348 Coordinates are inclusive and start at 1.
349 Returntype : int
350 Exceptions : none
351 Caller : general
352 Status : Stable
353
354 =cut
355
356 sub centrepoint {
357 my $self = shift;
358 return ($self->{'start'}+$self->{'end'})/2;
359 }
360
361 =head2 start
362
363 Arg [1] : none
364 Example : $start = $slice->start();
365 Description: Returns the start position of this slice relative to the
366 start of the sequence region that it was created on.
367 Coordinates are inclusive and start at 1. Negative coordinates
368 or coordinates exceeding the length of the sequence region are
369 permitted. Start is always less than or equal to end
370 regardless of the orientation of the slice.
371 Returntype : int
372 Exceptions : none
373 Caller : general
374 Status : Stable
375
376 =cut
377
378 sub start {
379 my $self = shift;
380 return $self->{'start'};
381 }
382
383
384
385 =head2 end
386
387 Arg [1] : none
388 Example : $end = $slice->end();
389 Description: Returns the end position of this slice relative to the
390 start of the sequence region that it was created on.
391 Coordinates are inclusive and start at 1. Negative coordinates
392 or coordinates exceeding the length of the sequence region are
393 permitted. End is always greater than or equal to start
394 regardless of the orientation of the slice.
395 Returntype : int
396 Exceptions : none
397 Caller : general
398 Status : Stable
399
400 =cut
401
402 sub end {
403 my $self = shift;
404 return $self->{'end'};
405 }
406
407
408
409 =head2 strand
410
411 Arg [1] : none
412 Example : $strand = $slice->strand();
413 Description: Returns the orientation of this slice on the seq_region it has
414 been created on
415 Returntype : int (either 1 or -1)
416 Exceptions : none
417 Caller : general, invert
418 Status : Stable
419
420 =cut
421
422 sub strand{
423 my $self = shift;
424 return $self->{'strand'};
425 }
426
427
428
429
430
431 =head2 name
432
433 Arg [1] : none
434 Example : my $results = $cache{$slice->name()};
435 Description: Returns the name of this slice. The name is formatted as a colon
436 delimited string with the following attributes:
437 coord_system:version:seq_region_name:start:end:strand
438
439 Slices with the same name are equivalent and thus the name can
440 act as a hash key.
441 Returntype : string
442 Exceptions : none
443 Caller : general
444 Status : Stable
445
446 =cut
447
448 sub name {
449 my $self = shift;
450
451 my $cs = $self->{'coord_system'};
452
453 return join(':',
454 ($cs) ? $cs->name() : '',
455 ($cs) ? $cs->version() : '',
456 $self->{'seq_region_name'},
457 $self->{'start'},
458 $self->{'end'},
459 $self->{'strand'});
460 }
461
462
463
464 =head2 length
465
466 Arg [1] : none
467 Example : $length = $slice->length();
468 Description: Returns the length of this slice in basepairs
469 Returntype : int
470 Exceptions : none
471 Caller : general
472 Status : Stable
473
474 =cut
475
476 sub length {
477 my ($self) = @_;
478
479 my $length = $self->{'end'} - $self->{'start'} + 1;
480
481 if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) {
482 $length += $self->{'seq_region_length'};
483 }
484
485 return $length;
486 }
487
488 =head2 is_reference
489 Arg : none
490 Example : my $reference = $slice->is_reference()
491 Description: Returns 1 if slice is a reference slice else 0
492 Returntype : int
493 Caller : general
494 Status : At Risk
495
496 =cut
497
498 sub is_reference {
499 my ($self) = @_;
500
501 if ( !defined( $self->{'is_reference'} ) ) {
502 $self->{'is_reference'} =
503 $self->adaptor()->is_reference( $self->get_seq_region_id() );
504 }
505
506 return $self->{'is_reference'};
507 }
508
509 =head2 is_toplevel
510 Arg : none
511 Example : my $top = $slice->is_toplevel()
512 Description: Returns 1 if slice is a toplevel slice else 0
513 Returntype : int
514 Caller : general
515 Status : At Risk
516
517 =cut
518
519 sub is_toplevel {
520 my ($self) = @_;
521
522 if ( !defined( $self->{'toplevel'} ) ) {
523 $self->{'toplevel'} =
524 $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
525 }
526
527 return $self->{'toplevel'};
528 }
529
530 =head2 is_circular
531 Arg : none
532 Example : my $circ = $slice->is_circular()
533 Description: Returns 1 if slice is a circular slice else 0
534 Returntype : int
535 Caller : general
536 Status : Stable
537
538 =cut
539
540 sub is_circular {
541 my ($self) = @_;
542 my $adaptor = $self->adaptor();
543 return 0 if ! defined $adaptor;
544 if (! exists $self->{'circular'}) {
545 my $id = $adaptor->get_seq_region_id($self);
546 $self->{circular} = $adaptor->is_circular($id);
547 }
548 return $self->{circular};
549 }
550
551 =head2 invert
552
553 Arg [1] : none
554 Example : $inverted_slice = $slice->invert;
555 Description: Creates a copy of this slice on the opposite strand and
556 returns it.
557 Returntype : Bio::EnsEMBL::Slice
558 Exceptions : none
559 Caller : general
560 Status : Stable
561
562 =cut
563
564 sub invert {
565 my $self = shift;
566
567 # make a shallow copy of the slice via a hash copy and flip the strand
568 my %s = %$self;
569 $s{'strand'} = $self->{'strand'} * -1;
570
571 # reverse compliment any attached sequence
572 reverse_comp(\$s{'seq'}) if($s{'seq'});
573
574 # bless and return the copy
575 return bless \%s, ref $self;
576 }
577
578
579
580 =head2 seq
581
582 Arg [1] : none
583 Example : print "SEQUENCE = ", $slice->seq();
584 Description: Returns the sequence of the region represented by this
585 slice formatted as a string.
586 Returntype : string
587 Exceptions : none
588 Caller : general
589 Status : Stable
590
591 =cut
592
593 sub seq {
594 my $self = shift;
595
596 # special case for in-between (insert) coordinates
597 return '' if($self->start() == $self->end() + 1);
598
599 return $self->{'seq'} if($self->{'seq'});
600
601 if($self->adaptor()) {
602 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
603 return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
604 }
605
606 # no attached sequence, and no db, so just return Ns
607 return 'N' x $self->length();
608 }
609
610
611
612 =head2 subseq
613
614 Arg [1] : int $startBasePair
615 relative to start of slice, which is 1.
616 Arg [2] : int $endBasePair
617 relative to start of slice.
618 Arg [3] : (optional) int $strand
619 The strand of the slice to obtain sequence from. Default
620 value is 1.
621 Description: returns string of dna sequence
622 Returntype : txt
623 Exceptions : end should be at least as big as start
624 strand must be set
625 Caller : general
626 Status : Stable
627
628 =cut
629
630 sub subseq {
631 my ( $self, $start, $end, $strand ) = @_;
632
633 if ( $end+1 < $start ) {
634 throw("End coord + 1 is less than start coord");
635 }
636
637 # handle 'between' case for insertions
638 return '' if( $start == $end + 1);
639
640 $strand = 1 unless(defined $strand);
641
642 if ( $strand != -1 && $strand != 1 ) {
643 throw("Invalid strand [$strand] in call to Slice::subseq.");
644 }
645 my $subseq;
646 if($self->adaptor){
647 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
648 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
649 ( $self, $start,
650 $end, $strand )};
651 } else {
652 ## check for gap at the beginning and pad it with Ns
653 if ($start < 1) {
654 $subseq = "N" x (1 - $start);
655 $start = 1;
656 }
657 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
658 ## check for gap at the end and pad it with Ns
659 if ($end > $self->length()) {
660 $subseq .= "N" x ($end - $self->length());
661 }
662 reverse_comp(\$subseq) if($strand == -1);
663 }
664 return $subseq;
665 }
666
667 =head2 sub_Slice_Iterator
668
669 Arg[1] : int The chunk size to request
670 Example : my $i = $slice->sub_Slice_Iterator(60000);
671 while($i->has_next()) { warn $i->next()->name(); }
672 Description : Returns an iterator which batches subslices of this Slice
673 in the requested chunk size
674 Returntype : Bio::EnsEMBL::Utils::Iterator next() will return the next
675 chunk of Slice
676 Exceptions : None
677
678 =cut
679
680 sub sub_Slice_Iterator {
681 my ($self, $chunk_size) = @_;
682 throw "Need a chunk size to divide the slice by" if ! $chunk_size;
683 my $here = 1;
684 my $end = $self->length();
685 my $iterator_sub = sub {
686 while($here <= $end) {
687 my $there = $here + $chunk_size - 1;
688 $there = $end if($there > $end);
689 my $slice = $self->sub_Slice($here, $there);
690 $here = $there + 1;
691 return $slice;
692 }
693 return;
694 };
695 return Bio::EnsEMBL::Utils::Iterator->new($iterator_sub);
696 }
697
698 =head2 assembly_exception_type
699
700 Example : $self->assembly_exception_type();
701 Description : Returns the type of slice this is. If it is reference then you
702 will get 'REF' back. Otherwise you will get the first
703 element from C<get_all_AssemblyExceptionFeatures()>. If no
704 assembly exception exists you will get an empty string back.
705 Returntype : String
706 Exceptions : None
707 Caller : Public
708 Status : Beta
709
710 =cut
711
712 sub assembly_exception_type {
713 my ($self) = @_;
714 my $type = q{};
715 if($self->is_reference()) {
716 $type = 'REF';
717 }
718 else {
719 my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
720 if(@{$assembly_exceptions}) {
721 $type = $assembly_exceptions->[0]->type();
722 }
723 }
724 return $type;
725 }
726
727 =head2 is_chromosome
728
729 Example : print ($slice->is_chromosome()) ? 'I am a chromosome' : 'Not one';
730 Description : Uses a number of rules known to indicate a chromosome region
731 other and takes into account those regions which can be
732 placed on a Chromsome coordinate system but in fact are not
733 assembled into one.
734 Returntype : Boolean indicates if the current object is a chromosome
735 Exceptions : None
736
737 =cut
738
739 sub is_chromosome {
740 my ($self) = @_;
741 my $coord_system = $self->coord_system->name;
742 my $seq_name = $self->seq_region_name;
743
744 if (($seq_name =~ /random
745 |^Un\d{4}$
746 |^Un\.\d{3}\.\d*$
747 |E\d\d\w*$
748 |_NT_
749 |scaffold_
750 |cutchr
751 |unplaced
752 |chunk
753 |clone
754 |contig
755 |genescaffold
756 |group
757 |reftig
758 |supercontig
759 |ultracontig
760 /x) or ( $coord_system !~ /^chromosome$/i )) {
761 return 0;
762 }
763
764 return 1;
765 }
766
767
768 =head2 get_base_count
769
770 Arg [1] : none
771 Example : $c_count = $slice->get_base_count->{'c'};
772 Description: Retrieves a hashref containing the counts of each bases in the
773 sequence spanned by this slice. The format of the hash is :
774 { 'a' => num,
775 'c' => num,
776 't' => num,
777 'g' => num,
778 'n' => num,
779 '%gc' => num }
780
781 All bases which are not in the set [A,a,C,c,T,t,G,g] are
782 included in the 'n' count. The 'n' count could therefore be
783 inclusive of ambiguity codes such as 'y'.
784 The %gc is the ratio of GC to AT content as in:
785 total(GC)/total(ACTG) * 100
786 This function is conservative in its memory usage and scales to
787 work for entire chromosomes.
788 Returntype : hashref
789 Exceptions : none
790 Caller : general
791 Status : Stable
792
793 =cut
794
795 sub get_base_count {
796 my $self = shift;
797
798 my $a = 0;
799 my $c = 0;
800 my $t = 0;
801 my $g = 0;
802
803 my $start = 1;
804 my $end;
805
806 my $RANGE = 100_000;
807 my $len = $self->length();
808
809 my $seq;
810
811 while ( $start <= $len ) {
812 $end = $start + $RANGE - 1;
813 $end = $len if ( $end > $len );
814
815 $seq = $self->subseq( $start, $end );
816
817 $a += $seq =~ tr/Aa//;
818 $c += $seq =~ tr/Cc//;
819 $t += $seq =~ tr/Tt//;
820 $g += $seq =~ tr/Gg//;
821
822 $start = $end + 1;
823 }
824
825 my $actg = $a + $c + $t + $g;
826
827 my $gc_content = 0;
828 if ( $actg > 0 ) { # Avoid dividing by 0
829 $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 );
830 }
831
832 return { 'a' => $a,
833 'c' => $c,
834 't' => $t,
835 'g' => $g,
836 'n' => $len - $actg,
837 '%gc' => $gc_content };
838 }
839
840
841
842 =head2 project
843
844 Arg [1] : string $name
845 The name of the coordinate system to project this slice onto
846 Arg [2] : string $version
847 The version of the coordinate system (such as 'NCBI34') to
848 project this slice onto
849 Example :
850 my $clone_projection = $slice->project('clone');
851
852 foreach my $seg (@$clone_projection) {
853 my $clone = $segment->to_Slice();
854 print $slice->seq_region_name(), ':', $seg->from_start(), '-',
855 $seg->from_end(), ' -> ',
856 $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(),
857 $clone->strand(), "\n";
858 }
859 Description: Returns the results of 'projecting' this slice onto another
860 coordinate system. Projecting to a coordinate system that
861 the slice is assembled from is analagous to retrieving a tiling
862 path. This method may also be used to 'project up' to a higher
863 level coordinate system, however.
864
865 This method returns a listref of triplets [start,end,slice]
866 which represents the projection. The start and end defined the
867 region of this slice which is made up of the third value of
868 the triplet: a slice in the requested coordinate system.
869 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
870 can also be used as [$start,$end,$slice] triplets
871 Exceptions : none
872 Caller : general
873 Status : Stable
874
875 =cut
876
877 sub project {
878 my $self = shift;
879 my $cs_name = shift;
880 my $cs_version = shift;
881
882 throw('Coord_system name argument is required') if(!$cs_name);
883
884 my $slice_adaptor = $self->adaptor();
885
886 if(!$slice_adaptor) {
887 warning("Cannot project without attached adaptor.");
888 return [];
889 }
890
891 if(!$self->coord_system()) {
892 warning("Cannot project without attached coord system.");
893 return [];
894 }
895
896
897 my $db = $slice_adaptor->db();
898 my $csa = $db->get_CoordSystemAdaptor();
899 my $cs = $csa->fetch_by_name($cs_name, $cs_version);
900 my $slice_cs = $self->coord_system();
901
902 if(!$cs) {
903 throw("Cannot project to unknown coordinate system " .
904 "[$cs_name $cs_version]");
905 }
906
907 # no mapping is needed if the requested coord system is the one we are in
908 # but we do need to check if some of the slice is outside of defined regions
909 if($slice_cs->equals($cs)) {
910 return $self->_constrain_to_region();
911 }
912
913 my @projection;
914 my $current_start = 1;
915
916 # decompose this slice into its symlinked components.
917 # this allows us to handle haplotypes and PARs
918 my $normal_slice_proj =
919 $slice_adaptor->fetch_normalized_slice_projection($self);
920 foreach my $segment (@$normal_slice_proj) {
921 my $normal_slice = $segment->[2];
922
923 $slice_cs = $normal_slice->coord_system();
924
925 my $asma = $db->get_AssemblyMapperAdaptor();
926 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
927
928 # perform the mapping between this slice and the requested system
929 my @coords;
930
931 if( defined $asm_mapper ) {
932 @coords = $asm_mapper->map($normal_slice->seq_region_name(),
933 $normal_slice->start(),
934 $normal_slice->end(),
935 $normal_slice->strand(),
936 $slice_cs);
937 } else {
938 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
939 $normal_slice->end());
940 }
941
942
943 # my $last_rank = 0;
944 #construct a projection from the mapping results and return it
945 foreach my $coord (@coords) {
946 my $coord_start = $coord->start();
947 my $coord_end = $coord->end();
948 my $length = $coord_end - $coord_start + 1;
949
950 if ( $coord_start > $coord_end ) {
951 $length =
952 $normal_slice->seq_region_length() -
953 $coord_start +
954 $coord_end + 1;
955 }
956
957 # if( $last_rank != $coord->rank){
958 # $current_start = 1;
959 # print "LAST rank has changed to ".$coord->rank."from $last_rank \n";
960 # }
961 # $last_rank = $coord->rank;
962
963 #skip gaps
964 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
965
966 my $coord_cs = $coord->coord_system();
967
968 # If the normalised projection just ended up mapping to the
969 # same coordinate system we were already in then we should just
970 # return the original region. This can happen for example, if we
971 # were on a PAR region on Y which refered to X and a projection to
972 # 'toplevel' was requested.
973 if($coord_cs->equals($slice_cs)) {
974 # trim off regions which are not defined
975 return $self->_constrain_to_region();
976 }
977 #create slices for the mapped-to coord system
978 my $slice = $slice_adaptor->fetch_by_seq_region_id(
979 $coord->id(),
980 $coord_start,
981 $coord_end,
982 $coord->strand());
983
984 my $current_end = $current_start + $length - 1;
985
986 if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
987 $current_end -= $slice->seq_region_length();
988 }
989
990 push @projection, bless([$current_start, $current_end, $slice],
991 "Bio::EnsEMBL::ProjectionSegment");
992 }
993
994 $current_start += $length;
995 }
996 }
997
998 return \@projection;
999 }
1000
1001
1002 sub _constrain_to_region {
1003 my $self = shift;
1004
1005 my $entire_len = $self->seq_region_length();
1006
1007 #if the slice has negative coordinates or coordinates exceeding the
1008 #exceeding length of the sequence region we want to shrink the slice to
1009 #the defined region
1010
1011 if($self->{'start'} > $entire_len || $self->{'end'} < 1) {
1012 #none of this slice is in a defined region
1013 return [];
1014 }
1015
1016 my $right_contract = 0;
1017 my $left_contract = 0;
1018 if($self->{'end'} > $entire_len) {
1019 $right_contract = $entire_len - $self->{'end'};
1020 }
1021 if($self->{'start'} < 1) {
1022 $left_contract = $self->{'start'} - 1;
1023 }
1024
1025 my $new_slice;
1026 if($left_contract || $right_contract) {
1027 #if slice in negative strand, need to swap contracts
1028 if ($self->strand == 1) {
1029 $new_slice = $self->expand($left_contract, $right_contract);
1030 }
1031 elsif ($self->strand == -1) {
1032 $new_slice = $self->expand($right_contract, $left_contract);
1033 }
1034 } else {
1035 $new_slice = $self;
1036 }
1037
1038 return [bless [1-$left_contract, $self->length()+$right_contract,
1039 $new_slice], "Bio::EnsEMBL::ProjectionSegment" ];
1040 }
1041
1042
1043 =head2 expand
1044
1045 Arg [1] : (optional) int $five_prime_expand
1046 The number of basepairs to shift this slices five_prime
1047 coordinate by. Positive values make the slice larger,
1048 negative make the slice smaller.
1049 coordinate left.
1050 Default = 0.
1051 Arg [2] : (optional) int $three_prime_expand
1052 The number of basepairs to shift this slices three_prime
1053 coordinate by. Positive values make the slice larger,
1054 negative make the slice smaller.
1055 Default = 0.
1056 Arg [3] : (optional) bool $force_expand
1057 if set to 1, then the slice will be contracted even in the case
1058 when shifts $five_prime_expand and $three_prime_expand overlap.
1059 In that case $five_prime_expand and $three_prime_expand will be set
1060 to a maximum possible number and that will result in the slice
1061 which would have only 2pbs.
1062 Default = 0.
1063 Arg [4] : (optional) int* $fpref
1064 The reference to a number of basepairs to shift this slices five_prime
1065 coordinate by. Normally it would be set to $five_prime_expand.
1066 But in case when $five_prime_expand shift can not be applied and
1067 $force_expand is set to 1, then $$fpref will contain the maximum possible
1068 shift
1069 Arg [5] : (optional) int* $tpref
1070 The reference to a number of basepairs to shift this slices three_prime
1071 coordinate by. Normally it would be set to $three_prime_expand.
1072 But in case when $five_prime_expand shift can not be applied and
1073 $force_expand is set to 1, then $$tpref will contain the maximum possible
1074 shift
1075 Example : my $expanded_slice = $slice->expand( 1000, 1000);
1076 my $contracted_slice = $slice->expand(-1000,-1000);
1077 my $shifted_right_slice = $slice->expand(-1000, 1000);
1078 my $shifted_left_slice = $slice->expand( 1000,-1000);
1079 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
1080
1081 Description: Returns a slice which is a resized copy of this slice. The
1082 start and end are moved outwards from the center of the slice
1083 if positive values are provided and moved inwards if negative
1084 values are provided. This slice remains unchanged. A slice
1085 may not be contracted below 1bp but may grow to be arbitrarily
1086 large.
1087 Returntype : Bio::EnsEMBL::Slice
1088 Exceptions : warning if an attempt is made to contract the slice below 1bp
1089 Caller : general
1090 Status : Stable
1091
1092 =cut
1093
1094 sub expand {
1095 my $self = shift;
1096 my $five_prime_shift = shift || 0;
1097 my $three_prime_shift = shift || 0;
1098 my $force_expand = shift || 0;
1099 my $fpref = shift;
1100 my $tpref = shift;
1101
1102 if ( $self->{'seq'} ) {
1103 warning(
1104 "Cannot expand a slice which has a manually attached sequence ");
1105 return undef;
1106 }
1107
1108 my $sshift = $five_prime_shift;
1109 my $eshift = $three_prime_shift;
1110
1111 if ( $self->{'strand'} != 1 ) {
1112 $eshift = $five_prime_shift;
1113 $sshift = $three_prime_shift;
1114 }
1115
1116 my $new_start = $self->{'start'} - $sshift;
1117 my $new_end = $self->{'end'} + $eshift;
1118
1119 if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
1120
1121 if ( $new_start <= 0 ) {
1122 $new_start = $self->seq_region_length() + $new_start;
1123 }
1124 if ( $new_start > $self->seq_region_length() ) {
1125 $new_start -= $self->seq_region_length();
1126 }
1127
1128 if ( $new_end <= 0 ) {
1129 $new_end = $self->seq_region_length() + $new_end;
1130 }
1131 if ( $new_end > $self->seq_region_length() ) {
1132 $new_end -= $self->seq_region_length();
1133 }
1134
1135 }
1136
1137 if ( $new_start > $new_end && (not $self->is_circular() ) ) {
1138
1139 if ($force_expand) {
1140 # Apply max possible shift, if force_expand is set
1141 if ( $sshift < 0 ) {
1142 # if we are contracting the slice from the start - move the
1143 # start just before the end
1144 $new_start = $new_end - 1;
1145 $sshift = $self->{start} - $new_start;
1146 }
1147
1148 if ( $new_start > $new_end ) {
1149 # if the slice still has a negative length - try to move the
1150 # end
1151 if ( $eshift < 0 ) {
1152 $new_end = $new_start + 1;
1153 $eshift = $new_end - $self->{end};
1154 }
1155 }
1156 # return the values by which the primes were actually shifted
1157 $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
1158 $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
1159 }
1160 if ( $new_start > $new_end ) {
1161 throw('Slice start cannot be greater than slice end');
1162 }
1163 }
1164
1165 #fastest way to copy a slice is to do a shallow hash copy
1166 my %new_slice = %$self;
1167 $new_slice{'start'} = int($new_start);
1168 $new_slice{'end'} = int($new_end);
1169
1170 return bless \%new_slice, ref($self);
1171 } ## end sub expand
1172
1173
1174
1175 =head2 sub_Slice
1176
1177 Arg 1 : int $start
1178 Arg 2 : int $end
1179 Arge [3] : int $strand
1180 Example : none
1181 Description: Makes another Slice that covers only part of this slice
1182 If a slice is requested which lies outside of the boundaries
1183 of this function will return undef. This means that
1184 behaviour will be consistant whether or not the slice is
1185 attached to the database (i.e. if there is attached sequence
1186 to the slice). Alternatively the expand() method or the
1187 SliceAdaptor::fetch_by_region method can be used instead.
1188 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
1189 Exceptions : none
1190 Caller : general
1191 Status : Stable
1192
1193 =cut
1194
1195 sub sub_Slice {
1196 my ( $self, $start, $end, $strand ) = @_;
1197
1198 if( $start < 1 || $start > $self->{'end'} ) {
1199 # throw( "start argument not valid" );
1200 return undef;
1201 }
1202
1203 if( $end < $start || $end > $self->{'end'} ) {
1204 # throw( "end argument not valid" )
1205 return undef;
1206 }
1207
1208 my ( $new_start, $new_end, $new_strand, $new_seq );
1209 if( ! defined $strand ) {
1210 $strand = 1;
1211 }
1212
1213 if( $self->{'strand'} == 1 ) {
1214 $new_start = $self->{'start'} + $start - 1;
1215 $new_end = $self->{'start'} + $end - 1;
1216 $new_strand = $strand;
1217 } else {
1218 $new_start = $self->{'end'} - $end + 1;;
1219 $new_end = $self->{'end'} - $start + 1;
1220 $new_strand = -$strand;
1221 }
1222
1223 if( defined $self->{'seq'} ) {
1224 $new_seq = $self->subseq( $start, $end, $strand );
1225 }
1226
1227 #fastest way to copy a slice is to do a shallow hash copy
1228 my $new_slice = {%{$self}};
1229 $new_slice->{'start'} = int($new_start);
1230 $new_slice->{'end'} = int($new_end);
1231 $new_slice->{'strand'} = $new_strand;
1232 if( $new_seq ) {
1233 $new_slice->{'seq'} = $new_seq;
1234 }
1235 weaken($new_slice->{adaptor});
1236
1237 return bless $new_slice, ref($self);
1238 }
1239
1240
1241
1242 =head2 seq_region_Slice
1243
1244 Arg [1] : none
1245 Example : $slice = $slice->seq_region_Slice();
1246 Description: Returns a slice which spans the whole seq_region which this slice
1247 is on. For example if this is a slice which spans a small region
1248 of chromosome X, this method will return a slice which covers the
1249 entire chromosome X. The returned slice will always have strand
1250 of 1 and start of 1. This method cannot be used if the sequence
1251 of the slice has been set manually.
1252 Returntype : Bio::EnsEMBL::Slice
1253 Exceptions : warning if called when sequence of Slice has been set manually.
1254 Caller : general
1255 Status : Stable
1256
1257 =cut
1258
1259 sub seq_region_Slice {
1260 my $self = shift;
1261
1262 if($self->{'seq'}){
1263 warning("Cannot get a seq_region_Slice of a slice which has manually ".
1264 "attached sequence ");
1265 return undef;
1266 }
1267
1268 # quick shallow copy
1269 my $slice;
1270 %{$slice} = %{$self};
1271 bless $slice, ref($self);
1272 weaken($slice->{adaptor});
1273
1274 $slice->{'start'} = 1;
1275 $slice->{'end'} = $slice->{'seq_region_length'};
1276 $slice->{'strand'} = 1;
1277
1278 return $slice;
1279 }
1280
1281
1282 =head2 get_seq_region_id
1283
1284 Arg [1] : none
1285 Example : my $seq_region_id = $slice->get_seq_region_id();
1286 Description: Gets the internal identifier of the seq_region that this slice
1287 is on. Note that this function will not work correctly if this
1288 slice does not have an attached adaptor. Also note that it may
1289 be better to go through the SliceAdaptor::get_seq_region_id
1290 method if you are working with multiple databases since is
1291 possible to work with slices from databases with different
1292 internal seq_region identifiers.
1293 Returntype : int or undef if slices does not have attached adaptor
1294 Exceptions : warning if slice is not associated with a SliceAdaptor
1295 Caller : assembly loading scripts, general
1296 Status : Stable
1297
1298 =cut
1299
1300 sub get_seq_region_id {
1301 my ($self) = @_;
1302
1303 if($self->adaptor) {
1304 return $self->adaptor->get_seq_region_id($self);
1305 } else {
1306 warning('Cannot retrieve seq_region_id without attached adaptor.');
1307 return undef;
1308 }
1309 }
1310
1311
1312 =head2 get_all_Attributes
1313
1314 Arg [1] : optional string $attrib_code
1315 The code of the attribute type to retrieve values for.
1316 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
1317 @slice_attributes = @{$slice->get_all_Attributes()};
1318 Description: Gets a list of Attributes of this slice''s seq_region.
1319 Optionally just get Attrubutes for given code.
1320 Returntype : listref Bio::EnsEMBL::Attribute
1321 Exceptions : warning if slice does not have attached adaptor
1322 Caller : general
1323 Status : Stable
1324
1325 =cut
1326
1327 sub get_all_Attributes {
1328 my $self = shift;
1329 my $attrib_code = shift;
1330
1331 my $result;
1332 my @results;
1333
1334 if(!$self->adaptor()) {
1335 warning('Cannot get attributes without an adaptor.');
1336 return [];
1337 }
1338
1339 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
1340
1341 if( defined $attrib_code ) {
1342 @results = grep { uc($_->code()) eq uc($attrib_code) }
1343 @{$attribute_adaptor->fetch_all_by_Slice( $self )};
1344 $result = \@results;
1345 } else {
1346 $result = $attribute_adaptor->fetch_all_by_Slice( $self );
1347 }
1348
1349 return $result;
1350 }
1351
1352
1353 =head2 get_all_PredictionTranscripts
1354
1355 Arg [1] : (optional) string $logic_name
1356 The name of the analysis used to generate the prediction
1357 transcripts obtained.
1358 Arg [2] : (optional) boolean $load_exons
1359 If set to true will force loading of all PredictionExons
1360 immediately rather than loading them on demand later. This
1361 is faster if there are a large number of PredictionTranscripts
1362 and the exons will be used.
1363 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1364 Description: Retrieves the list of prediction transcripts which overlap
1365 this slice with logic_name $logic_name. If logic_name is
1366 not defined then all prediction transcripts are retrieved.
1367 Returntype : listref of Bio::EnsEMBL::PredictionTranscript
1368 Exceptions : warning if slice does not have attached adaptor
1369 Caller : none
1370 Status : Stable
1371
1372 =cut
1373
1374 sub get_all_PredictionTranscripts {
1375 my ($self,$logic_name, $load_exons) = @_;
1376
1377 if(!$self->adaptor()) {
1378 warning('Cannot get PredictionTranscripts without attached adaptor');
1379 return [];
1380 }
1381 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1382 return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
1383 }
1384
1385
1386
1387 =head2 get_all_DnaAlignFeatures
1388
1389 Arg [1] : (optional) string $logic_name
1390 The name of the analysis performed on the dna align features
1391 to obtain.
1392 Arg [2] : (optional) float $score
1393 The mimimum score of the features to retrieve
1394 Arg [3] : (optional) string $dbtype
1395 The name of an attached database to retrieve the features from
1396 instead, e.g. 'otherfeatures'.
1397 Arg [4] : (optional) float hcoverage
1398 The minimum hcoverage od the featurs to retrieve
1399 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1400 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
1401 logic name $logic_name and with score above $score. If
1402 $logic_name is not defined features of all logic names are
1403 retrieved. If $score is not defined features of all scores are
1404 retrieved.
1405 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1406 Exceptions : warning if slice does not have attached adaptor
1407 Caller : general
1408 Status : Stable
1409
1410 =cut
1411
1412 sub get_all_DnaAlignFeatures {
1413 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1414
1415 if(!$self->adaptor()) {
1416 warning('Cannot get DnaAlignFeatures without attached adaptor');
1417 return [];
1418 }
1419
1420 my $db;
1421
1422 if($dbtype) {
1423 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1424 if(!$db) {
1425 warning("Don't have db $dbtype returning empty list\n");
1426 return [];
1427 }
1428 } else {
1429 $db = $self->adaptor->db;
1430 }
1431
1432 my $dafa = $db->get_DnaAlignFeatureAdaptor();
1433
1434 if(defined($score) and defined ($hcoverage)){
1435 warning "cannot specify score and hcoverage. Using score only";
1436 }
1437 if(defined($score)){
1438 return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1439 }
1440 return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
1441 }
1442
1443
1444
1445 =head2 get_all_ProteinAlignFeatures
1446
1447 Arg [1] : (optional) string $logic_name
1448 The name of the analysis performed on the protein align features
1449 to obtain.
1450 Arg [2] : (optional) float $score
1451 The mimimum score of the features to retrieve
1452 Arg [3] : (optional) string $dbtype
1453 The name of an attached database to retrieve features from
1454 instead.
1455 Arg [4] : (optional) float hcoverage
1456 The minimum hcoverage od the featurs to retrieve
1457 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1458 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
1459 logic name $logic_name and with score above $score. If
1460 $logic_name is not defined features of all logic names are
1461 retrieved. If $score is not defined features of all scores are
1462 retrieved.
1463 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1464 Exceptions : warning if slice does not have attached adaptor
1465 Caller : general
1466 Status : Stable
1467
1468 =cut
1469
1470 sub get_all_ProteinAlignFeatures {
1471 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1472
1473 if(!$self->adaptor()) {
1474 warning('Cannot get ProteinAlignFeatures without attached adaptor');
1475 return [];
1476 }
1477
1478 my $db;
1479
1480 if($dbtype) {
1481 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1482 if(!$db) {
1483 warning("Don't have db $dbtype returning empty list\n");
1484 return [];
1485 }
1486 } else {
1487 $db = $self->adaptor->db;
1488 }
1489
1490 my $pafa = $db->get_ProteinAlignFeatureAdaptor();
1491
1492 if(defined($score) and defined ($hcoverage)){
1493 warning "cannot specify score and hcoverage. Using score only";
1494 }
1495 if(defined($score)){
1496 return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1497 }
1498 return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
1499
1500 }
1501
1502
1503
1504 =head2 get_all_SimilarityFeatures
1505
1506 Arg [1] : (optional) string $logic_name
1507 the name of the analysis performed on the features to retrieve
1508 Arg [2] : (optional) float $score
1509 the lower bound of the score of the features to be retrieved
1510 Example : @feats = @{$slice->get_all_SimilarityFeatures};
1511 Description: Retrieves all dna_align_features and protein_align_features
1512 with analysis named $logic_name and with score above $score.
1513 It is probably faster to use get_all_ProteinAlignFeatures or
1514 get_all_DnaAlignFeatures if a sepcific feature type is desired.
1515 If $logic_name is not defined features of all logic names are
1516 retrieved. If $score is not defined features of all scores are
1517 retrieved.
1518 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1519 Exceptions : warning if slice does not have attached adaptor
1520 Caller : general
1521 Status : Stable
1522
1523 =cut
1524
1525 sub get_all_SimilarityFeatures {
1526 my ($self, $logic_name, $score) = @_;
1527
1528 my @out = ();
1529
1530 push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
1531 push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
1532
1533 return \@out;
1534 }
1535
1536
1537
1538 =head2 get_all_SimpleFeatures
1539
1540 Arg [1] : (optional) string $logic_name
1541 The name of the analysis performed on the simple features
1542 to obtain.
1543 Arg [2] : (optional) float $score
1544 The mimimum score of the features to retrieve
1545 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1546 Description: Retrieves the SimpleFeatures which overlap this slice with
1547 logic name $logic_name and with score above $score. If
1548 $logic_name is not defined features of all logic names are
1549 retrieved. If $score is not defined features of all scores are
1550 retrieved.
1551 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1552 Exceptions : warning if slice does not have attached adaptor
1553 Caller : general
1554 Status : Stable
1555
1556 =cut
1557
1558 sub get_all_SimpleFeatures {
1559 my ($self, $logic_name, $score, $dbtype) = @_;
1560
1561 if(!$self->adaptor()) {
1562 warning('Cannot get SimpleFeatures without attached adaptor');
1563 return [];
1564 }
1565
1566 my $db;
1567 if($dbtype) {
1568 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1569 if(!$db) {
1570 warning("Don't have db $dbtype returning empty list\n");
1571 return [];
1572 }
1573 } else {
1574 $db = $self->adaptor->db;
1575 }
1576
1577 my $sfa = $db->get_SimpleFeatureAdaptor();
1578
1579 return $sfa->fetch_all_by_Slice_and_score($self, $score, $logic_name);
1580 }
1581
1582
1583
1584 =head2 get_all_RepeatFeatures
1585
1586 Arg [1] : (optional) string $logic_name
1587 The name of the analysis performed on the repeat features
1588 to obtain.
1589 Arg [2] : (optional) string/array $repeat_type
1590 Limits features returned to those of the specified
1591 repeat_type. Can specify a single value or an array reference
1592 to limit by more than one
1593 Arg [3] : (optional) string $db
1594 Key for database e.g. core/vega/cdna/....
1595 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')};
1596 Description: Retrieves the RepeatFeatures which overlap with
1597 logic name $logic_name and with score above $score. If
1598 $logic_name is not defined features of all logic names are
1599 retrieved.
1600 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1601 Exceptions : warning if slice does not have attached adaptor
1602 Caller : general
1603 Status : Stable
1604
1605 =cut
1606
1607 sub get_all_RepeatFeatures {
1608 my ($self, $logic_name, $repeat_type, $dbtype) = @_;
1609
1610 if(!$self->adaptor()) {
1611 warning('Cannot get RepeatFeatures without attached adaptor');
1612 return [];
1613 }
1614
1615 my $db;
1616 if($dbtype) {
1617 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1618 if(!$db) {
1619 warning("Don't have db $dbtype returning empty list\n");
1620 return [];
1621 }
1622 } else {
1623 $db = $self->adaptor->db;
1624 }
1625
1626 my $rpfa = $db->get_RepeatFeatureAdaptor();
1627
1628 return $rpfa->fetch_all_by_Slice($self, $logic_name, $repeat_type);
1629 }
1630
1631 =head2 get_all_LD_values
1632
1633 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1634 Description : returns all LD values on this slice. This function will only work correctly if the variation
1635 database has been attached to the core database. If the argument is passed, will return the LD information
1636 in that population
1637 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1638 Exceptions : none
1639 Caller : contigview, snpview
1640 Status : Stable
1641
1642 =cut
1643
1644 sub get_all_LD_values{
1645 my $self = shift;
1646 my $population = shift;
1647
1648
1649 if(!$self->adaptor()) {
1650 warning('Cannot get LDFeatureContainer without attached adaptor');
1651 return [];
1652 }
1653
1654 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1655
1656 unless($variation_db) {
1657 warning("Variation database must be attached to core database to " .
1658 "retrieve variation information" );
1659 return [];
1660 }
1661
1662 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor;
1663
1664 if( $ld_adaptor ) {
1665 return $ld_adaptor->fetch_by_Slice($self,$population);
1666 } else {
1667 return [];
1668
1669 }
1670
1671 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer");
1672
1673 # if( $ld_adaptor ) {
1674 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population);
1675 # if (@{$ld_values} > 1){
1676 # warning("More than 1 variation database attached. Trying to merge LD results");
1677 # my $ld_value_merged = shift @{$ld_values};
1678 # #with more than 1 variation database attached, will try to merge in one single LDContainer object.
1679 # foreach my $ld (@{$ld_values}){
1680 # #copy the ld values to the result hash
1681 # foreach my $key (keys %{$ld->{'ldContainer'}}){
1682 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key};
1683 # }
1684 # #and copy the variationFeatures as well
1685 # foreach my $key (keys %{$ld->{'variationFeatures'}}){
1686 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key};
1687 # }
1688
1689 # }
1690 # return $ld_value_merged;
1691 # }
1692 # else{
1693 # return shift @{$ld_values};
1694 # }
1695 # } else {
1696 # warning("Variation database must be attached to core database to " .
1697 # "retrieve variation information" );
1698 # return [];
1699 # }
1700 }
1701
1702 sub _get_VariationFeatureAdaptor {
1703
1704 my $self = shift;
1705
1706 if(!$self->adaptor()) {
1707 warning('Cannot get variation features without attached adaptor');
1708 return undef;
1709 }
1710
1711 my $vf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
1712 -species => $self->adaptor()->db()->species,
1713 -type => "VariationFeature"
1714 );
1715
1716 if( $vf_adaptor ) {
1717 return $vf_adaptor;
1718 }
1719 else {
1720 warning("Variation database must be attached to core database to " .
1721 "retrieve variation information" );
1722
1723 return undef;
1724 }
1725 }
1726
1727 =head2 get_all_VariationFeatures
1728 Args : $so_terms [optional] - list of so_terms to limit the fetch to
1729 Description : Returns all germline variation features on this slice. This function will
1730 only work correctly if the variation database has been attached to the core
1731 database.
1732 If $so_terms is specified, only variation features with a consequence type
1733 that matches or is an ontological child of any of the supplied terms will
1734 be returned
1735 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1736 Exceptions : none
1737 Caller : contigview, snpview
1738 Status : Stable
1739
1740 =cut
1741
1742 sub get_all_VariationFeatures{
1743 my $self = shift;
1744
1745 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
1746 return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, @_);
1747 }
1748 else {
1749 return [];
1750 }
1751 }
1752
1753 =head2 get_all_somatic_VariationFeatures
1754
1755 Args : $filter [optional]
1756 Description : Returns all somatic variation features on this slice. This function will only
1757 work correctly if the variation database has been attached to the core database.
1758 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1759 Exceptions : none
1760 Status : Stable
1761
1762 =cut
1763
1764 sub get_all_somatic_VariationFeatures {
1765 my $self = shift;
1766
1767 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
1768 return $vf_adaptor->fetch_all_somatic_by_Slice($self);
1769 }
1770 else{
1771 return [];
1772 }
1773 }
1774
1775 =head2 get_all_VariationFeatures_with_annotation
1776
1777 Arg [1] : $variation_feature_source [optional]
1778 Arg [2] : $annotation_source [optional]
1779 Arg [3] : $annotation_name [optional]
1780 Description : returns all germline variation features on this slice associated with a phenotype.
1781 This function will only work correctly if the variation database has been
1782 attached to the core database.
1783 If $variation_feature_source is set only variations from that source
1784 are retrieved.
1785 If $annotation_source is set only variations whose annotations come from
1786 $annotation_source will be retrieved.
1787 If $annotation_name is set only variations with that annotation will be retrieved.
1788 $annotation_name can be a phenotype's internal dbID.
1789 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1790 Exceptions : none
1791 Caller : contigview, snpview
1792 Status : Stable
1793
1794 =cut
1795
1796 sub get_all_VariationFeatures_with_annotation{
1797 my $self = shift;
1798 my $source = shift;
1799 my $p_source = shift;
1800 my $annotation = shift;
1801
1802 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
1803 return $vf_adaptor->fetch_all_with_annotation_by_Slice($self, $source, $p_source, $annotation);
1804 }
1805 else {
1806 return [];
1807 }
1808 }
1809
1810 =head2 get_all_somatic_VariationFeatures_with_annotation
1811
1812 Arg [1] : $variation_feature_source [optional]
1813 Arg [2] : $annotation_source [optional]
1814 Arg [3] : $annotation_name [optional]
1815 Description : returns all somatic variation features on this slice associated with a phenotype.
1816 (see get_all_VariationFeatures_with_annotation for further documentation)
1817 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1818 Exceptions : none
1819 Status : Stable
1820
1821 =cut
1822
1823 sub get_all_somatic_VariationFeatures_with_annotation{
1824 my $self = shift;
1825 my $source = shift;
1826 my $p_source = shift;
1827 my $annotation = shift;
1828
1829 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
1830 return $vf_adaptor->fetch_all_somatic_with_annotation_by_Slice($self, $source, $p_source, $annotation);
1831 }
1832 else {
1833 return [] unless $vf_adaptor;
1834 }
1835 }
1836
1837 =head2 get_all_VariationFeatures_by_VariationSet
1838
1839 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set
1840 Description :returns all variation features on this slice associated with a given set.
1841 This function will only work correctly if the variation database has been
1842 attached to the core database.
1843 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1844 Exceptions : none
1845 Caller : contigview, snpview
1846 Status : Stable
1847
1848 =cut
1849
1850 sub get_all_VariationFeatures_by_VariationSet {
1851 my $self = shift;
1852 my $set = shift;
1853
1854 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
1855 return $vf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);
1856 }
1857 else {
1858 return [];
1859 }
1860 }
1861
1862 =head2 get_all_StructuralVariations
1863
1864 Description: DEPRECATED. Use get_all_StructuralVariationFeatures instead
1865
1866 =cut
1867
1868 sub get_all_StructuralVariations{
1869 my $self = shift;
1870 my $source = shift;
1871 my $study = shift;
1872 my $sv_class = shift;
1873
1874 deprecate('Use get_all_StructuralVariationFeatures() instead.');
1875
1876 return $self->get_all_StructuralVariationFeatures($source,$sv_class);
1877 }
1878
1879
1880 =head2 get_all_CopyNumberVariantProbes
1881
1882 Description: DEPRECATED. Use get_all_CopyNumberVariantProbeFeatures instead
1883
1884 =cut
1885
1886 sub get_all_CopyNumberVariantProbes {
1887 my $self = shift;
1888 my $source = shift;
1889 my $study = shift;
1890
1891 deprecate('Use get_all_CopyNumberVariantProbeFeatures() instead.');
1892
1893 return $self->get_all_CopyNumberVariantProbeFeatures($source);
1894 }
1895
1896
1897 sub _get_StructuralVariationFeatureAdaptor {
1898
1899 my $self = shift;
1900
1901 if(!$self->adaptor()) {
1902 warning('Cannot get structural variation features without attached adaptor');
1903 return undef;
1904 }
1905
1906 my $svf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
1907 -species => $self->adaptor()->db()->species,
1908 -type => "StructuralVariationFeature"
1909 );
1910
1911 if( $svf_adaptor ) {
1912 return $svf_adaptor;
1913 }
1914 else {
1915 warning("Variation database must be attached to core database to " .
1916 "retrieve variation information" );
1917
1918 return undef;
1919 }
1920 }
1921
1922
1923 =head2 get_all_StructuralVariationFeatures
1924
1925 Arg[1] : string $source [optional]
1926 Arg[2] : int $include_evidence [optional]
1927 Arg[3] : string $sv_class (SO term) [optional]
1928 Description : returns all structural variation features on this slice. This function will only work
1929 correctly if the variation database has been attached to the core database.
1930 If $source is set, only structural variation features with that source name will be
1931 returned. By default, it only returns structural variant features which are not labelled
1932 as "CNV_PROBE".
1933 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from
1934 both structural variation (SV) and their supporting structural variations (SSV) will be
1935 returned. By default, it only returns features from structural variations (SV).
1936 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
1937 Exceptions : none
1938 Caller : contigview, snpview, structural_variation_features
1939 Status : Stable
1940
1941 =cut
1942
1943 sub get_all_StructuralVariationFeatures {
1944 my $self = shift;
1945 my $source = shift;
1946 my $include_evidence = shift;
1947 my $somatic = shift;
1948 my $sv_class = shift;
1949
1950 my $operator = '';
1951
1952 if (!defined($sv_class)) {
1953 $sv_class = 'SO:0000051'; # CNV_PROBE
1954 $operator = '!'; # All but CNV_PROBE
1955 }
1956
1957 $somatic = (!defined($somatic) || !$somatic) ? 0 : 1;
1958
1959 my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor;
1960
1961 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1962
1963 # Get the attrib_id
1964 my $at_adaptor = $variation_db->get_AttributeAdaptor;
1965 my $SO_term = $at_adaptor->SO_term_for_SO_accession($sv_class);
1966 my $attrib_id = $at_adaptor->attrib_id_for_type_value('SO_term',$SO_term);
1967
1968 if (!$attrib_id) {
1969 warning("The Sequence Ontology accession number is not found in the database");
1970 return [];
1971 }
1972
1973 # Get the structural variations features
1974 if( $svf_adaptor ) {
1975
1976 my $constraint = qq{ svf.somatic=$somatic AND svf.class_attrib_id $operator=$attrib_id };
1977 $constraint .= qq{ AND svf.is_evidence=0 } if (!$include_evidence);
1978
1979 if($source) {
1980 return $svf_adaptor->fetch_all_by_Slice_constraint($self, qq{$constraint AND s.name = '$source'});
1981 }else {
1982 return $svf_adaptor->fetch_all_by_Slice_constraint($self, $constraint);
1983 }
1984 }
1985 else {
1986 warning("Variation database must be attached to core database to " .
1987 "retrieve variation information" );
1988 return [];
1989 }
1990 }
1991
1992
1993 =head2 get_all_StructuralVariationFeatures_by_VariationSet
1994
1995 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set
1996 Description :returns all structural variation features on this slice associated with a
1997 given set.
1998 This function will only work correctly if the variation database has been
1999 attached to the core database.
2000 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
2001 Exceptions : none
2002 Caller : contigview, snpview
2003 Status : Stable
2004
2005 =cut
2006
2007 sub get_all_StructuralVariationFeatures_by_VariationSet {
2008 my $self = shift;
2009 my $set = shift;
2010
2011 if (my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor) {
2012 return $svf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);
2013 }
2014 else {
2015 return [];
2016 }
2017 }
2018
2019
2020 =head2 get_all_somatic_StructuralVariationFeatures
2021
2022 Arg[1] : string $source [optional]
2023 Arg[2] : int $include_evidence [optional]
2024 Arg[3] : string $sv_class (SO term) [optional]
2025 Description : returns all somatic structural variation features on this slice. This function will only work
2026 correctly if the variation database has been attached to the core database.
2027 If $source is set, only somatic structural variation features with that source name will be
2028 returned. By default, it only returns somatic structural variant features which are not labelled
2029 as "CNV_PROBE".
2030 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from
2031 both structural variation (SV) and their supporting structural variations (SSV) will be
2032 returned. By default, it only returns features from structural variations (SV).
2033 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
2034 Exceptions : none
2035 Caller : contigview, snpview, structural_variation_features
2036 Status : Stable
2037
2038 =cut
2039
2040 sub get_all_somatic_StructuralVariationFeatures {
2041 my $self = shift;
2042 my $source = shift;
2043 my $include_evidence = shift;
2044 my $sv_class = shift;
2045
2046 return $self->get_all_StructuralVariationFeatures($source,$include_evidence,1,$sv_class);
2047 }
2048
2049
2050 =head2 get_all_CopyNumberVariantProbeFeatures
2051
2052 Arg[1] : string $source [optional]
2053 Description : returns all copy number variant probes on this slice. This function will only work
2054 correctly if the variation database has been attached to the core database.
2055 If $source is set, only CNV probes with that source name will be returned.
2056 If $study is set, only CNV probes of that study will be returned.
2057 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
2058 Exceptions : none
2059 Caller : contigview, snpview, structural_variation_feature
2060 Status : At Risk
2061
2062 =cut
2063
2064 sub get_all_CopyNumberVariantProbeFeatures {
2065 my $self = shift;
2066 my $source = shift;
2067
2068 return $self->get_all_StructuralVariationFeatures($source,0,0,'SO:0000051');
2069 }
2070
2071
2072 =head2 get_all_VariationFeatures_by_Population
2073
2074 Arg [1] : Bio::EnsEMBL::Variation::Population
2075 Arg [2] : $minimum_frequency (optional)
2076 Example : $pop = $pop_adaptor->fetch_by_dbID(659);
2077 @vfs = @{$slice->get_all_VariationFeatures_by_Population(
2078 $pop,$slice)};
2079 Description: Retrieves all variation features in a slice which are stored for
2080 a specified population. If $minimum_frequency is supplied, only
2081 variations with a minor allele frequency (MAF) greater than
2082 $minimum_frequency will be returned.
2083 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
2084 Exceptions : throw on incorrect argument
2085 Caller : general
2086 Status : At Risk
2087
2088 =cut
2089
2090 sub get_all_VariationFeatures_by_Population {
2091 my $self = shift;
2092
2093 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
2094 return $vf_adaptor->fetch_all_by_Slice_Population($self, @_);
2095 }
2096 else {
2097 return [];
2098 }
2099 }
2100
2101
2102
2103
2104 =head2 get_all_IndividualSlice
2105
2106 Args : none
2107 Example : my $individualSlice = $slice->get_by_Population($population);
2108 Description : Gets the specific Slice for all the individuls in the population
2109 ReturnType : listref of Bio::EnsEMB::IndividualSlice
2110 Exceptions : none
2111 Caller : general
2112
2113 =cut
2114
2115 sub get_all_IndividualSlice{
2116 my $self = shift;
2117
2118 my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new(
2119 -START => $self->{'start'},
2120 -END => $self->{'end'},
2121 -STRAND => $self->{'strand'},
2122 -ADAPTOR => $self->adaptor(),
2123 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2124 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2125 -COORD_SYSTEM => $self->{'coord_system'},
2126 );
2127 return $individualSliceFactory->get_all_IndividualSlice();
2128 }
2129
2130 =head2 get_by_Individual
2131
2132 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual
2133 Example : my $individualSlice = $slice->get_by_Individual($individual);
2134 Description : Gets the specific Slice for the individual
2135 ReturnType : Bio::EnsEMB::IndividualSlice
2136 Exceptions : none
2137 Caller : general
2138
2139 =cut
2140
2141 sub get_by_Individual{
2142 my $self = shift;
2143 my $individual = shift;
2144
2145 return Bio::EnsEMBL::IndividualSlice->new(
2146 -START => $self->{'start'},
2147 -END => $self->{'end'},
2148 -STRAND => $self->{'strand'},
2149 -ADAPTOR => $self->adaptor(),
2150 # -SEQ => $self->{'seq'},
2151 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2152 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2153 -COORD_SYSTEM => $self->{'coord_system'},
2154 -INDIVIDUAL => $individual);
2155
2156 }
2157
2158
2159
2160 =head2 get_by_strain
2161
2162 Arg[1] : string $strain
2163 Example : my $strainSlice = $slice->get_by_strain($strain);
2164 Description : Gets the specific Slice for the strain
2165 ReturnType : Bio::EnsEMB::StrainSlice
2166 Exceptions : none
2167 Caller : general
2168
2169 =cut
2170
2171 sub get_by_strain{
2172 my $self = shift;
2173 my $strain_name = shift;
2174
2175 return Bio::EnsEMBL::StrainSlice->new(
2176 -START => $self->{'start'},
2177 -END => $self->{'end'},
2178 -STRAND => $self->{'strand'},
2179 -ADAPTOR => $self->adaptor(),
2180 -SEQ => $self->{'seq'},
2181 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2182 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2183 -COORD_SYSTEM => $self->{'coord_system'},
2184 -STRAIN_NAME => $strain_name);
2185
2186 }
2187
2188 sub calculate_theta{
2189 my $self = shift;
2190 my $strains = shift;
2191 my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate
2192
2193 if(!$self->adaptor()) {
2194 warning('Cannot get variation features without attached adaptor');
2195 return 0;
2196 }
2197 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2198
2199 unless($variation_db) {
2200 warning("Variation database must be attached to core database to " .
2201 "retrieve variation information" );
2202 return 0;
2203 }
2204
2205 #need to get coverage regions for the slice in the different strains
2206 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
2207 my $strain;
2208 my $differences = [];
2209 my $slices = [];
2210 if ($coverage_adaptor){
2211 my $num_strains = scalar(@{$strains}) +1;
2212 if (!defined $feature){
2213 #we want to calculate for the whole slice
2214 push @{$slices}, $self; #add the slice as the slice to calculate the theta value
2215 }
2216 else{
2217 #we have features, get the slices for the different features
2218 my $features = $self->get_all_Exons();
2219 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
2220 }
2221 my $length_regions = 0;
2222 my $snps = 0;
2223 my $theta = 0;
2224 my $last_position = 0;
2225 #get all the differences in the slice coordinates
2226 foreach my $strain_name (@{$strains}){
2227 my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain
2228
2229 my $results = $strain->get_all_differences_Slice;
2230 push @{$differences}, @{$results} if (defined $results);
2231 }
2232 #when we finish, we have, in max_level, the regions covered by all the sample
2233 #sort the differences by the genomic position
2234 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
2235 foreach my $slice (@{$slices}){
2236 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
2237 if (defined $regions_covered){
2238 foreach my $range (@{$regions_covered}){
2239 $length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region
2240 for (my $i = $last_position;$i<@differences_sorted;$i++){
2241 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
2242 $snps++; #count differences in the region
2243 }
2244 elsif ($differences_sorted[$i]->end > $range->[1]){
2245 $last_position = $i;
2246 last;
2247 }
2248 }
2249 }
2250 #when all the ranges have been iterated, calculate rho
2251 #this is an intermediate variable called a in the formula
2252 # a = sum i=2..strains 1/i-1
2253 }
2254 }
2255 my $a = _calculate_a($num_strains);
2256 $theta = $snps / ($a * $length_regions);
2257 return $theta;
2258 }
2259 else{
2260 return 0;
2261 }
2262 }
2263
2264
2265
2266
2267 sub _calculate_a{
2268 my $max_level = shift;
2269
2270 my $a = 0;
2271 for (my $i = 2; $i <= $max_level+1;$i++){
2272 $a += 1/($i-1);
2273 }
2274 return $a;
2275 }
2276
2277 sub calculate_pi{
2278 my $self = shift;
2279 my $strains = shift;
2280 my $feature = shift;
2281
2282 if(!$self->adaptor()) {
2283 warning('Cannot get variation features without attached adaptor');
2284 return 0;
2285 }
2286 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2287
2288 unless($variation_db) {
2289 warning("Variation database must be attached to core database to " .
2290 "retrieve variation information" );
2291 return 0;
2292 }
2293
2294 #need to get coverage regions for the slice in the different strains
2295 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
2296 my $differences = [];
2297 my $slices = [];
2298 if ($coverage_adaptor){
2299 my $num_strains = scalar(@{$strains}) +1;
2300 if (!defined $feature){
2301 #we want to calculate for the whole slice
2302 push @{$slices}, $self; #add the slice as the slice to calculate the theta value
2303 }
2304 else{
2305 #we have features, get the slices for the different features
2306 my $features = $self->get_all_Exons();
2307 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
2308 }
2309 my @range_differences = ();
2310 my $pi = 0;
2311 my $regions = 0;
2312 my $last_position = 0; #last position visited in the sorted list of differences
2313 my $triallelic = 0;
2314 my $is_triallelic = 0;
2315 foreach my $slice (@{$slices}){
2316 foreach my $strain_name (@{$strains}){
2317 my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain
2318 my $results = $strain->get_all_differences_Slice;
2319 push @{$differences}, @{$results} if (defined $results);
2320 }
2321 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
2322
2323 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
2324 #when we finish, we have, in max_level, the regions covered by all the sample
2325 #sort the differences
2326 if (defined $regions_covered){
2327 foreach my $range (@{$regions_covered}){
2328 for (my $i = $last_position;$i<@differences_sorted;$i++){
2329 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
2330 #check wether it is the same region or different
2331 if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){
2332 if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){
2333 $is_triallelic = 1;
2334 }
2335 push @range_differences, $differences_sorted[$i];
2336 }
2337 else{
2338 #new site, calc pi for the previous one
2339 $pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains));
2340 if ($is_triallelic) {
2341 $triallelic++;
2342 $is_triallelic = 0;
2343 }
2344 $regions++;
2345 @range_differences = ();
2346 #and start a new range
2347 push @range_differences, $differences_sorted[$i];
2348 }
2349 }
2350 elsif ($differences_sorted[$i]->end > $range->[1]){
2351 $last_position = $i;
2352 last;
2353 }
2354 }
2355 #calculate pi for last site, if any
2356 if (defined $range_differences[0]){
2357 $pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains));
2358 $regions++;
2359 }
2360 }
2361 }
2362 $pi = $pi / $regions; #calculate average pi
2363 print "Regions with variations in region $regions and triallelic $triallelic\n\n";
2364 }
2365 return $pi;
2366 }
2367 else{
2368 return 0;
2369 }
2370
2371 }
2372
2373
2374
2375
2376
2377 =head2 get_all_genotyped_VariationFeatures
2378
2379 Args : none
2380 Function : returns all variation features on this slice that have been genotyped. This function will only work
2381 correctly if the variation database has been attached to the core database.
2382 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
2383 Exceptions : none
2384 Caller : contigview, snpview, ldview
2385 Status : At Risk
2386 : Variation database is under development.
2387
2388 =cut
2389
2390 sub get_all_genotyped_VariationFeatures{
2391 my $self = shift;
2392
2393 if( my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
2394 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
2395 }
2396 else {
2397 return [];
2398 }
2399 }
2400
2401
2402 =head2 get_all_SNPs
2403
2404 Description: DEPRECATED. Use get_all_VariationFeatures insted
2405
2406 =cut
2407
2408 sub get_all_SNPs {
2409 my $self = shift;
2410
2411 deprecate('Use get_all_VariationFeatures() instead.');
2412
2413 my $snps;
2414 my $vf = $self->get_all_genotyped_VariationFeatures();
2415 if( $vf->[0] ) {
2416 #necessary to convert the VariationFeatures into SNP objects
2417 foreach my $variation_feature (@{$vf}){
2418 push @{$snps},$variation_feature->convert_to_SNP();
2419 }
2420 return $snps;
2421 } else {
2422 return [];
2423 }
2424 }
2425
2426 =head2 get_all_genotyped_SNPs
2427
2428 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures insted
2429
2430 =cut
2431
2432 sub get_all_genotyped_SNPs {
2433 my $self = shift;
2434
2435 deprecate("Use get_all_genotyped_VariationFeatures instead");
2436 my $vf = $self->get_all_genotyped_VariationFeatures;
2437 my $snps;
2438 if ($vf->[0]){
2439 foreach my $variation_feature (@{$vf}){
2440 push @{$snps},$variation_feature->convert_to_SNP();
2441 }
2442 return $snps;
2443 } else {
2444 return [];
2445 }
2446 }
2447
2448 sub get_all_SNPs_transcripts {
2449 my $self = shift;
2450
2451 deprecate("DEPRECATED");
2452
2453 return [];
2454
2455 }
2456
2457
2458
2459 =head2 get_all_Genes
2460
2461 Arg [1] : (optional) string $logic_name
2462 The name of the analysis used to generate the genes to retrieve
2463 Arg [2] : (optional) string $dbtype
2464 The dbtype of genes to obtain. This assumes that the db has
2465 been added to the DBAdaptor under this name (using the
2466 DBConnection::add_db_adaptor method).
2467 Arg [3] : (optional) boolean $load_transcripts
2468 If set to true, transcripts will be loaded immediately rather
2469 than being lazy-loaded on request. This will result in a
2470 significant speed up if the Transcripts and Exons are going to
2471 be used (but a slow down if they are not).
2472 Arg [4] : (optional) string $source
2473 The source of the genes to retrieve.
2474 Arg [5] : (optional) string $biotype
2475 The biotype of the genes to retrieve.
2476 Example : @genes = @{$slice->get_all_Genes};
2477 Description: Retrieves all genes that overlap this slice.
2478 Returntype : listref of Bio::EnsEMBL::Genes
2479 Exceptions : none
2480 Caller : none
2481 Status : Stable
2482
2483 =cut
2484
2485 sub get_all_Genes{
2486 my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
2487
2488 if(!$self->adaptor()) {
2489 warning('Cannot get Genes without attached adaptor');
2490 return [];
2491 }
2492
2493 my $ga;
2494 if($dbtype) {
2495 my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
2496 if(defined($db)){
2497 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" );
2498 }
2499 else{
2500 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Gene" );
2501 }
2502 if(!defined $ga) {
2503 warning( "$dbtype genes not available" );
2504 return [];
2505 }
2506 } else {
2507 $ga = $self->adaptor->db->get_GeneAdaptor();
2508 }
2509
2510 return $ga->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
2511 }
2512
2513 =head2 get_all_Genes_by_type
2514
2515 Arg [1] : string $type
2516 The biotype of genes wanted.
2517 Arg [2] : (optional) string $logic_name
2518 Arg [3] : (optional) boolean $load_transcripts
2519 If set to true, transcripts will be loaded immediately rather
2520 than being lazy-loaded on request. This will result in a
2521 significant speed up if the Transcripts and Exons are going to
2522 be used (but a slow down if they are not).
2523 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
2524 'ensembl')};
2525 Description: Retrieves genes that overlap this slice of biotype $type.
2526 This is primarily used by the genebuilding code when several
2527 biotypes of genes are used.
2528
2529 The logic name is the analysis of the genes that are retrieved.
2530 If not provided all genes will be retrieved instead.
2531
2532 Returntype : listref of Bio::EnsEMBL::Genes
2533 Exceptions : none
2534 Caller : genebuilder, general
2535 Status : Stable
2536
2537 =cut
2538
2539 sub get_all_Genes_by_type{
2540 my ($self, $type, $logic_name, $load_transcripts) = @_;
2541
2542 if(!$self->adaptor()) {
2543 warning('Cannot get Genes without attached adaptor');
2544 return [];
2545 }
2546
2547 return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
2548 }
2549
2550
2551 =head2 get_all_Genes_by_source
2552
2553 Arg [1] : string source
2554 Arg [2] : (optional) boolean $load_transcripts
2555 If set to true, transcripts will be loaded immediately rather
2556 than being lazy-loaded on request. This will result in a
2557 significant speed up if the Transcripts and Exons are going to
2558 be used (but a slow down if they are not).
2559 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
2560 Description: Retrieves genes that overlap this slice of source $source.
2561
2562 Returntype : listref of Bio::EnsEMBL::Genes
2563 Exceptions : none
2564 Caller : general
2565 Status : Stable
2566
2567 =cut
2568
2569 sub get_all_Genes_by_source {
2570 my ($self, $source, $load_transcripts) = @_;
2571
2572 if(!$self->adaptor()) {
2573 warning('Cannot get Genes without attached adaptor');
2574 return [];
2575 }
2576
2577 return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
2578 }
2579
2580 =head2 get_all_Transcripts
2581
2582 Arg [1] : (optional) boolean $load_exons
2583 If set to true exons will not be lazy-loaded but will instead
2584 be loaded right away. This is faster if the exons are
2585 actually going to be used right away.
2586 Arg [2] : (optional) string $logic_name
2587 the logic name of the type of features to obtain
2588 Arg [3] : (optional) string $db_type
2589 Example : @transcripts = @{$slice->get_all_Transcripts)_};
2590 Description: Gets all transcripts which overlap this slice. If you want to
2591 specify a particular analysis or type, then you are better off
2592 using get_all_Genes or get_all_Genes_by_type and iterating
2593 through the transcripts of each gene.
2594 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2595 Exceptions : none
2596 Caller : general
2597 Status : Stable
2598
2599 =cut
2600
2601 sub get_all_Transcripts {
2602 my $self = shift;
2603 my $load_exons = shift;
2604 my $logic_name = shift;
2605 my $dbtype = shift;
2606 if(!$self->adaptor()) {
2607 warning('Cannot get Transcripts without attached adaptor');
2608 return [];
2609 }
2610
2611
2612 my $ta;
2613 if($dbtype) {
2614 my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
2615 if(defined($db)){
2616 $ta = $reg->get_adaptor( $db->species(), $db->group(), "Transcript" );
2617 } else{
2618 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Transcript" );
2619 }
2620 if(!defined $ta) {
2621 warning( "$dbtype genes not available" );
2622 return [];
2623 }
2624 } else {
2625 $ta = $self->adaptor->db->get_TranscriptAdaptor();
2626 }
2627 return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name);
2628 }
2629
2630
2631 =head2 get_all_Exons
2632
2633 Arg [1] : none
2634 Example : @exons = @{$slice->get_all_Exons};
2635 Description: Gets all exons which overlap this slice. Note that these exons
2636 will not be associated with any transcripts, so this may not
2637 be terribly useful.
2638 Returntype : reference to a list of Bio::EnsEMBL::Exons
2639 Exceptions : none
2640 Caller : general
2641 Status : Stable
2642
2643 =cut
2644
2645 sub get_all_Exons {
2646 my $self = shift;
2647
2648 if(!$self->adaptor()) {
2649 warning('Cannot get Exons without attached adaptor');
2650 return [];
2651 }
2652
2653 return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2654 }
2655
2656
2657
2658 =head2 get_all_QtlFeatures
2659
2660 Args : none
2661 Example : none
2662 Description: returns overlapping QtlFeatures
2663 Returntype : listref Bio::EnsEMBL::Map::QtlFeature
2664 Exceptions : none
2665 Caller : general
2666 Status : Stable
2667
2668 =cut
2669
2670 sub get_all_QtlFeatures {
2671 my $self = shift;
2672
2673 if(!$self->adaptor()) {
2674 warning('Cannot get QtlFeatures without attached adaptor');
2675 return [];
2676 }
2677
2678 my $qfAdaptor;
2679 if( $self->adaptor()) {
2680 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor();
2681 } else {
2682 return [];
2683 }
2684
2685 return $qfAdaptor->fetch_all_by_Slice_constraint( $self );
2686 }
2687
2688
2689
2690
2691 =head2 get_all_KaryotypeBands
2692
2693 Arg [1] : none
2694 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2695 Description: Retrieves the karyotype bands which this slice overlaps.
2696 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2697 Exceptions : none
2698 Caller : general, contigview
2699 Status : Stable
2700
2701 =cut
2702
2703 sub get_all_KaryotypeBands {
2704 my ($self) = @_;
2705
2706 if(!$self->adaptor()) {
2707 warning('Cannot get KaryotypeBands without attached adaptor');
2708 return [];
2709 }
2710
2711 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor();
2712 return $kadp->fetch_all_by_Slice($self);
2713 }
2714
2715
2716
2717
2718 =head2 get_repeatmasked_seq
2719
2720 Arg [1] : listref of strings $logic_names (optional)
2721 Arg [2] : int $soft_masking_enable (optional)
2722 Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
2723 The values are 0 or 1 for hard and soft masking respectively
2724 The keys of the hash should be of 2 forms
2725 "repeat_class_" . $repeat_consensus->repeat_class,
2726 e.g. "repeat_class_SINE/MIR"
2727 "repeat_name_" . $repeat_consensus->name
2728 e.g. "repeat_name_MIR"
2729 depending on which base you want to apply the not default
2730 masking either the repeat_class or repeat_name. Both can be
2731 specified in the same hash at the same time, but in that case,
2732 repeat_name setting has priority over repeat_class. For example,
2733 you may have hard masking as default, and you may want soft
2734 masking of all repeat_class SINE/MIR, but repeat_name AluSp
2735 (which are also from repeat_class SINE/MIR).
2736 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2737 "repeat_name_AluSp" => 0}
2738 Example : $rm_slice = $slice->get_repeatmasked_seq();
2739 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
2740 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
2741 masked sequence instead of the regular sequence.
2742 Sequence returned by this new slice will have repeat regions
2743 hardmasked by default (sequence replaced by N) or
2744 or soft-masked when arg[2] = 1 (sequence in lowercase)
2745 Will only work with database connection to get repeat features.
2746 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
2747 Exceptions : none
2748 Caller : general
2749 Status : Stable
2750
2751 =cut
2752
2753 sub get_repeatmasked_seq {
2754 my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
2755
2756 return Bio::EnsEMBL::RepeatMaskedSlice->new
2757 (-START => $self->{'start'},
2758 -END => $self->{'end'},
2759 -STRAND => $self->{'strand'},
2760 -ADAPTOR => $self->adaptor(),
2761 -SEQ => $self->{'seq'},
2762 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2763 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2764 -COORD_SYSTEM => $self->{'coord_system'},
2765 -REPEAT_MASK => $logic_names,
2766 -SOFT_MASK => $soft_mask,
2767 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
2768 }
2769
2770
2771
2772 =head2 _mask_features
2773
2774 Arg [1] : reference to a string $dnaref
2775 Arg [2] : array_ref $repeats
2776 reference to a list Bio::EnsEMBL::RepeatFeature
2777 give the list of coordinates to replace with N or with
2778 lower case
2779 Arg [3] : int $soft_masking_enable (optional)
2780 Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
2781 The values are 0 or 1 for hard and soft masking respectively
2782 The keys of the hash should be of 2 forms
2783 "repeat_class_" . $repeat_consensus->repeat_class,
2784 e.g. "repeat_class_SINE/MIR"
2785 "repeat_name_" . $repeat_consensus->name
2786 e.g. "repeat_name_MIR"
2787 depending on which base you want to apply the not default masking either
2788 the repeat_class or repeat_name. Both can be specified in the same hash
2789 at the same time, but in that case, repeat_name setting has priority over
2790 repeat_class. For example, you may have hard masking as default, and
2791 you may want soft masking of all repeat_class SINE/MIR,
2792 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2793 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2794 "repeat_name_AluSp" => 0}
2795 Example : none
2796 Description: replaces string positions described in the RepeatFeatures
2797 with Ns (default setting), or with the lower case equivalent
2798 (soft masking). The reference to a dna string which is passed
2799 is changed in place.
2800 Returntype : none
2801 Exceptions : none
2802 Caller : seq
2803 Status : Stable
2804
2805 =cut
2806
2807 sub _mask_features {
2808 my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
2809
2810 $soft_mask = 0 unless (defined $soft_mask);
2811 $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
2812
2813 # explicit CORE::length call, to avoid any confusion with the Slice
2814 # length method
2815 my $dnalen = CORE::length($$dnaref);
2816
2817 REP:foreach my $old_f (@{$repeats}) {
2818 my $f = $old_f->transfer( $self );
2819 my $start = $f->start;
2820 my $end = $f->end;
2821 my $length = ($end - $start) + 1;
2822
2823 # check if we get repeat completely outside of expected slice range
2824 if ($end < 1 || $start > $dnalen) {
2825 # warning("Unexpected: Repeat completely outside slice coordinates.");
2826 next REP;
2827 }
2828
2829 # repeat partly outside slice range, so correct
2830 # the repeat start and length to the slice size if needed
2831 if ($start < 1) {
2832 $start = 1;
2833 $length = ($end - $start) + 1;
2834 }
2835
2836 # repeat partly outside slice range, so correct
2837 # the repeat end and length to the slice size if needed
2838 if ($end > $dnalen) {
2839 $end = $dnalen;
2840 $length = ($end - $start) + 1;
2841 }
2842
2843 $start--;
2844
2845 my $padstr;
2846 # if we decide to define masking on the base of the repeat_type, we'll need
2847 # to add the following, and the other commented line few lines below.
2848 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type;
2849 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
2850 my $rc_name = "repeat_name_" . $f->repeat_consensus->name;
2851
2852 my $masking_type;
2853 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type});
2854 $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class});
2855 $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name});
2856
2857 $masking_type = $soft_mask unless (defined $masking_type);
2858
2859 if ($masking_type) {
2860 $padstr = lc substr ($$dnaref,$start,$length);
2861 } else {
2862 $padstr = 'N' x $length;
2863 }
2864 substr ($$dnaref,$start,$length) = $padstr;
2865 }
2866 }
2867
2868
2869 =head2 get_all_SearchFeatures
2870
2871 Arg [1] : scalar $ticket_ids
2872 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
2873 Description: Retreives all search features for stored blast
2874 results for the ticket that overlap this slice
2875 Returntype : listref of Bio::EnsEMBL::SeqFeatures
2876 Exceptions : none
2877 Caller : general (webby!)
2878 Status : Stable
2879
2880 =cut
2881
2882 sub get_all_SearchFeatures {
2883 my $self = shift;
2884 my $ticket = shift;
2885 local $_;
2886 unless($ticket) {
2887 throw("ticket argument is required");
2888 }
2889
2890 if(!$self->adaptor()) {
2891 warning("Cannot get SearchFeatures without an attached adaptor");
2892 return [];
2893 }
2894
2895 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
2896
2897 my $offset = $self->start-1;
2898
2899 my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
2900
2901 foreach( @$features ) {
2902 $_->start( $_->start - $offset );
2903 $_->end( $_->end - $offset );
2904 };
2905 return $features;
2906
2907 }
2908
2909 =head2 get_all_AssemblyExceptionFeatures
2910
2911 Arg [1] : string $set (optional)
2912 Example : $slice->get_all_AssemblyExceptionFeatures();
2913 Description: Retreives all misc features which overlap this slice. If
2914 a set code is provided only features which are members of
2915 the requested set are returned.
2916 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2917 Exceptions : none
2918 Caller : general
2919 Status : Stable
2920
2921 =cut
2922
2923 sub get_all_AssemblyExceptionFeatures {
2924 my $self = shift;
2925 my $misc_set = shift;
2926
2927 my $adaptor = $self->adaptor();
2928
2929 if(!$adaptor) {
2930 warning('Cannot retrieve features without attached adaptor.');
2931 return [];
2932 }
2933
2934 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor();
2935
2936 return $aefa->fetch_all_by_Slice($self);
2937 }
2938
2939
2940
2941 =head2 get_all_MiscFeatures
2942
2943 Arg [1] : string $set (optional)
2944 Arg [2] : string $database (optional)
2945 Example : $slice->get_all_MiscFeatures('cloneset');
2946 Description: Retreives all misc features which overlap this slice. If
2947 a set code is provided only features which are members of
2948 the requested set are returned.
2949 Returntype : listref of Bio::EnsEMBL::MiscFeatures
2950 Exceptions : none
2951 Caller : general
2952 Status : Stable
2953
2954 =cut
2955
2956 sub get_all_MiscFeatures {
2957 my $self = shift;
2958 my $misc_set = shift;
2959 my $dbtype = shift;
2960 my $msa;
2961
2962 my $adaptor = $self->adaptor();
2963 if(!$adaptor) {
2964 warning('Cannot retrieve features without attached adaptor.');
2965 return [];
2966 }
2967
2968 my $mfa;
2969 if($dbtype) {
2970 my $db = $reg->get_db($adaptor->db(), $dbtype);
2971 if(defined($db)){
2972 $mfa = $reg->get_adaptor( lc($db->species()), $db->group(), "miscfeature" );
2973 } else{
2974 $mfa = $reg->get_adaptor( $adaptor->db()->species(), $dbtype, "miscfeature" );
2975 }
2976 if(!defined $mfa) {
2977 warning( "$dbtype misc features not available" );
2978 return [];
2979 }
2980 } else {
2981 $mfa = $adaptor->db->get_MiscFeatureAdaptor();
2982 }
2983
2984 if($misc_set) {
2985 return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set);
2986 }
2987
2988 return $mfa->fetch_all_by_Slice($self);
2989 }
2990
2991 =head2 get_all_MarkerFeatures
2992
2993 Arg [1] : (optional) string logic_name
2994 The logic name of the marker features to retrieve
2995 Arg [2] : (optional) int $priority
2996 Lower (exclusive) priority bound of the markers to retrieve
2997 Arg [3] : (optional) int $map_weight
2998 Upper (exclusive) priority bound of the markers to retrieve
2999 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
3000 Description: Retrieves all markers which lie on this slice fulfilling the
3001 specified map_weight and priority parameters (if supplied).
3002 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
3003 Exceptions : none
3004 Caller : contigview, general
3005 Status : Stable
3006
3007 =cut
3008
3009 sub get_all_MarkerFeatures {
3010 my ($self, $logic_name, $priority, $map_weight) = @_;
3011
3012 if(!$self->adaptor()) {
3013 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
3014 return [];
3015 }
3016
3017 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
3018
3019 my $feats = $ma->fetch_all_by_Slice_and_priority($self,
3020 $priority,
3021 $map_weight,
3022 $logic_name);
3023 return $feats;
3024 }
3025
3026
3027 =head2 get_MarkerFeatures_by_Name
3028
3029 Arg [1] : string marker Name
3030 The name (synonym) of the marker feature(s) to retrieve
3031 Example : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')};
3032 Description: Retrieves all markers with this ID
3033 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
3034 Exceptions : none
3035 Caller : contigview, general
3036 Status : Stable
3037
3038 =cut
3039
3040 sub get_MarkerFeatures_by_Name {
3041 my ($self, $name) = @_;
3042
3043 if(!$self->adaptor()) {
3044 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
3045 return [];
3046 }
3047
3048 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
3049
3050 my $feats = $ma->fetch_all_by_Slice_and_MarkerName($self, $name);
3051 return $feats;
3052 }
3053
3054
3055 =head2 get_all_compara_DnaAlignFeatures
3056
3057 Arg [1] : string $qy_species
3058 The name of the species to retrieve similarity features from
3059 Arg [2] : string $qy_assembly
3060 The name of the assembly to retrieve similarity features from
3061 Arg [3] : string $type
3062 The type of the alignment to retrieve similarity features from
3063 Arg [4] : <optional> compara dbadptor to use.
3064 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
3065 'MGSC3',
3066 'WGA');
3067 Description: Retrieves a list of DNA-DNA Alignments to the species specified
3068 by the $qy_species argument.
3069 The compara database must be attached to the core database
3070 for this call to work correctly. As well the compara database
3071 must have the core dbadaptors for both this species, and the
3072 query species added to function correctly.
3073 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
3074 Exceptions : warning if compara database is not available
3075 Caller : contigview
3076 Status : Stable
3077
3078 =cut
3079
3080 sub get_all_compara_DnaAlignFeatures {
3081 my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
3082
3083 if(!$self->adaptor()) {
3084 warning("Cannot retrieve DnaAlignFeatures without attached adaptor");
3085 return [];
3086 }
3087
3088 unless($qy_species && $alignment_type # && $qy_assembly
3089 ) {
3090 throw("Query species and assembly and alignmemt type arguments are required");
3091 }
3092
3093 if(!defined($compara_db)){
3094 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
3095 }
3096 unless($compara_db) {
3097 warning("Compara database must be attached to core database or passed ".
3098 "as an argument to " .
3099 "retrieve compara information");
3100 return [];
3101 }
3102
3103 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
3104 return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
3105 }
3106
3107 =head2 get_all_compara_Syntenies
3108
3109 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
3110 Arg [2] : string $method_link_type, default is "SYNTENY"
3111 Arg [3] : <optional> compara dbadaptor to use.
3112 Description: gets all the compara syntenyies for a specfic species
3113 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
3114 Status : Stable
3115
3116 =cut
3117
3118 sub get_all_compara_Syntenies {
3119 my ($self, $qy_species, $method_link_type, $compara_db) = @_;
3120
3121 if(!$self->adaptor()) {
3122 warning("Cannot retrieve features without attached adaptor");
3123 return [];
3124 }
3125
3126 unless($qy_species) {
3127 throw("Query species and assembly arguments are required");
3128 }
3129
3130 unless (defined $method_link_type) {
3131 $method_link_type = "SYNTENY";
3132 }
3133
3134 if(!defined($compara_db)){
3135 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
3136 }
3137 unless($compara_db) {
3138 warning("Compara database must be attached to core database or passed ".
3139 "as an argument to " .
3140 "retrieve compara information");
3141 return [];
3142 }
3143 my $gdba = $compara_db->get_GenomeDBAdaptor();
3144 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
3145 my $dfa = $compara_db->get_DnaFragAdaptor();
3146 my $sra = $compara_db->get_SyntenyRegionAdaptor();
3147
3148 my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
3149 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
3150 my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
3151
3152 my $cs = $self->coord_system()->name();
3153 my $sr = $self->seq_region_name();
3154 my ($dnafrag) = @{$dfa->fetch_all_by_GenomeDB_region($this_gdb, $cs, $sr)};
3155 return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end);
3156 }
3157
3158 =head2 get_all_Haplotypes
3159
3160 Arg [1] : (optional) boolean $lite_flag
3161 if true lightweight haplotype objects are used
3162 Example : @haplotypes = $slice->get_all_Haplotypes;
3163 Description: Retrieves all of the haplotypes on this slice. Only works
3164 if the haplotype adaptor has been attached to the core adaptor
3165 via $dba->add_db_adaptor('haplotype', $hdba);
3166 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
3167 Exceptions : warning is Haplotype database is not available
3168 Caller : contigview, general
3169 Status : Stable
3170
3171 =cut
3172
3173 sub get_all_Haplotypes {
3174 my($self, $lite_flag) = @_;
3175
3176 if(!$self->adaptor()) {
3177 warning("Cannot retrieve features without attached adaptor");
3178 return [];
3179 }
3180
3181 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
3182
3183 unless($haplo_db) {
3184 warning("Haplotype database must be attached to core database to " .
3185 "retrieve haplotype information" );
3186 return [];
3187 }
3188
3189 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
3190
3191 my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
3192
3193 return $haplotypes;
3194 }
3195
3196
3197 sub get_all_DASFactories {
3198 my $self = shift;
3199 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
3200 }
3201
3202 sub get_all_DASFeatures_dsn {
3203 my ($self, $source_type, $dsn) = @_;
3204
3205 if(!$self->adaptor()) {
3206 warning("Cannot retrieve features without attached adaptor");
3207 return [];
3208 }
3209 my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
3210
3211 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
3212 }
3213
3214 =head2 get_all_DAS_Features
3215
3216 Arg [1] : none
3217 Example : $features = $slice->get_all_DASFeatures;
3218 Description: Retreives a hash reference to a hash of DAS feature
3219 sets, keyed by the DNS, NOTE the values of this hash
3220 are an anonymous array containing:
3221 (1) a pointer to an array of features;
3222 (2) a pointer to the DAS stylesheet
3223 Returntype : hashref of Bio::SeqFeatures
3224 Exceptions : ?
3225 Caller : webcode
3226 Status : Stable
3227
3228 =cut
3229 sub get_all_DAS_Features{
3230 my ($self) = @_;
3231
3232 $self->{_das_features} ||= {}; # Cache
3233 $self->{_das_styles} ||= {}; # Cache
3234 $self->{_das_segments} ||= {}; # Cache
3235 my %das_features;
3236 my %das_styles;
3237 my %das_segments;
3238 my $slice = $self;
3239
3240 foreach my $dasfact( @{$self->get_all_DASFactories} ){
3241 my $dsn = $dasfact->adaptor->dsn;
3242 my $name = $dasfact->adaptor->name;
3243 # my $type = $dasfact->adaptor->type;
3244 my $url = $dasfact->adaptor->url;
3245
3246 my ($type) = $dasfact->adaptor->mapping;
3247 if (ref $type eq 'ARRAY') {
3248 $type = shift @$type;
3249 }
3250 $type ||= $dasfact->adaptor->type;
3251 # Construct a cache key : SOURCE_URL/TYPE
3252 # Need the type to handle sources that serve multiple types of features
3253
3254 my $key = join('/', $name, $type);
3255 if( $self->{_das_features}->{$key} ){ # Use cached
3256 $das_features{$name} = $self->{_das_features}->{$key};
3257 $das_styles{$name} = $self->{_das_styles}->{$key};
3258 $das_segments{$name} = $self->{_das_segments}->{$key};
3259 } else { # Get fresh data
3260 my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
3261 $self->{_das_features}->{$key} = $featref;
3262 $self->{_das_styles}->{$key} = $styleref;
3263 $self->{_das_segments}->{$key} = $segref;
3264 $das_features{$name} = $featref;
3265 $das_styles{$name} = $styleref;
3266 $das_segments{$name} = $segref;
3267 }
3268 }
3269
3270 return (\%das_features, \%das_styles, \%das_segments);
3271 }
3272
3273 sub get_all_DASFeatures{
3274 my ($self, $source_type) = @_;
3275
3276
3277 if(!$self->adaptor()) {
3278 warning("Cannot retrieve features without attached adaptor");
3279 return [];
3280 }
3281
3282 my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
3283 return \%genomic_features;
3284
3285 }
3286
3287 sub old_get_all_DASFeatures{
3288 my ($self,@args) = @_;
3289
3290 if(!$self->adaptor()) {
3291 warning("Cannot retrieve features without attached adaptor");
3292 return [];
3293 }
3294
3295 my %genomic_features =
3296 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
3297 $self->adaptor()->db()->_each_DASFeatureFactory;
3298 return \%genomic_features;
3299
3300 }
3301
3302
3303 =head2 get_all_ExternalFeatures
3304
3305 Arg [1] : (optional) string $track_name
3306 If specified only features from ExternalFeatureAdaptors with
3307 the track name $track_name are retrieved.
3308 If not set, all features from every ExternalFeatureAdaptor are
3309 retrieved.
3310 Example : @x_features = @{$slice->get_all_ExternalFeatures}
3311 Description: Retrieves features on this slice from external feature adaptors
3312 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
3313 coordinates
3314 Exceptions : none
3315 Caller : general
3316 Status : Stable
3317
3318 =cut
3319
3320 sub get_all_ExternalFeatures {
3321 my ($self, $track_name) = @_;
3322
3323 if(!$self->adaptor()) {
3324 warning("Cannot retrieve features without attached adaptor");
3325 return [];
3326 }
3327
3328 my $features = [];
3329
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
3331 my @xf_adaptors = ();
3332
3333 if($track_name) {
3334 #use a specific adaptor
3335 if(exists $xfa_hash->{$track_name}) {
3336 push @xf_adaptors, $xfa_hash->{$track_name};
3337 }
3338 } else {
3339 #use all of the adaptors
3340 push @xf_adaptors, values %$xfa_hash;
3341 }
3342
3343
3344 foreach my $xfa (@xf_adaptors) {
3345 push @$features, @{$xfa->fetch_all_by_Slice($self)};
3346 }
3347
3348 return $features;
3349 }
3350
3351
3352 =head2 get_all_DitagFeatures
3353
3354 Arg [1] : (optional) string ditag type
3355 Arg [1] : (optional) string logic_name
3356 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
3357 Description: Retrieves the DitagFeatures of a specific type which overlap
3358 this slice with. If type is not defined, all features are
3359 retrieved.
3360 Returntype : listref of Bio::EnsEMBL::DitagFeatures
3361 Exceptions : warning if slice does not have attached adaptor
3362 Caller : general
3363 Status : Stable
3364
3365 =cut
3366
3367 sub get_all_DitagFeatures {
3368 my ($self, $type, $logic_name) = @_;
3369
3370 if(!$self->adaptor()) {
3371 warning('Cannot get DitagFeatures without attached adaptor');
3372 return [];
3373 }
3374
3375 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor();
3376
3377 return $dfa->fetch_all_by_Slice($self, $type, $logic_name);
3378 }
3379
3380
3381
3382
3383 # GENERIC FEATURES (See DBAdaptor.pm)
3384
3385 =head2 get_generic_features
3386
3387 Arg [1] : (optional) List of names of generic feature types to return.
3388 If no feature names are given, all generic features are
3389 returned.
3390 Example : my %features = %{$slice->get_generic_features()};
3391 Description: Gets generic features via the generic feature adaptors that
3392 have been added via DBAdaptor->add_GenricFeatureAdaptor (if
3393 any)
3394 Returntype : Hash of named features.
3395 Exceptions : none
3396 Caller : none
3397 Status : Stable
3398
3399 =cut
3400
3401 sub get_generic_features {
3402
3403 my ($self, @names) = @_;
3404
3405 if(!$self->adaptor()) {
3406 warning('Cannot retrieve features without attached adaptor');
3407 return [];
3408 }
3409
3410 my $db = $self->adaptor()->db();
3411
3412 my %features = (); # this will hold the results
3413
3414 # get the adaptors for each feature
3415 my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
3416
3417 foreach my $adaptor_name (keys(%adaptors)) {
3418
3419 my $adaptor_obj = $adaptors{$adaptor_name};
3420 # get the features and add them to the hash
3421 my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
3422 # add each feature to the hash to be returned
3423 foreach my $feature (@$features_ref) {
3424 $features{$adaptor_name} = $feature;
3425 }
3426 }
3427
3428 return \%features;
3429
3430 }
3431
3432 =head2 project_to_slice
3433
3434 Arg [1] : Slice to project to.
3435 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
3436 foreach my $segment ( @$chr_projection ){
3437 $chr_slice = $segment->to_Slice();
3438 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
3439 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
3440 '-'.$chr_slice->end().
3441 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
3442 }
3443 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
3444 and we want to state which one to project to.
3445 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
3446 can also be used as [$start,$end,$slice] triplets.
3447 Exceptions : none
3448 Caller : none
3449 Status : At Risk
3450
3451 =cut
3452
3453 sub project_to_slice {
3454 my $self = shift;
3455 my $to_slice = shift;
3456
3457 throw('Slice argument is required') if(!$to_slice);
3458
3459 my $slice_adaptor = $self->adaptor();
3460
3461 if(!$slice_adaptor) {
3462 warning("Cannot project without attached adaptor.");
3463 return [];
3464 }
3465
3466
3467 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
3468
3469 my $cs = $to_slice->coord_system();
3470 my $slice_cs = $self->coord_system();
3471 my $to_slice_id = $to_slice->get_seq_region_id;
3472
3473 my @projection;
3474 my $current_start = 1;
3475
3476 # decompose this slice into its symlinked components.
3477 # this allows us to handle haplotypes and PARs
3478 my $normal_slice_proj =
3479 $slice_adaptor->fetch_normalized_slice_projection($self);
3480 foreach my $segment (@$normal_slice_proj) {
3481 my $normal_slice = $segment->[2];
3482
3483 $slice_cs = $normal_slice->coord_system();
3484
3485 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
3486 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
3487
3488 # perform the mapping between this slice and the requested system
3489 my @coords;
3490
3491 if( defined $asm_mapper ) {
3492 @coords = $asm_mapper->map($normal_slice->seq_region_name(),
3493 $normal_slice->start(),
3494 $normal_slice->end(),
3495 $normal_slice->strand(),
3496 $slice_cs, undef, $to_slice);
3497 } else {
3498 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
3499 $normal_slice->end());
3500 }
3501
3502 my $last_rank =0;
3503 #construct a projection from the mapping results and return it
3504 foreach my $coord (@coords) {
3505 my $coord_start = $coord->start();
3506 my $coord_end = $coord->end();
3507 my $length = $coord_end - $coord_start + 1;
3508
3509
3510 if( $last_rank != $coord->rank){
3511 $current_start = 1;
3512 }
3513 $last_rank = $coord->rank;
3514
3515 #skip gaps
3516 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
3517 if($coord->id != $to_slice_id){ # for multiple mappings only get the correct one
3518 $current_start += $length;
3519 next;
3520 }
3521 my $coord_cs = $coord->coord_system();
3522
3523 # If the normalised projection just ended up mapping to the
3524 # same coordinate system we were already in then we should just
3525 # return the original region. This can happen for example, if we
3526 # were on a PAR region on Y which refered to X and a projection to
3527 # 'toplevel' was requested.
3528 # if($coord_cs->equals($slice_cs)) {
3529 # # trim off regions which are not defined
3530 # return $self->_constrain_to_region();
3531 # }
3532
3533 #create slices for the mapped-to coord system
3534 my $slice = $slice_adaptor->fetch_by_seq_region_id(
3535 $coord->id(),
3536 $coord_start,
3537 $coord_end,
3538 $coord->strand());
3539
3540 my $current_end = $current_start + $length - 1;
3541
3542 push @projection, bless([$current_start, $current_end, $slice],
3543 "Bio::EnsEMBL::ProjectionSegment");
3544 }
3545
3546 $current_start += $length;
3547 }
3548 }
3549
3550
3551 # delete the cache as we may want to map to different set next time and old
3552 # results will be cached.
3553
3554 $mapper_aptr->delete_cache;
3555
3556 return \@projection;
3557 }
3558
3559
3560 =head2 get_all_synonyms
3561
3562 Args : none.
3563 Example : my @alternative_names = @{$slice->get_all_synonyms()};
3564 Description: get a list of alternative names for this slice
3565 Returntype : reference to list of SeqRegionSynonym objects.
3566 Exception : none
3567 Caller : general
3568 Status : At Risk
3569
3570 =cut
3571
3572 sub get_all_synonyms{
3573 my $self = shift;
3574 my $external_db_id =shift;
3575
3576 if ( !defined( $self->{'synonym'} ) ) {
3577 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3578 $self->{'synonym'} =
3579 $adap->get_synonyms( $self->get_seq_region_id($self) );
3580 }
3581
3582 return $self->{'synonym'};
3583 }
3584
3585 =head2 add_synonym
3586
3587 Args[0] : synonym.
3588 Example : $slice->add_synonym("alt_name");
3589 Description: add an alternative name for this slice
3590 Returntype : none
3591 Exception : none
3592 Caller : general
3593 Status : At Risk
3594
3595 =cut
3596
3597 sub add_synonym{
3598 my $self = shift;
3599 my $syn = shift;
3600 my $external_db_id = shift;
3601
3602 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3603 if ( !defined( $self->{'synonym'} ) ) {
3604 $self->{'synonym'} = $self->get_all_synonyms();
3605 }
3606 my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap,
3607 -synonym => $syn,
3608 -external_db_id => $external_db_id,
3609 -seq_region_id => $self->get_seq_region_id($self));
3610
3611 push (@{$self->{'synonym'}}, $new_syn);
3612
3613 return;
3614 }
3615
3616 =head2 summary_as_hash
3617
3618 Example : $slice_summary = $slice->summary_as_hash();
3619 Description : Retrieves a textual summary of this slice.
3620 Returns : hashref of descriptive strings
3621 =cut
3622
3623 sub summary_as_hash {
3624 my $self = shift;
3625 my %summary;
3626 $summary{'display_id'} = $self->display_id;
3627 $summary{'start'} = $self->start;
3628 $summary{'end'} = $self->end;
3629 $summary{'strand'} = $self->strand;
3630 $summary{'Is_circular'} = $self->is_circular ? "true" : "false";
3631 $summary{'region_name'} = $self->seq_region_name();
3632 return \%summary;
3633 }
3634
3635 #
3636 # Bioperl Bio::PrimarySeqI methods:
3637 #
3638
3639 =head2 id
3640
3641 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3642
3643 =cut
3644
3645 sub id { name(@_); }
3646
3647
3648 =head2 display_id
3649
3650 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3651
3652 =cut
3653
3654 sub display_id { name(@_); }
3655
3656
3657 =head2 primary_id
3658
3659 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3660
3661 =cut
3662
3663 sub primary_id { name(@_); }
3664
3665
3666 =head2 desc
3667
3668 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3669
3670 =cut
3671
3672 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); }
3673
3674
3675 =head2 moltype
3676
3677 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3678
3679 =cut
3680
3681 sub moltype { return 'dna'; }
3682
3683 =head2 alphabet
3684
3685 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3686
3687 =cut
3688
3689 sub alphabet { return 'dna'; }
3690
3691
3692 =head2 accession_number
3693
3694 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3695
3696 =cut
3697
3698 sub accession_number { name(@_); }
3699
3700
3701 # sub DEPRECATED METHODS #
3702 ###############################################################################
3703
3704 =head1 DEPRECATED METHODS
3705
3706 =head2 get_all_AffyFeatures
3707
3708 Description: DEPRECATED, use functionality provided by the Ensembl
3709 Functional Genomics API instead.
3710
3711 =cut
3712
3713 sub get_all_AffyFeatures {
3714 deprecate( 'Use functionality provided by the '
3715 . 'Ensembl Functional Genomics API instead.' );
3716 throw('Can not delegate deprecated functionality.');
3717
3718 # Old code:
3719
3720 # my $self = shift;
3721 # my @arraynames = @_;
3722 #
3723 # my $sa = $self->adaptor();
3724 # if ( ! $sa ) {
3725 # warning( "Cannot retrieve features without attached adaptor." );
3726 # }
3727 # my $fa = $sa->db()->get_AffyFeatureAdaptor();
3728 # my $features;
3729 #
3730 # if ( @arraynames ) {
3731 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
3732 # } else {
3733 # $features = $fa->fetch_all_by_Slice( $self );
3734 # }
3735 # return $features;
3736 }
3737
3738 =head2 get_all_OligoFeatures
3739
3740 Description: DEPRECATED, use functionality provided by the Ensembl
3741 Functional Genomics API instead.
3742
3743 =cut
3744
3745 sub get_all_OligoFeatures {
3746
3747 deprecate( 'Use functionality provided by the '
3748 . 'Ensembl Functional Genomics API instead.' );
3749 throw('Can not delegate deprecated functionality.');
3750
3751 # Old code:
3752
3753 # my $self = shift;
3754 # my @arraynames = @_;
3755 #
3756 # my $sa = $self->adaptor();
3757 # if ( ! $sa ) {
3758 # warning( "Cannot retrieve features without attached adaptor." );
3759 # }
3760 # my $fa = $sa->db()->get_OligoFeatureAdaptor();
3761 # my $features;
3762 #
3763 # if ( @arraynames ) {
3764 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
3765 # } else {
3766 # $features = $fa->fetch_all_by_Slice( $self );
3767 # }
3768 # return $features;
3769 }
3770
3771 =head2 get_all_OligoFeatures_by_type
3772
3773 Description: DEPRECATED, use functionality provided by the Ensembl
3774 Functional Genomics API instead.
3775
3776 =cut
3777
3778 sub get_all_OligoFeatures_by_type {
3779
3780 deprecate( 'Use functionality provided by the '
3781 . 'Ensembl Functional Genomics API instead.' );
3782 throw('Can not delegate deprecated functionality.');
3783
3784 # Old code:
3785
3786 # my ($self, $type, $logic_name) = @_;
3787 #
3788 # throw('Need type as parameter') if !$type;
3789 #
3790 # my $sa = $self->adaptor();
3791 # if ( ! $sa ) {
3792 # warning( "Cannot retrieve features without attached adaptor." );
3793 # }
3794 # my $fa = $sa->db()->get_OligoFeatureAdaptor();
3795 #
3796 # my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name );
3797 #
3798 # return $features;
3799 }
3800
3801 =head2 get_all_supercontig_Slices
3802
3803 Description: DEPRECATED use get_tiling_path("NTcontig") instead
3804
3805 =cut
3806
3807
3808 sub get_all_supercontig_Slices {
3809 my $self = shift;
3810
3811 deprecate("Use get_tiling_path('NTcontig') instead");
3812
3813 my $result = [];
3814
3815 if( $self->adaptor() ) {
3816 my $superctg_names =
3817 $self->adaptor()->list_overlapping_supercontigs( $self );
3818
3819 for my $name ( @$superctg_names ) {
3820 my $slice;
3821 $slice = $self->adaptor()->fetch_by_supercontig_name( $name );
3822 $slice->name( $name );
3823 push( @$result, $slice );
3824 }
3825 } else {
3826 warning( "Slice needs to be attached to a database to get supercontigs" );
3827 }
3828
3829 return $result;
3830 }
3831
3832
3833
3834
3835
3836 =head2 get_Chromosome
3837
3838 Description: DEPRECATED use this instead:
3839 $slice_adp->fetch_by_region('chromosome',
3840 $slice->seq_region_name)
3841
3842 =cut
3843
3844 sub get_Chromosome {
3845 my $self = shift @_;
3846
3847 deprecate("Use SliceAdaptor::fetch_by_region('chromosome'," .
3848 '$slice->seq_region_name) instead');
3849
3850 my $csa = $self->adaptor->db->get_CoordSystemAdaptor();
3851 my ($top_cs) = @{$csa->fetch_all()};
3852
3853 return $self->adaptor->fetch_by_region($top_cs->name(),
3854 $self->seq_region_name(),
3855 undef,undef,undef,
3856 $top_cs->version());
3857 }
3858
3859
3860
3861 =head2 chr_name
3862
3863 Description: DEPRECATED use seq_region_name() instead
3864
3865 =cut
3866
3867 sub chr_name{
3868 deprecate("Use seq_region_name() instead");
3869 seq_region_name(@_);
3870 }
3871
3872
3873
3874 =head2 chr_start
3875
3876 Description: DEPRECATED use start() instead
3877
3878 =cut
3879
3880 sub chr_start{
3881 deprecate('Use start() instead');
3882 start(@_);
3883 }
3884
3885
3886
3887 =head2 chr_end
3888
3889 Description: DEPRECATED use end() instead
3890 Returntype : int
3891 Exceptions : none
3892 Caller : SliceAdaptor, general
3893
3894 =cut
3895
3896 sub chr_end{
3897 deprecate('Use end() instead');
3898 end(@_);
3899 }
3900
3901
3902 =head2 assembly_type
3903
3904 Description: DEPRECATED use version instead
3905
3906 =cut
3907
3908 sub assembly_type{
3909 my $self = shift;
3910 deprecate('Use $slice->coord_system()->version() instead.');
3911 return $self->coord_system->version();
3912 }
3913
3914
3915 =head2 get_tiling_path
3916
3917 Description: DEPRECATED use project instead
3918
3919 =cut
3920
3921 sub get_tiling_path {
3922 my $self = shift;
3923 deprecate('Use $slice->project("seqlevel") instead.');
3924 return [];
3925 }
3926
3927
3928 =head2 dbID
3929
3930 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead
3931
3932 =cut
3933
3934 sub dbID {
3935 my $self = shift;
3936 deprecate('Use SliceAdaptor::get_seq_region_id instead.');
3937 if(!$self->adaptor) {
3938 warning('Cannot retrieve seq_region_id without attached adaptor.');
3939 return 0;
3940 }
3941 return $self->adaptor->get_seq_region_id($self);
3942 }
3943
3944
3945 =head2 get_all_MapFrags
3946
3947 Description: DEPRECATED use get_all_MiscFeatures instead
3948
3949 =cut
3950
3951 sub get_all_MapFrags {
3952 my $self = shift;
3953 deprecate('Use get_all_MiscFeatures instead');
3954 return $self->get_all_MiscFeatures(@_);
3955 }
3956
3957 =head2 has_MapSet
3958
3959 Description: DEPRECATED use get_all_MiscFeatures instead
3960
3961 =cut
3962
3963 sub has_MapSet {
3964 my( $self, $mapset_name ) = @_;
3965 deprecate('Use get_all_MiscFeatures instead');
3966 my $mfs = $self->get_all_MiscFeatures($mapset_name);
3967 return (@$mfs > 0);
3968 }
3969
3970 1;