comparison variant_effect_predictor/Bio/EnsEMBL/CircularSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::CircularSlice - Arbitary Slice of a genome
24
25 =head1 SYNOPSIS
26
27 $sa = $db->get_SliceAdaptor;
28
29 $slice =
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
31
32 # get some attributes of the slice
33 my $seqname = $slice->seq_region_name();
34 my $start = $slice->start();
35 my $end = $slice->end();
36
37 # get the sequence from the slice
38 my $seq = $slice->seq();
39
40 # get some features from the slice
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
42 # do something with a gene
43 }
44
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
46 # do something with dna-dna alignments
47 }
48
49 =head1 DESCRIPTION
50
51 A Slice object represents a region of a genome. It can be used to
52 retrieve sequence or features from an area of interest.
53
54 =head1 METHODS
55
56 =cut
57
58 package Bio::EnsEMBL::CircularSlice;
59 use vars qw(@ISA);
60 use strict;
61
62 use Bio::PrimarySeqI;
63
64 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
65 use Bio::EnsEMBL::Utils::Exception
66 qw(throw deprecate warning stack_trace_dump);
67 use Bio::EnsEMBL::RepeatMaskedSlice;
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
69 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
70 use Bio::EnsEMBL::ProjectionSegment;
71 use Bio::EnsEMBL::Registry;
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor;
73
74 use Bio::EnsEMBL::StrainSlice;
75 #use Bio::EnsEMBL::IndividualSlice;
76 #use Bio::EnsEMBL::IndividualSliceFactory;
77 use Bio::EnsEMBL::Mapper::RangeRegistry;
78 use Bio::EnsEMBL::Slice;
79 use Data::Dumper;
80 use Scalar::Util qw(weaken isweak);
81
82 my $reg = "Bio::EnsEMBL::Registry";
83
84 @ISA = qw(Bio::EnsEMBL::Slice);
85
86 =head2 new
87
88 Arg [...] : List of named arguments
89 Bio::EnsEMBL::CoordSystem COORD_SYSTEM
90 string SEQ_REGION_NAME,
91 int START,
92 int END,
93 int SEQ_REGION_LENGTH, (optional)
94 string SEQ (optional)
95 int STRAND, (optional, defaults to 1)
96 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
97 Example :
98
99 $slice =
100 Bio::EnsEMBL::CircularSlice->new( -coord_system => $cs,
101 -start => 1,
102 -end => 10000,
103 -strand => 1,
104 -seq_region_name => 'X',
105 -seq_region_length => 12e6,
106 -adaptor => $slice_adaptor );
107
108 Description: Creates a new slice object. A slice represents a
109 region of sequence in a particular coordinate system.
110 Slices can be used to retrieve sequence and features
111 from an area of interest in a genome.
112
113 Coordinates start at 1 and are inclusive. Negative
114 coordinates or coordinates exceeding the length of
115 the seq_region are permitted. Start must be less
116 than or equal. to end regardless of the strand.
117
118 Slice objects are immutable. Once instantiated their
119 attributes (with the exception of the adaptor) may
120 not be altered. To change the attributes a new slice
121 must be created.
122
123 Returntype : Bio::EnsEMBL::CircularSlice
124 Exceptions : throws if start, end, coordsystem or seq_region_name not
125 specified or not of the correct type
126 Caller : general, Bio::EnsEMBL::SliceAdaptor
127 Status : Stable
128
129 =cut
130
131 sub new {
132 my $caller = shift;
133
134 #new can be called as a class or object method
135 my $class = ref($caller) || $caller;
136
137 my ( $seq, $coord_system, $seq_region_name, $seq_region_length,
138 $start, $end, $strand, $adaptor, $empty )
139 = rearrange( [
140 qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
141 START END STRAND ADAPTOR EMPTY) ],
142 @_ );
143
144 #empty is only for backwards compatibility
145 if ($empty) {
146 deprecate( "Creation of empty slices is no longer needed "
147 . "and is deprecated" );
148 my $self = bless( { 'empty' => 1 }, $class );
149 $self->adaptor($adaptor);
150 return $self;
151 }
152
153 if ( !defined($seq_region_name) ) {
154 throw('SEQ_REGION_NAME argument is required');
155 }
156 if ( !defined($start) ) { throw('START argument is required') }
157 if ( !defined($end) ) { throw('END argument is required') }
158
159 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
160
161 if ( $seq_region_length <= 0 ) {
162 throw('SEQ_REGION_LENGTH must be > 0');
163 }
164
165 if ( defined($coord_system) ) {
166 assert_ref( $coord_system, 'Bio::EnsEMBL::CoordSystem' );
167
168 if ( $coord_system->is_top_level() ) {
169 throw('Cannot create circular slice on toplevel CoordSystem.');
170 }
171 } else {
172 warning("CircularSlice without coordinate system");
173 }
174
175 $strand ||= 1;
176
177 if ( $strand != 1 && $strand != -1 ) {
178 throw('STRAND argument must be -1 or 1');
179 }
180
181 if ( defined($adaptor) ) {
182 assert_ref( $adaptor, 'Bio::EnsEMBL::DBSQL::SliceAdaptor' );
183 }
184
185 my $seq1 = { 'coord_system' => $coord_system,
186 'seq' => $seq,
187 'seq_region_name' => $seq_region_name,
188 'seq_region_length' => $seq_region_length,
189 'start' => int($start),
190 'end' => int($end),
191 'strand' => $strand };
192
193 bless $seq1, $class;
194 $seq1->adaptor($adaptor);
195 return $seq1;
196 } ## end sub new
197
198 =head2 new_fast
199
200 Arg [1] : hashref to be blessed
201 Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
202 Exceptions : none
203 Returntype : Bio::EnsEMBL::CircularSlice
204 Caller : general
205 Status : Stable
206
207 =cut
208
209 sub new_fast {
210 my $class = shift;
211 my $hashref = shift;
212 my $self = bless $hashref, $class;
213 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
214 return $self;
215 }
216
217 =head2 centrepoint
218
219 Arg [1] : none
220 Example : $cp = $slice->centrepoint();
221 Description: Returns the mid position of this slice relative to the
222 start of the sequence region that it was created on.
223 Coordinates are inclusive and start at 1.
224 Returntype : int
225 Exceptions : none
226 Caller : general
227 Status : Stable
228
229 =cut
230
231 sub centrepoint {
232 my $self = shift;
233
234 my ( $s, $e, $length ) =
235 ( $self->{'start'}, $self->{'end'}, $self->{'seq_region_length'} );
236
237 if ( $s < $e ) {
238 return ( $s + $e )/2;
239 }
240
241 my $r1 = $length - $s;
242 my $r2 = $e;
243 my $r = ( $r1 + $r2 )/2;
244 my $m = $s + $r;
245
246 if ( $m > $length ) {
247 $m = $m - $length;
248 }
249
250 return $m;
251 }
252
253 =head2 length
254
255 Arg [1] : none
256 Example : $length = $slice->length();
257 Description: Returns the length of this slice in basepairs
258 Returntype : int
259 Exceptions : none
260 Caller : general
261 Status : Stable
262
263 =cut
264
265 sub length {
266 my ($self) = @_;
267
268 if ( $self->{'start'} < $self->{'end'} ) {
269 return $self->{'end'} - $self->{'start'} + 1;
270 }
271
272 my $r1 = $self->{'seq_region_length'} - $self->{'start'};
273 my $r2 = $self->{'end'};
274 my $ln = $r1 + $r2 + 1;
275
276 return $ln;
277 }
278
279 =head2 invert
280
281 Arg [1] : none
282 Example : $inverted_slice = $slice->invert;
283 Description: Creates a copy of this slice on the opposite strand and
284 returns it.
285 Returntype : Bio::EnsEMBL::Slice
286 Exceptions : none
287 Caller : general
288 Status : Stable
289
290 =cut
291
292 sub invert {
293 my $self = shift;
294
295 # make a shallow copy of the slice via a hash copy and flip the strand
296 my %s = %$self;
297 $s{'strand'} = $self->{'strand'}*-1;
298
299 # reverse compliment any attached sequence
300 reverse_comp( \$s{'seq'} ) if ( $s{'seq'} );
301
302 # bless and return the copy
303 return bless \%s, ref $self;
304 }
305
306 =head2 seq
307
308 Arg [1] : none
309 Example : print "SEQUENCE = ", $slice->seq();
310 Description: Returns the sequence of the region represented by this
311 slice formatted as a string.
312 Returntype : string
313 Exceptions : none
314 Caller : general
315 Status : Stable
316
317 =cut
318
319 sub seq {
320 my $self = shift;
321
322 # special case for in-between (insert) coordinates
323 return '' if ( $self->start() == $self->end() + 1 );
324 return $self->{'seq'} if ( $self->{'seq'} );
325
326 if ( $self->adaptor() ) {
327
328 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
329 if ( $self->{'start'} > $self->{'end'} ) {
330 my $length = $self->{'seq_region_length'};
331
332 my $sl1 =
333 Bio::EnsEMBL::Slice->new(
334 -COORD_SYSTEM => $self->{'coord_system'},
335 -SEQ_REGION_NAME => $self->{'seq_region_name'},
336 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
337 -START => $self->{'start'},
338 -END => $self->{'seq_region_length'},
339 -STRAND => $self->{'strand'},
340 -ADAPTOR => $self->adaptor() );
341
342 my $sl2 =
343 Bio::EnsEMBL::Slice->new(
344 -COORD_SYSTEM => $self->{'coord_system'},
345 -SEQ_REGION_NAME => $self->{'seq_region_name'},
346 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
347 -START => 1,
348 -END => $self->{'end'},
349 -STRAND => $self->{'strand'},
350 -ADAPTOR => $self->adaptor() );
351
352 my $seq1 = ${
353 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl1, 1, undef,
354 1 ) };
355 my $seq2 = ${
356 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl2, 1, undef,
357 1 ) };
358 return $seq1 . $seq2;
359
360 } else {
361 my $seq1 = ${
362 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, undef,
363 1 ) };
364 return $seq1;
365 }
366 } ## end if ( $self->adaptor() )
367
368 # no attached sequence, and no db, so just return Ns
369 return 'N' x $self->length();
370 } ## end sub seq
371
372 =head2 subseq
373
374 Arg [1] : int $startBasePair
375 relative to start of slice, which is 1.
376 Arg [2] : int $endBasePair
377 relative to start of slice.
378 Arg [3] : (optional) int $strand
379 The strand of the slice to obtain sequence from. Default
380 value is 1.
381 Description: returns string of dna sequence
382 Returntype : txt
383 Exceptions : end should be at least as big as start
384 strand must be set
385 Caller : general
386 Status : Stable
387
388 =cut
389
390 sub subseq {
391 my ( $self, $start, $end, $strand ) = @_;
392
393 # handle 'between' case for insertions
394 return '' if ( $start == $end + 1 );
395
396 $strand = 1 unless ( defined $strand );
397
398 if ( $strand != -1 && $strand != 1 ) {
399 throw("Invalid strand [$strand] in call to Slice::subseq.");
400 }
401 my $subseq;
402 my $length = $self->{'seq_region_length'};
403
404 if ( $self->adaptor ) {
405
406 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
407 if ( $end < $start ) {
408 my $subseq1 = ${
409 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
410 $length, $strand )
411 };
412 my $subseq2 = ${
413 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, $end,
414 $strand ) };
415 $subseq = $subseq1 . $subseq2;
416
417 } else {
418 $subseq = ${
419 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
420 $end, $strand ) };
421 }
422 } else {
423 ## check for gap at the beginning and pad it with Ns
424 if ( $start < 1 ) {
425 $subseq = "N" x ( 1 - $start );
426 $start = 1;
427 }
428 $subseq .= substr( $self->seq(), $start - 1, $end - $start + 1 );
429 ## check for gap at the end and pad it with Ns
430 if ( $end > $self->length() ) {
431 $subseq .= "N" x ( $end - $self->length() );
432 }
433 reverse_comp( \$subseq ) if ( $strand == -1 );
434 }
435 return $subseq;
436 } ## end sub subseq
437
438 =head2 project
439
440 Arg [1] : string $name
441 The name of the coordinate system to project this slice onto
442 Arg [2] : string $version
443 The version of the coordinate system (such as 'NCBI34') to
444 project this slice onto
445 Example :
446 my $clone_projection = $slice->project('clone');
447
448 foreach my $seg (@$clone_projection) {
449 my $clone = $segment->to_Slice();
450 print $slice->seq_region_name(), ':', $seg->from_start(), '-',
451 $seg->from_end(), ' -> ',
452 $clone->seq_region_name(), ':', $clone->start(), '-',
453 $clone->end(),
454 $clone->strand(), "\n";
455 }
456 Description: Returns the results of 'projecting' this slice onto another
457 coordinate system. Projecting to a coordinate system that
458 the slice is assembled from is analagous to retrieving a tiling
459 path. This method may also be used to 'project up' to a higher
460 level coordinate system, however.
461
462 This method returns a listref of triplets [start,end,slice]
463 which represents the projection. The start and end defined the
464 region of this slice which is made up of the third value of
465 the triplet: a slice in the requested coordinate system.
466 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
467 can also be used as [$start,$end,$slice] triplets
468 Exceptions : none
469 Caller : general
470 Status : Stable
471
472 =cut
473
474 sub project {
475 my $self = shift;
476 my $cs_name = shift;
477 my $cs_version = shift;
478
479 throw('Coord_system name argument is required') if ( !$cs_name );
480
481 my $slice_adaptor = $self->adaptor();
482
483 if ( !$slice_adaptor ) {
484 warning("Cannot project without attached adaptor.");
485 return [];
486 }
487
488 if ( !$self->coord_system() ) {
489 warning("Cannot project without attached coord system.");
490 return [];
491 }
492
493 my $db = $slice_adaptor->db();
494 my $csa = $db->get_CoordSystemAdaptor();
495 my $cs = $csa->fetch_by_name( $cs_name, $cs_version );
496
497 my $sl01 =
498 Bio::EnsEMBL::CircularSlice->new(
499 -COORD_SYSTEM => $self->{'coord_system'},
500 -SEQ_REGION_NAME => $self->{'seq_region_name'},
501 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
502 -START => $self->{'start'},
503 -END => $self->{'seq_region_length'},
504 -STRAND => $self->{'strand'},
505 -ADAPTOR => $self->adaptor() );
506
507 my $sl02 =
508 Bio::EnsEMBL::CircularSlice->new(
509 -COORD_SYSTEM => $self->{'coord_system'},
510 -SEQ_REGION_NAME => $self->{'seq_region_name'},
511 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
512 -START => 1,
513 -END => $self->{'end'},
514 -STRAND => $self->{'strand'},
515 -ADAPTOR => $self->adaptor() );
516
517 my @projection;
518 my $current_start = 1;
519
520 foreach my $sl2 ( $sl01, $sl02 ) {
521 my $slice_cs = $sl2->coord_system();
522
523 if ( !$cs ) {
524 throw( "Cannot project to unknown coordinate system "
525 . "[$cs_name $cs_version]" );
526 }
527
528 # no mapping is needed if the requested coord system is the one we are in
529 # but we do need to check if some of the slice is outside of defined regions
530 if ( $slice_cs->equals($cs) ) {
531 return $self->_constrain_to_region();
532 }
533
534 # decompose this slice into its symlinked components.
535 # this allows us to handle haplotypes and PARs
536 my $normal_slice_proj =
537 $slice_adaptor->fetch_normalized_slice_projection($sl2);
538
539 foreach my $segment (@$normal_slice_proj) {
540 my $normal_slice = $segment->[2];
541
542 $slice_cs = $normal_slice->coord_system();
543
544 my $asma = $db->get_AssemblyMapperAdaptor();
545 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
546
547 # perform the mapping between this slice and the requested system
548 my @coords;
549
550 if ( defined $asm_mapper ) {
551 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
552 $normal_slice->start(),
553 $normal_slice->end(),
554 $normal_slice->strand(),
555 $slice_cs );
556
557 } else {
558 $coords[0] =
559 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
560 $normal_slice->end() );
561 }
562
563 #construct a projection from the mapping results and return it
564 foreach my $coord (@coords) {
565 my $coord_start = $coord->start();
566 my $coord_end = $coord->end();
567 my $length = $coord_end - $coord_start + 1;
568
569 #skip gaps
570 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
571 my $coord_cs = $coord->coord_system();
572
573 # If the normalised projection just ended up mapping to the
574 # same coordinate system we were already in then we should just
575 # return the original region. This can happen for example, if we
576 # were on a PAR region on Y which refered to X and a projection to
577 # 'toplevel' was requested.
578
579 if ( $coord_cs->equals($slice_cs) ) {
580 # trim off regions which are not defined
581 return $self->_constrain_to_region();
582 }
583 #create slices for the mapped-to coord system
584
585 my $slice =
586 $slice_adaptor->fetch_by_seq_region_id(
587 $coord->id(), $coord_start,
588 $coord_end, $coord->strand()
589 );
590
591 my $current_end = $current_start + $length - 1;
592 push @projection,
593 bless( [ $current_start, $current_end, $slice ],
594 "Bio::EnsEMBL::ProjectionSegment" );
595 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...))
596
597 $current_start += $length;
598 } ## end foreach my $coord (@coords)
599 } ## end foreach my $segment (@$normal_slice_proj)
600 } #foreach
601
602 return \@projection;
603 } ## end sub project
604
605 sub project_org {
606 my $self = shift;
607 my $cs_name = shift;
608 my $cs_version = shift;
609
610 throw('Coord_system name argument is required') if ( !$cs_name );
611
612 my $slice_adaptor = $self->adaptor();
613
614 if ( !$slice_adaptor ) {
615 warning("Cannot project without attached adaptor.");
616 return [];
617 }
618
619 if ( !$self->coord_system() ) {
620 warning("Cannot project without attached coord system.");
621 return [];
622 }
623
624 my $db = $slice_adaptor->db();
625 my $csa = $db->get_CoordSystemAdaptor();
626 my $cs = $csa->fetch_by_name( $cs_name, $cs_version );
627 my $slice_cs = $self->coord_system();
628
629 if ( !$cs ) {
630 throw( "Cannot project to unknown coordinate system "
631 . "[$cs_name $cs_version]" );
632 }
633
634 # No mapping is needed if the requested coord system is the one we
635 # are in. But we do need to check if some of the slice is outside of
636 # defined regions.
637 if ( $slice_cs->equals($cs) ) {
638 return $self->_constrain_to_region();
639 }
640
641 my @projection;
642 my $current_start = 1;
643
644 # Decompose this slice into its symlinked components. This allows us
645 # to handle haplotypes and PARs.
646 my $normal_slice_proj =
647 $slice_adaptor->fetch_normalized_slice_projection($self);
648 foreach my $segment (@$normal_slice_proj) {
649 my $normal_slice = $segment->[2];
650
651 $slice_cs = $normal_slice->coord_system();
652
653 my $asma = $db->get_AssemblyMapperAdaptor();
654 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
655
656 # perform the mapping between this slice and the requested system
657 my @coords;
658
659 if ( defined $asm_mapper ) {
660 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
661 $normal_slice->start(),
662 $normal_slice->end(),
663 $normal_slice->strand(),
664 $slice_cs );
665
666 } else {
667 $coords[0] =
668 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
669 $normal_slice->end() );
670 }
671
672 #construct a projection from the mapping results and return it
673 foreach my $coord (@coords) {
674 my $coord_start = $coord->start();
675 my $coord_end = $coord->end();
676 my $length = $coord_end - $coord_start + 1;
677
678 #skip gaps
679 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
680 my $coord_cs = $coord->coord_system();
681
682 # If the normalised projection just ended up mapping to the
683 # same coordinate system we were already in then we should just
684 # return the original region. This can happen for example,
685 # if we were on a PAR region on Y which refered to X and a
686 # projection to 'toplevel' was requested.
687
688 if ( $coord_cs->equals($slice_cs) ) {
689 # trim off regions which are not defined
690 return $self->_constrain_to_region();
691 }
692 #create slices for the mapped-to coord system
693
694 my $slice =
695 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
696 $coord_start, $coord_end, $coord->strand() );
697
698 my $current_end = $current_start + $length - 1;
699
700 push @projection,
701 bless( [ $current_start, $current_end, $slice ],
702 "Bio::EnsEMBL::ProjectionSegment" );
703 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...))
704
705 $current_start += $length;
706 } ## end foreach my $coord (@coords)
707 } ## end foreach my $segment (@$normal_slice_proj)
708
709 return \@projection;
710 } ## end sub project_org
711
712 sub _constrain_to_region {
713 my $self = shift;
714
715 my $entire_len = $self->seq_region_length();
716
717 # If the slice has negative coordinates or coordinates exceeding the
718 # exceeding length of the sequence region we want to shrink the slice
719 # to the defined region.
720
721 if ( $self->{'start'} > $entire_len || $self->{'end'} < 1 ) {
722 #none of this slice is in a defined region
723 return [];
724 }
725
726 my $right_contract = 0;
727 my $left_contract = 0;
728 if ( $self->{'end'} > $entire_len ) {
729 $right_contract = $entire_len - $self->{'end'};
730 }
731 if ( $self->{'start'} < 1 ) {
732 $left_contract = $self->{'start'} - 1;
733 }
734
735 my $new_slice;
736 if ( $left_contract || $right_contract ) {
737 $new_slice = $self->expand( $left_contract, $right_contract );
738 } else {
739 $new_slice = $self;
740 }
741
742 return [ bless [ 1 - $left_contract,
743 $self->length() + $right_contract,
744 $new_slice ],
745 "Bio::EnsEMBL::ProjectionSegment" ];
746 } ## end sub _constrain_to_region
747
748 =head2 expand
749
750 Arg [1] : (optional) int $five_prime_expand
751 The number of basepairs to shift this slices five_prime
752 coordinate by. Positive values make the slice larger,
753 negative make the slice smaller.
754 coordinate left.
755 Default = 0.
756 Arg [2] : (optional) int $three_prime_expand
757 The number of basepairs to shift this slices three_prime
758 coordinate by. Positive values make the slice larger,
759 negative make the slice smaller.
760 Default = 0.
761 Arg [3] : (optional) bool $force_expand
762 if set to 1, then the slice will be contracted even in the case
763 when shifts $five_prime_expand and $three_prime_expand overlap.
764 In that case $five_prime_expand and $three_prime_expand will be set
765 to a maximum possible number and that will result in the slice
766 which would have only 2pbs.
767 Default = 0.
768 Arg [4] : (optional) int* $fpref
769 The reference to a number of basepairs to shift this slices five_prime
770 coordinate by. Normally it would be set to $five_prime_expand.
771 But in case when $five_prime_expand shift can not be applied and
772 $force_expand is set to 1, then $$fpref will contain the maximum possible
773 shift
774 Arg [5] : (optional) int* $tpref
775 The reference to a number of basepairs to shift this slices three_prime
776 coordinate by. Normally it would be set to $three_prime_expand.
777 But in case when $five_prime_expand shift can not be applied and
778 $force_expand is set to 1, then $$tpref will contain the maximum possible
779 shift
780 Example : my $expanded_slice = $slice->expand( 1000, 1000);
781 my $contracted_slice = $slice->expand(-1000,-1000);
782 my $shifted_right_slice = $slice->expand(-1000, 1000);
783 my $shifted_left_slice = $slice->expand( 1000,-1000);
784 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
785
786 Description: Returns a slice which is a resized copy of this slice. The
787 start and end are moved outwards from the center of the slice
788 if positive values are provided and moved inwards if negative
789 values are provided. This slice remains unchanged. A slice
790 may not be contracted below 1bp but may grow to be arbitrarily
791 large.
792 Returntype : Bio::EnsEMBL::Slice
793 Exceptions : warning if an attempt is made to contract the slice below 1bp
794 Caller : general
795 Status : Stable
796
797 =cut
798
799 sub expand {
800 my $self = shift;
801 my $five_prime_shift = shift || 0;
802 my $three_prime_shift = shift || 0;
803 my $force_expand = shift || 0;
804 my $fpref = shift;
805 my $tpref = shift;
806
807 if ( $self->{'seq'} ) {
808 warning(
809 "Cannot expand a slice which has a manually attached sequence ");
810 return undef;
811 }
812
813 my $new_start;
814 my $new_end;
815 my $sshift = $five_prime_shift;
816 my $eshift = $three_prime_shift;
817
818 if ( $self->{'strand'} != 1 ) {
819 $eshift = $five_prime_shift;
820 $sshift = $three_prime_shift;
821 }
822
823 $new_start = $self->{'start'} - $sshift;
824 $new_end = $self->{'end'} + $eshift;
825
826 # if($new_start > $new_end) {
827 # if ($force_expand) { # Apply max possible shift, if force_expand is set
828 # if ($sshift < 0) { # if we are contracting the slice from the start - move the start just before the end
829 # $new_start = $new_end - 1;
830 # $sshift = $self->{start} - $new_start;
831 # }
832
833 # if($new_start > $new_end) { # if the slice still has a negative length - try to move the end
834 # if ($eshift < 0) {
835 # $new_end = $new_start + 1;
836 # $eshift = $new_end - $self->{end};
837 # }
838 # }
839 # return the values by which the primes were actually shifted
840 # $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
841 # $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
842 # }
843 # if($new_start > $new_end) {
844 # throw('Slice start cannot be greater than slice end');
845 # }
846 # }
847
848 #fastest way to copy a slice is to do a shallow hash copy
849 my %new_slice = %$self;
850 $new_slice{'start'} = int($new_start);
851 $new_slice{'end'} = int($new_end);
852
853 return bless \%new_slice, ref($self);
854 } ## end sub expand
855
856 =head2 sub_Slice_same
857
858 Arg 1 : int $start
859 Arg 2 : int $end
860 Arge [3] : int $strand
861 Description: Makes another Slice that covers only part of this slice
862 If a slice is requested which lies outside of the boundaries
863 of this function will return undef. This means that
864 behaviour will be consistant whether or not the slice is
865 attached to the database (i.e. if there is attached sequence
866 to the slice). Alternatively the expand() method or the
867 SliceAdaptor::fetch_by_region method can be used instead.
868 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
869 Exceptions : none
870 Caller : general
871 Status : Stable
872
873 =cut
874
875 sub sub_Slice_same {
876 my ( $self, $start, $end, $strand ) = @_;
877
878 if ( $start < 1 || $start > $self->{'end'} ) {
879 # throw( "start argument not valid" );
880 return undef;
881 }
882
883 if ( $end < $start || $end > $self->{'end'} ) {
884 # throw( "end argument not valid" )
885 return undef;
886 }
887
888 my ( $new_start, $new_end, $new_strand, $new_seq );
889 if ( !defined $strand ) {
890 $strand = 1;
891 }
892
893 if ( $self->{'strand'} == 1 ) {
894 $new_start = $self->{'start'} + $start - 1;
895 $new_end = $self->{'start'} + $end - 1;
896 $new_strand = $strand;
897 } else {
898 $new_start = $self->{'end'} - $end + 1;
899 $new_end = $self->{'end'} - $start + 1;
900 $new_strand = -$strand;
901 }
902
903 if ( defined $self->{'seq'} ) {
904 $new_seq = $self->subseq( $start, $end, $strand );
905 }
906
907 #fastest way to copy a slice is to do a shallow hash copy
908 my %new_slice = %$self;
909 $new_slice{'start'} = int($new_start);
910 $new_slice{'end'} = int($new_end);
911 $new_slice{'strand'} = $new_strand;
912 if ($new_seq) {
913 $new_slice{'seq'} = $new_seq;
914 }
915
916 return bless \%new_slice, ref($self);
917 } ## end sub sub_Slice_same
918
919 =head2 seq_region_Slice_same
920
921 Arg [1] : none
922 Example : $slice = $slice->seq_region_Slice();
923 Description: Returns a slice which spans the whole seq_region which this slice
924 is on. For example if this is a slice which spans a small region
925 of chromosome X, this method will return a slice which covers the
926 entire chromosome X. The returned slice will always have strand
927 of 1 and start of 1. This method cannot be used if the sequence
928 of the slice has been set manually.
929 Returntype : Bio::EnsEMBL::Slice
930 Exceptions : warning if called when sequence of Slice has been set manually.
931 Caller : general
932 Status : Stable
933
934 =cut
935
936 sub seq_region_Slice_same {
937 my $self = shift;
938
939 if ( $self->{'seq'} ) {
940 warning(
941 "Cannot get a seq_region_Slice of a slice which has manually "
942 . "attached sequence " );
943 return undef;
944 }
945
946 # quick shallow copy
947 my $slice;
948 %{$slice} = %{$self};
949 bless $slice, ref($self);
950
951 $slice->{'start'} = 1;
952 $slice->{'end'} = $slice->{'seq_region_length'};
953 $slice->{'strand'} = 1;
954
955 return $slice;
956 }
957
958 =head2 get_seq_region_id_same
959
960 Arg [1] : none
961 Example : my $seq_region_id = $slice->get_seq_region_id();
962 Description: Gets the internal identifier of the seq_region that this slice
963 is on. Note that this function will not work correctly if this
964 slice does not have an attached adaptor. Also note that it may
965 be better to go through the SliceAdaptor::get_seq_region_id
966 method if you are working with multiple databases since is
967 possible to work with slices from databases with different
968 internal seq_region identifiers.
969 Returntype : int or undef if slices does not have attached adaptor
970 Exceptions : warning if slice is not associated with a SliceAdaptor
971 Caller : assembly loading scripts, general
972 Status : Stable
973
974 =cut
975
976 sub get_seq_region_id_same {
977 my ($self) = @_;
978
979 if ( $self->adaptor ) {
980 return $self->adaptor->get_seq_region_id($self);
981 } else {
982 warning('Cannot retrieve seq_region_id without attached adaptor.');
983 return undef;
984 }
985 }
986
987 =head2 get_all_Attributes
988
989 Arg [1] : optional string $attrib_code
990 The code of the attribute type to retrieve values for.
991 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
992 @slice_attributes = @{$slice->get_all_Attributes()};
993 Description: Gets a list of Attributes of this slice''s seq_region.
994 Optionally just get Attrubutes for given code.
995 Returntype : listref Bio::EnsEMBL::Attribute
996 Exceptions : warning if slice does not have attached adaptor
997 Caller : general
998 Status : Stable
999
1000 =cut
1001
1002 sub get_all_Attributes {
1003 my $self = shift;
1004 my $attrib_code = shift;
1005
1006 my $result;
1007 my @results;
1008
1009 if ( !$self->adaptor() ) {
1010 warning('Cannot get attributes without an adaptor.');
1011 return [];
1012 }
1013
1014 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
1015
1016 ## circular BOF
1017 my $sl1 =
1018 Bio::EnsEMBL::CircularSlice->new(
1019 -COORD_SYSTEM => $self->{'coord_system'},
1020 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1021 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1022 -START => $self->{'start'},
1023 -END => $self->{'seq_region_length'},
1024 -STRAND => $self->{'strand'},
1025 -ADAPTOR => $self->adaptor() );
1026
1027 my $sl2 =
1028 Bio::EnsEMBL::CircularSlice->new(
1029 -COORD_SYSTEM => $self->{'coord_system'},
1030 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1031 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1032 -START => 1,
1033 -END => $self->{'end'},
1034 -STRAND => $self->{'strand'},
1035 -ADAPTOR => $self->adaptor() );
1036 my $pta_ref = [];
1037
1038 if ( defined $attrib_code ) {
1039 my @res1 =
1040 grep { uc( $_->code() ) eq uc($attrib_code) }
1041 @{ $attribute_adaptor->fetch_all_by_Slice($sl1) };
1042 my @res2 =
1043 grep { uc( $_->code() ) eq uc($attrib_code) }
1044 @{ $attribute_adaptor->fetch_all_by_Slice($sl2) };
1045 my @res;
1046 push @res, @res1, @res2;
1047 $result = \@res;
1048 } else {
1049 my @res1 = @{ $attribute_adaptor->fetch_all_by_Slice($sl1) };
1050 my @res2 = @{ $attribute_adaptor->fetch_all_by_Slice($sl2) };
1051 my @res;
1052 push @res, @res1, @res2;
1053 $result = \@res;
1054 }
1055
1056 ## circular EOF
1057
1058 # if( defined $attrib_code ) {
1059 # @results = grep { uc($_->code()) eq uc($attrib_code) }
1060 # @{$attribute_adaptor->fetch_all_by_Slice( $self )};
1061 # $result = \@results;
1062 # } else {
1063 # $result = $attribute_adaptor->fetch_all_by_Slice( $self );
1064 # }
1065
1066 return $result;
1067 } ## end sub get_all_Attributes
1068
1069 =head2 get_all_PredictionTranscripts
1070
1071 Arg [1] : (optional) string $logic_name
1072 The name of the analysis used to generate the prediction
1073 transcripts obtained.
1074 Arg [2] : (optional) boolean $load_exons
1075 If set to true will force loading of all PredictionExons
1076 immediately rather than loading them on demand later. This
1077 is faster if there are a large number of PredictionTranscripts
1078 and the exons will be used.
1079 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1080 Description: Retrieves the list of prediction transcripts which overlap
1081 this slice with logic_name $logic_name. If logic_name is
1082 not defined then all prediction transcripts are retrieved.
1083 Returntype : listref of Bio::EnsEMBL::PredictionTranscript
1084 Exceptions : warning if slice does not have attached adaptor
1085 Caller : none
1086 Status : Stable
1087
1088 =cut
1089
1090 sub get_all_PredictionTranscripts {
1091 my ( $self, $logic_name, $load_exons ) = @_;
1092
1093 if ( !$self->adaptor() ) {
1094 warning(
1095 'Cannot get PredictionTranscripts without attached adaptor');
1096 return [];
1097 }
1098 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1099
1100 ## circular BOF
1101 my $sl1 =
1102 Bio::EnsEMBL::CircularSlice->new(
1103 -COORD_SYSTEM => $self->{'coord_system'},
1104 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1105 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1106 -START => $self->{'start'},
1107 -END => $self->{'seq_region_length'},
1108 -STRAND => $self->{'strand'},
1109 -ADAPTOR => $self->adaptor() );
1110
1111 my $sl2 =
1112 Bio::EnsEMBL::CircularSlice->new(
1113 -COORD_SYSTEM => $self->{'coord_system'},
1114 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1115 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1116 -START => 1,
1117 -END => $self->{'end'},
1118 -STRAND => $self->{'strand'},
1119 -ADAPTOR => $self->adaptor() );
1120
1121 my ( @arr, @arr1, @arr2 );
1122 @arr1 =
1123 @{ $pta->fetch_all_by_Slice( $sl1, $logic_name, $load_exons ) };
1124 @arr2 =
1125 @{ $pta->fetch_all_by_Slice( $sl2, $logic_name, $load_exons ) };
1126 push @arr, @arr1, @arr2;
1127 return \@arr;
1128 ## circular EOF
1129
1130 #return $pta->fetch_all_by_Slice($sl1, $logic_name, $load_exons);
1131 } ## end sub get_all_PredictionTranscripts
1132
1133 =head2 get_all_DnaAlignFeatures
1134
1135 Arg [1] : (optional) string $logic_name
1136 The name of the analysis performed on the dna align features
1137 to obtain.
1138 Arg [2] : (optional) float $score
1139 The mimimum score of the features to retrieve
1140 Arg [3] : (optional) string $dbtype
1141 The name of an attached database to retrieve the features from
1142 instead, e.g. 'otherfeatures'.
1143 Arg [4] : (optional) float hcoverage
1144 The minimum hcoverage od the featurs to retrieve
1145 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1146 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
1147 logic name $logic_name and with score above $score. If
1148 $logic_name is not defined features of all logic names are
1149 retrieved. If $score is not defined features of all scores are
1150 retrieved.
1151 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1152 Exceptions : warning if slice does not have attached adaptor
1153 Caller : general
1154 Status : Stable
1155
1156 =cut
1157
1158 sub get_all_DnaAlignFeatures {
1159 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_;
1160
1161 if ( !$self->adaptor() ) {
1162 warning('Cannot get DnaAlignFeatures without attached adaptor');
1163 return [];
1164 }
1165
1166 my $db;
1167
1168 if ($dbtype) {
1169 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1170 if ( !$db ) {
1171 warning("Don't have db $dbtype returning empty list\n");
1172 return [];
1173 }
1174 } else {
1175 $db = $self->adaptor->db;
1176 }
1177
1178 my $dafa = $db->get_DnaAlignFeatureAdaptor();
1179
1180 if ( defined($score) and defined($hcoverage) ) {
1181 warning "cannot specify score and hcoverage. Using score only";
1182 }
1183
1184 ## circular BOF
1185 my $sl1 =
1186 Bio::EnsEMBL::CircularSlice->new(
1187 -COORD_SYSTEM => $self->{'coord_system'},
1188 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1189 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1190 -START => $self->{'start'},
1191 -END => $self->{'seq_region_length'},
1192 -STRAND => $self->{'strand'},
1193 -ADAPTOR => $self->adaptor() );
1194
1195 my $sl2 =
1196 Bio::EnsEMBL::CircularSlice->new(
1197 -COORD_SYSTEM => $self->{'coord_system'},
1198 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1199 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1200 -START => 1,
1201 -END => $self->{'end'},
1202 -STRAND => $self->{'strand'},
1203 -ADAPTOR => $self->adaptor() );
1204
1205 my ( @arr, @arr1, @arr2 );
1206 my %union;
1207 if ( defined($score) ) {
1208 @arr1 = @{ $dafa->fetch_all_by_Slice_and_score( $sl1, $score,
1209 $logic_name ) };
1210 @arr2 = @{ $dafa->fetch_all_by_Slice_and_score( $sl2, $score,
1211 $logic_name ) };
1212 push @arr, @arr1, @arr2;
1213 return \@arr;
1214 }
1215 @arr1 = @{
1216 $dafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage,
1217 $logic_name ) };
1218 @arr2 = @{
1219 $dafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage,
1220 $logic_name ) };
1221 push @arr, @arr1, @arr2;
1222 return \@arr;
1223
1224 ## circular EOF
1225
1226 # if(defined($score)){
1227 # return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1228 # }
1229 # return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
1230 } ## end sub get_all_DnaAlignFeatures
1231
1232 =head2 get_all_ProteinAlignFeatures
1233
1234 Arg [1] : (optional) string $logic_name
1235 The name of the analysis performed on the protein align features
1236 to obtain.
1237 Arg [2] : (optional) float $score
1238 The mimimum score of the features to retrieve
1239 Arg [3] : (optional) string $dbtype
1240 The name of an attached database to retrieve features from
1241 instead.
1242 Arg [4] : (optional) float hcoverage
1243 The minimum hcoverage od the featurs to retrieve
1244 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1245 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
1246 logic name $logic_name and with score above $score. If
1247 $logic_name is not defined features of all logic names are
1248 retrieved. If $score is not defined features of all scores are
1249 retrieved.
1250 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1251 Exceptions : warning if slice does not have attached adaptor
1252 Caller : general
1253 Status : Stable
1254
1255 =cut
1256
1257 sub get_all_ProteinAlignFeatures {
1258 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_;
1259 if ( !$self->adaptor() ) {
1260 warning('Cannot get ProteinAlignFeatures without attached adaptor');
1261 return [];
1262 }
1263 my $db;
1264 if ($dbtype) {
1265 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1266 if ( !$db ) {
1267 warning("Don't have db $dbtype returning empty list\n");
1268 return [];
1269 }
1270 } else {
1271 $db = $self->adaptor->db;
1272 }
1273
1274 my $pafa = $db->get_ProteinAlignFeatureAdaptor();
1275
1276 if ( defined($score) and defined($hcoverage) ) {
1277 warning "cannot specify score and hcoverage. Using score only";
1278 }
1279
1280 my $sl1 =
1281 Bio::EnsEMBL::CircularSlice->new(
1282 -COORD_SYSTEM => $self->{'coord_system'},
1283 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1285 -START => $self->{'start'},
1286 -END => $self->{'seq_region_length'},
1287 -STRAND => $self->{'strand'},
1288 -ADAPTOR => $self->adaptor() );
1289
1290 my $sl2 =
1291 Bio::EnsEMBL::CircularSlice->new(
1292 -COORD_SYSTEM => $self->{'coord_system'},
1293 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1294 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1295 -START => 1,
1296 -END => $self->{'end'},
1297 -STRAND => $self->{'strand'},
1298 -ADAPTOR => $self->adaptor() );
1299 my ( @arr, @arr1, @arr2 );
1300 if ( defined($score) ) {
1301 @arr1 = @{ $pafa->fetch_all_by_Slice_and_score( $sl1, $score,
1302 $logic_name ) };
1303 @arr2 = @{ $pafa->fetch_all_by_Slice_and_score( $sl2, $score,
1304 $logic_name ) };
1305
1306 push @arr, @arr1, @arr2;
1307 return \@arr;
1308 }
1309
1310 @arr1 = @{
1311 $pafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage,
1312 $logic_name ) };
1313 @arr2 = @{
1314 $pafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage,
1315 $logic_name ) };
1316
1317 push @arr, @arr1, @arr2;
1318 return \@arr;
1319
1320 # if(defined($score)){
1321 # return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1322 # }
1323 # return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
1324
1325 } ## end sub get_all_ProteinAlignFeatures
1326
1327 =head2 get_all_SimilarityFeatures
1328
1329 Arg [1] : (optional) string $logic_name
1330 the name of the analysis performed on the features to retrieve
1331 Arg [2] : (optional) float $score
1332 the lower bound of the score of the features to be retrieved
1333 Example : @feats = @{$slice->get_all_SimilarityFeatures};
1334 Description: Retrieves all dna_align_features and protein_align_features
1335 with analysis named $logic_name and with score above $score.
1336 It is probably faster to use get_all_ProteinAlignFeatures or
1337 get_all_DnaAlignFeatures if a sepcific feature type is desired.
1338 If $logic_name is not defined features of all logic names are
1339 retrieved. If $score is not defined features of all scores are
1340 retrieved.
1341 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1342 Exceptions : warning if slice does not have attached adaptor
1343 Caller : general
1344 Status : Stable
1345
1346 =cut
1347
1348 sub get_all_SimilarityFeatures {
1349 my ( $self, $logic_name, $score ) = @_;
1350
1351 my @out = ();
1352
1353 push @out,
1354 @{ $self->get_all_ProteinAlignFeatures( $logic_name, $score ) };
1355 push @out,
1356 @{ $self->get_all_DnaAlignFeatures( $logic_name, $score ) };
1357
1358 return \@out;
1359 }
1360
1361 =head2 get_all_SimpleFeatures
1362
1363 Arg [1] : (optional) string $logic_name
1364 The name of the analysis performed on the simple features
1365 to obtain.
1366 Arg [2] : (optional) float $score
1367 The mimimum score of the features to retrieve
1368 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1369 Description: Retrieves the SimpleFeatures which overlap this slice with
1370 logic name $logic_name and with score above $score. If
1371 $logic_name is not defined features of all logic names are
1372 retrieved. If $score is not defined features of all scores are
1373 retrieved.
1374 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1375 Exceptions : warning if slice does not have attached adaptor
1376 Caller : general
1377 Status : Stable
1378
1379 =cut
1380
1381 sub get_all_SimpleFeatures {
1382 my ( $self, $logic_name, $score, $dbtype ) = @_;
1383
1384 if ( !$self->adaptor() ) {
1385 warning('Cannot get SimpleFeatures without attached adaptor');
1386 return [];
1387 }
1388
1389 my $db;
1390 if ($dbtype) {
1391 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1392 if ( !$db ) {
1393 warning("Don't have db $dbtype returning empty list\n");
1394 return [];
1395 }
1396 } else {
1397 $db = $self->adaptor->db;
1398 }
1399
1400 my $sfa = $db->get_SimpleFeatureAdaptor();
1401
1402 return $sfa->fetch_all_by_Slice_and_score( $self, $score,
1403 $logic_name );
1404 }
1405
1406 #### STOP !!! ########################################
1407
1408 =head2 get_all_RepeatFeatures
1409
1410 Arg [1] : (optional) string $logic_name
1411 The name of the analysis performed on the repeat features
1412 to obtain.
1413 Arg [2] : (optional) string $repeat_type
1414 Limits features returned to those of the specified repeat_type
1415 Arg [3] : (optional) string $db
1416 Key for database e.g. core/vega/cdna/....
1417 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'LTR')};
1418 Description: Retrieves the RepeatFeatures which overlap with
1419 logic name $logic_name and with score above $score. If
1420 $logic_name is not defined features of all logic names are
1421 retrieved.
1422 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1423 Exceptions : warning if slice does not have attached adaptor
1424 Caller : general
1425 Status : Stable
1426
1427 =cut
1428
1429 sub get_all_RepeatFeatures {
1430 my ( $self, $logic_name, $repeat_type, $dbtype ) = @_;
1431
1432 if ( !$self->adaptor() ) {
1433 warning('Cannot get RepeatFeatures without attached adaptor');
1434 return [];
1435 }
1436
1437 my $db;
1438 if ($dbtype) {
1439 $db = $self->adaptor->db->get_db_adaptor($dbtype);
1440 if ( !$db ) {
1441 warning("Don't have db $dbtype returning empty list\n");
1442 return [];
1443 }
1444 } else {
1445 $db = $self->adaptor->db;
1446 }
1447
1448 my $rpfa = $db->get_RepeatFeatureAdaptor();
1449
1450 return $rpfa->fetch_all_by_Slice( $self, $logic_name, $repeat_type );
1451 }
1452
1453 =head2 get_all_LD_values
1454
1455 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1456 Description : returns all LD values on this slice. This function will only work correctly if the variation
1457 database has been attached to the core database. If the argument is passed, will return the LD information
1458 in that population
1459 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1460 Exceptions : none
1461 Caller : contigview, snpview
1462 Status : At Risk
1463 : Variation database is under development.
1464
1465 =cut
1466
1467 sub get_all_LD_values {
1468 my $self = shift;
1469 my $population = shift;
1470
1471 if ( !$self->adaptor() ) {
1472 warning('Cannot get LDFeatureContainer without attached adaptor');
1473 return [];
1474 }
1475
1476 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1477
1478 unless ($variation_db) {
1479 warning( "Variation database must be attached to core database to "
1480 . "retrieve variation information" );
1481 return [];
1482 }
1483
1484 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor;
1485
1486 if ($ld_adaptor) {
1487 return $ld_adaptor->fetch_by_Slice( $self, $population );
1488 } else {
1489 return [];
1490
1491 }
1492
1493 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer");
1494
1495 # if( $ld_adaptor ) {
1496 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population);
1497 # if (@{$ld_values} > 1){
1498 # warning("More than 1 variation database attached. Trying to merge LD results");
1499 # my $ld_value_merged = shift @{$ld_values};
1500 # #with more than 1 variation database attached, will try to merge in one single LDContainer object.
1501 # foreach my $ld (@{$ld_values}){
1502 # #copy the ld values to the result hash
1503 # foreach my $key (keys %{$ld->{'ldContainer'}}){
1504 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key};
1505 # }
1506 # #and copy the variationFeatures as well
1507 # foreach my $key (keys %{$ld->{'variationFeatures'}}){
1508 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key};
1509 # }
1510
1511 # }
1512 # return $ld_value_merged;
1513 # }
1514 # else{
1515 # return shift @{$ld_values};
1516 # }
1517 # } else {
1518 # warning("Variation database must be attached to core database to " .
1519 # "retrieve variation information" );
1520 # return [];
1521 # }
1522 } ## end sub get_all_LD_values
1523
1524 =head2 get_all_VariationFeatures
1525
1526 Args : $filter [optional]
1527 Description:returns all variation features on this slice. This function will only work
1528 correctly if the variation database has been attached to the core database.
1529 If $filter is "genotyped" return genotyped Snps only... (nice likkle hack);
1530 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1531 Exceptions : none
1532 Caller : contigview, snpview
1533 Status : At Risk
1534 : Variation database is under development.
1535
1536 =cut
1537
1538 sub get_all_VariationFeatures {
1539 my $self = shift;
1540 my $filter = shift;
1541
1542 $filter ||= '';
1543 if ( !$self->adaptor() ) {
1544 warning('Cannot get variation features without attached adaptor');
1545 return [];
1546 }
1547
1548 my $vf_adaptor =
1549 Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
1550 -species => $self->adaptor()->db()->species,
1551 -type => "VariationFeature" );
1552 if ($vf_adaptor) {
1553 if ( $filter eq 'genotyped' ) {
1554 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
1555 } else {
1556 return $vf_adaptor->fetch_all_by_Slice($self);
1557 }
1558 } else {
1559 warning( "Variation database must be attached to core database to "
1560 . "retrieve variation information" );
1561 return [];
1562 }
1563 } ## end sub get_all_VariationFeatures
1564
1565 =head2 get_all_IndividualSlice
1566
1567 Args : none
1568 Example : my $individualSlice = $slice->get_by_Population($population);
1569 Description : Gets the specific Slice for all the individuls in the population
1570 ReturnType : listref of Bio::EnsEMB::IndividualSlice
1571 Exceptions : none
1572 Caller : general
1573
1574 =cut
1575
1576 sub get_all_IndividualSlice {
1577 my $self = shift;
1578
1579 my $individualSliceFactory =
1580 Bio::EnsEMBL::IndividualSliceFactory->new(
1581 -START => $self->{'start'},
1582 -END => $self->{'end'},
1583 -STRAND => $self->{'strand'},
1584 -ADAPTOR => $self->adaptor(),
1585 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1586 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1587 -COORD_SYSTEM => $self->{'coord_system'}, );
1588 return $individualSliceFactory->get_all_IndividualSlice();
1589 }
1590
1591 =head2 get_by_Individual
1592
1593 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual
1594 Example : my $individualSlice = $slice->get_by_Individual($individual);
1595 Description : Gets the specific Slice for the individual
1596 ReturnType : Bio::EnsEMB::IndividualSlice
1597 Exceptions : none
1598 Caller : general
1599
1600 =cut
1601
1602 sub get_by_Individual {
1603 my $self = shift;
1604 my $individual = shift;
1605
1606 return Bio::EnsEMBL::IndividualSlice->new(
1607 -START => $self->{'start'},
1608 -END => $self->{'end'},
1609 -STRAND => $self->{'strand'},
1610 -ADAPTOR => $self->adaptor(),
1611 # -SEQ => $self->{'seq'},
1612 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1613 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1614 -COORD_SYSTEM => $self->{'coord_system'},
1615 -INDIVIDUAL => $individual );
1616
1617 }
1618
1619 =head2 get_by_strain
1620
1621 Arg[1] : string $strain
1622 Example : my $strainSlice = $slice->get_by_strain($strain);
1623 Description : Gets the specific Slice for the strain
1624 ReturnType : Bio::EnsEMB::StrainSlice
1625 Exceptions : none
1626 Caller : general
1627
1628 =cut
1629
1630 sub get_by_strain {
1631 my $self = shift;
1632 my $strain_name = shift;
1633
1634 return
1635 Bio::EnsEMBL::StrainSlice->new(
1636 -START => $self->{'start'},
1637 -END => $self->{'end'},
1638 -STRAND => $self->{'strand'},
1639 -ADAPTOR => $self->adaptor(),
1640 -SEQ => $self->{'seq'},
1641 -SEQ_REGION_NAME => $self->{'seq_region_name'},
1642 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
1643 -COORD_SYSTEM => $self->{'coord_system'},
1644 -STRAIN_NAME => $strain_name );
1645
1646 }
1647
1648 sub calculate_theta {
1649 my $self = shift;
1650 my $strains = shift;
1651 my $feature = shift
1652 ; #optional parameter. Name of the feature in the Slice you want to calculate
1653
1654 if ( !$self->adaptor() ) {
1655 warning('Cannot get variation features without attached adaptor');
1656 return 0;
1657 }
1658 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1659
1660 unless ($variation_db) {
1661 warning( "Variation database must be attached to core database to "
1662 . "retrieve variation information" );
1663 return 0;
1664 }
1665
1666 #need to get coverage regions for the slice in the different strains
1667 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
1668 my $strain;
1669 my $differences = [];
1670 my $slices = [];
1671 if ($coverage_adaptor) {
1672 my $num_strains = scalar( @{$strains} ) + 1;
1673 if ( !defined $feature ) {
1674 #we want to calculate for the whole slice
1675 push @{$slices},
1676 $self; #add the slice as the slice to calculate the theta value
1677 } else {
1678 #we have features, get the slices for the different features
1679 my $features = $self->get_all_Exons();
1680 map { push @{$slices}, $_->feature_Slice }
1681 @{$features}; #add the slices of the features
1682 }
1683 my $length_regions = 0;
1684 my $snps = 0;
1685 my $theta = 0;
1686 my $last_position = 0;
1687 #get all the differences in the slice coordinates
1688 foreach my $strain_name ( @{$strains} ) {
1689 my $strain = $self->get_by_strain($strain_name)
1690 ; #get the strainSlice for the strain
1691
1692 my $results = $strain->get_all_differences_Slice;
1693 push @{$differences}, @{$results} if ( defined $results );
1694 }
1695 #when we finish, we have, in max_level, the regions covered by all the sample
1696 #sort the differences by the genomic position
1697 my @differences_sorted =
1698 sort { $a->start <=> $b->start } @{$differences};
1699 foreach my $slice ( @{$slices} ) {
1700 my $regions_covered =
1701 $coverage_adaptor->fetch_all_regions_covered( $slice,
1702 $strains );
1703 if ( defined $regions_covered ) {
1704 foreach my $range ( @{$regions_covered} ) {
1705 $length_regions +=
1706 ( $range->[1] - $range->[0] ) +
1707 1; #add the length of the genomic region
1708 for ( my $i = $last_position; $i < @differences_sorted; $i++ )
1709 {
1710 if ( $differences_sorted[$i]->start >= $range->[0]
1711 && $differences_sorted[$i]->end <= $range->[1] )
1712 {
1713 $snps++; #count differences in the region
1714 } elsif ( $differences_sorted[$i]->end > $range->[1] ) {
1715 $last_position = $i;
1716 last;
1717 }
1718 }
1719 }
1720 #when all the ranges have been iterated, calculate rho
1721 #this is an intermediate variable called a in the formula
1722 # a = sum i=2..strains 1/i-1
1723 }
1724 } ## end foreach my $slice ( @{$slices...})
1725 my $a = _calculate_a($num_strains);
1726 $theta = $snps/( $a*$length_regions );
1727 return $theta;
1728 } else {
1729 return 0;
1730 }
1731 } ## end sub calculate_theta
1732
1733 sub _calculate_a {
1734 my $max_level = shift;
1735
1736 my $a = 0;
1737 for ( my $i = 2; $i <= $max_level + 1; $i++ ) {
1738 $a += 1/( $i - 1 );
1739 }
1740 return $a;
1741 }
1742
1743 sub calculate_pi {
1744 my $self = shift;
1745 my $strains = shift;
1746 my $feature = shift;
1747
1748 if ( !$self->adaptor() ) {
1749 warning('Cannot get variation features without attached adaptor');
1750 return 0;
1751 }
1752 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1753
1754 unless ($variation_db) {
1755 warning( "Variation database must be attached to core database to "
1756 . "retrieve variation information" );
1757 return 0;
1758 }
1759
1760 #need to get coverage regions for the slice in the different strains
1761 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
1762 my $differences = [];
1763 my $slices = [];
1764 if ($coverage_adaptor) {
1765 my $num_strains = scalar( @{$strains} ) + 1;
1766 if ( !defined $feature ) {
1767 #we want to calculate for the whole slice
1768 push @{$slices},
1769 $self; #add the slice as the slice to calculate the theta value
1770 } else {
1771 #we have features, get the slices for the different features
1772 my $features = $self->get_all_Exons();
1773 map { push @{$slices}, $_->feature_Slice }
1774 @{$features}; #add the slices of the features
1775 }
1776 my @range_differences = ();
1777 my $pi = 0;
1778 my $regions = 0;
1779 my $last_position =
1780 0; #last position visited in the sorted list of differences
1781 my $triallelic = 0;
1782 my $is_triallelic = 0;
1783 foreach my $slice ( @{$slices} ) {
1784
1785 foreach my $strain_name ( @{$strains} ) {
1786 my $strain = $slice->get_by_strain($strain_name)
1787 ; #get the strainSlice for the strain
1788 my $results = $strain->get_all_differences_Slice;
1789 push @{$differences}, @{$results} if ( defined $results );
1790 }
1791 my @differences_sorted =
1792 sort { $a->start <=> $b->start } @{$differences};
1793
1794 my $regions_covered =
1795 $coverage_adaptor->fetch_all_regions_covered( $slice,
1796 $strains );
1797 #when we finish, we have, in max_level, the regions covered by all the sample
1798 #sort the differences
1799 if ( defined $regions_covered ) {
1800 foreach my $range ( @{$regions_covered} ) {
1801 for ( my $i = $last_position; $i < @differences_sorted; $i++ )
1802 {
1803 if ( $differences_sorted[$i]->start >= $range->[0]
1804 && $differences_sorted[$i]->end <= $range->[1] )
1805 {
1806 #check wether it is the same region or different
1807 if ( !defined $range_differences[0]
1808 || ( $differences_sorted[$i]->start ==
1809 $range_differences[0]->start ) )
1810 {
1811 if ( defined $range_differences[0]
1812 && ( $differences_sorted[$i]->allele_string ne
1813 $range_differences[0]->allele_string ) )
1814 {
1815 $is_triallelic = 1;
1816 }
1817 push @range_differences, $differences_sorted[$i];
1818 } else {
1819 #new site, calc pi for the previous one
1820 $pi +=
1821 2*
1822 ( @range_differences/($num_strains) )*
1823 ( 1 - ( @range_differences/$num_strains ) );
1824 if ($is_triallelic) {
1825 $triallelic++;
1826 $is_triallelic = 0;
1827 }
1828 $regions++;
1829 @range_differences = ();
1830 #and start a new range
1831 push @range_differences, $differences_sorted[$i];
1832 }
1833 } elsif ( $differences_sorted[$i]->end > $range->[1] ) {
1834 $last_position = $i;
1835 last;
1836 }
1837 } ## end for ( my $i = $last_position...)
1838 #calculate pi for last site, if any
1839 if ( defined $range_differences[0] ) {
1840 $pi +=
1841 2*
1842 ( @range_differences/$num_strains )*
1843 ( 1 - ( @range_differences/$num_strains ) );
1844 $regions++;
1845 }
1846 } ## end foreach my $range ( @{$regions_covered...})
1847 } ## end if ( defined $regions_covered)
1848 $pi = $pi/$regions; #calculate average pi
1849 print
1850 "Regions with variations in region $regions and triallelic $triallelic\n\n";
1851 } ## end foreach my $slice ( @{$slices...})
1852 return $pi;
1853 } else {
1854 return 0;
1855 }
1856
1857 } ## end sub calculate_pi
1858
1859 =head2 get_all_genotyped_VariationFeatures
1860
1861 Args : none
1862 Description: returns all variation features on this slice that have been genotyped.
1863 This function will only work correctly if the variation database has
1864 been attached to the core database.
1865 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1866 Exceptions : none
1867 Caller : contigview, snpview, ldview
1868 Status : At Risk
1869 : Variation database is under development.
1870
1871 =cut
1872
1873 sub get_all_genotyped_VariationFeatures {
1874 my $self = shift;
1875 my $vfa;
1876 if ( !$self->adaptor() ) {
1877 warning('Cannot get variation features without attached adaptor');
1878 return [];
1879 }
1880
1881 my $vf_adaptor =
1882 Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
1883 -species => $self->adaptor()->db()->species,
1884 -type => "VariationFeature" );
1885
1886 if ($vf_adaptor) {
1887 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
1888 } else {
1889 warning( "Variation database must be attached to core database to "
1890 . "retrieve variation information" );
1891 return [];
1892 }
1893 }
1894
1895 =head2 get_all_SNPs
1896
1897 Description: DEPRECATED. Use get_all_VariationFeatures instead
1898
1899 =cut
1900
1901 sub get_all_SNPs {
1902 my $self = shift;
1903
1904 deprecate('Use get_all_VariationFeatures() instead.');
1905
1906 my $snps;
1907 my $vf = $self->get_all_genotyped_VariationFeatures();
1908 if ( $vf->[0] ) {
1909 #necessary to convert the VariationFeatures into SNP objects
1910 foreach my $variation_feature ( @{$vf} ) {
1911 push @{$snps}, $variation_feature->convert_to_SNP();
1912 }
1913 return $snps;
1914 } else {
1915 return [];
1916 }
1917 }
1918
1919 =head2 get_all_genotyped_SNPs
1920
1921 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures instead
1922
1923 =cut
1924
1925 sub get_all_genotyped_SNPs {
1926 my $self = shift;
1927
1928 deprecate("Use get_all_genotyped_VariationFeatures instead");
1929 my $vf = $self->get_all_genotyped_VariationFeatures;
1930 my $snps;
1931 if ( $vf->[0] ) {
1932 foreach my $variation_feature ( @{$vf} ) {
1933 push @{$snps}, $variation_feature->convert_to_SNP();
1934 }
1935 return $snps;
1936 } else {
1937 return [];
1938 }
1939 }
1940
1941 sub get_all_SNPs_transcripts {
1942 my $self = shift;
1943
1944 deprecate("DEPRECATED");
1945
1946 return [];
1947
1948 }
1949
1950 =head2 get_all_Genes
1951
1952 Arg [1] : (optional) string $logic_name
1953 The name of the analysis used to generate the genes to retrieve
1954 Arg [2] : (optional) string $dbtype
1955 The dbtype of genes to obtain. This assumes that the db has
1956 been added to the DBAdaptor under this name (using the
1957 DBConnection::add_db_adaptor method).
1958 Arg [3] : (optional) boolean $load_transcripts
1959 If set to true, transcripts will be loaded immediately rather
1960 than being lazy-loaded on request. This will result in a
1961 significant speed up if the Transcripts and Exons are going to
1962 be used (but a slow down if they are not).
1963 Arg [4] : (optional) string $source
1964 The source of the genes to retrieve.
1965 Arg [5] : (optional) string $biotype
1966 The biotype of the genes to retrieve.
1967 Example : @genes = @{$slice->get_all_Genes};
1968 Description: Retrieves all genes that overlap this slice.
1969 Returntype : listref of Bio::EnsEMBL::Genes
1970 Exceptions : none
1971 Caller : none
1972 Status : Stable
1973
1974 =cut
1975
1976 sub get_all_Genes {
1977 my ( $self, $logic_name, $dbtype, $load_transcripts, $source,
1978 $biotype )
1979 = @_;
1980
1981 if ( !$self->adaptor() ) {
1982 warning('Cannot get Genes without attached adaptor');
1983 return [];
1984 }
1985
1986 my $ga;
1987 if ($dbtype) {
1988 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype );
1989 if ( defined($db) ) {
1990 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" );
1991 } else {
1992 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(),
1993 $dbtype, "Gene" );
1994 }
1995 if ( !defined $ga ) {
1996 warning("$dbtype genes not available");
1997 return [];
1998 }
1999 } else {
2000 $ga = $self->adaptor->db->get_GeneAdaptor();
2001 }
2002
2003 ## circular BOF
2004 my $sl1 =
2005 Bio::EnsEMBL::CircularSlice->new(
2006 -COORD_SYSTEM => $self->{'coord_system'},
2007 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2008 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2009 -START => $self->{'start'},
2010 -END => $self->{'seq_region_length'},
2011 -STRAND => $self->{'strand'},
2012 -ADAPTOR => $self->adaptor() );
2013
2014 my $sl2 =
2015 Bio::EnsEMBL::CircularSlice->new(
2016 -COORD_SYSTEM => $self->{'coord_system'},
2017 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2018 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2019 -START => 1,
2020 -END => $self->{'end'},
2021 -STRAND => $self->{'strand'},
2022 -ADAPTOR => $self->adaptor() );
2023
2024 my ( @arr, @arr1, @arr2 );
2025 @arr1 = @{
2026 $ga->fetch_all_by_Slice( $sl1, $logic_name, $load_transcripts,
2027 $source, $biotype ) };
2028 @arr2 = @{
2029 $ga->fetch_all_by_Slice( $sl2, $logic_name, $load_transcripts,
2030 $source, $biotype ) };
2031 push @arr, @arr1, @arr2;
2032 return \@arr;
2033
2034 ## circular EOF
2035
2036 } ## end sub get_all_Genes
2037
2038 =head2 get_all_Genes_by_type
2039
2040 Arg [1] : string $type
2041 The biotype of genes wanted.
2042 Arg [2] : (optional) string $logic_name
2043 Arg [3] : (optional) boolean $load_transcripts
2044 If set to true, transcripts will be loaded immediately rather
2045 than being lazy-loaded on request. This will result in a
2046 significant speed up if the Transcripts and Exons are going to
2047 be used (but a slow down if they are not).
2048 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
2049 'ensembl')};
2050 Description: Retrieves genes that overlap this slice of biotype $type.
2051 This is primarily used by the genebuilding code when several
2052 biotypes of genes are used.
2053
2054 The logic name is the analysis of the genes that are retrieved.
2055 If not provided all genes will be retrieved instead.
2056
2057 Returntype : listref of Bio::EnsEMBL::Genes
2058 Exceptions : none
2059 Caller : genebuilder, general
2060 Status : Stable
2061
2062 =cut
2063
2064 sub get_all_Genes_by_type {
2065 my ( $self, $type, $logic_name, $load_transcripts ) = @_;
2066
2067 if ( !$self->adaptor() ) {
2068 warning('Cannot get Genes without attached adaptor');
2069 return [];
2070 }
2071
2072 return
2073 $self->get_all_Genes( $logic_name, undef, $load_transcripts, undef,
2074 $type );
2075 }
2076
2077 =head2 get_all_Genes_by_source
2078
2079 Arg [1] : string source
2080 Arg [2] : (optional) boolean $load_transcripts
2081 If set to true, transcripts will be loaded immediately rather
2082 than being lazy-loaded on request. This will result in a
2083 significant speed up if the Transcripts and Exons are going to
2084 be used (but a slow down if they are not).
2085 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
2086 Description: Retrieves genes that overlap this slice of source $source.
2087
2088 Returntype : listref of Bio::EnsEMBL::Genes
2089 Exceptions : none
2090 Caller : general
2091 Status : Stable
2092
2093 =cut
2094
2095 sub get_all_Genes_by_source {
2096 my ( $self, $source, $load_transcripts ) = @_;
2097
2098 if ( !$self->adaptor() ) {
2099 warning('Cannot get Genes without attached adaptor');
2100 return [];
2101 }
2102
2103 return $self->get_all_Genes( undef, undef, $load_transcripts,
2104 $source );
2105 }
2106
2107 =head2 get_all_Transcripts
2108
2109 Arg [1] : (optional) boolean $load_exons
2110 If set to true exons will not be lazy-loaded but will instead
2111 be loaded right away. This is faster if the exons are
2112 actually going to be used right away.
2113 Arg [2] : (optional) string $logic_name
2114 the logic name of the type of features to obtain
2115 Arg [3] : (optional) string $db_type
2116 Example : @transcripts = @{$slice->get_all_Transcripts)_};
2117 Description: Gets all transcripts which overlap this slice. If you want to
2118 specify a particular analysis or type, then you are better off
2119 using get_all_Genes or get_all_Genes_by_type and iterating
2120 through the transcripts of each gene.
2121 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2122 Exceptions : none
2123 Caller : general
2124 Status : Stable
2125
2126 =cut
2127
2128 sub get_all_Transcripts {
2129 my $self = shift;
2130 my $load_exons = shift;
2131 my $logic_name = shift;
2132 my $dbtype = shift;
2133 if ( !$self->adaptor() ) {
2134 warning('Cannot get Transcripts without attached adaptor');
2135 return [];
2136 }
2137
2138 my $ta;
2139 if ($dbtype) {
2140 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype );
2141 if ( defined($db) ) {
2142 $ta =
2143 $reg->get_adaptor( $db->species(), $db->group(), "Transcript" );
2144 } else {
2145 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(),
2146 $dbtype, "Transcript" );
2147 }
2148 if ( !defined $ta ) {
2149 warning("$dbtype genes not available");
2150 return [];
2151 }
2152 } else {
2153 $ta = $self->adaptor->db->get_TranscriptAdaptor();
2154 }
2155
2156 ## circular BOF
2157 my $sl1 =
2158 Bio::EnsEMBL::CircularSlice->new(
2159 -COORD_SYSTEM => $self->{'coord_system'},
2160 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2161 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2162 -START => $self->{'start'},
2163 -END => $self->{'seq_region_length'},
2164 -STRAND => $self->{'strand'},
2165 -ADAPTOR => $self->adaptor() );
2166
2167 my $sl2 =
2168 Bio::EnsEMBL::CircularSlice->new(
2169 -COORD_SYSTEM => $self->{'coord_system'},
2170 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2171 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2172 -START => 1,
2173 -END => $self->{'end'},
2174 -STRAND => $self->{'strand'},
2175 -ADAPTOR => $self->adaptor() );
2176
2177 my ( @arr, @arr1, @arr2 );
2178 @arr1 =
2179 @{ $ta->fetch_all_by_Slice( $sl1, $load_exons, $logic_name ) };
2180 @arr2 =
2181 @{ $ta->fetch_all_by_Slice( $sl2, $load_exons, $logic_name ) };
2182 push @arr, @arr1, @arr2;
2183 return \@arr;
2184 ## circular EOF
2185 #return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name);
2186 } ## end sub get_all_Transcripts
2187
2188 =head2 get_all_Exons
2189
2190 Arg [1] : none
2191 Example : @exons = @{$slice->get_all_Exons};
2192 Description: Gets all exons which overlap this slice. Note that these exons
2193 will not be associated with any transcripts, so this may not
2194 be terribly useful.
2195 Returntype : reference to a list of Bio::EnsEMBL::Exons
2196 Exceptions : none
2197 Caller : general
2198 Status : Stable
2199
2200 =cut
2201
2202 sub get_all_Exons {
2203 my $self = shift;
2204
2205 if ( !$self->adaptor() ) {
2206 warning('Cannot get Exons without attached adaptor');
2207 return [];
2208 }
2209
2210 ## circular BOF
2211 my $sl1 =
2212 Bio::EnsEMBL::CircularSlice->new(
2213 -COORD_SYSTEM => $self->{'coord_system'},
2214 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2215 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2216 -START => $self->{'start'},
2217 -END => $self->{'seq_region_length'},
2218 -STRAND => $self->{'strand'},
2219 -ADAPTOR => $self->adaptor() );
2220
2221 my $sl2 =
2222 Bio::EnsEMBL::CircularSlice->new(
2223 -COORD_SYSTEM => $self->{'coord_system'},
2224 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2225 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2226 -START => 1,
2227 -END => $self->{'end'},
2228 -STRAND => $self->{'strand'},
2229 -ADAPTOR => $self->adaptor() );
2230
2231 my ( @arr, @arr1, @arr2 );
2232 @arr1 =
2233 @{ $sl1->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl1) };
2234 @arr2 =
2235 @{ $sl2->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl2) };
2236 push @arr, @arr1, @arr2;
2237 return \@arr;
2238 ## circular EOF
2239
2240 #rturn $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2241 } ## end sub get_all_Exons
2242
2243 =head2 get_all_QtlFeatures
2244
2245 Args : none
2246 Description: returns overlapping QtlFeatures
2247 Returntype : listref Bio::EnsEMBL::Map::QtlFeature
2248 Exceptions : none
2249 Caller : general
2250 Status : Stable
2251
2252 =cut
2253
2254 sub get_all_QtlFeatures {
2255 my $self = shift;
2256
2257 if ( !$self->adaptor() ) {
2258 warning('Cannot get QtlFeatures without attached adaptor');
2259 return [];
2260 }
2261
2262 my $qfAdaptor;
2263 if ( $self->adaptor() ) {
2264 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor();
2265 } else {
2266 return [];
2267 }
2268
2269 ## circular BOF
2270 my $sl1 =
2271 Bio::EnsEMBL::CircularSlice->new(
2272 -COORD_SYSTEM => $self->{'coord_system'},
2273 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2274 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2275 -START => $self->{'start'},
2276 -END => $self->{'seq_region_length'},
2277 -STRAND => $self->{'strand'},
2278 -ADAPTOR => $self->adaptor() );
2279
2280 my $sl2 =
2281 Bio::EnsEMBL::CircularSlice->new(
2282 -COORD_SYSTEM => $self->{'coord_system'},
2283 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2285 -START => 1,
2286 -END => $self->{'end'},
2287 -STRAND => $self->{'strand'},
2288 -ADAPTOR => $self->adaptor() );
2289
2290 my ( @arr, @arr1, @arr2 );
2291 @arr1 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl1) };
2292 @arr2 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl2) };
2293 push @arr, @arr1, @arr2;
2294 return \@arr;
2295 ## circular EOF
2296
2297 #return $qfAdaptor->fetch_all_by_Slice_constraint( $self );
2298 } ## end sub get_all_QtlFeatures
2299
2300 =head2 get_all_KaryotypeBands
2301
2302 Arg [1] : none
2303 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2304 Description: Retrieves the karyotype bands which this slice overlaps.
2305 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2306 Exceptions : none
2307 Caller : general, contigview
2308 Status : Stable
2309
2310 =cut
2311
2312 sub get_all_KaryotypeBands {
2313 my ($self) = @_;
2314
2315 if ( !$self->adaptor() ) {
2316 warning('Cannot get KaryotypeBands without attached adaptor');
2317 return [];
2318 }
2319
2320 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor();
2321
2322 ## circular BOF
2323 my $sl1 =
2324 Bio::EnsEMBL::CircularSlice->new(
2325 -COORD_SYSTEM => $self->{'coord_system'},
2326 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2327 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2328 -START => $self->{'start'},
2329 -END => $self->{'seq_region_length'},
2330 -STRAND => $self->{'strand'},
2331 -ADAPTOR => $self->adaptor() );
2332
2333 my $sl2 =
2334 Bio::EnsEMBL::CircularSlice->new(
2335 -COORD_SYSTEM => $self->{'coord_system'},
2336 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2337 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2338 -START => 1,
2339 -END => $self->{'end'},
2340 -STRAND => $self->{'strand'},
2341 -ADAPTOR => $self->adaptor() );
2342
2343 my ( @arr, @arr1, @arr2 );
2344 @arr1 = @{ $kadp->fetch_all_by_Slice($sl1) };
2345 @arr2 = @{ $kadp->fetch_all_by_Slice($sl2) };
2346 push @arr, @arr1, @arr2;
2347 return \@arr;
2348 ## circular EOF
2349
2350 #return $kadp->fetch_all_by_Slice($self);
2351 } ## end sub get_all_KaryotypeBands
2352
2353 =head2 get_repeatmasked_seq
2354
2355 Arg [1] : listref of strings $logic_names (optional)
2356 Arg [2] : int $soft_masking_enable (optional)
2357 Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
2358 The values are 0 or 1 for hard and soft masking respectively
2359 The keys of the hash should be of 2 forms
2360 "repeat_class_" . $repeat_consensus->repeat_class,
2361 e.g. "repeat_class_SINE/MIR"
2362 "repeat_name_" . $repeat_consensus->name
2363 e.g. "repeat_name_MIR"
2364 depending on which base you want to apply the not default
2365 masking either the repeat_class or repeat_name. Both can be
2366 specified in the same hash at the same time, but in that case,
2367 repeat_name setting has priority over repeat_class. For example,
2368 you may have hard masking as default, and you may want soft
2369 masking of all repeat_class SINE/MIR, but repeat_name AluSp
2370 (which are also from repeat_class SINE/MIR).
2371 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2372 "repeat_name_AluSp" => 0}
2373 Example : $rm_slice = $slice->get_repeatmasked_seq();
2374 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
2375 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
2376 masked sequence instead of the regular sequence.
2377 Sequence returned by this new slice will have repeat regions
2378 hardmasked by default (sequence replaced by N) or
2379 or soft-masked when arg[2] = 1 (sequence in lowercase)
2380 Will only work with database connection to get repeat features.
2381 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
2382 Exceptions : none
2383 Caller : general
2384 Status : Stable
2385
2386 =cut
2387
2388 sub get_repeatmasked_seq {
2389 my ( $self, $logic_names, $soft_mask, $not_default_masking_cases ) =
2390 @_;
2391
2392 return
2393 Bio::EnsEMBL::RepeatMaskedSlice->new(
2394 -START => $self->{'start'},
2395 -END => $self->{'end'},
2396 -STRAND => $self->{'strand'},
2397 -ADAPTOR => $self->adaptor(),
2398 -SEQ => $self->{'seq'},
2399 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2400 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2401 -COORD_SYSTEM => $self->{'coord_system'},
2402 -REPEAT_MASK => $logic_names,
2403 -SOFT_MASK => $soft_mask,
2404 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases
2405 );
2406 }
2407
2408 =head2 _mask_features
2409
2410 Arg [1] : reference to a string $dnaref
2411 Arg [2] : array_ref $repeats
2412 reference to a list Bio::EnsEMBL::RepeatFeature
2413 give the list of coordinates to replace with N or with
2414 lower case
2415 Arg [3] : int $soft_masking_enable (optional)
2416 Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
2417 The values are 0 or 1 for hard and soft masking respectively
2418 The keys of the hash should be of 2 forms
2419 "repeat_class_" . $repeat_consensus->repeat_class,
2420 e.g. "repeat_class_SINE/MIR"
2421 "repeat_name_" . $repeat_consensus->name
2422 e.g. "repeat_name_MIR"
2423 depending on which base you want to apply the not default masking either
2424 the repeat_class or repeat_name. Both can be specified in the same hash
2425 at the same time, but in that case, repeat_name setting has priority over
2426 repeat_class. For example, you may have hard masking as default, and
2427 you may want soft masking of all repeat_class SINE/MIR,
2428 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2429 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2430 "repeat_name_AluSp" => 0}
2431 Description: replaces string positions described in the RepeatFeatures
2432 with Ns (default setting), or with the lower case equivalent
2433 (soft masking). The reference to a dna string which is passed
2434 is changed in place.
2435 Returntype : none
2436 Exceptions : none
2437 Caller : seq
2438 Status : Stable
2439
2440 =cut
2441
2442 sub _mask_features {
2443 my ( $self, $dnaref, $repeats, $soft_mask,
2444 $not_default_masking_cases ) = @_;
2445
2446 $soft_mask = 0 unless ( defined $soft_mask );
2447 $not_default_masking_cases = {}
2448 unless ( defined $not_default_masking_cases );
2449
2450 # explicit CORE::length call, to avoid any confusion with the Slice
2451 # length method
2452 my $dnalen = CORE::length($$dnaref);
2453
2454 REP: foreach my $old_f ( @{$repeats} ) {
2455 my $f = $old_f->transfer($self);
2456 my $start = $f->start;
2457 my $end = $f->end;
2458 my $length = ( $end - $start ) + 1;
2459
2460 # check if we get repeat completely outside of expected slice range
2461 if ( $end < 1 || $start > $dnalen ) {
2462 # warning("Unexpected: Repeat completely outside slice coordinates.");
2463 next REP;
2464 }
2465
2466 # repeat partly outside slice range, so correct
2467 # the repeat start and length to the slice size if needed
2468 if ( $start < 1 ) {
2469 $start = 1;
2470 $length = ( $end - $start ) + 1;
2471 }
2472
2473 # repeat partly outside slice range, so correct
2474 # the repeat end and length to the slice size if needed
2475 if ( $end > $dnalen ) {
2476 $end = $dnalen;
2477 $length = ( $end - $start ) + 1;
2478 }
2479
2480 $start--;
2481
2482 my $padstr;
2483 # if we decide to define masking on the base of the repeat_type, we'll need
2484 # to add the following, and the other commented line few lines below.
2485 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type;
2486 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
2487 my $rc_name = "repeat_name_" . $f->repeat_consensus->name;
2488
2489 my $masking_type;
2490 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type});
2491 $masking_type = $not_default_masking_cases->{$rc_class}
2492 if ( defined $not_default_masking_cases->{$rc_class} );
2493 $masking_type = $not_default_masking_cases->{$rc_name}
2494 if ( defined $not_default_masking_cases->{$rc_name} );
2495
2496 $masking_type = $soft_mask unless ( defined $masking_type );
2497
2498 if ($masking_type) {
2499 $padstr = lc substr( $$dnaref, $start, $length );
2500 } else {
2501 $padstr = 'N' x $length;
2502 }
2503 substr( $$dnaref, $start, $length ) = $padstr;
2504 } ## end foreach my $old_f ( @{$repeats...})
2505 } ## end sub _mask_features
2506
2507 =head2 get_all_SearchFeatures
2508
2509 Arg [1] : scalar $ticket_ids
2510 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
2511 Description: Retreives all search features for stored blast
2512 results for the ticket that overlap this slice
2513 Returntype : listref of Bio::EnsEMBL::SeqFeatures
2514 Exceptions : none
2515 Caller : general (webby!)
2516 Status : Stable
2517
2518 =cut
2519
2520 sub get_all_SearchFeatures {
2521 my $self = shift;
2522 my $ticket = shift;
2523 local $_;
2524 unless ($ticket) {
2525 throw("ticket argument is required");
2526 }
2527
2528 if ( !$self->adaptor() ) {
2529 warning("Cannot get SearchFeatures without an attached adaptor");
2530 return [];
2531 }
2532
2533 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
2534
2535 my $offset = $self->start - 1;
2536
2537 my $features = $sfa
2538 ? $sfa->get_all_SearchFeatures( $ticket, $self->seq_region_name,
2539 $self->start, $self->end )
2540 : [];
2541
2542 foreach (@$features) {
2543 $_->start( $_->start - $offset );
2544 $_->end( $_->end - $offset );
2545 }
2546 return $features;
2547
2548 } ## end sub get_all_SearchFeatures
2549
2550 =head2 get_all_AssemblyExceptionFeatures
2551
2552 Arg [1] : string $set (optional)
2553 Example : $slice->get_all_AssemblyExceptionFeatures();
2554 Description: Retreives all misc features which overlap this slice. If
2555 a set code is provided only features which are members of
2556 the requested set are returned.
2557 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2558 Exceptions : none
2559 Caller : general
2560 Status : Stable
2561
2562 =cut
2563
2564 sub get_all_AssemblyExceptionFeatures {
2565 my $self = shift;
2566 my $misc_set = shift;
2567
2568 my $adaptor = $self->adaptor();
2569
2570 if ( !$adaptor ) {
2571 warning('Cannot retrieve features without attached adaptor.');
2572 return [];
2573 }
2574
2575 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor();
2576
2577 ## circular BOF
2578 my $sl1 =
2579 Bio::EnsEMBL::CircularSlice->new(
2580 -COORD_SYSTEM => $self->{'coord_system'},
2581 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2582 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2583 -START => $self->{'start'},
2584 -END => $self->{'seq_region_length'},
2585 -STRAND => $self->{'strand'},
2586 -ADAPTOR => $self->adaptor() );
2587
2588 my $sl2 =
2589 Bio::EnsEMBL::CircularSlice->new(
2590 -COORD_SYSTEM => $self->{'coord_system'},
2591 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2592 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2593 -START => 1,
2594 -END => $self->{'end'},
2595 -STRAND => $self->{'strand'},
2596 -ADAPTOR => $self->adaptor() );
2597
2598 my ( @arr, @arr1, @arr2 );
2599 @arr1 = @{ $aefa->fetch_all_by_Slice($sl1) };
2600 @arr2 = @{ $aefa->fetch_all_by_Slice($sl2) };
2601 push @arr, @arr1, @arr2;
2602 return \@arr;
2603 ## circular EOF
2604 #return $aefa->fetch_all_by_Slice($self);
2605 } ## end sub get_all_AssemblyExceptionFeatures
2606
2607 =head2 get_all_MiscFeatures
2608
2609 Arg [1] : string $set (optional)
2610 Arg [2] : string $database (optional)
2611 Example : $slice->get_all_MiscFeatures('cloneset');
2612 Description: Retreives all misc features which overlap this slice. If
2613 a set code is provided only features which are members of
2614 the requested set are returned.
2615 Returntype : listref of Bio::EnsEMBL::MiscFeatures
2616 Exceptions : none
2617 Caller : general
2618 Status : Stable
2619
2620 =cut
2621
2622 sub get_all_MiscFeatures {
2623 my $self = shift;
2624 my $misc_set = shift;
2625 my $dbtype = shift;
2626 my $msa;
2627
2628 my $adaptor = $self->adaptor();
2629 if ( !$adaptor ) {
2630 warning('Cannot retrieve features without attached adaptor.');
2631 return [];
2632 }
2633
2634 my $mfa;
2635 if ($dbtype) {
2636 my $db = $reg->get_db( $adaptor->db(), $dbtype );
2637 if ( defined($db) ) {
2638 $mfa = $reg->get_adaptor( lc( $db->species() ),
2639 $db->group(), "miscfeature" );
2640 } else {
2641 $mfa = $reg->get_adaptor( $adaptor->db()->species(),
2642 $dbtype, "miscfeature" );
2643 }
2644 if ( !defined $mfa ) {
2645 warning("$dbtype misc features not available");
2646 return [];
2647 }
2648 } else {
2649 $mfa = $adaptor->db->get_MiscFeatureAdaptor();
2650 }
2651
2652 ## circular BOF
2653 my $sl1 =
2654 Bio::EnsEMBL::CircularSlice->new(
2655 -COORD_SYSTEM => $self->{'coord_system'},
2656 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2657 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2658 -START => $self->{'start'},
2659 -END => $self->{'seq_region_length'},
2660 -STRAND => $self->{'strand'},
2661 -ADAPTOR => $self->adaptor() );
2662
2663 my $sl2 =
2664 Bio::EnsEMBL::CircularSlice->new(
2665 -COORD_SYSTEM => $self->{'coord_system'},
2666 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2667 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2668 -START => 1,
2669 -END => $self->{'end'},
2670 -STRAND => $self->{'strand'},
2671 -ADAPTOR => $self->adaptor() );
2672
2673 my ( @arr, @arr1, @arr2 );
2674
2675 if ($misc_set) {
2676 @arr1 =
2677 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl1, $misc_set ) };
2678 @arr2 =
2679 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl2, $misc_set ) };
2680 push @arr, @arr1, @arr2;
2681 return \@arr;
2682 }
2683 @arr1 = @{ $mfa->fetch_all_by_Slice($sl1) };
2684 @arr2 = @{ $mfa->fetch_all_by_Slice($sl2) };
2685 push @arr, @arr1, @arr2;
2686 return \@arr;
2687 ## circular EOF
2688
2689 # if($misc_set) {
2690 # return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set);
2691 # }
2692
2693 # return $mfa->fetch_all_by_Slice($self);
2694 } ## end sub get_all_MiscFeatures
2695
2696 =head2 get_all_AffyFeatures
2697
2698 Args : (optional) list of strings - array names
2699 Example : $slice->get_all_AffyFeatures();
2700 Description: Retrieves all AffyFeature objects which overlap this slice.
2701 Returntype : listref of Bio::EnsEMBL::AffyFeature objects
2702 Exceptions : none
2703 Caller : general
2704 Status : Stable
2705
2706 =cut
2707
2708 sub get_all_AffyFeatures {
2709 my $self = shift;
2710 my @arraynames = @_;
2711
2712 my $sa = $self->adaptor();
2713 if ( !$sa ) {
2714 warning("Cannot retrieve features without attached adaptor.");
2715 }
2716 my $fa = $sa->db()->get_AffyFeatureAdaptor();
2717 my $features;
2718
2719 ## circular BOF
2720 my $sl1 =
2721 Bio::EnsEMBL::CircularSlice->new(
2722 -COORD_SYSTEM => $self->{'coord_system'},
2723 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2724 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2725 -START => $self->{'start'},
2726 -END => $self->{'seq_region_length'},
2727 -STRAND => $self->{'strand'},
2728 -ADAPTOR => $self->adaptor() );
2729
2730 my $sl2 =
2731 Bio::EnsEMBL::CircularSlice->new(
2732 -COORD_SYSTEM => $self->{'coord_system'},
2733 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2734 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2735 -START => 1,
2736 -END => $self->{'end'},
2737 -STRAND => $self->{'strand'},
2738 -ADAPTOR => $self->adaptor() );
2739
2740 my ( @arr, @arr1, @arr2 );
2741
2742 if (@arraynames) {
2743 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) };
2744 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) };
2745 } else {
2746 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) };
2747 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) };
2748 }
2749 push @arr, @arr1, @arr2;
2750 return \@arr;
2751 ## circular EOF
2752
2753 # if ( @arraynames ) {
2754 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
2755 # } else {
2756 # $features = $fa->fetch_all_by_Slice( $self );
2757 # }
2758 # return $features;
2759 } ## end sub get_all_AffyFeatures
2760
2761 =head2 get_all_OligoFeatures
2762
2763 Args : (optional) list of strings - array names
2764 Example : $slice->get_all_OligoFeatures();
2765 Description: Retrieves all OligoFeature objects which overlap this slice.
2766 Optionally just retrieve OligoFeature objects generated by
2767 probes from named arrays.
2768 Returntype : listref of Bio::EnsEMBL::OligoFeature objects
2769 Exceptions : none
2770 Caller : general
2771 Status : Stable
2772
2773 =cut
2774
2775 sub get_all_OligoFeatures {
2776 my $self = shift;
2777 my @arraynames = @_;
2778
2779 my $sa = $self->adaptor();
2780 if ( !$sa ) {
2781 warning("Cannot retrieve features without attached adaptor.");
2782 }
2783 my $fa = $sa->db()->get_OligoFeatureAdaptor();
2784 my $features;
2785
2786 ## circular BOF
2787 my $sl1 =
2788 Bio::EnsEMBL::CircularSlice->new(
2789 -COORD_SYSTEM => $self->{'coord_system'},
2790 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2791 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2792 -START => $self->{'start'},
2793 -END => $self->{'seq_region_length'},
2794 -STRAND => $self->{'strand'},
2795 -ADAPTOR => $self->adaptor() );
2796
2797 my $sl2 =
2798 Bio::EnsEMBL::CircularSlice->new(
2799 -COORD_SYSTEM => $self->{'coord_system'},
2800 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2801 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2802 -START => 1,
2803 -END => $self->{'end'},
2804 -STRAND => $self->{'strand'},
2805 -ADAPTOR => $self->adaptor() );
2806
2807 my ( @arr, @arr1, @arr2 );
2808
2809 if (@arraynames) {
2810 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) };
2811 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) };
2812 } else {
2813 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) };
2814 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) };
2815 }
2816 push @arr, @arr1, @arr2;
2817 return \@arr;
2818 ## circular EOF
2819
2820 # if ( @arraynames ) {
2821 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
2822 # } else {
2823 # $features = $fa->fetch_all_by_Slice( $self );
2824 # }
2825 # return $features;
2826 } ## end sub get_all_OligoFeatures
2827
2828 =head2 get_all_OligoFeatures_by_type
2829
2830 Arg [1] : string - type of array (e.g. AFFY or OLIGO)
2831 Arg [2] : (optional) string - logic name
2832 Example : $slice->get_all_OligoFeatures_by_type('OLIGO');
2833 Description: Retrieves all OligoFeature objects which overlap this slice and
2834 were created by probes from the specified type of array.
2835 Returntype : listref of Bio::EnsEMBL::OligoFeature objects
2836 Exceptions : throws if no type
2837 Caller : general
2838 Status : Stable
2839
2840 =cut
2841
2842 sub get_all_OligoFeatures_by_type {
2843 my ( $self, $type, $logic_name ) = @_;
2844
2845 throw('Need type as parameter') if !$type;
2846
2847 my $sa = $self->adaptor();
2848 if ( !$sa ) {
2849 warning("Cannot retrieve features without attached adaptor.");
2850 }
2851 my $fa = $sa->db()->get_OligoFeatureAdaptor();
2852
2853 ## circular BOF
2854 my $sl1 =
2855 Bio::EnsEMBL::CircularSlice->new(
2856 -COORD_SYSTEM => $self->{'coord_system'},
2857 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2858 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2859 -START => $self->{'start'},
2860 -END => $self->{'seq_region_length'},
2861 -STRAND => $self->{'strand'},
2862 -ADAPTOR => $self->adaptor() );
2863
2864 my $sl2 =
2865 Bio::EnsEMBL::CircularSlice->new(
2866 -COORD_SYSTEM => $self->{'coord_system'},
2867 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2868 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2869 -START => 1,
2870 -END => $self->{'end'},
2871 -STRAND => $self->{'strand'},
2872 -ADAPTOR => $self->adaptor() );
2873
2874 my ( @arr, @arr1, @arr2 );
2875 @arr1 = @{ $fa->fetch_all_by_Slice_type( $sl1, $type, $logic_name ) };
2876 @arr2 = @{ $fa->fetch_all_by_Slice_type( $sl2, $type, $logic_name ) };
2877 push @arr, @arr1, @arr2;
2878 return \@arr;
2879 ## circular EOF
2880
2881 #my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name );
2882 #return $features;
2883 } ## end sub get_all_OligoFeatures_by_type
2884
2885 =head2 get_all_MarkerFeatures
2886
2887 Arg [1] : (optional) string logic_name
2888 The logic name of the marker features to retrieve
2889 Arg [2] : (optional) int $priority
2890 Lower (exclusive) priority bound of the markers to retrieve
2891 Arg [3] : (optional) int $map_weight
2892 Upper (exclusive) priority bound of the markers to retrieve
2893 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
2894 Description: Retrieves all markers which lie on this slice fulfilling the
2895 specified map_weight and priority parameters (if supplied).
2896 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2897 Exceptions : none
2898 Caller : contigview, general
2899 Status : Stable
2900
2901 =cut
2902
2903 sub get_all_MarkerFeatures {
2904 my ( $self, $logic_name, $priority, $map_weight ) = @_;
2905
2906 if ( !$self->adaptor() ) {
2907 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
2908 return [];
2909 }
2910
2911 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
2912
2913 ## circular BOF
2914 my $sl1 =
2915 Bio::EnsEMBL::CircularSlice->new(
2916 -COORD_SYSTEM => $self->{'coord_system'},
2917 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2918 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2919 -START => $self->{'start'},
2920 -END => $self->{'seq_region_length'},
2921 -STRAND => $self->{'strand'},
2922 -ADAPTOR => $self->adaptor() );
2923
2924 my $sl2 =
2925 Bio::EnsEMBL::CircularSlice->new(
2926 -COORD_SYSTEM => $self->{'coord_system'},
2927 -SEQ_REGION_NAME => $self->{'seq_region_name'},
2928 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2929 -START => 1,
2930 -END => $self->{'end'},
2931 -STRAND => $self->{'strand'},
2932 -ADAPTOR => $self->adaptor() );
2933
2934 my ( @arr, @arr1, @arr2 );
2935 @arr1 = @{
2936 $ma->fetch_all_by_Slice_and_priority( $sl1, $priority, $map_weight,
2937 $logic_name ) };
2938 @arr2 = @{
2939 $ma->fetch_all_by_Slice_and_priority( $sl2, $priority, $map_weight,
2940 $logic_name ) };
2941 push @arr, @arr1, @arr2;
2942 return \@arr;
2943 ## circular EOF
2944
2945 # my $feats = $ma->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
2946 # return $feats;
2947 } ## end sub get_all_MarkerFeatures
2948
2949 =head2 get_all_compara_DnaAlignFeatures
2950
2951 Arg [1] : string $qy_species
2952 The name of the species to retrieve similarity features from
2953 Arg [2] : string $qy_assembly
2954 The name of the assembly to retrieve similarity features from
2955 Arg [3] : string $type
2956 The type of the alignment to retrieve similarity features from
2957 Arg [4] : <optional> compara dbadptor to use.
2958 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
2959 'MGSC3',
2960 'WGA');
2961 Description: Retrieves a list of DNA-DNA Alignments to the species specified
2962 by the $qy_species argument.
2963 The compara database must be attached to the core database
2964 for this call to work correctly. As well the compara database
2965 must have the core dbadaptors for both this species, and the
2966 query species added to function correctly.
2967 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
2968 Exceptions : warning if compara database is not available
2969 Caller : contigview
2970 Status : Stable
2971
2972 =cut
2973
2974 sub get_all_compara_DnaAlignFeatures {
2975 my ( $self, $qy_species, $qy_assembly, $alignment_type, $compara_db )
2976 = @_;
2977
2978 if ( !$self->adaptor() ) {
2979 warning(
2980 "Cannot retrieve DnaAlignFeatures without attached adaptor");
2981 return [];
2982 }
2983
2984 unless ( $qy_species && $alignment_type # && $qy_assembly
2985 )
2986 {
2987 throw(
2988 "Query species and assembly and alignmemt type arguments are required"
2989 );
2990 }
2991
2992 if ( !defined($compara_db) ) {
2993 $compara_db =
2994 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" );
2995 }
2996 unless ($compara_db) {
2997 warning(
2998 "Compara database must be attached to core database or passed "
2999 . "as an argument to "
3000 . "retrieve compara information" );
3001 return [];
3002 }
3003
3004 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
3005 ## circular BOF
3006 my $sl1 =
3007 Bio::EnsEMBL::CircularSlice->new(
3008 -COORD_SYSTEM => $self->{'coord_system'},
3009 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3010 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3011 -START => $self->{'start'},
3012 -END => $self->{'seq_region_length'},
3013 -STRAND => $self->{'strand'},
3014 -ADAPTOR => $self->adaptor() );
3015
3016 my $sl2 =
3017 Bio::EnsEMBL::CircularSlice->new(
3018 -COORD_SYSTEM => $self->{'coord_system'},
3019 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3020 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3021 -START => 1,
3022 -END => $self->{'end'},
3023 -STRAND => $self->{'strand'},
3024 -ADAPTOR => $self->adaptor() );
3025
3026 my ( @arr, @arr1, @arr2 );
3027 @arr1 = @{
3028 $dafa->fetch_all_by_Slice( $sl1, $qy_species, $qy_assembly,
3029 $alignment_type ) };
3030 @arr2 = @{
3031 $dafa->fetch_all_by_Slice( $sl2, $qy_species, $qy_assembly,
3032 $alignment_type ) };
3033 push @arr, @arr1, @arr2;
3034 return \@arr;
3035 ## circular EOF
3036 #return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
3037 } ## end sub get_all_compara_DnaAlignFeatures
3038
3039 =head2 get_all_compara_Syntenies
3040
3041 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
3042 Arg [2] : string $method_link_type, default is "SYNTENY"
3043 Arg [3] : <optional> compara dbadaptor to use.
3044 Description: gets all the compara syntenyies for a specfic species
3045 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
3046 Status : Stable
3047
3048 =cut
3049
3050 sub get_all_compara_Syntenies {
3051 my ( $self, $qy_species, $method_link_type, $compara_db ) = @_;
3052
3053 if ( !$self->adaptor() ) {
3054 warning("Cannot retrieve features without attached adaptor");
3055 return [];
3056 }
3057
3058 unless ($qy_species) {
3059 throw("Query species and assembly arguments are required");
3060 }
3061
3062 unless ( defined $method_link_type ) {
3063 $method_link_type = "SYNTENY";
3064 }
3065
3066 if ( !defined($compara_db) ) {
3067 $compara_db =
3068 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" );
3069 }
3070 unless ($compara_db) {
3071 warning(
3072 "Compara database must be attached to core database or passed "
3073 . "as an argument to "
3074 . "retrieve compara information" );
3075 return [];
3076 }
3077 my $gdba = $compara_db->get_GenomeDBAdaptor();
3078 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
3079 my $dfa = $compara_db->get_DnaFragAdaptor();
3080 my $sra = $compara_db->get_SyntenyRegionAdaptor();
3081
3082 my $this_gdb =
3083 $gdba->fetch_by_core_DBAdaptor( $self->adaptor()->db() );
3084 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
3085 my $mlss =
3086 $mlssa->fetch_by_method_link_type_GenomeDBs( $method_link_type,
3087 [ $this_gdb, $query_gdb ] );
3088
3089 my $cs = $self->coord_system()->name();
3090 my $sr = $self->seq_region_name();
3091 my ($dnafrag) =
3092 @{ $dfa->fetch_all_by_GenomeDB_region( $this_gdb, $cs, $sr ) };
3093
3094 ## circular BOF
3095 my $sl1 =
3096 Bio::EnsEMBL::CircularSlice->new(
3097 -COORD_SYSTEM => $self->{'coord_system'},
3098 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3099 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3100 -START => $self->{'start'},
3101 -END => $self->{'seq_region_length'},
3102 -STRAND => $self->{'strand'},
3103 -ADAPTOR => $self->adaptor() );
3104
3105 my $sl2 =
3106 Bio::EnsEMBL::CircularSlice->new(
3107 -COORD_SYSTEM => $self->{'coord_system'},
3108 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3109 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3110 -START => 1,
3111 -END => $self->{'end'},
3112 -STRAND => $self->{'strand'},
3113 -ADAPTOR => $self->adaptor() );
3114
3115 my ( @arr, @arr1, @arr2 );
3116 @arr1 = @{
3117 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag,
3118 $sl1->start, $sl1->end )
3119 };
3120 @arr2 = @{
3121 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag,
3122 $sl2->start, $sl2->end )
3123 };
3124 push @arr, @arr1, @arr2;
3125 return \@arr;
3126 ## circular EOF
3127 #return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end);
3128 } ## end sub get_all_compara_Syntenies
3129
3130 =head2 get_all_Haplotypes
3131
3132 Arg [1] : (optional) boolean $lite_flag
3133 if true lightweight haplotype objects are used
3134 Example : @haplotypes = $slice->get_all_Haplotypes;
3135 Description: Retrieves all of the haplotypes on this slice. Only works
3136 if the haplotype adaptor has been attached to the core adaptor
3137 via $dba->add_db_adaptor('haplotype', $hdba);
3138 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
3139 Exceptions : warning is Haplotype database is not available
3140 Caller : contigview, general
3141 Status : Stable
3142
3143 =cut
3144
3145 sub get_all_Haplotypes {
3146 my ( $self, $lite_flag ) = @_;
3147
3148 if ( !$self->adaptor() ) {
3149 warning("Cannot retrieve features without attached adaptor");
3150 return [];
3151 }
3152
3153 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
3154
3155 unless ($haplo_db) {
3156 warning( "Haplotype database must be attached to core database to "
3157 . "retrieve haplotype information" );
3158 return [];
3159 }
3160
3161 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
3162
3163 ## circular BOF
3164 my $sl1 =
3165 Bio::EnsEMBL::CircularSlice->new(
3166 -COORD_SYSTEM => $self->{'coord_system'},
3167 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3168 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3169 -START => $self->{'start'},
3170 -END => $self->{'seq_region_length'},
3171 -STRAND => $self->{'strand'},
3172 -ADAPTOR => $self->adaptor() );
3173
3174 my $sl2 =
3175 Bio::EnsEMBL::CircularSlice->new(
3176 -COORD_SYSTEM => $self->{'coord_system'},
3177 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3178 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3179 -START => 1,
3180 -END => $self->{'end'},
3181 -STRAND => $self->{'strand'},
3182 -ADAPTOR => $self->adaptor() );
3183
3184 my ( @arr, @arr1, @arr2 );
3185 @arr1 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl1, $lite_flag ) };
3186 @arr2 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl2, $lite_flag ) };
3187 push @arr, @arr1, @arr2;
3188 return \@arr;
3189 ## circular EOF
3190
3191 #my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
3192 #return $haplotypes;
3193 } ## end sub get_all_Haplotypes
3194
3195 sub get_all_DASFactories {
3196 my $self = shift;
3197 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
3198 }
3199
3200
3201 sub get_all_DASFeatures_dsn {
3202 my ( $self, $source_type, $dsn ) = @_;
3203
3204 if ( !$self->adaptor() ) {
3205 warning("Cannot retrieve features without attached adaptor");
3206 return [];
3207 }
3208 my @X =
3209 grep { $_->adaptor->dsn eq $dsn }
3210 $self->adaptor()->db()->_each_DASFeatureFactory;
3211
3212 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
3213 }
3214
3215 sub get_all_DAS_Features {
3216 my ($self) = @_;
3217
3218 $self->{_das_features} ||= {}; # Cache
3219 $self->{_das_styles} ||= {}; # Cache
3220 $self->{_das_segments} ||= {}; # Cache
3221 my %das_features;
3222 my %das_styles;
3223 my %das_segments;
3224 my $slice = $self;
3225
3226 foreach my $dasfact ( @{ $self->get_all_DASFactories } ) {
3227 my $dsn = $dasfact->adaptor->dsn;
3228 my $name = $dasfact->adaptor->name;
3229 # my $type = $dasfact->adaptor->type;
3230 my $url = $dasfact->adaptor->url;
3231
3232 my ($type) = $dasfact->adaptor->mapping;
3233 if ( ref $type eq 'ARRAY' ) {
3234 $type = shift @$type;
3235 }
3236 $type ||= $dasfact->adaptor->type;
3237 # Construct a cache key : SOURCE_URL/TYPE
3238 # Need the type to handle sources that serve multiple types of features
3239
3240 my $key = join( '/', $name, $type );
3241 if ( $self->{_das_features}->{$key} ) { # Use cached
3242 $das_features{$name} = $self->{_das_features}->{$key};
3243 $das_styles{$name} = $self->{_das_styles}->{$key};
3244 $das_segments{$name} = $self->{_das_segments}->{$key};
3245 } else { # Get fresh data
3246 my ( $featref, $styleref, $segref ) =
3247 $dasfact->fetch_all_Features( $slice, $type );
3248 $self->{_das_features}->{$key} = $featref;
3249 $self->{_das_styles}->{$key} = $styleref;
3250 $self->{_das_segments}->{$key} = $segref;
3251 $das_features{$name} = $featref;
3252 $das_styles{$name} = $styleref;
3253 $das_segments{$name} = $segref;
3254 }
3255 } ## end foreach my $dasfact ( @{ $self...})
3256
3257 return ( \%das_features, \%das_styles, \%das_segments );
3258 } ## end sub get_all_DAS_Features
3259
3260 =head2 get_all_DASFeatures
3261
3262 Arg [1] : none
3263 Example : $features = $slice->get_all_DASFeatures;
3264 Description: Retreives a hash reference to a hash of DAS feature
3265 sets, keyed by the DNS, NOTE the values of this hash
3266 are an anonymous array containing:
3267 (1) a pointer to an array of features;
3268 (2) a pointer to the DAS stylesheet
3269 Returntype : hashref of Bio::SeqFeatures
3270 Exceptions : ?
3271 Caller : webcode
3272 Status : Stable
3273
3274 =cut
3275 sub get_all_DASFeatures {
3276 my ( $self, $source_type ) = @_;
3277
3278 if ( !$self->adaptor() ) {
3279 warning("Cannot retrieve features without attached adaptor");
3280 return [];
3281 }
3282
3283 my %genomic_features = map {
3284 ( $_->adaptor->dsn =>
3285 [ $_->fetch_all_Features( $self, $source_type ) ] )
3286 } $self->adaptor()->db()->_each_DASFeatureFactory;
3287 return \%genomic_features;
3288
3289 }
3290
3291 sub old_get_all_DASFeatures {
3292 my ( $self, @args ) = @_;
3293
3294 if ( !$self->adaptor() ) {
3295 warning("Cannot retrieve features without attached adaptor");
3296 return [];
3297 }
3298
3299 my %genomic_features =
3300 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
3301 $self->adaptor()->db()->_each_DASFeatureFactory;
3302 return \%genomic_features;
3303
3304 }
3305
3306 =head2 get_all_ExternalFeatures
3307
3308 Arg [1] : (optional) string $track_name
3309 If specified only features from ExternalFeatureAdaptors with
3310 the track name $track_name are retrieved.
3311 If not set, all features from every ExternalFeatureAdaptor are
3312 retrieved.
3313 Example : @x_features = @{$slice->get_all_ExternalFeatures}
3314 Description: Retrieves features on this slice from external feature adaptors
3315 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
3316 coordinates
3317 Exceptions : none
3318 Caller : general
3319 Status : Stable
3320
3321 =cut
3322
3323 sub get_all_ExternalFeatures {
3324 my ( $self, $track_name ) = @_;
3325 if ( !$self->adaptor() ) {
3326 warning("Cannot retrieve features without attached adaptor");
3327 return [];
3328 }
3329 my $features = [];
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
3331 my @xf_adaptors = ();
3332 if ($track_name) {
3333 #use a specific adaptor
3334 if ( exists $xfa_hash->{$track_name} ) {
3335 push @xf_adaptors, $xfa_hash->{$track_name};
3336 }
3337 } else {
3338 #use all of the adaptors
3339 push @xf_adaptors, values %$xfa_hash;
3340 }
3341
3342 ## circular BOF
3343 my $sl1 =
3344 Bio::EnsEMBL::CircularSlice->new(
3345 -COORD_SYSTEM => $self->{'coord_system'},
3346 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3347 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3348 -START => $self->{'start'},
3349 -END => $self->{'seq_region_length'},
3350 -STRAND => $self->{'strand'},
3351 -ADAPTOR => $self->adaptor() );
3352
3353 my $sl2 =
3354 Bio::EnsEMBL::CircularSlice->new(
3355 -COORD_SYSTEM => $self->{'coord_system'},
3356 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3357 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3358 -START => 1,
3359 -END => $self->{'end'},
3360 -STRAND => $self->{'strand'},
3361 -ADAPTOR => $self->adaptor() );
3362
3363 foreach my $xfa (@xf_adaptors) {
3364 push @$features, @{ $xfa->fetch_all_by_Slice($sl1) };
3365 push @$features, @{ $xfa->fetch_all_by_Slice($sl2) };
3366 }
3367 return $features;
3368 ## circular EOF
3369 } ## end sub get_all_ExternalFeatures
3370
3371 =head2 get_all_DitagFeatures
3372
3373 Arg [1] : (optional) string ditag type
3374 Arg [1] : (optional) string logic_name
3375 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
3376 Description: Retrieves the DitagFeatures of a specific type which overlap
3377 this slice with. If type is not defined, all features are
3378 retrieved.
3379 Returntype : listref of Bio::EnsEMBL::DitagFeatures
3380 Exceptions : warning if slice does not have attached adaptor
3381 Caller : general
3382 Status : Stable
3383
3384 =cut
3385
3386 sub get_all_DitagFeatures {
3387 my ( $self, $type, $logic_name ) = @_;
3388
3389 if ( !$self->adaptor() ) {
3390 warning('Cannot get DitagFeatures without attached adaptor');
3391 return [];
3392 }
3393
3394 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor();
3395
3396 ## circular BOF
3397 my $sl1 =
3398 Bio::EnsEMBL::CircularSlice->new(
3399 -COORD_SYSTEM => $self->{'coord_system'},
3400 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3401 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3402 -START => $self->{'start'},
3403 -END => $self->{'seq_region_length'},
3404 -STRAND => $self->{'strand'},
3405 -ADAPTOR => $self->adaptor() );
3406
3407 my $sl2 =
3408 Bio::EnsEMBL::CircularSlice->new(
3409 -COORD_SYSTEM => $self->{'coord_system'},
3410 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3411 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3412 -START => 1,
3413 -END => $self->{'end'},
3414 -STRAND => $self->{'strand'},
3415 -ADAPTOR => $self->adaptor() );
3416
3417 my ( @arr, @arr1, @arr2 );
3418 @arr1 = @{ $dfa->fetch_all_by_Slice( $sl1, $type, $logic_name ) };
3419 @arr2 = @{ $dfa->fetch_all_by_Slice( $sl2, $type, $logic_name ) };
3420 push @arr, @arr1, @arr2;
3421 return \@arr;
3422 ## circular EOF
3423 #return $dfa->fetch_all_by_Slice($self, $type, $logic_name);
3424 } ## end sub get_all_DitagFeatures
3425
3426 # GENERIC FEATURES (See DBAdaptor.pm)
3427
3428 =head2 get_generic_features
3429
3430 Arg [1] : (optional) List of names of generic feature types to return.
3431 If no feature names are given, all generic features are
3432 returned.
3433 Example : my %features = %{$slice->get_generic_features()};
3434 Description: Gets generic features via the generic feature adaptors that
3435 have been added via DBAdaptor->add_GenricFeatureAdaptor (if
3436 any)
3437 Returntype : Hash of named features.
3438 Exceptions : none
3439 Caller : none
3440 Status : Stable
3441
3442 =cut
3443
3444 sub get_generic_features {
3445
3446 my ( $self, @names ) = @_;
3447
3448 if ( !$self->adaptor() ) {
3449 warning('Cannot retrieve features without attached adaptor');
3450 return [];
3451 }
3452
3453 my $db = $self->adaptor()->db();
3454
3455 my %features = (); # this will hold the results
3456
3457 # get the adaptors for each feature
3458 my %adaptors = %{ $db->get_GenericFeatureAdaptors(@names) };
3459
3460 foreach my $adaptor_name ( keys(%adaptors) ) {
3461
3462 my $adaptor_obj = $adaptors{$adaptor_name};
3463 # get the features and add them to the hash
3464 ## circular BOF
3465 my $sl1 =
3466 Bio::EnsEMBL::CircularSlice->new(
3467 -COORD_SYSTEM => $self->{'coord_system'},
3468 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3469 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3470 -START => $self->{'start'},
3471 -END => $self->{'seq_region_length'},
3472 -STRAND => $self->{'strand'},
3473 -ADAPTOR => $self->adaptor() );
3474
3475 my $sl2 =
3476 Bio::EnsEMBL::CircularSlice->new(
3477 -COORD_SYSTEM => $self->{'coord_system'},
3478 -SEQ_REGION_NAME => $self->{'seq_region_name'},
3479 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
3480 -START => 1,
3481 -END => $self->{'end'},
3482 -STRAND => $self->{'strand'},
3483 -ADAPTOR => $self->adaptor() );
3484
3485 my ( @arr1, @arr2 );
3486 my $features_ref;
3487 @arr1 = @{ $adaptor_obj->fetch_all_by_Slice($sl1) };
3488 @arr2 = @{ $adaptor_obj->fetch_all_by_Slice($sl2) };
3489 push @{$features_ref}, @arr1, @arr2;
3490 ## circular EOF
3491 #my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
3492
3493 # add each feature to the hash to be returned
3494 foreach my $feature (@$features_ref) {
3495 $features{$adaptor_name} = $feature;
3496 }
3497 } ## end foreach my $adaptor_name ( ...)
3498
3499 return \%features;
3500
3501 } ## end sub get_generic_features
3502
3503 =head2 project_to_slice
3504
3505 Arg [1] : Slice to project to.
3506 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
3507 foreach my $segment ( @$chr_projection ){
3508 $chr_slice = $segment->to_Slice();
3509 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
3510 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
3511 '-'.$chr_slice->end().
3512 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
3513 }
3514 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
3515 and we want to state which one to project to.
3516 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
3517 can also be used as [$start,$end,$slice] triplets.
3518 Exceptions : none
3519 Caller : none
3520 Status : At Risk
3521
3522 =cut
3523
3524 sub project_to_slice {
3525 my $self = shift;
3526 my $to_slice = shift;
3527
3528 throw('Slice argument is required') if ( !$to_slice );
3529
3530 my $slice_adaptor = $self->adaptor();
3531
3532 if ( !$slice_adaptor ) {
3533 warning("Cannot project without attached adaptor.");
3534 return [];
3535 }
3536
3537 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
3538
3539 my $cs = $to_slice->coord_system();
3540 my $slice_cs = $self->coord_system();
3541
3542 my @projection;
3543 my $current_start = 1;
3544
3545 # decompose this slice into its symlinked components.
3546 # this allows us to handle haplotypes and PARs
3547 my $normal_slice_proj =
3548 $slice_adaptor->fetch_normalized_slice_projection($self);
3549 foreach my $segment (@$normal_slice_proj) {
3550 my $normal_slice = $segment->[2];
3551
3552 $slice_cs = $normal_slice->coord_system();
3553
3554 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
3555 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
3556
3557 # perform the mapping between this slice and the requested system
3558 my @coords;
3559
3560 if ( defined $asm_mapper ) {
3561 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
3562 $normal_slice->start(),
3563 $normal_slice->end(),
3564 $normal_slice->strand(),
3565 $slice_cs,
3566 undef,
3567 $to_slice );
3568 } else {
3569 $coords[0] =
3570 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
3571 $normal_slice->end() );
3572 }
3573
3574 #construct a projection from the mapping results and return it
3575 foreach my $coord (@coords) {
3576 my $coord_start = $coord->start();
3577 my $coord_end = $coord->end();
3578 my $length = $coord_end - $coord_start + 1;
3579
3580 #skip gaps
3581 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
3582 my $coord_cs = $coord->coord_system();
3583
3584 # If the normalised projection just ended up mapping to the
3585 # same coordinate system we were already in then we should just
3586 # return the original region. This can happen for example, if we
3587 # were on a PAR region on Y which refered to X and a projection to
3588 # 'toplevel' was requested.
3589 # if($coord_cs->equals($slice_cs)) {
3590 # # trim off regions which are not defined
3591 # return $self->_constrain_to_region();
3592 # }
3593
3594 #create slices for the mapped-to coord system
3595 my $slice =
3596 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
3597 $coord_start, $coord_end, $coord->strand() );
3598
3599 my $current_end = $current_start + $length - 1;
3600
3601 push @projection,
3602 bless( [ $current_start, $current_end, $slice ],
3603 "Bio::EnsEMBL::ProjectionSegment" );
3604 }
3605
3606 $current_start += $length;
3607 } ## end foreach my $coord (@coords)
3608 } ## end foreach my $segment (@$normal_slice_proj)
3609
3610 # delete the cache as we may want to map to different set next time and old
3611 # results will be cached.
3612
3613 $mapper_aptr->delete_cache;
3614
3615 return \@projection;
3616 } ## end sub project_to_slice
3617
3618 #
3619 # Bioperl Bio::PrimarySeqI methods:
3620 #
3621
3622 =head2 id
3623
3624 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3625
3626 =cut
3627
3628 sub id { name(@_); }
3629
3630 =head2 display_id
3631
3632 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3633
3634 =cut
3635
3636 sub display_id { name(@_); }
3637
3638 =head2 primary_id
3639
3640 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3641
3642 =cut
3643
3644 sub primary_id { name(@_); }
3645
3646 =head2 desc
3647
3648 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3649
3650 =cut
3651
3652 sub desc {
3653 return $_[0]->coord_system->name() . ' ' . $_[0]->seq_region_name();
3654 }
3655
3656 =head2 moltype
3657
3658 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3659
3660 =cut
3661
3662 sub moltype { return 'dna'; }
3663
3664 =head2 alphabet
3665
3666 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3667
3668 =cut
3669
3670 sub alphabet { return 'dna'; }
3671
3672 =head2 accession_number
3673
3674 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3675
3676 =cut
3677
3678 sub accession_number { name(@_); }
3679
3680 =head2 is_circular
3681
3682 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3683
3684 =cut
3685
3686 sub is_circular {
3687 my ($self) = @_;
3688
3689 if ( !defined( $self->{'circular'} ) ) {
3690 my @attrs =
3691 grep { $_ } @{ $self->get_all_Attributes('circular_seq') };
3692 $self->{'circular'} = @attrs ? 1 : 0;
3693 }
3694
3695 return $self->{'circular'};
3696 }
3697
3698 # sub DEPRECATED METHODS #
3699 ###############################################################################
3700
3701 =head1 DEPRECATED METHODS
3702
3703 =cut
3704
3705 =head2 get_all_supercontig_Slices
3706
3707 DEPRECATED use get_tiling_path("NTcontig") instead
3708
3709 =cut
3710
3711 sub get_all_supercontig_Slices {
3712 my $self = shift;
3713
3714 deprecate("Use get_tiling_path('NTcontig') instead");
3715
3716 my $result = [];
3717
3718 if ( $self->adaptor() ) {
3719 my $superctg_names =
3720 $self->adaptor()->list_overlapping_supercontigs($self);
3721
3722 for my $name (@$superctg_names) {
3723 my $slice;
3724 $slice = $self->adaptor()->fetch_by_supercontig_name($name);
3725 $slice->name($name);
3726 push( @$result, $slice );
3727 }
3728 } else {
3729 warning(
3730 "Slice needs to be attached to a database to get supercontigs");
3731 }
3732
3733 return $result;
3734 }
3735
3736 =head2 get_Chromosome
3737
3738 DEPRECATED use this instead:
3739 $slice_adp->fetch_by_region('chromosome',$slice->seq_region_name)
3740
3741 =cut
3742
3743 sub get_Chromosome {
3744 my $self = shift @_;
3745
3746 deprecate( "Use SliceAdaptor::fetch_by_region('chromosome',"
3747 . '$slice->seq_region_name) instead' );
3748
3749 my $csa = $self->adaptor->db->get_CoordSystemAdaptor();
3750 my ($top_cs) = @{ $csa->fetch_all() };
3751
3752 return
3753 $self->adaptor->fetch_by_region( $top_cs->name(),
3754 $self->seq_region_name(),
3755 undef, undef, undef, $top_cs->version() );
3756 }
3757
3758 =head2 chr_name
3759
3760 DEPRECATED use seq_region_name() instead
3761
3762 =cut
3763
3764 sub chr_name {
3765 deprecate("Use seq_region_name() instead");
3766 seq_region_name(@_);
3767 }
3768
3769 =head2 chr_start
3770
3771 DEPRECATED use start() instead
3772
3773 =cut
3774
3775 sub chr_start {
3776 deprecate('Use start() instead');
3777 start(@_);
3778 }
3779
3780 =head2 chr_end
3781
3782 DEPRECATED use end() instead
3783
3784 Returntype : int
3785 Exceptions : none
3786 Caller : SliceAdaptor, general
3787
3788 =cut
3789
3790 sub chr_end {
3791 deprecate('Use end() instead');
3792 end(@_);
3793 }
3794
3795 =head2 assembly_type
3796
3797 DEPRECATED use version instead
3798
3799 =cut
3800
3801 sub assembly_type {
3802 my $self = shift;
3803 deprecate('Use $slice->coord_system()->version() instead.');
3804 return $self->coord_system->version();
3805 }
3806
3807 =head2 get_tiling_path
3808
3809 DEPRECATED use project instead
3810
3811 =cut
3812
3813 sub get_tiling_path {
3814 my $self = shift;
3815 deprecate('Use $slice->project("seqlevel") instead.');
3816 return [];
3817 }
3818
3819 =head2 dbID
3820
3821 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead
3822
3823 =cut
3824
3825 sub dbID {
3826 my $self = shift;
3827 deprecate('Use SliceAdaptor::get_seq_region_id instead.');
3828 if ( !$self->adaptor ) {
3829 warning('Cannot retrieve seq_region_id without attached adaptor.');
3830 return 0;
3831 }
3832 return $self->adaptor->get_seq_region_id($self);
3833 }
3834
3835 =head2 get_all_MapFrags
3836
3837 DEPRECATED use get_all_MiscFeatures instead
3838
3839 =cut
3840
3841 sub get_all_MapFrags {
3842 my $self = shift;
3843 deprecate('Use get_all_MiscFeatures instead');
3844 return $self->get_all_MiscFeatures(@_);
3845 }
3846
3847 =head2 has_MapSet
3848
3849 DEPRECATED use get_all_MiscFeatures instead
3850
3851 =cut
3852
3853 sub has_MapSet {
3854 my ( $self, $mapset_name ) = @_;
3855 deprecate('Use get_all_MiscFeatures instead');
3856 my $mfs = $self->get_all_MiscFeatures($mapset_name);
3857 return ( @$mfs > 0 );
3858 }
3859
3860 1;