0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Slice - Arbitary Slice of a genome
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 $sa = $db->get_SliceAdaptor;
|
|
28
|
|
29 $slice =
|
|
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
|
|
31
|
|
32 # get some attributes of the slice
|
|
33 my $seqname = $slice->seq_region_name();
|
|
34 my $start = $slice->start();
|
|
35 my $end = $slice->end();
|
|
36
|
|
37 # get the sequence from the slice
|
|
38 my $seq = $slice->seq();
|
|
39
|
|
40 # get some features from the slice
|
|
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
|
|
42 # do something with a gene
|
|
43 }
|
|
44
|
|
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
|
|
46 # do something with dna-dna alignments
|
|
47 }
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 A Slice object represents a region of a genome. It can be used to retrieve
|
|
52 sequence or features from an area of interest.
|
|
53
|
|
54 =head1 METHODS
|
|
55
|
|
56 =cut
|
|
57
|
|
58 package Bio::EnsEMBL::Slice;
|
|
59 use vars qw(@ISA);
|
|
60 use strict;
|
|
61
|
|
62 use Bio::PrimarySeqI;
|
|
63
|
|
64
|
|
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
66 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
|
|
67 use Bio::EnsEMBL::RepeatMaskedSlice;
|
|
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
69 use Bio::EnsEMBL::ProjectionSegment;
|
|
70 use Bio::EnsEMBL::Registry;
|
|
71 use Bio::EnsEMBL::Utils::Iterator;
|
|
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor;
|
|
73
|
|
74 use Bio::EnsEMBL::StrainSlice;
|
|
75 #use Bio::EnsEMBL::IndividualSlice;
|
|
76 #use Bio::EnsEMBL::IndividualSliceFactory;
|
|
77 use Bio::EnsEMBL::Mapper::RangeRegistry;
|
|
78 use Bio::EnsEMBL::SeqRegionSynonym;
|
|
79 use Scalar::Util qw(weaken isweak);
|
|
80
|
|
81 # use Data::Dumper;
|
|
82
|
|
83 my $reg = "Bio::EnsEMBL::Registry";
|
|
84
|
|
85 @ISA = qw(Bio::PrimarySeqI);
|
|
86
|
|
87
|
|
88 =head2 new
|
|
89
|
|
90 Arg [...] : List of named arguments
|
|
91 Bio::EnsEMBL::CoordSystem COORD_SYSTEM
|
|
92 string SEQ_REGION_NAME,
|
|
93 int START,
|
|
94 int END,
|
|
95 int SEQ_REGION_LENGTH, (optional)
|
|
96 string SEQ (optional)
|
|
97 int STRAND, (optional, defaults to 1)
|
|
98 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
|
|
99 Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
|
|
100 -start => 1,
|
|
101 -end => 10000,
|
|
102 -strand => 1,
|
|
103 -seq_region_name => 'X',
|
|
104 -seq_region_length => 12e6,
|
|
105 -adaptor => $slice_adaptor);
|
|
106 Description: Creates a new slice object. A slice represents a region
|
|
107 of sequence in a particular coordinate system. Slices can be
|
|
108 used to retrieve sequence and features from an area of
|
|
109 interest in a genome.
|
|
110
|
|
111 Coordinates start at 1 and are inclusive. Negative
|
|
112 coordinates or coordinates exceeding the length of the
|
|
113 seq_region are permitted. Start must be less than or equal.
|
|
114 to end regardless of the strand.
|
|
115
|
|
116 Slice objects are immutable. Once instantiated their attributes
|
|
117 (with the exception of the adaptor) may not be altered. To
|
|
118 change the attributes a new slice must be created.
|
|
119 Returntype : Bio::EnsEMBL::Slice
|
|
120 Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
|
|
121 Caller : general, Bio::EnsEMBL::SliceAdaptor
|
|
122 Status : Stable
|
|
123
|
|
124 =cut
|
|
125
|
|
126 sub new {
|
|
127 my $caller = shift;
|
|
128
|
|
129 #new can be called as a class or object method
|
|
130 my $class = ref($caller) || $caller;
|
|
131
|
|
132 my ($seq, $coord_system, $seq_region_name, $seq_region_length,
|
|
133 $start, $end, $strand, $adaptor, $empty) =
|
|
134 rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
|
|
135 START END STRAND ADAPTOR EMPTY)], @_);
|
|
136
|
|
137 #empty is only for backwards compatibility
|
|
138 if ($empty) {
|
|
139 deprecate( "Creation of empty slices is no longer needed"
|
|
140 . "and is deprecated" );
|
|
141 my $self = bless( { 'empty' => 1 }, $class );
|
|
142 $self->adaptor($adaptor);
|
|
143 return $self;
|
|
144 }
|
|
145
|
|
146 if ( !defined($seq_region_name) ) {
|
|
147 throw('SEQ_REGION_NAME argument is required');
|
|
148 }
|
|
149 if ( !defined($start) ) { throw('START argument is required') }
|
|
150 if ( !defined($end) ) { throw('END argument is required') }
|
|
151
|
|
152 ## if ( $start > $end + 1 ) {
|
|
153 ## throw('start must be less than or equal to end+1');
|
|
154 ## }
|
|
155
|
|
156 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
|
|
157
|
|
158 if ( $seq_region_length <= 0 ) {
|
|
159 throw('SEQ_REGION_LENGTH must be > 0');
|
|
160 }
|
|
161
|
|
162 if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
|
|
163 throw('SEQ must be the same length as the defined LENGTH not '
|
|
164 . CORE::length($seq)
|
|
165 . ' compared to '
|
|
166 . ( $end - $start + 1 ) );
|
|
167 }
|
|
168
|
|
169 if(defined($coord_system)) {
|
|
170 if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
|
|
171 throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
|
|
172 }
|
|
173 if($coord_system->is_top_level()) {
|
|
174 throw('Cannot create slice on toplevel CoordSystem.');
|
|
175 }
|
|
176 } else {
|
|
177 warning("Slice without coordinate system");
|
|
178 #warn(stack_trace_dump());
|
|
179 }
|
|
180
|
|
181 $strand ||= 1;
|
|
182
|
|
183 if($strand != 1 && $strand != -1) {
|
|
184 throw('STRAND argument must be -1 or 1');
|
|
185 }
|
|
186
|
|
187 if(defined($adaptor)) {
|
|
188 if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
|
|
189 throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
|
|
190 }
|
|
191 }
|
|
192
|
|
193 my $self = bless {'coord_system' => $coord_system,
|
|
194 'seq' => $seq,
|
|
195 'seq_region_name' => $seq_region_name,
|
|
196 'seq_region_length' => $seq_region_length,
|
|
197 'start' => int($start),
|
|
198 'end' => int($end),
|
|
199 'strand' => $strand}, $class;
|
|
200
|
|
201 $self->adaptor($adaptor);
|
|
202
|
|
203 return $self;
|
|
204
|
|
205 }
|
|
206
|
|
207 =head2 new_fast
|
|
208
|
|
209 Arg [1] : hashref to be blessed
|
|
210 Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
|
|
211 Exceptions : none
|
|
212 Returntype : Bio::EnsEMBL::Slice
|
|
213 Caller : general
|
|
214 Status : Stable
|
|
215
|
|
216 =cut
|
|
217
|
|
218
|
|
219 sub new_fast {
|
|
220 my $class = shift;
|
|
221 my $hashref = shift;
|
|
222 my $self = bless $hashref, $class;
|
|
223 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
|
|
224 return $self;
|
|
225 }
|
|
226
|
|
227 =head2 adaptor
|
|
228
|
|
229 Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
|
|
230 Example : $adaptor = $slice->adaptor();
|
|
231 Description: Getter/Setter for the slice object adaptor used
|
|
232 by this slice for database interaction.
|
|
233 Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor
|
|
234 Exceptions : thorws if argument passed is not a SliceAdaptor
|
|
235 Caller : general
|
|
236 Status : Stable
|
|
237
|
|
238 =cut
|
|
239
|
|
240 sub adaptor{
|
|
241 my $self = shift;
|
|
242
|
|
243 if(@_) {
|
|
244 my $ad = shift;
|
|
245 if(defined($ad)) {
|
|
246 if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
|
|
247 throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
|
|
248 }
|
|
249 }
|
|
250 weaken($self->{'adaptor'} = $ad);
|
|
251 }
|
|
252
|
|
253 return $self->{'adaptor'};
|
|
254 }
|
|
255
|
|
256
|
|
257
|
|
258 =head2 seq_region_name
|
|
259
|
|
260 Arg [1] : none
|
|
261 Example : $seq_region = $slice->seq_region_name();
|
|
262 Description: Returns the name of the seq_region that this slice is on. For
|
|
263 example if this slice is in chromosomal coordinates the
|
|
264 seq_region_name might be 'X' or '10'.
|
|
265
|
|
266 This function was formerly named chr_name, but since slices can
|
|
267 now be on coordinate systems other than chromosomal it has been
|
|
268 changed.
|
|
269 Returntype : string
|
|
270 Exceptions : none
|
|
271 Caller : general
|
|
272 Status : Stable
|
|
273
|
|
274 =cut
|
|
275
|
|
276 sub seq_region_name {
|
|
277 my $self = shift;
|
|
278 return $self->{'seq_region_name'};
|
|
279 }
|
|
280
|
|
281
|
|
282
|
|
283 =head2 seq_region_length
|
|
284
|
|
285 Arg [1] : none
|
|
286 Example : $seq_region_length = $slice->seq_region_length();
|
|
287 Description: Returns the length of the entire seq_region that this slice is
|
|
288 on. For example if this slice is on a chromosome this will be
|
|
289 the length (in basepairs) of the entire chromosome.
|
|
290 Returntype : int
|
|
291 Exceptions : none
|
|
292 Caller : general
|
|
293 Status : Stable
|
|
294
|
|
295 =cut
|
|
296
|
|
297 sub seq_region_length {
|
|
298 my $self = shift;
|
|
299 return $self->{'seq_region_length'};
|
|
300 }
|
|
301
|
|
302
|
|
303 =head2 coord_system
|
|
304
|
|
305 Arg [1] : none
|
|
306 Example : print $slice->coord_system->name();
|
|
307 Description: Returns the coordinate system that this slice is on.
|
|
308 Returntype : Bio::EnsEMBL::CoordSystem
|
|
309 Exceptions : none
|
|
310 Caller : general
|
|
311 Status : Stable
|
|
312
|
|
313 =cut
|
|
314
|
|
315 sub coord_system {
|
|
316 my $self = shift;
|
|
317 return $self->{'coord_system'};
|
|
318 }
|
|
319
|
|
320 =head2 coord_system_name
|
|
321
|
|
322 Arg [1] : none
|
|
323 Example : print $slice->coord_system_name()
|
|
324 Description: Convenience method. Gets the name of the coord_system which
|
|
325 this slice is on.
|
|
326 Returns undef if this Slice does not have an attached
|
|
327 CoordSystem.
|
|
328 Returntype: string or undef
|
|
329 Exceptions: none
|
|
330 Caller : general
|
|
331 Status : Stable
|
|
332
|
|
333 =cut
|
|
334
|
|
335 sub coord_system_name {
|
|
336 my $self = shift;
|
|
337 my $csystem = $self->{'coord_system'};
|
|
338 return ($csystem) ? $csystem->name() : undef;
|
|
339 }
|
|
340
|
|
341
|
|
342 =head2 centrepoint
|
|
343
|
|
344 Arg [1] : none
|
|
345 Example : $cp = $slice->centrepoint();
|
|
346 Description: Returns the mid position of this slice relative to the
|
|
347 start of the sequence region that it was created on.
|
|
348 Coordinates are inclusive and start at 1.
|
|
349 Returntype : int
|
|
350 Exceptions : none
|
|
351 Caller : general
|
|
352 Status : Stable
|
|
353
|
|
354 =cut
|
|
355
|
|
356 sub centrepoint {
|
|
357 my $self = shift;
|
|
358 return ($self->{'start'}+$self->{'end'})/2;
|
|
359 }
|
|
360
|
|
361 =head2 start
|
|
362
|
|
363 Arg [1] : none
|
|
364 Example : $start = $slice->start();
|
|
365 Description: Returns the start position of this slice relative to the
|
|
366 start of the sequence region that it was created on.
|
|
367 Coordinates are inclusive and start at 1. Negative coordinates
|
|
368 or coordinates exceeding the length of the sequence region are
|
|
369 permitted. Start is always less than or equal to end
|
|
370 regardless of the orientation of the slice.
|
|
371 Returntype : int
|
|
372 Exceptions : none
|
|
373 Caller : general
|
|
374 Status : Stable
|
|
375
|
|
376 =cut
|
|
377
|
|
378 sub start {
|
|
379 my $self = shift;
|
|
380 return $self->{'start'};
|
|
381 }
|
|
382
|
|
383
|
|
384
|
|
385 =head2 end
|
|
386
|
|
387 Arg [1] : none
|
|
388 Example : $end = $slice->end();
|
|
389 Description: Returns the end position of this slice relative to the
|
|
390 start of the sequence region that it was created on.
|
|
391 Coordinates are inclusive and start at 1. Negative coordinates
|
|
392 or coordinates exceeding the length of the sequence region are
|
|
393 permitted. End is always greater than or equal to start
|
|
394 regardless of the orientation of the slice.
|
|
395 Returntype : int
|
|
396 Exceptions : none
|
|
397 Caller : general
|
|
398 Status : Stable
|
|
399
|
|
400 =cut
|
|
401
|
|
402 sub end {
|
|
403 my $self = shift;
|
|
404 return $self->{'end'};
|
|
405 }
|
|
406
|
|
407
|
|
408
|
|
409 =head2 strand
|
|
410
|
|
411 Arg [1] : none
|
|
412 Example : $strand = $slice->strand();
|
|
413 Description: Returns the orientation of this slice on the seq_region it has
|
|
414 been created on
|
|
415 Returntype : int (either 1 or -1)
|
|
416 Exceptions : none
|
|
417 Caller : general, invert
|
|
418 Status : Stable
|
|
419
|
|
420 =cut
|
|
421
|
|
422 sub strand{
|
|
423 my $self = shift;
|
|
424 return $self->{'strand'};
|
|
425 }
|
|
426
|
|
427
|
|
428
|
|
429
|
|
430
|
|
431 =head2 name
|
|
432
|
|
433 Arg [1] : none
|
|
434 Example : my $results = $cache{$slice->name()};
|
|
435 Description: Returns the name of this slice. The name is formatted as a colon
|
|
436 delimited string with the following attributes:
|
|
437 coord_system:version:seq_region_name:start:end:strand
|
|
438
|
|
439 Slices with the same name are equivalent and thus the name can
|
|
440 act as a hash key.
|
|
441 Returntype : string
|
|
442 Exceptions : none
|
|
443 Caller : general
|
|
444 Status : Stable
|
|
445
|
|
446 =cut
|
|
447
|
|
448 sub name {
|
|
449 my $self = shift;
|
|
450
|
|
451 my $cs = $self->{'coord_system'};
|
|
452
|
|
453 return join(':',
|
|
454 ($cs) ? $cs->name() : '',
|
|
455 ($cs) ? $cs->version() : '',
|
|
456 $self->{'seq_region_name'},
|
|
457 $self->{'start'},
|
|
458 $self->{'end'},
|
|
459 $self->{'strand'});
|
|
460 }
|
|
461
|
|
462
|
|
463
|
|
464 =head2 length
|
|
465
|
|
466 Arg [1] : none
|
|
467 Example : $length = $slice->length();
|
|
468 Description: Returns the length of this slice in basepairs
|
|
469 Returntype : int
|
|
470 Exceptions : none
|
|
471 Caller : general
|
|
472 Status : Stable
|
|
473
|
|
474 =cut
|
|
475
|
|
476 sub length {
|
|
477 my ($self) = @_;
|
|
478
|
|
479 my $length = $self->{'end'} - $self->{'start'} + 1;
|
|
480
|
|
481 if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) {
|
|
482 $length += $self->{'seq_region_length'};
|
|
483 }
|
|
484
|
|
485 return $length;
|
|
486 }
|
|
487
|
|
488 =head2 is_reference
|
|
489 Arg : none
|
|
490 Example : my $reference = $slice->is_reference()
|
|
491 Description: Returns 1 if slice is a reference slice else 0
|
|
492 Returntype : int
|
|
493 Caller : general
|
|
494 Status : At Risk
|
|
495
|
|
496 =cut
|
|
497
|
|
498 sub is_reference {
|
|
499 my ($self) = @_;
|
|
500
|
|
501 if ( !defined( $self->{'is_reference'} ) ) {
|
|
502 $self->{'is_reference'} =
|
|
503 $self->adaptor()->is_reference( $self->get_seq_region_id() );
|
|
504 }
|
|
505
|
|
506 return $self->{'is_reference'};
|
|
507 }
|
|
508
|
|
509 =head2 is_toplevel
|
|
510 Arg : none
|
|
511 Example : my $top = $slice->is_toplevel()
|
|
512 Description: Returns 1 if slice is a toplevel slice else 0
|
|
513 Returntype : int
|
|
514 Caller : general
|
|
515 Status : At Risk
|
|
516
|
|
517 =cut
|
|
518
|
|
519 sub is_toplevel {
|
|
520 my ($self) = @_;
|
|
521
|
|
522 if ( !defined( $self->{'toplevel'} ) ) {
|
|
523 $self->{'toplevel'} =
|
|
524 $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
|
|
525 }
|
|
526
|
|
527 return $self->{'toplevel'};
|
|
528 }
|
|
529
|
|
530 =head2 is_circular
|
|
531 Arg : none
|
|
532 Example : my $circ = $slice->is_circular()
|
|
533 Description: Returns 1 if slice is a circular slice else 0
|
|
534 Returntype : int
|
|
535 Caller : general
|
|
536 Status : Stable
|
|
537
|
|
538 =cut
|
|
539
|
|
540 sub is_circular {
|
|
541 my ($self) = @_;
|
|
542 my $adaptor = $self->adaptor();
|
|
543 return 0 if ! defined $adaptor;
|
|
544 if (! exists $self->{'circular'}) {
|
|
545 my $id = $adaptor->get_seq_region_id($self);
|
|
546 $self->{circular} = $adaptor->is_circular($id);
|
|
547 }
|
|
548 return $self->{circular};
|
|
549 }
|
|
550
|
|
551 =head2 invert
|
|
552
|
|
553 Arg [1] : none
|
|
554 Example : $inverted_slice = $slice->invert;
|
|
555 Description: Creates a copy of this slice on the opposite strand and
|
|
556 returns it.
|
|
557 Returntype : Bio::EnsEMBL::Slice
|
|
558 Exceptions : none
|
|
559 Caller : general
|
|
560 Status : Stable
|
|
561
|
|
562 =cut
|
|
563
|
|
564 sub invert {
|
|
565 my $self = shift;
|
|
566
|
|
567 # make a shallow copy of the slice via a hash copy and flip the strand
|
|
568 my %s = %$self;
|
|
569 $s{'strand'} = $self->{'strand'} * -1;
|
|
570
|
|
571 # reverse compliment any attached sequence
|
|
572 reverse_comp(\$s{'seq'}) if($s{'seq'});
|
|
573
|
|
574 # bless and return the copy
|
|
575 return bless \%s, ref $self;
|
|
576 }
|
|
577
|
|
578
|
|
579
|
|
580 =head2 seq
|
|
581
|
|
582 Arg [1] : none
|
|
583 Example : print "SEQUENCE = ", $slice->seq();
|
|
584 Description: Returns the sequence of the region represented by this
|
|
585 slice formatted as a string.
|
|
586 Returntype : string
|
|
587 Exceptions : none
|
|
588 Caller : general
|
|
589 Status : Stable
|
|
590
|
|
591 =cut
|
|
592
|
|
593 sub seq {
|
|
594 my $self = shift;
|
|
595
|
|
596 # special case for in-between (insert) coordinates
|
|
597 return '' if($self->start() == $self->end() + 1);
|
|
598
|
|
599 return $self->{'seq'} if($self->{'seq'});
|
|
600
|
|
601 if($self->adaptor()) {
|
|
602 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
|
|
603 return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
|
|
604 }
|
|
605
|
|
606 # no attached sequence, and no db, so just return Ns
|
|
607 return 'N' x $self->length();
|
|
608 }
|
|
609
|
|
610
|
|
611
|
|
612 =head2 subseq
|
|
613
|
|
614 Arg [1] : int $startBasePair
|
|
615 relative to start of slice, which is 1.
|
|
616 Arg [2] : int $endBasePair
|
|
617 relative to start of slice.
|
|
618 Arg [3] : (optional) int $strand
|
|
619 The strand of the slice to obtain sequence from. Default
|
|
620 value is 1.
|
|
621 Description: returns string of dna sequence
|
|
622 Returntype : txt
|
|
623 Exceptions : end should be at least as big as start
|
|
624 strand must be set
|
|
625 Caller : general
|
|
626 Status : Stable
|
|
627
|
|
628 =cut
|
|
629
|
|
630 sub subseq {
|
|
631 my ( $self, $start, $end, $strand ) = @_;
|
|
632
|
|
633 if ( $end+1 < $start ) {
|
|
634 throw("End coord + 1 is less than start coord");
|
|
635 }
|
|
636
|
|
637 # handle 'between' case for insertions
|
|
638 return '' if( $start == $end + 1);
|
|
639
|
|
640 $strand = 1 unless(defined $strand);
|
|
641
|
|
642 if ( $strand != -1 && $strand != 1 ) {
|
|
643 throw("Invalid strand [$strand] in call to Slice::subseq.");
|
|
644 }
|
|
645 my $subseq;
|
|
646 if($self->adaptor){
|
|
647 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
|
|
648 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
|
|
649 ( $self, $start,
|
|
650 $end, $strand )};
|
|
651 } else {
|
|
652 ## check for gap at the beginning and pad it with Ns
|
|
653 if ($start < 1) {
|
|
654 $subseq = "N" x (1 - $start);
|
|
655 $start = 1;
|
|
656 }
|
|
657 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
|
|
658 ## check for gap at the end and pad it with Ns
|
|
659 if ($end > $self->length()) {
|
|
660 $subseq .= "N" x ($end - $self->length());
|
|
661 }
|
|
662 reverse_comp(\$subseq) if($strand == -1);
|
|
663 }
|
|
664 return $subseq;
|
|
665 }
|
|
666
|
|
667 =head2 sub_Slice_Iterator
|
|
668
|
|
669 Arg[1] : int The chunk size to request
|
|
670 Example : my $i = $slice->sub_Slice_Iterator(60000);
|
|
671 while($i->has_next()) { warn $i->next()->name(); }
|
|
672 Description : Returns an iterator which batches subslices of this Slice
|
|
673 in the requested chunk size
|
|
674 Returntype : Bio::EnsEMBL::Utils::Iterator next() will return the next
|
|
675 chunk of Slice
|
|
676 Exceptions : None
|
|
677
|
|
678 =cut
|
|
679
|
|
680 sub sub_Slice_Iterator {
|
|
681 my ($self, $chunk_size) = @_;
|
|
682 throw "Need a chunk size to divide the slice by" if ! $chunk_size;
|
|
683 my $here = 1;
|
|
684 my $end = $self->length();
|
|
685 my $iterator_sub = sub {
|
|
686 while($here <= $end) {
|
|
687 my $there = $here + $chunk_size - 1;
|
|
688 $there = $end if($there > $end);
|
|
689 my $slice = $self->sub_Slice($here, $there);
|
|
690 $here = $there + 1;
|
|
691 return $slice;
|
|
692 }
|
|
693 return;
|
|
694 };
|
|
695 return Bio::EnsEMBL::Utils::Iterator->new($iterator_sub);
|
|
696 }
|
|
697
|
|
698 =head2 assembly_exception_type
|
|
699
|
|
700 Example : $self->assembly_exception_type();
|
|
701 Description : Returns the type of slice this is. If it is reference then you
|
|
702 will get 'REF' back. Otherwise you will get the first
|
|
703 element from C<get_all_AssemblyExceptionFeatures()>. If no
|
|
704 assembly exception exists you will get an empty string back.
|
|
705 Returntype : String
|
|
706 Exceptions : None
|
|
707 Caller : Public
|
|
708 Status : Beta
|
|
709
|
|
710 =cut
|
|
711
|
|
712 sub assembly_exception_type {
|
|
713 my ($self) = @_;
|
|
714 my $type = q{};
|
|
715 if($self->is_reference()) {
|
|
716 $type = 'REF';
|
|
717 }
|
|
718 else {
|
|
719 my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
|
|
720 if(@{$assembly_exceptions}) {
|
|
721 $type = $assembly_exceptions->[0]->type();
|
|
722 }
|
|
723 }
|
|
724 return $type;
|
|
725 }
|
|
726
|
|
727 =head2 is_chromosome
|
|
728
|
|
729 Example : print ($slice->is_chromosome()) ? 'I am a chromosome' : 'Not one';
|
|
730 Description : Uses a number of rules known to indicate a chromosome region
|
|
731 other and takes into account those regions which can be
|
|
732 placed on a Chromsome coordinate system but in fact are not
|
|
733 assembled into one.
|
|
734 Returntype : Boolean indicates if the current object is a chromosome
|
|
735 Exceptions : None
|
|
736
|
|
737 =cut
|
|
738
|
|
739 sub is_chromosome {
|
|
740 my ($self) = @_;
|
|
741 my $coord_system = $self->coord_system->name;
|
|
742 my $seq_name = $self->seq_region_name;
|
|
743
|
|
744 if (($seq_name =~ /random
|
|
745 |^Un\d{4}$
|
|
746 |^Un\.\d{3}\.\d*$
|
|
747 |E\d\d\w*$
|
|
748 |_NT_
|
|
749 |scaffold_
|
|
750 |cutchr
|
|
751 |unplaced
|
|
752 |chunk
|
|
753 |clone
|
|
754 |contig
|
|
755 |genescaffold
|
|
756 |group
|
|
757 |reftig
|
|
758 |supercontig
|
|
759 |ultracontig
|
|
760 /x) or ( $coord_system !~ /^chromosome$/i )) {
|
|
761 return 0;
|
|
762 }
|
|
763
|
|
764 return 1;
|
|
765 }
|
|
766
|
|
767
|
|
768 =head2 get_base_count
|
|
769
|
|
770 Arg [1] : none
|
|
771 Example : $c_count = $slice->get_base_count->{'c'};
|
|
772 Description: Retrieves a hashref containing the counts of each bases in the
|
|
773 sequence spanned by this slice. The format of the hash is :
|
|
774 { 'a' => num,
|
|
775 'c' => num,
|
|
776 't' => num,
|
|
777 'g' => num,
|
|
778 'n' => num,
|
|
779 '%gc' => num }
|
|
780
|
|
781 All bases which are not in the set [A,a,C,c,T,t,G,g] are
|
|
782 included in the 'n' count. The 'n' count could therefore be
|
|
783 inclusive of ambiguity codes such as 'y'.
|
|
784 The %gc is the ratio of GC to AT content as in:
|
|
785 total(GC)/total(ACTG) * 100
|
|
786 This function is conservative in its memory usage and scales to
|
|
787 work for entire chromosomes.
|
|
788 Returntype : hashref
|
|
789 Exceptions : none
|
|
790 Caller : general
|
|
791 Status : Stable
|
|
792
|
|
793 =cut
|
|
794
|
|
795 sub get_base_count {
|
|
796 my $self = shift;
|
|
797
|
|
798 my $a = 0;
|
|
799 my $c = 0;
|
|
800 my $t = 0;
|
|
801 my $g = 0;
|
|
802
|
|
803 my $start = 1;
|
|
804 my $end;
|
|
805
|
|
806 my $RANGE = 100_000;
|
|
807 my $len = $self->length();
|
|
808
|
|
809 my $seq;
|
|
810
|
|
811 while ( $start <= $len ) {
|
|
812 $end = $start + $RANGE - 1;
|
|
813 $end = $len if ( $end > $len );
|
|
814
|
|
815 $seq = $self->subseq( $start, $end );
|
|
816
|
|
817 $a += $seq =~ tr/Aa//;
|
|
818 $c += $seq =~ tr/Cc//;
|
|
819 $t += $seq =~ tr/Tt//;
|
|
820 $g += $seq =~ tr/Gg//;
|
|
821
|
|
822 $start = $end + 1;
|
|
823 }
|
|
824
|
|
825 my $actg = $a + $c + $t + $g;
|
|
826
|
|
827 my $gc_content = 0;
|
|
828 if ( $actg > 0 ) { # Avoid dividing by 0
|
|
829 $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 );
|
|
830 }
|
|
831
|
|
832 return { 'a' => $a,
|
|
833 'c' => $c,
|
|
834 't' => $t,
|
|
835 'g' => $g,
|
|
836 'n' => $len - $actg,
|
|
837 '%gc' => $gc_content };
|
|
838 }
|
|
839
|
|
840
|
|
841
|
|
842 =head2 project
|
|
843
|
|
844 Arg [1] : string $name
|
|
845 The name of the coordinate system to project this slice onto
|
|
846 Arg [2] : string $version
|
|
847 The version of the coordinate system (such as 'NCBI34') to
|
|
848 project this slice onto
|
|
849 Example :
|
|
850 my $clone_projection = $slice->project('clone');
|
|
851
|
|
852 foreach my $seg (@$clone_projection) {
|
|
853 my $clone = $segment->to_Slice();
|
|
854 print $slice->seq_region_name(), ':', $seg->from_start(), '-',
|
|
855 $seg->from_end(), ' -> ',
|
|
856 $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(),
|
|
857 $clone->strand(), "\n";
|
|
858 }
|
|
859 Description: Returns the results of 'projecting' this slice onto another
|
|
860 coordinate system. Projecting to a coordinate system that
|
|
861 the slice is assembled from is analagous to retrieving a tiling
|
|
862 path. This method may also be used to 'project up' to a higher
|
|
863 level coordinate system, however.
|
|
864
|
|
865 This method returns a listref of triplets [start,end,slice]
|
|
866 which represents the projection. The start and end defined the
|
|
867 region of this slice which is made up of the third value of
|
|
868 the triplet: a slice in the requested coordinate system.
|
|
869 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
|
|
870 can also be used as [$start,$end,$slice] triplets
|
|
871 Exceptions : none
|
|
872 Caller : general
|
|
873 Status : Stable
|
|
874
|
|
875 =cut
|
|
876
|
|
877 sub project {
|
|
878 my $self = shift;
|
|
879 my $cs_name = shift;
|
|
880 my $cs_version = shift;
|
|
881
|
|
882 throw('Coord_system name argument is required') if(!$cs_name);
|
|
883
|
|
884 my $slice_adaptor = $self->adaptor();
|
|
885
|
|
886 if(!$slice_adaptor) {
|
|
887 warning("Cannot project without attached adaptor.");
|
|
888 return [];
|
|
889 }
|
|
890
|
|
891 if(!$self->coord_system()) {
|
|
892 warning("Cannot project without attached coord system.");
|
|
893 return [];
|
|
894 }
|
|
895
|
|
896
|
|
897 my $db = $slice_adaptor->db();
|
|
898 my $csa = $db->get_CoordSystemAdaptor();
|
|
899 my $cs = $csa->fetch_by_name($cs_name, $cs_version);
|
|
900 my $slice_cs = $self->coord_system();
|
|
901
|
|
902 if(!$cs) {
|
|
903 throw("Cannot project to unknown coordinate system " .
|
|
904 "[$cs_name $cs_version]");
|
|
905 }
|
|
906
|
|
907 # no mapping is needed if the requested coord system is the one we are in
|
|
908 # but we do need to check if some of the slice is outside of defined regions
|
|
909 if($slice_cs->equals($cs)) {
|
|
910 return $self->_constrain_to_region();
|
|
911 }
|
|
912
|
|
913 my @projection;
|
|
914 my $current_start = 1;
|
|
915
|
|
916 # decompose this slice into its symlinked components.
|
|
917 # this allows us to handle haplotypes and PARs
|
|
918 my $normal_slice_proj =
|
|
919 $slice_adaptor->fetch_normalized_slice_projection($self);
|
|
920 foreach my $segment (@$normal_slice_proj) {
|
|
921 my $normal_slice = $segment->[2];
|
|
922
|
|
923 $slice_cs = $normal_slice->coord_system();
|
|
924
|
|
925 my $asma = $db->get_AssemblyMapperAdaptor();
|
|
926 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
|
|
927
|
|
928 # perform the mapping between this slice and the requested system
|
|
929 my @coords;
|
|
930
|
|
931 if( defined $asm_mapper ) {
|
|
932 @coords = $asm_mapper->map($normal_slice->seq_region_name(),
|
|
933 $normal_slice->start(),
|
|
934 $normal_slice->end(),
|
|
935 $normal_slice->strand(),
|
|
936 $slice_cs);
|
|
937 } else {
|
|
938 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
|
|
939 $normal_slice->end());
|
|
940 }
|
|
941
|
|
942
|
|
943 # my $last_rank = 0;
|
|
944 #construct a projection from the mapping results and return it
|
|
945 foreach my $coord (@coords) {
|
|
946 my $coord_start = $coord->start();
|
|
947 my $coord_end = $coord->end();
|
|
948 my $length = $coord_end - $coord_start + 1;
|
|
949
|
|
950 if ( $coord_start > $coord_end ) {
|
|
951 $length =
|
|
952 $normal_slice->seq_region_length() -
|
|
953 $coord_start +
|
|
954 $coord_end + 1;
|
|
955 }
|
|
956
|
|
957 # if( $last_rank != $coord->rank){
|
|
958 # $current_start = 1;
|
|
959 # print "LAST rank has changed to ".$coord->rank."from $last_rank \n";
|
|
960 # }
|
|
961 # $last_rank = $coord->rank;
|
|
962
|
|
963 #skip gaps
|
|
964 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
965
|
|
966 my $coord_cs = $coord->coord_system();
|
|
967
|
|
968 # If the normalised projection just ended up mapping to the
|
|
969 # same coordinate system we were already in then we should just
|
|
970 # return the original region. This can happen for example, if we
|
|
971 # were on a PAR region on Y which refered to X and a projection to
|
|
972 # 'toplevel' was requested.
|
|
973 if($coord_cs->equals($slice_cs)) {
|
|
974 # trim off regions which are not defined
|
|
975 return $self->_constrain_to_region();
|
|
976 }
|
|
977 #create slices for the mapped-to coord system
|
|
978 my $slice = $slice_adaptor->fetch_by_seq_region_id(
|
|
979 $coord->id(),
|
|
980 $coord_start,
|
|
981 $coord_end,
|
|
982 $coord->strand());
|
|
983
|
|
984 my $current_end = $current_start + $length - 1;
|
|
985
|
|
986 if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
|
|
987 $current_end -= $slice->seq_region_length();
|
|
988 }
|
|
989
|
|
990 push @projection, bless([$current_start, $current_end, $slice],
|
|
991 "Bio::EnsEMBL::ProjectionSegment");
|
|
992 }
|
|
993
|
|
994 $current_start += $length;
|
|
995 }
|
|
996 }
|
|
997
|
|
998 return \@projection;
|
|
999 }
|
|
1000
|
|
1001
|
|
1002 sub _constrain_to_region {
|
|
1003 my $self = shift;
|
|
1004
|
|
1005 my $entire_len = $self->seq_region_length();
|
|
1006
|
|
1007 #if the slice has negative coordinates or coordinates exceeding the
|
|
1008 #exceeding length of the sequence region we want to shrink the slice to
|
|
1009 #the defined region
|
|
1010
|
|
1011 if($self->{'start'} > $entire_len || $self->{'end'} < 1) {
|
|
1012 #none of this slice is in a defined region
|
|
1013 return [];
|
|
1014 }
|
|
1015
|
|
1016 my $right_contract = 0;
|
|
1017 my $left_contract = 0;
|
|
1018 if($self->{'end'} > $entire_len) {
|
|
1019 $right_contract = $entire_len - $self->{'end'};
|
|
1020 }
|
|
1021 if($self->{'start'} < 1) {
|
|
1022 $left_contract = $self->{'start'} - 1;
|
|
1023 }
|
|
1024
|
|
1025 my $new_slice;
|
|
1026 if($left_contract || $right_contract) {
|
|
1027 #if slice in negative strand, need to swap contracts
|
|
1028 if ($self->strand == 1) {
|
|
1029 $new_slice = $self->expand($left_contract, $right_contract);
|
|
1030 }
|
|
1031 elsif ($self->strand == -1) {
|
|
1032 $new_slice = $self->expand($right_contract, $left_contract);
|
|
1033 }
|
|
1034 } else {
|
|
1035 $new_slice = $self;
|
|
1036 }
|
|
1037
|
|
1038 return [bless [1-$left_contract, $self->length()+$right_contract,
|
|
1039 $new_slice], "Bio::EnsEMBL::ProjectionSegment" ];
|
|
1040 }
|
|
1041
|
|
1042
|
|
1043 =head2 expand
|
|
1044
|
|
1045 Arg [1] : (optional) int $five_prime_expand
|
|
1046 The number of basepairs to shift this slices five_prime
|
|
1047 coordinate by. Positive values make the slice larger,
|
|
1048 negative make the slice smaller.
|
|
1049 coordinate left.
|
|
1050 Default = 0.
|
|
1051 Arg [2] : (optional) int $three_prime_expand
|
|
1052 The number of basepairs to shift this slices three_prime
|
|
1053 coordinate by. Positive values make the slice larger,
|
|
1054 negative make the slice smaller.
|
|
1055 Default = 0.
|
|
1056 Arg [3] : (optional) bool $force_expand
|
|
1057 if set to 1, then the slice will be contracted even in the case
|
|
1058 when shifts $five_prime_expand and $three_prime_expand overlap.
|
|
1059 In that case $five_prime_expand and $three_prime_expand will be set
|
|
1060 to a maximum possible number and that will result in the slice
|
|
1061 which would have only 2pbs.
|
|
1062 Default = 0.
|
|
1063 Arg [4] : (optional) int* $fpref
|
|
1064 The reference to a number of basepairs to shift this slices five_prime
|
|
1065 coordinate by. Normally it would be set to $five_prime_expand.
|
|
1066 But in case when $five_prime_expand shift can not be applied and
|
|
1067 $force_expand is set to 1, then $$fpref will contain the maximum possible
|
|
1068 shift
|
|
1069 Arg [5] : (optional) int* $tpref
|
|
1070 The reference to a number of basepairs to shift this slices three_prime
|
|
1071 coordinate by. Normally it would be set to $three_prime_expand.
|
|
1072 But in case when $five_prime_expand shift can not be applied and
|
|
1073 $force_expand is set to 1, then $$tpref will contain the maximum possible
|
|
1074 shift
|
|
1075 Example : my $expanded_slice = $slice->expand( 1000, 1000);
|
|
1076 my $contracted_slice = $slice->expand(-1000,-1000);
|
|
1077 my $shifted_right_slice = $slice->expand(-1000, 1000);
|
|
1078 my $shifted_left_slice = $slice->expand( 1000,-1000);
|
|
1079 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
|
|
1080
|
|
1081 Description: Returns a slice which is a resized copy of this slice. The
|
|
1082 start and end are moved outwards from the center of the slice
|
|
1083 if positive values are provided and moved inwards if negative
|
|
1084 values are provided. This slice remains unchanged. A slice
|
|
1085 may not be contracted below 1bp but may grow to be arbitrarily
|
|
1086 large.
|
|
1087 Returntype : Bio::EnsEMBL::Slice
|
|
1088 Exceptions : warning if an attempt is made to contract the slice below 1bp
|
|
1089 Caller : general
|
|
1090 Status : Stable
|
|
1091
|
|
1092 =cut
|
|
1093
|
|
1094 sub expand {
|
|
1095 my $self = shift;
|
|
1096 my $five_prime_shift = shift || 0;
|
|
1097 my $three_prime_shift = shift || 0;
|
|
1098 my $force_expand = shift || 0;
|
|
1099 my $fpref = shift;
|
|
1100 my $tpref = shift;
|
|
1101
|
|
1102 if ( $self->{'seq'} ) {
|
|
1103 warning(
|
|
1104 "Cannot expand a slice which has a manually attached sequence ");
|
|
1105 return undef;
|
|
1106 }
|
|
1107
|
|
1108 my $sshift = $five_prime_shift;
|
|
1109 my $eshift = $three_prime_shift;
|
|
1110
|
|
1111 if ( $self->{'strand'} != 1 ) {
|
|
1112 $eshift = $five_prime_shift;
|
|
1113 $sshift = $three_prime_shift;
|
|
1114 }
|
|
1115
|
|
1116 my $new_start = $self->{'start'} - $sshift;
|
|
1117 my $new_end = $self->{'end'} + $eshift;
|
|
1118
|
|
1119 if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
|
|
1120
|
|
1121 if ( $new_start <= 0 ) {
|
|
1122 $new_start = $self->seq_region_length() + $new_start;
|
|
1123 }
|
|
1124 if ( $new_start > $self->seq_region_length() ) {
|
|
1125 $new_start -= $self->seq_region_length();
|
|
1126 }
|
|
1127
|
|
1128 if ( $new_end <= 0 ) {
|
|
1129 $new_end = $self->seq_region_length() + $new_end;
|
|
1130 }
|
|
1131 if ( $new_end > $self->seq_region_length() ) {
|
|
1132 $new_end -= $self->seq_region_length();
|
|
1133 }
|
|
1134
|
|
1135 }
|
|
1136
|
|
1137 if ( $new_start > $new_end && (not $self->is_circular() ) ) {
|
|
1138
|
|
1139 if ($force_expand) {
|
|
1140 # Apply max possible shift, if force_expand is set
|
|
1141 if ( $sshift < 0 ) {
|
|
1142 # if we are contracting the slice from the start - move the
|
|
1143 # start just before the end
|
|
1144 $new_start = $new_end - 1;
|
|
1145 $sshift = $self->{start} - $new_start;
|
|
1146 }
|
|
1147
|
|
1148 if ( $new_start > $new_end ) {
|
|
1149 # if the slice still has a negative length - try to move the
|
|
1150 # end
|
|
1151 if ( $eshift < 0 ) {
|
|
1152 $new_end = $new_start + 1;
|
|
1153 $eshift = $new_end - $self->{end};
|
|
1154 }
|
|
1155 }
|
|
1156 # return the values by which the primes were actually shifted
|
|
1157 $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
|
|
1158 $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
|
|
1159 }
|
|
1160 if ( $new_start > $new_end ) {
|
|
1161 throw('Slice start cannot be greater than slice end');
|
|
1162 }
|
|
1163 }
|
|
1164
|
|
1165 #fastest way to copy a slice is to do a shallow hash copy
|
|
1166 my %new_slice = %$self;
|
|
1167 $new_slice{'start'} = int($new_start);
|
|
1168 $new_slice{'end'} = int($new_end);
|
|
1169
|
|
1170 return bless \%new_slice, ref($self);
|
|
1171 } ## end sub expand
|
|
1172
|
|
1173
|
|
1174
|
|
1175 =head2 sub_Slice
|
|
1176
|
|
1177 Arg 1 : int $start
|
|
1178 Arg 2 : int $end
|
|
1179 Arge [3] : int $strand
|
|
1180 Example : none
|
|
1181 Description: Makes another Slice that covers only part of this slice
|
|
1182 If a slice is requested which lies outside of the boundaries
|
|
1183 of this function will return undef. This means that
|
|
1184 behaviour will be consistant whether or not the slice is
|
|
1185 attached to the database (i.e. if there is attached sequence
|
|
1186 to the slice). Alternatively the expand() method or the
|
|
1187 SliceAdaptor::fetch_by_region method can be used instead.
|
|
1188 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
|
|
1189 Exceptions : none
|
|
1190 Caller : general
|
|
1191 Status : Stable
|
|
1192
|
|
1193 =cut
|
|
1194
|
|
1195 sub sub_Slice {
|
|
1196 my ( $self, $start, $end, $strand ) = @_;
|
|
1197
|
|
1198 if( $start < 1 || $start > $self->{'end'} ) {
|
|
1199 # throw( "start argument not valid" );
|
|
1200 return undef;
|
|
1201 }
|
|
1202
|
|
1203 if( $end < $start || $end > $self->{'end'} ) {
|
|
1204 # throw( "end argument not valid" )
|
|
1205 return undef;
|
|
1206 }
|
|
1207
|
|
1208 my ( $new_start, $new_end, $new_strand, $new_seq );
|
|
1209 if( ! defined $strand ) {
|
|
1210 $strand = 1;
|
|
1211 }
|
|
1212
|
|
1213 if( $self->{'strand'} == 1 ) {
|
|
1214 $new_start = $self->{'start'} + $start - 1;
|
|
1215 $new_end = $self->{'start'} + $end - 1;
|
|
1216 $new_strand = $strand;
|
|
1217 } else {
|
|
1218 $new_start = $self->{'end'} - $end + 1;;
|
|
1219 $new_end = $self->{'end'} - $start + 1;
|
|
1220 $new_strand = -$strand;
|
|
1221 }
|
|
1222
|
|
1223 if( defined $self->{'seq'} ) {
|
|
1224 $new_seq = $self->subseq( $start, $end, $strand );
|
|
1225 }
|
|
1226
|
|
1227 #fastest way to copy a slice is to do a shallow hash copy
|
|
1228 my $new_slice = {%{$self}};
|
|
1229 $new_slice->{'start'} = int($new_start);
|
|
1230 $new_slice->{'end'} = int($new_end);
|
|
1231 $new_slice->{'strand'} = $new_strand;
|
|
1232 if( $new_seq ) {
|
|
1233 $new_slice->{'seq'} = $new_seq;
|
|
1234 }
|
|
1235 weaken($new_slice->{adaptor});
|
|
1236
|
|
1237 return bless $new_slice, ref($self);
|
|
1238 }
|
|
1239
|
|
1240
|
|
1241
|
|
1242 =head2 seq_region_Slice
|
|
1243
|
|
1244 Arg [1] : none
|
|
1245 Example : $slice = $slice->seq_region_Slice();
|
|
1246 Description: Returns a slice which spans the whole seq_region which this slice
|
|
1247 is on. For example if this is a slice which spans a small region
|
|
1248 of chromosome X, this method will return a slice which covers the
|
|
1249 entire chromosome X. The returned slice will always have strand
|
|
1250 of 1 and start of 1. This method cannot be used if the sequence
|
|
1251 of the slice has been set manually.
|
|
1252 Returntype : Bio::EnsEMBL::Slice
|
|
1253 Exceptions : warning if called when sequence of Slice has been set manually.
|
|
1254 Caller : general
|
|
1255 Status : Stable
|
|
1256
|
|
1257 =cut
|
|
1258
|
|
1259 sub seq_region_Slice {
|
|
1260 my $self = shift;
|
|
1261
|
|
1262 if($self->{'seq'}){
|
|
1263 warning("Cannot get a seq_region_Slice of a slice which has manually ".
|
|
1264 "attached sequence ");
|
|
1265 return undef;
|
|
1266 }
|
|
1267
|
|
1268 # quick shallow copy
|
|
1269 my $slice;
|
|
1270 %{$slice} = %{$self};
|
|
1271 bless $slice, ref($self);
|
|
1272 weaken($slice->{adaptor});
|
|
1273
|
|
1274 $slice->{'start'} = 1;
|
|
1275 $slice->{'end'} = $slice->{'seq_region_length'};
|
|
1276 $slice->{'strand'} = 1;
|
|
1277
|
|
1278 return $slice;
|
|
1279 }
|
|
1280
|
|
1281
|
|
1282 =head2 get_seq_region_id
|
|
1283
|
|
1284 Arg [1] : none
|
|
1285 Example : my $seq_region_id = $slice->get_seq_region_id();
|
|
1286 Description: Gets the internal identifier of the seq_region that this slice
|
|
1287 is on. Note that this function will not work correctly if this
|
|
1288 slice does not have an attached adaptor. Also note that it may
|
|
1289 be better to go through the SliceAdaptor::get_seq_region_id
|
|
1290 method if you are working with multiple databases since is
|
|
1291 possible to work with slices from databases with different
|
|
1292 internal seq_region identifiers.
|
|
1293 Returntype : int or undef if slices does not have attached adaptor
|
|
1294 Exceptions : warning if slice is not associated with a SliceAdaptor
|
|
1295 Caller : assembly loading scripts, general
|
|
1296 Status : Stable
|
|
1297
|
|
1298 =cut
|
|
1299
|
|
1300 sub get_seq_region_id {
|
|
1301 my ($self) = @_;
|
|
1302
|
|
1303 if($self->adaptor) {
|
|
1304 return $self->adaptor->get_seq_region_id($self);
|
|
1305 } else {
|
|
1306 warning('Cannot retrieve seq_region_id without attached adaptor.');
|
|
1307 return undef;
|
|
1308 }
|
|
1309 }
|
|
1310
|
|
1311
|
|
1312 =head2 get_all_Attributes
|
|
1313
|
|
1314 Arg [1] : optional string $attrib_code
|
|
1315 The code of the attribute type to retrieve values for.
|
|
1316 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
|
|
1317 @slice_attributes = @{$slice->get_all_Attributes()};
|
|
1318 Description: Gets a list of Attributes of this slice''s seq_region.
|
|
1319 Optionally just get Attrubutes for given code.
|
|
1320 Returntype : listref Bio::EnsEMBL::Attribute
|
|
1321 Exceptions : warning if slice does not have attached adaptor
|
|
1322 Caller : general
|
|
1323 Status : Stable
|
|
1324
|
|
1325 =cut
|
|
1326
|
|
1327 sub get_all_Attributes {
|
|
1328 my $self = shift;
|
|
1329 my $attrib_code = shift;
|
|
1330
|
|
1331 my $result;
|
|
1332 my @results;
|
|
1333
|
|
1334 if(!$self->adaptor()) {
|
|
1335 warning('Cannot get attributes without an adaptor.');
|
|
1336 return [];
|
|
1337 }
|
|
1338
|
|
1339 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
|
|
1340
|
|
1341 if( defined $attrib_code ) {
|
|
1342 @results = grep { uc($_->code()) eq uc($attrib_code) }
|
|
1343 @{$attribute_adaptor->fetch_all_by_Slice( $self )};
|
|
1344 $result = \@results;
|
|
1345 } else {
|
|
1346 $result = $attribute_adaptor->fetch_all_by_Slice( $self );
|
|
1347 }
|
|
1348
|
|
1349 return $result;
|
|
1350 }
|
|
1351
|
|
1352
|
|
1353 =head2 get_all_PredictionTranscripts
|
|
1354
|
|
1355 Arg [1] : (optional) string $logic_name
|
|
1356 The name of the analysis used to generate the prediction
|
|
1357 transcripts obtained.
|
|
1358 Arg [2] : (optional) boolean $load_exons
|
|
1359 If set to true will force loading of all PredictionExons
|
|
1360 immediately rather than loading them on demand later. This
|
|
1361 is faster if there are a large number of PredictionTranscripts
|
|
1362 and the exons will be used.
|
|
1363 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
|
|
1364 Description: Retrieves the list of prediction transcripts which overlap
|
|
1365 this slice with logic_name $logic_name. If logic_name is
|
|
1366 not defined then all prediction transcripts are retrieved.
|
|
1367 Returntype : listref of Bio::EnsEMBL::PredictionTranscript
|
|
1368 Exceptions : warning if slice does not have attached adaptor
|
|
1369 Caller : none
|
|
1370 Status : Stable
|
|
1371
|
|
1372 =cut
|
|
1373
|
|
1374 sub get_all_PredictionTranscripts {
|
|
1375 my ($self,$logic_name, $load_exons) = @_;
|
|
1376
|
|
1377 if(!$self->adaptor()) {
|
|
1378 warning('Cannot get PredictionTranscripts without attached adaptor');
|
|
1379 return [];
|
|
1380 }
|
|
1381 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
|
|
1382 return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
|
|
1383 }
|
|
1384
|
|
1385
|
|
1386
|
|
1387 =head2 get_all_DnaAlignFeatures
|
|
1388
|
|
1389 Arg [1] : (optional) string $logic_name
|
|
1390 The name of the analysis performed on the dna align features
|
|
1391 to obtain.
|
|
1392 Arg [2] : (optional) float $score
|
|
1393 The mimimum score of the features to retrieve
|
|
1394 Arg [3] : (optional) string $dbtype
|
|
1395 The name of an attached database to retrieve the features from
|
|
1396 instead, e.g. 'otherfeatures'.
|
|
1397 Arg [4] : (optional) float hcoverage
|
|
1398 The minimum hcoverage od the featurs to retrieve
|
|
1399 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
|
|
1400 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
|
|
1401 logic name $logic_name and with score above $score. If
|
|
1402 $logic_name is not defined features of all logic names are
|
|
1403 retrieved. If $score is not defined features of all scores are
|
|
1404 retrieved.
|
|
1405 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
|
|
1406 Exceptions : warning if slice does not have attached adaptor
|
|
1407 Caller : general
|
|
1408 Status : Stable
|
|
1409
|
|
1410 =cut
|
|
1411
|
|
1412 sub get_all_DnaAlignFeatures {
|
|
1413 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
|
|
1414
|
|
1415 if(!$self->adaptor()) {
|
|
1416 warning('Cannot get DnaAlignFeatures without attached adaptor');
|
|
1417 return [];
|
|
1418 }
|
|
1419
|
|
1420 my $db;
|
|
1421
|
|
1422 if($dbtype) {
|
|
1423 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1424 if(!$db) {
|
|
1425 warning("Don't have db $dbtype returning empty list\n");
|
|
1426 return [];
|
|
1427 }
|
|
1428 } else {
|
|
1429 $db = $self->adaptor->db;
|
|
1430 }
|
|
1431
|
|
1432 my $dafa = $db->get_DnaAlignFeatureAdaptor();
|
|
1433
|
|
1434 if(defined($score) and defined ($hcoverage)){
|
|
1435 warning "cannot specify score and hcoverage. Using score only";
|
|
1436 }
|
|
1437 if(defined($score)){
|
|
1438 return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
|
|
1439 }
|
|
1440 return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
|
|
1441 }
|
|
1442
|
|
1443
|
|
1444
|
|
1445 =head2 get_all_ProteinAlignFeatures
|
|
1446
|
|
1447 Arg [1] : (optional) string $logic_name
|
|
1448 The name of the analysis performed on the protein align features
|
|
1449 to obtain.
|
|
1450 Arg [2] : (optional) float $score
|
|
1451 The mimimum score of the features to retrieve
|
|
1452 Arg [3] : (optional) string $dbtype
|
|
1453 The name of an attached database to retrieve features from
|
|
1454 instead.
|
|
1455 Arg [4] : (optional) float hcoverage
|
|
1456 The minimum hcoverage od the featurs to retrieve
|
|
1457 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
|
|
1458 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
|
|
1459 logic name $logic_name and with score above $score. If
|
|
1460 $logic_name is not defined features of all logic names are
|
|
1461 retrieved. If $score is not defined features of all scores are
|
|
1462 retrieved.
|
|
1463 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
|
|
1464 Exceptions : warning if slice does not have attached adaptor
|
|
1465 Caller : general
|
|
1466 Status : Stable
|
|
1467
|
|
1468 =cut
|
|
1469
|
|
1470 sub get_all_ProteinAlignFeatures {
|
|
1471 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
|
|
1472
|
|
1473 if(!$self->adaptor()) {
|
|
1474 warning('Cannot get ProteinAlignFeatures without attached adaptor');
|
|
1475 return [];
|
|
1476 }
|
|
1477
|
|
1478 my $db;
|
|
1479
|
|
1480 if($dbtype) {
|
|
1481 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1482 if(!$db) {
|
|
1483 warning("Don't have db $dbtype returning empty list\n");
|
|
1484 return [];
|
|
1485 }
|
|
1486 } else {
|
|
1487 $db = $self->adaptor->db;
|
|
1488 }
|
|
1489
|
|
1490 my $pafa = $db->get_ProteinAlignFeatureAdaptor();
|
|
1491
|
|
1492 if(defined($score) and defined ($hcoverage)){
|
|
1493 warning "cannot specify score and hcoverage. Using score only";
|
|
1494 }
|
|
1495 if(defined($score)){
|
|
1496 return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
|
|
1497 }
|
|
1498 return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
|
|
1499
|
|
1500 }
|
|
1501
|
|
1502
|
|
1503
|
|
1504 =head2 get_all_SimilarityFeatures
|
|
1505
|
|
1506 Arg [1] : (optional) string $logic_name
|
|
1507 the name of the analysis performed on the features to retrieve
|
|
1508 Arg [2] : (optional) float $score
|
|
1509 the lower bound of the score of the features to be retrieved
|
|
1510 Example : @feats = @{$slice->get_all_SimilarityFeatures};
|
|
1511 Description: Retrieves all dna_align_features and protein_align_features
|
|
1512 with analysis named $logic_name and with score above $score.
|
|
1513 It is probably faster to use get_all_ProteinAlignFeatures or
|
|
1514 get_all_DnaAlignFeatures if a sepcific feature type is desired.
|
|
1515 If $logic_name is not defined features of all logic names are
|
|
1516 retrieved. If $score is not defined features of all scores are
|
|
1517 retrieved.
|
|
1518 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
|
|
1519 Exceptions : warning if slice does not have attached adaptor
|
|
1520 Caller : general
|
|
1521 Status : Stable
|
|
1522
|
|
1523 =cut
|
|
1524
|
|
1525 sub get_all_SimilarityFeatures {
|
|
1526 my ($self, $logic_name, $score) = @_;
|
|
1527
|
|
1528 my @out = ();
|
|
1529
|
|
1530 push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
|
|
1531 push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
|
|
1532
|
|
1533 return \@out;
|
|
1534 }
|
|
1535
|
|
1536
|
|
1537
|
|
1538 =head2 get_all_SimpleFeatures
|
|
1539
|
|
1540 Arg [1] : (optional) string $logic_name
|
|
1541 The name of the analysis performed on the simple features
|
|
1542 to obtain.
|
|
1543 Arg [2] : (optional) float $score
|
|
1544 The mimimum score of the features to retrieve
|
|
1545 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
|
|
1546 Description: Retrieves the SimpleFeatures which overlap this slice with
|
|
1547 logic name $logic_name and with score above $score. If
|
|
1548 $logic_name is not defined features of all logic names are
|
|
1549 retrieved. If $score is not defined features of all scores are
|
|
1550 retrieved.
|
|
1551 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
|
|
1552 Exceptions : warning if slice does not have attached adaptor
|
|
1553 Caller : general
|
|
1554 Status : Stable
|
|
1555
|
|
1556 =cut
|
|
1557
|
|
1558 sub get_all_SimpleFeatures {
|
|
1559 my ($self, $logic_name, $score, $dbtype) = @_;
|
|
1560
|
|
1561 if(!$self->adaptor()) {
|
|
1562 warning('Cannot get SimpleFeatures without attached adaptor');
|
|
1563 return [];
|
|
1564 }
|
|
1565
|
|
1566 my $db;
|
|
1567 if($dbtype) {
|
|
1568 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1569 if(!$db) {
|
|
1570 warning("Don't have db $dbtype returning empty list\n");
|
|
1571 return [];
|
|
1572 }
|
|
1573 } else {
|
|
1574 $db = $self->adaptor->db;
|
|
1575 }
|
|
1576
|
|
1577 my $sfa = $db->get_SimpleFeatureAdaptor();
|
|
1578
|
|
1579 return $sfa->fetch_all_by_Slice_and_score($self, $score, $logic_name);
|
|
1580 }
|
|
1581
|
|
1582
|
|
1583
|
|
1584 =head2 get_all_RepeatFeatures
|
|
1585
|
|
1586 Arg [1] : (optional) string $logic_name
|
|
1587 The name of the analysis performed on the repeat features
|
|
1588 to obtain.
|
|
1589 Arg [2] : (optional) string/array $repeat_type
|
|
1590 Limits features returned to those of the specified
|
|
1591 repeat_type. Can specify a single value or an array reference
|
|
1592 to limit by more than one
|
|
1593 Arg [3] : (optional) string $db
|
|
1594 Key for database e.g. core/vega/cdna/....
|
|
1595 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')};
|
|
1596 Description: Retrieves the RepeatFeatures which overlap with
|
|
1597 logic name $logic_name and with score above $score. If
|
|
1598 $logic_name is not defined features of all logic names are
|
|
1599 retrieved.
|
|
1600 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
|
|
1601 Exceptions : warning if slice does not have attached adaptor
|
|
1602 Caller : general
|
|
1603 Status : Stable
|
|
1604
|
|
1605 =cut
|
|
1606
|
|
1607 sub get_all_RepeatFeatures {
|
|
1608 my ($self, $logic_name, $repeat_type, $dbtype) = @_;
|
|
1609
|
|
1610 if(!$self->adaptor()) {
|
|
1611 warning('Cannot get RepeatFeatures without attached adaptor');
|
|
1612 return [];
|
|
1613 }
|
|
1614
|
|
1615 my $db;
|
|
1616 if($dbtype) {
|
|
1617 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1618 if(!$db) {
|
|
1619 warning("Don't have db $dbtype returning empty list\n");
|
|
1620 return [];
|
|
1621 }
|
|
1622 } else {
|
|
1623 $db = $self->adaptor->db;
|
|
1624 }
|
|
1625
|
|
1626 my $rpfa = $db->get_RepeatFeatureAdaptor();
|
|
1627
|
|
1628 return $rpfa->fetch_all_by_Slice($self, $logic_name, $repeat_type);
|
|
1629 }
|
|
1630
|
|
1631 =head2 get_all_LD_values
|
|
1632
|
|
1633 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
|
|
1634 Description : returns all LD values on this slice. This function will only work correctly if the variation
|
|
1635 database has been attached to the core database. If the argument is passed, will return the LD information
|
|
1636 in that population
|
|
1637 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
|
|
1638 Exceptions : none
|
|
1639 Caller : contigview, snpview
|
|
1640 Status : Stable
|
|
1641
|
|
1642 =cut
|
|
1643
|
|
1644 sub get_all_LD_values{
|
|
1645 my $self = shift;
|
|
1646 my $population = shift;
|
|
1647
|
|
1648
|
|
1649 if(!$self->adaptor()) {
|
|
1650 warning('Cannot get LDFeatureContainer without attached adaptor');
|
|
1651 return [];
|
|
1652 }
|
|
1653
|
|
1654 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
1655
|
|
1656 unless($variation_db) {
|
|
1657 warning("Variation database must be attached to core database to " .
|
|
1658 "retrieve variation information" );
|
|
1659 return [];
|
|
1660 }
|
|
1661
|
|
1662 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor;
|
|
1663
|
|
1664 if( $ld_adaptor ) {
|
|
1665 return $ld_adaptor->fetch_by_Slice($self,$population);
|
|
1666 } else {
|
|
1667 return [];
|
|
1668
|
|
1669 }
|
|
1670
|
|
1671 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer");
|
|
1672
|
|
1673 # if( $ld_adaptor ) {
|
|
1674 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population);
|
|
1675 # if (@{$ld_values} > 1){
|
|
1676 # warning("More than 1 variation database attached. Trying to merge LD results");
|
|
1677 # my $ld_value_merged = shift @{$ld_values};
|
|
1678 # #with more than 1 variation database attached, will try to merge in one single LDContainer object.
|
|
1679 # foreach my $ld (@{$ld_values}){
|
|
1680 # #copy the ld values to the result hash
|
|
1681 # foreach my $key (keys %{$ld->{'ldContainer'}}){
|
|
1682 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key};
|
|
1683 # }
|
|
1684 # #and copy the variationFeatures as well
|
|
1685 # foreach my $key (keys %{$ld->{'variationFeatures'}}){
|
|
1686 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key};
|
|
1687 # }
|
|
1688
|
|
1689 # }
|
|
1690 # return $ld_value_merged;
|
|
1691 # }
|
|
1692 # else{
|
|
1693 # return shift @{$ld_values};
|
|
1694 # }
|
|
1695 # } else {
|
|
1696 # warning("Variation database must be attached to core database to " .
|
|
1697 # "retrieve variation information" );
|
|
1698 # return [];
|
|
1699 # }
|
|
1700 }
|
|
1701
|
|
1702 sub _get_VariationFeatureAdaptor {
|
|
1703
|
|
1704 my $self = shift;
|
|
1705
|
|
1706 if(!$self->adaptor()) {
|
|
1707 warning('Cannot get variation features without attached adaptor');
|
|
1708 return undef;
|
|
1709 }
|
|
1710
|
|
1711 my $vf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
|
|
1712 -species => $self->adaptor()->db()->species,
|
|
1713 -type => "VariationFeature"
|
|
1714 );
|
|
1715
|
|
1716 if( $vf_adaptor ) {
|
|
1717 return $vf_adaptor;
|
|
1718 }
|
|
1719 else {
|
|
1720 warning("Variation database must be attached to core database to " .
|
|
1721 "retrieve variation information" );
|
|
1722
|
|
1723 return undef;
|
|
1724 }
|
|
1725 }
|
|
1726
|
|
1727 =head2 get_all_VariationFeatures
|
|
1728 Args : $so_terms [optional] - list of so_terms to limit the fetch to
|
|
1729 Description : Returns all germline variation features on this slice. This function will
|
|
1730 only work correctly if the variation database has been attached to the core
|
|
1731 database.
|
|
1732 If $so_terms is specified, only variation features with a consequence type
|
|
1733 that matches or is an ontological child of any of the supplied terms will
|
|
1734 be returned
|
|
1735 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1736 Exceptions : none
|
|
1737 Caller : contigview, snpview
|
|
1738 Status : Stable
|
|
1739
|
|
1740 =cut
|
|
1741
|
|
1742 sub get_all_VariationFeatures{
|
|
1743 my $self = shift;
|
|
1744
|
|
1745 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
1746 return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, @_);
|
|
1747 }
|
|
1748 else {
|
|
1749 return [];
|
|
1750 }
|
|
1751 }
|
|
1752
|
|
1753 =head2 get_all_somatic_VariationFeatures
|
|
1754
|
|
1755 Args : $filter [optional]
|
|
1756 Description : Returns all somatic variation features on this slice. This function will only
|
|
1757 work correctly if the variation database has been attached to the core database.
|
|
1758 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1759 Exceptions : none
|
|
1760 Status : Stable
|
|
1761
|
|
1762 =cut
|
|
1763
|
|
1764 sub get_all_somatic_VariationFeatures {
|
|
1765 my $self = shift;
|
|
1766
|
|
1767 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
1768 return $vf_adaptor->fetch_all_somatic_by_Slice($self);
|
|
1769 }
|
|
1770 else{
|
|
1771 return [];
|
|
1772 }
|
|
1773 }
|
|
1774
|
|
1775 =head2 get_all_VariationFeatures_with_annotation
|
|
1776
|
|
1777 Arg [1] : $variation_feature_source [optional]
|
|
1778 Arg [2] : $annotation_source [optional]
|
|
1779 Arg [3] : $annotation_name [optional]
|
|
1780 Description : returns all germline variation features on this slice associated with a phenotype.
|
|
1781 This function will only work correctly if the variation database has been
|
|
1782 attached to the core database.
|
|
1783 If $variation_feature_source is set only variations from that source
|
|
1784 are retrieved.
|
|
1785 If $annotation_source is set only variations whose annotations come from
|
|
1786 $annotation_source will be retrieved.
|
|
1787 If $annotation_name is set only variations with that annotation will be retrieved.
|
|
1788 $annotation_name can be a phenotype's internal dbID.
|
|
1789 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1790 Exceptions : none
|
|
1791 Caller : contigview, snpview
|
|
1792 Status : Stable
|
|
1793
|
|
1794 =cut
|
|
1795
|
|
1796 sub get_all_VariationFeatures_with_annotation{
|
|
1797 my $self = shift;
|
|
1798 my $source = shift;
|
|
1799 my $p_source = shift;
|
|
1800 my $annotation = shift;
|
|
1801
|
|
1802 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
1803 return $vf_adaptor->fetch_all_with_annotation_by_Slice($self, $source, $p_source, $annotation);
|
|
1804 }
|
|
1805 else {
|
|
1806 return [];
|
|
1807 }
|
|
1808 }
|
|
1809
|
|
1810 =head2 get_all_somatic_VariationFeatures_with_annotation
|
|
1811
|
|
1812 Arg [1] : $variation_feature_source [optional]
|
|
1813 Arg [2] : $annotation_source [optional]
|
|
1814 Arg [3] : $annotation_name [optional]
|
|
1815 Description : returns all somatic variation features on this slice associated with a phenotype.
|
|
1816 (see get_all_VariationFeatures_with_annotation for further documentation)
|
|
1817 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1818 Exceptions : none
|
|
1819 Status : Stable
|
|
1820
|
|
1821 =cut
|
|
1822
|
|
1823 sub get_all_somatic_VariationFeatures_with_annotation{
|
|
1824 my $self = shift;
|
|
1825 my $source = shift;
|
|
1826 my $p_source = shift;
|
|
1827 my $annotation = shift;
|
|
1828
|
|
1829 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
1830 return $vf_adaptor->fetch_all_somatic_with_annotation_by_Slice($self, $source, $p_source, $annotation);
|
|
1831 }
|
|
1832 else {
|
|
1833 return [] unless $vf_adaptor;
|
|
1834 }
|
|
1835 }
|
|
1836
|
|
1837 =head2 get_all_VariationFeatures_by_VariationSet
|
|
1838
|
|
1839 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set
|
|
1840 Description :returns all variation features on this slice associated with a given set.
|
|
1841 This function will only work correctly if the variation database has been
|
|
1842 attached to the core database.
|
|
1843 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1844 Exceptions : none
|
|
1845 Caller : contigview, snpview
|
|
1846 Status : Stable
|
|
1847
|
|
1848 =cut
|
|
1849
|
|
1850 sub get_all_VariationFeatures_by_VariationSet {
|
|
1851 my $self = shift;
|
|
1852 my $set = shift;
|
|
1853
|
|
1854 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
1855 return $vf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);
|
|
1856 }
|
|
1857 else {
|
|
1858 return [];
|
|
1859 }
|
|
1860 }
|
|
1861
|
|
1862 =head2 get_all_StructuralVariations
|
|
1863
|
|
1864 Description: DEPRECATED. Use get_all_StructuralVariationFeatures instead
|
|
1865
|
|
1866 =cut
|
|
1867
|
|
1868 sub get_all_StructuralVariations{
|
|
1869 my $self = shift;
|
|
1870 my $source = shift;
|
|
1871 my $study = shift;
|
|
1872 my $sv_class = shift;
|
|
1873
|
|
1874 deprecate('Use get_all_StructuralVariationFeatures() instead.');
|
|
1875
|
|
1876 return $self->get_all_StructuralVariationFeatures($source,$sv_class);
|
|
1877 }
|
|
1878
|
|
1879
|
|
1880 =head2 get_all_CopyNumberVariantProbes
|
|
1881
|
|
1882 Description: DEPRECATED. Use get_all_CopyNumberVariantProbeFeatures instead
|
|
1883
|
|
1884 =cut
|
|
1885
|
|
1886 sub get_all_CopyNumberVariantProbes {
|
|
1887 my $self = shift;
|
|
1888 my $source = shift;
|
|
1889 my $study = shift;
|
|
1890
|
|
1891 deprecate('Use get_all_CopyNumberVariantProbeFeatures() instead.');
|
|
1892
|
|
1893 return $self->get_all_CopyNumberVariantProbeFeatures($source);
|
|
1894 }
|
|
1895
|
|
1896
|
|
1897 sub _get_StructuralVariationFeatureAdaptor {
|
|
1898
|
|
1899 my $self = shift;
|
|
1900
|
|
1901 if(!$self->adaptor()) {
|
|
1902 warning('Cannot get structural variation features without attached adaptor');
|
|
1903 return undef;
|
|
1904 }
|
|
1905
|
|
1906 my $svf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
|
|
1907 -species => $self->adaptor()->db()->species,
|
|
1908 -type => "StructuralVariationFeature"
|
|
1909 );
|
|
1910
|
|
1911 if( $svf_adaptor ) {
|
|
1912 return $svf_adaptor;
|
|
1913 }
|
|
1914 else {
|
|
1915 warning("Variation database must be attached to core database to " .
|
|
1916 "retrieve variation information" );
|
|
1917
|
|
1918 return undef;
|
|
1919 }
|
|
1920 }
|
|
1921
|
|
1922
|
|
1923 =head2 get_all_StructuralVariationFeatures
|
|
1924
|
|
1925 Arg[1] : string $source [optional]
|
|
1926 Arg[2] : int $include_evidence [optional]
|
|
1927 Arg[3] : string $sv_class (SO term) [optional]
|
|
1928 Description : returns all structural variation features on this slice. This function will only work
|
|
1929 correctly if the variation database has been attached to the core database.
|
|
1930 If $source is set, only structural variation features with that source name will be
|
|
1931 returned. By default, it only returns structural variant features which are not labelled
|
|
1932 as "CNV_PROBE".
|
|
1933 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from
|
|
1934 both structural variation (SV) and their supporting structural variations (SSV) will be
|
|
1935 returned. By default, it only returns features from structural variations (SV).
|
|
1936 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
|
|
1937 Exceptions : none
|
|
1938 Caller : contigview, snpview, structural_variation_features
|
|
1939 Status : Stable
|
|
1940
|
|
1941 =cut
|
|
1942
|
|
1943 sub get_all_StructuralVariationFeatures {
|
|
1944 my $self = shift;
|
|
1945 my $source = shift;
|
|
1946 my $include_evidence = shift;
|
|
1947 my $somatic = shift;
|
|
1948 my $sv_class = shift;
|
|
1949
|
|
1950 my $operator = '';
|
|
1951
|
|
1952 if (!defined($sv_class)) {
|
|
1953 $sv_class = 'SO:0000051'; # CNV_PROBE
|
|
1954 $operator = '!'; # All but CNV_PROBE
|
|
1955 }
|
|
1956
|
|
1957 $somatic = (!defined($somatic) || !$somatic) ? 0 : 1;
|
|
1958
|
|
1959 my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor;
|
|
1960
|
|
1961 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
1962
|
|
1963 # Get the attrib_id
|
|
1964 my $at_adaptor = $variation_db->get_AttributeAdaptor;
|
|
1965 my $SO_term = $at_adaptor->SO_term_for_SO_accession($sv_class);
|
|
1966 my $attrib_id = $at_adaptor->attrib_id_for_type_value('SO_term',$SO_term);
|
|
1967
|
|
1968 if (!$attrib_id) {
|
|
1969 warning("The Sequence Ontology accession number is not found in the database");
|
|
1970 return [];
|
|
1971 }
|
|
1972
|
|
1973 # Get the structural variations features
|
|
1974 if( $svf_adaptor ) {
|
|
1975
|
|
1976 my $constraint = qq{ svf.somatic=$somatic AND svf.class_attrib_id $operator=$attrib_id };
|
|
1977 $constraint .= qq{ AND svf.is_evidence=0 } if (!$include_evidence);
|
|
1978
|
|
1979 if($source) {
|
|
1980 return $svf_adaptor->fetch_all_by_Slice_constraint($self, qq{$constraint AND s.name = '$source'});
|
|
1981 }else {
|
|
1982 return $svf_adaptor->fetch_all_by_Slice_constraint($self, $constraint);
|
|
1983 }
|
|
1984 }
|
|
1985 else {
|
|
1986 warning("Variation database must be attached to core database to " .
|
|
1987 "retrieve variation information" );
|
|
1988 return [];
|
|
1989 }
|
|
1990 }
|
|
1991
|
|
1992
|
|
1993 =head2 get_all_StructuralVariationFeatures_by_VariationSet
|
|
1994
|
|
1995 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set
|
|
1996 Description :returns all structural variation features on this slice associated with a
|
|
1997 given set.
|
|
1998 This function will only work correctly if the variation database has been
|
|
1999 attached to the core database.
|
|
2000 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
|
|
2001 Exceptions : none
|
|
2002 Caller : contigview, snpview
|
|
2003 Status : Stable
|
|
2004
|
|
2005 =cut
|
|
2006
|
|
2007 sub get_all_StructuralVariationFeatures_by_VariationSet {
|
|
2008 my $self = shift;
|
|
2009 my $set = shift;
|
|
2010
|
|
2011 if (my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor) {
|
|
2012 return $svf_adaptor->fetch_all_by_Slice_VariationSet($self, $set);
|
|
2013 }
|
|
2014 else {
|
|
2015 return [];
|
|
2016 }
|
|
2017 }
|
|
2018
|
|
2019
|
|
2020 =head2 get_all_somatic_StructuralVariationFeatures
|
|
2021
|
|
2022 Arg[1] : string $source [optional]
|
|
2023 Arg[2] : int $include_evidence [optional]
|
|
2024 Arg[3] : string $sv_class (SO term) [optional]
|
|
2025 Description : returns all somatic structural variation features on this slice. This function will only work
|
|
2026 correctly if the variation database has been attached to the core database.
|
|
2027 If $source is set, only somatic structural variation features with that source name will be
|
|
2028 returned. By default, it only returns somatic structural variant features which are not labelled
|
|
2029 as "CNV_PROBE".
|
|
2030 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from
|
|
2031 both structural variation (SV) and their supporting structural variations (SSV) will be
|
|
2032 returned. By default, it only returns features from structural variations (SV).
|
|
2033 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
|
|
2034 Exceptions : none
|
|
2035 Caller : contigview, snpview, structural_variation_features
|
|
2036 Status : Stable
|
|
2037
|
|
2038 =cut
|
|
2039
|
|
2040 sub get_all_somatic_StructuralVariationFeatures {
|
|
2041 my $self = shift;
|
|
2042 my $source = shift;
|
|
2043 my $include_evidence = shift;
|
|
2044 my $sv_class = shift;
|
|
2045
|
|
2046 return $self->get_all_StructuralVariationFeatures($source,$include_evidence,1,$sv_class);
|
|
2047 }
|
|
2048
|
|
2049
|
|
2050 =head2 get_all_CopyNumberVariantProbeFeatures
|
|
2051
|
|
2052 Arg[1] : string $source [optional]
|
|
2053 Description : returns all copy number variant probes on this slice. This function will only work
|
|
2054 correctly if the variation database has been attached to the core database.
|
|
2055 If $source is set, only CNV probes with that source name will be returned.
|
|
2056 If $study is set, only CNV probes of that study will be returned.
|
|
2057 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature
|
|
2058 Exceptions : none
|
|
2059 Caller : contigview, snpview, structural_variation_feature
|
|
2060 Status : At Risk
|
|
2061
|
|
2062 =cut
|
|
2063
|
|
2064 sub get_all_CopyNumberVariantProbeFeatures {
|
|
2065 my $self = shift;
|
|
2066 my $source = shift;
|
|
2067
|
|
2068 return $self->get_all_StructuralVariationFeatures($source,0,0,'SO:0000051');
|
|
2069 }
|
|
2070
|
|
2071
|
|
2072 =head2 get_all_VariationFeatures_by_Population
|
|
2073
|
|
2074 Arg [1] : Bio::EnsEMBL::Variation::Population
|
|
2075 Arg [2] : $minimum_frequency (optional)
|
|
2076 Example : $pop = $pop_adaptor->fetch_by_dbID(659);
|
|
2077 @vfs = @{$slice->get_all_VariationFeatures_by_Population(
|
|
2078 $pop,$slice)};
|
|
2079 Description: Retrieves all variation features in a slice which are stored for
|
|
2080 a specified population. If $minimum_frequency is supplied, only
|
|
2081 variations with a minor allele frequency (MAF) greater than
|
|
2082 $minimum_frequency will be returned.
|
|
2083 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
2084 Exceptions : throw on incorrect argument
|
|
2085 Caller : general
|
|
2086 Status : At Risk
|
|
2087
|
|
2088 =cut
|
|
2089
|
|
2090 sub get_all_VariationFeatures_by_Population {
|
|
2091 my $self = shift;
|
|
2092
|
|
2093 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
2094 return $vf_adaptor->fetch_all_by_Slice_Population($self, @_);
|
|
2095 }
|
|
2096 else {
|
|
2097 return [];
|
|
2098 }
|
|
2099 }
|
|
2100
|
|
2101
|
|
2102
|
|
2103
|
|
2104 =head2 get_all_IndividualSlice
|
|
2105
|
|
2106 Args : none
|
|
2107 Example : my $individualSlice = $slice->get_by_Population($population);
|
|
2108 Description : Gets the specific Slice for all the individuls in the population
|
|
2109 ReturnType : listref of Bio::EnsEMB::IndividualSlice
|
|
2110 Exceptions : none
|
|
2111 Caller : general
|
|
2112
|
|
2113 =cut
|
|
2114
|
|
2115 sub get_all_IndividualSlice{
|
|
2116 my $self = shift;
|
|
2117
|
|
2118 my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new(
|
|
2119 -START => $self->{'start'},
|
|
2120 -END => $self->{'end'},
|
|
2121 -STRAND => $self->{'strand'},
|
|
2122 -ADAPTOR => $self->adaptor(),
|
|
2123 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2124 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2125 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2126 );
|
|
2127 return $individualSliceFactory->get_all_IndividualSlice();
|
|
2128 }
|
|
2129
|
|
2130 =head2 get_by_Individual
|
|
2131
|
|
2132 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual
|
|
2133 Example : my $individualSlice = $slice->get_by_Individual($individual);
|
|
2134 Description : Gets the specific Slice for the individual
|
|
2135 ReturnType : Bio::EnsEMB::IndividualSlice
|
|
2136 Exceptions : none
|
|
2137 Caller : general
|
|
2138
|
|
2139 =cut
|
|
2140
|
|
2141 sub get_by_Individual{
|
|
2142 my $self = shift;
|
|
2143 my $individual = shift;
|
|
2144
|
|
2145 return Bio::EnsEMBL::IndividualSlice->new(
|
|
2146 -START => $self->{'start'},
|
|
2147 -END => $self->{'end'},
|
|
2148 -STRAND => $self->{'strand'},
|
|
2149 -ADAPTOR => $self->adaptor(),
|
|
2150 # -SEQ => $self->{'seq'},
|
|
2151 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2152 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2153 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2154 -INDIVIDUAL => $individual);
|
|
2155
|
|
2156 }
|
|
2157
|
|
2158
|
|
2159
|
|
2160 =head2 get_by_strain
|
|
2161
|
|
2162 Arg[1] : string $strain
|
|
2163 Example : my $strainSlice = $slice->get_by_strain($strain);
|
|
2164 Description : Gets the specific Slice for the strain
|
|
2165 ReturnType : Bio::EnsEMB::StrainSlice
|
|
2166 Exceptions : none
|
|
2167 Caller : general
|
|
2168
|
|
2169 =cut
|
|
2170
|
|
2171 sub get_by_strain{
|
|
2172 my $self = shift;
|
|
2173 my $strain_name = shift;
|
|
2174
|
|
2175 return Bio::EnsEMBL::StrainSlice->new(
|
|
2176 -START => $self->{'start'},
|
|
2177 -END => $self->{'end'},
|
|
2178 -STRAND => $self->{'strand'},
|
|
2179 -ADAPTOR => $self->adaptor(),
|
|
2180 -SEQ => $self->{'seq'},
|
|
2181 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2182 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2183 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2184 -STRAIN_NAME => $strain_name);
|
|
2185
|
|
2186 }
|
|
2187
|
|
2188 sub calculate_theta{
|
|
2189 my $self = shift;
|
|
2190 my $strains = shift;
|
|
2191 my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate
|
|
2192
|
|
2193 if(!$self->adaptor()) {
|
|
2194 warning('Cannot get variation features without attached adaptor');
|
|
2195 return 0;
|
|
2196 }
|
|
2197 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
2198
|
|
2199 unless($variation_db) {
|
|
2200 warning("Variation database must be attached to core database to " .
|
|
2201 "retrieve variation information" );
|
|
2202 return 0;
|
|
2203 }
|
|
2204
|
|
2205 #need to get coverage regions for the slice in the different strains
|
|
2206 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
|
|
2207 my $strain;
|
|
2208 my $differences = [];
|
|
2209 my $slices = [];
|
|
2210 if ($coverage_adaptor){
|
|
2211 my $num_strains = scalar(@{$strains}) +1;
|
|
2212 if (!defined $feature){
|
|
2213 #we want to calculate for the whole slice
|
|
2214 push @{$slices}, $self; #add the slice as the slice to calculate the theta value
|
|
2215 }
|
|
2216 else{
|
|
2217 #we have features, get the slices for the different features
|
|
2218 my $features = $self->get_all_Exons();
|
|
2219 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
|
|
2220 }
|
|
2221 my $length_regions = 0;
|
|
2222 my $snps = 0;
|
|
2223 my $theta = 0;
|
|
2224 my $last_position = 0;
|
|
2225 #get all the differences in the slice coordinates
|
|
2226 foreach my $strain_name (@{$strains}){
|
|
2227 my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain
|
|
2228
|
|
2229 my $results = $strain->get_all_differences_Slice;
|
|
2230 push @{$differences}, @{$results} if (defined $results);
|
|
2231 }
|
|
2232 #when we finish, we have, in max_level, the regions covered by all the sample
|
|
2233 #sort the differences by the genomic position
|
|
2234 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
|
|
2235 foreach my $slice (@{$slices}){
|
|
2236 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
|
|
2237 if (defined $regions_covered){
|
|
2238 foreach my $range (@{$regions_covered}){
|
|
2239 $length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region
|
|
2240 for (my $i = $last_position;$i<@differences_sorted;$i++){
|
|
2241 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
|
|
2242 $snps++; #count differences in the region
|
|
2243 }
|
|
2244 elsif ($differences_sorted[$i]->end > $range->[1]){
|
|
2245 $last_position = $i;
|
|
2246 last;
|
|
2247 }
|
|
2248 }
|
|
2249 }
|
|
2250 #when all the ranges have been iterated, calculate rho
|
|
2251 #this is an intermediate variable called a in the formula
|
|
2252 # a = sum i=2..strains 1/i-1
|
|
2253 }
|
|
2254 }
|
|
2255 my $a = _calculate_a($num_strains);
|
|
2256 $theta = $snps / ($a * $length_regions);
|
|
2257 return $theta;
|
|
2258 }
|
|
2259 else{
|
|
2260 return 0;
|
|
2261 }
|
|
2262 }
|
|
2263
|
|
2264
|
|
2265
|
|
2266
|
|
2267 sub _calculate_a{
|
|
2268 my $max_level = shift;
|
|
2269
|
|
2270 my $a = 0;
|
|
2271 for (my $i = 2; $i <= $max_level+1;$i++){
|
|
2272 $a += 1/($i-1);
|
|
2273 }
|
|
2274 return $a;
|
|
2275 }
|
|
2276
|
|
2277 sub calculate_pi{
|
|
2278 my $self = shift;
|
|
2279 my $strains = shift;
|
|
2280 my $feature = shift;
|
|
2281
|
|
2282 if(!$self->adaptor()) {
|
|
2283 warning('Cannot get variation features without attached adaptor');
|
|
2284 return 0;
|
|
2285 }
|
|
2286 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
2287
|
|
2288 unless($variation_db) {
|
|
2289 warning("Variation database must be attached to core database to " .
|
|
2290 "retrieve variation information" );
|
|
2291 return 0;
|
|
2292 }
|
|
2293
|
|
2294 #need to get coverage regions for the slice in the different strains
|
|
2295 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
|
|
2296 my $differences = [];
|
|
2297 my $slices = [];
|
|
2298 if ($coverage_adaptor){
|
|
2299 my $num_strains = scalar(@{$strains}) +1;
|
|
2300 if (!defined $feature){
|
|
2301 #we want to calculate for the whole slice
|
|
2302 push @{$slices}, $self; #add the slice as the slice to calculate the theta value
|
|
2303 }
|
|
2304 else{
|
|
2305 #we have features, get the slices for the different features
|
|
2306 my $features = $self->get_all_Exons();
|
|
2307 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
|
|
2308 }
|
|
2309 my @range_differences = ();
|
|
2310 my $pi = 0;
|
|
2311 my $regions = 0;
|
|
2312 my $last_position = 0; #last position visited in the sorted list of differences
|
|
2313 my $triallelic = 0;
|
|
2314 my $is_triallelic = 0;
|
|
2315 foreach my $slice (@{$slices}){
|
|
2316 foreach my $strain_name (@{$strains}){
|
|
2317 my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain
|
|
2318 my $results = $strain->get_all_differences_Slice;
|
|
2319 push @{$differences}, @{$results} if (defined $results);
|
|
2320 }
|
|
2321 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
|
|
2322
|
|
2323 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
|
|
2324 #when we finish, we have, in max_level, the regions covered by all the sample
|
|
2325 #sort the differences
|
|
2326 if (defined $regions_covered){
|
|
2327 foreach my $range (@{$regions_covered}){
|
|
2328 for (my $i = $last_position;$i<@differences_sorted;$i++){
|
|
2329 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
|
|
2330 #check wether it is the same region or different
|
|
2331 if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){
|
|
2332 if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){
|
|
2333 $is_triallelic = 1;
|
|
2334 }
|
|
2335 push @range_differences, $differences_sorted[$i];
|
|
2336 }
|
|
2337 else{
|
|
2338 #new site, calc pi for the previous one
|
|
2339 $pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains));
|
|
2340 if ($is_triallelic) {
|
|
2341 $triallelic++;
|
|
2342 $is_triallelic = 0;
|
|
2343 }
|
|
2344 $regions++;
|
|
2345 @range_differences = ();
|
|
2346 #and start a new range
|
|
2347 push @range_differences, $differences_sorted[$i];
|
|
2348 }
|
|
2349 }
|
|
2350 elsif ($differences_sorted[$i]->end > $range->[1]){
|
|
2351 $last_position = $i;
|
|
2352 last;
|
|
2353 }
|
|
2354 }
|
|
2355 #calculate pi for last site, if any
|
|
2356 if (defined $range_differences[0]){
|
|
2357 $pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains));
|
|
2358 $regions++;
|
|
2359 }
|
|
2360 }
|
|
2361 }
|
|
2362 $pi = $pi / $regions; #calculate average pi
|
|
2363 print "Regions with variations in region $regions and triallelic $triallelic\n\n";
|
|
2364 }
|
|
2365 return $pi;
|
|
2366 }
|
|
2367 else{
|
|
2368 return 0;
|
|
2369 }
|
|
2370
|
|
2371 }
|
|
2372
|
|
2373
|
|
2374
|
|
2375
|
|
2376
|
|
2377 =head2 get_all_genotyped_VariationFeatures
|
|
2378
|
|
2379 Args : none
|
|
2380 Function : returns all variation features on this slice that have been genotyped. This function will only work
|
|
2381 correctly if the variation database has been attached to the core database.
|
|
2382 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
2383 Exceptions : none
|
|
2384 Caller : contigview, snpview, ldview
|
|
2385 Status : At Risk
|
|
2386 : Variation database is under development.
|
|
2387
|
|
2388 =cut
|
|
2389
|
|
2390 sub get_all_genotyped_VariationFeatures{
|
|
2391 my $self = shift;
|
|
2392
|
|
2393 if( my $vf_adaptor = $self->_get_VariationFeatureAdaptor) {
|
|
2394 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
|
|
2395 }
|
|
2396 else {
|
|
2397 return [];
|
|
2398 }
|
|
2399 }
|
|
2400
|
|
2401
|
|
2402 =head2 get_all_SNPs
|
|
2403
|
|
2404 Description: DEPRECATED. Use get_all_VariationFeatures insted
|
|
2405
|
|
2406 =cut
|
|
2407
|
|
2408 sub get_all_SNPs {
|
|
2409 my $self = shift;
|
|
2410
|
|
2411 deprecate('Use get_all_VariationFeatures() instead.');
|
|
2412
|
|
2413 my $snps;
|
|
2414 my $vf = $self->get_all_genotyped_VariationFeatures();
|
|
2415 if( $vf->[0] ) {
|
|
2416 #necessary to convert the VariationFeatures into SNP objects
|
|
2417 foreach my $variation_feature (@{$vf}){
|
|
2418 push @{$snps},$variation_feature->convert_to_SNP();
|
|
2419 }
|
|
2420 return $snps;
|
|
2421 } else {
|
|
2422 return [];
|
|
2423 }
|
|
2424 }
|
|
2425
|
|
2426 =head2 get_all_genotyped_SNPs
|
|
2427
|
|
2428 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures insted
|
|
2429
|
|
2430 =cut
|
|
2431
|
|
2432 sub get_all_genotyped_SNPs {
|
|
2433 my $self = shift;
|
|
2434
|
|
2435 deprecate("Use get_all_genotyped_VariationFeatures instead");
|
|
2436 my $vf = $self->get_all_genotyped_VariationFeatures;
|
|
2437 my $snps;
|
|
2438 if ($vf->[0]){
|
|
2439 foreach my $variation_feature (@{$vf}){
|
|
2440 push @{$snps},$variation_feature->convert_to_SNP();
|
|
2441 }
|
|
2442 return $snps;
|
|
2443 } else {
|
|
2444 return [];
|
|
2445 }
|
|
2446 }
|
|
2447
|
|
2448 sub get_all_SNPs_transcripts {
|
|
2449 my $self = shift;
|
|
2450
|
|
2451 deprecate("DEPRECATED");
|
|
2452
|
|
2453 return [];
|
|
2454
|
|
2455 }
|
|
2456
|
|
2457
|
|
2458
|
|
2459 =head2 get_all_Genes
|
|
2460
|
|
2461 Arg [1] : (optional) string $logic_name
|
|
2462 The name of the analysis used to generate the genes to retrieve
|
|
2463 Arg [2] : (optional) string $dbtype
|
|
2464 The dbtype of genes to obtain. This assumes that the db has
|
|
2465 been added to the DBAdaptor under this name (using the
|
|
2466 DBConnection::add_db_adaptor method).
|
|
2467 Arg [3] : (optional) boolean $load_transcripts
|
|
2468 If set to true, transcripts will be loaded immediately rather
|
|
2469 than being lazy-loaded on request. This will result in a
|
|
2470 significant speed up if the Transcripts and Exons are going to
|
|
2471 be used (but a slow down if they are not).
|
|
2472 Arg [4] : (optional) string $source
|
|
2473 The source of the genes to retrieve.
|
|
2474 Arg [5] : (optional) string $biotype
|
|
2475 The biotype of the genes to retrieve.
|
|
2476 Example : @genes = @{$slice->get_all_Genes};
|
|
2477 Description: Retrieves all genes that overlap this slice.
|
|
2478 Returntype : listref of Bio::EnsEMBL::Genes
|
|
2479 Exceptions : none
|
|
2480 Caller : none
|
|
2481 Status : Stable
|
|
2482
|
|
2483 =cut
|
|
2484
|
|
2485 sub get_all_Genes{
|
|
2486 my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
|
|
2487
|
|
2488 if(!$self->adaptor()) {
|
|
2489 warning('Cannot get Genes without attached adaptor');
|
|
2490 return [];
|
|
2491 }
|
|
2492
|
|
2493 my $ga;
|
|
2494 if($dbtype) {
|
|
2495 my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
|
|
2496 if(defined($db)){
|
|
2497 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" );
|
|
2498 }
|
|
2499 else{
|
|
2500 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Gene" );
|
|
2501 }
|
|
2502 if(!defined $ga) {
|
|
2503 warning( "$dbtype genes not available" );
|
|
2504 return [];
|
|
2505 }
|
|
2506 } else {
|
|
2507 $ga = $self->adaptor->db->get_GeneAdaptor();
|
|
2508 }
|
|
2509
|
|
2510 return $ga->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
|
|
2511 }
|
|
2512
|
|
2513 =head2 get_all_Genes_by_type
|
|
2514
|
|
2515 Arg [1] : string $type
|
|
2516 The biotype of genes wanted.
|
|
2517 Arg [2] : (optional) string $logic_name
|
|
2518 Arg [3] : (optional) boolean $load_transcripts
|
|
2519 If set to true, transcripts will be loaded immediately rather
|
|
2520 than being lazy-loaded on request. This will result in a
|
|
2521 significant speed up if the Transcripts and Exons are going to
|
|
2522 be used (but a slow down if they are not).
|
|
2523 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
|
|
2524 'ensembl')};
|
|
2525 Description: Retrieves genes that overlap this slice of biotype $type.
|
|
2526 This is primarily used by the genebuilding code when several
|
|
2527 biotypes of genes are used.
|
|
2528
|
|
2529 The logic name is the analysis of the genes that are retrieved.
|
|
2530 If not provided all genes will be retrieved instead.
|
|
2531
|
|
2532 Returntype : listref of Bio::EnsEMBL::Genes
|
|
2533 Exceptions : none
|
|
2534 Caller : genebuilder, general
|
|
2535 Status : Stable
|
|
2536
|
|
2537 =cut
|
|
2538
|
|
2539 sub get_all_Genes_by_type{
|
|
2540 my ($self, $type, $logic_name, $load_transcripts) = @_;
|
|
2541
|
|
2542 if(!$self->adaptor()) {
|
|
2543 warning('Cannot get Genes without attached adaptor');
|
|
2544 return [];
|
|
2545 }
|
|
2546
|
|
2547 return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
|
|
2548 }
|
|
2549
|
|
2550
|
|
2551 =head2 get_all_Genes_by_source
|
|
2552
|
|
2553 Arg [1] : string source
|
|
2554 Arg [2] : (optional) boolean $load_transcripts
|
|
2555 If set to true, transcripts will be loaded immediately rather
|
|
2556 than being lazy-loaded on request. This will result in a
|
|
2557 significant speed up if the Transcripts and Exons are going to
|
|
2558 be used (but a slow down if they are not).
|
|
2559 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
|
|
2560 Description: Retrieves genes that overlap this slice of source $source.
|
|
2561
|
|
2562 Returntype : listref of Bio::EnsEMBL::Genes
|
|
2563 Exceptions : none
|
|
2564 Caller : general
|
|
2565 Status : Stable
|
|
2566
|
|
2567 =cut
|
|
2568
|
|
2569 sub get_all_Genes_by_source {
|
|
2570 my ($self, $source, $load_transcripts) = @_;
|
|
2571
|
|
2572 if(!$self->adaptor()) {
|
|
2573 warning('Cannot get Genes without attached adaptor');
|
|
2574 return [];
|
|
2575 }
|
|
2576
|
|
2577 return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
|
|
2578 }
|
|
2579
|
|
2580 =head2 get_all_Transcripts
|
|
2581
|
|
2582 Arg [1] : (optional) boolean $load_exons
|
|
2583 If set to true exons will not be lazy-loaded but will instead
|
|
2584 be loaded right away. This is faster if the exons are
|
|
2585 actually going to be used right away.
|
|
2586 Arg [2] : (optional) string $logic_name
|
|
2587 the logic name of the type of features to obtain
|
|
2588 Arg [3] : (optional) string $db_type
|
|
2589 Example : @transcripts = @{$slice->get_all_Transcripts)_};
|
|
2590 Description: Gets all transcripts which overlap this slice. If you want to
|
|
2591 specify a particular analysis or type, then you are better off
|
|
2592 using get_all_Genes or get_all_Genes_by_type and iterating
|
|
2593 through the transcripts of each gene.
|
|
2594 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
|
|
2595 Exceptions : none
|
|
2596 Caller : general
|
|
2597 Status : Stable
|
|
2598
|
|
2599 =cut
|
|
2600
|
|
2601 sub get_all_Transcripts {
|
|
2602 my $self = shift;
|
|
2603 my $load_exons = shift;
|
|
2604 my $logic_name = shift;
|
|
2605 my $dbtype = shift;
|
|
2606 if(!$self->adaptor()) {
|
|
2607 warning('Cannot get Transcripts without attached adaptor');
|
|
2608 return [];
|
|
2609 }
|
|
2610
|
|
2611
|
|
2612 my $ta;
|
|
2613 if($dbtype) {
|
|
2614 my $db = $reg->get_db($self->adaptor()->db(), $dbtype);
|
|
2615 if(defined($db)){
|
|
2616 $ta = $reg->get_adaptor( $db->species(), $db->group(), "Transcript" );
|
|
2617 } else{
|
|
2618 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Transcript" );
|
|
2619 }
|
|
2620 if(!defined $ta) {
|
|
2621 warning( "$dbtype genes not available" );
|
|
2622 return [];
|
|
2623 }
|
|
2624 } else {
|
|
2625 $ta = $self->adaptor->db->get_TranscriptAdaptor();
|
|
2626 }
|
|
2627 return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name);
|
|
2628 }
|
|
2629
|
|
2630
|
|
2631 =head2 get_all_Exons
|
|
2632
|
|
2633 Arg [1] : none
|
|
2634 Example : @exons = @{$slice->get_all_Exons};
|
|
2635 Description: Gets all exons which overlap this slice. Note that these exons
|
|
2636 will not be associated with any transcripts, so this may not
|
|
2637 be terribly useful.
|
|
2638 Returntype : reference to a list of Bio::EnsEMBL::Exons
|
|
2639 Exceptions : none
|
|
2640 Caller : general
|
|
2641 Status : Stable
|
|
2642
|
|
2643 =cut
|
|
2644
|
|
2645 sub get_all_Exons {
|
|
2646 my $self = shift;
|
|
2647
|
|
2648 if(!$self->adaptor()) {
|
|
2649 warning('Cannot get Exons without attached adaptor');
|
|
2650 return [];
|
|
2651 }
|
|
2652
|
|
2653 return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
|
|
2654 }
|
|
2655
|
|
2656
|
|
2657
|
|
2658 =head2 get_all_QtlFeatures
|
|
2659
|
|
2660 Args : none
|
|
2661 Example : none
|
|
2662 Description: returns overlapping QtlFeatures
|
|
2663 Returntype : listref Bio::EnsEMBL::Map::QtlFeature
|
|
2664 Exceptions : none
|
|
2665 Caller : general
|
|
2666 Status : Stable
|
|
2667
|
|
2668 =cut
|
|
2669
|
|
2670 sub get_all_QtlFeatures {
|
|
2671 my $self = shift;
|
|
2672
|
|
2673 if(!$self->adaptor()) {
|
|
2674 warning('Cannot get QtlFeatures without attached adaptor');
|
|
2675 return [];
|
|
2676 }
|
|
2677
|
|
2678 my $qfAdaptor;
|
|
2679 if( $self->adaptor()) {
|
|
2680 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor();
|
|
2681 } else {
|
|
2682 return [];
|
|
2683 }
|
|
2684
|
|
2685 return $qfAdaptor->fetch_all_by_Slice_constraint( $self );
|
|
2686 }
|
|
2687
|
|
2688
|
|
2689
|
|
2690
|
|
2691 =head2 get_all_KaryotypeBands
|
|
2692
|
|
2693 Arg [1] : none
|
|
2694 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
|
|
2695 Description: Retrieves the karyotype bands which this slice overlaps.
|
|
2696 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
|
|
2697 Exceptions : none
|
|
2698 Caller : general, contigview
|
|
2699 Status : Stable
|
|
2700
|
|
2701 =cut
|
|
2702
|
|
2703 sub get_all_KaryotypeBands {
|
|
2704 my ($self) = @_;
|
|
2705
|
|
2706 if(!$self->adaptor()) {
|
|
2707 warning('Cannot get KaryotypeBands without attached adaptor');
|
|
2708 return [];
|
|
2709 }
|
|
2710
|
|
2711 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor();
|
|
2712 return $kadp->fetch_all_by_Slice($self);
|
|
2713 }
|
|
2714
|
|
2715
|
|
2716
|
|
2717
|
|
2718 =head2 get_repeatmasked_seq
|
|
2719
|
|
2720 Arg [1] : listref of strings $logic_names (optional)
|
|
2721 Arg [2] : int $soft_masking_enable (optional)
|
|
2722 Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
|
|
2723 The values are 0 or 1 for hard and soft masking respectively
|
|
2724 The keys of the hash should be of 2 forms
|
|
2725 "repeat_class_" . $repeat_consensus->repeat_class,
|
|
2726 e.g. "repeat_class_SINE/MIR"
|
|
2727 "repeat_name_" . $repeat_consensus->name
|
|
2728 e.g. "repeat_name_MIR"
|
|
2729 depending on which base you want to apply the not default
|
|
2730 masking either the repeat_class or repeat_name. Both can be
|
|
2731 specified in the same hash at the same time, but in that case,
|
|
2732 repeat_name setting has priority over repeat_class. For example,
|
|
2733 you may have hard masking as default, and you may want soft
|
|
2734 masking of all repeat_class SINE/MIR, but repeat_name AluSp
|
|
2735 (which are also from repeat_class SINE/MIR).
|
|
2736 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
|
|
2737 "repeat_name_AluSp" => 0}
|
|
2738 Example : $rm_slice = $slice->get_repeatmasked_seq();
|
|
2739 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
|
|
2740 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
|
|
2741 masked sequence instead of the regular sequence.
|
|
2742 Sequence returned by this new slice will have repeat regions
|
|
2743 hardmasked by default (sequence replaced by N) or
|
|
2744 or soft-masked when arg[2] = 1 (sequence in lowercase)
|
|
2745 Will only work with database connection to get repeat features.
|
|
2746 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
|
|
2747 Exceptions : none
|
|
2748 Caller : general
|
|
2749 Status : Stable
|
|
2750
|
|
2751 =cut
|
|
2752
|
|
2753 sub get_repeatmasked_seq {
|
|
2754 my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
|
|
2755
|
|
2756 return Bio::EnsEMBL::RepeatMaskedSlice->new
|
|
2757 (-START => $self->{'start'},
|
|
2758 -END => $self->{'end'},
|
|
2759 -STRAND => $self->{'strand'},
|
|
2760 -ADAPTOR => $self->adaptor(),
|
|
2761 -SEQ => $self->{'seq'},
|
|
2762 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2763 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2764 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2765 -REPEAT_MASK => $logic_names,
|
|
2766 -SOFT_MASK => $soft_mask,
|
|
2767 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
|
|
2768 }
|
|
2769
|
|
2770
|
|
2771
|
|
2772 =head2 _mask_features
|
|
2773
|
|
2774 Arg [1] : reference to a string $dnaref
|
|
2775 Arg [2] : array_ref $repeats
|
|
2776 reference to a list Bio::EnsEMBL::RepeatFeature
|
|
2777 give the list of coordinates to replace with N or with
|
|
2778 lower case
|
|
2779 Arg [3] : int $soft_masking_enable (optional)
|
|
2780 Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
|
|
2781 The values are 0 or 1 for hard and soft masking respectively
|
|
2782 The keys of the hash should be of 2 forms
|
|
2783 "repeat_class_" . $repeat_consensus->repeat_class,
|
|
2784 e.g. "repeat_class_SINE/MIR"
|
|
2785 "repeat_name_" . $repeat_consensus->name
|
|
2786 e.g. "repeat_name_MIR"
|
|
2787 depending on which base you want to apply the not default masking either
|
|
2788 the repeat_class or repeat_name. Both can be specified in the same hash
|
|
2789 at the same time, but in that case, repeat_name setting has priority over
|
|
2790 repeat_class. For example, you may have hard masking as default, and
|
|
2791 you may want soft masking of all repeat_class SINE/MIR,
|
|
2792 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
|
|
2793 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
|
|
2794 "repeat_name_AluSp" => 0}
|
|
2795 Example : none
|
|
2796 Description: replaces string positions described in the RepeatFeatures
|
|
2797 with Ns (default setting), or with the lower case equivalent
|
|
2798 (soft masking). The reference to a dna string which is passed
|
|
2799 is changed in place.
|
|
2800 Returntype : none
|
|
2801 Exceptions : none
|
|
2802 Caller : seq
|
|
2803 Status : Stable
|
|
2804
|
|
2805 =cut
|
|
2806
|
|
2807 sub _mask_features {
|
|
2808 my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
|
|
2809
|
|
2810 $soft_mask = 0 unless (defined $soft_mask);
|
|
2811 $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
|
|
2812
|
|
2813 # explicit CORE::length call, to avoid any confusion with the Slice
|
|
2814 # length method
|
|
2815 my $dnalen = CORE::length($$dnaref);
|
|
2816
|
|
2817 REP:foreach my $old_f (@{$repeats}) {
|
|
2818 my $f = $old_f->transfer( $self );
|
|
2819 my $start = $f->start;
|
|
2820 my $end = $f->end;
|
|
2821 my $length = ($end - $start) + 1;
|
|
2822
|
|
2823 # check if we get repeat completely outside of expected slice range
|
|
2824 if ($end < 1 || $start > $dnalen) {
|
|
2825 # warning("Unexpected: Repeat completely outside slice coordinates.");
|
|
2826 next REP;
|
|
2827 }
|
|
2828
|
|
2829 # repeat partly outside slice range, so correct
|
|
2830 # the repeat start and length to the slice size if needed
|
|
2831 if ($start < 1) {
|
|
2832 $start = 1;
|
|
2833 $length = ($end - $start) + 1;
|
|
2834 }
|
|
2835
|
|
2836 # repeat partly outside slice range, so correct
|
|
2837 # the repeat end and length to the slice size if needed
|
|
2838 if ($end > $dnalen) {
|
|
2839 $end = $dnalen;
|
|
2840 $length = ($end - $start) + 1;
|
|
2841 }
|
|
2842
|
|
2843 $start--;
|
|
2844
|
|
2845 my $padstr;
|
|
2846 # if we decide to define masking on the base of the repeat_type, we'll need
|
|
2847 # to add the following, and the other commented line few lines below.
|
|
2848 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type;
|
|
2849 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
|
|
2850 my $rc_name = "repeat_name_" . $f->repeat_consensus->name;
|
|
2851
|
|
2852 my $masking_type;
|
|
2853 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type});
|
|
2854 $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class});
|
|
2855 $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name});
|
|
2856
|
|
2857 $masking_type = $soft_mask unless (defined $masking_type);
|
|
2858
|
|
2859 if ($masking_type) {
|
|
2860 $padstr = lc substr ($$dnaref,$start,$length);
|
|
2861 } else {
|
|
2862 $padstr = 'N' x $length;
|
|
2863 }
|
|
2864 substr ($$dnaref,$start,$length) = $padstr;
|
|
2865 }
|
|
2866 }
|
|
2867
|
|
2868
|
|
2869 =head2 get_all_SearchFeatures
|
|
2870
|
|
2871 Arg [1] : scalar $ticket_ids
|
|
2872 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
|
|
2873 Description: Retreives all search features for stored blast
|
|
2874 results for the ticket that overlap this slice
|
|
2875 Returntype : listref of Bio::EnsEMBL::SeqFeatures
|
|
2876 Exceptions : none
|
|
2877 Caller : general (webby!)
|
|
2878 Status : Stable
|
|
2879
|
|
2880 =cut
|
|
2881
|
|
2882 sub get_all_SearchFeatures {
|
|
2883 my $self = shift;
|
|
2884 my $ticket = shift;
|
|
2885 local $_;
|
|
2886 unless($ticket) {
|
|
2887 throw("ticket argument is required");
|
|
2888 }
|
|
2889
|
|
2890 if(!$self->adaptor()) {
|
|
2891 warning("Cannot get SearchFeatures without an attached adaptor");
|
|
2892 return [];
|
|
2893 }
|
|
2894
|
|
2895 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
|
|
2896
|
|
2897 my $offset = $self->start-1;
|
|
2898
|
|
2899 my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
|
|
2900
|
|
2901 foreach( @$features ) {
|
|
2902 $_->start( $_->start - $offset );
|
|
2903 $_->end( $_->end - $offset );
|
|
2904 };
|
|
2905 return $features;
|
|
2906
|
|
2907 }
|
|
2908
|
|
2909 =head2 get_all_AssemblyExceptionFeatures
|
|
2910
|
|
2911 Arg [1] : string $set (optional)
|
|
2912 Example : $slice->get_all_AssemblyExceptionFeatures();
|
|
2913 Description: Retreives all misc features which overlap this slice. If
|
|
2914 a set code is provided only features which are members of
|
|
2915 the requested set are returned.
|
|
2916 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
|
|
2917 Exceptions : none
|
|
2918 Caller : general
|
|
2919 Status : Stable
|
|
2920
|
|
2921 =cut
|
|
2922
|
|
2923 sub get_all_AssemblyExceptionFeatures {
|
|
2924 my $self = shift;
|
|
2925 my $misc_set = shift;
|
|
2926
|
|
2927 my $adaptor = $self->adaptor();
|
|
2928
|
|
2929 if(!$adaptor) {
|
|
2930 warning('Cannot retrieve features without attached adaptor.');
|
|
2931 return [];
|
|
2932 }
|
|
2933
|
|
2934 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor();
|
|
2935
|
|
2936 return $aefa->fetch_all_by_Slice($self);
|
|
2937 }
|
|
2938
|
|
2939
|
|
2940
|
|
2941 =head2 get_all_MiscFeatures
|
|
2942
|
|
2943 Arg [1] : string $set (optional)
|
|
2944 Arg [2] : string $database (optional)
|
|
2945 Example : $slice->get_all_MiscFeatures('cloneset');
|
|
2946 Description: Retreives all misc features which overlap this slice. If
|
|
2947 a set code is provided only features which are members of
|
|
2948 the requested set are returned.
|
|
2949 Returntype : listref of Bio::EnsEMBL::MiscFeatures
|
|
2950 Exceptions : none
|
|
2951 Caller : general
|
|
2952 Status : Stable
|
|
2953
|
|
2954 =cut
|
|
2955
|
|
2956 sub get_all_MiscFeatures {
|
|
2957 my $self = shift;
|
|
2958 my $misc_set = shift;
|
|
2959 my $dbtype = shift;
|
|
2960 my $msa;
|
|
2961
|
|
2962 my $adaptor = $self->adaptor();
|
|
2963 if(!$adaptor) {
|
|
2964 warning('Cannot retrieve features without attached adaptor.');
|
|
2965 return [];
|
|
2966 }
|
|
2967
|
|
2968 my $mfa;
|
|
2969 if($dbtype) {
|
|
2970 my $db = $reg->get_db($adaptor->db(), $dbtype);
|
|
2971 if(defined($db)){
|
|
2972 $mfa = $reg->get_adaptor( lc($db->species()), $db->group(), "miscfeature" );
|
|
2973 } else{
|
|
2974 $mfa = $reg->get_adaptor( $adaptor->db()->species(), $dbtype, "miscfeature" );
|
|
2975 }
|
|
2976 if(!defined $mfa) {
|
|
2977 warning( "$dbtype misc features not available" );
|
|
2978 return [];
|
|
2979 }
|
|
2980 } else {
|
|
2981 $mfa = $adaptor->db->get_MiscFeatureAdaptor();
|
|
2982 }
|
|
2983
|
|
2984 if($misc_set) {
|
|
2985 return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set);
|
|
2986 }
|
|
2987
|
|
2988 return $mfa->fetch_all_by_Slice($self);
|
|
2989 }
|
|
2990
|
|
2991 =head2 get_all_MarkerFeatures
|
|
2992
|
|
2993 Arg [1] : (optional) string logic_name
|
|
2994 The logic name of the marker features to retrieve
|
|
2995 Arg [2] : (optional) int $priority
|
|
2996 Lower (exclusive) priority bound of the markers to retrieve
|
|
2997 Arg [3] : (optional) int $map_weight
|
|
2998 Upper (exclusive) priority bound of the markers to retrieve
|
|
2999 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
|
|
3000 Description: Retrieves all markers which lie on this slice fulfilling the
|
|
3001 specified map_weight and priority parameters (if supplied).
|
|
3002 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
|
|
3003 Exceptions : none
|
|
3004 Caller : contigview, general
|
|
3005 Status : Stable
|
|
3006
|
|
3007 =cut
|
|
3008
|
|
3009 sub get_all_MarkerFeatures {
|
|
3010 my ($self, $logic_name, $priority, $map_weight) = @_;
|
|
3011
|
|
3012 if(!$self->adaptor()) {
|
|
3013 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
|
|
3014 return [];
|
|
3015 }
|
|
3016
|
|
3017 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
|
|
3018
|
|
3019 my $feats = $ma->fetch_all_by_Slice_and_priority($self,
|
|
3020 $priority,
|
|
3021 $map_weight,
|
|
3022 $logic_name);
|
|
3023 return $feats;
|
|
3024 }
|
|
3025
|
|
3026
|
|
3027 =head2 get_MarkerFeatures_by_Name
|
|
3028
|
|
3029 Arg [1] : string marker Name
|
|
3030 The name (synonym) of the marker feature(s) to retrieve
|
|
3031 Example : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')};
|
|
3032 Description: Retrieves all markers with this ID
|
|
3033 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
|
|
3034 Exceptions : none
|
|
3035 Caller : contigview, general
|
|
3036 Status : Stable
|
|
3037
|
|
3038 =cut
|
|
3039
|
|
3040 sub get_MarkerFeatures_by_Name {
|
|
3041 my ($self, $name) = @_;
|
|
3042
|
|
3043 if(!$self->adaptor()) {
|
|
3044 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
|
|
3045 return [];
|
|
3046 }
|
|
3047
|
|
3048 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
|
|
3049
|
|
3050 my $feats = $ma->fetch_all_by_Slice_and_MarkerName($self, $name);
|
|
3051 return $feats;
|
|
3052 }
|
|
3053
|
|
3054
|
|
3055 =head2 get_all_compara_DnaAlignFeatures
|
|
3056
|
|
3057 Arg [1] : string $qy_species
|
|
3058 The name of the species to retrieve similarity features from
|
|
3059 Arg [2] : string $qy_assembly
|
|
3060 The name of the assembly to retrieve similarity features from
|
|
3061 Arg [3] : string $type
|
|
3062 The type of the alignment to retrieve similarity features from
|
|
3063 Arg [4] : <optional> compara dbadptor to use.
|
|
3064 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
|
|
3065 'MGSC3',
|
|
3066 'WGA');
|
|
3067 Description: Retrieves a list of DNA-DNA Alignments to the species specified
|
|
3068 by the $qy_species argument.
|
|
3069 The compara database must be attached to the core database
|
|
3070 for this call to work correctly. As well the compara database
|
|
3071 must have the core dbadaptors for both this species, and the
|
|
3072 query species added to function correctly.
|
|
3073 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
|
|
3074 Exceptions : warning if compara database is not available
|
|
3075 Caller : contigview
|
|
3076 Status : Stable
|
|
3077
|
|
3078 =cut
|
|
3079
|
|
3080 sub get_all_compara_DnaAlignFeatures {
|
|
3081 my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
|
|
3082
|
|
3083 if(!$self->adaptor()) {
|
|
3084 warning("Cannot retrieve DnaAlignFeatures without attached adaptor");
|
|
3085 return [];
|
|
3086 }
|
|
3087
|
|
3088 unless($qy_species && $alignment_type # && $qy_assembly
|
|
3089 ) {
|
|
3090 throw("Query species and assembly and alignmemt type arguments are required");
|
|
3091 }
|
|
3092
|
|
3093 if(!defined($compara_db)){
|
|
3094 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
|
|
3095 }
|
|
3096 unless($compara_db) {
|
|
3097 warning("Compara database must be attached to core database or passed ".
|
|
3098 "as an argument to " .
|
|
3099 "retrieve compara information");
|
|
3100 return [];
|
|
3101 }
|
|
3102
|
|
3103 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
|
|
3104 return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
|
|
3105 }
|
|
3106
|
|
3107 =head2 get_all_compara_Syntenies
|
|
3108
|
|
3109 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
|
|
3110 Arg [2] : string $method_link_type, default is "SYNTENY"
|
|
3111 Arg [3] : <optional> compara dbadaptor to use.
|
|
3112 Description: gets all the compara syntenyies for a specfic species
|
|
3113 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
|
|
3114 Status : Stable
|
|
3115
|
|
3116 =cut
|
|
3117
|
|
3118 sub get_all_compara_Syntenies {
|
|
3119 my ($self, $qy_species, $method_link_type, $compara_db) = @_;
|
|
3120
|
|
3121 if(!$self->adaptor()) {
|
|
3122 warning("Cannot retrieve features without attached adaptor");
|
|
3123 return [];
|
|
3124 }
|
|
3125
|
|
3126 unless($qy_species) {
|
|
3127 throw("Query species and assembly arguments are required");
|
|
3128 }
|
|
3129
|
|
3130 unless (defined $method_link_type) {
|
|
3131 $method_link_type = "SYNTENY";
|
|
3132 }
|
|
3133
|
|
3134 if(!defined($compara_db)){
|
|
3135 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
|
|
3136 }
|
|
3137 unless($compara_db) {
|
|
3138 warning("Compara database must be attached to core database or passed ".
|
|
3139 "as an argument to " .
|
|
3140 "retrieve compara information");
|
|
3141 return [];
|
|
3142 }
|
|
3143 my $gdba = $compara_db->get_GenomeDBAdaptor();
|
|
3144 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
|
|
3145 my $dfa = $compara_db->get_DnaFragAdaptor();
|
|
3146 my $sra = $compara_db->get_SyntenyRegionAdaptor();
|
|
3147
|
|
3148 my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
|
|
3149 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
|
|
3150 my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
|
|
3151
|
|
3152 my $cs = $self->coord_system()->name();
|
|
3153 my $sr = $self->seq_region_name();
|
|
3154 my ($dnafrag) = @{$dfa->fetch_all_by_GenomeDB_region($this_gdb, $cs, $sr)};
|
|
3155 return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end);
|
|
3156 }
|
|
3157
|
|
3158 =head2 get_all_Haplotypes
|
|
3159
|
|
3160 Arg [1] : (optional) boolean $lite_flag
|
|
3161 if true lightweight haplotype objects are used
|
|
3162 Example : @haplotypes = $slice->get_all_Haplotypes;
|
|
3163 Description: Retrieves all of the haplotypes on this slice. Only works
|
|
3164 if the haplotype adaptor has been attached to the core adaptor
|
|
3165 via $dba->add_db_adaptor('haplotype', $hdba);
|
|
3166 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
|
|
3167 Exceptions : warning is Haplotype database is not available
|
|
3168 Caller : contigview, general
|
|
3169 Status : Stable
|
|
3170
|
|
3171 =cut
|
|
3172
|
|
3173 sub get_all_Haplotypes {
|
|
3174 my($self, $lite_flag) = @_;
|
|
3175
|
|
3176 if(!$self->adaptor()) {
|
|
3177 warning("Cannot retrieve features without attached adaptor");
|
|
3178 return [];
|
|
3179 }
|
|
3180
|
|
3181 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
|
|
3182
|
|
3183 unless($haplo_db) {
|
|
3184 warning("Haplotype database must be attached to core database to " .
|
|
3185 "retrieve haplotype information" );
|
|
3186 return [];
|
|
3187 }
|
|
3188
|
|
3189 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
|
|
3190
|
|
3191 my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
|
|
3192
|
|
3193 return $haplotypes;
|
|
3194 }
|
|
3195
|
|
3196
|
|
3197 sub get_all_DASFactories {
|
|
3198 my $self = shift;
|
|
3199 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
|
|
3200 }
|
|
3201
|
|
3202 sub get_all_DASFeatures_dsn {
|
|
3203 my ($self, $source_type, $dsn) = @_;
|
|
3204
|
|
3205 if(!$self->adaptor()) {
|
|
3206 warning("Cannot retrieve features without attached adaptor");
|
|
3207 return [];
|
|
3208 }
|
|
3209 my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3210
|
|
3211 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
|
|
3212 }
|
|
3213
|
|
3214 =head2 get_all_DAS_Features
|
|
3215
|
|
3216 Arg [1] : none
|
|
3217 Example : $features = $slice->get_all_DASFeatures;
|
|
3218 Description: Retreives a hash reference to a hash of DAS feature
|
|
3219 sets, keyed by the DNS, NOTE the values of this hash
|
|
3220 are an anonymous array containing:
|
|
3221 (1) a pointer to an array of features;
|
|
3222 (2) a pointer to the DAS stylesheet
|
|
3223 Returntype : hashref of Bio::SeqFeatures
|
|
3224 Exceptions : ?
|
|
3225 Caller : webcode
|
|
3226 Status : Stable
|
|
3227
|
|
3228 =cut
|
|
3229 sub get_all_DAS_Features{
|
|
3230 my ($self) = @_;
|
|
3231
|
|
3232 $self->{_das_features} ||= {}; # Cache
|
|
3233 $self->{_das_styles} ||= {}; # Cache
|
|
3234 $self->{_das_segments} ||= {}; # Cache
|
|
3235 my %das_features;
|
|
3236 my %das_styles;
|
|
3237 my %das_segments;
|
|
3238 my $slice = $self;
|
|
3239
|
|
3240 foreach my $dasfact( @{$self->get_all_DASFactories} ){
|
|
3241 my $dsn = $dasfact->adaptor->dsn;
|
|
3242 my $name = $dasfact->adaptor->name;
|
|
3243 # my $type = $dasfact->adaptor->type;
|
|
3244 my $url = $dasfact->adaptor->url;
|
|
3245
|
|
3246 my ($type) = $dasfact->adaptor->mapping;
|
|
3247 if (ref $type eq 'ARRAY') {
|
|
3248 $type = shift @$type;
|
|
3249 }
|
|
3250 $type ||= $dasfact->adaptor->type;
|
|
3251 # Construct a cache key : SOURCE_URL/TYPE
|
|
3252 # Need the type to handle sources that serve multiple types of features
|
|
3253
|
|
3254 my $key = join('/', $name, $type);
|
|
3255 if( $self->{_das_features}->{$key} ){ # Use cached
|
|
3256 $das_features{$name} = $self->{_das_features}->{$key};
|
|
3257 $das_styles{$name} = $self->{_das_styles}->{$key};
|
|
3258 $das_segments{$name} = $self->{_das_segments}->{$key};
|
|
3259 } else { # Get fresh data
|
|
3260 my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
|
|
3261 $self->{_das_features}->{$key} = $featref;
|
|
3262 $self->{_das_styles}->{$key} = $styleref;
|
|
3263 $self->{_das_segments}->{$key} = $segref;
|
|
3264 $das_features{$name} = $featref;
|
|
3265 $das_styles{$name} = $styleref;
|
|
3266 $das_segments{$name} = $segref;
|
|
3267 }
|
|
3268 }
|
|
3269
|
|
3270 return (\%das_features, \%das_styles, \%das_segments);
|
|
3271 }
|
|
3272
|
|
3273 sub get_all_DASFeatures{
|
|
3274 my ($self, $source_type) = @_;
|
|
3275
|
|
3276
|
|
3277 if(!$self->adaptor()) {
|
|
3278 warning("Cannot retrieve features without attached adaptor");
|
|
3279 return [];
|
|
3280 }
|
|
3281
|
|
3282 my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3283 return \%genomic_features;
|
|
3284
|
|
3285 }
|
|
3286
|
|
3287 sub old_get_all_DASFeatures{
|
|
3288 my ($self,@args) = @_;
|
|
3289
|
|
3290 if(!$self->adaptor()) {
|
|
3291 warning("Cannot retrieve features without attached adaptor");
|
|
3292 return [];
|
|
3293 }
|
|
3294
|
|
3295 my %genomic_features =
|
|
3296 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
|
|
3297 $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3298 return \%genomic_features;
|
|
3299
|
|
3300 }
|
|
3301
|
|
3302
|
|
3303 =head2 get_all_ExternalFeatures
|
|
3304
|
|
3305 Arg [1] : (optional) string $track_name
|
|
3306 If specified only features from ExternalFeatureAdaptors with
|
|
3307 the track name $track_name are retrieved.
|
|
3308 If not set, all features from every ExternalFeatureAdaptor are
|
|
3309 retrieved.
|
|
3310 Example : @x_features = @{$slice->get_all_ExternalFeatures}
|
|
3311 Description: Retrieves features on this slice from external feature adaptors
|
|
3312 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
|
|
3313 coordinates
|
|
3314 Exceptions : none
|
|
3315 Caller : general
|
|
3316 Status : Stable
|
|
3317
|
|
3318 =cut
|
|
3319
|
|
3320 sub get_all_ExternalFeatures {
|
|
3321 my ($self, $track_name) = @_;
|
|
3322
|
|
3323 if(!$self->adaptor()) {
|
|
3324 warning("Cannot retrieve features without attached adaptor");
|
|
3325 return [];
|
|
3326 }
|
|
3327
|
|
3328 my $features = [];
|
|
3329
|
|
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
|
|
3331 my @xf_adaptors = ();
|
|
3332
|
|
3333 if($track_name) {
|
|
3334 #use a specific adaptor
|
|
3335 if(exists $xfa_hash->{$track_name}) {
|
|
3336 push @xf_adaptors, $xfa_hash->{$track_name};
|
|
3337 }
|
|
3338 } else {
|
|
3339 #use all of the adaptors
|
|
3340 push @xf_adaptors, values %$xfa_hash;
|
|
3341 }
|
|
3342
|
|
3343
|
|
3344 foreach my $xfa (@xf_adaptors) {
|
|
3345 push @$features, @{$xfa->fetch_all_by_Slice($self)};
|
|
3346 }
|
|
3347
|
|
3348 return $features;
|
|
3349 }
|
|
3350
|
|
3351
|
|
3352 =head2 get_all_DitagFeatures
|
|
3353
|
|
3354 Arg [1] : (optional) string ditag type
|
|
3355 Arg [1] : (optional) string logic_name
|
|
3356 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
|
|
3357 Description: Retrieves the DitagFeatures of a specific type which overlap
|
|
3358 this slice with. If type is not defined, all features are
|
|
3359 retrieved.
|
|
3360 Returntype : listref of Bio::EnsEMBL::DitagFeatures
|
|
3361 Exceptions : warning if slice does not have attached adaptor
|
|
3362 Caller : general
|
|
3363 Status : Stable
|
|
3364
|
|
3365 =cut
|
|
3366
|
|
3367 sub get_all_DitagFeatures {
|
|
3368 my ($self, $type, $logic_name) = @_;
|
|
3369
|
|
3370 if(!$self->adaptor()) {
|
|
3371 warning('Cannot get DitagFeatures without attached adaptor');
|
|
3372 return [];
|
|
3373 }
|
|
3374
|
|
3375 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor();
|
|
3376
|
|
3377 return $dfa->fetch_all_by_Slice($self, $type, $logic_name);
|
|
3378 }
|
|
3379
|
|
3380
|
|
3381
|
|
3382
|
|
3383 # GENERIC FEATURES (See DBAdaptor.pm)
|
|
3384
|
|
3385 =head2 get_generic_features
|
|
3386
|
|
3387 Arg [1] : (optional) List of names of generic feature types to return.
|
|
3388 If no feature names are given, all generic features are
|
|
3389 returned.
|
|
3390 Example : my %features = %{$slice->get_generic_features()};
|
|
3391 Description: Gets generic features via the generic feature adaptors that
|
|
3392 have been added via DBAdaptor->add_GenricFeatureAdaptor (if
|
|
3393 any)
|
|
3394 Returntype : Hash of named features.
|
|
3395 Exceptions : none
|
|
3396 Caller : none
|
|
3397 Status : Stable
|
|
3398
|
|
3399 =cut
|
|
3400
|
|
3401 sub get_generic_features {
|
|
3402
|
|
3403 my ($self, @names) = @_;
|
|
3404
|
|
3405 if(!$self->adaptor()) {
|
|
3406 warning('Cannot retrieve features without attached adaptor');
|
|
3407 return [];
|
|
3408 }
|
|
3409
|
|
3410 my $db = $self->adaptor()->db();
|
|
3411
|
|
3412 my %features = (); # this will hold the results
|
|
3413
|
|
3414 # get the adaptors for each feature
|
|
3415 my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
|
|
3416
|
|
3417 foreach my $adaptor_name (keys(%adaptors)) {
|
|
3418
|
|
3419 my $adaptor_obj = $adaptors{$adaptor_name};
|
|
3420 # get the features and add them to the hash
|
|
3421 my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
|
|
3422 # add each feature to the hash to be returned
|
|
3423 foreach my $feature (@$features_ref) {
|
|
3424 $features{$adaptor_name} = $feature;
|
|
3425 }
|
|
3426 }
|
|
3427
|
|
3428 return \%features;
|
|
3429
|
|
3430 }
|
|
3431
|
|
3432 =head2 project_to_slice
|
|
3433
|
|
3434 Arg [1] : Slice to project to.
|
|
3435 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
|
|
3436 foreach my $segment ( @$chr_projection ){
|
|
3437 $chr_slice = $segment->to_Slice();
|
|
3438 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
|
|
3439 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
|
|
3440 '-'.$chr_slice->end().
|
|
3441 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
|
|
3442 }
|
|
3443 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
|
|
3444 and we want to state which one to project to.
|
|
3445 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
|
|
3446 can also be used as [$start,$end,$slice] triplets.
|
|
3447 Exceptions : none
|
|
3448 Caller : none
|
|
3449 Status : At Risk
|
|
3450
|
|
3451 =cut
|
|
3452
|
|
3453 sub project_to_slice {
|
|
3454 my $self = shift;
|
|
3455 my $to_slice = shift;
|
|
3456
|
|
3457 throw('Slice argument is required') if(!$to_slice);
|
|
3458
|
|
3459 my $slice_adaptor = $self->adaptor();
|
|
3460
|
|
3461 if(!$slice_adaptor) {
|
|
3462 warning("Cannot project without attached adaptor.");
|
|
3463 return [];
|
|
3464 }
|
|
3465
|
|
3466
|
|
3467 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
|
|
3468
|
|
3469 my $cs = $to_slice->coord_system();
|
|
3470 my $slice_cs = $self->coord_system();
|
|
3471 my $to_slice_id = $to_slice->get_seq_region_id;
|
|
3472
|
|
3473 my @projection;
|
|
3474 my $current_start = 1;
|
|
3475
|
|
3476 # decompose this slice into its symlinked components.
|
|
3477 # this allows us to handle haplotypes and PARs
|
|
3478 my $normal_slice_proj =
|
|
3479 $slice_adaptor->fetch_normalized_slice_projection($self);
|
|
3480 foreach my $segment (@$normal_slice_proj) {
|
|
3481 my $normal_slice = $segment->[2];
|
|
3482
|
|
3483 $slice_cs = $normal_slice->coord_system();
|
|
3484
|
|
3485 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
|
|
3486 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
|
|
3487
|
|
3488 # perform the mapping between this slice and the requested system
|
|
3489 my @coords;
|
|
3490
|
|
3491 if( defined $asm_mapper ) {
|
|
3492 @coords = $asm_mapper->map($normal_slice->seq_region_name(),
|
|
3493 $normal_slice->start(),
|
|
3494 $normal_slice->end(),
|
|
3495 $normal_slice->strand(),
|
|
3496 $slice_cs, undef, $to_slice);
|
|
3497 } else {
|
|
3498 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
|
|
3499 $normal_slice->end());
|
|
3500 }
|
|
3501
|
|
3502 my $last_rank =0;
|
|
3503 #construct a projection from the mapping results and return it
|
|
3504 foreach my $coord (@coords) {
|
|
3505 my $coord_start = $coord->start();
|
|
3506 my $coord_end = $coord->end();
|
|
3507 my $length = $coord_end - $coord_start + 1;
|
|
3508
|
|
3509
|
|
3510 if( $last_rank != $coord->rank){
|
|
3511 $current_start = 1;
|
|
3512 }
|
|
3513 $last_rank = $coord->rank;
|
|
3514
|
|
3515 #skip gaps
|
|
3516 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
|
|
3517 if($coord->id != $to_slice_id){ # for multiple mappings only get the correct one
|
|
3518 $current_start += $length;
|
|
3519 next;
|
|
3520 }
|
|
3521 my $coord_cs = $coord->coord_system();
|
|
3522
|
|
3523 # If the normalised projection just ended up mapping to the
|
|
3524 # same coordinate system we were already in then we should just
|
|
3525 # return the original region. This can happen for example, if we
|
|
3526 # were on a PAR region on Y which refered to X and a projection to
|
|
3527 # 'toplevel' was requested.
|
|
3528 # if($coord_cs->equals($slice_cs)) {
|
|
3529 # # trim off regions which are not defined
|
|
3530 # return $self->_constrain_to_region();
|
|
3531 # }
|
|
3532
|
|
3533 #create slices for the mapped-to coord system
|
|
3534 my $slice = $slice_adaptor->fetch_by_seq_region_id(
|
|
3535 $coord->id(),
|
|
3536 $coord_start,
|
|
3537 $coord_end,
|
|
3538 $coord->strand());
|
|
3539
|
|
3540 my $current_end = $current_start + $length - 1;
|
|
3541
|
|
3542 push @projection, bless([$current_start, $current_end, $slice],
|
|
3543 "Bio::EnsEMBL::ProjectionSegment");
|
|
3544 }
|
|
3545
|
|
3546 $current_start += $length;
|
|
3547 }
|
|
3548 }
|
|
3549
|
|
3550
|
|
3551 # delete the cache as we may want to map to different set next time and old
|
|
3552 # results will be cached.
|
|
3553
|
|
3554 $mapper_aptr->delete_cache;
|
|
3555
|
|
3556 return \@projection;
|
|
3557 }
|
|
3558
|
|
3559
|
|
3560 =head2 get_all_synonyms
|
|
3561
|
|
3562 Args : none.
|
|
3563 Example : my @alternative_names = @{$slice->get_all_synonyms()};
|
|
3564 Description: get a list of alternative names for this slice
|
|
3565 Returntype : reference to list of SeqRegionSynonym objects.
|
|
3566 Exception : none
|
|
3567 Caller : general
|
|
3568 Status : At Risk
|
|
3569
|
|
3570 =cut
|
|
3571
|
|
3572 sub get_all_synonyms{
|
|
3573 my $self = shift;
|
|
3574 my $external_db_id =shift;
|
|
3575
|
|
3576 if ( !defined( $self->{'synonym'} ) ) {
|
|
3577 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
|
|
3578 $self->{'synonym'} =
|
|
3579 $adap->get_synonyms( $self->get_seq_region_id($self) );
|
|
3580 }
|
|
3581
|
|
3582 return $self->{'synonym'};
|
|
3583 }
|
|
3584
|
|
3585 =head2 add_synonym
|
|
3586
|
|
3587 Args[0] : synonym.
|
|
3588 Example : $slice->add_synonym("alt_name");
|
|
3589 Description: add an alternative name for this slice
|
|
3590 Returntype : none
|
|
3591 Exception : none
|
|
3592 Caller : general
|
|
3593 Status : At Risk
|
|
3594
|
|
3595 =cut
|
|
3596
|
|
3597 sub add_synonym{
|
|
3598 my $self = shift;
|
|
3599 my $syn = shift;
|
|
3600 my $external_db_id = shift;
|
|
3601
|
|
3602 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
|
|
3603 if ( !defined( $self->{'synonym'} ) ) {
|
|
3604 $self->{'synonym'} = $self->get_all_synonyms();
|
|
3605 }
|
|
3606 my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap,
|
|
3607 -synonym => $syn,
|
|
3608 -external_db_id => $external_db_id,
|
|
3609 -seq_region_id => $self->get_seq_region_id($self));
|
|
3610
|
|
3611 push (@{$self->{'synonym'}}, $new_syn);
|
|
3612
|
|
3613 return;
|
|
3614 }
|
|
3615
|
|
3616 =head2 summary_as_hash
|
|
3617
|
|
3618 Example : $slice_summary = $slice->summary_as_hash();
|
|
3619 Description : Retrieves a textual summary of this slice.
|
|
3620 Returns : hashref of descriptive strings
|
|
3621 =cut
|
|
3622
|
|
3623 sub summary_as_hash {
|
|
3624 my $self = shift;
|
|
3625 my %summary;
|
|
3626 $summary{'display_id'} = $self->display_id;
|
|
3627 $summary{'start'} = $self->start;
|
|
3628 $summary{'end'} = $self->end;
|
|
3629 $summary{'strand'} = $self->strand;
|
|
3630 $summary{'Is_circular'} = $self->is_circular ? "true" : "false";
|
|
3631 $summary{'region_name'} = $self->seq_region_name();
|
|
3632 return \%summary;
|
|
3633 }
|
|
3634
|
|
3635 #
|
|
3636 # Bioperl Bio::PrimarySeqI methods:
|
|
3637 #
|
|
3638
|
|
3639 =head2 id
|
|
3640
|
|
3641 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
|
|
3642
|
|
3643 =cut
|
|
3644
|
|
3645 sub id { name(@_); }
|
|
3646
|
|
3647
|
|
3648 =head2 display_id
|
|
3649
|
|
3650 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3651
|
|
3652 =cut
|
|
3653
|
|
3654 sub display_id { name(@_); }
|
|
3655
|
|
3656
|
|
3657 =head2 primary_id
|
|
3658
|
|
3659 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3660
|
|
3661 =cut
|
|
3662
|
|
3663 sub primary_id { name(@_); }
|
|
3664
|
|
3665
|
|
3666 =head2 desc
|
|
3667
|
|
3668 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3669
|
|
3670 =cut
|
|
3671
|
|
3672 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); }
|
|
3673
|
|
3674
|
|
3675 =head2 moltype
|
|
3676
|
|
3677 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
|
|
3678
|
|
3679 =cut
|
|
3680
|
|
3681 sub moltype { return 'dna'; }
|
|
3682
|
|
3683 =head2 alphabet
|
|
3684
|
|
3685 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3686
|
|
3687 =cut
|
|
3688
|
|
3689 sub alphabet { return 'dna'; }
|
|
3690
|
|
3691
|
|
3692 =head2 accession_number
|
|
3693
|
|
3694 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3695
|
|
3696 =cut
|
|
3697
|
|
3698 sub accession_number { name(@_); }
|
|
3699
|
|
3700
|
|
3701 # sub DEPRECATED METHODS #
|
|
3702 ###############################################################################
|
|
3703
|
|
3704 =head1 DEPRECATED METHODS
|
|
3705
|
|
3706 =head2 get_all_AffyFeatures
|
|
3707
|
|
3708 Description: DEPRECATED, use functionality provided by the Ensembl
|
|
3709 Functional Genomics API instead.
|
|
3710
|
|
3711 =cut
|
|
3712
|
|
3713 sub get_all_AffyFeatures {
|
|
3714 deprecate( 'Use functionality provided by the '
|
|
3715 . 'Ensembl Functional Genomics API instead.' );
|
|
3716 throw('Can not delegate deprecated functionality.');
|
|
3717
|
|
3718 # Old code:
|
|
3719
|
|
3720 # my $self = shift;
|
|
3721 # my @arraynames = @_;
|
|
3722 #
|
|
3723 # my $sa = $self->adaptor();
|
|
3724 # if ( ! $sa ) {
|
|
3725 # warning( "Cannot retrieve features without attached adaptor." );
|
|
3726 # }
|
|
3727 # my $fa = $sa->db()->get_AffyFeatureAdaptor();
|
|
3728 # my $features;
|
|
3729 #
|
|
3730 # if ( @arraynames ) {
|
|
3731 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
|
|
3732 # } else {
|
|
3733 # $features = $fa->fetch_all_by_Slice( $self );
|
|
3734 # }
|
|
3735 # return $features;
|
|
3736 }
|
|
3737
|
|
3738 =head2 get_all_OligoFeatures
|
|
3739
|
|
3740 Description: DEPRECATED, use functionality provided by the Ensembl
|
|
3741 Functional Genomics API instead.
|
|
3742
|
|
3743 =cut
|
|
3744
|
|
3745 sub get_all_OligoFeatures {
|
|
3746
|
|
3747 deprecate( 'Use functionality provided by the '
|
|
3748 . 'Ensembl Functional Genomics API instead.' );
|
|
3749 throw('Can not delegate deprecated functionality.');
|
|
3750
|
|
3751 # Old code:
|
|
3752
|
|
3753 # my $self = shift;
|
|
3754 # my @arraynames = @_;
|
|
3755 #
|
|
3756 # my $sa = $self->adaptor();
|
|
3757 # if ( ! $sa ) {
|
|
3758 # warning( "Cannot retrieve features without attached adaptor." );
|
|
3759 # }
|
|
3760 # my $fa = $sa->db()->get_OligoFeatureAdaptor();
|
|
3761 # my $features;
|
|
3762 #
|
|
3763 # if ( @arraynames ) {
|
|
3764 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
|
|
3765 # } else {
|
|
3766 # $features = $fa->fetch_all_by_Slice( $self );
|
|
3767 # }
|
|
3768 # return $features;
|
|
3769 }
|
|
3770
|
|
3771 =head2 get_all_OligoFeatures_by_type
|
|
3772
|
|
3773 Description: DEPRECATED, use functionality provided by the Ensembl
|
|
3774 Functional Genomics API instead.
|
|
3775
|
|
3776 =cut
|
|
3777
|
|
3778 sub get_all_OligoFeatures_by_type {
|
|
3779
|
|
3780 deprecate( 'Use functionality provided by the '
|
|
3781 . 'Ensembl Functional Genomics API instead.' );
|
|
3782 throw('Can not delegate deprecated functionality.');
|
|
3783
|
|
3784 # Old code:
|
|
3785
|
|
3786 # my ($self, $type, $logic_name) = @_;
|
|
3787 #
|
|
3788 # throw('Need type as parameter') if !$type;
|
|
3789 #
|
|
3790 # my $sa = $self->adaptor();
|
|
3791 # if ( ! $sa ) {
|
|
3792 # warning( "Cannot retrieve features without attached adaptor." );
|
|
3793 # }
|
|
3794 # my $fa = $sa->db()->get_OligoFeatureAdaptor();
|
|
3795 #
|
|
3796 # my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name );
|
|
3797 #
|
|
3798 # return $features;
|
|
3799 }
|
|
3800
|
|
3801 =head2 get_all_supercontig_Slices
|
|
3802
|
|
3803 Description: DEPRECATED use get_tiling_path("NTcontig") instead
|
|
3804
|
|
3805 =cut
|
|
3806
|
|
3807
|
|
3808 sub get_all_supercontig_Slices {
|
|
3809 my $self = shift;
|
|
3810
|
|
3811 deprecate("Use get_tiling_path('NTcontig') instead");
|
|
3812
|
|
3813 my $result = [];
|
|
3814
|
|
3815 if( $self->adaptor() ) {
|
|
3816 my $superctg_names =
|
|
3817 $self->adaptor()->list_overlapping_supercontigs( $self );
|
|
3818
|
|
3819 for my $name ( @$superctg_names ) {
|
|
3820 my $slice;
|
|
3821 $slice = $self->adaptor()->fetch_by_supercontig_name( $name );
|
|
3822 $slice->name( $name );
|
|
3823 push( @$result, $slice );
|
|
3824 }
|
|
3825 } else {
|
|
3826 warning( "Slice needs to be attached to a database to get supercontigs" );
|
|
3827 }
|
|
3828
|
|
3829 return $result;
|
|
3830 }
|
|
3831
|
|
3832
|
|
3833
|
|
3834
|
|
3835
|
|
3836 =head2 get_Chromosome
|
|
3837
|
|
3838 Description: DEPRECATED use this instead:
|
|
3839 $slice_adp->fetch_by_region('chromosome',
|
|
3840 $slice->seq_region_name)
|
|
3841
|
|
3842 =cut
|
|
3843
|
|
3844 sub get_Chromosome {
|
|
3845 my $self = shift @_;
|
|
3846
|
|
3847 deprecate("Use SliceAdaptor::fetch_by_region('chromosome'," .
|
|
3848 '$slice->seq_region_name) instead');
|
|
3849
|
|
3850 my $csa = $self->adaptor->db->get_CoordSystemAdaptor();
|
|
3851 my ($top_cs) = @{$csa->fetch_all()};
|
|
3852
|
|
3853 return $self->adaptor->fetch_by_region($top_cs->name(),
|
|
3854 $self->seq_region_name(),
|
|
3855 undef,undef,undef,
|
|
3856 $top_cs->version());
|
|
3857 }
|
|
3858
|
|
3859
|
|
3860
|
|
3861 =head2 chr_name
|
|
3862
|
|
3863 Description: DEPRECATED use seq_region_name() instead
|
|
3864
|
|
3865 =cut
|
|
3866
|
|
3867 sub chr_name{
|
|
3868 deprecate("Use seq_region_name() instead");
|
|
3869 seq_region_name(@_);
|
|
3870 }
|
|
3871
|
|
3872
|
|
3873
|
|
3874 =head2 chr_start
|
|
3875
|
|
3876 Description: DEPRECATED use start() instead
|
|
3877
|
|
3878 =cut
|
|
3879
|
|
3880 sub chr_start{
|
|
3881 deprecate('Use start() instead');
|
|
3882 start(@_);
|
|
3883 }
|
|
3884
|
|
3885
|
|
3886
|
|
3887 =head2 chr_end
|
|
3888
|
|
3889 Description: DEPRECATED use end() instead
|
|
3890 Returntype : int
|
|
3891 Exceptions : none
|
|
3892 Caller : SliceAdaptor, general
|
|
3893
|
|
3894 =cut
|
|
3895
|
|
3896 sub chr_end{
|
|
3897 deprecate('Use end() instead');
|
|
3898 end(@_);
|
|
3899 }
|
|
3900
|
|
3901
|
|
3902 =head2 assembly_type
|
|
3903
|
|
3904 Description: DEPRECATED use version instead
|
|
3905
|
|
3906 =cut
|
|
3907
|
|
3908 sub assembly_type{
|
|
3909 my $self = shift;
|
|
3910 deprecate('Use $slice->coord_system()->version() instead.');
|
|
3911 return $self->coord_system->version();
|
|
3912 }
|
|
3913
|
|
3914
|
|
3915 =head2 get_tiling_path
|
|
3916
|
|
3917 Description: DEPRECATED use project instead
|
|
3918
|
|
3919 =cut
|
|
3920
|
|
3921 sub get_tiling_path {
|
|
3922 my $self = shift;
|
|
3923 deprecate('Use $slice->project("seqlevel") instead.');
|
|
3924 return [];
|
|
3925 }
|
|
3926
|
|
3927
|
|
3928 =head2 dbID
|
|
3929
|
|
3930 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead
|
|
3931
|
|
3932 =cut
|
|
3933
|
|
3934 sub dbID {
|
|
3935 my $self = shift;
|
|
3936 deprecate('Use SliceAdaptor::get_seq_region_id instead.');
|
|
3937 if(!$self->adaptor) {
|
|
3938 warning('Cannot retrieve seq_region_id without attached adaptor.');
|
|
3939 return 0;
|
|
3940 }
|
|
3941 return $self->adaptor->get_seq_region_id($self);
|
|
3942 }
|
|
3943
|
|
3944
|
|
3945 =head2 get_all_MapFrags
|
|
3946
|
|
3947 Description: DEPRECATED use get_all_MiscFeatures instead
|
|
3948
|
|
3949 =cut
|
|
3950
|
|
3951 sub get_all_MapFrags {
|
|
3952 my $self = shift;
|
|
3953 deprecate('Use get_all_MiscFeatures instead');
|
|
3954 return $self->get_all_MiscFeatures(@_);
|
|
3955 }
|
|
3956
|
|
3957 =head2 has_MapSet
|
|
3958
|
|
3959 Description: DEPRECATED use get_all_MiscFeatures instead
|
|
3960
|
|
3961 =cut
|
|
3962
|
|
3963 sub has_MapSet {
|
|
3964 my( $self, $mapset_name ) = @_;
|
|
3965 deprecate('Use get_all_MiscFeatures instead');
|
|
3966 my $mfs = $self->get_all_MiscFeatures($mapset_name);
|
|
3967 return (@$mfs > 0);
|
|
3968 }
|
|
3969
|
|
3970 1;
|