Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Slice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Slice - Arbitary Slice of a genome | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 $sa = $db->get_SliceAdaptor; | |
28 | |
29 $slice = | |
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); | |
31 | |
32 # get some attributes of the slice | |
33 my $seqname = $slice->seq_region_name(); | |
34 my $start = $slice->start(); | |
35 my $end = $slice->end(); | |
36 | |
37 # get the sequence from the slice | |
38 my $seq = $slice->seq(); | |
39 | |
40 # get some features from the slice | |
41 foreach $gene ( @{ $slice->get_all_Genes } ) { | |
42 # do something with a gene | |
43 } | |
44 | |
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) { | |
46 # do something with dna-dna alignments | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 A Slice object represents a region of a genome. It can be used to retrieve | |
52 sequence or features from an area of interest. | |
53 | |
54 =head1 METHODS | |
55 | |
56 =cut | |
57 | |
58 package Bio::EnsEMBL::Slice; | |
59 use vars qw(@ISA); | |
60 use strict; | |
61 | |
62 use Bio::PrimarySeqI; | |
63 | |
64 | |
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
66 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump); | |
67 use Bio::EnsEMBL::RepeatMaskedSlice; | |
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
69 use Bio::EnsEMBL::ProjectionSegment; | |
70 use Bio::EnsEMBL::Registry; | |
71 use Bio::EnsEMBL::Utils::Iterator; | |
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor; | |
73 | |
74 use Bio::EnsEMBL::StrainSlice; | |
75 #use Bio::EnsEMBL::IndividualSlice; | |
76 #use Bio::EnsEMBL::IndividualSliceFactory; | |
77 use Bio::EnsEMBL::Mapper::RangeRegistry; | |
78 use Bio::EnsEMBL::SeqRegionSynonym; | |
79 use Scalar::Util qw(weaken isweak); | |
80 | |
81 # use Data::Dumper; | |
82 | |
83 my $reg = "Bio::EnsEMBL::Registry"; | |
84 | |
85 @ISA = qw(Bio::PrimarySeqI); | |
86 | |
87 | |
88 =head2 new | |
89 | |
90 Arg [...] : List of named arguments | |
91 Bio::EnsEMBL::CoordSystem COORD_SYSTEM | |
92 string SEQ_REGION_NAME, | |
93 int START, | |
94 int END, | |
95 int SEQ_REGION_LENGTH, (optional) | |
96 string SEQ (optional) | |
97 int STRAND, (optional, defaults to 1) | |
98 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional) | |
99 Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs, | |
100 -start => 1, | |
101 -end => 10000, | |
102 -strand => 1, | |
103 -seq_region_name => 'X', | |
104 -seq_region_length => 12e6, | |
105 -adaptor => $slice_adaptor); | |
106 Description: Creates a new slice object. A slice represents a region | |
107 of sequence in a particular coordinate system. Slices can be | |
108 used to retrieve sequence and features from an area of | |
109 interest in a genome. | |
110 | |
111 Coordinates start at 1 and are inclusive. Negative | |
112 coordinates or coordinates exceeding the length of the | |
113 seq_region are permitted. Start must be less than or equal. | |
114 to end regardless of the strand. | |
115 | |
116 Slice objects are immutable. Once instantiated their attributes | |
117 (with the exception of the adaptor) may not be altered. To | |
118 change the attributes a new slice must be created. | |
119 Returntype : Bio::EnsEMBL::Slice | |
120 Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type | |
121 Caller : general, Bio::EnsEMBL::SliceAdaptor | |
122 Status : Stable | |
123 | |
124 =cut | |
125 | |
126 sub new { | |
127 my $caller = shift; | |
128 | |
129 #new can be called as a class or object method | |
130 my $class = ref($caller) || $caller; | |
131 | |
132 my ($seq, $coord_system, $seq_region_name, $seq_region_length, | |
133 $start, $end, $strand, $adaptor, $empty) = | |
134 rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH | |
135 START END STRAND ADAPTOR EMPTY)], @_); | |
136 | |
137 #empty is only for backwards compatibility | |
138 if ($empty) { | |
139 deprecate( "Creation of empty slices is no longer needed" | |
140 . "and is deprecated" ); | |
141 my $self = bless( { 'empty' => 1 }, $class ); | |
142 $self->adaptor($adaptor); | |
143 return $self; | |
144 } | |
145 | |
146 if ( !defined($seq_region_name) ) { | |
147 throw('SEQ_REGION_NAME argument is required'); | |
148 } | |
149 if ( !defined($start) ) { throw('START argument is required') } | |
150 if ( !defined($end) ) { throw('END argument is required') } | |
151 | |
152 ## if ( $start > $end + 1 ) { | |
153 ## throw('start must be less than or equal to end+1'); | |
154 ## } | |
155 | |
156 if ( !defined($seq_region_length) ) { $seq_region_length = $end } | |
157 | |
158 if ( $seq_region_length <= 0 ) { | |
159 throw('SEQ_REGION_LENGTH must be > 0'); | |
160 } | |
161 | |
162 if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) { | |
163 throw('SEQ must be the same length as the defined LENGTH not ' | |
164 . CORE::length($seq) | |
165 . ' compared to ' | |
166 . ( $end - $start + 1 ) ); | |
167 } | |
168 | |
169 if(defined($coord_system)) { | |
170 if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){ | |
171 throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem'); | |
172 } | |
173 if($coord_system->is_top_level()) { | |
174 throw('Cannot create slice on toplevel CoordSystem.'); | |
175 } | |
176 } else { | |
177 warning("Slice without coordinate system"); | |
178 #warn(stack_trace_dump()); | |
179 } | |
180 | |
181 $strand ||= 1; | |
182 | |
183 if($strand != 1 && $strand != -1) { | |
184 throw('STRAND argument must be -1 or 1'); | |
185 } | |
186 | |
187 if(defined($adaptor)) { | |
188 if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) { | |
189 throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor'); | |
190 } | |
191 } | |
192 | |
193 my $self = bless {'coord_system' => $coord_system, | |
194 'seq' => $seq, | |
195 'seq_region_name' => $seq_region_name, | |
196 'seq_region_length' => $seq_region_length, | |
197 'start' => int($start), | |
198 'end' => int($end), | |
199 'strand' => $strand}, $class; | |
200 | |
201 $self->adaptor($adaptor); | |
202 | |
203 return $self; | |
204 | |
205 } | |
206 | |
207 =head2 new_fast | |
208 | |
209 Arg [1] : hashref to be blessed | |
210 Description: Construct a new Bio::EnsEMBL::Slice using the hashref. | |
211 Exceptions : none | |
212 Returntype : Bio::EnsEMBL::Slice | |
213 Caller : general | |
214 Status : Stable | |
215 | |
216 =cut | |
217 | |
218 | |
219 sub new_fast { | |
220 my $class = shift; | |
221 my $hashref = shift; | |
222 my $self = bless $hashref, $class; | |
223 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) ); | |
224 return $self; | |
225 } | |
226 | |
227 =head2 adaptor | |
228 | |
229 Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor | |
230 Example : $adaptor = $slice->adaptor(); | |
231 Description: Getter/Setter for the slice object adaptor used | |
232 by this slice for database interaction. | |
233 Returntype : Bio::EnsEMBL::DBSQL::SliceAdaptor | |
234 Exceptions : thorws if argument passed is not a SliceAdaptor | |
235 Caller : general | |
236 Status : Stable | |
237 | |
238 =cut | |
239 | |
240 sub adaptor{ | |
241 my $self = shift; | |
242 | |
243 if(@_) { | |
244 my $ad = shift; | |
245 if(defined($ad)) { | |
246 if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) { | |
247 throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor'); | |
248 } | |
249 } | |
250 weaken($self->{'adaptor'} = $ad); | |
251 } | |
252 | |
253 return $self->{'adaptor'}; | |
254 } | |
255 | |
256 | |
257 | |
258 =head2 seq_region_name | |
259 | |
260 Arg [1] : none | |
261 Example : $seq_region = $slice->seq_region_name(); | |
262 Description: Returns the name of the seq_region that this slice is on. For | |
263 example if this slice is in chromosomal coordinates the | |
264 seq_region_name might be 'X' or '10'. | |
265 | |
266 This function was formerly named chr_name, but since slices can | |
267 now be on coordinate systems other than chromosomal it has been | |
268 changed. | |
269 Returntype : string | |
270 Exceptions : none | |
271 Caller : general | |
272 Status : Stable | |
273 | |
274 =cut | |
275 | |
276 sub seq_region_name { | |
277 my $self = shift; | |
278 return $self->{'seq_region_name'}; | |
279 } | |
280 | |
281 | |
282 | |
283 =head2 seq_region_length | |
284 | |
285 Arg [1] : none | |
286 Example : $seq_region_length = $slice->seq_region_length(); | |
287 Description: Returns the length of the entire seq_region that this slice is | |
288 on. For example if this slice is on a chromosome this will be | |
289 the length (in basepairs) of the entire chromosome. | |
290 Returntype : int | |
291 Exceptions : none | |
292 Caller : general | |
293 Status : Stable | |
294 | |
295 =cut | |
296 | |
297 sub seq_region_length { | |
298 my $self = shift; | |
299 return $self->{'seq_region_length'}; | |
300 } | |
301 | |
302 | |
303 =head2 coord_system | |
304 | |
305 Arg [1] : none | |
306 Example : print $slice->coord_system->name(); | |
307 Description: Returns the coordinate system that this slice is on. | |
308 Returntype : Bio::EnsEMBL::CoordSystem | |
309 Exceptions : none | |
310 Caller : general | |
311 Status : Stable | |
312 | |
313 =cut | |
314 | |
315 sub coord_system { | |
316 my $self = shift; | |
317 return $self->{'coord_system'}; | |
318 } | |
319 | |
320 =head2 coord_system_name | |
321 | |
322 Arg [1] : none | |
323 Example : print $slice->coord_system_name() | |
324 Description: Convenience method. Gets the name of the coord_system which | |
325 this slice is on. | |
326 Returns undef if this Slice does not have an attached | |
327 CoordSystem. | |
328 Returntype: string or undef | |
329 Exceptions: none | |
330 Caller : general | |
331 Status : Stable | |
332 | |
333 =cut | |
334 | |
335 sub coord_system_name { | |
336 my $self = shift; | |
337 my $csystem = $self->{'coord_system'}; | |
338 return ($csystem) ? $csystem->name() : undef; | |
339 } | |
340 | |
341 | |
342 =head2 centrepoint | |
343 | |
344 Arg [1] : none | |
345 Example : $cp = $slice->centrepoint(); | |
346 Description: Returns the mid position of this slice relative to the | |
347 start of the sequence region that it was created on. | |
348 Coordinates are inclusive and start at 1. | |
349 Returntype : int | |
350 Exceptions : none | |
351 Caller : general | |
352 Status : Stable | |
353 | |
354 =cut | |
355 | |
356 sub centrepoint { | |
357 my $self = shift; | |
358 return ($self->{'start'}+$self->{'end'})/2; | |
359 } | |
360 | |
361 =head2 start | |
362 | |
363 Arg [1] : none | |
364 Example : $start = $slice->start(); | |
365 Description: Returns the start position of this slice relative to the | |
366 start of the sequence region that it was created on. | |
367 Coordinates are inclusive and start at 1. Negative coordinates | |
368 or coordinates exceeding the length of the sequence region are | |
369 permitted. Start is always less than or equal to end | |
370 regardless of the orientation of the slice. | |
371 Returntype : int | |
372 Exceptions : none | |
373 Caller : general | |
374 Status : Stable | |
375 | |
376 =cut | |
377 | |
378 sub start { | |
379 my $self = shift; | |
380 return $self->{'start'}; | |
381 } | |
382 | |
383 | |
384 | |
385 =head2 end | |
386 | |
387 Arg [1] : none | |
388 Example : $end = $slice->end(); | |
389 Description: Returns the end position of this slice relative to the | |
390 start of the sequence region that it was created on. | |
391 Coordinates are inclusive and start at 1. Negative coordinates | |
392 or coordinates exceeding the length of the sequence region are | |
393 permitted. End is always greater than or equal to start | |
394 regardless of the orientation of the slice. | |
395 Returntype : int | |
396 Exceptions : none | |
397 Caller : general | |
398 Status : Stable | |
399 | |
400 =cut | |
401 | |
402 sub end { | |
403 my $self = shift; | |
404 return $self->{'end'}; | |
405 } | |
406 | |
407 | |
408 | |
409 =head2 strand | |
410 | |
411 Arg [1] : none | |
412 Example : $strand = $slice->strand(); | |
413 Description: Returns the orientation of this slice on the seq_region it has | |
414 been created on | |
415 Returntype : int (either 1 or -1) | |
416 Exceptions : none | |
417 Caller : general, invert | |
418 Status : Stable | |
419 | |
420 =cut | |
421 | |
422 sub strand{ | |
423 my $self = shift; | |
424 return $self->{'strand'}; | |
425 } | |
426 | |
427 | |
428 | |
429 | |
430 | |
431 =head2 name | |
432 | |
433 Arg [1] : none | |
434 Example : my $results = $cache{$slice->name()}; | |
435 Description: Returns the name of this slice. The name is formatted as a colon | |
436 delimited string with the following attributes: | |
437 coord_system:version:seq_region_name:start:end:strand | |
438 | |
439 Slices with the same name are equivalent and thus the name can | |
440 act as a hash key. | |
441 Returntype : string | |
442 Exceptions : none | |
443 Caller : general | |
444 Status : Stable | |
445 | |
446 =cut | |
447 | |
448 sub name { | |
449 my $self = shift; | |
450 | |
451 my $cs = $self->{'coord_system'}; | |
452 | |
453 return join(':', | |
454 ($cs) ? $cs->name() : '', | |
455 ($cs) ? $cs->version() : '', | |
456 $self->{'seq_region_name'}, | |
457 $self->{'start'}, | |
458 $self->{'end'}, | |
459 $self->{'strand'}); | |
460 } | |
461 | |
462 | |
463 | |
464 =head2 length | |
465 | |
466 Arg [1] : none | |
467 Example : $length = $slice->length(); | |
468 Description: Returns the length of this slice in basepairs | |
469 Returntype : int | |
470 Exceptions : none | |
471 Caller : general | |
472 Status : Stable | |
473 | |
474 =cut | |
475 | |
476 sub length { | |
477 my ($self) = @_; | |
478 | |
479 my $length = $self->{'end'} - $self->{'start'} + 1; | |
480 | |
481 if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) { | |
482 $length += $self->{'seq_region_length'}; | |
483 } | |
484 | |
485 return $length; | |
486 } | |
487 | |
488 =head2 is_reference | |
489 Arg : none | |
490 Example : my $reference = $slice->is_reference() | |
491 Description: Returns 1 if slice is a reference slice else 0 | |
492 Returntype : int | |
493 Caller : general | |
494 Status : At Risk | |
495 | |
496 =cut | |
497 | |
498 sub is_reference { | |
499 my ($self) = @_; | |
500 | |
501 if ( !defined( $self->{'is_reference'} ) ) { | |
502 $self->{'is_reference'} = | |
503 $self->adaptor()->is_reference( $self->get_seq_region_id() ); | |
504 } | |
505 | |
506 return $self->{'is_reference'}; | |
507 } | |
508 | |
509 =head2 is_toplevel | |
510 Arg : none | |
511 Example : my $top = $slice->is_toplevel() | |
512 Description: Returns 1 if slice is a toplevel slice else 0 | |
513 Returntype : int | |
514 Caller : general | |
515 Status : At Risk | |
516 | |
517 =cut | |
518 | |
519 sub is_toplevel { | |
520 my ($self) = @_; | |
521 | |
522 if ( !defined( $self->{'toplevel'} ) ) { | |
523 $self->{'toplevel'} = | |
524 $self->adaptor()->is_toplevel( $self->get_seq_region_id() ); | |
525 } | |
526 | |
527 return $self->{'toplevel'}; | |
528 } | |
529 | |
530 =head2 is_circular | |
531 Arg : none | |
532 Example : my $circ = $slice->is_circular() | |
533 Description: Returns 1 if slice is a circular slice else 0 | |
534 Returntype : int | |
535 Caller : general | |
536 Status : Stable | |
537 | |
538 =cut | |
539 | |
540 sub is_circular { | |
541 my ($self) = @_; | |
542 my $adaptor = $self->adaptor(); | |
543 return 0 if ! defined $adaptor; | |
544 if (! exists $self->{'circular'}) { | |
545 my $id = $adaptor->get_seq_region_id($self); | |
546 $self->{circular} = $adaptor->is_circular($id); | |
547 } | |
548 return $self->{circular}; | |
549 } | |
550 | |
551 =head2 invert | |
552 | |
553 Arg [1] : none | |
554 Example : $inverted_slice = $slice->invert; | |
555 Description: Creates a copy of this slice on the opposite strand and | |
556 returns it. | |
557 Returntype : Bio::EnsEMBL::Slice | |
558 Exceptions : none | |
559 Caller : general | |
560 Status : Stable | |
561 | |
562 =cut | |
563 | |
564 sub invert { | |
565 my $self = shift; | |
566 | |
567 # make a shallow copy of the slice via a hash copy and flip the strand | |
568 my %s = %$self; | |
569 $s{'strand'} = $self->{'strand'} * -1; | |
570 | |
571 # reverse compliment any attached sequence | |
572 reverse_comp(\$s{'seq'}) if($s{'seq'}); | |
573 | |
574 # bless and return the copy | |
575 return bless \%s, ref $self; | |
576 } | |
577 | |
578 | |
579 | |
580 =head2 seq | |
581 | |
582 Arg [1] : none | |
583 Example : print "SEQUENCE = ", $slice->seq(); | |
584 Description: Returns the sequence of the region represented by this | |
585 slice formatted as a string. | |
586 Returntype : string | |
587 Exceptions : none | |
588 Caller : general | |
589 Status : Stable | |
590 | |
591 =cut | |
592 | |
593 sub seq { | |
594 my $self = shift; | |
595 | |
596 # special case for in-between (insert) coordinates | |
597 return '' if($self->start() == $self->end() + 1); | |
598 | |
599 return $self->{'seq'} if($self->{'seq'}); | |
600 | |
601 if($self->adaptor()) { | |
602 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); | |
603 return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)}; | |
604 } | |
605 | |
606 # no attached sequence, and no db, so just return Ns | |
607 return 'N' x $self->length(); | |
608 } | |
609 | |
610 | |
611 | |
612 =head2 subseq | |
613 | |
614 Arg [1] : int $startBasePair | |
615 relative to start of slice, which is 1. | |
616 Arg [2] : int $endBasePair | |
617 relative to start of slice. | |
618 Arg [3] : (optional) int $strand | |
619 The strand of the slice to obtain sequence from. Default | |
620 value is 1. | |
621 Description: returns string of dna sequence | |
622 Returntype : txt | |
623 Exceptions : end should be at least as big as start | |
624 strand must be set | |
625 Caller : general | |
626 Status : Stable | |
627 | |
628 =cut | |
629 | |
630 sub subseq { | |
631 my ( $self, $start, $end, $strand ) = @_; | |
632 | |
633 if ( $end+1 < $start ) { | |
634 throw("End coord + 1 is less than start coord"); | |
635 } | |
636 | |
637 # handle 'between' case for insertions | |
638 return '' if( $start == $end + 1); | |
639 | |
640 $strand = 1 unless(defined $strand); | |
641 | |
642 if ( $strand != -1 && $strand != 1 ) { | |
643 throw("Invalid strand [$strand] in call to Slice::subseq."); | |
644 } | |
645 my $subseq; | |
646 if($self->adaptor){ | |
647 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor(); | |
648 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand | |
649 ( $self, $start, | |
650 $end, $strand )}; | |
651 } else { | |
652 ## check for gap at the beginning and pad it with Ns | |
653 if ($start < 1) { | |
654 $subseq = "N" x (1 - $start); | |
655 $start = 1; | |
656 } | |
657 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1); | |
658 ## check for gap at the end and pad it with Ns | |
659 if ($end > $self->length()) { | |
660 $subseq .= "N" x ($end - $self->length()); | |
661 } | |
662 reverse_comp(\$subseq) if($strand == -1); | |
663 } | |
664 return $subseq; | |
665 } | |
666 | |
667 =head2 sub_Slice_Iterator | |
668 | |
669 Arg[1] : int The chunk size to request | |
670 Example : my $i = $slice->sub_Slice_Iterator(60000); | |
671 while($i->has_next()) { warn $i->next()->name(); } | |
672 Description : Returns an iterator which batches subslices of this Slice | |
673 in the requested chunk size | |
674 Returntype : Bio::EnsEMBL::Utils::Iterator next() will return the next | |
675 chunk of Slice | |
676 Exceptions : None | |
677 | |
678 =cut | |
679 | |
680 sub sub_Slice_Iterator { | |
681 my ($self, $chunk_size) = @_; | |
682 throw "Need a chunk size to divide the slice by" if ! $chunk_size; | |
683 my $here = 1; | |
684 my $end = $self->length(); | |
685 my $iterator_sub = sub { | |
686 while($here <= $end) { | |
687 my $there = $here + $chunk_size - 1; | |
688 $there = $end if($there > $end); | |
689 my $slice = $self->sub_Slice($here, $there); | |
690 $here = $there + 1; | |
691 return $slice; | |
692 } | |
693 return; | |
694 }; | |
695 return Bio::EnsEMBL::Utils::Iterator->new($iterator_sub); | |
696 } | |
697 | |
698 =head2 assembly_exception_type | |
699 | |
700 Example : $self->assembly_exception_type(); | |
701 Description : Returns the type of slice this is. If it is reference then you | |
702 will get 'REF' back. Otherwise you will get the first | |
703 element from C<get_all_AssemblyExceptionFeatures()>. If no | |
704 assembly exception exists you will get an empty string back. | |
705 Returntype : String | |
706 Exceptions : None | |
707 Caller : Public | |
708 Status : Beta | |
709 | |
710 =cut | |
711 | |
712 sub assembly_exception_type { | |
713 my ($self) = @_; | |
714 my $type = q{}; | |
715 if($self->is_reference()) { | |
716 $type = 'REF'; | |
717 } | |
718 else { | |
719 my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures(); | |
720 if(@{$assembly_exceptions}) { | |
721 $type = $assembly_exceptions->[0]->type(); | |
722 } | |
723 } | |
724 return $type; | |
725 } | |
726 | |
727 =head2 is_chromosome | |
728 | |
729 Example : print ($slice->is_chromosome()) ? 'I am a chromosome' : 'Not one'; | |
730 Description : Uses a number of rules known to indicate a chromosome region | |
731 other and takes into account those regions which can be | |
732 placed on a Chromsome coordinate system but in fact are not | |
733 assembled into one. | |
734 Returntype : Boolean indicates if the current object is a chromosome | |
735 Exceptions : None | |
736 | |
737 =cut | |
738 | |
739 sub is_chromosome { | |
740 my ($self) = @_; | |
741 my $coord_system = $self->coord_system->name; | |
742 my $seq_name = $self->seq_region_name; | |
743 | |
744 if (($seq_name =~ /random | |
745 |^Un\d{4}$ | |
746 |^Un\.\d{3}\.\d*$ | |
747 |E\d\d\w*$ | |
748 |_NT_ | |
749 |scaffold_ | |
750 |cutchr | |
751 |unplaced | |
752 |chunk | |
753 |clone | |
754 |contig | |
755 |genescaffold | |
756 |group | |
757 |reftig | |
758 |supercontig | |
759 |ultracontig | |
760 /x) or ( $coord_system !~ /^chromosome$/i )) { | |
761 return 0; | |
762 } | |
763 | |
764 return 1; | |
765 } | |
766 | |
767 | |
768 =head2 get_base_count | |
769 | |
770 Arg [1] : none | |
771 Example : $c_count = $slice->get_base_count->{'c'}; | |
772 Description: Retrieves a hashref containing the counts of each bases in the | |
773 sequence spanned by this slice. The format of the hash is : | |
774 { 'a' => num, | |
775 'c' => num, | |
776 't' => num, | |
777 'g' => num, | |
778 'n' => num, | |
779 '%gc' => num } | |
780 | |
781 All bases which are not in the set [A,a,C,c,T,t,G,g] are | |
782 included in the 'n' count. The 'n' count could therefore be | |
783 inclusive of ambiguity codes such as 'y'. | |
784 The %gc is the ratio of GC to AT content as in: | |
785 total(GC)/total(ACTG) * 100 | |
786 This function is conservative in its memory usage and scales to | |
787 work for entire chromosomes. | |
788 Returntype : hashref | |
789 Exceptions : none | |
790 Caller : general | |
791 Status : Stable | |
792 | |
793 =cut | |
794 | |
795 sub get_base_count { | |
796 my $self = shift; | |
797 | |
798 my $a = 0; | |
799 my $c = 0; | |
800 my $t = 0; | |
801 my $g = 0; | |
802 | |
803 my $start = 1; | |
804 my $end; | |
805 | |
806 my $RANGE = 100_000; | |
807 my $len = $self->length(); | |
808 | |
809 my $seq; | |
810 | |
811 while ( $start <= $len ) { | |
812 $end = $start + $RANGE - 1; | |
813 $end = $len if ( $end > $len ); | |
814 | |
815 $seq = $self->subseq( $start, $end ); | |
816 | |
817 $a += $seq =~ tr/Aa//; | |
818 $c += $seq =~ tr/Cc//; | |
819 $t += $seq =~ tr/Tt//; | |
820 $g += $seq =~ tr/Gg//; | |
821 | |
822 $start = $end + 1; | |
823 } | |
824 | |
825 my $actg = $a + $c + $t + $g; | |
826 | |
827 my $gc_content = 0; | |
828 if ( $actg > 0 ) { # Avoid dividing by 0 | |
829 $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 ); | |
830 } | |
831 | |
832 return { 'a' => $a, | |
833 'c' => $c, | |
834 't' => $t, | |
835 'g' => $g, | |
836 'n' => $len - $actg, | |
837 '%gc' => $gc_content }; | |
838 } | |
839 | |
840 | |
841 | |
842 =head2 project | |
843 | |
844 Arg [1] : string $name | |
845 The name of the coordinate system to project this slice onto | |
846 Arg [2] : string $version | |
847 The version of the coordinate system (such as 'NCBI34') to | |
848 project this slice onto | |
849 Example : | |
850 my $clone_projection = $slice->project('clone'); | |
851 | |
852 foreach my $seg (@$clone_projection) { | |
853 my $clone = $segment->to_Slice(); | |
854 print $slice->seq_region_name(), ':', $seg->from_start(), '-', | |
855 $seg->from_end(), ' -> ', | |
856 $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(), | |
857 $clone->strand(), "\n"; | |
858 } | |
859 Description: Returns the results of 'projecting' this slice onto another | |
860 coordinate system. Projecting to a coordinate system that | |
861 the slice is assembled from is analagous to retrieving a tiling | |
862 path. This method may also be used to 'project up' to a higher | |
863 level coordinate system, however. | |
864 | |
865 This method returns a listref of triplets [start,end,slice] | |
866 which represents the projection. The start and end defined the | |
867 region of this slice which is made up of the third value of | |
868 the triplet: a slice in the requested coordinate system. | |
869 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which | |
870 can also be used as [$start,$end,$slice] triplets | |
871 Exceptions : none | |
872 Caller : general | |
873 Status : Stable | |
874 | |
875 =cut | |
876 | |
877 sub project { | |
878 my $self = shift; | |
879 my $cs_name = shift; | |
880 my $cs_version = shift; | |
881 | |
882 throw('Coord_system name argument is required') if(!$cs_name); | |
883 | |
884 my $slice_adaptor = $self->adaptor(); | |
885 | |
886 if(!$slice_adaptor) { | |
887 warning("Cannot project without attached adaptor."); | |
888 return []; | |
889 } | |
890 | |
891 if(!$self->coord_system()) { | |
892 warning("Cannot project without attached coord system."); | |
893 return []; | |
894 } | |
895 | |
896 | |
897 my $db = $slice_adaptor->db(); | |
898 my $csa = $db->get_CoordSystemAdaptor(); | |
899 my $cs = $csa->fetch_by_name($cs_name, $cs_version); | |
900 my $slice_cs = $self->coord_system(); | |
901 | |
902 if(!$cs) { | |
903 throw("Cannot project to unknown coordinate system " . | |
904 "[$cs_name $cs_version]"); | |
905 } | |
906 | |
907 # no mapping is needed if the requested coord system is the one we are in | |
908 # but we do need to check if some of the slice is outside of defined regions | |
909 if($slice_cs->equals($cs)) { | |
910 return $self->_constrain_to_region(); | |
911 } | |
912 | |
913 my @projection; | |
914 my $current_start = 1; | |
915 | |
916 # decompose this slice into its symlinked components. | |
917 # this allows us to handle haplotypes and PARs | |
918 my $normal_slice_proj = | |
919 $slice_adaptor->fetch_normalized_slice_projection($self); | |
920 foreach my $segment (@$normal_slice_proj) { | |
921 my $normal_slice = $segment->[2]; | |
922 | |
923 $slice_cs = $normal_slice->coord_system(); | |
924 | |
925 my $asma = $db->get_AssemblyMapperAdaptor(); | |
926 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs); | |
927 | |
928 # perform the mapping between this slice and the requested system | |
929 my @coords; | |
930 | |
931 if( defined $asm_mapper ) { | |
932 @coords = $asm_mapper->map($normal_slice->seq_region_name(), | |
933 $normal_slice->start(), | |
934 $normal_slice->end(), | |
935 $normal_slice->strand(), | |
936 $slice_cs); | |
937 } else { | |
938 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(), | |
939 $normal_slice->end()); | |
940 } | |
941 | |
942 | |
943 # my $last_rank = 0; | |
944 #construct a projection from the mapping results and return it | |
945 foreach my $coord (@coords) { | |
946 my $coord_start = $coord->start(); | |
947 my $coord_end = $coord->end(); | |
948 my $length = $coord_end - $coord_start + 1; | |
949 | |
950 if ( $coord_start > $coord_end ) { | |
951 $length = | |
952 $normal_slice->seq_region_length() - | |
953 $coord_start + | |
954 $coord_end + 1; | |
955 } | |
956 | |
957 # if( $last_rank != $coord->rank){ | |
958 # $current_start = 1; | |
959 # print "LAST rank has changed to ".$coord->rank."from $last_rank \n"; | |
960 # } | |
961 # $last_rank = $coord->rank; | |
962 | |
963 #skip gaps | |
964 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
965 | |
966 my $coord_cs = $coord->coord_system(); | |
967 | |
968 # If the normalised projection just ended up mapping to the | |
969 # same coordinate system we were already in then we should just | |
970 # return the original region. This can happen for example, if we | |
971 # were on a PAR region on Y which refered to X and a projection to | |
972 # 'toplevel' was requested. | |
973 if($coord_cs->equals($slice_cs)) { | |
974 # trim off regions which are not defined | |
975 return $self->_constrain_to_region(); | |
976 } | |
977 #create slices for the mapped-to coord system | |
978 my $slice = $slice_adaptor->fetch_by_seq_region_id( | |
979 $coord->id(), | |
980 $coord_start, | |
981 $coord_end, | |
982 $coord->strand()); | |
983 | |
984 my $current_end = $current_start + $length - 1; | |
985 | |
986 if ($current_end > $slice->seq_region_length() && $slice->is_circular ) { | |
987 $current_end -= $slice->seq_region_length(); | |
988 } | |
989 | |
990 push @projection, bless([$current_start, $current_end, $slice], | |
991 "Bio::EnsEMBL::ProjectionSegment"); | |
992 } | |
993 | |
994 $current_start += $length; | |
995 } | |
996 } | |
997 | |
998 return \@projection; | |
999 } | |
1000 | |
1001 | |
1002 sub _constrain_to_region { | |
1003 my $self = shift; | |
1004 | |
1005 my $entire_len = $self->seq_region_length(); | |
1006 | |
1007 #if the slice has negative coordinates or coordinates exceeding the | |
1008 #exceeding length of the sequence region we want to shrink the slice to | |
1009 #the defined region | |
1010 | |
1011 if($self->{'start'} > $entire_len || $self->{'end'} < 1) { | |
1012 #none of this slice is in a defined region | |
1013 return []; | |
1014 } | |
1015 | |
1016 my $right_contract = 0; | |
1017 my $left_contract = 0; | |
1018 if($self->{'end'} > $entire_len) { | |
1019 $right_contract = $entire_len - $self->{'end'}; | |
1020 } | |
1021 if($self->{'start'} < 1) { | |
1022 $left_contract = $self->{'start'} - 1; | |
1023 } | |
1024 | |
1025 my $new_slice; | |
1026 if($left_contract || $right_contract) { | |
1027 #if slice in negative strand, need to swap contracts | |
1028 if ($self->strand == 1) { | |
1029 $new_slice = $self->expand($left_contract, $right_contract); | |
1030 } | |
1031 elsif ($self->strand == -1) { | |
1032 $new_slice = $self->expand($right_contract, $left_contract); | |
1033 } | |
1034 } else { | |
1035 $new_slice = $self; | |
1036 } | |
1037 | |
1038 return [bless [1-$left_contract, $self->length()+$right_contract, | |
1039 $new_slice], "Bio::EnsEMBL::ProjectionSegment" ]; | |
1040 } | |
1041 | |
1042 | |
1043 =head2 expand | |
1044 | |
1045 Arg [1] : (optional) int $five_prime_expand | |
1046 The number of basepairs to shift this slices five_prime | |
1047 coordinate by. Positive values make the slice larger, | |
1048 negative make the slice smaller. | |
1049 coordinate left. | |
1050 Default = 0. | |
1051 Arg [2] : (optional) int $three_prime_expand | |
1052 The number of basepairs to shift this slices three_prime | |
1053 coordinate by. Positive values make the slice larger, | |
1054 negative make the slice smaller. | |
1055 Default = 0. | |
1056 Arg [3] : (optional) bool $force_expand | |
1057 if set to 1, then the slice will be contracted even in the case | |
1058 when shifts $five_prime_expand and $three_prime_expand overlap. | |
1059 In that case $five_prime_expand and $three_prime_expand will be set | |
1060 to a maximum possible number and that will result in the slice | |
1061 which would have only 2pbs. | |
1062 Default = 0. | |
1063 Arg [4] : (optional) int* $fpref | |
1064 The reference to a number of basepairs to shift this slices five_prime | |
1065 coordinate by. Normally it would be set to $five_prime_expand. | |
1066 But in case when $five_prime_expand shift can not be applied and | |
1067 $force_expand is set to 1, then $$fpref will contain the maximum possible | |
1068 shift | |
1069 Arg [5] : (optional) int* $tpref | |
1070 The reference to a number of basepairs to shift this slices three_prime | |
1071 coordinate by. Normally it would be set to $three_prime_expand. | |
1072 But in case when $five_prime_expand shift can not be applied and | |
1073 $force_expand is set to 1, then $$tpref will contain the maximum possible | |
1074 shift | |
1075 Example : my $expanded_slice = $slice->expand( 1000, 1000); | |
1076 my $contracted_slice = $slice->expand(-1000,-1000); | |
1077 my $shifted_right_slice = $slice->expand(-1000, 1000); | |
1078 my $shifted_left_slice = $slice->expand( 1000,-1000); | |
1079 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift); | |
1080 | |
1081 Description: Returns a slice which is a resized copy of this slice. The | |
1082 start and end are moved outwards from the center of the slice | |
1083 if positive values are provided and moved inwards if negative | |
1084 values are provided. This slice remains unchanged. A slice | |
1085 may not be contracted below 1bp but may grow to be arbitrarily | |
1086 large. | |
1087 Returntype : Bio::EnsEMBL::Slice | |
1088 Exceptions : warning if an attempt is made to contract the slice below 1bp | |
1089 Caller : general | |
1090 Status : Stable | |
1091 | |
1092 =cut | |
1093 | |
1094 sub expand { | |
1095 my $self = shift; | |
1096 my $five_prime_shift = shift || 0; | |
1097 my $three_prime_shift = shift || 0; | |
1098 my $force_expand = shift || 0; | |
1099 my $fpref = shift; | |
1100 my $tpref = shift; | |
1101 | |
1102 if ( $self->{'seq'} ) { | |
1103 warning( | |
1104 "Cannot expand a slice which has a manually attached sequence "); | |
1105 return undef; | |
1106 } | |
1107 | |
1108 my $sshift = $five_prime_shift; | |
1109 my $eshift = $three_prime_shift; | |
1110 | |
1111 if ( $self->{'strand'} != 1 ) { | |
1112 $eshift = $five_prime_shift; | |
1113 $sshift = $three_prime_shift; | |
1114 } | |
1115 | |
1116 my $new_start = $self->{'start'} - $sshift; | |
1117 my $new_end = $self->{'end'} + $eshift; | |
1118 | |
1119 if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) { | |
1120 | |
1121 if ( $new_start <= 0 ) { | |
1122 $new_start = $self->seq_region_length() + $new_start; | |
1123 } | |
1124 if ( $new_start > $self->seq_region_length() ) { | |
1125 $new_start -= $self->seq_region_length(); | |
1126 } | |
1127 | |
1128 if ( $new_end <= 0 ) { | |
1129 $new_end = $self->seq_region_length() + $new_end; | |
1130 } | |
1131 if ( $new_end > $self->seq_region_length() ) { | |
1132 $new_end -= $self->seq_region_length(); | |
1133 } | |
1134 | |
1135 } | |
1136 | |
1137 if ( $new_start > $new_end && (not $self->is_circular() ) ) { | |
1138 | |
1139 if ($force_expand) { | |
1140 # Apply max possible shift, if force_expand is set | |
1141 if ( $sshift < 0 ) { | |
1142 # if we are contracting the slice from the start - move the | |
1143 # start just before the end | |
1144 $new_start = $new_end - 1; | |
1145 $sshift = $self->{start} - $new_start; | |
1146 } | |
1147 | |
1148 if ( $new_start > $new_end ) { | |
1149 # if the slice still has a negative length - try to move the | |
1150 # end | |
1151 if ( $eshift < 0 ) { | |
1152 $new_end = $new_start + 1; | |
1153 $eshift = $new_end - $self->{end}; | |
1154 } | |
1155 } | |
1156 # return the values by which the primes were actually shifted | |
1157 $$tpref = $self->{strand} == 1 ? $eshift : $sshift; | |
1158 $$fpref = $self->{strand} == 1 ? $sshift : $eshift; | |
1159 } | |
1160 if ( $new_start > $new_end ) { | |
1161 throw('Slice start cannot be greater than slice end'); | |
1162 } | |
1163 } | |
1164 | |
1165 #fastest way to copy a slice is to do a shallow hash copy | |
1166 my %new_slice = %$self; | |
1167 $new_slice{'start'} = int($new_start); | |
1168 $new_slice{'end'} = int($new_end); | |
1169 | |
1170 return bless \%new_slice, ref($self); | |
1171 } ## end sub expand | |
1172 | |
1173 | |
1174 | |
1175 =head2 sub_Slice | |
1176 | |
1177 Arg 1 : int $start | |
1178 Arg 2 : int $end | |
1179 Arge [3] : int $strand | |
1180 Example : none | |
1181 Description: Makes another Slice that covers only part of this slice | |
1182 If a slice is requested which lies outside of the boundaries | |
1183 of this function will return undef. This means that | |
1184 behaviour will be consistant whether or not the slice is | |
1185 attached to the database (i.e. if there is attached sequence | |
1186 to the slice). Alternatively the expand() method or the | |
1187 SliceAdaptor::fetch_by_region method can be used instead. | |
1188 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong | |
1189 Exceptions : none | |
1190 Caller : general | |
1191 Status : Stable | |
1192 | |
1193 =cut | |
1194 | |
1195 sub sub_Slice { | |
1196 my ( $self, $start, $end, $strand ) = @_; | |
1197 | |
1198 if( $start < 1 || $start > $self->{'end'} ) { | |
1199 # throw( "start argument not valid" ); | |
1200 return undef; | |
1201 } | |
1202 | |
1203 if( $end < $start || $end > $self->{'end'} ) { | |
1204 # throw( "end argument not valid" ) | |
1205 return undef; | |
1206 } | |
1207 | |
1208 my ( $new_start, $new_end, $new_strand, $new_seq ); | |
1209 if( ! defined $strand ) { | |
1210 $strand = 1; | |
1211 } | |
1212 | |
1213 if( $self->{'strand'} == 1 ) { | |
1214 $new_start = $self->{'start'} + $start - 1; | |
1215 $new_end = $self->{'start'} + $end - 1; | |
1216 $new_strand = $strand; | |
1217 } else { | |
1218 $new_start = $self->{'end'} - $end + 1;; | |
1219 $new_end = $self->{'end'} - $start + 1; | |
1220 $new_strand = -$strand; | |
1221 } | |
1222 | |
1223 if( defined $self->{'seq'} ) { | |
1224 $new_seq = $self->subseq( $start, $end, $strand ); | |
1225 } | |
1226 | |
1227 #fastest way to copy a slice is to do a shallow hash copy | |
1228 my $new_slice = {%{$self}}; | |
1229 $new_slice->{'start'} = int($new_start); | |
1230 $new_slice->{'end'} = int($new_end); | |
1231 $new_slice->{'strand'} = $new_strand; | |
1232 if( $new_seq ) { | |
1233 $new_slice->{'seq'} = $new_seq; | |
1234 } | |
1235 weaken($new_slice->{adaptor}); | |
1236 | |
1237 return bless $new_slice, ref($self); | |
1238 } | |
1239 | |
1240 | |
1241 | |
1242 =head2 seq_region_Slice | |
1243 | |
1244 Arg [1] : none | |
1245 Example : $slice = $slice->seq_region_Slice(); | |
1246 Description: Returns a slice which spans the whole seq_region which this slice | |
1247 is on. For example if this is a slice which spans a small region | |
1248 of chromosome X, this method will return a slice which covers the | |
1249 entire chromosome X. The returned slice will always have strand | |
1250 of 1 and start of 1. This method cannot be used if the sequence | |
1251 of the slice has been set manually. | |
1252 Returntype : Bio::EnsEMBL::Slice | |
1253 Exceptions : warning if called when sequence of Slice has been set manually. | |
1254 Caller : general | |
1255 Status : Stable | |
1256 | |
1257 =cut | |
1258 | |
1259 sub seq_region_Slice { | |
1260 my $self = shift; | |
1261 | |
1262 if($self->{'seq'}){ | |
1263 warning("Cannot get a seq_region_Slice of a slice which has manually ". | |
1264 "attached sequence "); | |
1265 return undef; | |
1266 } | |
1267 | |
1268 # quick shallow copy | |
1269 my $slice; | |
1270 %{$slice} = %{$self}; | |
1271 bless $slice, ref($self); | |
1272 weaken($slice->{adaptor}); | |
1273 | |
1274 $slice->{'start'} = 1; | |
1275 $slice->{'end'} = $slice->{'seq_region_length'}; | |
1276 $slice->{'strand'} = 1; | |
1277 | |
1278 return $slice; | |
1279 } | |
1280 | |
1281 | |
1282 =head2 get_seq_region_id | |
1283 | |
1284 Arg [1] : none | |
1285 Example : my $seq_region_id = $slice->get_seq_region_id(); | |
1286 Description: Gets the internal identifier of the seq_region that this slice | |
1287 is on. Note that this function will not work correctly if this | |
1288 slice does not have an attached adaptor. Also note that it may | |
1289 be better to go through the SliceAdaptor::get_seq_region_id | |
1290 method if you are working with multiple databases since is | |
1291 possible to work with slices from databases with different | |
1292 internal seq_region identifiers. | |
1293 Returntype : int or undef if slices does not have attached adaptor | |
1294 Exceptions : warning if slice is not associated with a SliceAdaptor | |
1295 Caller : assembly loading scripts, general | |
1296 Status : Stable | |
1297 | |
1298 =cut | |
1299 | |
1300 sub get_seq_region_id { | |
1301 my ($self) = @_; | |
1302 | |
1303 if($self->adaptor) { | |
1304 return $self->adaptor->get_seq_region_id($self); | |
1305 } else { | |
1306 warning('Cannot retrieve seq_region_id without attached adaptor.'); | |
1307 return undef; | |
1308 } | |
1309 } | |
1310 | |
1311 | |
1312 =head2 get_all_Attributes | |
1313 | |
1314 Arg [1] : optional string $attrib_code | |
1315 The code of the attribute type to retrieve values for. | |
1316 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')}; | |
1317 @slice_attributes = @{$slice->get_all_Attributes()}; | |
1318 Description: Gets a list of Attributes of this slice''s seq_region. | |
1319 Optionally just get Attrubutes for given code. | |
1320 Returntype : listref Bio::EnsEMBL::Attribute | |
1321 Exceptions : warning if slice does not have attached adaptor | |
1322 Caller : general | |
1323 Status : Stable | |
1324 | |
1325 =cut | |
1326 | |
1327 sub get_all_Attributes { | |
1328 my $self = shift; | |
1329 my $attrib_code = shift; | |
1330 | |
1331 my $result; | |
1332 my @results; | |
1333 | |
1334 if(!$self->adaptor()) { | |
1335 warning('Cannot get attributes without an adaptor.'); | |
1336 return []; | |
1337 } | |
1338 | |
1339 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor(); | |
1340 | |
1341 if( defined $attrib_code ) { | |
1342 @results = grep { uc($_->code()) eq uc($attrib_code) } | |
1343 @{$attribute_adaptor->fetch_all_by_Slice( $self )}; | |
1344 $result = \@results; | |
1345 } else { | |
1346 $result = $attribute_adaptor->fetch_all_by_Slice( $self ); | |
1347 } | |
1348 | |
1349 return $result; | |
1350 } | |
1351 | |
1352 | |
1353 =head2 get_all_PredictionTranscripts | |
1354 | |
1355 Arg [1] : (optional) string $logic_name | |
1356 The name of the analysis used to generate the prediction | |
1357 transcripts obtained. | |
1358 Arg [2] : (optional) boolean $load_exons | |
1359 If set to true will force loading of all PredictionExons | |
1360 immediately rather than loading them on demand later. This | |
1361 is faster if there are a large number of PredictionTranscripts | |
1362 and the exons will be used. | |
1363 Example : @transcripts = @{$slice->get_all_PredictionTranscripts}; | |
1364 Description: Retrieves the list of prediction transcripts which overlap | |
1365 this slice with logic_name $logic_name. If logic_name is | |
1366 not defined then all prediction transcripts are retrieved. | |
1367 Returntype : listref of Bio::EnsEMBL::PredictionTranscript | |
1368 Exceptions : warning if slice does not have attached adaptor | |
1369 Caller : none | |
1370 Status : Stable | |
1371 | |
1372 =cut | |
1373 | |
1374 sub get_all_PredictionTranscripts { | |
1375 my ($self,$logic_name, $load_exons) = @_; | |
1376 | |
1377 if(!$self->adaptor()) { | |
1378 warning('Cannot get PredictionTranscripts without attached adaptor'); | |
1379 return []; | |
1380 } | |
1381 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor(); | |
1382 return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons); | |
1383 } | |
1384 | |
1385 | |
1386 | |
1387 =head2 get_all_DnaAlignFeatures | |
1388 | |
1389 Arg [1] : (optional) string $logic_name | |
1390 The name of the analysis performed on the dna align features | |
1391 to obtain. | |
1392 Arg [2] : (optional) float $score | |
1393 The mimimum score of the features to retrieve | |
1394 Arg [3] : (optional) string $dbtype | |
1395 The name of an attached database to retrieve the features from | |
1396 instead, e.g. 'otherfeatures'. | |
1397 Arg [4] : (optional) float hcoverage | |
1398 The minimum hcoverage od the featurs to retrieve | |
1399 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures}; | |
1400 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with | |
1401 logic name $logic_name and with score above $score. If | |
1402 $logic_name is not defined features of all logic names are | |
1403 retrieved. If $score is not defined features of all scores are | |
1404 retrieved. | |
1405 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures | |
1406 Exceptions : warning if slice does not have attached adaptor | |
1407 Caller : general | |
1408 Status : Stable | |
1409 | |
1410 =cut | |
1411 | |
1412 sub get_all_DnaAlignFeatures { | |
1413 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_; | |
1414 | |
1415 if(!$self->adaptor()) { | |
1416 warning('Cannot get DnaAlignFeatures without attached adaptor'); | |
1417 return []; | |
1418 } | |
1419 | |
1420 my $db; | |
1421 | |
1422 if($dbtype) { | |
1423 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1424 if(!$db) { | |
1425 warning("Don't have db $dbtype returning empty list\n"); | |
1426 return []; | |
1427 } | |
1428 } else { | |
1429 $db = $self->adaptor->db; | |
1430 } | |
1431 | |
1432 my $dafa = $db->get_DnaAlignFeatureAdaptor(); | |
1433 | |
1434 if(defined($score) and defined ($hcoverage)){ | |
1435 warning "cannot specify score and hcoverage. Using score only"; | |
1436 } | |
1437 if(defined($score)){ | |
1438 return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name); | |
1439 } | |
1440 return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name); | |
1441 } | |
1442 | |
1443 | |
1444 | |
1445 =head2 get_all_ProteinAlignFeatures | |
1446 | |
1447 Arg [1] : (optional) string $logic_name | |
1448 The name of the analysis performed on the protein align features | |
1449 to obtain. | |
1450 Arg [2] : (optional) float $score | |
1451 The mimimum score of the features to retrieve | |
1452 Arg [3] : (optional) string $dbtype | |
1453 The name of an attached database to retrieve features from | |
1454 instead. | |
1455 Arg [4] : (optional) float hcoverage | |
1456 The minimum hcoverage od the featurs to retrieve | |
1457 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures}; | |
1458 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with | |
1459 logic name $logic_name and with score above $score. If | |
1460 $logic_name is not defined features of all logic names are | |
1461 retrieved. If $score is not defined features of all scores are | |
1462 retrieved. | |
1463 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures | |
1464 Exceptions : warning if slice does not have attached adaptor | |
1465 Caller : general | |
1466 Status : Stable | |
1467 | |
1468 =cut | |
1469 | |
1470 sub get_all_ProteinAlignFeatures { | |
1471 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_; | |
1472 | |
1473 if(!$self->adaptor()) { | |
1474 warning('Cannot get ProteinAlignFeatures without attached adaptor'); | |
1475 return []; | |
1476 } | |
1477 | |
1478 my $db; | |
1479 | |
1480 if($dbtype) { | |
1481 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1482 if(!$db) { | |
1483 warning("Don't have db $dbtype returning empty list\n"); | |
1484 return []; | |
1485 } | |
1486 } else { | |
1487 $db = $self->adaptor->db; | |
1488 } | |
1489 | |
1490 my $pafa = $db->get_ProteinAlignFeatureAdaptor(); | |
1491 | |
1492 if(defined($score) and defined ($hcoverage)){ | |
1493 warning "cannot specify score and hcoverage. Using score only"; | |
1494 } | |
1495 if(defined($score)){ | |
1496 return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name); | |
1497 } | |
1498 return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name); | |
1499 | |
1500 } | |
1501 | |
1502 | |
1503 | |
1504 =head2 get_all_SimilarityFeatures | |
1505 | |
1506 Arg [1] : (optional) string $logic_name | |
1507 the name of the analysis performed on the features to retrieve | |
1508 Arg [2] : (optional) float $score | |
1509 the lower bound of the score of the features to be retrieved | |
1510 Example : @feats = @{$slice->get_all_SimilarityFeatures}; | |
1511 Description: Retrieves all dna_align_features and protein_align_features | |
1512 with analysis named $logic_name and with score above $score. | |
1513 It is probably faster to use get_all_ProteinAlignFeatures or | |
1514 get_all_DnaAlignFeatures if a sepcific feature type is desired. | |
1515 If $logic_name is not defined features of all logic names are | |
1516 retrieved. If $score is not defined features of all scores are | |
1517 retrieved. | |
1518 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures | |
1519 Exceptions : warning if slice does not have attached adaptor | |
1520 Caller : general | |
1521 Status : Stable | |
1522 | |
1523 =cut | |
1524 | |
1525 sub get_all_SimilarityFeatures { | |
1526 my ($self, $logic_name, $score) = @_; | |
1527 | |
1528 my @out = (); | |
1529 | |
1530 push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) }; | |
1531 push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) }; | |
1532 | |
1533 return \@out; | |
1534 } | |
1535 | |
1536 | |
1537 | |
1538 =head2 get_all_SimpleFeatures | |
1539 | |
1540 Arg [1] : (optional) string $logic_name | |
1541 The name of the analysis performed on the simple features | |
1542 to obtain. | |
1543 Arg [2] : (optional) float $score | |
1544 The mimimum score of the features to retrieve | |
1545 Example : @simple_feats = @{$slice->get_all_SimpleFeatures}; | |
1546 Description: Retrieves the SimpleFeatures which overlap this slice with | |
1547 logic name $logic_name and with score above $score. If | |
1548 $logic_name is not defined features of all logic names are | |
1549 retrieved. If $score is not defined features of all scores are | |
1550 retrieved. | |
1551 Returntype : listref of Bio::EnsEMBL::SimpleFeatures | |
1552 Exceptions : warning if slice does not have attached adaptor | |
1553 Caller : general | |
1554 Status : Stable | |
1555 | |
1556 =cut | |
1557 | |
1558 sub get_all_SimpleFeatures { | |
1559 my ($self, $logic_name, $score, $dbtype) = @_; | |
1560 | |
1561 if(!$self->adaptor()) { | |
1562 warning('Cannot get SimpleFeatures without attached adaptor'); | |
1563 return []; | |
1564 } | |
1565 | |
1566 my $db; | |
1567 if($dbtype) { | |
1568 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1569 if(!$db) { | |
1570 warning("Don't have db $dbtype returning empty list\n"); | |
1571 return []; | |
1572 } | |
1573 } else { | |
1574 $db = $self->adaptor->db; | |
1575 } | |
1576 | |
1577 my $sfa = $db->get_SimpleFeatureAdaptor(); | |
1578 | |
1579 return $sfa->fetch_all_by_Slice_and_score($self, $score, $logic_name); | |
1580 } | |
1581 | |
1582 | |
1583 | |
1584 =head2 get_all_RepeatFeatures | |
1585 | |
1586 Arg [1] : (optional) string $logic_name | |
1587 The name of the analysis performed on the repeat features | |
1588 to obtain. | |
1589 Arg [2] : (optional) string/array $repeat_type | |
1590 Limits features returned to those of the specified | |
1591 repeat_type. Can specify a single value or an array reference | |
1592 to limit by more than one | |
1593 Arg [3] : (optional) string $db | |
1594 Key for database e.g. core/vega/cdna/.... | |
1595 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')}; | |
1596 Description: Retrieves the RepeatFeatures which overlap with | |
1597 logic name $logic_name and with score above $score. If | |
1598 $logic_name is not defined features of all logic names are | |
1599 retrieved. | |
1600 Returntype : listref of Bio::EnsEMBL::RepeatFeatures | |
1601 Exceptions : warning if slice does not have attached adaptor | |
1602 Caller : general | |
1603 Status : Stable | |
1604 | |
1605 =cut | |
1606 | |
1607 sub get_all_RepeatFeatures { | |
1608 my ($self, $logic_name, $repeat_type, $dbtype) = @_; | |
1609 | |
1610 if(!$self->adaptor()) { | |
1611 warning('Cannot get RepeatFeatures without attached adaptor'); | |
1612 return []; | |
1613 } | |
1614 | |
1615 my $db; | |
1616 if($dbtype) { | |
1617 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1618 if(!$db) { | |
1619 warning("Don't have db $dbtype returning empty list\n"); | |
1620 return []; | |
1621 } | |
1622 } else { | |
1623 $db = $self->adaptor->db; | |
1624 } | |
1625 | |
1626 my $rpfa = $db->get_RepeatFeatureAdaptor(); | |
1627 | |
1628 return $rpfa->fetch_all_by_Slice($self, $logic_name, $repeat_type); | |
1629 } | |
1630 | |
1631 =head2 get_all_LD_values | |
1632 | |
1633 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population | |
1634 Description : returns all LD values on this slice. This function will only work correctly if the variation | |
1635 database has been attached to the core database. If the argument is passed, will return the LD information | |
1636 in that population | |
1637 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer | |
1638 Exceptions : none | |
1639 Caller : contigview, snpview | |
1640 Status : Stable | |
1641 | |
1642 =cut | |
1643 | |
1644 sub get_all_LD_values{ | |
1645 my $self = shift; | |
1646 my $population = shift; | |
1647 | |
1648 | |
1649 if(!$self->adaptor()) { | |
1650 warning('Cannot get LDFeatureContainer without attached adaptor'); | |
1651 return []; | |
1652 } | |
1653 | |
1654 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
1655 | |
1656 unless($variation_db) { | |
1657 warning("Variation database must be attached to core database to " . | |
1658 "retrieve variation information" ); | |
1659 return []; | |
1660 } | |
1661 | |
1662 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor; | |
1663 | |
1664 if( $ld_adaptor ) { | |
1665 return $ld_adaptor->fetch_by_Slice($self,$population); | |
1666 } else { | |
1667 return []; | |
1668 | |
1669 } | |
1670 | |
1671 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer"); | |
1672 | |
1673 # if( $ld_adaptor ) { | |
1674 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population); | |
1675 # if (@{$ld_values} > 1){ | |
1676 # warning("More than 1 variation database attached. Trying to merge LD results"); | |
1677 # my $ld_value_merged = shift @{$ld_values}; | |
1678 # #with more than 1 variation database attached, will try to merge in one single LDContainer object. | |
1679 # foreach my $ld (@{$ld_values}){ | |
1680 # #copy the ld values to the result hash | |
1681 # foreach my $key (keys %{$ld->{'ldContainer'}}){ | |
1682 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key}; | |
1683 # } | |
1684 # #and copy the variationFeatures as well | |
1685 # foreach my $key (keys %{$ld->{'variationFeatures'}}){ | |
1686 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key}; | |
1687 # } | |
1688 | |
1689 # } | |
1690 # return $ld_value_merged; | |
1691 # } | |
1692 # else{ | |
1693 # return shift @{$ld_values}; | |
1694 # } | |
1695 # } else { | |
1696 # warning("Variation database must be attached to core database to " . | |
1697 # "retrieve variation information" ); | |
1698 # return []; | |
1699 # } | |
1700 } | |
1701 | |
1702 sub _get_VariationFeatureAdaptor { | |
1703 | |
1704 my $self = shift; | |
1705 | |
1706 if(!$self->adaptor()) { | |
1707 warning('Cannot get variation features without attached adaptor'); | |
1708 return undef; | |
1709 } | |
1710 | |
1711 my $vf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
1712 -species => $self->adaptor()->db()->species, | |
1713 -type => "VariationFeature" | |
1714 ); | |
1715 | |
1716 if( $vf_adaptor ) { | |
1717 return $vf_adaptor; | |
1718 } | |
1719 else { | |
1720 warning("Variation database must be attached to core database to " . | |
1721 "retrieve variation information" ); | |
1722 | |
1723 return undef; | |
1724 } | |
1725 } | |
1726 | |
1727 =head2 get_all_VariationFeatures | |
1728 Args : $so_terms [optional] - list of so_terms to limit the fetch to | |
1729 Description : Returns all germline variation features on this slice. This function will | |
1730 only work correctly if the variation database has been attached to the core | |
1731 database. | |
1732 If $so_terms is specified, only variation features with a consequence type | |
1733 that matches or is an ontological child of any of the supplied terms will | |
1734 be returned | |
1735 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1736 Exceptions : none | |
1737 Caller : contigview, snpview | |
1738 Status : Stable | |
1739 | |
1740 =cut | |
1741 | |
1742 sub get_all_VariationFeatures{ | |
1743 my $self = shift; | |
1744 | |
1745 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
1746 return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, @_); | |
1747 } | |
1748 else { | |
1749 return []; | |
1750 } | |
1751 } | |
1752 | |
1753 =head2 get_all_somatic_VariationFeatures | |
1754 | |
1755 Args : $filter [optional] | |
1756 Description : Returns all somatic variation features on this slice. This function will only | |
1757 work correctly if the variation database has been attached to the core database. | |
1758 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1759 Exceptions : none | |
1760 Status : Stable | |
1761 | |
1762 =cut | |
1763 | |
1764 sub get_all_somatic_VariationFeatures { | |
1765 my $self = shift; | |
1766 | |
1767 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
1768 return $vf_adaptor->fetch_all_somatic_by_Slice($self); | |
1769 } | |
1770 else{ | |
1771 return []; | |
1772 } | |
1773 } | |
1774 | |
1775 =head2 get_all_VariationFeatures_with_annotation | |
1776 | |
1777 Arg [1] : $variation_feature_source [optional] | |
1778 Arg [2] : $annotation_source [optional] | |
1779 Arg [3] : $annotation_name [optional] | |
1780 Description : returns all germline variation features on this slice associated with a phenotype. | |
1781 This function will only work correctly if the variation database has been | |
1782 attached to the core database. | |
1783 If $variation_feature_source is set only variations from that source | |
1784 are retrieved. | |
1785 If $annotation_source is set only variations whose annotations come from | |
1786 $annotation_source will be retrieved. | |
1787 If $annotation_name is set only variations with that annotation will be retrieved. | |
1788 $annotation_name can be a phenotype's internal dbID. | |
1789 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1790 Exceptions : none | |
1791 Caller : contigview, snpview | |
1792 Status : Stable | |
1793 | |
1794 =cut | |
1795 | |
1796 sub get_all_VariationFeatures_with_annotation{ | |
1797 my $self = shift; | |
1798 my $source = shift; | |
1799 my $p_source = shift; | |
1800 my $annotation = shift; | |
1801 | |
1802 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
1803 return $vf_adaptor->fetch_all_with_annotation_by_Slice($self, $source, $p_source, $annotation); | |
1804 } | |
1805 else { | |
1806 return []; | |
1807 } | |
1808 } | |
1809 | |
1810 =head2 get_all_somatic_VariationFeatures_with_annotation | |
1811 | |
1812 Arg [1] : $variation_feature_source [optional] | |
1813 Arg [2] : $annotation_source [optional] | |
1814 Arg [3] : $annotation_name [optional] | |
1815 Description : returns all somatic variation features on this slice associated with a phenotype. | |
1816 (see get_all_VariationFeatures_with_annotation for further documentation) | |
1817 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1818 Exceptions : none | |
1819 Status : Stable | |
1820 | |
1821 =cut | |
1822 | |
1823 sub get_all_somatic_VariationFeatures_with_annotation{ | |
1824 my $self = shift; | |
1825 my $source = shift; | |
1826 my $p_source = shift; | |
1827 my $annotation = shift; | |
1828 | |
1829 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
1830 return $vf_adaptor->fetch_all_somatic_with_annotation_by_Slice($self, $source, $p_source, $annotation); | |
1831 } | |
1832 else { | |
1833 return [] unless $vf_adaptor; | |
1834 } | |
1835 } | |
1836 | |
1837 =head2 get_all_VariationFeatures_by_VariationSet | |
1838 | |
1839 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set | |
1840 Description :returns all variation features on this slice associated with a given set. | |
1841 This function will only work correctly if the variation database has been | |
1842 attached to the core database. | |
1843 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1844 Exceptions : none | |
1845 Caller : contigview, snpview | |
1846 Status : Stable | |
1847 | |
1848 =cut | |
1849 | |
1850 sub get_all_VariationFeatures_by_VariationSet { | |
1851 my $self = shift; | |
1852 my $set = shift; | |
1853 | |
1854 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
1855 return $vf_adaptor->fetch_all_by_Slice_VariationSet($self, $set); | |
1856 } | |
1857 else { | |
1858 return []; | |
1859 } | |
1860 } | |
1861 | |
1862 =head2 get_all_StructuralVariations | |
1863 | |
1864 Description: DEPRECATED. Use get_all_StructuralVariationFeatures instead | |
1865 | |
1866 =cut | |
1867 | |
1868 sub get_all_StructuralVariations{ | |
1869 my $self = shift; | |
1870 my $source = shift; | |
1871 my $study = shift; | |
1872 my $sv_class = shift; | |
1873 | |
1874 deprecate('Use get_all_StructuralVariationFeatures() instead.'); | |
1875 | |
1876 return $self->get_all_StructuralVariationFeatures($source,$sv_class); | |
1877 } | |
1878 | |
1879 | |
1880 =head2 get_all_CopyNumberVariantProbes | |
1881 | |
1882 Description: DEPRECATED. Use get_all_CopyNumberVariantProbeFeatures instead | |
1883 | |
1884 =cut | |
1885 | |
1886 sub get_all_CopyNumberVariantProbes { | |
1887 my $self = shift; | |
1888 my $source = shift; | |
1889 my $study = shift; | |
1890 | |
1891 deprecate('Use get_all_CopyNumberVariantProbeFeatures() instead.'); | |
1892 | |
1893 return $self->get_all_CopyNumberVariantProbeFeatures($source); | |
1894 } | |
1895 | |
1896 | |
1897 sub _get_StructuralVariationFeatureAdaptor { | |
1898 | |
1899 my $self = shift; | |
1900 | |
1901 if(!$self->adaptor()) { | |
1902 warning('Cannot get structural variation features without attached adaptor'); | |
1903 return undef; | |
1904 } | |
1905 | |
1906 my $svf_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
1907 -species => $self->adaptor()->db()->species, | |
1908 -type => "StructuralVariationFeature" | |
1909 ); | |
1910 | |
1911 if( $svf_adaptor ) { | |
1912 return $svf_adaptor; | |
1913 } | |
1914 else { | |
1915 warning("Variation database must be attached to core database to " . | |
1916 "retrieve variation information" ); | |
1917 | |
1918 return undef; | |
1919 } | |
1920 } | |
1921 | |
1922 | |
1923 =head2 get_all_StructuralVariationFeatures | |
1924 | |
1925 Arg[1] : string $source [optional] | |
1926 Arg[2] : int $include_evidence [optional] | |
1927 Arg[3] : string $sv_class (SO term) [optional] | |
1928 Description : returns all structural variation features on this slice. This function will only work | |
1929 correctly if the variation database has been attached to the core database. | |
1930 If $source is set, only structural variation features with that source name will be | |
1931 returned. By default, it only returns structural variant features which are not labelled | |
1932 as "CNV_PROBE". | |
1933 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from | |
1934 both structural variation (SV) and their supporting structural variations (SSV) will be | |
1935 returned. By default, it only returns features from structural variations (SV). | |
1936 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature | |
1937 Exceptions : none | |
1938 Caller : contigview, snpview, structural_variation_features | |
1939 Status : Stable | |
1940 | |
1941 =cut | |
1942 | |
1943 sub get_all_StructuralVariationFeatures { | |
1944 my $self = shift; | |
1945 my $source = shift; | |
1946 my $include_evidence = shift; | |
1947 my $somatic = shift; | |
1948 my $sv_class = shift; | |
1949 | |
1950 my $operator = ''; | |
1951 | |
1952 if (!defined($sv_class)) { | |
1953 $sv_class = 'SO:0000051'; # CNV_PROBE | |
1954 $operator = '!'; # All but CNV_PROBE | |
1955 } | |
1956 | |
1957 $somatic = (!defined($somatic) || !$somatic) ? 0 : 1; | |
1958 | |
1959 my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor; | |
1960 | |
1961 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
1962 | |
1963 # Get the attrib_id | |
1964 my $at_adaptor = $variation_db->get_AttributeAdaptor; | |
1965 my $SO_term = $at_adaptor->SO_term_for_SO_accession($sv_class); | |
1966 my $attrib_id = $at_adaptor->attrib_id_for_type_value('SO_term',$SO_term); | |
1967 | |
1968 if (!$attrib_id) { | |
1969 warning("The Sequence Ontology accession number is not found in the database"); | |
1970 return []; | |
1971 } | |
1972 | |
1973 # Get the structural variations features | |
1974 if( $svf_adaptor ) { | |
1975 | |
1976 my $constraint = qq{ svf.somatic=$somatic AND svf.class_attrib_id $operator=$attrib_id }; | |
1977 $constraint .= qq{ AND svf.is_evidence=0 } if (!$include_evidence); | |
1978 | |
1979 if($source) { | |
1980 return $svf_adaptor->fetch_all_by_Slice_constraint($self, qq{$constraint AND s.name = '$source'}); | |
1981 }else { | |
1982 return $svf_adaptor->fetch_all_by_Slice_constraint($self, $constraint); | |
1983 } | |
1984 } | |
1985 else { | |
1986 warning("Variation database must be attached to core database to " . | |
1987 "retrieve variation information" ); | |
1988 return []; | |
1989 } | |
1990 } | |
1991 | |
1992 | |
1993 =head2 get_all_StructuralVariationFeatures_by_VariationSet | |
1994 | |
1995 Arg [1] : Bio::EnsEMBL:Variation::VariationSet $set | |
1996 Description :returns all structural variation features on this slice associated with a | |
1997 given set. | |
1998 This function will only work correctly if the variation database has been | |
1999 attached to the core database. | |
2000 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature | |
2001 Exceptions : none | |
2002 Caller : contigview, snpview | |
2003 Status : Stable | |
2004 | |
2005 =cut | |
2006 | |
2007 sub get_all_StructuralVariationFeatures_by_VariationSet { | |
2008 my $self = shift; | |
2009 my $set = shift; | |
2010 | |
2011 if (my $svf_adaptor = $self->_get_StructuralVariationFeatureAdaptor) { | |
2012 return $svf_adaptor->fetch_all_by_Slice_VariationSet($self, $set); | |
2013 } | |
2014 else { | |
2015 return []; | |
2016 } | |
2017 } | |
2018 | |
2019 | |
2020 =head2 get_all_somatic_StructuralVariationFeatures | |
2021 | |
2022 Arg[1] : string $source [optional] | |
2023 Arg[2] : int $include_evidence [optional] | |
2024 Arg[3] : string $sv_class (SO term) [optional] | |
2025 Description : returns all somatic structural variation features on this slice. This function will only work | |
2026 correctly if the variation database has been attached to the core database. | |
2027 If $source is set, only somatic structural variation features with that source name will be | |
2028 returned. By default, it only returns somatic structural variant features which are not labelled | |
2029 as "CNV_PROBE". | |
2030 If $include_evidence is set (i.e. $include_evidence=1), structural variation features from | |
2031 both structural variation (SV) and their supporting structural variations (SSV) will be | |
2032 returned. By default, it only returns features from structural variations (SV). | |
2033 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature | |
2034 Exceptions : none | |
2035 Caller : contigview, snpview, structural_variation_features | |
2036 Status : Stable | |
2037 | |
2038 =cut | |
2039 | |
2040 sub get_all_somatic_StructuralVariationFeatures { | |
2041 my $self = shift; | |
2042 my $source = shift; | |
2043 my $include_evidence = shift; | |
2044 my $sv_class = shift; | |
2045 | |
2046 return $self->get_all_StructuralVariationFeatures($source,$include_evidence,1,$sv_class); | |
2047 } | |
2048 | |
2049 | |
2050 =head2 get_all_CopyNumberVariantProbeFeatures | |
2051 | |
2052 Arg[1] : string $source [optional] | |
2053 Description : returns all copy number variant probes on this slice. This function will only work | |
2054 correctly if the variation database has been attached to the core database. | |
2055 If $source is set, only CNV probes with that source name will be returned. | |
2056 If $study is set, only CNV probes of that study will be returned. | |
2057 ReturnType : listref of Bio::EnsEMBL::Variation::StructuralVariationFeature | |
2058 Exceptions : none | |
2059 Caller : contigview, snpview, structural_variation_feature | |
2060 Status : At Risk | |
2061 | |
2062 =cut | |
2063 | |
2064 sub get_all_CopyNumberVariantProbeFeatures { | |
2065 my $self = shift; | |
2066 my $source = shift; | |
2067 | |
2068 return $self->get_all_StructuralVariationFeatures($source,0,0,'SO:0000051'); | |
2069 } | |
2070 | |
2071 | |
2072 =head2 get_all_VariationFeatures_by_Population | |
2073 | |
2074 Arg [1] : Bio::EnsEMBL::Variation::Population | |
2075 Arg [2] : $minimum_frequency (optional) | |
2076 Example : $pop = $pop_adaptor->fetch_by_dbID(659); | |
2077 @vfs = @{$slice->get_all_VariationFeatures_by_Population( | |
2078 $pop,$slice)}; | |
2079 Description: Retrieves all variation features in a slice which are stored for | |
2080 a specified population. If $minimum_frequency is supplied, only | |
2081 variations with a minor allele frequency (MAF) greater than | |
2082 $minimum_frequency will be returned. | |
2083 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature | |
2084 Exceptions : throw on incorrect argument | |
2085 Caller : general | |
2086 Status : At Risk | |
2087 | |
2088 =cut | |
2089 | |
2090 sub get_all_VariationFeatures_by_Population { | |
2091 my $self = shift; | |
2092 | |
2093 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
2094 return $vf_adaptor->fetch_all_by_Slice_Population($self, @_); | |
2095 } | |
2096 else { | |
2097 return []; | |
2098 } | |
2099 } | |
2100 | |
2101 | |
2102 | |
2103 | |
2104 =head2 get_all_IndividualSlice | |
2105 | |
2106 Args : none | |
2107 Example : my $individualSlice = $slice->get_by_Population($population); | |
2108 Description : Gets the specific Slice for all the individuls in the population | |
2109 ReturnType : listref of Bio::EnsEMB::IndividualSlice | |
2110 Exceptions : none | |
2111 Caller : general | |
2112 | |
2113 =cut | |
2114 | |
2115 sub get_all_IndividualSlice{ | |
2116 my $self = shift; | |
2117 | |
2118 my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new( | |
2119 -START => $self->{'start'}, | |
2120 -END => $self->{'end'}, | |
2121 -STRAND => $self->{'strand'}, | |
2122 -ADAPTOR => $self->adaptor(), | |
2123 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2124 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2125 -COORD_SYSTEM => $self->{'coord_system'}, | |
2126 ); | |
2127 return $individualSliceFactory->get_all_IndividualSlice(); | |
2128 } | |
2129 | |
2130 =head2 get_by_Individual | |
2131 | |
2132 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual | |
2133 Example : my $individualSlice = $slice->get_by_Individual($individual); | |
2134 Description : Gets the specific Slice for the individual | |
2135 ReturnType : Bio::EnsEMB::IndividualSlice | |
2136 Exceptions : none | |
2137 Caller : general | |
2138 | |
2139 =cut | |
2140 | |
2141 sub get_by_Individual{ | |
2142 my $self = shift; | |
2143 my $individual = shift; | |
2144 | |
2145 return Bio::EnsEMBL::IndividualSlice->new( | |
2146 -START => $self->{'start'}, | |
2147 -END => $self->{'end'}, | |
2148 -STRAND => $self->{'strand'}, | |
2149 -ADAPTOR => $self->adaptor(), | |
2150 # -SEQ => $self->{'seq'}, | |
2151 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2152 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2153 -COORD_SYSTEM => $self->{'coord_system'}, | |
2154 -INDIVIDUAL => $individual); | |
2155 | |
2156 } | |
2157 | |
2158 | |
2159 | |
2160 =head2 get_by_strain | |
2161 | |
2162 Arg[1] : string $strain | |
2163 Example : my $strainSlice = $slice->get_by_strain($strain); | |
2164 Description : Gets the specific Slice for the strain | |
2165 ReturnType : Bio::EnsEMB::StrainSlice | |
2166 Exceptions : none | |
2167 Caller : general | |
2168 | |
2169 =cut | |
2170 | |
2171 sub get_by_strain{ | |
2172 my $self = shift; | |
2173 my $strain_name = shift; | |
2174 | |
2175 return Bio::EnsEMBL::StrainSlice->new( | |
2176 -START => $self->{'start'}, | |
2177 -END => $self->{'end'}, | |
2178 -STRAND => $self->{'strand'}, | |
2179 -ADAPTOR => $self->adaptor(), | |
2180 -SEQ => $self->{'seq'}, | |
2181 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2182 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2183 -COORD_SYSTEM => $self->{'coord_system'}, | |
2184 -STRAIN_NAME => $strain_name); | |
2185 | |
2186 } | |
2187 | |
2188 sub calculate_theta{ | |
2189 my $self = shift; | |
2190 my $strains = shift; | |
2191 my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate | |
2192 | |
2193 if(!$self->adaptor()) { | |
2194 warning('Cannot get variation features without attached adaptor'); | |
2195 return 0; | |
2196 } | |
2197 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
2198 | |
2199 unless($variation_db) { | |
2200 warning("Variation database must be attached to core database to " . | |
2201 "retrieve variation information" ); | |
2202 return 0; | |
2203 } | |
2204 | |
2205 #need to get coverage regions for the slice in the different strains | |
2206 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; | |
2207 my $strain; | |
2208 my $differences = []; | |
2209 my $slices = []; | |
2210 if ($coverage_adaptor){ | |
2211 my $num_strains = scalar(@{$strains}) +1; | |
2212 if (!defined $feature){ | |
2213 #we want to calculate for the whole slice | |
2214 push @{$slices}, $self; #add the slice as the slice to calculate the theta value | |
2215 } | |
2216 else{ | |
2217 #we have features, get the slices for the different features | |
2218 my $features = $self->get_all_Exons(); | |
2219 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features | |
2220 } | |
2221 my $length_regions = 0; | |
2222 my $snps = 0; | |
2223 my $theta = 0; | |
2224 my $last_position = 0; | |
2225 #get all the differences in the slice coordinates | |
2226 foreach my $strain_name (@{$strains}){ | |
2227 my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain | |
2228 | |
2229 my $results = $strain->get_all_differences_Slice; | |
2230 push @{$differences}, @{$results} if (defined $results); | |
2231 } | |
2232 #when we finish, we have, in max_level, the regions covered by all the sample | |
2233 #sort the differences by the genomic position | |
2234 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences}; | |
2235 foreach my $slice (@{$slices}){ | |
2236 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains); | |
2237 if (defined $regions_covered){ | |
2238 foreach my $range (@{$regions_covered}){ | |
2239 $length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region | |
2240 for (my $i = $last_position;$i<@differences_sorted;$i++){ | |
2241 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){ | |
2242 $snps++; #count differences in the region | |
2243 } | |
2244 elsif ($differences_sorted[$i]->end > $range->[1]){ | |
2245 $last_position = $i; | |
2246 last; | |
2247 } | |
2248 } | |
2249 } | |
2250 #when all the ranges have been iterated, calculate rho | |
2251 #this is an intermediate variable called a in the formula | |
2252 # a = sum i=2..strains 1/i-1 | |
2253 } | |
2254 } | |
2255 my $a = _calculate_a($num_strains); | |
2256 $theta = $snps / ($a * $length_regions); | |
2257 return $theta; | |
2258 } | |
2259 else{ | |
2260 return 0; | |
2261 } | |
2262 } | |
2263 | |
2264 | |
2265 | |
2266 | |
2267 sub _calculate_a{ | |
2268 my $max_level = shift; | |
2269 | |
2270 my $a = 0; | |
2271 for (my $i = 2; $i <= $max_level+1;$i++){ | |
2272 $a += 1/($i-1); | |
2273 } | |
2274 return $a; | |
2275 } | |
2276 | |
2277 sub calculate_pi{ | |
2278 my $self = shift; | |
2279 my $strains = shift; | |
2280 my $feature = shift; | |
2281 | |
2282 if(!$self->adaptor()) { | |
2283 warning('Cannot get variation features without attached adaptor'); | |
2284 return 0; | |
2285 } | |
2286 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
2287 | |
2288 unless($variation_db) { | |
2289 warning("Variation database must be attached to core database to " . | |
2290 "retrieve variation information" ); | |
2291 return 0; | |
2292 } | |
2293 | |
2294 #need to get coverage regions for the slice in the different strains | |
2295 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; | |
2296 my $differences = []; | |
2297 my $slices = []; | |
2298 if ($coverage_adaptor){ | |
2299 my $num_strains = scalar(@{$strains}) +1; | |
2300 if (!defined $feature){ | |
2301 #we want to calculate for the whole slice | |
2302 push @{$slices}, $self; #add the slice as the slice to calculate the theta value | |
2303 } | |
2304 else{ | |
2305 #we have features, get the slices for the different features | |
2306 my $features = $self->get_all_Exons(); | |
2307 map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features | |
2308 } | |
2309 my @range_differences = (); | |
2310 my $pi = 0; | |
2311 my $regions = 0; | |
2312 my $last_position = 0; #last position visited in the sorted list of differences | |
2313 my $triallelic = 0; | |
2314 my $is_triallelic = 0; | |
2315 foreach my $slice (@{$slices}){ | |
2316 foreach my $strain_name (@{$strains}){ | |
2317 my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain | |
2318 my $results = $strain->get_all_differences_Slice; | |
2319 push @{$differences}, @{$results} if (defined $results); | |
2320 } | |
2321 my @differences_sorted = sort {$a->start <=> $b->start} @{$differences}; | |
2322 | |
2323 my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains); | |
2324 #when we finish, we have, in max_level, the regions covered by all the sample | |
2325 #sort the differences | |
2326 if (defined $regions_covered){ | |
2327 foreach my $range (@{$regions_covered}){ | |
2328 for (my $i = $last_position;$i<@differences_sorted;$i++){ | |
2329 if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){ | |
2330 #check wether it is the same region or different | |
2331 if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){ | |
2332 if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){ | |
2333 $is_triallelic = 1; | |
2334 } | |
2335 push @range_differences, $differences_sorted[$i]; | |
2336 } | |
2337 else{ | |
2338 #new site, calc pi for the previous one | |
2339 $pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains)); | |
2340 if ($is_triallelic) { | |
2341 $triallelic++; | |
2342 $is_triallelic = 0; | |
2343 } | |
2344 $regions++; | |
2345 @range_differences = (); | |
2346 #and start a new range | |
2347 push @range_differences, $differences_sorted[$i]; | |
2348 } | |
2349 } | |
2350 elsif ($differences_sorted[$i]->end > $range->[1]){ | |
2351 $last_position = $i; | |
2352 last; | |
2353 } | |
2354 } | |
2355 #calculate pi for last site, if any | |
2356 if (defined $range_differences[0]){ | |
2357 $pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains)); | |
2358 $regions++; | |
2359 } | |
2360 } | |
2361 } | |
2362 $pi = $pi / $regions; #calculate average pi | |
2363 print "Regions with variations in region $regions and triallelic $triallelic\n\n"; | |
2364 } | |
2365 return $pi; | |
2366 } | |
2367 else{ | |
2368 return 0; | |
2369 } | |
2370 | |
2371 } | |
2372 | |
2373 | |
2374 | |
2375 | |
2376 | |
2377 =head2 get_all_genotyped_VariationFeatures | |
2378 | |
2379 Args : none | |
2380 Function : returns all variation features on this slice that have been genotyped. This function will only work | |
2381 correctly if the variation database has been attached to the core database. | |
2382 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
2383 Exceptions : none | |
2384 Caller : contigview, snpview, ldview | |
2385 Status : At Risk | |
2386 : Variation database is under development. | |
2387 | |
2388 =cut | |
2389 | |
2390 sub get_all_genotyped_VariationFeatures{ | |
2391 my $self = shift; | |
2392 | |
2393 if( my $vf_adaptor = $self->_get_VariationFeatureAdaptor) { | |
2394 return $vf_adaptor->fetch_all_genotyped_by_Slice($self); | |
2395 } | |
2396 else { | |
2397 return []; | |
2398 } | |
2399 } | |
2400 | |
2401 | |
2402 =head2 get_all_SNPs | |
2403 | |
2404 Description: DEPRECATED. Use get_all_VariationFeatures insted | |
2405 | |
2406 =cut | |
2407 | |
2408 sub get_all_SNPs { | |
2409 my $self = shift; | |
2410 | |
2411 deprecate('Use get_all_VariationFeatures() instead.'); | |
2412 | |
2413 my $snps; | |
2414 my $vf = $self->get_all_genotyped_VariationFeatures(); | |
2415 if( $vf->[0] ) { | |
2416 #necessary to convert the VariationFeatures into SNP objects | |
2417 foreach my $variation_feature (@{$vf}){ | |
2418 push @{$snps},$variation_feature->convert_to_SNP(); | |
2419 } | |
2420 return $snps; | |
2421 } else { | |
2422 return []; | |
2423 } | |
2424 } | |
2425 | |
2426 =head2 get_all_genotyped_SNPs | |
2427 | |
2428 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures insted | |
2429 | |
2430 =cut | |
2431 | |
2432 sub get_all_genotyped_SNPs { | |
2433 my $self = shift; | |
2434 | |
2435 deprecate("Use get_all_genotyped_VariationFeatures instead"); | |
2436 my $vf = $self->get_all_genotyped_VariationFeatures; | |
2437 my $snps; | |
2438 if ($vf->[0]){ | |
2439 foreach my $variation_feature (@{$vf}){ | |
2440 push @{$snps},$variation_feature->convert_to_SNP(); | |
2441 } | |
2442 return $snps; | |
2443 } else { | |
2444 return []; | |
2445 } | |
2446 } | |
2447 | |
2448 sub get_all_SNPs_transcripts { | |
2449 my $self = shift; | |
2450 | |
2451 deprecate("DEPRECATED"); | |
2452 | |
2453 return []; | |
2454 | |
2455 } | |
2456 | |
2457 | |
2458 | |
2459 =head2 get_all_Genes | |
2460 | |
2461 Arg [1] : (optional) string $logic_name | |
2462 The name of the analysis used to generate the genes to retrieve | |
2463 Arg [2] : (optional) string $dbtype | |
2464 The dbtype of genes to obtain. This assumes that the db has | |
2465 been added to the DBAdaptor under this name (using the | |
2466 DBConnection::add_db_adaptor method). | |
2467 Arg [3] : (optional) boolean $load_transcripts | |
2468 If set to true, transcripts will be loaded immediately rather | |
2469 than being lazy-loaded on request. This will result in a | |
2470 significant speed up if the Transcripts and Exons are going to | |
2471 be used (but a slow down if they are not). | |
2472 Arg [4] : (optional) string $source | |
2473 The source of the genes to retrieve. | |
2474 Arg [5] : (optional) string $biotype | |
2475 The biotype of the genes to retrieve. | |
2476 Example : @genes = @{$slice->get_all_Genes}; | |
2477 Description: Retrieves all genes that overlap this slice. | |
2478 Returntype : listref of Bio::EnsEMBL::Genes | |
2479 Exceptions : none | |
2480 Caller : none | |
2481 Status : Stable | |
2482 | |
2483 =cut | |
2484 | |
2485 sub get_all_Genes{ | |
2486 my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_; | |
2487 | |
2488 if(!$self->adaptor()) { | |
2489 warning('Cannot get Genes without attached adaptor'); | |
2490 return []; | |
2491 } | |
2492 | |
2493 my $ga; | |
2494 if($dbtype) { | |
2495 my $db = $reg->get_db($self->adaptor()->db(), $dbtype); | |
2496 if(defined($db)){ | |
2497 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" ); | |
2498 } | |
2499 else{ | |
2500 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Gene" ); | |
2501 } | |
2502 if(!defined $ga) { | |
2503 warning( "$dbtype genes not available" ); | |
2504 return []; | |
2505 } | |
2506 } else { | |
2507 $ga = $self->adaptor->db->get_GeneAdaptor(); | |
2508 } | |
2509 | |
2510 return $ga->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype); | |
2511 } | |
2512 | |
2513 =head2 get_all_Genes_by_type | |
2514 | |
2515 Arg [1] : string $type | |
2516 The biotype of genes wanted. | |
2517 Arg [2] : (optional) string $logic_name | |
2518 Arg [3] : (optional) boolean $load_transcripts | |
2519 If set to true, transcripts will be loaded immediately rather | |
2520 than being lazy-loaded on request. This will result in a | |
2521 significant speed up if the Transcripts and Exons are going to | |
2522 be used (but a slow down if they are not). | |
2523 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding', | |
2524 'ensembl')}; | |
2525 Description: Retrieves genes that overlap this slice of biotype $type. | |
2526 This is primarily used by the genebuilding code when several | |
2527 biotypes of genes are used. | |
2528 | |
2529 The logic name is the analysis of the genes that are retrieved. | |
2530 If not provided all genes will be retrieved instead. | |
2531 | |
2532 Returntype : listref of Bio::EnsEMBL::Genes | |
2533 Exceptions : none | |
2534 Caller : genebuilder, general | |
2535 Status : Stable | |
2536 | |
2537 =cut | |
2538 | |
2539 sub get_all_Genes_by_type{ | |
2540 my ($self, $type, $logic_name, $load_transcripts) = @_; | |
2541 | |
2542 if(!$self->adaptor()) { | |
2543 warning('Cannot get Genes without attached adaptor'); | |
2544 return []; | |
2545 } | |
2546 | |
2547 return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type); | |
2548 } | |
2549 | |
2550 | |
2551 =head2 get_all_Genes_by_source | |
2552 | |
2553 Arg [1] : string source | |
2554 Arg [2] : (optional) boolean $load_transcripts | |
2555 If set to true, transcripts will be loaded immediately rather | |
2556 than being lazy-loaded on request. This will result in a | |
2557 significant speed up if the Transcripts and Exons are going to | |
2558 be used (but a slow down if they are not). | |
2559 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')}; | |
2560 Description: Retrieves genes that overlap this slice of source $source. | |
2561 | |
2562 Returntype : listref of Bio::EnsEMBL::Genes | |
2563 Exceptions : none | |
2564 Caller : general | |
2565 Status : Stable | |
2566 | |
2567 =cut | |
2568 | |
2569 sub get_all_Genes_by_source { | |
2570 my ($self, $source, $load_transcripts) = @_; | |
2571 | |
2572 if(!$self->adaptor()) { | |
2573 warning('Cannot get Genes without attached adaptor'); | |
2574 return []; | |
2575 } | |
2576 | |
2577 return $self->get_all_Genes(undef, undef, $load_transcripts, $source); | |
2578 } | |
2579 | |
2580 =head2 get_all_Transcripts | |
2581 | |
2582 Arg [1] : (optional) boolean $load_exons | |
2583 If set to true exons will not be lazy-loaded but will instead | |
2584 be loaded right away. This is faster if the exons are | |
2585 actually going to be used right away. | |
2586 Arg [2] : (optional) string $logic_name | |
2587 the logic name of the type of features to obtain | |
2588 Arg [3] : (optional) string $db_type | |
2589 Example : @transcripts = @{$slice->get_all_Transcripts)_}; | |
2590 Description: Gets all transcripts which overlap this slice. If you want to | |
2591 specify a particular analysis or type, then you are better off | |
2592 using get_all_Genes or get_all_Genes_by_type and iterating | |
2593 through the transcripts of each gene. | |
2594 Returntype : reference to a list of Bio::EnsEMBL::Transcripts | |
2595 Exceptions : none | |
2596 Caller : general | |
2597 Status : Stable | |
2598 | |
2599 =cut | |
2600 | |
2601 sub get_all_Transcripts { | |
2602 my $self = shift; | |
2603 my $load_exons = shift; | |
2604 my $logic_name = shift; | |
2605 my $dbtype = shift; | |
2606 if(!$self->adaptor()) { | |
2607 warning('Cannot get Transcripts without attached adaptor'); | |
2608 return []; | |
2609 } | |
2610 | |
2611 | |
2612 my $ta; | |
2613 if($dbtype) { | |
2614 my $db = $reg->get_db($self->adaptor()->db(), $dbtype); | |
2615 if(defined($db)){ | |
2616 $ta = $reg->get_adaptor( $db->species(), $db->group(), "Transcript" ); | |
2617 } else{ | |
2618 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(), $dbtype, "Transcript" ); | |
2619 } | |
2620 if(!defined $ta) { | |
2621 warning( "$dbtype genes not available" ); | |
2622 return []; | |
2623 } | |
2624 } else { | |
2625 $ta = $self->adaptor->db->get_TranscriptAdaptor(); | |
2626 } | |
2627 return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name); | |
2628 } | |
2629 | |
2630 | |
2631 =head2 get_all_Exons | |
2632 | |
2633 Arg [1] : none | |
2634 Example : @exons = @{$slice->get_all_Exons}; | |
2635 Description: Gets all exons which overlap this slice. Note that these exons | |
2636 will not be associated with any transcripts, so this may not | |
2637 be terribly useful. | |
2638 Returntype : reference to a list of Bio::EnsEMBL::Exons | |
2639 Exceptions : none | |
2640 Caller : general | |
2641 Status : Stable | |
2642 | |
2643 =cut | |
2644 | |
2645 sub get_all_Exons { | |
2646 my $self = shift; | |
2647 | |
2648 if(!$self->adaptor()) { | |
2649 warning('Cannot get Exons without attached adaptor'); | |
2650 return []; | |
2651 } | |
2652 | |
2653 return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self); | |
2654 } | |
2655 | |
2656 | |
2657 | |
2658 =head2 get_all_QtlFeatures | |
2659 | |
2660 Args : none | |
2661 Example : none | |
2662 Description: returns overlapping QtlFeatures | |
2663 Returntype : listref Bio::EnsEMBL::Map::QtlFeature | |
2664 Exceptions : none | |
2665 Caller : general | |
2666 Status : Stable | |
2667 | |
2668 =cut | |
2669 | |
2670 sub get_all_QtlFeatures { | |
2671 my $self = shift; | |
2672 | |
2673 if(!$self->adaptor()) { | |
2674 warning('Cannot get QtlFeatures without attached adaptor'); | |
2675 return []; | |
2676 } | |
2677 | |
2678 my $qfAdaptor; | |
2679 if( $self->adaptor()) { | |
2680 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor(); | |
2681 } else { | |
2682 return []; | |
2683 } | |
2684 | |
2685 return $qfAdaptor->fetch_all_by_Slice_constraint( $self ); | |
2686 } | |
2687 | |
2688 | |
2689 | |
2690 | |
2691 =head2 get_all_KaryotypeBands | |
2692 | |
2693 Arg [1] : none | |
2694 Example : @kary_bands = @{$slice->get_all_KaryotypeBands}; | |
2695 Description: Retrieves the karyotype bands which this slice overlaps. | |
2696 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands | |
2697 Exceptions : none | |
2698 Caller : general, contigview | |
2699 Status : Stable | |
2700 | |
2701 =cut | |
2702 | |
2703 sub get_all_KaryotypeBands { | |
2704 my ($self) = @_; | |
2705 | |
2706 if(!$self->adaptor()) { | |
2707 warning('Cannot get KaryotypeBands without attached adaptor'); | |
2708 return []; | |
2709 } | |
2710 | |
2711 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor(); | |
2712 return $kadp->fetch_all_by_Slice($self); | |
2713 } | |
2714 | |
2715 | |
2716 | |
2717 | |
2718 =head2 get_repeatmasked_seq | |
2719 | |
2720 Arg [1] : listref of strings $logic_names (optional) | |
2721 Arg [2] : int $soft_masking_enable (optional) | |
2722 Arg [3] : hash reference $not_default_masking_cases (optional, default is {}) | |
2723 The values are 0 or 1 for hard and soft masking respectively | |
2724 The keys of the hash should be of 2 forms | |
2725 "repeat_class_" . $repeat_consensus->repeat_class, | |
2726 e.g. "repeat_class_SINE/MIR" | |
2727 "repeat_name_" . $repeat_consensus->name | |
2728 e.g. "repeat_name_MIR" | |
2729 depending on which base you want to apply the not default | |
2730 masking either the repeat_class or repeat_name. Both can be | |
2731 specified in the same hash at the same time, but in that case, | |
2732 repeat_name setting has priority over repeat_class. For example, | |
2733 you may have hard masking as default, and you may want soft | |
2734 masking of all repeat_class SINE/MIR, but repeat_name AluSp | |
2735 (which are also from repeat_class SINE/MIR). | |
2736 Your hash will be something like {"repeat_class_SINE/MIR" => 1, | |
2737 "repeat_name_AluSp" => 0} | |
2738 Example : $rm_slice = $slice->get_repeatmasked_seq(); | |
2739 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1); | |
2740 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat | |
2741 masked sequence instead of the regular sequence. | |
2742 Sequence returned by this new slice will have repeat regions | |
2743 hardmasked by default (sequence replaced by N) or | |
2744 or soft-masked when arg[2] = 1 (sequence in lowercase) | |
2745 Will only work with database connection to get repeat features. | |
2746 Returntype : Bio::EnsEMBL::RepeatMaskedSlice | |
2747 Exceptions : none | |
2748 Caller : general | |
2749 Status : Stable | |
2750 | |
2751 =cut | |
2752 | |
2753 sub get_repeatmasked_seq { | |
2754 my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_; | |
2755 | |
2756 return Bio::EnsEMBL::RepeatMaskedSlice->new | |
2757 (-START => $self->{'start'}, | |
2758 -END => $self->{'end'}, | |
2759 -STRAND => $self->{'strand'}, | |
2760 -ADAPTOR => $self->adaptor(), | |
2761 -SEQ => $self->{'seq'}, | |
2762 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2763 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2764 -COORD_SYSTEM => $self->{'coord_system'}, | |
2765 -REPEAT_MASK => $logic_names, | |
2766 -SOFT_MASK => $soft_mask, | |
2767 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases); | |
2768 } | |
2769 | |
2770 | |
2771 | |
2772 =head2 _mask_features | |
2773 | |
2774 Arg [1] : reference to a string $dnaref | |
2775 Arg [2] : array_ref $repeats | |
2776 reference to a list Bio::EnsEMBL::RepeatFeature | |
2777 give the list of coordinates to replace with N or with | |
2778 lower case | |
2779 Arg [3] : int $soft_masking_enable (optional) | |
2780 Arg [4] : hash reference $not_default_masking_cases (optional, default is {}) | |
2781 The values are 0 or 1 for hard and soft masking respectively | |
2782 The keys of the hash should be of 2 forms | |
2783 "repeat_class_" . $repeat_consensus->repeat_class, | |
2784 e.g. "repeat_class_SINE/MIR" | |
2785 "repeat_name_" . $repeat_consensus->name | |
2786 e.g. "repeat_name_MIR" | |
2787 depending on which base you want to apply the not default masking either | |
2788 the repeat_class or repeat_name. Both can be specified in the same hash | |
2789 at the same time, but in that case, repeat_name setting has priority over | |
2790 repeat_class. For example, you may have hard masking as default, and | |
2791 you may want soft masking of all repeat_class SINE/MIR, | |
2792 but repeat_name AluSp (which are also from repeat_class SINE/MIR). | |
2793 Your hash will be something like {"repeat_class_SINE/MIR" => 1, | |
2794 "repeat_name_AluSp" => 0} | |
2795 Example : none | |
2796 Description: replaces string positions described in the RepeatFeatures | |
2797 with Ns (default setting), or with the lower case equivalent | |
2798 (soft masking). The reference to a dna string which is passed | |
2799 is changed in place. | |
2800 Returntype : none | |
2801 Exceptions : none | |
2802 Caller : seq | |
2803 Status : Stable | |
2804 | |
2805 =cut | |
2806 | |
2807 sub _mask_features { | |
2808 my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_; | |
2809 | |
2810 $soft_mask = 0 unless (defined $soft_mask); | |
2811 $not_default_masking_cases = {} unless (defined $not_default_masking_cases); | |
2812 | |
2813 # explicit CORE::length call, to avoid any confusion with the Slice | |
2814 # length method | |
2815 my $dnalen = CORE::length($$dnaref); | |
2816 | |
2817 REP:foreach my $old_f (@{$repeats}) { | |
2818 my $f = $old_f->transfer( $self ); | |
2819 my $start = $f->start; | |
2820 my $end = $f->end; | |
2821 my $length = ($end - $start) + 1; | |
2822 | |
2823 # check if we get repeat completely outside of expected slice range | |
2824 if ($end < 1 || $start > $dnalen) { | |
2825 # warning("Unexpected: Repeat completely outside slice coordinates."); | |
2826 next REP; | |
2827 } | |
2828 | |
2829 # repeat partly outside slice range, so correct | |
2830 # the repeat start and length to the slice size if needed | |
2831 if ($start < 1) { | |
2832 $start = 1; | |
2833 $length = ($end - $start) + 1; | |
2834 } | |
2835 | |
2836 # repeat partly outside slice range, so correct | |
2837 # the repeat end and length to the slice size if needed | |
2838 if ($end > $dnalen) { | |
2839 $end = $dnalen; | |
2840 $length = ($end - $start) + 1; | |
2841 } | |
2842 | |
2843 $start--; | |
2844 | |
2845 my $padstr; | |
2846 # if we decide to define masking on the base of the repeat_type, we'll need | |
2847 # to add the following, and the other commented line few lines below. | |
2848 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type; | |
2849 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class; | |
2850 my $rc_name = "repeat_name_" . $f->repeat_consensus->name; | |
2851 | |
2852 my $masking_type; | |
2853 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type}); | |
2854 $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class}); | |
2855 $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name}); | |
2856 | |
2857 $masking_type = $soft_mask unless (defined $masking_type); | |
2858 | |
2859 if ($masking_type) { | |
2860 $padstr = lc substr ($$dnaref,$start,$length); | |
2861 } else { | |
2862 $padstr = 'N' x $length; | |
2863 } | |
2864 substr ($$dnaref,$start,$length) = $padstr; | |
2865 } | |
2866 } | |
2867 | |
2868 | |
2869 =head2 get_all_SearchFeatures | |
2870 | |
2871 Arg [1] : scalar $ticket_ids | |
2872 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY'); | |
2873 Description: Retreives all search features for stored blast | |
2874 results for the ticket that overlap this slice | |
2875 Returntype : listref of Bio::EnsEMBL::SeqFeatures | |
2876 Exceptions : none | |
2877 Caller : general (webby!) | |
2878 Status : Stable | |
2879 | |
2880 =cut | |
2881 | |
2882 sub get_all_SearchFeatures { | |
2883 my $self = shift; | |
2884 my $ticket = shift; | |
2885 local $_; | |
2886 unless($ticket) { | |
2887 throw("ticket argument is required"); | |
2888 } | |
2889 | |
2890 if(!$self->adaptor()) { | |
2891 warning("Cannot get SearchFeatures without an attached adaptor"); | |
2892 return []; | |
2893 } | |
2894 | |
2895 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast'); | |
2896 | |
2897 my $offset = $self->start-1; | |
2898 | |
2899 my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : []; | |
2900 | |
2901 foreach( @$features ) { | |
2902 $_->start( $_->start - $offset ); | |
2903 $_->end( $_->end - $offset ); | |
2904 }; | |
2905 return $features; | |
2906 | |
2907 } | |
2908 | |
2909 =head2 get_all_AssemblyExceptionFeatures | |
2910 | |
2911 Arg [1] : string $set (optional) | |
2912 Example : $slice->get_all_AssemblyExceptionFeatures(); | |
2913 Description: Retreives all misc features which overlap this slice. If | |
2914 a set code is provided only features which are members of | |
2915 the requested set are returned. | |
2916 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures | |
2917 Exceptions : none | |
2918 Caller : general | |
2919 Status : Stable | |
2920 | |
2921 =cut | |
2922 | |
2923 sub get_all_AssemblyExceptionFeatures { | |
2924 my $self = shift; | |
2925 my $misc_set = shift; | |
2926 | |
2927 my $adaptor = $self->adaptor(); | |
2928 | |
2929 if(!$adaptor) { | |
2930 warning('Cannot retrieve features without attached adaptor.'); | |
2931 return []; | |
2932 } | |
2933 | |
2934 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor(); | |
2935 | |
2936 return $aefa->fetch_all_by_Slice($self); | |
2937 } | |
2938 | |
2939 | |
2940 | |
2941 =head2 get_all_MiscFeatures | |
2942 | |
2943 Arg [1] : string $set (optional) | |
2944 Arg [2] : string $database (optional) | |
2945 Example : $slice->get_all_MiscFeatures('cloneset'); | |
2946 Description: Retreives all misc features which overlap this slice. If | |
2947 a set code is provided only features which are members of | |
2948 the requested set are returned. | |
2949 Returntype : listref of Bio::EnsEMBL::MiscFeatures | |
2950 Exceptions : none | |
2951 Caller : general | |
2952 Status : Stable | |
2953 | |
2954 =cut | |
2955 | |
2956 sub get_all_MiscFeatures { | |
2957 my $self = shift; | |
2958 my $misc_set = shift; | |
2959 my $dbtype = shift; | |
2960 my $msa; | |
2961 | |
2962 my $adaptor = $self->adaptor(); | |
2963 if(!$adaptor) { | |
2964 warning('Cannot retrieve features without attached adaptor.'); | |
2965 return []; | |
2966 } | |
2967 | |
2968 my $mfa; | |
2969 if($dbtype) { | |
2970 my $db = $reg->get_db($adaptor->db(), $dbtype); | |
2971 if(defined($db)){ | |
2972 $mfa = $reg->get_adaptor( lc($db->species()), $db->group(), "miscfeature" ); | |
2973 } else{ | |
2974 $mfa = $reg->get_adaptor( $adaptor->db()->species(), $dbtype, "miscfeature" ); | |
2975 } | |
2976 if(!defined $mfa) { | |
2977 warning( "$dbtype misc features not available" ); | |
2978 return []; | |
2979 } | |
2980 } else { | |
2981 $mfa = $adaptor->db->get_MiscFeatureAdaptor(); | |
2982 } | |
2983 | |
2984 if($misc_set) { | |
2985 return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set); | |
2986 } | |
2987 | |
2988 return $mfa->fetch_all_by_Slice($self); | |
2989 } | |
2990 | |
2991 =head2 get_all_MarkerFeatures | |
2992 | |
2993 Arg [1] : (optional) string logic_name | |
2994 The logic name of the marker features to retrieve | |
2995 Arg [2] : (optional) int $priority | |
2996 Lower (exclusive) priority bound of the markers to retrieve | |
2997 Arg [3] : (optional) int $map_weight | |
2998 Upper (exclusive) priority bound of the markers to retrieve | |
2999 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)}; | |
3000 Description: Retrieves all markers which lie on this slice fulfilling the | |
3001 specified map_weight and priority parameters (if supplied). | |
3002 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures | |
3003 Exceptions : none | |
3004 Caller : contigview, general | |
3005 Status : Stable | |
3006 | |
3007 =cut | |
3008 | |
3009 sub get_all_MarkerFeatures { | |
3010 my ($self, $logic_name, $priority, $map_weight) = @_; | |
3011 | |
3012 if(!$self->adaptor()) { | |
3013 warning('Cannot retrieve MarkerFeatures without attached adaptor.'); | |
3014 return []; | |
3015 } | |
3016 | |
3017 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor; | |
3018 | |
3019 my $feats = $ma->fetch_all_by_Slice_and_priority($self, | |
3020 $priority, | |
3021 $map_weight, | |
3022 $logic_name); | |
3023 return $feats; | |
3024 } | |
3025 | |
3026 | |
3027 =head2 get_MarkerFeatures_by_Name | |
3028 | |
3029 Arg [1] : string marker Name | |
3030 The name (synonym) of the marker feature(s) to retrieve | |
3031 Example : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')}; | |
3032 Description: Retrieves all markers with this ID | |
3033 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures | |
3034 Exceptions : none | |
3035 Caller : contigview, general | |
3036 Status : Stable | |
3037 | |
3038 =cut | |
3039 | |
3040 sub get_MarkerFeatures_by_Name { | |
3041 my ($self, $name) = @_; | |
3042 | |
3043 if(!$self->adaptor()) { | |
3044 warning('Cannot retrieve MarkerFeatures without attached adaptor.'); | |
3045 return []; | |
3046 } | |
3047 | |
3048 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor; | |
3049 | |
3050 my $feats = $ma->fetch_all_by_Slice_and_MarkerName($self, $name); | |
3051 return $feats; | |
3052 } | |
3053 | |
3054 | |
3055 =head2 get_all_compara_DnaAlignFeatures | |
3056 | |
3057 Arg [1] : string $qy_species | |
3058 The name of the species to retrieve similarity features from | |
3059 Arg [2] : string $qy_assembly | |
3060 The name of the assembly to retrieve similarity features from | |
3061 Arg [3] : string $type | |
3062 The type of the alignment to retrieve similarity features from | |
3063 Arg [4] : <optional> compara dbadptor to use. | |
3064 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus', | |
3065 'MGSC3', | |
3066 'WGA'); | |
3067 Description: Retrieves a list of DNA-DNA Alignments to the species specified | |
3068 by the $qy_species argument. | |
3069 The compara database must be attached to the core database | |
3070 for this call to work correctly. As well the compara database | |
3071 must have the core dbadaptors for both this species, and the | |
3072 query species added to function correctly. | |
3073 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures | |
3074 Exceptions : warning if compara database is not available | |
3075 Caller : contigview | |
3076 Status : Stable | |
3077 | |
3078 =cut | |
3079 | |
3080 sub get_all_compara_DnaAlignFeatures { | |
3081 my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_; | |
3082 | |
3083 if(!$self->adaptor()) { | |
3084 warning("Cannot retrieve DnaAlignFeatures without attached adaptor"); | |
3085 return []; | |
3086 } | |
3087 | |
3088 unless($qy_species && $alignment_type # && $qy_assembly | |
3089 ) { | |
3090 throw("Query species and assembly and alignmemt type arguments are required"); | |
3091 } | |
3092 | |
3093 if(!defined($compara_db)){ | |
3094 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara"); | |
3095 } | |
3096 unless($compara_db) { | |
3097 warning("Compara database must be attached to core database or passed ". | |
3098 "as an argument to " . | |
3099 "retrieve compara information"); | |
3100 return []; | |
3101 } | |
3102 | |
3103 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor; | |
3104 return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type); | |
3105 } | |
3106 | |
3107 =head2 get_all_compara_Syntenies | |
3108 | |
3109 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus" | |
3110 Arg [2] : string $method_link_type, default is "SYNTENY" | |
3111 Arg [3] : <optional> compara dbadaptor to use. | |
3112 Description: gets all the compara syntenyies for a specfic species | |
3113 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion | |
3114 Status : Stable | |
3115 | |
3116 =cut | |
3117 | |
3118 sub get_all_compara_Syntenies { | |
3119 my ($self, $qy_species, $method_link_type, $compara_db) = @_; | |
3120 | |
3121 if(!$self->adaptor()) { | |
3122 warning("Cannot retrieve features without attached adaptor"); | |
3123 return []; | |
3124 } | |
3125 | |
3126 unless($qy_species) { | |
3127 throw("Query species and assembly arguments are required"); | |
3128 } | |
3129 | |
3130 unless (defined $method_link_type) { | |
3131 $method_link_type = "SYNTENY"; | |
3132 } | |
3133 | |
3134 if(!defined($compara_db)){ | |
3135 $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara"); | |
3136 } | |
3137 unless($compara_db) { | |
3138 warning("Compara database must be attached to core database or passed ". | |
3139 "as an argument to " . | |
3140 "retrieve compara information"); | |
3141 return []; | |
3142 } | |
3143 my $gdba = $compara_db->get_GenomeDBAdaptor(); | |
3144 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor(); | |
3145 my $dfa = $compara_db->get_DnaFragAdaptor(); | |
3146 my $sra = $compara_db->get_SyntenyRegionAdaptor(); | |
3147 | |
3148 my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db()); | |
3149 my $query_gdb = $gdba->fetch_by_registry_name($qy_species); | |
3150 my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]); | |
3151 | |
3152 my $cs = $self->coord_system()->name(); | |
3153 my $sr = $self->seq_region_name(); | |
3154 my ($dnafrag) = @{$dfa->fetch_all_by_GenomeDB_region($this_gdb, $cs, $sr)}; | |
3155 return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end); | |
3156 } | |
3157 | |
3158 =head2 get_all_Haplotypes | |
3159 | |
3160 Arg [1] : (optional) boolean $lite_flag | |
3161 if true lightweight haplotype objects are used | |
3162 Example : @haplotypes = $slice->get_all_Haplotypes; | |
3163 Description: Retrieves all of the haplotypes on this slice. Only works | |
3164 if the haplotype adaptor has been attached to the core adaptor | |
3165 via $dba->add_db_adaptor('haplotype', $hdba); | |
3166 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes | |
3167 Exceptions : warning is Haplotype database is not available | |
3168 Caller : contigview, general | |
3169 Status : Stable | |
3170 | |
3171 =cut | |
3172 | |
3173 sub get_all_Haplotypes { | |
3174 my($self, $lite_flag) = @_; | |
3175 | |
3176 if(!$self->adaptor()) { | |
3177 warning("Cannot retrieve features without attached adaptor"); | |
3178 return []; | |
3179 } | |
3180 | |
3181 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype'); | |
3182 | |
3183 unless($haplo_db) { | |
3184 warning("Haplotype database must be attached to core database to " . | |
3185 "retrieve haplotype information" ); | |
3186 return []; | |
3187 } | |
3188 | |
3189 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor; | |
3190 | |
3191 my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag); | |
3192 | |
3193 return $haplotypes; | |
3194 } | |
3195 | |
3196 | |
3197 sub get_all_DASFactories { | |
3198 my $self = shift; | |
3199 return [ $self->adaptor()->db()->_each_DASFeatureFactory ]; | |
3200 } | |
3201 | |
3202 sub get_all_DASFeatures_dsn { | |
3203 my ($self, $source_type, $dsn) = @_; | |
3204 | |
3205 if(!$self->adaptor()) { | |
3206 warning("Cannot retrieve features without attached adaptor"); | |
3207 return []; | |
3208 } | |
3209 my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory; | |
3210 | |
3211 return [ $X[0]->fetch_all_Features( $self, $source_type ) ]; | |
3212 } | |
3213 | |
3214 =head2 get_all_DAS_Features | |
3215 | |
3216 Arg [1] : none | |
3217 Example : $features = $slice->get_all_DASFeatures; | |
3218 Description: Retreives a hash reference to a hash of DAS feature | |
3219 sets, keyed by the DNS, NOTE the values of this hash | |
3220 are an anonymous array containing: | |
3221 (1) a pointer to an array of features; | |
3222 (2) a pointer to the DAS stylesheet | |
3223 Returntype : hashref of Bio::SeqFeatures | |
3224 Exceptions : ? | |
3225 Caller : webcode | |
3226 Status : Stable | |
3227 | |
3228 =cut | |
3229 sub get_all_DAS_Features{ | |
3230 my ($self) = @_; | |
3231 | |
3232 $self->{_das_features} ||= {}; # Cache | |
3233 $self->{_das_styles} ||= {}; # Cache | |
3234 $self->{_das_segments} ||= {}; # Cache | |
3235 my %das_features; | |
3236 my %das_styles; | |
3237 my %das_segments; | |
3238 my $slice = $self; | |
3239 | |
3240 foreach my $dasfact( @{$self->get_all_DASFactories} ){ | |
3241 my $dsn = $dasfact->adaptor->dsn; | |
3242 my $name = $dasfact->adaptor->name; | |
3243 # my $type = $dasfact->adaptor->type; | |
3244 my $url = $dasfact->adaptor->url; | |
3245 | |
3246 my ($type) = $dasfact->adaptor->mapping; | |
3247 if (ref $type eq 'ARRAY') { | |
3248 $type = shift @$type; | |
3249 } | |
3250 $type ||= $dasfact->adaptor->type; | |
3251 # Construct a cache key : SOURCE_URL/TYPE | |
3252 # Need the type to handle sources that serve multiple types of features | |
3253 | |
3254 my $key = join('/', $name, $type); | |
3255 if( $self->{_das_features}->{$key} ){ # Use cached | |
3256 $das_features{$name} = $self->{_das_features}->{$key}; | |
3257 $das_styles{$name} = $self->{_das_styles}->{$key}; | |
3258 $das_segments{$name} = $self->{_das_segments}->{$key}; | |
3259 } else { # Get fresh data | |
3260 my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type ); | |
3261 $self->{_das_features}->{$key} = $featref; | |
3262 $self->{_das_styles}->{$key} = $styleref; | |
3263 $self->{_das_segments}->{$key} = $segref; | |
3264 $das_features{$name} = $featref; | |
3265 $das_styles{$name} = $styleref; | |
3266 $das_segments{$name} = $segref; | |
3267 } | |
3268 } | |
3269 | |
3270 return (\%das_features, \%das_styles, \%das_segments); | |
3271 } | |
3272 | |
3273 sub get_all_DASFeatures{ | |
3274 my ($self, $source_type) = @_; | |
3275 | |
3276 | |
3277 if(!$self->adaptor()) { | |
3278 warning("Cannot retrieve features without attached adaptor"); | |
3279 return []; | |
3280 } | |
3281 | |
3282 my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory; | |
3283 return \%genomic_features; | |
3284 | |
3285 } | |
3286 | |
3287 sub old_get_all_DASFeatures{ | |
3288 my ($self,@args) = @_; | |
3289 | |
3290 if(!$self->adaptor()) { | |
3291 warning("Cannot retrieve features without attached adaptor"); | |
3292 return []; | |
3293 } | |
3294 | |
3295 my %genomic_features = | |
3296 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) } | |
3297 $self->adaptor()->db()->_each_DASFeatureFactory; | |
3298 return \%genomic_features; | |
3299 | |
3300 } | |
3301 | |
3302 | |
3303 =head2 get_all_ExternalFeatures | |
3304 | |
3305 Arg [1] : (optional) string $track_name | |
3306 If specified only features from ExternalFeatureAdaptors with | |
3307 the track name $track_name are retrieved. | |
3308 If not set, all features from every ExternalFeatureAdaptor are | |
3309 retrieved. | |
3310 Example : @x_features = @{$slice->get_all_ExternalFeatures} | |
3311 Description: Retrieves features on this slice from external feature adaptors | |
3312 Returntype : listref of Bio::SeqFeatureI implementing objects in slice | |
3313 coordinates | |
3314 Exceptions : none | |
3315 Caller : general | |
3316 Status : Stable | |
3317 | |
3318 =cut | |
3319 | |
3320 sub get_all_ExternalFeatures { | |
3321 my ($self, $track_name) = @_; | |
3322 | |
3323 if(!$self->adaptor()) { | |
3324 warning("Cannot retrieve features without attached adaptor"); | |
3325 return []; | |
3326 } | |
3327 | |
3328 my $features = []; | |
3329 | |
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors; | |
3331 my @xf_adaptors = (); | |
3332 | |
3333 if($track_name) { | |
3334 #use a specific adaptor | |
3335 if(exists $xfa_hash->{$track_name}) { | |
3336 push @xf_adaptors, $xfa_hash->{$track_name}; | |
3337 } | |
3338 } else { | |
3339 #use all of the adaptors | |
3340 push @xf_adaptors, values %$xfa_hash; | |
3341 } | |
3342 | |
3343 | |
3344 foreach my $xfa (@xf_adaptors) { | |
3345 push @$features, @{$xfa->fetch_all_by_Slice($self)}; | |
3346 } | |
3347 | |
3348 return $features; | |
3349 } | |
3350 | |
3351 | |
3352 =head2 get_all_DitagFeatures | |
3353 | |
3354 Arg [1] : (optional) string ditag type | |
3355 Arg [1] : (optional) string logic_name | |
3356 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures}; | |
3357 Description: Retrieves the DitagFeatures of a specific type which overlap | |
3358 this slice with. If type is not defined, all features are | |
3359 retrieved. | |
3360 Returntype : listref of Bio::EnsEMBL::DitagFeatures | |
3361 Exceptions : warning if slice does not have attached adaptor | |
3362 Caller : general | |
3363 Status : Stable | |
3364 | |
3365 =cut | |
3366 | |
3367 sub get_all_DitagFeatures { | |
3368 my ($self, $type, $logic_name) = @_; | |
3369 | |
3370 if(!$self->adaptor()) { | |
3371 warning('Cannot get DitagFeatures without attached adaptor'); | |
3372 return []; | |
3373 } | |
3374 | |
3375 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor(); | |
3376 | |
3377 return $dfa->fetch_all_by_Slice($self, $type, $logic_name); | |
3378 } | |
3379 | |
3380 | |
3381 | |
3382 | |
3383 # GENERIC FEATURES (See DBAdaptor.pm) | |
3384 | |
3385 =head2 get_generic_features | |
3386 | |
3387 Arg [1] : (optional) List of names of generic feature types to return. | |
3388 If no feature names are given, all generic features are | |
3389 returned. | |
3390 Example : my %features = %{$slice->get_generic_features()}; | |
3391 Description: Gets generic features via the generic feature adaptors that | |
3392 have been added via DBAdaptor->add_GenricFeatureAdaptor (if | |
3393 any) | |
3394 Returntype : Hash of named features. | |
3395 Exceptions : none | |
3396 Caller : none | |
3397 Status : Stable | |
3398 | |
3399 =cut | |
3400 | |
3401 sub get_generic_features { | |
3402 | |
3403 my ($self, @names) = @_; | |
3404 | |
3405 if(!$self->adaptor()) { | |
3406 warning('Cannot retrieve features without attached adaptor'); | |
3407 return []; | |
3408 } | |
3409 | |
3410 my $db = $self->adaptor()->db(); | |
3411 | |
3412 my %features = (); # this will hold the results | |
3413 | |
3414 # get the adaptors for each feature | |
3415 my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)}; | |
3416 | |
3417 foreach my $adaptor_name (keys(%adaptors)) { | |
3418 | |
3419 my $adaptor_obj = $adaptors{$adaptor_name}; | |
3420 # get the features and add them to the hash | |
3421 my $features_ref = $adaptor_obj->fetch_all_by_Slice($self); | |
3422 # add each feature to the hash to be returned | |
3423 foreach my $feature (@$features_ref) { | |
3424 $features{$adaptor_name} = $feature; | |
3425 } | |
3426 } | |
3427 | |
3428 return \%features; | |
3429 | |
3430 } | |
3431 | |
3432 =head2 project_to_slice | |
3433 | |
3434 Arg [1] : Slice to project to. | |
3435 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice); | |
3436 foreach my $segment ( @$chr_projection ){ | |
3437 $chr_slice = $segment->to_Slice(); | |
3438 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'. | |
3439 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start(). | |
3440 '-'.$chr_slice->end(). | |
3441 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n"; | |
3442 } | |
3443 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings | |
3444 and we want to state which one to project to. | |
3445 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which | |
3446 can also be used as [$start,$end,$slice] triplets. | |
3447 Exceptions : none | |
3448 Caller : none | |
3449 Status : At Risk | |
3450 | |
3451 =cut | |
3452 | |
3453 sub project_to_slice { | |
3454 my $self = shift; | |
3455 my $to_slice = shift; | |
3456 | |
3457 throw('Slice argument is required') if(!$to_slice); | |
3458 | |
3459 my $slice_adaptor = $self->adaptor(); | |
3460 | |
3461 if(!$slice_adaptor) { | |
3462 warning("Cannot project without attached adaptor."); | |
3463 return []; | |
3464 } | |
3465 | |
3466 | |
3467 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor(); | |
3468 | |
3469 my $cs = $to_slice->coord_system(); | |
3470 my $slice_cs = $self->coord_system(); | |
3471 my $to_slice_id = $to_slice->get_seq_region_id; | |
3472 | |
3473 my @projection; | |
3474 my $current_start = 1; | |
3475 | |
3476 # decompose this slice into its symlinked components. | |
3477 # this allows us to handle haplotypes and PARs | |
3478 my $normal_slice_proj = | |
3479 $slice_adaptor->fetch_normalized_slice_projection($self); | |
3480 foreach my $segment (@$normal_slice_proj) { | |
3481 my $normal_slice = $segment->[2]; | |
3482 | |
3483 $slice_cs = $normal_slice->coord_system(); | |
3484 | |
3485 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor(); | |
3486 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs); | |
3487 | |
3488 # perform the mapping between this slice and the requested system | |
3489 my @coords; | |
3490 | |
3491 if( defined $asm_mapper ) { | |
3492 @coords = $asm_mapper->map($normal_slice->seq_region_name(), | |
3493 $normal_slice->start(), | |
3494 $normal_slice->end(), | |
3495 $normal_slice->strand(), | |
3496 $slice_cs, undef, $to_slice); | |
3497 } else { | |
3498 $coords[0] = Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(), | |
3499 $normal_slice->end()); | |
3500 } | |
3501 | |
3502 my $last_rank =0; | |
3503 #construct a projection from the mapping results and return it | |
3504 foreach my $coord (@coords) { | |
3505 my $coord_start = $coord->start(); | |
3506 my $coord_end = $coord->end(); | |
3507 my $length = $coord_end - $coord_start + 1; | |
3508 | |
3509 | |
3510 if( $last_rank != $coord->rank){ | |
3511 $current_start = 1; | |
3512 } | |
3513 $last_rank = $coord->rank; | |
3514 | |
3515 #skip gaps | |
3516 if($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) { | |
3517 if($coord->id != $to_slice_id){ # for multiple mappings only get the correct one | |
3518 $current_start += $length; | |
3519 next; | |
3520 } | |
3521 my $coord_cs = $coord->coord_system(); | |
3522 | |
3523 # If the normalised projection just ended up mapping to the | |
3524 # same coordinate system we were already in then we should just | |
3525 # return the original region. This can happen for example, if we | |
3526 # were on a PAR region on Y which refered to X and a projection to | |
3527 # 'toplevel' was requested. | |
3528 # if($coord_cs->equals($slice_cs)) { | |
3529 # # trim off regions which are not defined | |
3530 # return $self->_constrain_to_region(); | |
3531 # } | |
3532 | |
3533 #create slices for the mapped-to coord system | |
3534 my $slice = $slice_adaptor->fetch_by_seq_region_id( | |
3535 $coord->id(), | |
3536 $coord_start, | |
3537 $coord_end, | |
3538 $coord->strand()); | |
3539 | |
3540 my $current_end = $current_start + $length - 1; | |
3541 | |
3542 push @projection, bless([$current_start, $current_end, $slice], | |
3543 "Bio::EnsEMBL::ProjectionSegment"); | |
3544 } | |
3545 | |
3546 $current_start += $length; | |
3547 } | |
3548 } | |
3549 | |
3550 | |
3551 # delete the cache as we may want to map to different set next time and old | |
3552 # results will be cached. | |
3553 | |
3554 $mapper_aptr->delete_cache; | |
3555 | |
3556 return \@projection; | |
3557 } | |
3558 | |
3559 | |
3560 =head2 get_all_synonyms | |
3561 | |
3562 Args : none. | |
3563 Example : my @alternative_names = @{$slice->get_all_synonyms()}; | |
3564 Description: get a list of alternative names for this slice | |
3565 Returntype : reference to list of SeqRegionSynonym objects. | |
3566 Exception : none | |
3567 Caller : general | |
3568 Status : At Risk | |
3569 | |
3570 =cut | |
3571 | |
3572 sub get_all_synonyms{ | |
3573 my $self = shift; | |
3574 my $external_db_id =shift; | |
3575 | |
3576 if ( !defined( $self->{'synonym'} ) ) { | |
3577 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor(); | |
3578 $self->{'synonym'} = | |
3579 $adap->get_synonyms( $self->get_seq_region_id($self) ); | |
3580 } | |
3581 | |
3582 return $self->{'synonym'}; | |
3583 } | |
3584 | |
3585 =head2 add_synonym | |
3586 | |
3587 Args[0] : synonym. | |
3588 Example : $slice->add_synonym("alt_name"); | |
3589 Description: add an alternative name for this slice | |
3590 Returntype : none | |
3591 Exception : none | |
3592 Caller : general | |
3593 Status : At Risk | |
3594 | |
3595 =cut | |
3596 | |
3597 sub add_synonym{ | |
3598 my $self = shift; | |
3599 my $syn = shift; | |
3600 my $external_db_id = shift; | |
3601 | |
3602 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor(); | |
3603 if ( !defined( $self->{'synonym'} ) ) { | |
3604 $self->{'synonym'} = $self->get_all_synonyms(); | |
3605 } | |
3606 my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap, | |
3607 -synonym => $syn, | |
3608 -external_db_id => $external_db_id, | |
3609 -seq_region_id => $self->get_seq_region_id($self)); | |
3610 | |
3611 push (@{$self->{'synonym'}}, $new_syn); | |
3612 | |
3613 return; | |
3614 } | |
3615 | |
3616 =head2 summary_as_hash | |
3617 | |
3618 Example : $slice_summary = $slice->summary_as_hash(); | |
3619 Description : Retrieves a textual summary of this slice. | |
3620 Returns : hashref of descriptive strings | |
3621 =cut | |
3622 | |
3623 sub summary_as_hash { | |
3624 my $self = shift; | |
3625 my %summary; | |
3626 $summary{'display_id'} = $self->display_id; | |
3627 $summary{'start'} = $self->start; | |
3628 $summary{'end'} = $self->end; | |
3629 $summary{'strand'} = $self->strand; | |
3630 $summary{'Is_circular'} = $self->is_circular ? "true" : "false"; | |
3631 $summary{'region_name'} = $self->seq_region_name(); | |
3632 return \%summary; | |
3633 } | |
3634 | |
3635 # | |
3636 # Bioperl Bio::PrimarySeqI methods: | |
3637 # | |
3638 | |
3639 =head2 id | |
3640 | |
3641 Description: Included for Bio::PrimarySeqI interface compliance (0.7) | |
3642 | |
3643 =cut | |
3644 | |
3645 sub id { name(@_); } | |
3646 | |
3647 | |
3648 =head2 display_id | |
3649 | |
3650 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3651 | |
3652 =cut | |
3653 | |
3654 sub display_id { name(@_); } | |
3655 | |
3656 | |
3657 =head2 primary_id | |
3658 | |
3659 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3660 | |
3661 =cut | |
3662 | |
3663 sub primary_id { name(@_); } | |
3664 | |
3665 | |
3666 =head2 desc | |
3667 | |
3668 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3669 | |
3670 =cut | |
3671 | |
3672 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); } | |
3673 | |
3674 | |
3675 =head2 moltype | |
3676 | |
3677 Description: Included for Bio::PrimarySeqI interface compliance (0.7) | |
3678 | |
3679 =cut | |
3680 | |
3681 sub moltype { return 'dna'; } | |
3682 | |
3683 =head2 alphabet | |
3684 | |
3685 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3686 | |
3687 =cut | |
3688 | |
3689 sub alphabet { return 'dna'; } | |
3690 | |
3691 | |
3692 =head2 accession_number | |
3693 | |
3694 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3695 | |
3696 =cut | |
3697 | |
3698 sub accession_number { name(@_); } | |
3699 | |
3700 | |
3701 # sub DEPRECATED METHODS # | |
3702 ############################################################################### | |
3703 | |
3704 =head1 DEPRECATED METHODS | |
3705 | |
3706 =head2 get_all_AffyFeatures | |
3707 | |
3708 Description: DEPRECATED, use functionality provided by the Ensembl | |
3709 Functional Genomics API instead. | |
3710 | |
3711 =cut | |
3712 | |
3713 sub get_all_AffyFeatures { | |
3714 deprecate( 'Use functionality provided by the ' | |
3715 . 'Ensembl Functional Genomics API instead.' ); | |
3716 throw('Can not delegate deprecated functionality.'); | |
3717 | |
3718 # Old code: | |
3719 | |
3720 # my $self = shift; | |
3721 # my @arraynames = @_; | |
3722 # | |
3723 # my $sa = $self->adaptor(); | |
3724 # if ( ! $sa ) { | |
3725 # warning( "Cannot retrieve features without attached adaptor." ); | |
3726 # } | |
3727 # my $fa = $sa->db()->get_AffyFeatureAdaptor(); | |
3728 # my $features; | |
3729 # | |
3730 # if ( @arraynames ) { | |
3731 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames ); | |
3732 # } else { | |
3733 # $features = $fa->fetch_all_by_Slice( $self ); | |
3734 # } | |
3735 # return $features; | |
3736 } | |
3737 | |
3738 =head2 get_all_OligoFeatures | |
3739 | |
3740 Description: DEPRECATED, use functionality provided by the Ensembl | |
3741 Functional Genomics API instead. | |
3742 | |
3743 =cut | |
3744 | |
3745 sub get_all_OligoFeatures { | |
3746 | |
3747 deprecate( 'Use functionality provided by the ' | |
3748 . 'Ensembl Functional Genomics API instead.' ); | |
3749 throw('Can not delegate deprecated functionality.'); | |
3750 | |
3751 # Old code: | |
3752 | |
3753 # my $self = shift; | |
3754 # my @arraynames = @_; | |
3755 # | |
3756 # my $sa = $self->adaptor(); | |
3757 # if ( ! $sa ) { | |
3758 # warning( "Cannot retrieve features without attached adaptor." ); | |
3759 # } | |
3760 # my $fa = $sa->db()->get_OligoFeatureAdaptor(); | |
3761 # my $features; | |
3762 # | |
3763 # if ( @arraynames ) { | |
3764 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames ); | |
3765 # } else { | |
3766 # $features = $fa->fetch_all_by_Slice( $self ); | |
3767 # } | |
3768 # return $features; | |
3769 } | |
3770 | |
3771 =head2 get_all_OligoFeatures_by_type | |
3772 | |
3773 Description: DEPRECATED, use functionality provided by the Ensembl | |
3774 Functional Genomics API instead. | |
3775 | |
3776 =cut | |
3777 | |
3778 sub get_all_OligoFeatures_by_type { | |
3779 | |
3780 deprecate( 'Use functionality provided by the ' | |
3781 . 'Ensembl Functional Genomics API instead.' ); | |
3782 throw('Can not delegate deprecated functionality.'); | |
3783 | |
3784 # Old code: | |
3785 | |
3786 # my ($self, $type, $logic_name) = @_; | |
3787 # | |
3788 # throw('Need type as parameter') if !$type; | |
3789 # | |
3790 # my $sa = $self->adaptor(); | |
3791 # if ( ! $sa ) { | |
3792 # warning( "Cannot retrieve features without attached adaptor." ); | |
3793 # } | |
3794 # my $fa = $sa->db()->get_OligoFeatureAdaptor(); | |
3795 # | |
3796 # my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name ); | |
3797 # | |
3798 # return $features; | |
3799 } | |
3800 | |
3801 =head2 get_all_supercontig_Slices | |
3802 | |
3803 Description: DEPRECATED use get_tiling_path("NTcontig") instead | |
3804 | |
3805 =cut | |
3806 | |
3807 | |
3808 sub get_all_supercontig_Slices { | |
3809 my $self = shift; | |
3810 | |
3811 deprecate("Use get_tiling_path('NTcontig') instead"); | |
3812 | |
3813 my $result = []; | |
3814 | |
3815 if( $self->adaptor() ) { | |
3816 my $superctg_names = | |
3817 $self->adaptor()->list_overlapping_supercontigs( $self ); | |
3818 | |
3819 for my $name ( @$superctg_names ) { | |
3820 my $slice; | |
3821 $slice = $self->adaptor()->fetch_by_supercontig_name( $name ); | |
3822 $slice->name( $name ); | |
3823 push( @$result, $slice ); | |
3824 } | |
3825 } else { | |
3826 warning( "Slice needs to be attached to a database to get supercontigs" ); | |
3827 } | |
3828 | |
3829 return $result; | |
3830 } | |
3831 | |
3832 | |
3833 | |
3834 | |
3835 | |
3836 =head2 get_Chromosome | |
3837 | |
3838 Description: DEPRECATED use this instead: | |
3839 $slice_adp->fetch_by_region('chromosome', | |
3840 $slice->seq_region_name) | |
3841 | |
3842 =cut | |
3843 | |
3844 sub get_Chromosome { | |
3845 my $self = shift @_; | |
3846 | |
3847 deprecate("Use SliceAdaptor::fetch_by_region('chromosome'," . | |
3848 '$slice->seq_region_name) instead'); | |
3849 | |
3850 my $csa = $self->adaptor->db->get_CoordSystemAdaptor(); | |
3851 my ($top_cs) = @{$csa->fetch_all()}; | |
3852 | |
3853 return $self->adaptor->fetch_by_region($top_cs->name(), | |
3854 $self->seq_region_name(), | |
3855 undef,undef,undef, | |
3856 $top_cs->version()); | |
3857 } | |
3858 | |
3859 | |
3860 | |
3861 =head2 chr_name | |
3862 | |
3863 Description: DEPRECATED use seq_region_name() instead | |
3864 | |
3865 =cut | |
3866 | |
3867 sub chr_name{ | |
3868 deprecate("Use seq_region_name() instead"); | |
3869 seq_region_name(@_); | |
3870 } | |
3871 | |
3872 | |
3873 | |
3874 =head2 chr_start | |
3875 | |
3876 Description: DEPRECATED use start() instead | |
3877 | |
3878 =cut | |
3879 | |
3880 sub chr_start{ | |
3881 deprecate('Use start() instead'); | |
3882 start(@_); | |
3883 } | |
3884 | |
3885 | |
3886 | |
3887 =head2 chr_end | |
3888 | |
3889 Description: DEPRECATED use end() instead | |
3890 Returntype : int | |
3891 Exceptions : none | |
3892 Caller : SliceAdaptor, general | |
3893 | |
3894 =cut | |
3895 | |
3896 sub chr_end{ | |
3897 deprecate('Use end() instead'); | |
3898 end(@_); | |
3899 } | |
3900 | |
3901 | |
3902 =head2 assembly_type | |
3903 | |
3904 Description: DEPRECATED use version instead | |
3905 | |
3906 =cut | |
3907 | |
3908 sub assembly_type{ | |
3909 my $self = shift; | |
3910 deprecate('Use $slice->coord_system()->version() instead.'); | |
3911 return $self->coord_system->version(); | |
3912 } | |
3913 | |
3914 | |
3915 =head2 get_tiling_path | |
3916 | |
3917 Description: DEPRECATED use project instead | |
3918 | |
3919 =cut | |
3920 | |
3921 sub get_tiling_path { | |
3922 my $self = shift; | |
3923 deprecate('Use $slice->project("seqlevel") instead.'); | |
3924 return []; | |
3925 } | |
3926 | |
3927 | |
3928 =head2 dbID | |
3929 | |
3930 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead | |
3931 | |
3932 =cut | |
3933 | |
3934 sub dbID { | |
3935 my $self = shift; | |
3936 deprecate('Use SliceAdaptor::get_seq_region_id instead.'); | |
3937 if(!$self->adaptor) { | |
3938 warning('Cannot retrieve seq_region_id without attached adaptor.'); | |
3939 return 0; | |
3940 } | |
3941 return $self->adaptor->get_seq_region_id($self); | |
3942 } | |
3943 | |
3944 | |
3945 =head2 get_all_MapFrags | |
3946 | |
3947 Description: DEPRECATED use get_all_MiscFeatures instead | |
3948 | |
3949 =cut | |
3950 | |
3951 sub get_all_MapFrags { | |
3952 my $self = shift; | |
3953 deprecate('Use get_all_MiscFeatures instead'); | |
3954 return $self->get_all_MiscFeatures(@_); | |
3955 } | |
3956 | |
3957 =head2 has_MapSet | |
3958 | |
3959 Description: DEPRECATED use get_all_MiscFeatures instead | |
3960 | |
3961 =cut | |
3962 | |
3963 sub has_MapSet { | |
3964 my( $self, $mapset_name ) = @_; | |
3965 deprecate('Use get_all_MiscFeatures instead'); | |
3966 my $mfs = $self->get_all_MiscFeatures($mapset_name); | |
3967 return (@$mfs > 0); | |
3968 } | |
3969 | |
3970 1; |