Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/CircularSlice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::CircularSlice - Arbitary Slice of a genome | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 $sa = $db->get_SliceAdaptor; | |
28 | |
29 $slice = | |
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); | |
31 | |
32 # get some attributes of the slice | |
33 my $seqname = $slice->seq_region_name(); | |
34 my $start = $slice->start(); | |
35 my $end = $slice->end(); | |
36 | |
37 # get the sequence from the slice | |
38 my $seq = $slice->seq(); | |
39 | |
40 # get some features from the slice | |
41 foreach $gene ( @{ $slice->get_all_Genes } ) { | |
42 # do something with a gene | |
43 } | |
44 | |
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) { | |
46 # do something with dna-dna alignments | |
47 } | |
48 | |
49 =head1 DESCRIPTION | |
50 | |
51 A Slice object represents a region of a genome. It can be used to | |
52 retrieve sequence or features from an area of interest. | |
53 | |
54 =head1 METHODS | |
55 | |
56 =cut | |
57 | |
58 package Bio::EnsEMBL::CircularSlice; | |
59 use vars qw(@ISA); | |
60 use strict; | |
61 | |
62 use Bio::PrimarySeqI; | |
63 | |
64 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
65 use Bio::EnsEMBL::Utils::Exception | |
66 qw(throw deprecate warning stack_trace_dump); | |
67 use Bio::EnsEMBL::RepeatMaskedSlice; | |
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); | |
69 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref ); | |
70 use Bio::EnsEMBL::ProjectionSegment; | |
71 use Bio::EnsEMBL::Registry; | |
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor; | |
73 | |
74 use Bio::EnsEMBL::StrainSlice; | |
75 #use Bio::EnsEMBL::IndividualSlice; | |
76 #use Bio::EnsEMBL::IndividualSliceFactory; | |
77 use Bio::EnsEMBL::Mapper::RangeRegistry; | |
78 use Bio::EnsEMBL::Slice; | |
79 use Data::Dumper; | |
80 use Scalar::Util qw(weaken isweak); | |
81 | |
82 my $reg = "Bio::EnsEMBL::Registry"; | |
83 | |
84 @ISA = qw(Bio::EnsEMBL::Slice); | |
85 | |
86 =head2 new | |
87 | |
88 Arg [...] : List of named arguments | |
89 Bio::EnsEMBL::CoordSystem COORD_SYSTEM | |
90 string SEQ_REGION_NAME, | |
91 int START, | |
92 int END, | |
93 int SEQ_REGION_LENGTH, (optional) | |
94 string SEQ (optional) | |
95 int STRAND, (optional, defaults to 1) | |
96 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional) | |
97 Example : | |
98 | |
99 $slice = | |
100 Bio::EnsEMBL::CircularSlice->new( -coord_system => $cs, | |
101 -start => 1, | |
102 -end => 10000, | |
103 -strand => 1, | |
104 -seq_region_name => 'X', | |
105 -seq_region_length => 12e6, | |
106 -adaptor => $slice_adaptor ); | |
107 | |
108 Description: Creates a new slice object. A slice represents a | |
109 region of sequence in a particular coordinate system. | |
110 Slices can be used to retrieve sequence and features | |
111 from an area of interest in a genome. | |
112 | |
113 Coordinates start at 1 and are inclusive. Negative | |
114 coordinates or coordinates exceeding the length of | |
115 the seq_region are permitted. Start must be less | |
116 than or equal. to end regardless of the strand. | |
117 | |
118 Slice objects are immutable. Once instantiated their | |
119 attributes (with the exception of the adaptor) may | |
120 not be altered. To change the attributes a new slice | |
121 must be created. | |
122 | |
123 Returntype : Bio::EnsEMBL::CircularSlice | |
124 Exceptions : throws if start, end, coordsystem or seq_region_name not | |
125 specified or not of the correct type | |
126 Caller : general, Bio::EnsEMBL::SliceAdaptor | |
127 Status : Stable | |
128 | |
129 =cut | |
130 | |
131 sub new { | |
132 my $caller = shift; | |
133 | |
134 #new can be called as a class or object method | |
135 my $class = ref($caller) || $caller; | |
136 | |
137 my ( $seq, $coord_system, $seq_region_name, $seq_region_length, | |
138 $start, $end, $strand, $adaptor, $empty ) | |
139 = rearrange( [ | |
140 qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH | |
141 START END STRAND ADAPTOR EMPTY) ], | |
142 @_ ); | |
143 | |
144 #empty is only for backwards compatibility | |
145 if ($empty) { | |
146 deprecate( "Creation of empty slices is no longer needed " | |
147 . "and is deprecated" ); | |
148 my $self = bless( { 'empty' => 1 }, $class ); | |
149 $self->adaptor($adaptor); | |
150 return $self; | |
151 } | |
152 | |
153 if ( !defined($seq_region_name) ) { | |
154 throw('SEQ_REGION_NAME argument is required'); | |
155 } | |
156 if ( !defined($start) ) { throw('START argument is required') } | |
157 if ( !defined($end) ) { throw('END argument is required') } | |
158 | |
159 if ( !defined($seq_region_length) ) { $seq_region_length = $end } | |
160 | |
161 if ( $seq_region_length <= 0 ) { | |
162 throw('SEQ_REGION_LENGTH must be > 0'); | |
163 } | |
164 | |
165 if ( defined($coord_system) ) { | |
166 assert_ref( $coord_system, 'Bio::EnsEMBL::CoordSystem' ); | |
167 | |
168 if ( $coord_system->is_top_level() ) { | |
169 throw('Cannot create circular slice on toplevel CoordSystem.'); | |
170 } | |
171 } else { | |
172 warning("CircularSlice without coordinate system"); | |
173 } | |
174 | |
175 $strand ||= 1; | |
176 | |
177 if ( $strand != 1 && $strand != -1 ) { | |
178 throw('STRAND argument must be -1 or 1'); | |
179 } | |
180 | |
181 if ( defined($adaptor) ) { | |
182 assert_ref( $adaptor, 'Bio::EnsEMBL::DBSQL::SliceAdaptor' ); | |
183 } | |
184 | |
185 my $seq1 = { 'coord_system' => $coord_system, | |
186 'seq' => $seq, | |
187 'seq_region_name' => $seq_region_name, | |
188 'seq_region_length' => $seq_region_length, | |
189 'start' => int($start), | |
190 'end' => int($end), | |
191 'strand' => $strand }; | |
192 | |
193 bless $seq1, $class; | |
194 $seq1->adaptor($adaptor); | |
195 return $seq1; | |
196 } ## end sub new | |
197 | |
198 =head2 new_fast | |
199 | |
200 Arg [1] : hashref to be blessed | |
201 Description: Construct a new Bio::EnsEMBL::Slice using the hashref. | |
202 Exceptions : none | |
203 Returntype : Bio::EnsEMBL::CircularSlice | |
204 Caller : general | |
205 Status : Stable | |
206 | |
207 =cut | |
208 | |
209 sub new_fast { | |
210 my $class = shift; | |
211 my $hashref = shift; | |
212 my $self = bless $hashref, $class; | |
213 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) ); | |
214 return $self; | |
215 } | |
216 | |
217 =head2 centrepoint | |
218 | |
219 Arg [1] : none | |
220 Example : $cp = $slice->centrepoint(); | |
221 Description: Returns the mid position of this slice relative to the | |
222 start of the sequence region that it was created on. | |
223 Coordinates are inclusive and start at 1. | |
224 Returntype : int | |
225 Exceptions : none | |
226 Caller : general | |
227 Status : Stable | |
228 | |
229 =cut | |
230 | |
231 sub centrepoint { | |
232 my $self = shift; | |
233 | |
234 my ( $s, $e, $length ) = | |
235 ( $self->{'start'}, $self->{'end'}, $self->{'seq_region_length'} ); | |
236 | |
237 if ( $s < $e ) { | |
238 return ( $s + $e )/2; | |
239 } | |
240 | |
241 my $r1 = $length - $s; | |
242 my $r2 = $e; | |
243 my $r = ( $r1 + $r2 )/2; | |
244 my $m = $s + $r; | |
245 | |
246 if ( $m > $length ) { | |
247 $m = $m - $length; | |
248 } | |
249 | |
250 return $m; | |
251 } | |
252 | |
253 =head2 length | |
254 | |
255 Arg [1] : none | |
256 Example : $length = $slice->length(); | |
257 Description: Returns the length of this slice in basepairs | |
258 Returntype : int | |
259 Exceptions : none | |
260 Caller : general | |
261 Status : Stable | |
262 | |
263 =cut | |
264 | |
265 sub length { | |
266 my ($self) = @_; | |
267 | |
268 if ( $self->{'start'} < $self->{'end'} ) { | |
269 return $self->{'end'} - $self->{'start'} + 1; | |
270 } | |
271 | |
272 my $r1 = $self->{'seq_region_length'} - $self->{'start'}; | |
273 my $r2 = $self->{'end'}; | |
274 my $ln = $r1 + $r2 + 1; | |
275 | |
276 return $ln; | |
277 } | |
278 | |
279 =head2 invert | |
280 | |
281 Arg [1] : none | |
282 Example : $inverted_slice = $slice->invert; | |
283 Description: Creates a copy of this slice on the opposite strand and | |
284 returns it. | |
285 Returntype : Bio::EnsEMBL::Slice | |
286 Exceptions : none | |
287 Caller : general | |
288 Status : Stable | |
289 | |
290 =cut | |
291 | |
292 sub invert { | |
293 my $self = shift; | |
294 | |
295 # make a shallow copy of the slice via a hash copy and flip the strand | |
296 my %s = %$self; | |
297 $s{'strand'} = $self->{'strand'}*-1; | |
298 | |
299 # reverse compliment any attached sequence | |
300 reverse_comp( \$s{'seq'} ) if ( $s{'seq'} ); | |
301 | |
302 # bless and return the copy | |
303 return bless \%s, ref $self; | |
304 } | |
305 | |
306 =head2 seq | |
307 | |
308 Arg [1] : none | |
309 Example : print "SEQUENCE = ", $slice->seq(); | |
310 Description: Returns the sequence of the region represented by this | |
311 slice formatted as a string. | |
312 Returntype : string | |
313 Exceptions : none | |
314 Caller : general | |
315 Status : Stable | |
316 | |
317 =cut | |
318 | |
319 sub seq { | |
320 my $self = shift; | |
321 | |
322 # special case for in-between (insert) coordinates | |
323 return '' if ( $self->start() == $self->end() + 1 ); | |
324 return $self->{'seq'} if ( $self->{'seq'} ); | |
325 | |
326 if ( $self->adaptor() ) { | |
327 | |
328 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); | |
329 if ( $self->{'start'} > $self->{'end'} ) { | |
330 my $length = $self->{'seq_region_length'}; | |
331 | |
332 my $sl1 = | |
333 Bio::EnsEMBL::Slice->new( | |
334 -COORD_SYSTEM => $self->{'coord_system'}, | |
335 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
336 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
337 -START => $self->{'start'}, | |
338 -END => $self->{'seq_region_length'}, | |
339 -STRAND => $self->{'strand'}, | |
340 -ADAPTOR => $self->adaptor() ); | |
341 | |
342 my $sl2 = | |
343 Bio::EnsEMBL::Slice->new( | |
344 -COORD_SYSTEM => $self->{'coord_system'}, | |
345 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
346 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
347 -START => 1, | |
348 -END => $self->{'end'}, | |
349 -STRAND => $self->{'strand'}, | |
350 -ADAPTOR => $self->adaptor() ); | |
351 | |
352 my $seq1 = ${ | |
353 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl1, 1, undef, | |
354 1 ) }; | |
355 my $seq2 = ${ | |
356 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl2, 1, undef, | |
357 1 ) }; | |
358 return $seq1 . $seq2; | |
359 | |
360 } else { | |
361 my $seq1 = ${ | |
362 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, undef, | |
363 1 ) }; | |
364 return $seq1; | |
365 } | |
366 } ## end if ( $self->adaptor() ) | |
367 | |
368 # no attached sequence, and no db, so just return Ns | |
369 return 'N' x $self->length(); | |
370 } ## end sub seq | |
371 | |
372 =head2 subseq | |
373 | |
374 Arg [1] : int $startBasePair | |
375 relative to start of slice, which is 1. | |
376 Arg [2] : int $endBasePair | |
377 relative to start of slice. | |
378 Arg [3] : (optional) int $strand | |
379 The strand of the slice to obtain sequence from. Default | |
380 value is 1. | |
381 Description: returns string of dna sequence | |
382 Returntype : txt | |
383 Exceptions : end should be at least as big as start | |
384 strand must be set | |
385 Caller : general | |
386 Status : Stable | |
387 | |
388 =cut | |
389 | |
390 sub subseq { | |
391 my ( $self, $start, $end, $strand ) = @_; | |
392 | |
393 # handle 'between' case for insertions | |
394 return '' if ( $start == $end + 1 ); | |
395 | |
396 $strand = 1 unless ( defined $strand ); | |
397 | |
398 if ( $strand != -1 && $strand != 1 ) { | |
399 throw("Invalid strand [$strand] in call to Slice::subseq."); | |
400 } | |
401 my $subseq; | |
402 my $length = $self->{'seq_region_length'}; | |
403 | |
404 if ( $self->adaptor ) { | |
405 | |
406 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor(); | |
407 if ( $end < $start ) { | |
408 my $subseq1 = ${ | |
409 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start, | |
410 $length, $strand ) | |
411 }; | |
412 my $subseq2 = ${ | |
413 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, $end, | |
414 $strand ) }; | |
415 $subseq = $subseq1 . $subseq2; | |
416 | |
417 } else { | |
418 $subseq = ${ | |
419 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start, | |
420 $end, $strand ) }; | |
421 } | |
422 } else { | |
423 ## check for gap at the beginning and pad it with Ns | |
424 if ( $start < 1 ) { | |
425 $subseq = "N" x ( 1 - $start ); | |
426 $start = 1; | |
427 } | |
428 $subseq .= substr( $self->seq(), $start - 1, $end - $start + 1 ); | |
429 ## check for gap at the end and pad it with Ns | |
430 if ( $end > $self->length() ) { | |
431 $subseq .= "N" x ( $end - $self->length() ); | |
432 } | |
433 reverse_comp( \$subseq ) if ( $strand == -1 ); | |
434 } | |
435 return $subseq; | |
436 } ## end sub subseq | |
437 | |
438 =head2 project | |
439 | |
440 Arg [1] : string $name | |
441 The name of the coordinate system to project this slice onto | |
442 Arg [2] : string $version | |
443 The version of the coordinate system (such as 'NCBI34') to | |
444 project this slice onto | |
445 Example : | |
446 my $clone_projection = $slice->project('clone'); | |
447 | |
448 foreach my $seg (@$clone_projection) { | |
449 my $clone = $segment->to_Slice(); | |
450 print $slice->seq_region_name(), ':', $seg->from_start(), '-', | |
451 $seg->from_end(), ' -> ', | |
452 $clone->seq_region_name(), ':', $clone->start(), '-', | |
453 $clone->end(), | |
454 $clone->strand(), "\n"; | |
455 } | |
456 Description: Returns the results of 'projecting' this slice onto another | |
457 coordinate system. Projecting to a coordinate system that | |
458 the slice is assembled from is analagous to retrieving a tiling | |
459 path. This method may also be used to 'project up' to a higher | |
460 level coordinate system, however. | |
461 | |
462 This method returns a listref of triplets [start,end,slice] | |
463 which represents the projection. The start and end defined the | |
464 region of this slice which is made up of the third value of | |
465 the triplet: a slice in the requested coordinate system. | |
466 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which | |
467 can also be used as [$start,$end,$slice] triplets | |
468 Exceptions : none | |
469 Caller : general | |
470 Status : Stable | |
471 | |
472 =cut | |
473 | |
474 sub project { | |
475 my $self = shift; | |
476 my $cs_name = shift; | |
477 my $cs_version = shift; | |
478 | |
479 throw('Coord_system name argument is required') if ( !$cs_name ); | |
480 | |
481 my $slice_adaptor = $self->adaptor(); | |
482 | |
483 if ( !$slice_adaptor ) { | |
484 warning("Cannot project without attached adaptor."); | |
485 return []; | |
486 } | |
487 | |
488 if ( !$self->coord_system() ) { | |
489 warning("Cannot project without attached coord system."); | |
490 return []; | |
491 } | |
492 | |
493 my $db = $slice_adaptor->db(); | |
494 my $csa = $db->get_CoordSystemAdaptor(); | |
495 my $cs = $csa->fetch_by_name( $cs_name, $cs_version ); | |
496 | |
497 my $sl01 = | |
498 Bio::EnsEMBL::CircularSlice->new( | |
499 -COORD_SYSTEM => $self->{'coord_system'}, | |
500 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
501 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
502 -START => $self->{'start'}, | |
503 -END => $self->{'seq_region_length'}, | |
504 -STRAND => $self->{'strand'}, | |
505 -ADAPTOR => $self->adaptor() ); | |
506 | |
507 my $sl02 = | |
508 Bio::EnsEMBL::CircularSlice->new( | |
509 -COORD_SYSTEM => $self->{'coord_system'}, | |
510 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
511 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
512 -START => 1, | |
513 -END => $self->{'end'}, | |
514 -STRAND => $self->{'strand'}, | |
515 -ADAPTOR => $self->adaptor() ); | |
516 | |
517 my @projection; | |
518 my $current_start = 1; | |
519 | |
520 foreach my $sl2 ( $sl01, $sl02 ) { | |
521 my $slice_cs = $sl2->coord_system(); | |
522 | |
523 if ( !$cs ) { | |
524 throw( "Cannot project to unknown coordinate system " | |
525 . "[$cs_name $cs_version]" ); | |
526 } | |
527 | |
528 # no mapping is needed if the requested coord system is the one we are in | |
529 # but we do need to check if some of the slice is outside of defined regions | |
530 if ( $slice_cs->equals($cs) ) { | |
531 return $self->_constrain_to_region(); | |
532 } | |
533 | |
534 # decompose this slice into its symlinked components. | |
535 # this allows us to handle haplotypes and PARs | |
536 my $normal_slice_proj = | |
537 $slice_adaptor->fetch_normalized_slice_projection($sl2); | |
538 | |
539 foreach my $segment (@$normal_slice_proj) { | |
540 my $normal_slice = $segment->[2]; | |
541 | |
542 $slice_cs = $normal_slice->coord_system(); | |
543 | |
544 my $asma = $db->get_AssemblyMapperAdaptor(); | |
545 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs ); | |
546 | |
547 # perform the mapping between this slice and the requested system | |
548 my @coords; | |
549 | |
550 if ( defined $asm_mapper ) { | |
551 @coords = $asm_mapper->map( $normal_slice->seq_region_name(), | |
552 $normal_slice->start(), | |
553 $normal_slice->end(), | |
554 $normal_slice->strand(), | |
555 $slice_cs ); | |
556 | |
557 } else { | |
558 $coords[0] = | |
559 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(), | |
560 $normal_slice->end() ); | |
561 } | |
562 | |
563 #construct a projection from the mapping results and return it | |
564 foreach my $coord (@coords) { | |
565 my $coord_start = $coord->start(); | |
566 my $coord_end = $coord->end(); | |
567 my $length = $coord_end - $coord_start + 1; | |
568 | |
569 #skip gaps | |
570 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) { | |
571 my $coord_cs = $coord->coord_system(); | |
572 | |
573 # If the normalised projection just ended up mapping to the | |
574 # same coordinate system we were already in then we should just | |
575 # return the original region. This can happen for example, if we | |
576 # were on a PAR region on Y which refered to X and a projection to | |
577 # 'toplevel' was requested. | |
578 | |
579 if ( $coord_cs->equals($slice_cs) ) { | |
580 # trim off regions which are not defined | |
581 return $self->_constrain_to_region(); | |
582 } | |
583 #create slices for the mapped-to coord system | |
584 | |
585 my $slice = | |
586 $slice_adaptor->fetch_by_seq_region_id( | |
587 $coord->id(), $coord_start, | |
588 $coord_end, $coord->strand() | |
589 ); | |
590 | |
591 my $current_end = $current_start + $length - 1; | |
592 push @projection, | |
593 bless( [ $current_start, $current_end, $slice ], | |
594 "Bio::EnsEMBL::ProjectionSegment" ); | |
595 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...)) | |
596 | |
597 $current_start += $length; | |
598 } ## end foreach my $coord (@coords) | |
599 } ## end foreach my $segment (@$normal_slice_proj) | |
600 } #foreach | |
601 | |
602 return \@projection; | |
603 } ## end sub project | |
604 | |
605 sub project_org { | |
606 my $self = shift; | |
607 my $cs_name = shift; | |
608 my $cs_version = shift; | |
609 | |
610 throw('Coord_system name argument is required') if ( !$cs_name ); | |
611 | |
612 my $slice_adaptor = $self->adaptor(); | |
613 | |
614 if ( !$slice_adaptor ) { | |
615 warning("Cannot project without attached adaptor."); | |
616 return []; | |
617 } | |
618 | |
619 if ( !$self->coord_system() ) { | |
620 warning("Cannot project without attached coord system."); | |
621 return []; | |
622 } | |
623 | |
624 my $db = $slice_adaptor->db(); | |
625 my $csa = $db->get_CoordSystemAdaptor(); | |
626 my $cs = $csa->fetch_by_name( $cs_name, $cs_version ); | |
627 my $slice_cs = $self->coord_system(); | |
628 | |
629 if ( !$cs ) { | |
630 throw( "Cannot project to unknown coordinate system " | |
631 . "[$cs_name $cs_version]" ); | |
632 } | |
633 | |
634 # No mapping is needed if the requested coord system is the one we | |
635 # are in. But we do need to check if some of the slice is outside of | |
636 # defined regions. | |
637 if ( $slice_cs->equals($cs) ) { | |
638 return $self->_constrain_to_region(); | |
639 } | |
640 | |
641 my @projection; | |
642 my $current_start = 1; | |
643 | |
644 # Decompose this slice into its symlinked components. This allows us | |
645 # to handle haplotypes and PARs. | |
646 my $normal_slice_proj = | |
647 $slice_adaptor->fetch_normalized_slice_projection($self); | |
648 foreach my $segment (@$normal_slice_proj) { | |
649 my $normal_slice = $segment->[2]; | |
650 | |
651 $slice_cs = $normal_slice->coord_system(); | |
652 | |
653 my $asma = $db->get_AssemblyMapperAdaptor(); | |
654 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs ); | |
655 | |
656 # perform the mapping between this slice and the requested system | |
657 my @coords; | |
658 | |
659 if ( defined $asm_mapper ) { | |
660 @coords = $asm_mapper->map( $normal_slice->seq_region_name(), | |
661 $normal_slice->start(), | |
662 $normal_slice->end(), | |
663 $normal_slice->strand(), | |
664 $slice_cs ); | |
665 | |
666 } else { | |
667 $coords[0] = | |
668 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(), | |
669 $normal_slice->end() ); | |
670 } | |
671 | |
672 #construct a projection from the mapping results and return it | |
673 foreach my $coord (@coords) { | |
674 my $coord_start = $coord->start(); | |
675 my $coord_end = $coord->end(); | |
676 my $length = $coord_end - $coord_start + 1; | |
677 | |
678 #skip gaps | |
679 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) { | |
680 my $coord_cs = $coord->coord_system(); | |
681 | |
682 # If the normalised projection just ended up mapping to the | |
683 # same coordinate system we were already in then we should just | |
684 # return the original region. This can happen for example, | |
685 # if we were on a PAR region on Y which refered to X and a | |
686 # projection to 'toplevel' was requested. | |
687 | |
688 if ( $coord_cs->equals($slice_cs) ) { | |
689 # trim off regions which are not defined | |
690 return $self->_constrain_to_region(); | |
691 } | |
692 #create slices for the mapped-to coord system | |
693 | |
694 my $slice = | |
695 $slice_adaptor->fetch_by_seq_region_id( $coord->id(), | |
696 $coord_start, $coord_end, $coord->strand() ); | |
697 | |
698 my $current_end = $current_start + $length - 1; | |
699 | |
700 push @projection, | |
701 bless( [ $current_start, $current_end, $slice ], | |
702 "Bio::EnsEMBL::ProjectionSegment" ); | |
703 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...)) | |
704 | |
705 $current_start += $length; | |
706 } ## end foreach my $coord (@coords) | |
707 } ## end foreach my $segment (@$normal_slice_proj) | |
708 | |
709 return \@projection; | |
710 } ## end sub project_org | |
711 | |
712 sub _constrain_to_region { | |
713 my $self = shift; | |
714 | |
715 my $entire_len = $self->seq_region_length(); | |
716 | |
717 # If the slice has negative coordinates or coordinates exceeding the | |
718 # exceeding length of the sequence region we want to shrink the slice | |
719 # to the defined region. | |
720 | |
721 if ( $self->{'start'} > $entire_len || $self->{'end'} < 1 ) { | |
722 #none of this slice is in a defined region | |
723 return []; | |
724 } | |
725 | |
726 my $right_contract = 0; | |
727 my $left_contract = 0; | |
728 if ( $self->{'end'} > $entire_len ) { | |
729 $right_contract = $entire_len - $self->{'end'}; | |
730 } | |
731 if ( $self->{'start'} < 1 ) { | |
732 $left_contract = $self->{'start'} - 1; | |
733 } | |
734 | |
735 my $new_slice; | |
736 if ( $left_contract || $right_contract ) { | |
737 $new_slice = $self->expand( $left_contract, $right_contract ); | |
738 } else { | |
739 $new_slice = $self; | |
740 } | |
741 | |
742 return [ bless [ 1 - $left_contract, | |
743 $self->length() + $right_contract, | |
744 $new_slice ], | |
745 "Bio::EnsEMBL::ProjectionSegment" ]; | |
746 } ## end sub _constrain_to_region | |
747 | |
748 =head2 expand | |
749 | |
750 Arg [1] : (optional) int $five_prime_expand | |
751 The number of basepairs to shift this slices five_prime | |
752 coordinate by. Positive values make the slice larger, | |
753 negative make the slice smaller. | |
754 coordinate left. | |
755 Default = 0. | |
756 Arg [2] : (optional) int $three_prime_expand | |
757 The number of basepairs to shift this slices three_prime | |
758 coordinate by. Positive values make the slice larger, | |
759 negative make the slice smaller. | |
760 Default = 0. | |
761 Arg [3] : (optional) bool $force_expand | |
762 if set to 1, then the slice will be contracted even in the case | |
763 when shifts $five_prime_expand and $three_prime_expand overlap. | |
764 In that case $five_prime_expand and $three_prime_expand will be set | |
765 to a maximum possible number and that will result in the slice | |
766 which would have only 2pbs. | |
767 Default = 0. | |
768 Arg [4] : (optional) int* $fpref | |
769 The reference to a number of basepairs to shift this slices five_prime | |
770 coordinate by. Normally it would be set to $five_prime_expand. | |
771 But in case when $five_prime_expand shift can not be applied and | |
772 $force_expand is set to 1, then $$fpref will contain the maximum possible | |
773 shift | |
774 Arg [5] : (optional) int* $tpref | |
775 The reference to a number of basepairs to shift this slices three_prime | |
776 coordinate by. Normally it would be set to $three_prime_expand. | |
777 But in case when $five_prime_expand shift can not be applied and | |
778 $force_expand is set to 1, then $$tpref will contain the maximum possible | |
779 shift | |
780 Example : my $expanded_slice = $slice->expand( 1000, 1000); | |
781 my $contracted_slice = $slice->expand(-1000,-1000); | |
782 my $shifted_right_slice = $slice->expand(-1000, 1000); | |
783 my $shifted_left_slice = $slice->expand( 1000,-1000); | |
784 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift); | |
785 | |
786 Description: Returns a slice which is a resized copy of this slice. The | |
787 start and end are moved outwards from the center of the slice | |
788 if positive values are provided and moved inwards if negative | |
789 values are provided. This slice remains unchanged. A slice | |
790 may not be contracted below 1bp but may grow to be arbitrarily | |
791 large. | |
792 Returntype : Bio::EnsEMBL::Slice | |
793 Exceptions : warning if an attempt is made to contract the slice below 1bp | |
794 Caller : general | |
795 Status : Stable | |
796 | |
797 =cut | |
798 | |
799 sub expand { | |
800 my $self = shift; | |
801 my $five_prime_shift = shift || 0; | |
802 my $three_prime_shift = shift || 0; | |
803 my $force_expand = shift || 0; | |
804 my $fpref = shift; | |
805 my $tpref = shift; | |
806 | |
807 if ( $self->{'seq'} ) { | |
808 warning( | |
809 "Cannot expand a slice which has a manually attached sequence "); | |
810 return undef; | |
811 } | |
812 | |
813 my $new_start; | |
814 my $new_end; | |
815 my $sshift = $five_prime_shift; | |
816 my $eshift = $three_prime_shift; | |
817 | |
818 if ( $self->{'strand'} != 1 ) { | |
819 $eshift = $five_prime_shift; | |
820 $sshift = $three_prime_shift; | |
821 } | |
822 | |
823 $new_start = $self->{'start'} - $sshift; | |
824 $new_end = $self->{'end'} + $eshift; | |
825 | |
826 # if($new_start > $new_end) { | |
827 # if ($force_expand) { # Apply max possible shift, if force_expand is set | |
828 # if ($sshift < 0) { # if we are contracting the slice from the start - move the start just before the end | |
829 # $new_start = $new_end - 1; | |
830 # $sshift = $self->{start} - $new_start; | |
831 # } | |
832 | |
833 # if($new_start > $new_end) { # if the slice still has a negative length - try to move the end | |
834 # if ($eshift < 0) { | |
835 # $new_end = $new_start + 1; | |
836 # $eshift = $new_end - $self->{end}; | |
837 # } | |
838 # } | |
839 # return the values by which the primes were actually shifted | |
840 # $$tpref = $self->{strand} == 1 ? $eshift : $sshift; | |
841 # $$fpref = $self->{strand} == 1 ? $sshift : $eshift; | |
842 # } | |
843 # if($new_start > $new_end) { | |
844 # throw('Slice start cannot be greater than slice end'); | |
845 # } | |
846 # } | |
847 | |
848 #fastest way to copy a slice is to do a shallow hash copy | |
849 my %new_slice = %$self; | |
850 $new_slice{'start'} = int($new_start); | |
851 $new_slice{'end'} = int($new_end); | |
852 | |
853 return bless \%new_slice, ref($self); | |
854 } ## end sub expand | |
855 | |
856 =head2 sub_Slice_same | |
857 | |
858 Arg 1 : int $start | |
859 Arg 2 : int $end | |
860 Arge [3] : int $strand | |
861 Description: Makes another Slice that covers only part of this slice | |
862 If a slice is requested which lies outside of the boundaries | |
863 of this function will return undef. This means that | |
864 behaviour will be consistant whether or not the slice is | |
865 attached to the database (i.e. if there is attached sequence | |
866 to the slice). Alternatively the expand() method or the | |
867 SliceAdaptor::fetch_by_region method can be used instead. | |
868 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong | |
869 Exceptions : none | |
870 Caller : general | |
871 Status : Stable | |
872 | |
873 =cut | |
874 | |
875 sub sub_Slice_same { | |
876 my ( $self, $start, $end, $strand ) = @_; | |
877 | |
878 if ( $start < 1 || $start > $self->{'end'} ) { | |
879 # throw( "start argument not valid" ); | |
880 return undef; | |
881 } | |
882 | |
883 if ( $end < $start || $end > $self->{'end'} ) { | |
884 # throw( "end argument not valid" ) | |
885 return undef; | |
886 } | |
887 | |
888 my ( $new_start, $new_end, $new_strand, $new_seq ); | |
889 if ( !defined $strand ) { | |
890 $strand = 1; | |
891 } | |
892 | |
893 if ( $self->{'strand'} == 1 ) { | |
894 $new_start = $self->{'start'} + $start - 1; | |
895 $new_end = $self->{'start'} + $end - 1; | |
896 $new_strand = $strand; | |
897 } else { | |
898 $new_start = $self->{'end'} - $end + 1; | |
899 $new_end = $self->{'end'} - $start + 1; | |
900 $new_strand = -$strand; | |
901 } | |
902 | |
903 if ( defined $self->{'seq'} ) { | |
904 $new_seq = $self->subseq( $start, $end, $strand ); | |
905 } | |
906 | |
907 #fastest way to copy a slice is to do a shallow hash copy | |
908 my %new_slice = %$self; | |
909 $new_slice{'start'} = int($new_start); | |
910 $new_slice{'end'} = int($new_end); | |
911 $new_slice{'strand'} = $new_strand; | |
912 if ($new_seq) { | |
913 $new_slice{'seq'} = $new_seq; | |
914 } | |
915 | |
916 return bless \%new_slice, ref($self); | |
917 } ## end sub sub_Slice_same | |
918 | |
919 =head2 seq_region_Slice_same | |
920 | |
921 Arg [1] : none | |
922 Example : $slice = $slice->seq_region_Slice(); | |
923 Description: Returns a slice which spans the whole seq_region which this slice | |
924 is on. For example if this is a slice which spans a small region | |
925 of chromosome X, this method will return a slice which covers the | |
926 entire chromosome X. The returned slice will always have strand | |
927 of 1 and start of 1. This method cannot be used if the sequence | |
928 of the slice has been set manually. | |
929 Returntype : Bio::EnsEMBL::Slice | |
930 Exceptions : warning if called when sequence of Slice has been set manually. | |
931 Caller : general | |
932 Status : Stable | |
933 | |
934 =cut | |
935 | |
936 sub seq_region_Slice_same { | |
937 my $self = shift; | |
938 | |
939 if ( $self->{'seq'} ) { | |
940 warning( | |
941 "Cannot get a seq_region_Slice of a slice which has manually " | |
942 . "attached sequence " ); | |
943 return undef; | |
944 } | |
945 | |
946 # quick shallow copy | |
947 my $slice; | |
948 %{$slice} = %{$self}; | |
949 bless $slice, ref($self); | |
950 | |
951 $slice->{'start'} = 1; | |
952 $slice->{'end'} = $slice->{'seq_region_length'}; | |
953 $slice->{'strand'} = 1; | |
954 | |
955 return $slice; | |
956 } | |
957 | |
958 =head2 get_seq_region_id_same | |
959 | |
960 Arg [1] : none | |
961 Example : my $seq_region_id = $slice->get_seq_region_id(); | |
962 Description: Gets the internal identifier of the seq_region that this slice | |
963 is on. Note that this function will not work correctly if this | |
964 slice does not have an attached adaptor. Also note that it may | |
965 be better to go through the SliceAdaptor::get_seq_region_id | |
966 method if you are working with multiple databases since is | |
967 possible to work with slices from databases with different | |
968 internal seq_region identifiers. | |
969 Returntype : int or undef if slices does not have attached adaptor | |
970 Exceptions : warning if slice is not associated with a SliceAdaptor | |
971 Caller : assembly loading scripts, general | |
972 Status : Stable | |
973 | |
974 =cut | |
975 | |
976 sub get_seq_region_id_same { | |
977 my ($self) = @_; | |
978 | |
979 if ( $self->adaptor ) { | |
980 return $self->adaptor->get_seq_region_id($self); | |
981 } else { | |
982 warning('Cannot retrieve seq_region_id without attached adaptor.'); | |
983 return undef; | |
984 } | |
985 } | |
986 | |
987 =head2 get_all_Attributes | |
988 | |
989 Arg [1] : optional string $attrib_code | |
990 The code of the attribute type to retrieve values for. | |
991 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')}; | |
992 @slice_attributes = @{$slice->get_all_Attributes()}; | |
993 Description: Gets a list of Attributes of this slice''s seq_region. | |
994 Optionally just get Attrubutes for given code. | |
995 Returntype : listref Bio::EnsEMBL::Attribute | |
996 Exceptions : warning if slice does not have attached adaptor | |
997 Caller : general | |
998 Status : Stable | |
999 | |
1000 =cut | |
1001 | |
1002 sub get_all_Attributes { | |
1003 my $self = shift; | |
1004 my $attrib_code = shift; | |
1005 | |
1006 my $result; | |
1007 my @results; | |
1008 | |
1009 if ( !$self->adaptor() ) { | |
1010 warning('Cannot get attributes without an adaptor.'); | |
1011 return []; | |
1012 } | |
1013 | |
1014 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor(); | |
1015 | |
1016 ## circular BOF | |
1017 my $sl1 = | |
1018 Bio::EnsEMBL::CircularSlice->new( | |
1019 -COORD_SYSTEM => $self->{'coord_system'}, | |
1020 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1021 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1022 -START => $self->{'start'}, | |
1023 -END => $self->{'seq_region_length'}, | |
1024 -STRAND => $self->{'strand'}, | |
1025 -ADAPTOR => $self->adaptor() ); | |
1026 | |
1027 my $sl2 = | |
1028 Bio::EnsEMBL::CircularSlice->new( | |
1029 -COORD_SYSTEM => $self->{'coord_system'}, | |
1030 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1031 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1032 -START => 1, | |
1033 -END => $self->{'end'}, | |
1034 -STRAND => $self->{'strand'}, | |
1035 -ADAPTOR => $self->adaptor() ); | |
1036 my $pta_ref = []; | |
1037 | |
1038 if ( defined $attrib_code ) { | |
1039 my @res1 = | |
1040 grep { uc( $_->code() ) eq uc($attrib_code) } | |
1041 @{ $attribute_adaptor->fetch_all_by_Slice($sl1) }; | |
1042 my @res2 = | |
1043 grep { uc( $_->code() ) eq uc($attrib_code) } | |
1044 @{ $attribute_adaptor->fetch_all_by_Slice($sl2) }; | |
1045 my @res; | |
1046 push @res, @res1, @res2; | |
1047 $result = \@res; | |
1048 } else { | |
1049 my @res1 = @{ $attribute_adaptor->fetch_all_by_Slice($sl1) }; | |
1050 my @res2 = @{ $attribute_adaptor->fetch_all_by_Slice($sl2) }; | |
1051 my @res; | |
1052 push @res, @res1, @res2; | |
1053 $result = \@res; | |
1054 } | |
1055 | |
1056 ## circular EOF | |
1057 | |
1058 # if( defined $attrib_code ) { | |
1059 # @results = grep { uc($_->code()) eq uc($attrib_code) } | |
1060 # @{$attribute_adaptor->fetch_all_by_Slice( $self )}; | |
1061 # $result = \@results; | |
1062 # } else { | |
1063 # $result = $attribute_adaptor->fetch_all_by_Slice( $self ); | |
1064 # } | |
1065 | |
1066 return $result; | |
1067 } ## end sub get_all_Attributes | |
1068 | |
1069 =head2 get_all_PredictionTranscripts | |
1070 | |
1071 Arg [1] : (optional) string $logic_name | |
1072 The name of the analysis used to generate the prediction | |
1073 transcripts obtained. | |
1074 Arg [2] : (optional) boolean $load_exons | |
1075 If set to true will force loading of all PredictionExons | |
1076 immediately rather than loading them on demand later. This | |
1077 is faster if there are a large number of PredictionTranscripts | |
1078 and the exons will be used. | |
1079 Example : @transcripts = @{$slice->get_all_PredictionTranscripts}; | |
1080 Description: Retrieves the list of prediction transcripts which overlap | |
1081 this slice with logic_name $logic_name. If logic_name is | |
1082 not defined then all prediction transcripts are retrieved. | |
1083 Returntype : listref of Bio::EnsEMBL::PredictionTranscript | |
1084 Exceptions : warning if slice does not have attached adaptor | |
1085 Caller : none | |
1086 Status : Stable | |
1087 | |
1088 =cut | |
1089 | |
1090 sub get_all_PredictionTranscripts { | |
1091 my ( $self, $logic_name, $load_exons ) = @_; | |
1092 | |
1093 if ( !$self->adaptor() ) { | |
1094 warning( | |
1095 'Cannot get PredictionTranscripts without attached adaptor'); | |
1096 return []; | |
1097 } | |
1098 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor(); | |
1099 | |
1100 ## circular BOF | |
1101 my $sl1 = | |
1102 Bio::EnsEMBL::CircularSlice->new( | |
1103 -COORD_SYSTEM => $self->{'coord_system'}, | |
1104 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1105 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1106 -START => $self->{'start'}, | |
1107 -END => $self->{'seq_region_length'}, | |
1108 -STRAND => $self->{'strand'}, | |
1109 -ADAPTOR => $self->adaptor() ); | |
1110 | |
1111 my $sl2 = | |
1112 Bio::EnsEMBL::CircularSlice->new( | |
1113 -COORD_SYSTEM => $self->{'coord_system'}, | |
1114 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1115 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1116 -START => 1, | |
1117 -END => $self->{'end'}, | |
1118 -STRAND => $self->{'strand'}, | |
1119 -ADAPTOR => $self->adaptor() ); | |
1120 | |
1121 my ( @arr, @arr1, @arr2 ); | |
1122 @arr1 = | |
1123 @{ $pta->fetch_all_by_Slice( $sl1, $logic_name, $load_exons ) }; | |
1124 @arr2 = | |
1125 @{ $pta->fetch_all_by_Slice( $sl2, $logic_name, $load_exons ) }; | |
1126 push @arr, @arr1, @arr2; | |
1127 return \@arr; | |
1128 ## circular EOF | |
1129 | |
1130 #return $pta->fetch_all_by_Slice($sl1, $logic_name, $load_exons); | |
1131 } ## end sub get_all_PredictionTranscripts | |
1132 | |
1133 =head2 get_all_DnaAlignFeatures | |
1134 | |
1135 Arg [1] : (optional) string $logic_name | |
1136 The name of the analysis performed on the dna align features | |
1137 to obtain. | |
1138 Arg [2] : (optional) float $score | |
1139 The mimimum score of the features to retrieve | |
1140 Arg [3] : (optional) string $dbtype | |
1141 The name of an attached database to retrieve the features from | |
1142 instead, e.g. 'otherfeatures'. | |
1143 Arg [4] : (optional) float hcoverage | |
1144 The minimum hcoverage od the featurs to retrieve | |
1145 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures}; | |
1146 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with | |
1147 logic name $logic_name and with score above $score. If | |
1148 $logic_name is not defined features of all logic names are | |
1149 retrieved. If $score is not defined features of all scores are | |
1150 retrieved. | |
1151 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures | |
1152 Exceptions : warning if slice does not have attached adaptor | |
1153 Caller : general | |
1154 Status : Stable | |
1155 | |
1156 =cut | |
1157 | |
1158 sub get_all_DnaAlignFeatures { | |
1159 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_; | |
1160 | |
1161 if ( !$self->adaptor() ) { | |
1162 warning('Cannot get DnaAlignFeatures without attached adaptor'); | |
1163 return []; | |
1164 } | |
1165 | |
1166 my $db; | |
1167 | |
1168 if ($dbtype) { | |
1169 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1170 if ( !$db ) { | |
1171 warning("Don't have db $dbtype returning empty list\n"); | |
1172 return []; | |
1173 } | |
1174 } else { | |
1175 $db = $self->adaptor->db; | |
1176 } | |
1177 | |
1178 my $dafa = $db->get_DnaAlignFeatureAdaptor(); | |
1179 | |
1180 if ( defined($score) and defined($hcoverage) ) { | |
1181 warning "cannot specify score and hcoverage. Using score only"; | |
1182 } | |
1183 | |
1184 ## circular BOF | |
1185 my $sl1 = | |
1186 Bio::EnsEMBL::CircularSlice->new( | |
1187 -COORD_SYSTEM => $self->{'coord_system'}, | |
1188 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1189 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1190 -START => $self->{'start'}, | |
1191 -END => $self->{'seq_region_length'}, | |
1192 -STRAND => $self->{'strand'}, | |
1193 -ADAPTOR => $self->adaptor() ); | |
1194 | |
1195 my $sl2 = | |
1196 Bio::EnsEMBL::CircularSlice->new( | |
1197 -COORD_SYSTEM => $self->{'coord_system'}, | |
1198 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1199 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1200 -START => 1, | |
1201 -END => $self->{'end'}, | |
1202 -STRAND => $self->{'strand'}, | |
1203 -ADAPTOR => $self->adaptor() ); | |
1204 | |
1205 my ( @arr, @arr1, @arr2 ); | |
1206 my %union; | |
1207 if ( defined($score) ) { | |
1208 @arr1 = @{ $dafa->fetch_all_by_Slice_and_score( $sl1, $score, | |
1209 $logic_name ) }; | |
1210 @arr2 = @{ $dafa->fetch_all_by_Slice_and_score( $sl2, $score, | |
1211 $logic_name ) }; | |
1212 push @arr, @arr1, @arr2; | |
1213 return \@arr; | |
1214 } | |
1215 @arr1 = @{ | |
1216 $dafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage, | |
1217 $logic_name ) }; | |
1218 @arr2 = @{ | |
1219 $dafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage, | |
1220 $logic_name ) }; | |
1221 push @arr, @arr1, @arr2; | |
1222 return \@arr; | |
1223 | |
1224 ## circular EOF | |
1225 | |
1226 # if(defined($score)){ | |
1227 # return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name); | |
1228 # } | |
1229 # return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name); | |
1230 } ## end sub get_all_DnaAlignFeatures | |
1231 | |
1232 =head2 get_all_ProteinAlignFeatures | |
1233 | |
1234 Arg [1] : (optional) string $logic_name | |
1235 The name of the analysis performed on the protein align features | |
1236 to obtain. | |
1237 Arg [2] : (optional) float $score | |
1238 The mimimum score of the features to retrieve | |
1239 Arg [3] : (optional) string $dbtype | |
1240 The name of an attached database to retrieve features from | |
1241 instead. | |
1242 Arg [4] : (optional) float hcoverage | |
1243 The minimum hcoverage od the featurs to retrieve | |
1244 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures}; | |
1245 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with | |
1246 logic name $logic_name and with score above $score. If | |
1247 $logic_name is not defined features of all logic names are | |
1248 retrieved. If $score is not defined features of all scores are | |
1249 retrieved. | |
1250 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures | |
1251 Exceptions : warning if slice does not have attached adaptor | |
1252 Caller : general | |
1253 Status : Stable | |
1254 | |
1255 =cut | |
1256 | |
1257 sub get_all_ProteinAlignFeatures { | |
1258 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_; | |
1259 if ( !$self->adaptor() ) { | |
1260 warning('Cannot get ProteinAlignFeatures without attached adaptor'); | |
1261 return []; | |
1262 } | |
1263 my $db; | |
1264 if ($dbtype) { | |
1265 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1266 if ( !$db ) { | |
1267 warning("Don't have db $dbtype returning empty list\n"); | |
1268 return []; | |
1269 } | |
1270 } else { | |
1271 $db = $self->adaptor->db; | |
1272 } | |
1273 | |
1274 my $pafa = $db->get_ProteinAlignFeatureAdaptor(); | |
1275 | |
1276 if ( defined($score) and defined($hcoverage) ) { | |
1277 warning "cannot specify score and hcoverage. Using score only"; | |
1278 } | |
1279 | |
1280 my $sl1 = | |
1281 Bio::EnsEMBL::CircularSlice->new( | |
1282 -COORD_SYSTEM => $self->{'coord_system'}, | |
1283 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1285 -START => $self->{'start'}, | |
1286 -END => $self->{'seq_region_length'}, | |
1287 -STRAND => $self->{'strand'}, | |
1288 -ADAPTOR => $self->adaptor() ); | |
1289 | |
1290 my $sl2 = | |
1291 Bio::EnsEMBL::CircularSlice->new( | |
1292 -COORD_SYSTEM => $self->{'coord_system'}, | |
1293 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1294 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1295 -START => 1, | |
1296 -END => $self->{'end'}, | |
1297 -STRAND => $self->{'strand'}, | |
1298 -ADAPTOR => $self->adaptor() ); | |
1299 my ( @arr, @arr1, @arr2 ); | |
1300 if ( defined($score) ) { | |
1301 @arr1 = @{ $pafa->fetch_all_by_Slice_and_score( $sl1, $score, | |
1302 $logic_name ) }; | |
1303 @arr2 = @{ $pafa->fetch_all_by_Slice_and_score( $sl2, $score, | |
1304 $logic_name ) }; | |
1305 | |
1306 push @arr, @arr1, @arr2; | |
1307 return \@arr; | |
1308 } | |
1309 | |
1310 @arr1 = @{ | |
1311 $pafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage, | |
1312 $logic_name ) }; | |
1313 @arr2 = @{ | |
1314 $pafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage, | |
1315 $logic_name ) }; | |
1316 | |
1317 push @arr, @arr1, @arr2; | |
1318 return \@arr; | |
1319 | |
1320 # if(defined($score)){ | |
1321 # return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name); | |
1322 # } | |
1323 # return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name); | |
1324 | |
1325 } ## end sub get_all_ProteinAlignFeatures | |
1326 | |
1327 =head2 get_all_SimilarityFeatures | |
1328 | |
1329 Arg [1] : (optional) string $logic_name | |
1330 the name of the analysis performed on the features to retrieve | |
1331 Arg [2] : (optional) float $score | |
1332 the lower bound of the score of the features to be retrieved | |
1333 Example : @feats = @{$slice->get_all_SimilarityFeatures}; | |
1334 Description: Retrieves all dna_align_features and protein_align_features | |
1335 with analysis named $logic_name and with score above $score. | |
1336 It is probably faster to use get_all_ProteinAlignFeatures or | |
1337 get_all_DnaAlignFeatures if a sepcific feature type is desired. | |
1338 If $logic_name is not defined features of all logic names are | |
1339 retrieved. If $score is not defined features of all scores are | |
1340 retrieved. | |
1341 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures | |
1342 Exceptions : warning if slice does not have attached adaptor | |
1343 Caller : general | |
1344 Status : Stable | |
1345 | |
1346 =cut | |
1347 | |
1348 sub get_all_SimilarityFeatures { | |
1349 my ( $self, $logic_name, $score ) = @_; | |
1350 | |
1351 my @out = (); | |
1352 | |
1353 push @out, | |
1354 @{ $self->get_all_ProteinAlignFeatures( $logic_name, $score ) }; | |
1355 push @out, | |
1356 @{ $self->get_all_DnaAlignFeatures( $logic_name, $score ) }; | |
1357 | |
1358 return \@out; | |
1359 } | |
1360 | |
1361 =head2 get_all_SimpleFeatures | |
1362 | |
1363 Arg [1] : (optional) string $logic_name | |
1364 The name of the analysis performed on the simple features | |
1365 to obtain. | |
1366 Arg [2] : (optional) float $score | |
1367 The mimimum score of the features to retrieve | |
1368 Example : @simple_feats = @{$slice->get_all_SimpleFeatures}; | |
1369 Description: Retrieves the SimpleFeatures which overlap this slice with | |
1370 logic name $logic_name and with score above $score. If | |
1371 $logic_name is not defined features of all logic names are | |
1372 retrieved. If $score is not defined features of all scores are | |
1373 retrieved. | |
1374 Returntype : listref of Bio::EnsEMBL::SimpleFeatures | |
1375 Exceptions : warning if slice does not have attached adaptor | |
1376 Caller : general | |
1377 Status : Stable | |
1378 | |
1379 =cut | |
1380 | |
1381 sub get_all_SimpleFeatures { | |
1382 my ( $self, $logic_name, $score, $dbtype ) = @_; | |
1383 | |
1384 if ( !$self->adaptor() ) { | |
1385 warning('Cannot get SimpleFeatures without attached adaptor'); | |
1386 return []; | |
1387 } | |
1388 | |
1389 my $db; | |
1390 if ($dbtype) { | |
1391 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1392 if ( !$db ) { | |
1393 warning("Don't have db $dbtype returning empty list\n"); | |
1394 return []; | |
1395 } | |
1396 } else { | |
1397 $db = $self->adaptor->db; | |
1398 } | |
1399 | |
1400 my $sfa = $db->get_SimpleFeatureAdaptor(); | |
1401 | |
1402 return $sfa->fetch_all_by_Slice_and_score( $self, $score, | |
1403 $logic_name ); | |
1404 } | |
1405 | |
1406 #### STOP !!! ######################################## | |
1407 | |
1408 =head2 get_all_RepeatFeatures | |
1409 | |
1410 Arg [1] : (optional) string $logic_name | |
1411 The name of the analysis performed on the repeat features | |
1412 to obtain. | |
1413 Arg [2] : (optional) string $repeat_type | |
1414 Limits features returned to those of the specified repeat_type | |
1415 Arg [3] : (optional) string $db | |
1416 Key for database e.g. core/vega/cdna/.... | |
1417 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'LTR')}; | |
1418 Description: Retrieves the RepeatFeatures which overlap with | |
1419 logic name $logic_name and with score above $score. If | |
1420 $logic_name is not defined features of all logic names are | |
1421 retrieved. | |
1422 Returntype : listref of Bio::EnsEMBL::RepeatFeatures | |
1423 Exceptions : warning if slice does not have attached adaptor | |
1424 Caller : general | |
1425 Status : Stable | |
1426 | |
1427 =cut | |
1428 | |
1429 sub get_all_RepeatFeatures { | |
1430 my ( $self, $logic_name, $repeat_type, $dbtype ) = @_; | |
1431 | |
1432 if ( !$self->adaptor() ) { | |
1433 warning('Cannot get RepeatFeatures without attached adaptor'); | |
1434 return []; | |
1435 } | |
1436 | |
1437 my $db; | |
1438 if ($dbtype) { | |
1439 $db = $self->adaptor->db->get_db_adaptor($dbtype); | |
1440 if ( !$db ) { | |
1441 warning("Don't have db $dbtype returning empty list\n"); | |
1442 return []; | |
1443 } | |
1444 } else { | |
1445 $db = $self->adaptor->db; | |
1446 } | |
1447 | |
1448 my $rpfa = $db->get_RepeatFeatureAdaptor(); | |
1449 | |
1450 return $rpfa->fetch_all_by_Slice( $self, $logic_name, $repeat_type ); | |
1451 } | |
1452 | |
1453 =head2 get_all_LD_values | |
1454 | |
1455 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population | |
1456 Description : returns all LD values on this slice. This function will only work correctly if the variation | |
1457 database has been attached to the core database. If the argument is passed, will return the LD information | |
1458 in that population | |
1459 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer | |
1460 Exceptions : none | |
1461 Caller : contigview, snpview | |
1462 Status : At Risk | |
1463 : Variation database is under development. | |
1464 | |
1465 =cut | |
1466 | |
1467 sub get_all_LD_values { | |
1468 my $self = shift; | |
1469 my $population = shift; | |
1470 | |
1471 if ( !$self->adaptor() ) { | |
1472 warning('Cannot get LDFeatureContainer without attached adaptor'); | |
1473 return []; | |
1474 } | |
1475 | |
1476 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
1477 | |
1478 unless ($variation_db) { | |
1479 warning( "Variation database must be attached to core database to " | |
1480 . "retrieve variation information" ); | |
1481 return []; | |
1482 } | |
1483 | |
1484 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor; | |
1485 | |
1486 if ($ld_adaptor) { | |
1487 return $ld_adaptor->fetch_by_Slice( $self, $population ); | |
1488 } else { | |
1489 return []; | |
1490 | |
1491 } | |
1492 | |
1493 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer"); | |
1494 | |
1495 # if( $ld_adaptor ) { | |
1496 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population); | |
1497 # if (@{$ld_values} > 1){ | |
1498 # warning("More than 1 variation database attached. Trying to merge LD results"); | |
1499 # my $ld_value_merged = shift @{$ld_values}; | |
1500 # #with more than 1 variation database attached, will try to merge in one single LDContainer object. | |
1501 # foreach my $ld (@{$ld_values}){ | |
1502 # #copy the ld values to the result hash | |
1503 # foreach my $key (keys %{$ld->{'ldContainer'}}){ | |
1504 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key}; | |
1505 # } | |
1506 # #and copy the variationFeatures as well | |
1507 # foreach my $key (keys %{$ld->{'variationFeatures'}}){ | |
1508 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key}; | |
1509 # } | |
1510 | |
1511 # } | |
1512 # return $ld_value_merged; | |
1513 # } | |
1514 # else{ | |
1515 # return shift @{$ld_values}; | |
1516 # } | |
1517 # } else { | |
1518 # warning("Variation database must be attached to core database to " . | |
1519 # "retrieve variation information" ); | |
1520 # return []; | |
1521 # } | |
1522 } ## end sub get_all_LD_values | |
1523 | |
1524 =head2 get_all_VariationFeatures | |
1525 | |
1526 Args : $filter [optional] | |
1527 Description:returns all variation features on this slice. This function will only work | |
1528 correctly if the variation database has been attached to the core database. | |
1529 If $filter is "genotyped" return genotyped Snps only... (nice likkle hack); | |
1530 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1531 Exceptions : none | |
1532 Caller : contigview, snpview | |
1533 Status : At Risk | |
1534 : Variation database is under development. | |
1535 | |
1536 =cut | |
1537 | |
1538 sub get_all_VariationFeatures { | |
1539 my $self = shift; | |
1540 my $filter = shift; | |
1541 | |
1542 $filter ||= ''; | |
1543 if ( !$self->adaptor() ) { | |
1544 warning('Cannot get variation features without attached adaptor'); | |
1545 return []; | |
1546 } | |
1547 | |
1548 my $vf_adaptor = | |
1549 Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
1550 -species => $self->adaptor()->db()->species, | |
1551 -type => "VariationFeature" ); | |
1552 if ($vf_adaptor) { | |
1553 if ( $filter eq 'genotyped' ) { | |
1554 return $vf_adaptor->fetch_all_genotyped_by_Slice($self); | |
1555 } else { | |
1556 return $vf_adaptor->fetch_all_by_Slice($self); | |
1557 } | |
1558 } else { | |
1559 warning( "Variation database must be attached to core database to " | |
1560 . "retrieve variation information" ); | |
1561 return []; | |
1562 } | |
1563 } ## end sub get_all_VariationFeatures | |
1564 | |
1565 =head2 get_all_IndividualSlice | |
1566 | |
1567 Args : none | |
1568 Example : my $individualSlice = $slice->get_by_Population($population); | |
1569 Description : Gets the specific Slice for all the individuls in the population | |
1570 ReturnType : listref of Bio::EnsEMB::IndividualSlice | |
1571 Exceptions : none | |
1572 Caller : general | |
1573 | |
1574 =cut | |
1575 | |
1576 sub get_all_IndividualSlice { | |
1577 my $self = shift; | |
1578 | |
1579 my $individualSliceFactory = | |
1580 Bio::EnsEMBL::IndividualSliceFactory->new( | |
1581 -START => $self->{'start'}, | |
1582 -END => $self->{'end'}, | |
1583 -STRAND => $self->{'strand'}, | |
1584 -ADAPTOR => $self->adaptor(), | |
1585 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1586 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1587 -COORD_SYSTEM => $self->{'coord_system'}, ); | |
1588 return $individualSliceFactory->get_all_IndividualSlice(); | |
1589 } | |
1590 | |
1591 =head2 get_by_Individual | |
1592 | |
1593 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual | |
1594 Example : my $individualSlice = $slice->get_by_Individual($individual); | |
1595 Description : Gets the specific Slice for the individual | |
1596 ReturnType : Bio::EnsEMB::IndividualSlice | |
1597 Exceptions : none | |
1598 Caller : general | |
1599 | |
1600 =cut | |
1601 | |
1602 sub get_by_Individual { | |
1603 my $self = shift; | |
1604 my $individual = shift; | |
1605 | |
1606 return Bio::EnsEMBL::IndividualSlice->new( | |
1607 -START => $self->{'start'}, | |
1608 -END => $self->{'end'}, | |
1609 -STRAND => $self->{'strand'}, | |
1610 -ADAPTOR => $self->adaptor(), | |
1611 # -SEQ => $self->{'seq'}, | |
1612 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1613 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1614 -COORD_SYSTEM => $self->{'coord_system'}, | |
1615 -INDIVIDUAL => $individual ); | |
1616 | |
1617 } | |
1618 | |
1619 =head2 get_by_strain | |
1620 | |
1621 Arg[1] : string $strain | |
1622 Example : my $strainSlice = $slice->get_by_strain($strain); | |
1623 Description : Gets the specific Slice for the strain | |
1624 ReturnType : Bio::EnsEMB::StrainSlice | |
1625 Exceptions : none | |
1626 Caller : general | |
1627 | |
1628 =cut | |
1629 | |
1630 sub get_by_strain { | |
1631 my $self = shift; | |
1632 my $strain_name = shift; | |
1633 | |
1634 return | |
1635 Bio::EnsEMBL::StrainSlice->new( | |
1636 -START => $self->{'start'}, | |
1637 -END => $self->{'end'}, | |
1638 -STRAND => $self->{'strand'}, | |
1639 -ADAPTOR => $self->adaptor(), | |
1640 -SEQ => $self->{'seq'}, | |
1641 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
1642 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
1643 -COORD_SYSTEM => $self->{'coord_system'}, | |
1644 -STRAIN_NAME => $strain_name ); | |
1645 | |
1646 } | |
1647 | |
1648 sub calculate_theta { | |
1649 my $self = shift; | |
1650 my $strains = shift; | |
1651 my $feature = shift | |
1652 ; #optional parameter. Name of the feature in the Slice you want to calculate | |
1653 | |
1654 if ( !$self->adaptor() ) { | |
1655 warning('Cannot get variation features without attached adaptor'); | |
1656 return 0; | |
1657 } | |
1658 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
1659 | |
1660 unless ($variation_db) { | |
1661 warning( "Variation database must be attached to core database to " | |
1662 . "retrieve variation information" ); | |
1663 return 0; | |
1664 } | |
1665 | |
1666 #need to get coverage regions for the slice in the different strains | |
1667 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; | |
1668 my $strain; | |
1669 my $differences = []; | |
1670 my $slices = []; | |
1671 if ($coverage_adaptor) { | |
1672 my $num_strains = scalar( @{$strains} ) + 1; | |
1673 if ( !defined $feature ) { | |
1674 #we want to calculate for the whole slice | |
1675 push @{$slices}, | |
1676 $self; #add the slice as the slice to calculate the theta value | |
1677 } else { | |
1678 #we have features, get the slices for the different features | |
1679 my $features = $self->get_all_Exons(); | |
1680 map { push @{$slices}, $_->feature_Slice } | |
1681 @{$features}; #add the slices of the features | |
1682 } | |
1683 my $length_regions = 0; | |
1684 my $snps = 0; | |
1685 my $theta = 0; | |
1686 my $last_position = 0; | |
1687 #get all the differences in the slice coordinates | |
1688 foreach my $strain_name ( @{$strains} ) { | |
1689 my $strain = $self->get_by_strain($strain_name) | |
1690 ; #get the strainSlice for the strain | |
1691 | |
1692 my $results = $strain->get_all_differences_Slice; | |
1693 push @{$differences}, @{$results} if ( defined $results ); | |
1694 } | |
1695 #when we finish, we have, in max_level, the regions covered by all the sample | |
1696 #sort the differences by the genomic position | |
1697 my @differences_sorted = | |
1698 sort { $a->start <=> $b->start } @{$differences}; | |
1699 foreach my $slice ( @{$slices} ) { | |
1700 my $regions_covered = | |
1701 $coverage_adaptor->fetch_all_regions_covered( $slice, | |
1702 $strains ); | |
1703 if ( defined $regions_covered ) { | |
1704 foreach my $range ( @{$regions_covered} ) { | |
1705 $length_regions += | |
1706 ( $range->[1] - $range->[0] ) + | |
1707 1; #add the length of the genomic region | |
1708 for ( my $i = $last_position; $i < @differences_sorted; $i++ ) | |
1709 { | |
1710 if ( $differences_sorted[$i]->start >= $range->[0] | |
1711 && $differences_sorted[$i]->end <= $range->[1] ) | |
1712 { | |
1713 $snps++; #count differences in the region | |
1714 } elsif ( $differences_sorted[$i]->end > $range->[1] ) { | |
1715 $last_position = $i; | |
1716 last; | |
1717 } | |
1718 } | |
1719 } | |
1720 #when all the ranges have been iterated, calculate rho | |
1721 #this is an intermediate variable called a in the formula | |
1722 # a = sum i=2..strains 1/i-1 | |
1723 } | |
1724 } ## end foreach my $slice ( @{$slices...}) | |
1725 my $a = _calculate_a($num_strains); | |
1726 $theta = $snps/( $a*$length_regions ); | |
1727 return $theta; | |
1728 } else { | |
1729 return 0; | |
1730 } | |
1731 } ## end sub calculate_theta | |
1732 | |
1733 sub _calculate_a { | |
1734 my $max_level = shift; | |
1735 | |
1736 my $a = 0; | |
1737 for ( my $i = 2; $i <= $max_level + 1; $i++ ) { | |
1738 $a += 1/( $i - 1 ); | |
1739 } | |
1740 return $a; | |
1741 } | |
1742 | |
1743 sub calculate_pi { | |
1744 my $self = shift; | |
1745 my $strains = shift; | |
1746 my $feature = shift; | |
1747 | |
1748 if ( !$self->adaptor() ) { | |
1749 warning('Cannot get variation features without attached adaptor'); | |
1750 return 0; | |
1751 } | |
1752 my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); | |
1753 | |
1754 unless ($variation_db) { | |
1755 warning( "Variation database must be attached to core database to " | |
1756 . "retrieve variation information" ); | |
1757 return 0; | |
1758 } | |
1759 | |
1760 #need to get coverage regions for the slice in the different strains | |
1761 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; | |
1762 my $differences = []; | |
1763 my $slices = []; | |
1764 if ($coverage_adaptor) { | |
1765 my $num_strains = scalar( @{$strains} ) + 1; | |
1766 if ( !defined $feature ) { | |
1767 #we want to calculate for the whole slice | |
1768 push @{$slices}, | |
1769 $self; #add the slice as the slice to calculate the theta value | |
1770 } else { | |
1771 #we have features, get the slices for the different features | |
1772 my $features = $self->get_all_Exons(); | |
1773 map { push @{$slices}, $_->feature_Slice } | |
1774 @{$features}; #add the slices of the features | |
1775 } | |
1776 my @range_differences = (); | |
1777 my $pi = 0; | |
1778 my $regions = 0; | |
1779 my $last_position = | |
1780 0; #last position visited in the sorted list of differences | |
1781 my $triallelic = 0; | |
1782 my $is_triallelic = 0; | |
1783 foreach my $slice ( @{$slices} ) { | |
1784 | |
1785 foreach my $strain_name ( @{$strains} ) { | |
1786 my $strain = $slice->get_by_strain($strain_name) | |
1787 ; #get the strainSlice for the strain | |
1788 my $results = $strain->get_all_differences_Slice; | |
1789 push @{$differences}, @{$results} if ( defined $results ); | |
1790 } | |
1791 my @differences_sorted = | |
1792 sort { $a->start <=> $b->start } @{$differences}; | |
1793 | |
1794 my $regions_covered = | |
1795 $coverage_adaptor->fetch_all_regions_covered( $slice, | |
1796 $strains ); | |
1797 #when we finish, we have, in max_level, the regions covered by all the sample | |
1798 #sort the differences | |
1799 if ( defined $regions_covered ) { | |
1800 foreach my $range ( @{$regions_covered} ) { | |
1801 for ( my $i = $last_position; $i < @differences_sorted; $i++ ) | |
1802 { | |
1803 if ( $differences_sorted[$i]->start >= $range->[0] | |
1804 && $differences_sorted[$i]->end <= $range->[1] ) | |
1805 { | |
1806 #check wether it is the same region or different | |
1807 if ( !defined $range_differences[0] | |
1808 || ( $differences_sorted[$i]->start == | |
1809 $range_differences[0]->start ) ) | |
1810 { | |
1811 if ( defined $range_differences[0] | |
1812 && ( $differences_sorted[$i]->allele_string ne | |
1813 $range_differences[0]->allele_string ) ) | |
1814 { | |
1815 $is_triallelic = 1; | |
1816 } | |
1817 push @range_differences, $differences_sorted[$i]; | |
1818 } else { | |
1819 #new site, calc pi for the previous one | |
1820 $pi += | |
1821 2* | |
1822 ( @range_differences/($num_strains) )* | |
1823 ( 1 - ( @range_differences/$num_strains ) ); | |
1824 if ($is_triallelic) { | |
1825 $triallelic++; | |
1826 $is_triallelic = 0; | |
1827 } | |
1828 $regions++; | |
1829 @range_differences = (); | |
1830 #and start a new range | |
1831 push @range_differences, $differences_sorted[$i]; | |
1832 } | |
1833 } elsif ( $differences_sorted[$i]->end > $range->[1] ) { | |
1834 $last_position = $i; | |
1835 last; | |
1836 } | |
1837 } ## end for ( my $i = $last_position...) | |
1838 #calculate pi for last site, if any | |
1839 if ( defined $range_differences[0] ) { | |
1840 $pi += | |
1841 2* | |
1842 ( @range_differences/$num_strains )* | |
1843 ( 1 - ( @range_differences/$num_strains ) ); | |
1844 $regions++; | |
1845 } | |
1846 } ## end foreach my $range ( @{$regions_covered...}) | |
1847 } ## end if ( defined $regions_covered) | |
1848 $pi = $pi/$regions; #calculate average pi | |
1849 print | |
1850 "Regions with variations in region $regions and triallelic $triallelic\n\n"; | |
1851 } ## end foreach my $slice ( @{$slices...}) | |
1852 return $pi; | |
1853 } else { | |
1854 return 0; | |
1855 } | |
1856 | |
1857 } ## end sub calculate_pi | |
1858 | |
1859 =head2 get_all_genotyped_VariationFeatures | |
1860 | |
1861 Args : none | |
1862 Description: returns all variation features on this slice that have been genotyped. | |
1863 This function will only work correctly if the variation database has | |
1864 been attached to the core database. | |
1865 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature | |
1866 Exceptions : none | |
1867 Caller : contigview, snpview, ldview | |
1868 Status : At Risk | |
1869 : Variation database is under development. | |
1870 | |
1871 =cut | |
1872 | |
1873 sub get_all_genotyped_VariationFeatures { | |
1874 my $self = shift; | |
1875 my $vfa; | |
1876 if ( !$self->adaptor() ) { | |
1877 warning('Cannot get variation features without attached adaptor'); | |
1878 return []; | |
1879 } | |
1880 | |
1881 my $vf_adaptor = | |
1882 Bio::EnsEMBL::DBSQL::MergedAdaptor->new( | |
1883 -species => $self->adaptor()->db()->species, | |
1884 -type => "VariationFeature" ); | |
1885 | |
1886 if ($vf_adaptor) { | |
1887 return $vf_adaptor->fetch_all_genotyped_by_Slice($self); | |
1888 } else { | |
1889 warning( "Variation database must be attached to core database to " | |
1890 . "retrieve variation information" ); | |
1891 return []; | |
1892 } | |
1893 } | |
1894 | |
1895 =head2 get_all_SNPs | |
1896 | |
1897 Description: DEPRECATED. Use get_all_VariationFeatures instead | |
1898 | |
1899 =cut | |
1900 | |
1901 sub get_all_SNPs { | |
1902 my $self = shift; | |
1903 | |
1904 deprecate('Use get_all_VariationFeatures() instead.'); | |
1905 | |
1906 my $snps; | |
1907 my $vf = $self->get_all_genotyped_VariationFeatures(); | |
1908 if ( $vf->[0] ) { | |
1909 #necessary to convert the VariationFeatures into SNP objects | |
1910 foreach my $variation_feature ( @{$vf} ) { | |
1911 push @{$snps}, $variation_feature->convert_to_SNP(); | |
1912 } | |
1913 return $snps; | |
1914 } else { | |
1915 return []; | |
1916 } | |
1917 } | |
1918 | |
1919 =head2 get_all_genotyped_SNPs | |
1920 | |
1921 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures instead | |
1922 | |
1923 =cut | |
1924 | |
1925 sub get_all_genotyped_SNPs { | |
1926 my $self = shift; | |
1927 | |
1928 deprecate("Use get_all_genotyped_VariationFeatures instead"); | |
1929 my $vf = $self->get_all_genotyped_VariationFeatures; | |
1930 my $snps; | |
1931 if ( $vf->[0] ) { | |
1932 foreach my $variation_feature ( @{$vf} ) { | |
1933 push @{$snps}, $variation_feature->convert_to_SNP(); | |
1934 } | |
1935 return $snps; | |
1936 } else { | |
1937 return []; | |
1938 } | |
1939 } | |
1940 | |
1941 sub get_all_SNPs_transcripts { | |
1942 my $self = shift; | |
1943 | |
1944 deprecate("DEPRECATED"); | |
1945 | |
1946 return []; | |
1947 | |
1948 } | |
1949 | |
1950 =head2 get_all_Genes | |
1951 | |
1952 Arg [1] : (optional) string $logic_name | |
1953 The name of the analysis used to generate the genes to retrieve | |
1954 Arg [2] : (optional) string $dbtype | |
1955 The dbtype of genes to obtain. This assumes that the db has | |
1956 been added to the DBAdaptor under this name (using the | |
1957 DBConnection::add_db_adaptor method). | |
1958 Arg [3] : (optional) boolean $load_transcripts | |
1959 If set to true, transcripts will be loaded immediately rather | |
1960 than being lazy-loaded on request. This will result in a | |
1961 significant speed up if the Transcripts and Exons are going to | |
1962 be used (but a slow down if they are not). | |
1963 Arg [4] : (optional) string $source | |
1964 The source of the genes to retrieve. | |
1965 Arg [5] : (optional) string $biotype | |
1966 The biotype of the genes to retrieve. | |
1967 Example : @genes = @{$slice->get_all_Genes}; | |
1968 Description: Retrieves all genes that overlap this slice. | |
1969 Returntype : listref of Bio::EnsEMBL::Genes | |
1970 Exceptions : none | |
1971 Caller : none | |
1972 Status : Stable | |
1973 | |
1974 =cut | |
1975 | |
1976 sub get_all_Genes { | |
1977 my ( $self, $logic_name, $dbtype, $load_transcripts, $source, | |
1978 $biotype ) | |
1979 = @_; | |
1980 | |
1981 if ( !$self->adaptor() ) { | |
1982 warning('Cannot get Genes without attached adaptor'); | |
1983 return []; | |
1984 } | |
1985 | |
1986 my $ga; | |
1987 if ($dbtype) { | |
1988 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype ); | |
1989 if ( defined($db) ) { | |
1990 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" ); | |
1991 } else { | |
1992 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(), | |
1993 $dbtype, "Gene" ); | |
1994 } | |
1995 if ( !defined $ga ) { | |
1996 warning("$dbtype genes not available"); | |
1997 return []; | |
1998 } | |
1999 } else { | |
2000 $ga = $self->adaptor->db->get_GeneAdaptor(); | |
2001 } | |
2002 | |
2003 ## circular BOF | |
2004 my $sl1 = | |
2005 Bio::EnsEMBL::CircularSlice->new( | |
2006 -COORD_SYSTEM => $self->{'coord_system'}, | |
2007 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2008 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2009 -START => $self->{'start'}, | |
2010 -END => $self->{'seq_region_length'}, | |
2011 -STRAND => $self->{'strand'}, | |
2012 -ADAPTOR => $self->adaptor() ); | |
2013 | |
2014 my $sl2 = | |
2015 Bio::EnsEMBL::CircularSlice->new( | |
2016 -COORD_SYSTEM => $self->{'coord_system'}, | |
2017 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2018 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2019 -START => 1, | |
2020 -END => $self->{'end'}, | |
2021 -STRAND => $self->{'strand'}, | |
2022 -ADAPTOR => $self->adaptor() ); | |
2023 | |
2024 my ( @arr, @arr1, @arr2 ); | |
2025 @arr1 = @{ | |
2026 $ga->fetch_all_by_Slice( $sl1, $logic_name, $load_transcripts, | |
2027 $source, $biotype ) }; | |
2028 @arr2 = @{ | |
2029 $ga->fetch_all_by_Slice( $sl2, $logic_name, $load_transcripts, | |
2030 $source, $biotype ) }; | |
2031 push @arr, @arr1, @arr2; | |
2032 return \@arr; | |
2033 | |
2034 ## circular EOF | |
2035 | |
2036 } ## end sub get_all_Genes | |
2037 | |
2038 =head2 get_all_Genes_by_type | |
2039 | |
2040 Arg [1] : string $type | |
2041 The biotype of genes wanted. | |
2042 Arg [2] : (optional) string $logic_name | |
2043 Arg [3] : (optional) boolean $load_transcripts | |
2044 If set to true, transcripts will be loaded immediately rather | |
2045 than being lazy-loaded on request. This will result in a | |
2046 significant speed up if the Transcripts and Exons are going to | |
2047 be used (but a slow down if they are not). | |
2048 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding', | |
2049 'ensembl')}; | |
2050 Description: Retrieves genes that overlap this slice of biotype $type. | |
2051 This is primarily used by the genebuilding code when several | |
2052 biotypes of genes are used. | |
2053 | |
2054 The logic name is the analysis of the genes that are retrieved. | |
2055 If not provided all genes will be retrieved instead. | |
2056 | |
2057 Returntype : listref of Bio::EnsEMBL::Genes | |
2058 Exceptions : none | |
2059 Caller : genebuilder, general | |
2060 Status : Stable | |
2061 | |
2062 =cut | |
2063 | |
2064 sub get_all_Genes_by_type { | |
2065 my ( $self, $type, $logic_name, $load_transcripts ) = @_; | |
2066 | |
2067 if ( !$self->adaptor() ) { | |
2068 warning('Cannot get Genes without attached adaptor'); | |
2069 return []; | |
2070 } | |
2071 | |
2072 return | |
2073 $self->get_all_Genes( $logic_name, undef, $load_transcripts, undef, | |
2074 $type ); | |
2075 } | |
2076 | |
2077 =head2 get_all_Genes_by_source | |
2078 | |
2079 Arg [1] : string source | |
2080 Arg [2] : (optional) boolean $load_transcripts | |
2081 If set to true, transcripts will be loaded immediately rather | |
2082 than being lazy-loaded on request. This will result in a | |
2083 significant speed up if the Transcripts and Exons are going to | |
2084 be used (but a slow down if they are not). | |
2085 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')}; | |
2086 Description: Retrieves genes that overlap this slice of source $source. | |
2087 | |
2088 Returntype : listref of Bio::EnsEMBL::Genes | |
2089 Exceptions : none | |
2090 Caller : general | |
2091 Status : Stable | |
2092 | |
2093 =cut | |
2094 | |
2095 sub get_all_Genes_by_source { | |
2096 my ( $self, $source, $load_transcripts ) = @_; | |
2097 | |
2098 if ( !$self->adaptor() ) { | |
2099 warning('Cannot get Genes without attached adaptor'); | |
2100 return []; | |
2101 } | |
2102 | |
2103 return $self->get_all_Genes( undef, undef, $load_transcripts, | |
2104 $source ); | |
2105 } | |
2106 | |
2107 =head2 get_all_Transcripts | |
2108 | |
2109 Arg [1] : (optional) boolean $load_exons | |
2110 If set to true exons will not be lazy-loaded but will instead | |
2111 be loaded right away. This is faster if the exons are | |
2112 actually going to be used right away. | |
2113 Arg [2] : (optional) string $logic_name | |
2114 the logic name of the type of features to obtain | |
2115 Arg [3] : (optional) string $db_type | |
2116 Example : @transcripts = @{$slice->get_all_Transcripts)_}; | |
2117 Description: Gets all transcripts which overlap this slice. If you want to | |
2118 specify a particular analysis or type, then you are better off | |
2119 using get_all_Genes or get_all_Genes_by_type and iterating | |
2120 through the transcripts of each gene. | |
2121 Returntype : reference to a list of Bio::EnsEMBL::Transcripts | |
2122 Exceptions : none | |
2123 Caller : general | |
2124 Status : Stable | |
2125 | |
2126 =cut | |
2127 | |
2128 sub get_all_Transcripts { | |
2129 my $self = shift; | |
2130 my $load_exons = shift; | |
2131 my $logic_name = shift; | |
2132 my $dbtype = shift; | |
2133 if ( !$self->adaptor() ) { | |
2134 warning('Cannot get Transcripts without attached adaptor'); | |
2135 return []; | |
2136 } | |
2137 | |
2138 my $ta; | |
2139 if ($dbtype) { | |
2140 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype ); | |
2141 if ( defined($db) ) { | |
2142 $ta = | |
2143 $reg->get_adaptor( $db->species(), $db->group(), "Transcript" ); | |
2144 } else { | |
2145 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(), | |
2146 $dbtype, "Transcript" ); | |
2147 } | |
2148 if ( !defined $ta ) { | |
2149 warning("$dbtype genes not available"); | |
2150 return []; | |
2151 } | |
2152 } else { | |
2153 $ta = $self->adaptor->db->get_TranscriptAdaptor(); | |
2154 } | |
2155 | |
2156 ## circular BOF | |
2157 my $sl1 = | |
2158 Bio::EnsEMBL::CircularSlice->new( | |
2159 -COORD_SYSTEM => $self->{'coord_system'}, | |
2160 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2161 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2162 -START => $self->{'start'}, | |
2163 -END => $self->{'seq_region_length'}, | |
2164 -STRAND => $self->{'strand'}, | |
2165 -ADAPTOR => $self->adaptor() ); | |
2166 | |
2167 my $sl2 = | |
2168 Bio::EnsEMBL::CircularSlice->new( | |
2169 -COORD_SYSTEM => $self->{'coord_system'}, | |
2170 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2171 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2172 -START => 1, | |
2173 -END => $self->{'end'}, | |
2174 -STRAND => $self->{'strand'}, | |
2175 -ADAPTOR => $self->adaptor() ); | |
2176 | |
2177 my ( @arr, @arr1, @arr2 ); | |
2178 @arr1 = | |
2179 @{ $ta->fetch_all_by_Slice( $sl1, $load_exons, $logic_name ) }; | |
2180 @arr2 = | |
2181 @{ $ta->fetch_all_by_Slice( $sl2, $load_exons, $logic_name ) }; | |
2182 push @arr, @arr1, @arr2; | |
2183 return \@arr; | |
2184 ## circular EOF | |
2185 #return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name); | |
2186 } ## end sub get_all_Transcripts | |
2187 | |
2188 =head2 get_all_Exons | |
2189 | |
2190 Arg [1] : none | |
2191 Example : @exons = @{$slice->get_all_Exons}; | |
2192 Description: Gets all exons which overlap this slice. Note that these exons | |
2193 will not be associated with any transcripts, so this may not | |
2194 be terribly useful. | |
2195 Returntype : reference to a list of Bio::EnsEMBL::Exons | |
2196 Exceptions : none | |
2197 Caller : general | |
2198 Status : Stable | |
2199 | |
2200 =cut | |
2201 | |
2202 sub get_all_Exons { | |
2203 my $self = shift; | |
2204 | |
2205 if ( !$self->adaptor() ) { | |
2206 warning('Cannot get Exons without attached adaptor'); | |
2207 return []; | |
2208 } | |
2209 | |
2210 ## circular BOF | |
2211 my $sl1 = | |
2212 Bio::EnsEMBL::CircularSlice->new( | |
2213 -COORD_SYSTEM => $self->{'coord_system'}, | |
2214 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2215 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2216 -START => $self->{'start'}, | |
2217 -END => $self->{'seq_region_length'}, | |
2218 -STRAND => $self->{'strand'}, | |
2219 -ADAPTOR => $self->adaptor() ); | |
2220 | |
2221 my $sl2 = | |
2222 Bio::EnsEMBL::CircularSlice->new( | |
2223 -COORD_SYSTEM => $self->{'coord_system'}, | |
2224 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2225 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2226 -START => 1, | |
2227 -END => $self->{'end'}, | |
2228 -STRAND => $self->{'strand'}, | |
2229 -ADAPTOR => $self->adaptor() ); | |
2230 | |
2231 my ( @arr, @arr1, @arr2 ); | |
2232 @arr1 = | |
2233 @{ $sl1->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl1) }; | |
2234 @arr2 = | |
2235 @{ $sl2->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl2) }; | |
2236 push @arr, @arr1, @arr2; | |
2237 return \@arr; | |
2238 ## circular EOF | |
2239 | |
2240 #rturn $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self); | |
2241 } ## end sub get_all_Exons | |
2242 | |
2243 =head2 get_all_QtlFeatures | |
2244 | |
2245 Args : none | |
2246 Description: returns overlapping QtlFeatures | |
2247 Returntype : listref Bio::EnsEMBL::Map::QtlFeature | |
2248 Exceptions : none | |
2249 Caller : general | |
2250 Status : Stable | |
2251 | |
2252 =cut | |
2253 | |
2254 sub get_all_QtlFeatures { | |
2255 my $self = shift; | |
2256 | |
2257 if ( !$self->adaptor() ) { | |
2258 warning('Cannot get QtlFeatures without attached adaptor'); | |
2259 return []; | |
2260 } | |
2261 | |
2262 my $qfAdaptor; | |
2263 if ( $self->adaptor() ) { | |
2264 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor(); | |
2265 } else { | |
2266 return []; | |
2267 } | |
2268 | |
2269 ## circular BOF | |
2270 my $sl1 = | |
2271 Bio::EnsEMBL::CircularSlice->new( | |
2272 -COORD_SYSTEM => $self->{'coord_system'}, | |
2273 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2274 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2275 -START => $self->{'start'}, | |
2276 -END => $self->{'seq_region_length'}, | |
2277 -STRAND => $self->{'strand'}, | |
2278 -ADAPTOR => $self->adaptor() ); | |
2279 | |
2280 my $sl2 = | |
2281 Bio::EnsEMBL::CircularSlice->new( | |
2282 -COORD_SYSTEM => $self->{'coord_system'}, | |
2283 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2285 -START => 1, | |
2286 -END => $self->{'end'}, | |
2287 -STRAND => $self->{'strand'}, | |
2288 -ADAPTOR => $self->adaptor() ); | |
2289 | |
2290 my ( @arr, @arr1, @arr2 ); | |
2291 @arr1 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl1) }; | |
2292 @arr2 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl2) }; | |
2293 push @arr, @arr1, @arr2; | |
2294 return \@arr; | |
2295 ## circular EOF | |
2296 | |
2297 #return $qfAdaptor->fetch_all_by_Slice_constraint( $self ); | |
2298 } ## end sub get_all_QtlFeatures | |
2299 | |
2300 =head2 get_all_KaryotypeBands | |
2301 | |
2302 Arg [1] : none | |
2303 Example : @kary_bands = @{$slice->get_all_KaryotypeBands}; | |
2304 Description: Retrieves the karyotype bands which this slice overlaps. | |
2305 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands | |
2306 Exceptions : none | |
2307 Caller : general, contigview | |
2308 Status : Stable | |
2309 | |
2310 =cut | |
2311 | |
2312 sub get_all_KaryotypeBands { | |
2313 my ($self) = @_; | |
2314 | |
2315 if ( !$self->adaptor() ) { | |
2316 warning('Cannot get KaryotypeBands without attached adaptor'); | |
2317 return []; | |
2318 } | |
2319 | |
2320 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor(); | |
2321 | |
2322 ## circular BOF | |
2323 my $sl1 = | |
2324 Bio::EnsEMBL::CircularSlice->new( | |
2325 -COORD_SYSTEM => $self->{'coord_system'}, | |
2326 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2327 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2328 -START => $self->{'start'}, | |
2329 -END => $self->{'seq_region_length'}, | |
2330 -STRAND => $self->{'strand'}, | |
2331 -ADAPTOR => $self->adaptor() ); | |
2332 | |
2333 my $sl2 = | |
2334 Bio::EnsEMBL::CircularSlice->new( | |
2335 -COORD_SYSTEM => $self->{'coord_system'}, | |
2336 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2337 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2338 -START => 1, | |
2339 -END => $self->{'end'}, | |
2340 -STRAND => $self->{'strand'}, | |
2341 -ADAPTOR => $self->adaptor() ); | |
2342 | |
2343 my ( @arr, @arr1, @arr2 ); | |
2344 @arr1 = @{ $kadp->fetch_all_by_Slice($sl1) }; | |
2345 @arr2 = @{ $kadp->fetch_all_by_Slice($sl2) }; | |
2346 push @arr, @arr1, @arr2; | |
2347 return \@arr; | |
2348 ## circular EOF | |
2349 | |
2350 #return $kadp->fetch_all_by_Slice($self); | |
2351 } ## end sub get_all_KaryotypeBands | |
2352 | |
2353 =head2 get_repeatmasked_seq | |
2354 | |
2355 Arg [1] : listref of strings $logic_names (optional) | |
2356 Arg [2] : int $soft_masking_enable (optional) | |
2357 Arg [3] : hash reference $not_default_masking_cases (optional, default is {}) | |
2358 The values are 0 or 1 for hard and soft masking respectively | |
2359 The keys of the hash should be of 2 forms | |
2360 "repeat_class_" . $repeat_consensus->repeat_class, | |
2361 e.g. "repeat_class_SINE/MIR" | |
2362 "repeat_name_" . $repeat_consensus->name | |
2363 e.g. "repeat_name_MIR" | |
2364 depending on which base you want to apply the not default | |
2365 masking either the repeat_class or repeat_name. Both can be | |
2366 specified in the same hash at the same time, but in that case, | |
2367 repeat_name setting has priority over repeat_class. For example, | |
2368 you may have hard masking as default, and you may want soft | |
2369 masking of all repeat_class SINE/MIR, but repeat_name AluSp | |
2370 (which are also from repeat_class SINE/MIR). | |
2371 Your hash will be something like {"repeat_class_SINE/MIR" => 1, | |
2372 "repeat_name_AluSp" => 0} | |
2373 Example : $rm_slice = $slice->get_repeatmasked_seq(); | |
2374 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1); | |
2375 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat | |
2376 masked sequence instead of the regular sequence. | |
2377 Sequence returned by this new slice will have repeat regions | |
2378 hardmasked by default (sequence replaced by N) or | |
2379 or soft-masked when arg[2] = 1 (sequence in lowercase) | |
2380 Will only work with database connection to get repeat features. | |
2381 Returntype : Bio::EnsEMBL::RepeatMaskedSlice | |
2382 Exceptions : none | |
2383 Caller : general | |
2384 Status : Stable | |
2385 | |
2386 =cut | |
2387 | |
2388 sub get_repeatmasked_seq { | |
2389 my ( $self, $logic_names, $soft_mask, $not_default_masking_cases ) = | |
2390 @_; | |
2391 | |
2392 return | |
2393 Bio::EnsEMBL::RepeatMaskedSlice->new( | |
2394 -START => $self->{'start'}, | |
2395 -END => $self->{'end'}, | |
2396 -STRAND => $self->{'strand'}, | |
2397 -ADAPTOR => $self->adaptor(), | |
2398 -SEQ => $self->{'seq'}, | |
2399 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2400 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2401 -COORD_SYSTEM => $self->{'coord_system'}, | |
2402 -REPEAT_MASK => $logic_names, | |
2403 -SOFT_MASK => $soft_mask, | |
2404 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases | |
2405 ); | |
2406 } | |
2407 | |
2408 =head2 _mask_features | |
2409 | |
2410 Arg [1] : reference to a string $dnaref | |
2411 Arg [2] : array_ref $repeats | |
2412 reference to a list Bio::EnsEMBL::RepeatFeature | |
2413 give the list of coordinates to replace with N or with | |
2414 lower case | |
2415 Arg [3] : int $soft_masking_enable (optional) | |
2416 Arg [4] : hash reference $not_default_masking_cases (optional, default is {}) | |
2417 The values are 0 or 1 for hard and soft masking respectively | |
2418 The keys of the hash should be of 2 forms | |
2419 "repeat_class_" . $repeat_consensus->repeat_class, | |
2420 e.g. "repeat_class_SINE/MIR" | |
2421 "repeat_name_" . $repeat_consensus->name | |
2422 e.g. "repeat_name_MIR" | |
2423 depending on which base you want to apply the not default masking either | |
2424 the repeat_class or repeat_name. Both can be specified in the same hash | |
2425 at the same time, but in that case, repeat_name setting has priority over | |
2426 repeat_class. For example, you may have hard masking as default, and | |
2427 you may want soft masking of all repeat_class SINE/MIR, | |
2428 but repeat_name AluSp (which are also from repeat_class SINE/MIR). | |
2429 Your hash will be something like {"repeat_class_SINE/MIR" => 1, | |
2430 "repeat_name_AluSp" => 0} | |
2431 Description: replaces string positions described in the RepeatFeatures | |
2432 with Ns (default setting), or with the lower case equivalent | |
2433 (soft masking). The reference to a dna string which is passed | |
2434 is changed in place. | |
2435 Returntype : none | |
2436 Exceptions : none | |
2437 Caller : seq | |
2438 Status : Stable | |
2439 | |
2440 =cut | |
2441 | |
2442 sub _mask_features { | |
2443 my ( $self, $dnaref, $repeats, $soft_mask, | |
2444 $not_default_masking_cases ) = @_; | |
2445 | |
2446 $soft_mask = 0 unless ( defined $soft_mask ); | |
2447 $not_default_masking_cases = {} | |
2448 unless ( defined $not_default_masking_cases ); | |
2449 | |
2450 # explicit CORE::length call, to avoid any confusion with the Slice | |
2451 # length method | |
2452 my $dnalen = CORE::length($$dnaref); | |
2453 | |
2454 REP: foreach my $old_f ( @{$repeats} ) { | |
2455 my $f = $old_f->transfer($self); | |
2456 my $start = $f->start; | |
2457 my $end = $f->end; | |
2458 my $length = ( $end - $start ) + 1; | |
2459 | |
2460 # check if we get repeat completely outside of expected slice range | |
2461 if ( $end < 1 || $start > $dnalen ) { | |
2462 # warning("Unexpected: Repeat completely outside slice coordinates."); | |
2463 next REP; | |
2464 } | |
2465 | |
2466 # repeat partly outside slice range, so correct | |
2467 # the repeat start and length to the slice size if needed | |
2468 if ( $start < 1 ) { | |
2469 $start = 1; | |
2470 $length = ( $end - $start ) + 1; | |
2471 } | |
2472 | |
2473 # repeat partly outside slice range, so correct | |
2474 # the repeat end and length to the slice size if needed | |
2475 if ( $end > $dnalen ) { | |
2476 $end = $dnalen; | |
2477 $length = ( $end - $start ) + 1; | |
2478 } | |
2479 | |
2480 $start--; | |
2481 | |
2482 my $padstr; | |
2483 # if we decide to define masking on the base of the repeat_type, we'll need | |
2484 # to add the following, and the other commented line few lines below. | |
2485 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type; | |
2486 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class; | |
2487 my $rc_name = "repeat_name_" . $f->repeat_consensus->name; | |
2488 | |
2489 my $masking_type; | |
2490 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type}); | |
2491 $masking_type = $not_default_masking_cases->{$rc_class} | |
2492 if ( defined $not_default_masking_cases->{$rc_class} ); | |
2493 $masking_type = $not_default_masking_cases->{$rc_name} | |
2494 if ( defined $not_default_masking_cases->{$rc_name} ); | |
2495 | |
2496 $masking_type = $soft_mask unless ( defined $masking_type ); | |
2497 | |
2498 if ($masking_type) { | |
2499 $padstr = lc substr( $$dnaref, $start, $length ); | |
2500 } else { | |
2501 $padstr = 'N' x $length; | |
2502 } | |
2503 substr( $$dnaref, $start, $length ) = $padstr; | |
2504 } ## end foreach my $old_f ( @{$repeats...}) | |
2505 } ## end sub _mask_features | |
2506 | |
2507 =head2 get_all_SearchFeatures | |
2508 | |
2509 Arg [1] : scalar $ticket_ids | |
2510 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY'); | |
2511 Description: Retreives all search features for stored blast | |
2512 results for the ticket that overlap this slice | |
2513 Returntype : listref of Bio::EnsEMBL::SeqFeatures | |
2514 Exceptions : none | |
2515 Caller : general (webby!) | |
2516 Status : Stable | |
2517 | |
2518 =cut | |
2519 | |
2520 sub get_all_SearchFeatures { | |
2521 my $self = shift; | |
2522 my $ticket = shift; | |
2523 local $_; | |
2524 unless ($ticket) { | |
2525 throw("ticket argument is required"); | |
2526 } | |
2527 | |
2528 if ( !$self->adaptor() ) { | |
2529 warning("Cannot get SearchFeatures without an attached adaptor"); | |
2530 return []; | |
2531 } | |
2532 | |
2533 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast'); | |
2534 | |
2535 my $offset = $self->start - 1; | |
2536 | |
2537 my $features = $sfa | |
2538 ? $sfa->get_all_SearchFeatures( $ticket, $self->seq_region_name, | |
2539 $self->start, $self->end ) | |
2540 : []; | |
2541 | |
2542 foreach (@$features) { | |
2543 $_->start( $_->start - $offset ); | |
2544 $_->end( $_->end - $offset ); | |
2545 } | |
2546 return $features; | |
2547 | |
2548 } ## end sub get_all_SearchFeatures | |
2549 | |
2550 =head2 get_all_AssemblyExceptionFeatures | |
2551 | |
2552 Arg [1] : string $set (optional) | |
2553 Example : $slice->get_all_AssemblyExceptionFeatures(); | |
2554 Description: Retreives all misc features which overlap this slice. If | |
2555 a set code is provided only features which are members of | |
2556 the requested set are returned. | |
2557 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures | |
2558 Exceptions : none | |
2559 Caller : general | |
2560 Status : Stable | |
2561 | |
2562 =cut | |
2563 | |
2564 sub get_all_AssemblyExceptionFeatures { | |
2565 my $self = shift; | |
2566 my $misc_set = shift; | |
2567 | |
2568 my $adaptor = $self->adaptor(); | |
2569 | |
2570 if ( !$adaptor ) { | |
2571 warning('Cannot retrieve features without attached adaptor.'); | |
2572 return []; | |
2573 } | |
2574 | |
2575 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor(); | |
2576 | |
2577 ## circular BOF | |
2578 my $sl1 = | |
2579 Bio::EnsEMBL::CircularSlice->new( | |
2580 -COORD_SYSTEM => $self->{'coord_system'}, | |
2581 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2582 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2583 -START => $self->{'start'}, | |
2584 -END => $self->{'seq_region_length'}, | |
2585 -STRAND => $self->{'strand'}, | |
2586 -ADAPTOR => $self->adaptor() ); | |
2587 | |
2588 my $sl2 = | |
2589 Bio::EnsEMBL::CircularSlice->new( | |
2590 -COORD_SYSTEM => $self->{'coord_system'}, | |
2591 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2592 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2593 -START => 1, | |
2594 -END => $self->{'end'}, | |
2595 -STRAND => $self->{'strand'}, | |
2596 -ADAPTOR => $self->adaptor() ); | |
2597 | |
2598 my ( @arr, @arr1, @arr2 ); | |
2599 @arr1 = @{ $aefa->fetch_all_by_Slice($sl1) }; | |
2600 @arr2 = @{ $aefa->fetch_all_by_Slice($sl2) }; | |
2601 push @arr, @arr1, @arr2; | |
2602 return \@arr; | |
2603 ## circular EOF | |
2604 #return $aefa->fetch_all_by_Slice($self); | |
2605 } ## end sub get_all_AssemblyExceptionFeatures | |
2606 | |
2607 =head2 get_all_MiscFeatures | |
2608 | |
2609 Arg [1] : string $set (optional) | |
2610 Arg [2] : string $database (optional) | |
2611 Example : $slice->get_all_MiscFeatures('cloneset'); | |
2612 Description: Retreives all misc features which overlap this slice. If | |
2613 a set code is provided only features which are members of | |
2614 the requested set are returned. | |
2615 Returntype : listref of Bio::EnsEMBL::MiscFeatures | |
2616 Exceptions : none | |
2617 Caller : general | |
2618 Status : Stable | |
2619 | |
2620 =cut | |
2621 | |
2622 sub get_all_MiscFeatures { | |
2623 my $self = shift; | |
2624 my $misc_set = shift; | |
2625 my $dbtype = shift; | |
2626 my $msa; | |
2627 | |
2628 my $adaptor = $self->adaptor(); | |
2629 if ( !$adaptor ) { | |
2630 warning('Cannot retrieve features without attached adaptor.'); | |
2631 return []; | |
2632 } | |
2633 | |
2634 my $mfa; | |
2635 if ($dbtype) { | |
2636 my $db = $reg->get_db( $adaptor->db(), $dbtype ); | |
2637 if ( defined($db) ) { | |
2638 $mfa = $reg->get_adaptor( lc( $db->species() ), | |
2639 $db->group(), "miscfeature" ); | |
2640 } else { | |
2641 $mfa = $reg->get_adaptor( $adaptor->db()->species(), | |
2642 $dbtype, "miscfeature" ); | |
2643 } | |
2644 if ( !defined $mfa ) { | |
2645 warning("$dbtype misc features not available"); | |
2646 return []; | |
2647 } | |
2648 } else { | |
2649 $mfa = $adaptor->db->get_MiscFeatureAdaptor(); | |
2650 } | |
2651 | |
2652 ## circular BOF | |
2653 my $sl1 = | |
2654 Bio::EnsEMBL::CircularSlice->new( | |
2655 -COORD_SYSTEM => $self->{'coord_system'}, | |
2656 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2657 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2658 -START => $self->{'start'}, | |
2659 -END => $self->{'seq_region_length'}, | |
2660 -STRAND => $self->{'strand'}, | |
2661 -ADAPTOR => $self->adaptor() ); | |
2662 | |
2663 my $sl2 = | |
2664 Bio::EnsEMBL::CircularSlice->new( | |
2665 -COORD_SYSTEM => $self->{'coord_system'}, | |
2666 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2667 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2668 -START => 1, | |
2669 -END => $self->{'end'}, | |
2670 -STRAND => $self->{'strand'}, | |
2671 -ADAPTOR => $self->adaptor() ); | |
2672 | |
2673 my ( @arr, @arr1, @arr2 ); | |
2674 | |
2675 if ($misc_set) { | |
2676 @arr1 = | |
2677 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl1, $misc_set ) }; | |
2678 @arr2 = | |
2679 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl2, $misc_set ) }; | |
2680 push @arr, @arr1, @arr2; | |
2681 return \@arr; | |
2682 } | |
2683 @arr1 = @{ $mfa->fetch_all_by_Slice($sl1) }; | |
2684 @arr2 = @{ $mfa->fetch_all_by_Slice($sl2) }; | |
2685 push @arr, @arr1, @arr2; | |
2686 return \@arr; | |
2687 ## circular EOF | |
2688 | |
2689 # if($misc_set) { | |
2690 # return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set); | |
2691 # } | |
2692 | |
2693 # return $mfa->fetch_all_by_Slice($self); | |
2694 } ## end sub get_all_MiscFeatures | |
2695 | |
2696 =head2 get_all_AffyFeatures | |
2697 | |
2698 Args : (optional) list of strings - array names | |
2699 Example : $slice->get_all_AffyFeatures(); | |
2700 Description: Retrieves all AffyFeature objects which overlap this slice. | |
2701 Returntype : listref of Bio::EnsEMBL::AffyFeature objects | |
2702 Exceptions : none | |
2703 Caller : general | |
2704 Status : Stable | |
2705 | |
2706 =cut | |
2707 | |
2708 sub get_all_AffyFeatures { | |
2709 my $self = shift; | |
2710 my @arraynames = @_; | |
2711 | |
2712 my $sa = $self->adaptor(); | |
2713 if ( !$sa ) { | |
2714 warning("Cannot retrieve features without attached adaptor."); | |
2715 } | |
2716 my $fa = $sa->db()->get_AffyFeatureAdaptor(); | |
2717 my $features; | |
2718 | |
2719 ## circular BOF | |
2720 my $sl1 = | |
2721 Bio::EnsEMBL::CircularSlice->new( | |
2722 -COORD_SYSTEM => $self->{'coord_system'}, | |
2723 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2724 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2725 -START => $self->{'start'}, | |
2726 -END => $self->{'seq_region_length'}, | |
2727 -STRAND => $self->{'strand'}, | |
2728 -ADAPTOR => $self->adaptor() ); | |
2729 | |
2730 my $sl2 = | |
2731 Bio::EnsEMBL::CircularSlice->new( | |
2732 -COORD_SYSTEM => $self->{'coord_system'}, | |
2733 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2734 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2735 -START => 1, | |
2736 -END => $self->{'end'}, | |
2737 -STRAND => $self->{'strand'}, | |
2738 -ADAPTOR => $self->adaptor() ); | |
2739 | |
2740 my ( @arr, @arr1, @arr2 ); | |
2741 | |
2742 if (@arraynames) { | |
2743 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) }; | |
2744 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) }; | |
2745 } else { | |
2746 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) }; | |
2747 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) }; | |
2748 } | |
2749 push @arr, @arr1, @arr2; | |
2750 return \@arr; | |
2751 ## circular EOF | |
2752 | |
2753 # if ( @arraynames ) { | |
2754 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames ); | |
2755 # } else { | |
2756 # $features = $fa->fetch_all_by_Slice( $self ); | |
2757 # } | |
2758 # return $features; | |
2759 } ## end sub get_all_AffyFeatures | |
2760 | |
2761 =head2 get_all_OligoFeatures | |
2762 | |
2763 Args : (optional) list of strings - array names | |
2764 Example : $slice->get_all_OligoFeatures(); | |
2765 Description: Retrieves all OligoFeature objects which overlap this slice. | |
2766 Optionally just retrieve OligoFeature objects generated by | |
2767 probes from named arrays. | |
2768 Returntype : listref of Bio::EnsEMBL::OligoFeature objects | |
2769 Exceptions : none | |
2770 Caller : general | |
2771 Status : Stable | |
2772 | |
2773 =cut | |
2774 | |
2775 sub get_all_OligoFeatures { | |
2776 my $self = shift; | |
2777 my @arraynames = @_; | |
2778 | |
2779 my $sa = $self->adaptor(); | |
2780 if ( !$sa ) { | |
2781 warning("Cannot retrieve features without attached adaptor."); | |
2782 } | |
2783 my $fa = $sa->db()->get_OligoFeatureAdaptor(); | |
2784 my $features; | |
2785 | |
2786 ## circular BOF | |
2787 my $sl1 = | |
2788 Bio::EnsEMBL::CircularSlice->new( | |
2789 -COORD_SYSTEM => $self->{'coord_system'}, | |
2790 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2791 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2792 -START => $self->{'start'}, | |
2793 -END => $self->{'seq_region_length'}, | |
2794 -STRAND => $self->{'strand'}, | |
2795 -ADAPTOR => $self->adaptor() ); | |
2796 | |
2797 my $sl2 = | |
2798 Bio::EnsEMBL::CircularSlice->new( | |
2799 -COORD_SYSTEM => $self->{'coord_system'}, | |
2800 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2801 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2802 -START => 1, | |
2803 -END => $self->{'end'}, | |
2804 -STRAND => $self->{'strand'}, | |
2805 -ADAPTOR => $self->adaptor() ); | |
2806 | |
2807 my ( @arr, @arr1, @arr2 ); | |
2808 | |
2809 if (@arraynames) { | |
2810 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) }; | |
2811 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) }; | |
2812 } else { | |
2813 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) }; | |
2814 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) }; | |
2815 } | |
2816 push @arr, @arr1, @arr2; | |
2817 return \@arr; | |
2818 ## circular EOF | |
2819 | |
2820 # if ( @arraynames ) { | |
2821 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames ); | |
2822 # } else { | |
2823 # $features = $fa->fetch_all_by_Slice( $self ); | |
2824 # } | |
2825 # return $features; | |
2826 } ## end sub get_all_OligoFeatures | |
2827 | |
2828 =head2 get_all_OligoFeatures_by_type | |
2829 | |
2830 Arg [1] : string - type of array (e.g. AFFY or OLIGO) | |
2831 Arg [2] : (optional) string - logic name | |
2832 Example : $slice->get_all_OligoFeatures_by_type('OLIGO'); | |
2833 Description: Retrieves all OligoFeature objects which overlap this slice and | |
2834 were created by probes from the specified type of array. | |
2835 Returntype : listref of Bio::EnsEMBL::OligoFeature objects | |
2836 Exceptions : throws if no type | |
2837 Caller : general | |
2838 Status : Stable | |
2839 | |
2840 =cut | |
2841 | |
2842 sub get_all_OligoFeatures_by_type { | |
2843 my ( $self, $type, $logic_name ) = @_; | |
2844 | |
2845 throw('Need type as parameter') if !$type; | |
2846 | |
2847 my $sa = $self->adaptor(); | |
2848 if ( !$sa ) { | |
2849 warning("Cannot retrieve features without attached adaptor."); | |
2850 } | |
2851 my $fa = $sa->db()->get_OligoFeatureAdaptor(); | |
2852 | |
2853 ## circular BOF | |
2854 my $sl1 = | |
2855 Bio::EnsEMBL::CircularSlice->new( | |
2856 -COORD_SYSTEM => $self->{'coord_system'}, | |
2857 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2858 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2859 -START => $self->{'start'}, | |
2860 -END => $self->{'seq_region_length'}, | |
2861 -STRAND => $self->{'strand'}, | |
2862 -ADAPTOR => $self->adaptor() ); | |
2863 | |
2864 my $sl2 = | |
2865 Bio::EnsEMBL::CircularSlice->new( | |
2866 -COORD_SYSTEM => $self->{'coord_system'}, | |
2867 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2868 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2869 -START => 1, | |
2870 -END => $self->{'end'}, | |
2871 -STRAND => $self->{'strand'}, | |
2872 -ADAPTOR => $self->adaptor() ); | |
2873 | |
2874 my ( @arr, @arr1, @arr2 ); | |
2875 @arr1 = @{ $fa->fetch_all_by_Slice_type( $sl1, $type, $logic_name ) }; | |
2876 @arr2 = @{ $fa->fetch_all_by_Slice_type( $sl2, $type, $logic_name ) }; | |
2877 push @arr, @arr1, @arr2; | |
2878 return \@arr; | |
2879 ## circular EOF | |
2880 | |
2881 #my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name ); | |
2882 #return $features; | |
2883 } ## end sub get_all_OligoFeatures_by_type | |
2884 | |
2885 =head2 get_all_MarkerFeatures | |
2886 | |
2887 Arg [1] : (optional) string logic_name | |
2888 The logic name of the marker features to retrieve | |
2889 Arg [2] : (optional) int $priority | |
2890 Lower (exclusive) priority bound of the markers to retrieve | |
2891 Arg [3] : (optional) int $map_weight | |
2892 Upper (exclusive) priority bound of the markers to retrieve | |
2893 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)}; | |
2894 Description: Retrieves all markers which lie on this slice fulfilling the | |
2895 specified map_weight and priority parameters (if supplied). | |
2896 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures | |
2897 Exceptions : none | |
2898 Caller : contigview, general | |
2899 Status : Stable | |
2900 | |
2901 =cut | |
2902 | |
2903 sub get_all_MarkerFeatures { | |
2904 my ( $self, $logic_name, $priority, $map_weight ) = @_; | |
2905 | |
2906 if ( !$self->adaptor() ) { | |
2907 warning('Cannot retrieve MarkerFeatures without attached adaptor.'); | |
2908 return []; | |
2909 } | |
2910 | |
2911 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor; | |
2912 | |
2913 ## circular BOF | |
2914 my $sl1 = | |
2915 Bio::EnsEMBL::CircularSlice->new( | |
2916 -COORD_SYSTEM => $self->{'coord_system'}, | |
2917 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2918 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2919 -START => $self->{'start'}, | |
2920 -END => $self->{'seq_region_length'}, | |
2921 -STRAND => $self->{'strand'}, | |
2922 -ADAPTOR => $self->adaptor() ); | |
2923 | |
2924 my $sl2 = | |
2925 Bio::EnsEMBL::CircularSlice->new( | |
2926 -COORD_SYSTEM => $self->{'coord_system'}, | |
2927 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
2928 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
2929 -START => 1, | |
2930 -END => $self->{'end'}, | |
2931 -STRAND => $self->{'strand'}, | |
2932 -ADAPTOR => $self->adaptor() ); | |
2933 | |
2934 my ( @arr, @arr1, @arr2 ); | |
2935 @arr1 = @{ | |
2936 $ma->fetch_all_by_Slice_and_priority( $sl1, $priority, $map_weight, | |
2937 $logic_name ) }; | |
2938 @arr2 = @{ | |
2939 $ma->fetch_all_by_Slice_and_priority( $sl2, $priority, $map_weight, | |
2940 $logic_name ) }; | |
2941 push @arr, @arr1, @arr2; | |
2942 return \@arr; | |
2943 ## circular EOF | |
2944 | |
2945 # my $feats = $ma->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name); | |
2946 # return $feats; | |
2947 } ## end sub get_all_MarkerFeatures | |
2948 | |
2949 =head2 get_all_compara_DnaAlignFeatures | |
2950 | |
2951 Arg [1] : string $qy_species | |
2952 The name of the species to retrieve similarity features from | |
2953 Arg [2] : string $qy_assembly | |
2954 The name of the assembly to retrieve similarity features from | |
2955 Arg [3] : string $type | |
2956 The type of the alignment to retrieve similarity features from | |
2957 Arg [4] : <optional> compara dbadptor to use. | |
2958 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus', | |
2959 'MGSC3', | |
2960 'WGA'); | |
2961 Description: Retrieves a list of DNA-DNA Alignments to the species specified | |
2962 by the $qy_species argument. | |
2963 The compara database must be attached to the core database | |
2964 for this call to work correctly. As well the compara database | |
2965 must have the core dbadaptors for both this species, and the | |
2966 query species added to function correctly. | |
2967 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures | |
2968 Exceptions : warning if compara database is not available | |
2969 Caller : contigview | |
2970 Status : Stable | |
2971 | |
2972 =cut | |
2973 | |
2974 sub get_all_compara_DnaAlignFeatures { | |
2975 my ( $self, $qy_species, $qy_assembly, $alignment_type, $compara_db ) | |
2976 = @_; | |
2977 | |
2978 if ( !$self->adaptor() ) { | |
2979 warning( | |
2980 "Cannot retrieve DnaAlignFeatures without attached adaptor"); | |
2981 return []; | |
2982 } | |
2983 | |
2984 unless ( $qy_species && $alignment_type # && $qy_assembly | |
2985 ) | |
2986 { | |
2987 throw( | |
2988 "Query species and assembly and alignmemt type arguments are required" | |
2989 ); | |
2990 } | |
2991 | |
2992 if ( !defined($compara_db) ) { | |
2993 $compara_db = | |
2994 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" ); | |
2995 } | |
2996 unless ($compara_db) { | |
2997 warning( | |
2998 "Compara database must be attached to core database or passed " | |
2999 . "as an argument to " | |
3000 . "retrieve compara information" ); | |
3001 return []; | |
3002 } | |
3003 | |
3004 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor; | |
3005 ## circular BOF | |
3006 my $sl1 = | |
3007 Bio::EnsEMBL::CircularSlice->new( | |
3008 -COORD_SYSTEM => $self->{'coord_system'}, | |
3009 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3010 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3011 -START => $self->{'start'}, | |
3012 -END => $self->{'seq_region_length'}, | |
3013 -STRAND => $self->{'strand'}, | |
3014 -ADAPTOR => $self->adaptor() ); | |
3015 | |
3016 my $sl2 = | |
3017 Bio::EnsEMBL::CircularSlice->new( | |
3018 -COORD_SYSTEM => $self->{'coord_system'}, | |
3019 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3020 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3021 -START => 1, | |
3022 -END => $self->{'end'}, | |
3023 -STRAND => $self->{'strand'}, | |
3024 -ADAPTOR => $self->adaptor() ); | |
3025 | |
3026 my ( @arr, @arr1, @arr2 ); | |
3027 @arr1 = @{ | |
3028 $dafa->fetch_all_by_Slice( $sl1, $qy_species, $qy_assembly, | |
3029 $alignment_type ) }; | |
3030 @arr2 = @{ | |
3031 $dafa->fetch_all_by_Slice( $sl2, $qy_species, $qy_assembly, | |
3032 $alignment_type ) }; | |
3033 push @arr, @arr1, @arr2; | |
3034 return \@arr; | |
3035 ## circular EOF | |
3036 #return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type); | |
3037 } ## end sub get_all_compara_DnaAlignFeatures | |
3038 | |
3039 =head2 get_all_compara_Syntenies | |
3040 | |
3041 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus" | |
3042 Arg [2] : string $method_link_type, default is "SYNTENY" | |
3043 Arg [3] : <optional> compara dbadaptor to use. | |
3044 Description: gets all the compara syntenyies for a specfic species | |
3045 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion | |
3046 Status : Stable | |
3047 | |
3048 =cut | |
3049 | |
3050 sub get_all_compara_Syntenies { | |
3051 my ( $self, $qy_species, $method_link_type, $compara_db ) = @_; | |
3052 | |
3053 if ( !$self->adaptor() ) { | |
3054 warning("Cannot retrieve features without attached adaptor"); | |
3055 return []; | |
3056 } | |
3057 | |
3058 unless ($qy_species) { | |
3059 throw("Query species and assembly arguments are required"); | |
3060 } | |
3061 | |
3062 unless ( defined $method_link_type ) { | |
3063 $method_link_type = "SYNTENY"; | |
3064 } | |
3065 | |
3066 if ( !defined($compara_db) ) { | |
3067 $compara_db = | |
3068 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" ); | |
3069 } | |
3070 unless ($compara_db) { | |
3071 warning( | |
3072 "Compara database must be attached to core database or passed " | |
3073 . "as an argument to " | |
3074 . "retrieve compara information" ); | |
3075 return []; | |
3076 } | |
3077 my $gdba = $compara_db->get_GenomeDBAdaptor(); | |
3078 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor(); | |
3079 my $dfa = $compara_db->get_DnaFragAdaptor(); | |
3080 my $sra = $compara_db->get_SyntenyRegionAdaptor(); | |
3081 | |
3082 my $this_gdb = | |
3083 $gdba->fetch_by_core_DBAdaptor( $self->adaptor()->db() ); | |
3084 my $query_gdb = $gdba->fetch_by_registry_name($qy_species); | |
3085 my $mlss = | |
3086 $mlssa->fetch_by_method_link_type_GenomeDBs( $method_link_type, | |
3087 [ $this_gdb, $query_gdb ] ); | |
3088 | |
3089 my $cs = $self->coord_system()->name(); | |
3090 my $sr = $self->seq_region_name(); | |
3091 my ($dnafrag) = | |
3092 @{ $dfa->fetch_all_by_GenomeDB_region( $this_gdb, $cs, $sr ) }; | |
3093 | |
3094 ## circular BOF | |
3095 my $sl1 = | |
3096 Bio::EnsEMBL::CircularSlice->new( | |
3097 -COORD_SYSTEM => $self->{'coord_system'}, | |
3098 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3099 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3100 -START => $self->{'start'}, | |
3101 -END => $self->{'seq_region_length'}, | |
3102 -STRAND => $self->{'strand'}, | |
3103 -ADAPTOR => $self->adaptor() ); | |
3104 | |
3105 my $sl2 = | |
3106 Bio::EnsEMBL::CircularSlice->new( | |
3107 -COORD_SYSTEM => $self->{'coord_system'}, | |
3108 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3109 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3110 -START => 1, | |
3111 -END => $self->{'end'}, | |
3112 -STRAND => $self->{'strand'}, | |
3113 -ADAPTOR => $self->adaptor() ); | |
3114 | |
3115 my ( @arr, @arr1, @arr2 ); | |
3116 @arr1 = @{ | |
3117 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag, | |
3118 $sl1->start, $sl1->end ) | |
3119 }; | |
3120 @arr2 = @{ | |
3121 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag, | |
3122 $sl2->start, $sl2->end ) | |
3123 }; | |
3124 push @arr, @arr1, @arr2; | |
3125 return \@arr; | |
3126 ## circular EOF | |
3127 #return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end); | |
3128 } ## end sub get_all_compara_Syntenies | |
3129 | |
3130 =head2 get_all_Haplotypes | |
3131 | |
3132 Arg [1] : (optional) boolean $lite_flag | |
3133 if true lightweight haplotype objects are used | |
3134 Example : @haplotypes = $slice->get_all_Haplotypes; | |
3135 Description: Retrieves all of the haplotypes on this slice. Only works | |
3136 if the haplotype adaptor has been attached to the core adaptor | |
3137 via $dba->add_db_adaptor('haplotype', $hdba); | |
3138 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes | |
3139 Exceptions : warning is Haplotype database is not available | |
3140 Caller : contigview, general | |
3141 Status : Stable | |
3142 | |
3143 =cut | |
3144 | |
3145 sub get_all_Haplotypes { | |
3146 my ( $self, $lite_flag ) = @_; | |
3147 | |
3148 if ( !$self->adaptor() ) { | |
3149 warning("Cannot retrieve features without attached adaptor"); | |
3150 return []; | |
3151 } | |
3152 | |
3153 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype'); | |
3154 | |
3155 unless ($haplo_db) { | |
3156 warning( "Haplotype database must be attached to core database to " | |
3157 . "retrieve haplotype information" ); | |
3158 return []; | |
3159 } | |
3160 | |
3161 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor; | |
3162 | |
3163 ## circular BOF | |
3164 my $sl1 = | |
3165 Bio::EnsEMBL::CircularSlice->new( | |
3166 -COORD_SYSTEM => $self->{'coord_system'}, | |
3167 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3168 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3169 -START => $self->{'start'}, | |
3170 -END => $self->{'seq_region_length'}, | |
3171 -STRAND => $self->{'strand'}, | |
3172 -ADAPTOR => $self->adaptor() ); | |
3173 | |
3174 my $sl2 = | |
3175 Bio::EnsEMBL::CircularSlice->new( | |
3176 -COORD_SYSTEM => $self->{'coord_system'}, | |
3177 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3178 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3179 -START => 1, | |
3180 -END => $self->{'end'}, | |
3181 -STRAND => $self->{'strand'}, | |
3182 -ADAPTOR => $self->adaptor() ); | |
3183 | |
3184 my ( @arr, @arr1, @arr2 ); | |
3185 @arr1 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl1, $lite_flag ) }; | |
3186 @arr2 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl2, $lite_flag ) }; | |
3187 push @arr, @arr1, @arr2; | |
3188 return \@arr; | |
3189 ## circular EOF | |
3190 | |
3191 #my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag); | |
3192 #return $haplotypes; | |
3193 } ## end sub get_all_Haplotypes | |
3194 | |
3195 sub get_all_DASFactories { | |
3196 my $self = shift; | |
3197 return [ $self->adaptor()->db()->_each_DASFeatureFactory ]; | |
3198 } | |
3199 | |
3200 | |
3201 sub get_all_DASFeatures_dsn { | |
3202 my ( $self, $source_type, $dsn ) = @_; | |
3203 | |
3204 if ( !$self->adaptor() ) { | |
3205 warning("Cannot retrieve features without attached adaptor"); | |
3206 return []; | |
3207 } | |
3208 my @X = | |
3209 grep { $_->adaptor->dsn eq $dsn } | |
3210 $self->adaptor()->db()->_each_DASFeatureFactory; | |
3211 | |
3212 return [ $X[0]->fetch_all_Features( $self, $source_type ) ]; | |
3213 } | |
3214 | |
3215 sub get_all_DAS_Features { | |
3216 my ($self) = @_; | |
3217 | |
3218 $self->{_das_features} ||= {}; # Cache | |
3219 $self->{_das_styles} ||= {}; # Cache | |
3220 $self->{_das_segments} ||= {}; # Cache | |
3221 my %das_features; | |
3222 my %das_styles; | |
3223 my %das_segments; | |
3224 my $slice = $self; | |
3225 | |
3226 foreach my $dasfact ( @{ $self->get_all_DASFactories } ) { | |
3227 my $dsn = $dasfact->adaptor->dsn; | |
3228 my $name = $dasfact->adaptor->name; | |
3229 # my $type = $dasfact->adaptor->type; | |
3230 my $url = $dasfact->adaptor->url; | |
3231 | |
3232 my ($type) = $dasfact->adaptor->mapping; | |
3233 if ( ref $type eq 'ARRAY' ) { | |
3234 $type = shift @$type; | |
3235 } | |
3236 $type ||= $dasfact->adaptor->type; | |
3237 # Construct a cache key : SOURCE_URL/TYPE | |
3238 # Need the type to handle sources that serve multiple types of features | |
3239 | |
3240 my $key = join( '/', $name, $type ); | |
3241 if ( $self->{_das_features}->{$key} ) { # Use cached | |
3242 $das_features{$name} = $self->{_das_features}->{$key}; | |
3243 $das_styles{$name} = $self->{_das_styles}->{$key}; | |
3244 $das_segments{$name} = $self->{_das_segments}->{$key}; | |
3245 } else { # Get fresh data | |
3246 my ( $featref, $styleref, $segref ) = | |
3247 $dasfact->fetch_all_Features( $slice, $type ); | |
3248 $self->{_das_features}->{$key} = $featref; | |
3249 $self->{_das_styles}->{$key} = $styleref; | |
3250 $self->{_das_segments}->{$key} = $segref; | |
3251 $das_features{$name} = $featref; | |
3252 $das_styles{$name} = $styleref; | |
3253 $das_segments{$name} = $segref; | |
3254 } | |
3255 } ## end foreach my $dasfact ( @{ $self...}) | |
3256 | |
3257 return ( \%das_features, \%das_styles, \%das_segments ); | |
3258 } ## end sub get_all_DAS_Features | |
3259 | |
3260 =head2 get_all_DASFeatures | |
3261 | |
3262 Arg [1] : none | |
3263 Example : $features = $slice->get_all_DASFeatures; | |
3264 Description: Retreives a hash reference to a hash of DAS feature | |
3265 sets, keyed by the DNS, NOTE the values of this hash | |
3266 are an anonymous array containing: | |
3267 (1) a pointer to an array of features; | |
3268 (2) a pointer to the DAS stylesheet | |
3269 Returntype : hashref of Bio::SeqFeatures | |
3270 Exceptions : ? | |
3271 Caller : webcode | |
3272 Status : Stable | |
3273 | |
3274 =cut | |
3275 sub get_all_DASFeatures { | |
3276 my ( $self, $source_type ) = @_; | |
3277 | |
3278 if ( !$self->adaptor() ) { | |
3279 warning("Cannot retrieve features without attached adaptor"); | |
3280 return []; | |
3281 } | |
3282 | |
3283 my %genomic_features = map { | |
3284 ( $_->adaptor->dsn => | |
3285 [ $_->fetch_all_Features( $self, $source_type ) ] ) | |
3286 } $self->adaptor()->db()->_each_DASFeatureFactory; | |
3287 return \%genomic_features; | |
3288 | |
3289 } | |
3290 | |
3291 sub old_get_all_DASFeatures { | |
3292 my ( $self, @args ) = @_; | |
3293 | |
3294 if ( !$self->adaptor() ) { | |
3295 warning("Cannot retrieve features without attached adaptor"); | |
3296 return []; | |
3297 } | |
3298 | |
3299 my %genomic_features = | |
3300 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) } | |
3301 $self->adaptor()->db()->_each_DASFeatureFactory; | |
3302 return \%genomic_features; | |
3303 | |
3304 } | |
3305 | |
3306 =head2 get_all_ExternalFeatures | |
3307 | |
3308 Arg [1] : (optional) string $track_name | |
3309 If specified only features from ExternalFeatureAdaptors with | |
3310 the track name $track_name are retrieved. | |
3311 If not set, all features from every ExternalFeatureAdaptor are | |
3312 retrieved. | |
3313 Example : @x_features = @{$slice->get_all_ExternalFeatures} | |
3314 Description: Retrieves features on this slice from external feature adaptors | |
3315 Returntype : listref of Bio::SeqFeatureI implementing objects in slice | |
3316 coordinates | |
3317 Exceptions : none | |
3318 Caller : general | |
3319 Status : Stable | |
3320 | |
3321 =cut | |
3322 | |
3323 sub get_all_ExternalFeatures { | |
3324 my ( $self, $track_name ) = @_; | |
3325 if ( !$self->adaptor() ) { | |
3326 warning("Cannot retrieve features without attached adaptor"); | |
3327 return []; | |
3328 } | |
3329 my $features = []; | |
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors; | |
3331 my @xf_adaptors = (); | |
3332 if ($track_name) { | |
3333 #use a specific adaptor | |
3334 if ( exists $xfa_hash->{$track_name} ) { | |
3335 push @xf_adaptors, $xfa_hash->{$track_name}; | |
3336 } | |
3337 } else { | |
3338 #use all of the adaptors | |
3339 push @xf_adaptors, values %$xfa_hash; | |
3340 } | |
3341 | |
3342 ## circular BOF | |
3343 my $sl1 = | |
3344 Bio::EnsEMBL::CircularSlice->new( | |
3345 -COORD_SYSTEM => $self->{'coord_system'}, | |
3346 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3347 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3348 -START => $self->{'start'}, | |
3349 -END => $self->{'seq_region_length'}, | |
3350 -STRAND => $self->{'strand'}, | |
3351 -ADAPTOR => $self->adaptor() ); | |
3352 | |
3353 my $sl2 = | |
3354 Bio::EnsEMBL::CircularSlice->new( | |
3355 -COORD_SYSTEM => $self->{'coord_system'}, | |
3356 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3357 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3358 -START => 1, | |
3359 -END => $self->{'end'}, | |
3360 -STRAND => $self->{'strand'}, | |
3361 -ADAPTOR => $self->adaptor() ); | |
3362 | |
3363 foreach my $xfa (@xf_adaptors) { | |
3364 push @$features, @{ $xfa->fetch_all_by_Slice($sl1) }; | |
3365 push @$features, @{ $xfa->fetch_all_by_Slice($sl2) }; | |
3366 } | |
3367 return $features; | |
3368 ## circular EOF | |
3369 } ## end sub get_all_ExternalFeatures | |
3370 | |
3371 =head2 get_all_DitagFeatures | |
3372 | |
3373 Arg [1] : (optional) string ditag type | |
3374 Arg [1] : (optional) string logic_name | |
3375 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures}; | |
3376 Description: Retrieves the DitagFeatures of a specific type which overlap | |
3377 this slice with. If type is not defined, all features are | |
3378 retrieved. | |
3379 Returntype : listref of Bio::EnsEMBL::DitagFeatures | |
3380 Exceptions : warning if slice does not have attached adaptor | |
3381 Caller : general | |
3382 Status : Stable | |
3383 | |
3384 =cut | |
3385 | |
3386 sub get_all_DitagFeatures { | |
3387 my ( $self, $type, $logic_name ) = @_; | |
3388 | |
3389 if ( !$self->adaptor() ) { | |
3390 warning('Cannot get DitagFeatures without attached adaptor'); | |
3391 return []; | |
3392 } | |
3393 | |
3394 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor(); | |
3395 | |
3396 ## circular BOF | |
3397 my $sl1 = | |
3398 Bio::EnsEMBL::CircularSlice->new( | |
3399 -COORD_SYSTEM => $self->{'coord_system'}, | |
3400 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3401 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3402 -START => $self->{'start'}, | |
3403 -END => $self->{'seq_region_length'}, | |
3404 -STRAND => $self->{'strand'}, | |
3405 -ADAPTOR => $self->adaptor() ); | |
3406 | |
3407 my $sl2 = | |
3408 Bio::EnsEMBL::CircularSlice->new( | |
3409 -COORD_SYSTEM => $self->{'coord_system'}, | |
3410 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3411 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3412 -START => 1, | |
3413 -END => $self->{'end'}, | |
3414 -STRAND => $self->{'strand'}, | |
3415 -ADAPTOR => $self->adaptor() ); | |
3416 | |
3417 my ( @arr, @arr1, @arr2 ); | |
3418 @arr1 = @{ $dfa->fetch_all_by_Slice( $sl1, $type, $logic_name ) }; | |
3419 @arr2 = @{ $dfa->fetch_all_by_Slice( $sl2, $type, $logic_name ) }; | |
3420 push @arr, @arr1, @arr2; | |
3421 return \@arr; | |
3422 ## circular EOF | |
3423 #return $dfa->fetch_all_by_Slice($self, $type, $logic_name); | |
3424 } ## end sub get_all_DitagFeatures | |
3425 | |
3426 # GENERIC FEATURES (See DBAdaptor.pm) | |
3427 | |
3428 =head2 get_generic_features | |
3429 | |
3430 Arg [1] : (optional) List of names of generic feature types to return. | |
3431 If no feature names are given, all generic features are | |
3432 returned. | |
3433 Example : my %features = %{$slice->get_generic_features()}; | |
3434 Description: Gets generic features via the generic feature adaptors that | |
3435 have been added via DBAdaptor->add_GenricFeatureAdaptor (if | |
3436 any) | |
3437 Returntype : Hash of named features. | |
3438 Exceptions : none | |
3439 Caller : none | |
3440 Status : Stable | |
3441 | |
3442 =cut | |
3443 | |
3444 sub get_generic_features { | |
3445 | |
3446 my ( $self, @names ) = @_; | |
3447 | |
3448 if ( !$self->adaptor() ) { | |
3449 warning('Cannot retrieve features without attached adaptor'); | |
3450 return []; | |
3451 } | |
3452 | |
3453 my $db = $self->adaptor()->db(); | |
3454 | |
3455 my %features = (); # this will hold the results | |
3456 | |
3457 # get the adaptors for each feature | |
3458 my %adaptors = %{ $db->get_GenericFeatureAdaptors(@names) }; | |
3459 | |
3460 foreach my $adaptor_name ( keys(%adaptors) ) { | |
3461 | |
3462 my $adaptor_obj = $adaptors{$adaptor_name}; | |
3463 # get the features and add them to the hash | |
3464 ## circular BOF | |
3465 my $sl1 = | |
3466 Bio::EnsEMBL::CircularSlice->new( | |
3467 -COORD_SYSTEM => $self->{'coord_system'}, | |
3468 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3469 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3470 -START => $self->{'start'}, | |
3471 -END => $self->{'seq_region_length'}, | |
3472 -STRAND => $self->{'strand'}, | |
3473 -ADAPTOR => $self->adaptor() ); | |
3474 | |
3475 my $sl2 = | |
3476 Bio::EnsEMBL::CircularSlice->new( | |
3477 -COORD_SYSTEM => $self->{'coord_system'}, | |
3478 -SEQ_REGION_NAME => $self->{'seq_region_name'}, | |
3479 -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, | |
3480 -START => 1, | |
3481 -END => $self->{'end'}, | |
3482 -STRAND => $self->{'strand'}, | |
3483 -ADAPTOR => $self->adaptor() ); | |
3484 | |
3485 my ( @arr1, @arr2 ); | |
3486 my $features_ref; | |
3487 @arr1 = @{ $adaptor_obj->fetch_all_by_Slice($sl1) }; | |
3488 @arr2 = @{ $adaptor_obj->fetch_all_by_Slice($sl2) }; | |
3489 push @{$features_ref}, @arr1, @arr2; | |
3490 ## circular EOF | |
3491 #my $features_ref = $adaptor_obj->fetch_all_by_Slice($self); | |
3492 | |
3493 # add each feature to the hash to be returned | |
3494 foreach my $feature (@$features_ref) { | |
3495 $features{$adaptor_name} = $feature; | |
3496 } | |
3497 } ## end foreach my $adaptor_name ( ...) | |
3498 | |
3499 return \%features; | |
3500 | |
3501 } ## end sub get_generic_features | |
3502 | |
3503 =head2 project_to_slice | |
3504 | |
3505 Arg [1] : Slice to project to. | |
3506 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice); | |
3507 foreach my $segment ( @$chr_projection ){ | |
3508 $chr_slice = $segment->to_Slice(); | |
3509 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'. | |
3510 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start(). | |
3511 '-'.$chr_slice->end(). | |
3512 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n"; | |
3513 } | |
3514 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings | |
3515 and we want to state which one to project to. | |
3516 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which | |
3517 can also be used as [$start,$end,$slice] triplets. | |
3518 Exceptions : none | |
3519 Caller : none | |
3520 Status : At Risk | |
3521 | |
3522 =cut | |
3523 | |
3524 sub project_to_slice { | |
3525 my $self = shift; | |
3526 my $to_slice = shift; | |
3527 | |
3528 throw('Slice argument is required') if ( !$to_slice ); | |
3529 | |
3530 my $slice_adaptor = $self->adaptor(); | |
3531 | |
3532 if ( !$slice_adaptor ) { | |
3533 warning("Cannot project without attached adaptor."); | |
3534 return []; | |
3535 } | |
3536 | |
3537 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor(); | |
3538 | |
3539 my $cs = $to_slice->coord_system(); | |
3540 my $slice_cs = $self->coord_system(); | |
3541 | |
3542 my @projection; | |
3543 my $current_start = 1; | |
3544 | |
3545 # decompose this slice into its symlinked components. | |
3546 # this allows us to handle haplotypes and PARs | |
3547 my $normal_slice_proj = | |
3548 $slice_adaptor->fetch_normalized_slice_projection($self); | |
3549 foreach my $segment (@$normal_slice_proj) { | |
3550 my $normal_slice = $segment->[2]; | |
3551 | |
3552 $slice_cs = $normal_slice->coord_system(); | |
3553 | |
3554 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor(); | |
3555 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs ); | |
3556 | |
3557 # perform the mapping between this slice and the requested system | |
3558 my @coords; | |
3559 | |
3560 if ( defined $asm_mapper ) { | |
3561 @coords = $asm_mapper->map( $normal_slice->seq_region_name(), | |
3562 $normal_slice->start(), | |
3563 $normal_slice->end(), | |
3564 $normal_slice->strand(), | |
3565 $slice_cs, | |
3566 undef, | |
3567 $to_slice ); | |
3568 } else { | |
3569 $coords[0] = | |
3570 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(), | |
3571 $normal_slice->end() ); | |
3572 } | |
3573 | |
3574 #construct a projection from the mapping results and return it | |
3575 foreach my $coord (@coords) { | |
3576 my $coord_start = $coord->start(); | |
3577 my $coord_end = $coord->end(); | |
3578 my $length = $coord_end - $coord_start + 1; | |
3579 | |
3580 #skip gaps | |
3581 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) { | |
3582 my $coord_cs = $coord->coord_system(); | |
3583 | |
3584 # If the normalised projection just ended up mapping to the | |
3585 # same coordinate system we were already in then we should just | |
3586 # return the original region. This can happen for example, if we | |
3587 # were on a PAR region on Y which refered to X and a projection to | |
3588 # 'toplevel' was requested. | |
3589 # if($coord_cs->equals($slice_cs)) { | |
3590 # # trim off regions which are not defined | |
3591 # return $self->_constrain_to_region(); | |
3592 # } | |
3593 | |
3594 #create slices for the mapped-to coord system | |
3595 my $slice = | |
3596 $slice_adaptor->fetch_by_seq_region_id( $coord->id(), | |
3597 $coord_start, $coord_end, $coord->strand() ); | |
3598 | |
3599 my $current_end = $current_start + $length - 1; | |
3600 | |
3601 push @projection, | |
3602 bless( [ $current_start, $current_end, $slice ], | |
3603 "Bio::EnsEMBL::ProjectionSegment" ); | |
3604 } | |
3605 | |
3606 $current_start += $length; | |
3607 } ## end foreach my $coord (@coords) | |
3608 } ## end foreach my $segment (@$normal_slice_proj) | |
3609 | |
3610 # delete the cache as we may want to map to different set next time and old | |
3611 # results will be cached. | |
3612 | |
3613 $mapper_aptr->delete_cache; | |
3614 | |
3615 return \@projection; | |
3616 } ## end sub project_to_slice | |
3617 | |
3618 # | |
3619 # Bioperl Bio::PrimarySeqI methods: | |
3620 # | |
3621 | |
3622 =head2 id | |
3623 | |
3624 Description: Included for Bio::PrimarySeqI interface compliance (0.7) | |
3625 | |
3626 =cut | |
3627 | |
3628 sub id { name(@_); } | |
3629 | |
3630 =head2 display_id | |
3631 | |
3632 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3633 | |
3634 =cut | |
3635 | |
3636 sub display_id { name(@_); } | |
3637 | |
3638 =head2 primary_id | |
3639 | |
3640 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3641 | |
3642 =cut | |
3643 | |
3644 sub primary_id { name(@_); } | |
3645 | |
3646 =head2 desc | |
3647 | |
3648 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3649 | |
3650 =cut | |
3651 | |
3652 sub desc { | |
3653 return $_[0]->coord_system->name() . ' ' . $_[0]->seq_region_name(); | |
3654 } | |
3655 | |
3656 =head2 moltype | |
3657 | |
3658 Description: Included for Bio::PrimarySeqI interface compliance (0.7) | |
3659 | |
3660 =cut | |
3661 | |
3662 sub moltype { return 'dna'; } | |
3663 | |
3664 =head2 alphabet | |
3665 | |
3666 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3667 | |
3668 =cut | |
3669 | |
3670 sub alphabet { return 'dna'; } | |
3671 | |
3672 =head2 accession_number | |
3673 | |
3674 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3675 | |
3676 =cut | |
3677 | |
3678 sub accession_number { name(@_); } | |
3679 | |
3680 =head2 is_circular | |
3681 | |
3682 Description: Included for Bio::PrimarySeqI interface compliance (1.2) | |
3683 | |
3684 =cut | |
3685 | |
3686 sub is_circular { | |
3687 my ($self) = @_; | |
3688 | |
3689 if ( !defined( $self->{'circular'} ) ) { | |
3690 my @attrs = | |
3691 grep { $_ } @{ $self->get_all_Attributes('circular_seq') }; | |
3692 $self->{'circular'} = @attrs ? 1 : 0; | |
3693 } | |
3694 | |
3695 return $self->{'circular'}; | |
3696 } | |
3697 | |
3698 # sub DEPRECATED METHODS # | |
3699 ############################################################################### | |
3700 | |
3701 =head1 DEPRECATED METHODS | |
3702 | |
3703 =cut | |
3704 | |
3705 =head2 get_all_supercontig_Slices | |
3706 | |
3707 DEPRECATED use get_tiling_path("NTcontig") instead | |
3708 | |
3709 =cut | |
3710 | |
3711 sub get_all_supercontig_Slices { | |
3712 my $self = shift; | |
3713 | |
3714 deprecate("Use get_tiling_path('NTcontig') instead"); | |
3715 | |
3716 my $result = []; | |
3717 | |
3718 if ( $self->adaptor() ) { | |
3719 my $superctg_names = | |
3720 $self->adaptor()->list_overlapping_supercontigs($self); | |
3721 | |
3722 for my $name (@$superctg_names) { | |
3723 my $slice; | |
3724 $slice = $self->adaptor()->fetch_by_supercontig_name($name); | |
3725 $slice->name($name); | |
3726 push( @$result, $slice ); | |
3727 } | |
3728 } else { | |
3729 warning( | |
3730 "Slice needs to be attached to a database to get supercontigs"); | |
3731 } | |
3732 | |
3733 return $result; | |
3734 } | |
3735 | |
3736 =head2 get_Chromosome | |
3737 | |
3738 DEPRECATED use this instead: | |
3739 $slice_adp->fetch_by_region('chromosome',$slice->seq_region_name) | |
3740 | |
3741 =cut | |
3742 | |
3743 sub get_Chromosome { | |
3744 my $self = shift @_; | |
3745 | |
3746 deprecate( "Use SliceAdaptor::fetch_by_region('chromosome'," | |
3747 . '$slice->seq_region_name) instead' ); | |
3748 | |
3749 my $csa = $self->adaptor->db->get_CoordSystemAdaptor(); | |
3750 my ($top_cs) = @{ $csa->fetch_all() }; | |
3751 | |
3752 return | |
3753 $self->adaptor->fetch_by_region( $top_cs->name(), | |
3754 $self->seq_region_name(), | |
3755 undef, undef, undef, $top_cs->version() ); | |
3756 } | |
3757 | |
3758 =head2 chr_name | |
3759 | |
3760 DEPRECATED use seq_region_name() instead | |
3761 | |
3762 =cut | |
3763 | |
3764 sub chr_name { | |
3765 deprecate("Use seq_region_name() instead"); | |
3766 seq_region_name(@_); | |
3767 } | |
3768 | |
3769 =head2 chr_start | |
3770 | |
3771 DEPRECATED use start() instead | |
3772 | |
3773 =cut | |
3774 | |
3775 sub chr_start { | |
3776 deprecate('Use start() instead'); | |
3777 start(@_); | |
3778 } | |
3779 | |
3780 =head2 chr_end | |
3781 | |
3782 DEPRECATED use end() instead | |
3783 | |
3784 Returntype : int | |
3785 Exceptions : none | |
3786 Caller : SliceAdaptor, general | |
3787 | |
3788 =cut | |
3789 | |
3790 sub chr_end { | |
3791 deprecate('Use end() instead'); | |
3792 end(@_); | |
3793 } | |
3794 | |
3795 =head2 assembly_type | |
3796 | |
3797 DEPRECATED use version instead | |
3798 | |
3799 =cut | |
3800 | |
3801 sub assembly_type { | |
3802 my $self = shift; | |
3803 deprecate('Use $slice->coord_system()->version() instead.'); | |
3804 return $self->coord_system->version(); | |
3805 } | |
3806 | |
3807 =head2 get_tiling_path | |
3808 | |
3809 DEPRECATED use project instead | |
3810 | |
3811 =cut | |
3812 | |
3813 sub get_tiling_path { | |
3814 my $self = shift; | |
3815 deprecate('Use $slice->project("seqlevel") instead.'); | |
3816 return []; | |
3817 } | |
3818 | |
3819 =head2 dbID | |
3820 | |
3821 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead | |
3822 | |
3823 =cut | |
3824 | |
3825 sub dbID { | |
3826 my $self = shift; | |
3827 deprecate('Use SliceAdaptor::get_seq_region_id instead.'); | |
3828 if ( !$self->adaptor ) { | |
3829 warning('Cannot retrieve seq_region_id without attached adaptor.'); | |
3830 return 0; | |
3831 } | |
3832 return $self->adaptor->get_seq_region_id($self); | |
3833 } | |
3834 | |
3835 =head2 get_all_MapFrags | |
3836 | |
3837 DEPRECATED use get_all_MiscFeatures instead | |
3838 | |
3839 =cut | |
3840 | |
3841 sub get_all_MapFrags { | |
3842 my $self = shift; | |
3843 deprecate('Use get_all_MiscFeatures instead'); | |
3844 return $self->get_all_MiscFeatures(@_); | |
3845 } | |
3846 | |
3847 =head2 has_MapSet | |
3848 | |
3849 DEPRECATED use get_all_MiscFeatures instead | |
3850 | |
3851 =cut | |
3852 | |
3853 sub has_MapSet { | |
3854 my ( $self, $mapset_name ) = @_; | |
3855 deprecate('Use get_all_MiscFeatures instead'); | |
3856 my $mfs = $self->get_all_MiscFeatures($mapset_name); | |
3857 return ( @$mfs > 0 ); | |
3858 } | |
3859 | |
3860 1; |