0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::CircularSlice - Arbitary Slice of a genome
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 $sa = $db->get_SliceAdaptor;
|
|
28
|
|
29 $slice =
|
|
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
|
|
31
|
|
32 # get some attributes of the slice
|
|
33 my $seqname = $slice->seq_region_name();
|
|
34 my $start = $slice->start();
|
|
35 my $end = $slice->end();
|
|
36
|
|
37 # get the sequence from the slice
|
|
38 my $seq = $slice->seq();
|
|
39
|
|
40 # get some features from the slice
|
|
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
|
|
42 # do something with a gene
|
|
43 }
|
|
44
|
|
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
|
|
46 # do something with dna-dna alignments
|
|
47 }
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 A Slice object represents a region of a genome. It can be used to
|
|
52 retrieve sequence or features from an area of interest.
|
|
53
|
|
54 =head1 METHODS
|
|
55
|
|
56 =cut
|
|
57
|
|
58 package Bio::EnsEMBL::CircularSlice;
|
|
59 use vars qw(@ISA);
|
|
60 use strict;
|
|
61
|
|
62 use Bio::PrimarySeqI;
|
|
63
|
|
64 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
65 use Bio::EnsEMBL::Utils::Exception
|
|
66 qw(throw deprecate warning stack_trace_dump);
|
|
67 use Bio::EnsEMBL::RepeatMaskedSlice;
|
|
68 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
|
|
69 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
|
|
70 use Bio::EnsEMBL::ProjectionSegment;
|
|
71 use Bio::EnsEMBL::Registry;
|
|
72 use Bio::EnsEMBL::DBSQL::MergedAdaptor;
|
|
73
|
|
74 use Bio::EnsEMBL::StrainSlice;
|
|
75 #use Bio::EnsEMBL::IndividualSlice;
|
|
76 #use Bio::EnsEMBL::IndividualSliceFactory;
|
|
77 use Bio::EnsEMBL::Mapper::RangeRegistry;
|
|
78 use Bio::EnsEMBL::Slice;
|
|
79 use Data::Dumper;
|
|
80 use Scalar::Util qw(weaken isweak);
|
|
81
|
|
82 my $reg = "Bio::EnsEMBL::Registry";
|
|
83
|
|
84 @ISA = qw(Bio::EnsEMBL::Slice);
|
|
85
|
|
86 =head2 new
|
|
87
|
|
88 Arg [...] : List of named arguments
|
|
89 Bio::EnsEMBL::CoordSystem COORD_SYSTEM
|
|
90 string SEQ_REGION_NAME,
|
|
91 int START,
|
|
92 int END,
|
|
93 int SEQ_REGION_LENGTH, (optional)
|
|
94 string SEQ (optional)
|
|
95 int STRAND, (optional, defaults to 1)
|
|
96 Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
|
|
97 Example :
|
|
98
|
|
99 $slice =
|
|
100 Bio::EnsEMBL::CircularSlice->new( -coord_system => $cs,
|
|
101 -start => 1,
|
|
102 -end => 10000,
|
|
103 -strand => 1,
|
|
104 -seq_region_name => 'X',
|
|
105 -seq_region_length => 12e6,
|
|
106 -adaptor => $slice_adaptor );
|
|
107
|
|
108 Description: Creates a new slice object. A slice represents a
|
|
109 region of sequence in a particular coordinate system.
|
|
110 Slices can be used to retrieve sequence and features
|
|
111 from an area of interest in a genome.
|
|
112
|
|
113 Coordinates start at 1 and are inclusive. Negative
|
|
114 coordinates or coordinates exceeding the length of
|
|
115 the seq_region are permitted. Start must be less
|
|
116 than or equal. to end regardless of the strand.
|
|
117
|
|
118 Slice objects are immutable. Once instantiated their
|
|
119 attributes (with the exception of the adaptor) may
|
|
120 not be altered. To change the attributes a new slice
|
|
121 must be created.
|
|
122
|
|
123 Returntype : Bio::EnsEMBL::CircularSlice
|
|
124 Exceptions : throws if start, end, coordsystem or seq_region_name not
|
|
125 specified or not of the correct type
|
|
126 Caller : general, Bio::EnsEMBL::SliceAdaptor
|
|
127 Status : Stable
|
|
128
|
|
129 =cut
|
|
130
|
|
131 sub new {
|
|
132 my $caller = shift;
|
|
133
|
|
134 #new can be called as a class or object method
|
|
135 my $class = ref($caller) || $caller;
|
|
136
|
|
137 my ( $seq, $coord_system, $seq_region_name, $seq_region_length,
|
|
138 $start, $end, $strand, $adaptor, $empty )
|
|
139 = rearrange( [
|
|
140 qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
|
|
141 START END STRAND ADAPTOR EMPTY) ],
|
|
142 @_ );
|
|
143
|
|
144 #empty is only for backwards compatibility
|
|
145 if ($empty) {
|
|
146 deprecate( "Creation of empty slices is no longer needed "
|
|
147 . "and is deprecated" );
|
|
148 my $self = bless( { 'empty' => 1 }, $class );
|
|
149 $self->adaptor($adaptor);
|
|
150 return $self;
|
|
151 }
|
|
152
|
|
153 if ( !defined($seq_region_name) ) {
|
|
154 throw('SEQ_REGION_NAME argument is required');
|
|
155 }
|
|
156 if ( !defined($start) ) { throw('START argument is required') }
|
|
157 if ( !defined($end) ) { throw('END argument is required') }
|
|
158
|
|
159 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
|
|
160
|
|
161 if ( $seq_region_length <= 0 ) {
|
|
162 throw('SEQ_REGION_LENGTH must be > 0');
|
|
163 }
|
|
164
|
|
165 if ( defined($coord_system) ) {
|
|
166 assert_ref( $coord_system, 'Bio::EnsEMBL::CoordSystem' );
|
|
167
|
|
168 if ( $coord_system->is_top_level() ) {
|
|
169 throw('Cannot create circular slice on toplevel CoordSystem.');
|
|
170 }
|
|
171 } else {
|
|
172 warning("CircularSlice without coordinate system");
|
|
173 }
|
|
174
|
|
175 $strand ||= 1;
|
|
176
|
|
177 if ( $strand != 1 && $strand != -1 ) {
|
|
178 throw('STRAND argument must be -1 or 1');
|
|
179 }
|
|
180
|
|
181 if ( defined($adaptor) ) {
|
|
182 assert_ref( $adaptor, 'Bio::EnsEMBL::DBSQL::SliceAdaptor' );
|
|
183 }
|
|
184
|
|
185 my $seq1 = { 'coord_system' => $coord_system,
|
|
186 'seq' => $seq,
|
|
187 'seq_region_name' => $seq_region_name,
|
|
188 'seq_region_length' => $seq_region_length,
|
|
189 'start' => int($start),
|
|
190 'end' => int($end),
|
|
191 'strand' => $strand };
|
|
192
|
|
193 bless $seq1, $class;
|
|
194 $seq1->adaptor($adaptor);
|
|
195 return $seq1;
|
|
196 } ## end sub new
|
|
197
|
|
198 =head2 new_fast
|
|
199
|
|
200 Arg [1] : hashref to be blessed
|
|
201 Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
|
|
202 Exceptions : none
|
|
203 Returntype : Bio::EnsEMBL::CircularSlice
|
|
204 Caller : general
|
|
205 Status : Stable
|
|
206
|
|
207 =cut
|
|
208
|
|
209 sub new_fast {
|
|
210 my $class = shift;
|
|
211 my $hashref = shift;
|
|
212 my $self = bless $hashref, $class;
|
|
213 weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
|
|
214 return $self;
|
|
215 }
|
|
216
|
|
217 =head2 centrepoint
|
|
218
|
|
219 Arg [1] : none
|
|
220 Example : $cp = $slice->centrepoint();
|
|
221 Description: Returns the mid position of this slice relative to the
|
|
222 start of the sequence region that it was created on.
|
|
223 Coordinates are inclusive and start at 1.
|
|
224 Returntype : int
|
|
225 Exceptions : none
|
|
226 Caller : general
|
|
227 Status : Stable
|
|
228
|
|
229 =cut
|
|
230
|
|
231 sub centrepoint {
|
|
232 my $self = shift;
|
|
233
|
|
234 my ( $s, $e, $length ) =
|
|
235 ( $self->{'start'}, $self->{'end'}, $self->{'seq_region_length'} );
|
|
236
|
|
237 if ( $s < $e ) {
|
|
238 return ( $s + $e )/2;
|
|
239 }
|
|
240
|
|
241 my $r1 = $length - $s;
|
|
242 my $r2 = $e;
|
|
243 my $r = ( $r1 + $r2 )/2;
|
|
244 my $m = $s + $r;
|
|
245
|
|
246 if ( $m > $length ) {
|
|
247 $m = $m - $length;
|
|
248 }
|
|
249
|
|
250 return $m;
|
|
251 }
|
|
252
|
|
253 =head2 length
|
|
254
|
|
255 Arg [1] : none
|
|
256 Example : $length = $slice->length();
|
|
257 Description: Returns the length of this slice in basepairs
|
|
258 Returntype : int
|
|
259 Exceptions : none
|
|
260 Caller : general
|
|
261 Status : Stable
|
|
262
|
|
263 =cut
|
|
264
|
|
265 sub length {
|
|
266 my ($self) = @_;
|
|
267
|
|
268 if ( $self->{'start'} < $self->{'end'} ) {
|
|
269 return $self->{'end'} - $self->{'start'} + 1;
|
|
270 }
|
|
271
|
|
272 my $r1 = $self->{'seq_region_length'} - $self->{'start'};
|
|
273 my $r2 = $self->{'end'};
|
|
274 my $ln = $r1 + $r2 + 1;
|
|
275
|
|
276 return $ln;
|
|
277 }
|
|
278
|
|
279 =head2 invert
|
|
280
|
|
281 Arg [1] : none
|
|
282 Example : $inverted_slice = $slice->invert;
|
|
283 Description: Creates a copy of this slice on the opposite strand and
|
|
284 returns it.
|
|
285 Returntype : Bio::EnsEMBL::Slice
|
|
286 Exceptions : none
|
|
287 Caller : general
|
|
288 Status : Stable
|
|
289
|
|
290 =cut
|
|
291
|
|
292 sub invert {
|
|
293 my $self = shift;
|
|
294
|
|
295 # make a shallow copy of the slice via a hash copy and flip the strand
|
|
296 my %s = %$self;
|
|
297 $s{'strand'} = $self->{'strand'}*-1;
|
|
298
|
|
299 # reverse compliment any attached sequence
|
|
300 reverse_comp( \$s{'seq'} ) if ( $s{'seq'} );
|
|
301
|
|
302 # bless and return the copy
|
|
303 return bless \%s, ref $self;
|
|
304 }
|
|
305
|
|
306 =head2 seq
|
|
307
|
|
308 Arg [1] : none
|
|
309 Example : print "SEQUENCE = ", $slice->seq();
|
|
310 Description: Returns the sequence of the region represented by this
|
|
311 slice formatted as a string.
|
|
312 Returntype : string
|
|
313 Exceptions : none
|
|
314 Caller : general
|
|
315 Status : Stable
|
|
316
|
|
317 =cut
|
|
318
|
|
319 sub seq {
|
|
320 my $self = shift;
|
|
321
|
|
322 # special case for in-between (insert) coordinates
|
|
323 return '' if ( $self->start() == $self->end() + 1 );
|
|
324 return $self->{'seq'} if ( $self->{'seq'} );
|
|
325
|
|
326 if ( $self->adaptor() ) {
|
|
327
|
|
328 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
|
|
329 if ( $self->{'start'} > $self->{'end'} ) {
|
|
330 my $length = $self->{'seq_region_length'};
|
|
331
|
|
332 my $sl1 =
|
|
333 Bio::EnsEMBL::Slice->new(
|
|
334 -COORD_SYSTEM => $self->{'coord_system'},
|
|
335 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
336 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
337 -START => $self->{'start'},
|
|
338 -END => $self->{'seq_region_length'},
|
|
339 -STRAND => $self->{'strand'},
|
|
340 -ADAPTOR => $self->adaptor() );
|
|
341
|
|
342 my $sl2 =
|
|
343 Bio::EnsEMBL::Slice->new(
|
|
344 -COORD_SYSTEM => $self->{'coord_system'},
|
|
345 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
346 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
347 -START => 1,
|
|
348 -END => $self->{'end'},
|
|
349 -STRAND => $self->{'strand'},
|
|
350 -ADAPTOR => $self->adaptor() );
|
|
351
|
|
352 my $seq1 = ${
|
|
353 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl1, 1, undef,
|
|
354 1 ) };
|
|
355 my $seq2 = ${
|
|
356 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl2, 1, undef,
|
|
357 1 ) };
|
|
358 return $seq1 . $seq2;
|
|
359
|
|
360 } else {
|
|
361 my $seq1 = ${
|
|
362 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, undef,
|
|
363 1 ) };
|
|
364 return $seq1;
|
|
365 }
|
|
366 } ## end if ( $self->adaptor() )
|
|
367
|
|
368 # no attached sequence, and no db, so just return Ns
|
|
369 return 'N' x $self->length();
|
|
370 } ## end sub seq
|
|
371
|
|
372 =head2 subseq
|
|
373
|
|
374 Arg [1] : int $startBasePair
|
|
375 relative to start of slice, which is 1.
|
|
376 Arg [2] : int $endBasePair
|
|
377 relative to start of slice.
|
|
378 Arg [3] : (optional) int $strand
|
|
379 The strand of the slice to obtain sequence from. Default
|
|
380 value is 1.
|
|
381 Description: returns string of dna sequence
|
|
382 Returntype : txt
|
|
383 Exceptions : end should be at least as big as start
|
|
384 strand must be set
|
|
385 Caller : general
|
|
386 Status : Stable
|
|
387
|
|
388 =cut
|
|
389
|
|
390 sub subseq {
|
|
391 my ( $self, $start, $end, $strand ) = @_;
|
|
392
|
|
393 # handle 'between' case for insertions
|
|
394 return '' if ( $start == $end + 1 );
|
|
395
|
|
396 $strand = 1 unless ( defined $strand );
|
|
397
|
|
398 if ( $strand != -1 && $strand != 1 ) {
|
|
399 throw("Invalid strand [$strand] in call to Slice::subseq.");
|
|
400 }
|
|
401 my $subseq;
|
|
402 my $length = $self->{'seq_region_length'};
|
|
403
|
|
404 if ( $self->adaptor ) {
|
|
405
|
|
406 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
|
|
407 if ( $end < $start ) {
|
|
408 my $subseq1 = ${
|
|
409 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
|
|
410 $length, $strand )
|
|
411 };
|
|
412 my $subseq2 = ${
|
|
413 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, $end,
|
|
414 $strand ) };
|
|
415 $subseq = $subseq1 . $subseq2;
|
|
416
|
|
417 } else {
|
|
418 $subseq = ${
|
|
419 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
|
|
420 $end, $strand ) };
|
|
421 }
|
|
422 } else {
|
|
423 ## check for gap at the beginning and pad it with Ns
|
|
424 if ( $start < 1 ) {
|
|
425 $subseq = "N" x ( 1 - $start );
|
|
426 $start = 1;
|
|
427 }
|
|
428 $subseq .= substr( $self->seq(), $start - 1, $end - $start + 1 );
|
|
429 ## check for gap at the end and pad it with Ns
|
|
430 if ( $end > $self->length() ) {
|
|
431 $subseq .= "N" x ( $end - $self->length() );
|
|
432 }
|
|
433 reverse_comp( \$subseq ) if ( $strand == -1 );
|
|
434 }
|
|
435 return $subseq;
|
|
436 } ## end sub subseq
|
|
437
|
|
438 =head2 project
|
|
439
|
|
440 Arg [1] : string $name
|
|
441 The name of the coordinate system to project this slice onto
|
|
442 Arg [2] : string $version
|
|
443 The version of the coordinate system (such as 'NCBI34') to
|
|
444 project this slice onto
|
|
445 Example :
|
|
446 my $clone_projection = $slice->project('clone');
|
|
447
|
|
448 foreach my $seg (@$clone_projection) {
|
|
449 my $clone = $segment->to_Slice();
|
|
450 print $slice->seq_region_name(), ':', $seg->from_start(), '-',
|
|
451 $seg->from_end(), ' -> ',
|
|
452 $clone->seq_region_name(), ':', $clone->start(), '-',
|
|
453 $clone->end(),
|
|
454 $clone->strand(), "\n";
|
|
455 }
|
|
456 Description: Returns the results of 'projecting' this slice onto another
|
|
457 coordinate system. Projecting to a coordinate system that
|
|
458 the slice is assembled from is analagous to retrieving a tiling
|
|
459 path. This method may also be used to 'project up' to a higher
|
|
460 level coordinate system, however.
|
|
461
|
|
462 This method returns a listref of triplets [start,end,slice]
|
|
463 which represents the projection. The start and end defined the
|
|
464 region of this slice which is made up of the third value of
|
|
465 the triplet: a slice in the requested coordinate system.
|
|
466 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
|
|
467 can also be used as [$start,$end,$slice] triplets
|
|
468 Exceptions : none
|
|
469 Caller : general
|
|
470 Status : Stable
|
|
471
|
|
472 =cut
|
|
473
|
|
474 sub project {
|
|
475 my $self = shift;
|
|
476 my $cs_name = shift;
|
|
477 my $cs_version = shift;
|
|
478
|
|
479 throw('Coord_system name argument is required') if ( !$cs_name );
|
|
480
|
|
481 my $slice_adaptor = $self->adaptor();
|
|
482
|
|
483 if ( !$slice_adaptor ) {
|
|
484 warning("Cannot project without attached adaptor.");
|
|
485 return [];
|
|
486 }
|
|
487
|
|
488 if ( !$self->coord_system() ) {
|
|
489 warning("Cannot project without attached coord system.");
|
|
490 return [];
|
|
491 }
|
|
492
|
|
493 my $db = $slice_adaptor->db();
|
|
494 my $csa = $db->get_CoordSystemAdaptor();
|
|
495 my $cs = $csa->fetch_by_name( $cs_name, $cs_version );
|
|
496
|
|
497 my $sl01 =
|
|
498 Bio::EnsEMBL::CircularSlice->new(
|
|
499 -COORD_SYSTEM => $self->{'coord_system'},
|
|
500 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
501 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
502 -START => $self->{'start'},
|
|
503 -END => $self->{'seq_region_length'},
|
|
504 -STRAND => $self->{'strand'},
|
|
505 -ADAPTOR => $self->adaptor() );
|
|
506
|
|
507 my $sl02 =
|
|
508 Bio::EnsEMBL::CircularSlice->new(
|
|
509 -COORD_SYSTEM => $self->{'coord_system'},
|
|
510 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
511 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
512 -START => 1,
|
|
513 -END => $self->{'end'},
|
|
514 -STRAND => $self->{'strand'},
|
|
515 -ADAPTOR => $self->adaptor() );
|
|
516
|
|
517 my @projection;
|
|
518 my $current_start = 1;
|
|
519
|
|
520 foreach my $sl2 ( $sl01, $sl02 ) {
|
|
521 my $slice_cs = $sl2->coord_system();
|
|
522
|
|
523 if ( !$cs ) {
|
|
524 throw( "Cannot project to unknown coordinate system "
|
|
525 . "[$cs_name $cs_version]" );
|
|
526 }
|
|
527
|
|
528 # no mapping is needed if the requested coord system is the one we are in
|
|
529 # but we do need to check if some of the slice is outside of defined regions
|
|
530 if ( $slice_cs->equals($cs) ) {
|
|
531 return $self->_constrain_to_region();
|
|
532 }
|
|
533
|
|
534 # decompose this slice into its symlinked components.
|
|
535 # this allows us to handle haplotypes and PARs
|
|
536 my $normal_slice_proj =
|
|
537 $slice_adaptor->fetch_normalized_slice_projection($sl2);
|
|
538
|
|
539 foreach my $segment (@$normal_slice_proj) {
|
|
540 my $normal_slice = $segment->[2];
|
|
541
|
|
542 $slice_cs = $normal_slice->coord_system();
|
|
543
|
|
544 my $asma = $db->get_AssemblyMapperAdaptor();
|
|
545 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
|
|
546
|
|
547 # perform the mapping between this slice and the requested system
|
|
548 my @coords;
|
|
549
|
|
550 if ( defined $asm_mapper ) {
|
|
551 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
|
|
552 $normal_slice->start(),
|
|
553 $normal_slice->end(),
|
|
554 $normal_slice->strand(),
|
|
555 $slice_cs );
|
|
556
|
|
557 } else {
|
|
558 $coords[0] =
|
|
559 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
|
|
560 $normal_slice->end() );
|
|
561 }
|
|
562
|
|
563 #construct a projection from the mapping results and return it
|
|
564 foreach my $coord (@coords) {
|
|
565 my $coord_start = $coord->start();
|
|
566 my $coord_end = $coord->end();
|
|
567 my $length = $coord_end - $coord_start + 1;
|
|
568
|
|
569 #skip gaps
|
|
570 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
|
|
571 my $coord_cs = $coord->coord_system();
|
|
572
|
|
573 # If the normalised projection just ended up mapping to the
|
|
574 # same coordinate system we were already in then we should just
|
|
575 # return the original region. This can happen for example, if we
|
|
576 # were on a PAR region on Y which refered to X and a projection to
|
|
577 # 'toplevel' was requested.
|
|
578
|
|
579 if ( $coord_cs->equals($slice_cs) ) {
|
|
580 # trim off regions which are not defined
|
|
581 return $self->_constrain_to_region();
|
|
582 }
|
|
583 #create slices for the mapped-to coord system
|
|
584
|
|
585 my $slice =
|
|
586 $slice_adaptor->fetch_by_seq_region_id(
|
|
587 $coord->id(), $coord_start,
|
|
588 $coord_end, $coord->strand()
|
|
589 );
|
|
590
|
|
591 my $current_end = $current_start + $length - 1;
|
|
592 push @projection,
|
|
593 bless( [ $current_start, $current_end, $slice ],
|
|
594 "Bio::EnsEMBL::ProjectionSegment" );
|
|
595 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...))
|
|
596
|
|
597 $current_start += $length;
|
|
598 } ## end foreach my $coord (@coords)
|
|
599 } ## end foreach my $segment (@$normal_slice_proj)
|
|
600 } #foreach
|
|
601
|
|
602 return \@projection;
|
|
603 } ## end sub project
|
|
604
|
|
605 sub project_org {
|
|
606 my $self = shift;
|
|
607 my $cs_name = shift;
|
|
608 my $cs_version = shift;
|
|
609
|
|
610 throw('Coord_system name argument is required') if ( !$cs_name );
|
|
611
|
|
612 my $slice_adaptor = $self->adaptor();
|
|
613
|
|
614 if ( !$slice_adaptor ) {
|
|
615 warning("Cannot project without attached adaptor.");
|
|
616 return [];
|
|
617 }
|
|
618
|
|
619 if ( !$self->coord_system() ) {
|
|
620 warning("Cannot project without attached coord system.");
|
|
621 return [];
|
|
622 }
|
|
623
|
|
624 my $db = $slice_adaptor->db();
|
|
625 my $csa = $db->get_CoordSystemAdaptor();
|
|
626 my $cs = $csa->fetch_by_name( $cs_name, $cs_version );
|
|
627 my $slice_cs = $self->coord_system();
|
|
628
|
|
629 if ( !$cs ) {
|
|
630 throw( "Cannot project to unknown coordinate system "
|
|
631 . "[$cs_name $cs_version]" );
|
|
632 }
|
|
633
|
|
634 # No mapping is needed if the requested coord system is the one we
|
|
635 # are in. But we do need to check if some of the slice is outside of
|
|
636 # defined regions.
|
|
637 if ( $slice_cs->equals($cs) ) {
|
|
638 return $self->_constrain_to_region();
|
|
639 }
|
|
640
|
|
641 my @projection;
|
|
642 my $current_start = 1;
|
|
643
|
|
644 # Decompose this slice into its symlinked components. This allows us
|
|
645 # to handle haplotypes and PARs.
|
|
646 my $normal_slice_proj =
|
|
647 $slice_adaptor->fetch_normalized_slice_projection($self);
|
|
648 foreach my $segment (@$normal_slice_proj) {
|
|
649 my $normal_slice = $segment->[2];
|
|
650
|
|
651 $slice_cs = $normal_slice->coord_system();
|
|
652
|
|
653 my $asma = $db->get_AssemblyMapperAdaptor();
|
|
654 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
|
|
655
|
|
656 # perform the mapping between this slice and the requested system
|
|
657 my @coords;
|
|
658
|
|
659 if ( defined $asm_mapper ) {
|
|
660 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
|
|
661 $normal_slice->start(),
|
|
662 $normal_slice->end(),
|
|
663 $normal_slice->strand(),
|
|
664 $slice_cs );
|
|
665
|
|
666 } else {
|
|
667 $coords[0] =
|
|
668 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
|
|
669 $normal_slice->end() );
|
|
670 }
|
|
671
|
|
672 #construct a projection from the mapping results and return it
|
|
673 foreach my $coord (@coords) {
|
|
674 my $coord_start = $coord->start();
|
|
675 my $coord_end = $coord->end();
|
|
676 my $length = $coord_end - $coord_start + 1;
|
|
677
|
|
678 #skip gaps
|
|
679 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
|
|
680 my $coord_cs = $coord->coord_system();
|
|
681
|
|
682 # If the normalised projection just ended up mapping to the
|
|
683 # same coordinate system we were already in then we should just
|
|
684 # return the original region. This can happen for example,
|
|
685 # if we were on a PAR region on Y which refered to X and a
|
|
686 # projection to 'toplevel' was requested.
|
|
687
|
|
688 if ( $coord_cs->equals($slice_cs) ) {
|
|
689 # trim off regions which are not defined
|
|
690 return $self->_constrain_to_region();
|
|
691 }
|
|
692 #create slices for the mapped-to coord system
|
|
693
|
|
694 my $slice =
|
|
695 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
|
|
696 $coord_start, $coord_end, $coord->strand() );
|
|
697
|
|
698 my $current_end = $current_start + $length - 1;
|
|
699
|
|
700 push @projection,
|
|
701 bless( [ $current_start, $current_end, $slice ],
|
|
702 "Bio::EnsEMBL::ProjectionSegment" );
|
|
703 } ## end if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate'...))
|
|
704
|
|
705 $current_start += $length;
|
|
706 } ## end foreach my $coord (@coords)
|
|
707 } ## end foreach my $segment (@$normal_slice_proj)
|
|
708
|
|
709 return \@projection;
|
|
710 } ## end sub project_org
|
|
711
|
|
712 sub _constrain_to_region {
|
|
713 my $self = shift;
|
|
714
|
|
715 my $entire_len = $self->seq_region_length();
|
|
716
|
|
717 # If the slice has negative coordinates or coordinates exceeding the
|
|
718 # exceeding length of the sequence region we want to shrink the slice
|
|
719 # to the defined region.
|
|
720
|
|
721 if ( $self->{'start'} > $entire_len || $self->{'end'} < 1 ) {
|
|
722 #none of this slice is in a defined region
|
|
723 return [];
|
|
724 }
|
|
725
|
|
726 my $right_contract = 0;
|
|
727 my $left_contract = 0;
|
|
728 if ( $self->{'end'} > $entire_len ) {
|
|
729 $right_contract = $entire_len - $self->{'end'};
|
|
730 }
|
|
731 if ( $self->{'start'} < 1 ) {
|
|
732 $left_contract = $self->{'start'} - 1;
|
|
733 }
|
|
734
|
|
735 my $new_slice;
|
|
736 if ( $left_contract || $right_contract ) {
|
|
737 $new_slice = $self->expand( $left_contract, $right_contract );
|
|
738 } else {
|
|
739 $new_slice = $self;
|
|
740 }
|
|
741
|
|
742 return [ bless [ 1 - $left_contract,
|
|
743 $self->length() + $right_contract,
|
|
744 $new_slice ],
|
|
745 "Bio::EnsEMBL::ProjectionSegment" ];
|
|
746 } ## end sub _constrain_to_region
|
|
747
|
|
748 =head2 expand
|
|
749
|
|
750 Arg [1] : (optional) int $five_prime_expand
|
|
751 The number of basepairs to shift this slices five_prime
|
|
752 coordinate by. Positive values make the slice larger,
|
|
753 negative make the slice smaller.
|
|
754 coordinate left.
|
|
755 Default = 0.
|
|
756 Arg [2] : (optional) int $three_prime_expand
|
|
757 The number of basepairs to shift this slices three_prime
|
|
758 coordinate by. Positive values make the slice larger,
|
|
759 negative make the slice smaller.
|
|
760 Default = 0.
|
|
761 Arg [3] : (optional) bool $force_expand
|
|
762 if set to 1, then the slice will be contracted even in the case
|
|
763 when shifts $five_prime_expand and $three_prime_expand overlap.
|
|
764 In that case $five_prime_expand and $three_prime_expand will be set
|
|
765 to a maximum possible number and that will result in the slice
|
|
766 which would have only 2pbs.
|
|
767 Default = 0.
|
|
768 Arg [4] : (optional) int* $fpref
|
|
769 The reference to a number of basepairs to shift this slices five_prime
|
|
770 coordinate by. Normally it would be set to $five_prime_expand.
|
|
771 But in case when $five_prime_expand shift can not be applied and
|
|
772 $force_expand is set to 1, then $$fpref will contain the maximum possible
|
|
773 shift
|
|
774 Arg [5] : (optional) int* $tpref
|
|
775 The reference to a number of basepairs to shift this slices three_prime
|
|
776 coordinate by. Normally it would be set to $three_prime_expand.
|
|
777 But in case when $five_prime_expand shift can not be applied and
|
|
778 $force_expand is set to 1, then $$tpref will contain the maximum possible
|
|
779 shift
|
|
780 Example : my $expanded_slice = $slice->expand( 1000, 1000);
|
|
781 my $contracted_slice = $slice->expand(-1000,-1000);
|
|
782 my $shifted_right_slice = $slice->expand(-1000, 1000);
|
|
783 my $shifted_left_slice = $slice->expand( 1000,-1000);
|
|
784 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
|
|
785
|
|
786 Description: Returns a slice which is a resized copy of this slice. The
|
|
787 start and end are moved outwards from the center of the slice
|
|
788 if positive values are provided and moved inwards if negative
|
|
789 values are provided. This slice remains unchanged. A slice
|
|
790 may not be contracted below 1bp but may grow to be arbitrarily
|
|
791 large.
|
|
792 Returntype : Bio::EnsEMBL::Slice
|
|
793 Exceptions : warning if an attempt is made to contract the slice below 1bp
|
|
794 Caller : general
|
|
795 Status : Stable
|
|
796
|
|
797 =cut
|
|
798
|
|
799 sub expand {
|
|
800 my $self = shift;
|
|
801 my $five_prime_shift = shift || 0;
|
|
802 my $three_prime_shift = shift || 0;
|
|
803 my $force_expand = shift || 0;
|
|
804 my $fpref = shift;
|
|
805 my $tpref = shift;
|
|
806
|
|
807 if ( $self->{'seq'} ) {
|
|
808 warning(
|
|
809 "Cannot expand a slice which has a manually attached sequence ");
|
|
810 return undef;
|
|
811 }
|
|
812
|
|
813 my $new_start;
|
|
814 my $new_end;
|
|
815 my $sshift = $five_prime_shift;
|
|
816 my $eshift = $three_prime_shift;
|
|
817
|
|
818 if ( $self->{'strand'} != 1 ) {
|
|
819 $eshift = $five_prime_shift;
|
|
820 $sshift = $three_prime_shift;
|
|
821 }
|
|
822
|
|
823 $new_start = $self->{'start'} - $sshift;
|
|
824 $new_end = $self->{'end'} + $eshift;
|
|
825
|
|
826 # if($new_start > $new_end) {
|
|
827 # if ($force_expand) { # Apply max possible shift, if force_expand is set
|
|
828 # if ($sshift < 0) { # if we are contracting the slice from the start - move the start just before the end
|
|
829 # $new_start = $new_end - 1;
|
|
830 # $sshift = $self->{start} - $new_start;
|
|
831 # }
|
|
832
|
|
833 # if($new_start > $new_end) { # if the slice still has a negative length - try to move the end
|
|
834 # if ($eshift < 0) {
|
|
835 # $new_end = $new_start + 1;
|
|
836 # $eshift = $new_end - $self->{end};
|
|
837 # }
|
|
838 # }
|
|
839 # return the values by which the primes were actually shifted
|
|
840 # $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
|
|
841 # $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
|
|
842 # }
|
|
843 # if($new_start > $new_end) {
|
|
844 # throw('Slice start cannot be greater than slice end');
|
|
845 # }
|
|
846 # }
|
|
847
|
|
848 #fastest way to copy a slice is to do a shallow hash copy
|
|
849 my %new_slice = %$self;
|
|
850 $new_slice{'start'} = int($new_start);
|
|
851 $new_slice{'end'} = int($new_end);
|
|
852
|
|
853 return bless \%new_slice, ref($self);
|
|
854 } ## end sub expand
|
|
855
|
|
856 =head2 sub_Slice_same
|
|
857
|
|
858 Arg 1 : int $start
|
|
859 Arg 2 : int $end
|
|
860 Arge [3] : int $strand
|
|
861 Description: Makes another Slice that covers only part of this slice
|
|
862 If a slice is requested which lies outside of the boundaries
|
|
863 of this function will return undef. This means that
|
|
864 behaviour will be consistant whether or not the slice is
|
|
865 attached to the database (i.e. if there is attached sequence
|
|
866 to the slice). Alternatively the expand() method or the
|
|
867 SliceAdaptor::fetch_by_region method can be used instead.
|
|
868 Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
|
|
869 Exceptions : none
|
|
870 Caller : general
|
|
871 Status : Stable
|
|
872
|
|
873 =cut
|
|
874
|
|
875 sub sub_Slice_same {
|
|
876 my ( $self, $start, $end, $strand ) = @_;
|
|
877
|
|
878 if ( $start < 1 || $start > $self->{'end'} ) {
|
|
879 # throw( "start argument not valid" );
|
|
880 return undef;
|
|
881 }
|
|
882
|
|
883 if ( $end < $start || $end > $self->{'end'} ) {
|
|
884 # throw( "end argument not valid" )
|
|
885 return undef;
|
|
886 }
|
|
887
|
|
888 my ( $new_start, $new_end, $new_strand, $new_seq );
|
|
889 if ( !defined $strand ) {
|
|
890 $strand = 1;
|
|
891 }
|
|
892
|
|
893 if ( $self->{'strand'} == 1 ) {
|
|
894 $new_start = $self->{'start'} + $start - 1;
|
|
895 $new_end = $self->{'start'} + $end - 1;
|
|
896 $new_strand = $strand;
|
|
897 } else {
|
|
898 $new_start = $self->{'end'} - $end + 1;
|
|
899 $new_end = $self->{'end'} - $start + 1;
|
|
900 $new_strand = -$strand;
|
|
901 }
|
|
902
|
|
903 if ( defined $self->{'seq'} ) {
|
|
904 $new_seq = $self->subseq( $start, $end, $strand );
|
|
905 }
|
|
906
|
|
907 #fastest way to copy a slice is to do a shallow hash copy
|
|
908 my %new_slice = %$self;
|
|
909 $new_slice{'start'} = int($new_start);
|
|
910 $new_slice{'end'} = int($new_end);
|
|
911 $new_slice{'strand'} = $new_strand;
|
|
912 if ($new_seq) {
|
|
913 $new_slice{'seq'} = $new_seq;
|
|
914 }
|
|
915
|
|
916 return bless \%new_slice, ref($self);
|
|
917 } ## end sub sub_Slice_same
|
|
918
|
|
919 =head2 seq_region_Slice_same
|
|
920
|
|
921 Arg [1] : none
|
|
922 Example : $slice = $slice->seq_region_Slice();
|
|
923 Description: Returns a slice which spans the whole seq_region which this slice
|
|
924 is on. For example if this is a slice which spans a small region
|
|
925 of chromosome X, this method will return a slice which covers the
|
|
926 entire chromosome X. The returned slice will always have strand
|
|
927 of 1 and start of 1. This method cannot be used if the sequence
|
|
928 of the slice has been set manually.
|
|
929 Returntype : Bio::EnsEMBL::Slice
|
|
930 Exceptions : warning if called when sequence of Slice has been set manually.
|
|
931 Caller : general
|
|
932 Status : Stable
|
|
933
|
|
934 =cut
|
|
935
|
|
936 sub seq_region_Slice_same {
|
|
937 my $self = shift;
|
|
938
|
|
939 if ( $self->{'seq'} ) {
|
|
940 warning(
|
|
941 "Cannot get a seq_region_Slice of a slice which has manually "
|
|
942 . "attached sequence " );
|
|
943 return undef;
|
|
944 }
|
|
945
|
|
946 # quick shallow copy
|
|
947 my $slice;
|
|
948 %{$slice} = %{$self};
|
|
949 bless $slice, ref($self);
|
|
950
|
|
951 $slice->{'start'} = 1;
|
|
952 $slice->{'end'} = $slice->{'seq_region_length'};
|
|
953 $slice->{'strand'} = 1;
|
|
954
|
|
955 return $slice;
|
|
956 }
|
|
957
|
|
958 =head2 get_seq_region_id_same
|
|
959
|
|
960 Arg [1] : none
|
|
961 Example : my $seq_region_id = $slice->get_seq_region_id();
|
|
962 Description: Gets the internal identifier of the seq_region that this slice
|
|
963 is on. Note that this function will not work correctly if this
|
|
964 slice does not have an attached adaptor. Also note that it may
|
|
965 be better to go through the SliceAdaptor::get_seq_region_id
|
|
966 method if you are working with multiple databases since is
|
|
967 possible to work with slices from databases with different
|
|
968 internal seq_region identifiers.
|
|
969 Returntype : int or undef if slices does not have attached adaptor
|
|
970 Exceptions : warning if slice is not associated with a SliceAdaptor
|
|
971 Caller : assembly loading scripts, general
|
|
972 Status : Stable
|
|
973
|
|
974 =cut
|
|
975
|
|
976 sub get_seq_region_id_same {
|
|
977 my ($self) = @_;
|
|
978
|
|
979 if ( $self->adaptor ) {
|
|
980 return $self->adaptor->get_seq_region_id($self);
|
|
981 } else {
|
|
982 warning('Cannot retrieve seq_region_id without attached adaptor.');
|
|
983 return undef;
|
|
984 }
|
|
985 }
|
|
986
|
|
987 =head2 get_all_Attributes
|
|
988
|
|
989 Arg [1] : optional string $attrib_code
|
|
990 The code of the attribute type to retrieve values for.
|
|
991 Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
|
|
992 @slice_attributes = @{$slice->get_all_Attributes()};
|
|
993 Description: Gets a list of Attributes of this slice''s seq_region.
|
|
994 Optionally just get Attrubutes for given code.
|
|
995 Returntype : listref Bio::EnsEMBL::Attribute
|
|
996 Exceptions : warning if slice does not have attached adaptor
|
|
997 Caller : general
|
|
998 Status : Stable
|
|
999
|
|
1000 =cut
|
|
1001
|
|
1002 sub get_all_Attributes {
|
|
1003 my $self = shift;
|
|
1004 my $attrib_code = shift;
|
|
1005
|
|
1006 my $result;
|
|
1007 my @results;
|
|
1008
|
|
1009 if ( !$self->adaptor() ) {
|
|
1010 warning('Cannot get attributes without an adaptor.');
|
|
1011 return [];
|
|
1012 }
|
|
1013
|
|
1014 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
|
|
1015
|
|
1016 ## circular BOF
|
|
1017 my $sl1 =
|
|
1018 Bio::EnsEMBL::CircularSlice->new(
|
|
1019 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1020 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1021 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1022 -START => $self->{'start'},
|
|
1023 -END => $self->{'seq_region_length'},
|
|
1024 -STRAND => $self->{'strand'},
|
|
1025 -ADAPTOR => $self->adaptor() );
|
|
1026
|
|
1027 my $sl2 =
|
|
1028 Bio::EnsEMBL::CircularSlice->new(
|
|
1029 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1030 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1031 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1032 -START => 1,
|
|
1033 -END => $self->{'end'},
|
|
1034 -STRAND => $self->{'strand'},
|
|
1035 -ADAPTOR => $self->adaptor() );
|
|
1036 my $pta_ref = [];
|
|
1037
|
|
1038 if ( defined $attrib_code ) {
|
|
1039 my @res1 =
|
|
1040 grep { uc( $_->code() ) eq uc($attrib_code) }
|
|
1041 @{ $attribute_adaptor->fetch_all_by_Slice($sl1) };
|
|
1042 my @res2 =
|
|
1043 grep { uc( $_->code() ) eq uc($attrib_code) }
|
|
1044 @{ $attribute_adaptor->fetch_all_by_Slice($sl2) };
|
|
1045 my @res;
|
|
1046 push @res, @res1, @res2;
|
|
1047 $result = \@res;
|
|
1048 } else {
|
|
1049 my @res1 = @{ $attribute_adaptor->fetch_all_by_Slice($sl1) };
|
|
1050 my @res2 = @{ $attribute_adaptor->fetch_all_by_Slice($sl2) };
|
|
1051 my @res;
|
|
1052 push @res, @res1, @res2;
|
|
1053 $result = \@res;
|
|
1054 }
|
|
1055
|
|
1056 ## circular EOF
|
|
1057
|
|
1058 # if( defined $attrib_code ) {
|
|
1059 # @results = grep { uc($_->code()) eq uc($attrib_code) }
|
|
1060 # @{$attribute_adaptor->fetch_all_by_Slice( $self )};
|
|
1061 # $result = \@results;
|
|
1062 # } else {
|
|
1063 # $result = $attribute_adaptor->fetch_all_by_Slice( $self );
|
|
1064 # }
|
|
1065
|
|
1066 return $result;
|
|
1067 } ## end sub get_all_Attributes
|
|
1068
|
|
1069 =head2 get_all_PredictionTranscripts
|
|
1070
|
|
1071 Arg [1] : (optional) string $logic_name
|
|
1072 The name of the analysis used to generate the prediction
|
|
1073 transcripts obtained.
|
|
1074 Arg [2] : (optional) boolean $load_exons
|
|
1075 If set to true will force loading of all PredictionExons
|
|
1076 immediately rather than loading them on demand later. This
|
|
1077 is faster if there are a large number of PredictionTranscripts
|
|
1078 and the exons will be used.
|
|
1079 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
|
|
1080 Description: Retrieves the list of prediction transcripts which overlap
|
|
1081 this slice with logic_name $logic_name. If logic_name is
|
|
1082 not defined then all prediction transcripts are retrieved.
|
|
1083 Returntype : listref of Bio::EnsEMBL::PredictionTranscript
|
|
1084 Exceptions : warning if slice does not have attached adaptor
|
|
1085 Caller : none
|
|
1086 Status : Stable
|
|
1087
|
|
1088 =cut
|
|
1089
|
|
1090 sub get_all_PredictionTranscripts {
|
|
1091 my ( $self, $logic_name, $load_exons ) = @_;
|
|
1092
|
|
1093 if ( !$self->adaptor() ) {
|
|
1094 warning(
|
|
1095 'Cannot get PredictionTranscripts without attached adaptor');
|
|
1096 return [];
|
|
1097 }
|
|
1098 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
|
|
1099
|
|
1100 ## circular BOF
|
|
1101 my $sl1 =
|
|
1102 Bio::EnsEMBL::CircularSlice->new(
|
|
1103 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1104 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1105 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1106 -START => $self->{'start'},
|
|
1107 -END => $self->{'seq_region_length'},
|
|
1108 -STRAND => $self->{'strand'},
|
|
1109 -ADAPTOR => $self->adaptor() );
|
|
1110
|
|
1111 my $sl2 =
|
|
1112 Bio::EnsEMBL::CircularSlice->new(
|
|
1113 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1114 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1115 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1116 -START => 1,
|
|
1117 -END => $self->{'end'},
|
|
1118 -STRAND => $self->{'strand'},
|
|
1119 -ADAPTOR => $self->adaptor() );
|
|
1120
|
|
1121 my ( @arr, @arr1, @arr2 );
|
|
1122 @arr1 =
|
|
1123 @{ $pta->fetch_all_by_Slice( $sl1, $logic_name, $load_exons ) };
|
|
1124 @arr2 =
|
|
1125 @{ $pta->fetch_all_by_Slice( $sl2, $logic_name, $load_exons ) };
|
|
1126 push @arr, @arr1, @arr2;
|
|
1127 return \@arr;
|
|
1128 ## circular EOF
|
|
1129
|
|
1130 #return $pta->fetch_all_by_Slice($sl1, $logic_name, $load_exons);
|
|
1131 } ## end sub get_all_PredictionTranscripts
|
|
1132
|
|
1133 =head2 get_all_DnaAlignFeatures
|
|
1134
|
|
1135 Arg [1] : (optional) string $logic_name
|
|
1136 The name of the analysis performed on the dna align features
|
|
1137 to obtain.
|
|
1138 Arg [2] : (optional) float $score
|
|
1139 The mimimum score of the features to retrieve
|
|
1140 Arg [3] : (optional) string $dbtype
|
|
1141 The name of an attached database to retrieve the features from
|
|
1142 instead, e.g. 'otherfeatures'.
|
|
1143 Arg [4] : (optional) float hcoverage
|
|
1144 The minimum hcoverage od the featurs to retrieve
|
|
1145 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
|
|
1146 Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
|
|
1147 logic name $logic_name and with score above $score. If
|
|
1148 $logic_name is not defined features of all logic names are
|
|
1149 retrieved. If $score is not defined features of all scores are
|
|
1150 retrieved.
|
|
1151 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
|
|
1152 Exceptions : warning if slice does not have attached adaptor
|
|
1153 Caller : general
|
|
1154 Status : Stable
|
|
1155
|
|
1156 =cut
|
|
1157
|
|
1158 sub get_all_DnaAlignFeatures {
|
|
1159 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_;
|
|
1160
|
|
1161 if ( !$self->adaptor() ) {
|
|
1162 warning('Cannot get DnaAlignFeatures without attached adaptor');
|
|
1163 return [];
|
|
1164 }
|
|
1165
|
|
1166 my $db;
|
|
1167
|
|
1168 if ($dbtype) {
|
|
1169 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1170 if ( !$db ) {
|
|
1171 warning("Don't have db $dbtype returning empty list\n");
|
|
1172 return [];
|
|
1173 }
|
|
1174 } else {
|
|
1175 $db = $self->adaptor->db;
|
|
1176 }
|
|
1177
|
|
1178 my $dafa = $db->get_DnaAlignFeatureAdaptor();
|
|
1179
|
|
1180 if ( defined($score) and defined($hcoverage) ) {
|
|
1181 warning "cannot specify score and hcoverage. Using score only";
|
|
1182 }
|
|
1183
|
|
1184 ## circular BOF
|
|
1185 my $sl1 =
|
|
1186 Bio::EnsEMBL::CircularSlice->new(
|
|
1187 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1188 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1189 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1190 -START => $self->{'start'},
|
|
1191 -END => $self->{'seq_region_length'},
|
|
1192 -STRAND => $self->{'strand'},
|
|
1193 -ADAPTOR => $self->adaptor() );
|
|
1194
|
|
1195 my $sl2 =
|
|
1196 Bio::EnsEMBL::CircularSlice->new(
|
|
1197 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1198 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1199 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1200 -START => 1,
|
|
1201 -END => $self->{'end'},
|
|
1202 -STRAND => $self->{'strand'},
|
|
1203 -ADAPTOR => $self->adaptor() );
|
|
1204
|
|
1205 my ( @arr, @arr1, @arr2 );
|
|
1206 my %union;
|
|
1207 if ( defined($score) ) {
|
|
1208 @arr1 = @{ $dafa->fetch_all_by_Slice_and_score( $sl1, $score,
|
|
1209 $logic_name ) };
|
|
1210 @arr2 = @{ $dafa->fetch_all_by_Slice_and_score( $sl2, $score,
|
|
1211 $logic_name ) };
|
|
1212 push @arr, @arr1, @arr2;
|
|
1213 return \@arr;
|
|
1214 }
|
|
1215 @arr1 = @{
|
|
1216 $dafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage,
|
|
1217 $logic_name ) };
|
|
1218 @arr2 = @{
|
|
1219 $dafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage,
|
|
1220 $logic_name ) };
|
|
1221 push @arr, @arr1, @arr2;
|
|
1222 return \@arr;
|
|
1223
|
|
1224 ## circular EOF
|
|
1225
|
|
1226 # if(defined($score)){
|
|
1227 # return $dafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
|
|
1228 # }
|
|
1229 # return $dafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
|
|
1230 } ## end sub get_all_DnaAlignFeatures
|
|
1231
|
|
1232 =head2 get_all_ProteinAlignFeatures
|
|
1233
|
|
1234 Arg [1] : (optional) string $logic_name
|
|
1235 The name of the analysis performed on the protein align features
|
|
1236 to obtain.
|
|
1237 Arg [2] : (optional) float $score
|
|
1238 The mimimum score of the features to retrieve
|
|
1239 Arg [3] : (optional) string $dbtype
|
|
1240 The name of an attached database to retrieve features from
|
|
1241 instead.
|
|
1242 Arg [4] : (optional) float hcoverage
|
|
1243 The minimum hcoverage od the featurs to retrieve
|
|
1244 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
|
|
1245 Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
|
|
1246 logic name $logic_name and with score above $score. If
|
|
1247 $logic_name is not defined features of all logic names are
|
|
1248 retrieved. If $score is not defined features of all scores are
|
|
1249 retrieved.
|
|
1250 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
|
|
1251 Exceptions : warning if slice does not have attached adaptor
|
|
1252 Caller : general
|
|
1253 Status : Stable
|
|
1254
|
|
1255 =cut
|
|
1256
|
|
1257 sub get_all_ProteinAlignFeatures {
|
|
1258 my ( $self, $logic_name, $score, $dbtype, $hcoverage ) = @_;
|
|
1259 if ( !$self->adaptor() ) {
|
|
1260 warning('Cannot get ProteinAlignFeatures without attached adaptor');
|
|
1261 return [];
|
|
1262 }
|
|
1263 my $db;
|
|
1264 if ($dbtype) {
|
|
1265 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1266 if ( !$db ) {
|
|
1267 warning("Don't have db $dbtype returning empty list\n");
|
|
1268 return [];
|
|
1269 }
|
|
1270 } else {
|
|
1271 $db = $self->adaptor->db;
|
|
1272 }
|
|
1273
|
|
1274 my $pafa = $db->get_ProteinAlignFeatureAdaptor();
|
|
1275
|
|
1276 if ( defined($score) and defined($hcoverage) ) {
|
|
1277 warning "cannot specify score and hcoverage. Using score only";
|
|
1278 }
|
|
1279
|
|
1280 my $sl1 =
|
|
1281 Bio::EnsEMBL::CircularSlice->new(
|
|
1282 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1283 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1285 -START => $self->{'start'},
|
|
1286 -END => $self->{'seq_region_length'},
|
|
1287 -STRAND => $self->{'strand'},
|
|
1288 -ADAPTOR => $self->adaptor() );
|
|
1289
|
|
1290 my $sl2 =
|
|
1291 Bio::EnsEMBL::CircularSlice->new(
|
|
1292 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1293 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1294 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1295 -START => 1,
|
|
1296 -END => $self->{'end'},
|
|
1297 -STRAND => $self->{'strand'},
|
|
1298 -ADAPTOR => $self->adaptor() );
|
|
1299 my ( @arr, @arr1, @arr2 );
|
|
1300 if ( defined($score) ) {
|
|
1301 @arr1 = @{ $pafa->fetch_all_by_Slice_and_score( $sl1, $score,
|
|
1302 $logic_name ) };
|
|
1303 @arr2 = @{ $pafa->fetch_all_by_Slice_and_score( $sl2, $score,
|
|
1304 $logic_name ) };
|
|
1305
|
|
1306 push @arr, @arr1, @arr2;
|
|
1307 return \@arr;
|
|
1308 }
|
|
1309
|
|
1310 @arr1 = @{
|
|
1311 $pafa->fetch_all_by_Slice_and_hcoverage( $sl1, $hcoverage,
|
|
1312 $logic_name ) };
|
|
1313 @arr2 = @{
|
|
1314 $pafa->fetch_all_by_Slice_and_hcoverage( $sl2, $hcoverage,
|
|
1315 $logic_name ) };
|
|
1316
|
|
1317 push @arr, @arr1, @arr2;
|
|
1318 return \@arr;
|
|
1319
|
|
1320 # if(defined($score)){
|
|
1321 # return $pafa->fetch_all_by_Slice_and_score($self,$score, $logic_name);
|
|
1322 # }
|
|
1323 # return $pafa->fetch_all_by_Slice_and_hcoverage($self,$hcoverage, $logic_name);
|
|
1324
|
|
1325 } ## end sub get_all_ProteinAlignFeatures
|
|
1326
|
|
1327 =head2 get_all_SimilarityFeatures
|
|
1328
|
|
1329 Arg [1] : (optional) string $logic_name
|
|
1330 the name of the analysis performed on the features to retrieve
|
|
1331 Arg [2] : (optional) float $score
|
|
1332 the lower bound of the score of the features to be retrieved
|
|
1333 Example : @feats = @{$slice->get_all_SimilarityFeatures};
|
|
1334 Description: Retrieves all dna_align_features and protein_align_features
|
|
1335 with analysis named $logic_name and with score above $score.
|
|
1336 It is probably faster to use get_all_ProteinAlignFeatures or
|
|
1337 get_all_DnaAlignFeatures if a sepcific feature type is desired.
|
|
1338 If $logic_name is not defined features of all logic names are
|
|
1339 retrieved. If $score is not defined features of all scores are
|
|
1340 retrieved.
|
|
1341 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
|
|
1342 Exceptions : warning if slice does not have attached adaptor
|
|
1343 Caller : general
|
|
1344 Status : Stable
|
|
1345
|
|
1346 =cut
|
|
1347
|
|
1348 sub get_all_SimilarityFeatures {
|
|
1349 my ( $self, $logic_name, $score ) = @_;
|
|
1350
|
|
1351 my @out = ();
|
|
1352
|
|
1353 push @out,
|
|
1354 @{ $self->get_all_ProteinAlignFeatures( $logic_name, $score ) };
|
|
1355 push @out,
|
|
1356 @{ $self->get_all_DnaAlignFeatures( $logic_name, $score ) };
|
|
1357
|
|
1358 return \@out;
|
|
1359 }
|
|
1360
|
|
1361 =head2 get_all_SimpleFeatures
|
|
1362
|
|
1363 Arg [1] : (optional) string $logic_name
|
|
1364 The name of the analysis performed on the simple features
|
|
1365 to obtain.
|
|
1366 Arg [2] : (optional) float $score
|
|
1367 The mimimum score of the features to retrieve
|
|
1368 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
|
|
1369 Description: Retrieves the SimpleFeatures which overlap this slice with
|
|
1370 logic name $logic_name and with score above $score. If
|
|
1371 $logic_name is not defined features of all logic names are
|
|
1372 retrieved. If $score is not defined features of all scores are
|
|
1373 retrieved.
|
|
1374 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
|
|
1375 Exceptions : warning if slice does not have attached adaptor
|
|
1376 Caller : general
|
|
1377 Status : Stable
|
|
1378
|
|
1379 =cut
|
|
1380
|
|
1381 sub get_all_SimpleFeatures {
|
|
1382 my ( $self, $logic_name, $score, $dbtype ) = @_;
|
|
1383
|
|
1384 if ( !$self->adaptor() ) {
|
|
1385 warning('Cannot get SimpleFeatures without attached adaptor');
|
|
1386 return [];
|
|
1387 }
|
|
1388
|
|
1389 my $db;
|
|
1390 if ($dbtype) {
|
|
1391 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1392 if ( !$db ) {
|
|
1393 warning("Don't have db $dbtype returning empty list\n");
|
|
1394 return [];
|
|
1395 }
|
|
1396 } else {
|
|
1397 $db = $self->adaptor->db;
|
|
1398 }
|
|
1399
|
|
1400 my $sfa = $db->get_SimpleFeatureAdaptor();
|
|
1401
|
|
1402 return $sfa->fetch_all_by_Slice_and_score( $self, $score,
|
|
1403 $logic_name );
|
|
1404 }
|
|
1405
|
|
1406 #### STOP !!! ########################################
|
|
1407
|
|
1408 =head2 get_all_RepeatFeatures
|
|
1409
|
|
1410 Arg [1] : (optional) string $logic_name
|
|
1411 The name of the analysis performed on the repeat features
|
|
1412 to obtain.
|
|
1413 Arg [2] : (optional) string $repeat_type
|
|
1414 Limits features returned to those of the specified repeat_type
|
|
1415 Arg [3] : (optional) string $db
|
|
1416 Key for database e.g. core/vega/cdna/....
|
|
1417 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'LTR')};
|
|
1418 Description: Retrieves the RepeatFeatures which overlap with
|
|
1419 logic name $logic_name and with score above $score. If
|
|
1420 $logic_name is not defined features of all logic names are
|
|
1421 retrieved.
|
|
1422 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
|
|
1423 Exceptions : warning if slice does not have attached adaptor
|
|
1424 Caller : general
|
|
1425 Status : Stable
|
|
1426
|
|
1427 =cut
|
|
1428
|
|
1429 sub get_all_RepeatFeatures {
|
|
1430 my ( $self, $logic_name, $repeat_type, $dbtype ) = @_;
|
|
1431
|
|
1432 if ( !$self->adaptor() ) {
|
|
1433 warning('Cannot get RepeatFeatures without attached adaptor');
|
|
1434 return [];
|
|
1435 }
|
|
1436
|
|
1437 my $db;
|
|
1438 if ($dbtype) {
|
|
1439 $db = $self->adaptor->db->get_db_adaptor($dbtype);
|
|
1440 if ( !$db ) {
|
|
1441 warning("Don't have db $dbtype returning empty list\n");
|
|
1442 return [];
|
|
1443 }
|
|
1444 } else {
|
|
1445 $db = $self->adaptor->db;
|
|
1446 }
|
|
1447
|
|
1448 my $rpfa = $db->get_RepeatFeatureAdaptor();
|
|
1449
|
|
1450 return $rpfa->fetch_all_by_Slice( $self, $logic_name, $repeat_type );
|
|
1451 }
|
|
1452
|
|
1453 =head2 get_all_LD_values
|
|
1454
|
|
1455 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
|
|
1456 Description : returns all LD values on this slice. This function will only work correctly if the variation
|
|
1457 database has been attached to the core database. If the argument is passed, will return the LD information
|
|
1458 in that population
|
|
1459 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
|
|
1460 Exceptions : none
|
|
1461 Caller : contigview, snpview
|
|
1462 Status : At Risk
|
|
1463 : Variation database is under development.
|
|
1464
|
|
1465 =cut
|
|
1466
|
|
1467 sub get_all_LD_values {
|
|
1468 my $self = shift;
|
|
1469 my $population = shift;
|
|
1470
|
|
1471 if ( !$self->adaptor() ) {
|
|
1472 warning('Cannot get LDFeatureContainer without attached adaptor');
|
|
1473 return [];
|
|
1474 }
|
|
1475
|
|
1476 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
1477
|
|
1478 unless ($variation_db) {
|
|
1479 warning( "Variation database must be attached to core database to "
|
|
1480 . "retrieve variation information" );
|
|
1481 return [];
|
|
1482 }
|
|
1483
|
|
1484 my $ld_adaptor = $variation_db->get_LDFeatureContainerAdaptor;
|
|
1485
|
|
1486 if ($ld_adaptor) {
|
|
1487 return $ld_adaptor->fetch_by_Slice( $self, $population );
|
|
1488 } else {
|
|
1489 return [];
|
|
1490
|
|
1491 }
|
|
1492
|
|
1493 # my $ld_adaptor = Bio::EnsEMBL::DBSQL::MergedAdaptor->new(-species => $self->adaptor()->db()->species, -type => "LDFeatureContainer");
|
|
1494
|
|
1495 # if( $ld_adaptor ) {
|
|
1496 # my $ld_values = $ld_adaptor->fetch_by_Slice($self,$population);
|
|
1497 # if (@{$ld_values} > 1){
|
|
1498 # warning("More than 1 variation database attached. Trying to merge LD results");
|
|
1499 # my $ld_value_merged = shift @{$ld_values};
|
|
1500 # #with more than 1 variation database attached, will try to merge in one single LDContainer object.
|
|
1501 # foreach my $ld (@{$ld_values}){
|
|
1502 # #copy the ld values to the result hash
|
|
1503 # foreach my $key (keys %{$ld->{'ldContainer'}}){
|
|
1504 # $ld_value_merged->{'ldContainer'}->{$key} = $ld->{'ldContainer'}->{$key};
|
|
1505 # }
|
|
1506 # #and copy the variationFeatures as well
|
|
1507 # foreach my $key (keys %{$ld->{'variationFeatures'}}){
|
|
1508 # $ld_value_merged->{'variationFeatures'}->{$key} = $ld->{'variationFeatures'}->{$key};
|
|
1509 # }
|
|
1510
|
|
1511 # }
|
|
1512 # return $ld_value_merged;
|
|
1513 # }
|
|
1514 # else{
|
|
1515 # return shift @{$ld_values};
|
|
1516 # }
|
|
1517 # } else {
|
|
1518 # warning("Variation database must be attached to core database to " .
|
|
1519 # "retrieve variation information" );
|
|
1520 # return [];
|
|
1521 # }
|
|
1522 } ## end sub get_all_LD_values
|
|
1523
|
|
1524 =head2 get_all_VariationFeatures
|
|
1525
|
|
1526 Args : $filter [optional]
|
|
1527 Description:returns all variation features on this slice. This function will only work
|
|
1528 correctly if the variation database has been attached to the core database.
|
|
1529 If $filter is "genotyped" return genotyped Snps only... (nice likkle hack);
|
|
1530 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1531 Exceptions : none
|
|
1532 Caller : contigview, snpview
|
|
1533 Status : At Risk
|
|
1534 : Variation database is under development.
|
|
1535
|
|
1536 =cut
|
|
1537
|
|
1538 sub get_all_VariationFeatures {
|
|
1539 my $self = shift;
|
|
1540 my $filter = shift;
|
|
1541
|
|
1542 $filter ||= '';
|
|
1543 if ( !$self->adaptor() ) {
|
|
1544 warning('Cannot get variation features without attached adaptor');
|
|
1545 return [];
|
|
1546 }
|
|
1547
|
|
1548 my $vf_adaptor =
|
|
1549 Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
|
|
1550 -species => $self->adaptor()->db()->species,
|
|
1551 -type => "VariationFeature" );
|
|
1552 if ($vf_adaptor) {
|
|
1553 if ( $filter eq 'genotyped' ) {
|
|
1554 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
|
|
1555 } else {
|
|
1556 return $vf_adaptor->fetch_all_by_Slice($self);
|
|
1557 }
|
|
1558 } else {
|
|
1559 warning( "Variation database must be attached to core database to "
|
|
1560 . "retrieve variation information" );
|
|
1561 return [];
|
|
1562 }
|
|
1563 } ## end sub get_all_VariationFeatures
|
|
1564
|
|
1565 =head2 get_all_IndividualSlice
|
|
1566
|
|
1567 Args : none
|
|
1568 Example : my $individualSlice = $slice->get_by_Population($population);
|
|
1569 Description : Gets the specific Slice for all the individuls in the population
|
|
1570 ReturnType : listref of Bio::EnsEMB::IndividualSlice
|
|
1571 Exceptions : none
|
|
1572 Caller : general
|
|
1573
|
|
1574 =cut
|
|
1575
|
|
1576 sub get_all_IndividualSlice {
|
|
1577 my $self = shift;
|
|
1578
|
|
1579 my $individualSliceFactory =
|
|
1580 Bio::EnsEMBL::IndividualSliceFactory->new(
|
|
1581 -START => $self->{'start'},
|
|
1582 -END => $self->{'end'},
|
|
1583 -STRAND => $self->{'strand'},
|
|
1584 -ADAPTOR => $self->adaptor(),
|
|
1585 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1586 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1587 -COORD_SYSTEM => $self->{'coord_system'}, );
|
|
1588 return $individualSliceFactory->get_all_IndividualSlice();
|
|
1589 }
|
|
1590
|
|
1591 =head2 get_by_Individual
|
|
1592
|
|
1593 Arg[1] : Bio::EnsEMBL::Variation::Individual $individual
|
|
1594 Example : my $individualSlice = $slice->get_by_Individual($individual);
|
|
1595 Description : Gets the specific Slice for the individual
|
|
1596 ReturnType : Bio::EnsEMB::IndividualSlice
|
|
1597 Exceptions : none
|
|
1598 Caller : general
|
|
1599
|
|
1600 =cut
|
|
1601
|
|
1602 sub get_by_Individual {
|
|
1603 my $self = shift;
|
|
1604 my $individual = shift;
|
|
1605
|
|
1606 return Bio::EnsEMBL::IndividualSlice->new(
|
|
1607 -START => $self->{'start'},
|
|
1608 -END => $self->{'end'},
|
|
1609 -STRAND => $self->{'strand'},
|
|
1610 -ADAPTOR => $self->adaptor(),
|
|
1611 # -SEQ => $self->{'seq'},
|
|
1612 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1613 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1614 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1615 -INDIVIDUAL => $individual );
|
|
1616
|
|
1617 }
|
|
1618
|
|
1619 =head2 get_by_strain
|
|
1620
|
|
1621 Arg[1] : string $strain
|
|
1622 Example : my $strainSlice = $slice->get_by_strain($strain);
|
|
1623 Description : Gets the specific Slice for the strain
|
|
1624 ReturnType : Bio::EnsEMB::StrainSlice
|
|
1625 Exceptions : none
|
|
1626 Caller : general
|
|
1627
|
|
1628 =cut
|
|
1629
|
|
1630 sub get_by_strain {
|
|
1631 my $self = shift;
|
|
1632 my $strain_name = shift;
|
|
1633
|
|
1634 return
|
|
1635 Bio::EnsEMBL::StrainSlice->new(
|
|
1636 -START => $self->{'start'},
|
|
1637 -END => $self->{'end'},
|
|
1638 -STRAND => $self->{'strand'},
|
|
1639 -ADAPTOR => $self->adaptor(),
|
|
1640 -SEQ => $self->{'seq'},
|
|
1641 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
1642 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
1643 -COORD_SYSTEM => $self->{'coord_system'},
|
|
1644 -STRAIN_NAME => $strain_name );
|
|
1645
|
|
1646 }
|
|
1647
|
|
1648 sub calculate_theta {
|
|
1649 my $self = shift;
|
|
1650 my $strains = shift;
|
|
1651 my $feature = shift
|
|
1652 ; #optional parameter. Name of the feature in the Slice you want to calculate
|
|
1653
|
|
1654 if ( !$self->adaptor() ) {
|
|
1655 warning('Cannot get variation features without attached adaptor');
|
|
1656 return 0;
|
|
1657 }
|
|
1658 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
1659
|
|
1660 unless ($variation_db) {
|
|
1661 warning( "Variation database must be attached to core database to "
|
|
1662 . "retrieve variation information" );
|
|
1663 return 0;
|
|
1664 }
|
|
1665
|
|
1666 #need to get coverage regions for the slice in the different strains
|
|
1667 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
|
|
1668 my $strain;
|
|
1669 my $differences = [];
|
|
1670 my $slices = [];
|
|
1671 if ($coverage_adaptor) {
|
|
1672 my $num_strains = scalar( @{$strains} ) + 1;
|
|
1673 if ( !defined $feature ) {
|
|
1674 #we want to calculate for the whole slice
|
|
1675 push @{$slices},
|
|
1676 $self; #add the slice as the slice to calculate the theta value
|
|
1677 } else {
|
|
1678 #we have features, get the slices for the different features
|
|
1679 my $features = $self->get_all_Exons();
|
|
1680 map { push @{$slices}, $_->feature_Slice }
|
|
1681 @{$features}; #add the slices of the features
|
|
1682 }
|
|
1683 my $length_regions = 0;
|
|
1684 my $snps = 0;
|
|
1685 my $theta = 0;
|
|
1686 my $last_position = 0;
|
|
1687 #get all the differences in the slice coordinates
|
|
1688 foreach my $strain_name ( @{$strains} ) {
|
|
1689 my $strain = $self->get_by_strain($strain_name)
|
|
1690 ; #get the strainSlice for the strain
|
|
1691
|
|
1692 my $results = $strain->get_all_differences_Slice;
|
|
1693 push @{$differences}, @{$results} if ( defined $results );
|
|
1694 }
|
|
1695 #when we finish, we have, in max_level, the regions covered by all the sample
|
|
1696 #sort the differences by the genomic position
|
|
1697 my @differences_sorted =
|
|
1698 sort { $a->start <=> $b->start } @{$differences};
|
|
1699 foreach my $slice ( @{$slices} ) {
|
|
1700 my $regions_covered =
|
|
1701 $coverage_adaptor->fetch_all_regions_covered( $slice,
|
|
1702 $strains );
|
|
1703 if ( defined $regions_covered ) {
|
|
1704 foreach my $range ( @{$regions_covered} ) {
|
|
1705 $length_regions +=
|
|
1706 ( $range->[1] - $range->[0] ) +
|
|
1707 1; #add the length of the genomic region
|
|
1708 for ( my $i = $last_position; $i < @differences_sorted; $i++ )
|
|
1709 {
|
|
1710 if ( $differences_sorted[$i]->start >= $range->[0]
|
|
1711 && $differences_sorted[$i]->end <= $range->[1] )
|
|
1712 {
|
|
1713 $snps++; #count differences in the region
|
|
1714 } elsif ( $differences_sorted[$i]->end > $range->[1] ) {
|
|
1715 $last_position = $i;
|
|
1716 last;
|
|
1717 }
|
|
1718 }
|
|
1719 }
|
|
1720 #when all the ranges have been iterated, calculate rho
|
|
1721 #this is an intermediate variable called a in the formula
|
|
1722 # a = sum i=2..strains 1/i-1
|
|
1723 }
|
|
1724 } ## end foreach my $slice ( @{$slices...})
|
|
1725 my $a = _calculate_a($num_strains);
|
|
1726 $theta = $snps/( $a*$length_regions );
|
|
1727 return $theta;
|
|
1728 } else {
|
|
1729 return 0;
|
|
1730 }
|
|
1731 } ## end sub calculate_theta
|
|
1732
|
|
1733 sub _calculate_a {
|
|
1734 my $max_level = shift;
|
|
1735
|
|
1736 my $a = 0;
|
|
1737 for ( my $i = 2; $i <= $max_level + 1; $i++ ) {
|
|
1738 $a += 1/( $i - 1 );
|
|
1739 }
|
|
1740 return $a;
|
|
1741 }
|
|
1742
|
|
1743 sub calculate_pi {
|
|
1744 my $self = shift;
|
|
1745 my $strains = shift;
|
|
1746 my $feature = shift;
|
|
1747
|
|
1748 if ( !$self->adaptor() ) {
|
|
1749 warning('Cannot get variation features without attached adaptor');
|
|
1750 return 0;
|
|
1751 }
|
|
1752 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
|
|
1753
|
|
1754 unless ($variation_db) {
|
|
1755 warning( "Variation database must be attached to core database to "
|
|
1756 . "retrieve variation information" );
|
|
1757 return 0;
|
|
1758 }
|
|
1759
|
|
1760 #need to get coverage regions for the slice in the different strains
|
|
1761 my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
|
|
1762 my $differences = [];
|
|
1763 my $slices = [];
|
|
1764 if ($coverage_adaptor) {
|
|
1765 my $num_strains = scalar( @{$strains} ) + 1;
|
|
1766 if ( !defined $feature ) {
|
|
1767 #we want to calculate for the whole slice
|
|
1768 push @{$slices},
|
|
1769 $self; #add the slice as the slice to calculate the theta value
|
|
1770 } else {
|
|
1771 #we have features, get the slices for the different features
|
|
1772 my $features = $self->get_all_Exons();
|
|
1773 map { push @{$slices}, $_->feature_Slice }
|
|
1774 @{$features}; #add the slices of the features
|
|
1775 }
|
|
1776 my @range_differences = ();
|
|
1777 my $pi = 0;
|
|
1778 my $regions = 0;
|
|
1779 my $last_position =
|
|
1780 0; #last position visited in the sorted list of differences
|
|
1781 my $triallelic = 0;
|
|
1782 my $is_triallelic = 0;
|
|
1783 foreach my $slice ( @{$slices} ) {
|
|
1784
|
|
1785 foreach my $strain_name ( @{$strains} ) {
|
|
1786 my $strain = $slice->get_by_strain($strain_name)
|
|
1787 ; #get the strainSlice for the strain
|
|
1788 my $results = $strain->get_all_differences_Slice;
|
|
1789 push @{$differences}, @{$results} if ( defined $results );
|
|
1790 }
|
|
1791 my @differences_sorted =
|
|
1792 sort { $a->start <=> $b->start } @{$differences};
|
|
1793
|
|
1794 my $regions_covered =
|
|
1795 $coverage_adaptor->fetch_all_regions_covered( $slice,
|
|
1796 $strains );
|
|
1797 #when we finish, we have, in max_level, the regions covered by all the sample
|
|
1798 #sort the differences
|
|
1799 if ( defined $regions_covered ) {
|
|
1800 foreach my $range ( @{$regions_covered} ) {
|
|
1801 for ( my $i = $last_position; $i < @differences_sorted; $i++ )
|
|
1802 {
|
|
1803 if ( $differences_sorted[$i]->start >= $range->[0]
|
|
1804 && $differences_sorted[$i]->end <= $range->[1] )
|
|
1805 {
|
|
1806 #check wether it is the same region or different
|
|
1807 if ( !defined $range_differences[0]
|
|
1808 || ( $differences_sorted[$i]->start ==
|
|
1809 $range_differences[0]->start ) )
|
|
1810 {
|
|
1811 if ( defined $range_differences[0]
|
|
1812 && ( $differences_sorted[$i]->allele_string ne
|
|
1813 $range_differences[0]->allele_string ) )
|
|
1814 {
|
|
1815 $is_triallelic = 1;
|
|
1816 }
|
|
1817 push @range_differences, $differences_sorted[$i];
|
|
1818 } else {
|
|
1819 #new site, calc pi for the previous one
|
|
1820 $pi +=
|
|
1821 2*
|
|
1822 ( @range_differences/($num_strains) )*
|
|
1823 ( 1 - ( @range_differences/$num_strains ) );
|
|
1824 if ($is_triallelic) {
|
|
1825 $triallelic++;
|
|
1826 $is_triallelic = 0;
|
|
1827 }
|
|
1828 $regions++;
|
|
1829 @range_differences = ();
|
|
1830 #and start a new range
|
|
1831 push @range_differences, $differences_sorted[$i];
|
|
1832 }
|
|
1833 } elsif ( $differences_sorted[$i]->end > $range->[1] ) {
|
|
1834 $last_position = $i;
|
|
1835 last;
|
|
1836 }
|
|
1837 } ## end for ( my $i = $last_position...)
|
|
1838 #calculate pi for last site, if any
|
|
1839 if ( defined $range_differences[0] ) {
|
|
1840 $pi +=
|
|
1841 2*
|
|
1842 ( @range_differences/$num_strains )*
|
|
1843 ( 1 - ( @range_differences/$num_strains ) );
|
|
1844 $regions++;
|
|
1845 }
|
|
1846 } ## end foreach my $range ( @{$regions_covered...})
|
|
1847 } ## end if ( defined $regions_covered)
|
|
1848 $pi = $pi/$regions; #calculate average pi
|
|
1849 print
|
|
1850 "Regions with variations in region $regions and triallelic $triallelic\n\n";
|
|
1851 } ## end foreach my $slice ( @{$slices...})
|
|
1852 return $pi;
|
|
1853 } else {
|
|
1854 return 0;
|
|
1855 }
|
|
1856
|
|
1857 } ## end sub calculate_pi
|
|
1858
|
|
1859 =head2 get_all_genotyped_VariationFeatures
|
|
1860
|
|
1861 Args : none
|
|
1862 Description: returns all variation features on this slice that have been genotyped.
|
|
1863 This function will only work correctly if the variation database has
|
|
1864 been attached to the core database.
|
|
1865 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
|
|
1866 Exceptions : none
|
|
1867 Caller : contigview, snpview, ldview
|
|
1868 Status : At Risk
|
|
1869 : Variation database is under development.
|
|
1870
|
|
1871 =cut
|
|
1872
|
|
1873 sub get_all_genotyped_VariationFeatures {
|
|
1874 my $self = shift;
|
|
1875 my $vfa;
|
|
1876 if ( !$self->adaptor() ) {
|
|
1877 warning('Cannot get variation features without attached adaptor');
|
|
1878 return [];
|
|
1879 }
|
|
1880
|
|
1881 my $vf_adaptor =
|
|
1882 Bio::EnsEMBL::DBSQL::MergedAdaptor->new(
|
|
1883 -species => $self->adaptor()->db()->species,
|
|
1884 -type => "VariationFeature" );
|
|
1885
|
|
1886 if ($vf_adaptor) {
|
|
1887 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
|
|
1888 } else {
|
|
1889 warning( "Variation database must be attached to core database to "
|
|
1890 . "retrieve variation information" );
|
|
1891 return [];
|
|
1892 }
|
|
1893 }
|
|
1894
|
|
1895 =head2 get_all_SNPs
|
|
1896
|
|
1897 Description: DEPRECATED. Use get_all_VariationFeatures instead
|
|
1898
|
|
1899 =cut
|
|
1900
|
|
1901 sub get_all_SNPs {
|
|
1902 my $self = shift;
|
|
1903
|
|
1904 deprecate('Use get_all_VariationFeatures() instead.');
|
|
1905
|
|
1906 my $snps;
|
|
1907 my $vf = $self->get_all_genotyped_VariationFeatures();
|
|
1908 if ( $vf->[0] ) {
|
|
1909 #necessary to convert the VariationFeatures into SNP objects
|
|
1910 foreach my $variation_feature ( @{$vf} ) {
|
|
1911 push @{$snps}, $variation_feature->convert_to_SNP();
|
|
1912 }
|
|
1913 return $snps;
|
|
1914 } else {
|
|
1915 return [];
|
|
1916 }
|
|
1917 }
|
|
1918
|
|
1919 =head2 get_all_genotyped_SNPs
|
|
1920
|
|
1921 Description : DEPRECATED. Use get_all_genotyped_VariationFeatures instead
|
|
1922
|
|
1923 =cut
|
|
1924
|
|
1925 sub get_all_genotyped_SNPs {
|
|
1926 my $self = shift;
|
|
1927
|
|
1928 deprecate("Use get_all_genotyped_VariationFeatures instead");
|
|
1929 my $vf = $self->get_all_genotyped_VariationFeatures;
|
|
1930 my $snps;
|
|
1931 if ( $vf->[0] ) {
|
|
1932 foreach my $variation_feature ( @{$vf} ) {
|
|
1933 push @{$snps}, $variation_feature->convert_to_SNP();
|
|
1934 }
|
|
1935 return $snps;
|
|
1936 } else {
|
|
1937 return [];
|
|
1938 }
|
|
1939 }
|
|
1940
|
|
1941 sub get_all_SNPs_transcripts {
|
|
1942 my $self = shift;
|
|
1943
|
|
1944 deprecate("DEPRECATED");
|
|
1945
|
|
1946 return [];
|
|
1947
|
|
1948 }
|
|
1949
|
|
1950 =head2 get_all_Genes
|
|
1951
|
|
1952 Arg [1] : (optional) string $logic_name
|
|
1953 The name of the analysis used to generate the genes to retrieve
|
|
1954 Arg [2] : (optional) string $dbtype
|
|
1955 The dbtype of genes to obtain. This assumes that the db has
|
|
1956 been added to the DBAdaptor under this name (using the
|
|
1957 DBConnection::add_db_adaptor method).
|
|
1958 Arg [3] : (optional) boolean $load_transcripts
|
|
1959 If set to true, transcripts will be loaded immediately rather
|
|
1960 than being lazy-loaded on request. This will result in a
|
|
1961 significant speed up if the Transcripts and Exons are going to
|
|
1962 be used (but a slow down if they are not).
|
|
1963 Arg [4] : (optional) string $source
|
|
1964 The source of the genes to retrieve.
|
|
1965 Arg [5] : (optional) string $biotype
|
|
1966 The biotype of the genes to retrieve.
|
|
1967 Example : @genes = @{$slice->get_all_Genes};
|
|
1968 Description: Retrieves all genes that overlap this slice.
|
|
1969 Returntype : listref of Bio::EnsEMBL::Genes
|
|
1970 Exceptions : none
|
|
1971 Caller : none
|
|
1972 Status : Stable
|
|
1973
|
|
1974 =cut
|
|
1975
|
|
1976 sub get_all_Genes {
|
|
1977 my ( $self, $logic_name, $dbtype, $load_transcripts, $source,
|
|
1978 $biotype )
|
|
1979 = @_;
|
|
1980
|
|
1981 if ( !$self->adaptor() ) {
|
|
1982 warning('Cannot get Genes without attached adaptor');
|
|
1983 return [];
|
|
1984 }
|
|
1985
|
|
1986 my $ga;
|
|
1987 if ($dbtype) {
|
|
1988 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype );
|
|
1989 if ( defined($db) ) {
|
|
1990 $ga = $reg->get_adaptor( $db->species(), $db->group(), "Gene" );
|
|
1991 } else {
|
|
1992 $ga = $reg->get_adaptor( $self->adaptor()->db()->species(),
|
|
1993 $dbtype, "Gene" );
|
|
1994 }
|
|
1995 if ( !defined $ga ) {
|
|
1996 warning("$dbtype genes not available");
|
|
1997 return [];
|
|
1998 }
|
|
1999 } else {
|
|
2000 $ga = $self->adaptor->db->get_GeneAdaptor();
|
|
2001 }
|
|
2002
|
|
2003 ## circular BOF
|
|
2004 my $sl1 =
|
|
2005 Bio::EnsEMBL::CircularSlice->new(
|
|
2006 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2007 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2008 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2009 -START => $self->{'start'},
|
|
2010 -END => $self->{'seq_region_length'},
|
|
2011 -STRAND => $self->{'strand'},
|
|
2012 -ADAPTOR => $self->adaptor() );
|
|
2013
|
|
2014 my $sl2 =
|
|
2015 Bio::EnsEMBL::CircularSlice->new(
|
|
2016 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2017 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2018 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2019 -START => 1,
|
|
2020 -END => $self->{'end'},
|
|
2021 -STRAND => $self->{'strand'},
|
|
2022 -ADAPTOR => $self->adaptor() );
|
|
2023
|
|
2024 my ( @arr, @arr1, @arr2 );
|
|
2025 @arr1 = @{
|
|
2026 $ga->fetch_all_by_Slice( $sl1, $logic_name, $load_transcripts,
|
|
2027 $source, $biotype ) };
|
|
2028 @arr2 = @{
|
|
2029 $ga->fetch_all_by_Slice( $sl2, $logic_name, $load_transcripts,
|
|
2030 $source, $biotype ) };
|
|
2031 push @arr, @arr1, @arr2;
|
|
2032 return \@arr;
|
|
2033
|
|
2034 ## circular EOF
|
|
2035
|
|
2036 } ## end sub get_all_Genes
|
|
2037
|
|
2038 =head2 get_all_Genes_by_type
|
|
2039
|
|
2040 Arg [1] : string $type
|
|
2041 The biotype of genes wanted.
|
|
2042 Arg [2] : (optional) string $logic_name
|
|
2043 Arg [3] : (optional) boolean $load_transcripts
|
|
2044 If set to true, transcripts will be loaded immediately rather
|
|
2045 than being lazy-loaded on request. This will result in a
|
|
2046 significant speed up if the Transcripts and Exons are going to
|
|
2047 be used (but a slow down if they are not).
|
|
2048 Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
|
|
2049 'ensembl')};
|
|
2050 Description: Retrieves genes that overlap this slice of biotype $type.
|
|
2051 This is primarily used by the genebuilding code when several
|
|
2052 biotypes of genes are used.
|
|
2053
|
|
2054 The logic name is the analysis of the genes that are retrieved.
|
|
2055 If not provided all genes will be retrieved instead.
|
|
2056
|
|
2057 Returntype : listref of Bio::EnsEMBL::Genes
|
|
2058 Exceptions : none
|
|
2059 Caller : genebuilder, general
|
|
2060 Status : Stable
|
|
2061
|
|
2062 =cut
|
|
2063
|
|
2064 sub get_all_Genes_by_type {
|
|
2065 my ( $self, $type, $logic_name, $load_transcripts ) = @_;
|
|
2066
|
|
2067 if ( !$self->adaptor() ) {
|
|
2068 warning('Cannot get Genes without attached adaptor');
|
|
2069 return [];
|
|
2070 }
|
|
2071
|
|
2072 return
|
|
2073 $self->get_all_Genes( $logic_name, undef, $load_transcripts, undef,
|
|
2074 $type );
|
|
2075 }
|
|
2076
|
|
2077 =head2 get_all_Genes_by_source
|
|
2078
|
|
2079 Arg [1] : string source
|
|
2080 Arg [2] : (optional) boolean $load_transcripts
|
|
2081 If set to true, transcripts will be loaded immediately rather
|
|
2082 than being lazy-loaded on request. This will result in a
|
|
2083 significant speed up if the Transcripts and Exons are going to
|
|
2084 be used (but a slow down if they are not).
|
|
2085 Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
|
|
2086 Description: Retrieves genes that overlap this slice of source $source.
|
|
2087
|
|
2088 Returntype : listref of Bio::EnsEMBL::Genes
|
|
2089 Exceptions : none
|
|
2090 Caller : general
|
|
2091 Status : Stable
|
|
2092
|
|
2093 =cut
|
|
2094
|
|
2095 sub get_all_Genes_by_source {
|
|
2096 my ( $self, $source, $load_transcripts ) = @_;
|
|
2097
|
|
2098 if ( !$self->adaptor() ) {
|
|
2099 warning('Cannot get Genes without attached adaptor');
|
|
2100 return [];
|
|
2101 }
|
|
2102
|
|
2103 return $self->get_all_Genes( undef, undef, $load_transcripts,
|
|
2104 $source );
|
|
2105 }
|
|
2106
|
|
2107 =head2 get_all_Transcripts
|
|
2108
|
|
2109 Arg [1] : (optional) boolean $load_exons
|
|
2110 If set to true exons will not be lazy-loaded but will instead
|
|
2111 be loaded right away. This is faster if the exons are
|
|
2112 actually going to be used right away.
|
|
2113 Arg [2] : (optional) string $logic_name
|
|
2114 the logic name of the type of features to obtain
|
|
2115 Arg [3] : (optional) string $db_type
|
|
2116 Example : @transcripts = @{$slice->get_all_Transcripts)_};
|
|
2117 Description: Gets all transcripts which overlap this slice. If you want to
|
|
2118 specify a particular analysis or type, then you are better off
|
|
2119 using get_all_Genes or get_all_Genes_by_type and iterating
|
|
2120 through the transcripts of each gene.
|
|
2121 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
|
|
2122 Exceptions : none
|
|
2123 Caller : general
|
|
2124 Status : Stable
|
|
2125
|
|
2126 =cut
|
|
2127
|
|
2128 sub get_all_Transcripts {
|
|
2129 my $self = shift;
|
|
2130 my $load_exons = shift;
|
|
2131 my $logic_name = shift;
|
|
2132 my $dbtype = shift;
|
|
2133 if ( !$self->adaptor() ) {
|
|
2134 warning('Cannot get Transcripts without attached adaptor');
|
|
2135 return [];
|
|
2136 }
|
|
2137
|
|
2138 my $ta;
|
|
2139 if ($dbtype) {
|
|
2140 my $db = $reg->get_db( $self->adaptor()->db(), $dbtype );
|
|
2141 if ( defined($db) ) {
|
|
2142 $ta =
|
|
2143 $reg->get_adaptor( $db->species(), $db->group(), "Transcript" );
|
|
2144 } else {
|
|
2145 $ta = $reg->get_adaptor( $self->adaptor()->db()->species(),
|
|
2146 $dbtype, "Transcript" );
|
|
2147 }
|
|
2148 if ( !defined $ta ) {
|
|
2149 warning("$dbtype genes not available");
|
|
2150 return [];
|
|
2151 }
|
|
2152 } else {
|
|
2153 $ta = $self->adaptor->db->get_TranscriptAdaptor();
|
|
2154 }
|
|
2155
|
|
2156 ## circular BOF
|
|
2157 my $sl1 =
|
|
2158 Bio::EnsEMBL::CircularSlice->new(
|
|
2159 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2160 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2161 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2162 -START => $self->{'start'},
|
|
2163 -END => $self->{'seq_region_length'},
|
|
2164 -STRAND => $self->{'strand'},
|
|
2165 -ADAPTOR => $self->adaptor() );
|
|
2166
|
|
2167 my $sl2 =
|
|
2168 Bio::EnsEMBL::CircularSlice->new(
|
|
2169 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2170 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2171 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2172 -START => 1,
|
|
2173 -END => $self->{'end'},
|
|
2174 -STRAND => $self->{'strand'},
|
|
2175 -ADAPTOR => $self->adaptor() );
|
|
2176
|
|
2177 my ( @arr, @arr1, @arr2 );
|
|
2178 @arr1 =
|
|
2179 @{ $ta->fetch_all_by_Slice( $sl1, $load_exons, $logic_name ) };
|
|
2180 @arr2 =
|
|
2181 @{ $ta->fetch_all_by_Slice( $sl2, $load_exons, $logic_name ) };
|
|
2182 push @arr, @arr1, @arr2;
|
|
2183 return \@arr;
|
|
2184 ## circular EOF
|
|
2185 #return $ta->fetch_all_by_Slice($self, $load_exons, $logic_name);
|
|
2186 } ## end sub get_all_Transcripts
|
|
2187
|
|
2188 =head2 get_all_Exons
|
|
2189
|
|
2190 Arg [1] : none
|
|
2191 Example : @exons = @{$slice->get_all_Exons};
|
|
2192 Description: Gets all exons which overlap this slice. Note that these exons
|
|
2193 will not be associated with any transcripts, so this may not
|
|
2194 be terribly useful.
|
|
2195 Returntype : reference to a list of Bio::EnsEMBL::Exons
|
|
2196 Exceptions : none
|
|
2197 Caller : general
|
|
2198 Status : Stable
|
|
2199
|
|
2200 =cut
|
|
2201
|
|
2202 sub get_all_Exons {
|
|
2203 my $self = shift;
|
|
2204
|
|
2205 if ( !$self->adaptor() ) {
|
|
2206 warning('Cannot get Exons without attached adaptor');
|
|
2207 return [];
|
|
2208 }
|
|
2209
|
|
2210 ## circular BOF
|
|
2211 my $sl1 =
|
|
2212 Bio::EnsEMBL::CircularSlice->new(
|
|
2213 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2214 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2215 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2216 -START => $self->{'start'},
|
|
2217 -END => $self->{'seq_region_length'},
|
|
2218 -STRAND => $self->{'strand'},
|
|
2219 -ADAPTOR => $self->adaptor() );
|
|
2220
|
|
2221 my $sl2 =
|
|
2222 Bio::EnsEMBL::CircularSlice->new(
|
|
2223 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2224 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2225 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2226 -START => 1,
|
|
2227 -END => $self->{'end'},
|
|
2228 -STRAND => $self->{'strand'},
|
|
2229 -ADAPTOR => $self->adaptor() );
|
|
2230
|
|
2231 my ( @arr, @arr1, @arr2 );
|
|
2232 @arr1 =
|
|
2233 @{ $sl1->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl1) };
|
|
2234 @arr2 =
|
|
2235 @{ $sl2->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($sl2) };
|
|
2236 push @arr, @arr1, @arr2;
|
|
2237 return \@arr;
|
|
2238 ## circular EOF
|
|
2239
|
|
2240 #rturn $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
|
|
2241 } ## end sub get_all_Exons
|
|
2242
|
|
2243 =head2 get_all_QtlFeatures
|
|
2244
|
|
2245 Args : none
|
|
2246 Description: returns overlapping QtlFeatures
|
|
2247 Returntype : listref Bio::EnsEMBL::Map::QtlFeature
|
|
2248 Exceptions : none
|
|
2249 Caller : general
|
|
2250 Status : Stable
|
|
2251
|
|
2252 =cut
|
|
2253
|
|
2254 sub get_all_QtlFeatures {
|
|
2255 my $self = shift;
|
|
2256
|
|
2257 if ( !$self->adaptor() ) {
|
|
2258 warning('Cannot get QtlFeatures without attached adaptor');
|
|
2259 return [];
|
|
2260 }
|
|
2261
|
|
2262 my $qfAdaptor;
|
|
2263 if ( $self->adaptor() ) {
|
|
2264 $qfAdaptor = $self->adaptor()->db()->get_QtlFeatureAdaptor();
|
|
2265 } else {
|
|
2266 return [];
|
|
2267 }
|
|
2268
|
|
2269 ## circular BOF
|
|
2270 my $sl1 =
|
|
2271 Bio::EnsEMBL::CircularSlice->new(
|
|
2272 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2273 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2274 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2275 -START => $self->{'start'},
|
|
2276 -END => $self->{'seq_region_length'},
|
|
2277 -STRAND => $self->{'strand'},
|
|
2278 -ADAPTOR => $self->adaptor() );
|
|
2279
|
|
2280 my $sl2 =
|
|
2281 Bio::EnsEMBL::CircularSlice->new(
|
|
2282 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2283 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2284 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2285 -START => 1,
|
|
2286 -END => $self->{'end'},
|
|
2287 -STRAND => $self->{'strand'},
|
|
2288 -ADAPTOR => $self->adaptor() );
|
|
2289
|
|
2290 my ( @arr, @arr1, @arr2 );
|
|
2291 @arr1 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl1) };
|
|
2292 @arr2 = @{ $qfAdaptor->fetch_all_by_Slice_constraint($sl2) };
|
|
2293 push @arr, @arr1, @arr2;
|
|
2294 return \@arr;
|
|
2295 ## circular EOF
|
|
2296
|
|
2297 #return $qfAdaptor->fetch_all_by_Slice_constraint( $self );
|
|
2298 } ## end sub get_all_QtlFeatures
|
|
2299
|
|
2300 =head2 get_all_KaryotypeBands
|
|
2301
|
|
2302 Arg [1] : none
|
|
2303 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
|
|
2304 Description: Retrieves the karyotype bands which this slice overlaps.
|
|
2305 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
|
|
2306 Exceptions : none
|
|
2307 Caller : general, contigview
|
|
2308 Status : Stable
|
|
2309
|
|
2310 =cut
|
|
2311
|
|
2312 sub get_all_KaryotypeBands {
|
|
2313 my ($self) = @_;
|
|
2314
|
|
2315 if ( !$self->adaptor() ) {
|
|
2316 warning('Cannot get KaryotypeBands without attached adaptor');
|
|
2317 return [];
|
|
2318 }
|
|
2319
|
|
2320 my $kadp = $self->adaptor->db->get_KaryotypeBandAdaptor();
|
|
2321
|
|
2322 ## circular BOF
|
|
2323 my $sl1 =
|
|
2324 Bio::EnsEMBL::CircularSlice->new(
|
|
2325 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2326 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2327 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2328 -START => $self->{'start'},
|
|
2329 -END => $self->{'seq_region_length'},
|
|
2330 -STRAND => $self->{'strand'},
|
|
2331 -ADAPTOR => $self->adaptor() );
|
|
2332
|
|
2333 my $sl2 =
|
|
2334 Bio::EnsEMBL::CircularSlice->new(
|
|
2335 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2336 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2337 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2338 -START => 1,
|
|
2339 -END => $self->{'end'},
|
|
2340 -STRAND => $self->{'strand'},
|
|
2341 -ADAPTOR => $self->adaptor() );
|
|
2342
|
|
2343 my ( @arr, @arr1, @arr2 );
|
|
2344 @arr1 = @{ $kadp->fetch_all_by_Slice($sl1) };
|
|
2345 @arr2 = @{ $kadp->fetch_all_by_Slice($sl2) };
|
|
2346 push @arr, @arr1, @arr2;
|
|
2347 return \@arr;
|
|
2348 ## circular EOF
|
|
2349
|
|
2350 #return $kadp->fetch_all_by_Slice($self);
|
|
2351 } ## end sub get_all_KaryotypeBands
|
|
2352
|
|
2353 =head2 get_repeatmasked_seq
|
|
2354
|
|
2355 Arg [1] : listref of strings $logic_names (optional)
|
|
2356 Arg [2] : int $soft_masking_enable (optional)
|
|
2357 Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
|
|
2358 The values are 0 or 1 for hard and soft masking respectively
|
|
2359 The keys of the hash should be of 2 forms
|
|
2360 "repeat_class_" . $repeat_consensus->repeat_class,
|
|
2361 e.g. "repeat_class_SINE/MIR"
|
|
2362 "repeat_name_" . $repeat_consensus->name
|
|
2363 e.g. "repeat_name_MIR"
|
|
2364 depending on which base you want to apply the not default
|
|
2365 masking either the repeat_class or repeat_name. Both can be
|
|
2366 specified in the same hash at the same time, but in that case,
|
|
2367 repeat_name setting has priority over repeat_class. For example,
|
|
2368 you may have hard masking as default, and you may want soft
|
|
2369 masking of all repeat_class SINE/MIR, but repeat_name AluSp
|
|
2370 (which are also from repeat_class SINE/MIR).
|
|
2371 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
|
|
2372 "repeat_name_AluSp" => 0}
|
|
2373 Example : $rm_slice = $slice->get_repeatmasked_seq();
|
|
2374 $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
|
|
2375 Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
|
|
2376 masked sequence instead of the regular sequence.
|
|
2377 Sequence returned by this new slice will have repeat regions
|
|
2378 hardmasked by default (sequence replaced by N) or
|
|
2379 or soft-masked when arg[2] = 1 (sequence in lowercase)
|
|
2380 Will only work with database connection to get repeat features.
|
|
2381 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
|
|
2382 Exceptions : none
|
|
2383 Caller : general
|
|
2384 Status : Stable
|
|
2385
|
|
2386 =cut
|
|
2387
|
|
2388 sub get_repeatmasked_seq {
|
|
2389 my ( $self, $logic_names, $soft_mask, $not_default_masking_cases ) =
|
|
2390 @_;
|
|
2391
|
|
2392 return
|
|
2393 Bio::EnsEMBL::RepeatMaskedSlice->new(
|
|
2394 -START => $self->{'start'},
|
|
2395 -END => $self->{'end'},
|
|
2396 -STRAND => $self->{'strand'},
|
|
2397 -ADAPTOR => $self->adaptor(),
|
|
2398 -SEQ => $self->{'seq'},
|
|
2399 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2400 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2401 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2402 -REPEAT_MASK => $logic_names,
|
|
2403 -SOFT_MASK => $soft_mask,
|
|
2404 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases
|
|
2405 );
|
|
2406 }
|
|
2407
|
|
2408 =head2 _mask_features
|
|
2409
|
|
2410 Arg [1] : reference to a string $dnaref
|
|
2411 Arg [2] : array_ref $repeats
|
|
2412 reference to a list Bio::EnsEMBL::RepeatFeature
|
|
2413 give the list of coordinates to replace with N or with
|
|
2414 lower case
|
|
2415 Arg [3] : int $soft_masking_enable (optional)
|
|
2416 Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
|
|
2417 The values are 0 or 1 for hard and soft masking respectively
|
|
2418 The keys of the hash should be of 2 forms
|
|
2419 "repeat_class_" . $repeat_consensus->repeat_class,
|
|
2420 e.g. "repeat_class_SINE/MIR"
|
|
2421 "repeat_name_" . $repeat_consensus->name
|
|
2422 e.g. "repeat_name_MIR"
|
|
2423 depending on which base you want to apply the not default masking either
|
|
2424 the repeat_class or repeat_name. Both can be specified in the same hash
|
|
2425 at the same time, but in that case, repeat_name setting has priority over
|
|
2426 repeat_class. For example, you may have hard masking as default, and
|
|
2427 you may want soft masking of all repeat_class SINE/MIR,
|
|
2428 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
|
|
2429 Your hash will be something like {"repeat_class_SINE/MIR" => 1,
|
|
2430 "repeat_name_AluSp" => 0}
|
|
2431 Description: replaces string positions described in the RepeatFeatures
|
|
2432 with Ns (default setting), or with the lower case equivalent
|
|
2433 (soft masking). The reference to a dna string which is passed
|
|
2434 is changed in place.
|
|
2435 Returntype : none
|
|
2436 Exceptions : none
|
|
2437 Caller : seq
|
|
2438 Status : Stable
|
|
2439
|
|
2440 =cut
|
|
2441
|
|
2442 sub _mask_features {
|
|
2443 my ( $self, $dnaref, $repeats, $soft_mask,
|
|
2444 $not_default_masking_cases ) = @_;
|
|
2445
|
|
2446 $soft_mask = 0 unless ( defined $soft_mask );
|
|
2447 $not_default_masking_cases = {}
|
|
2448 unless ( defined $not_default_masking_cases );
|
|
2449
|
|
2450 # explicit CORE::length call, to avoid any confusion with the Slice
|
|
2451 # length method
|
|
2452 my $dnalen = CORE::length($$dnaref);
|
|
2453
|
|
2454 REP: foreach my $old_f ( @{$repeats} ) {
|
|
2455 my $f = $old_f->transfer($self);
|
|
2456 my $start = $f->start;
|
|
2457 my $end = $f->end;
|
|
2458 my $length = ( $end - $start ) + 1;
|
|
2459
|
|
2460 # check if we get repeat completely outside of expected slice range
|
|
2461 if ( $end < 1 || $start > $dnalen ) {
|
|
2462 # warning("Unexpected: Repeat completely outside slice coordinates.");
|
|
2463 next REP;
|
|
2464 }
|
|
2465
|
|
2466 # repeat partly outside slice range, so correct
|
|
2467 # the repeat start and length to the slice size if needed
|
|
2468 if ( $start < 1 ) {
|
|
2469 $start = 1;
|
|
2470 $length = ( $end - $start ) + 1;
|
|
2471 }
|
|
2472
|
|
2473 # repeat partly outside slice range, so correct
|
|
2474 # the repeat end and length to the slice size if needed
|
|
2475 if ( $end > $dnalen ) {
|
|
2476 $end = $dnalen;
|
|
2477 $length = ( $end - $start ) + 1;
|
|
2478 }
|
|
2479
|
|
2480 $start--;
|
|
2481
|
|
2482 my $padstr;
|
|
2483 # if we decide to define masking on the base of the repeat_type, we'll need
|
|
2484 # to add the following, and the other commented line few lines below.
|
|
2485 # my $rc_type = "repeat_type_" . $f->repeat_consensus->repeat_type;
|
|
2486 my $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
|
|
2487 my $rc_name = "repeat_name_" . $f->repeat_consensus->name;
|
|
2488
|
|
2489 my $masking_type;
|
|
2490 # $masking_type = $not_default_masking_cases->{$rc_type} if (defined $not_default_masking_cases->{$rc_type});
|
|
2491 $masking_type = $not_default_masking_cases->{$rc_class}
|
|
2492 if ( defined $not_default_masking_cases->{$rc_class} );
|
|
2493 $masking_type = $not_default_masking_cases->{$rc_name}
|
|
2494 if ( defined $not_default_masking_cases->{$rc_name} );
|
|
2495
|
|
2496 $masking_type = $soft_mask unless ( defined $masking_type );
|
|
2497
|
|
2498 if ($masking_type) {
|
|
2499 $padstr = lc substr( $$dnaref, $start, $length );
|
|
2500 } else {
|
|
2501 $padstr = 'N' x $length;
|
|
2502 }
|
|
2503 substr( $$dnaref, $start, $length ) = $padstr;
|
|
2504 } ## end foreach my $old_f ( @{$repeats...})
|
|
2505 } ## end sub _mask_features
|
|
2506
|
|
2507 =head2 get_all_SearchFeatures
|
|
2508
|
|
2509 Arg [1] : scalar $ticket_ids
|
|
2510 Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
|
|
2511 Description: Retreives all search features for stored blast
|
|
2512 results for the ticket that overlap this slice
|
|
2513 Returntype : listref of Bio::EnsEMBL::SeqFeatures
|
|
2514 Exceptions : none
|
|
2515 Caller : general (webby!)
|
|
2516 Status : Stable
|
|
2517
|
|
2518 =cut
|
|
2519
|
|
2520 sub get_all_SearchFeatures {
|
|
2521 my $self = shift;
|
|
2522 my $ticket = shift;
|
|
2523 local $_;
|
|
2524 unless ($ticket) {
|
|
2525 throw("ticket argument is required");
|
|
2526 }
|
|
2527
|
|
2528 if ( !$self->adaptor() ) {
|
|
2529 warning("Cannot get SearchFeatures without an attached adaptor");
|
|
2530 return [];
|
|
2531 }
|
|
2532
|
|
2533 my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
|
|
2534
|
|
2535 my $offset = $self->start - 1;
|
|
2536
|
|
2537 my $features = $sfa
|
|
2538 ? $sfa->get_all_SearchFeatures( $ticket, $self->seq_region_name,
|
|
2539 $self->start, $self->end )
|
|
2540 : [];
|
|
2541
|
|
2542 foreach (@$features) {
|
|
2543 $_->start( $_->start - $offset );
|
|
2544 $_->end( $_->end - $offset );
|
|
2545 }
|
|
2546 return $features;
|
|
2547
|
|
2548 } ## end sub get_all_SearchFeatures
|
|
2549
|
|
2550 =head2 get_all_AssemblyExceptionFeatures
|
|
2551
|
|
2552 Arg [1] : string $set (optional)
|
|
2553 Example : $slice->get_all_AssemblyExceptionFeatures();
|
|
2554 Description: Retreives all misc features which overlap this slice. If
|
|
2555 a set code is provided only features which are members of
|
|
2556 the requested set are returned.
|
|
2557 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
|
|
2558 Exceptions : none
|
|
2559 Caller : general
|
|
2560 Status : Stable
|
|
2561
|
|
2562 =cut
|
|
2563
|
|
2564 sub get_all_AssemblyExceptionFeatures {
|
|
2565 my $self = shift;
|
|
2566 my $misc_set = shift;
|
|
2567
|
|
2568 my $adaptor = $self->adaptor();
|
|
2569
|
|
2570 if ( !$adaptor ) {
|
|
2571 warning('Cannot retrieve features without attached adaptor.');
|
|
2572 return [];
|
|
2573 }
|
|
2574
|
|
2575 my $aefa = $adaptor->db->get_AssemblyExceptionFeatureAdaptor();
|
|
2576
|
|
2577 ## circular BOF
|
|
2578 my $sl1 =
|
|
2579 Bio::EnsEMBL::CircularSlice->new(
|
|
2580 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2581 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2582 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2583 -START => $self->{'start'},
|
|
2584 -END => $self->{'seq_region_length'},
|
|
2585 -STRAND => $self->{'strand'},
|
|
2586 -ADAPTOR => $self->adaptor() );
|
|
2587
|
|
2588 my $sl2 =
|
|
2589 Bio::EnsEMBL::CircularSlice->new(
|
|
2590 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2591 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2592 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2593 -START => 1,
|
|
2594 -END => $self->{'end'},
|
|
2595 -STRAND => $self->{'strand'},
|
|
2596 -ADAPTOR => $self->adaptor() );
|
|
2597
|
|
2598 my ( @arr, @arr1, @arr2 );
|
|
2599 @arr1 = @{ $aefa->fetch_all_by_Slice($sl1) };
|
|
2600 @arr2 = @{ $aefa->fetch_all_by_Slice($sl2) };
|
|
2601 push @arr, @arr1, @arr2;
|
|
2602 return \@arr;
|
|
2603 ## circular EOF
|
|
2604 #return $aefa->fetch_all_by_Slice($self);
|
|
2605 } ## end sub get_all_AssemblyExceptionFeatures
|
|
2606
|
|
2607 =head2 get_all_MiscFeatures
|
|
2608
|
|
2609 Arg [1] : string $set (optional)
|
|
2610 Arg [2] : string $database (optional)
|
|
2611 Example : $slice->get_all_MiscFeatures('cloneset');
|
|
2612 Description: Retreives all misc features which overlap this slice. If
|
|
2613 a set code is provided only features which are members of
|
|
2614 the requested set are returned.
|
|
2615 Returntype : listref of Bio::EnsEMBL::MiscFeatures
|
|
2616 Exceptions : none
|
|
2617 Caller : general
|
|
2618 Status : Stable
|
|
2619
|
|
2620 =cut
|
|
2621
|
|
2622 sub get_all_MiscFeatures {
|
|
2623 my $self = shift;
|
|
2624 my $misc_set = shift;
|
|
2625 my $dbtype = shift;
|
|
2626 my $msa;
|
|
2627
|
|
2628 my $adaptor = $self->adaptor();
|
|
2629 if ( !$adaptor ) {
|
|
2630 warning('Cannot retrieve features without attached adaptor.');
|
|
2631 return [];
|
|
2632 }
|
|
2633
|
|
2634 my $mfa;
|
|
2635 if ($dbtype) {
|
|
2636 my $db = $reg->get_db( $adaptor->db(), $dbtype );
|
|
2637 if ( defined($db) ) {
|
|
2638 $mfa = $reg->get_adaptor( lc( $db->species() ),
|
|
2639 $db->group(), "miscfeature" );
|
|
2640 } else {
|
|
2641 $mfa = $reg->get_adaptor( $adaptor->db()->species(),
|
|
2642 $dbtype, "miscfeature" );
|
|
2643 }
|
|
2644 if ( !defined $mfa ) {
|
|
2645 warning("$dbtype misc features not available");
|
|
2646 return [];
|
|
2647 }
|
|
2648 } else {
|
|
2649 $mfa = $adaptor->db->get_MiscFeatureAdaptor();
|
|
2650 }
|
|
2651
|
|
2652 ## circular BOF
|
|
2653 my $sl1 =
|
|
2654 Bio::EnsEMBL::CircularSlice->new(
|
|
2655 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2656 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2657 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2658 -START => $self->{'start'},
|
|
2659 -END => $self->{'seq_region_length'},
|
|
2660 -STRAND => $self->{'strand'},
|
|
2661 -ADAPTOR => $self->adaptor() );
|
|
2662
|
|
2663 my $sl2 =
|
|
2664 Bio::EnsEMBL::CircularSlice->new(
|
|
2665 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2666 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2667 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2668 -START => 1,
|
|
2669 -END => $self->{'end'},
|
|
2670 -STRAND => $self->{'strand'},
|
|
2671 -ADAPTOR => $self->adaptor() );
|
|
2672
|
|
2673 my ( @arr, @arr1, @arr2 );
|
|
2674
|
|
2675 if ($misc_set) {
|
|
2676 @arr1 =
|
|
2677 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl1, $misc_set ) };
|
|
2678 @arr2 =
|
|
2679 @{ $mfa->fetch_all_by_Slice_and_set_code( $sl2, $misc_set ) };
|
|
2680 push @arr, @arr1, @arr2;
|
|
2681 return \@arr;
|
|
2682 }
|
|
2683 @arr1 = @{ $mfa->fetch_all_by_Slice($sl1) };
|
|
2684 @arr2 = @{ $mfa->fetch_all_by_Slice($sl2) };
|
|
2685 push @arr, @arr1, @arr2;
|
|
2686 return \@arr;
|
|
2687 ## circular EOF
|
|
2688
|
|
2689 # if($misc_set) {
|
|
2690 # return $mfa->fetch_all_by_Slice_and_set_code($self,$misc_set);
|
|
2691 # }
|
|
2692
|
|
2693 # return $mfa->fetch_all_by_Slice($self);
|
|
2694 } ## end sub get_all_MiscFeatures
|
|
2695
|
|
2696 =head2 get_all_AffyFeatures
|
|
2697
|
|
2698 Args : (optional) list of strings - array names
|
|
2699 Example : $slice->get_all_AffyFeatures();
|
|
2700 Description: Retrieves all AffyFeature objects which overlap this slice.
|
|
2701 Returntype : listref of Bio::EnsEMBL::AffyFeature objects
|
|
2702 Exceptions : none
|
|
2703 Caller : general
|
|
2704 Status : Stable
|
|
2705
|
|
2706 =cut
|
|
2707
|
|
2708 sub get_all_AffyFeatures {
|
|
2709 my $self = shift;
|
|
2710 my @arraynames = @_;
|
|
2711
|
|
2712 my $sa = $self->adaptor();
|
|
2713 if ( !$sa ) {
|
|
2714 warning("Cannot retrieve features without attached adaptor.");
|
|
2715 }
|
|
2716 my $fa = $sa->db()->get_AffyFeatureAdaptor();
|
|
2717 my $features;
|
|
2718
|
|
2719 ## circular BOF
|
|
2720 my $sl1 =
|
|
2721 Bio::EnsEMBL::CircularSlice->new(
|
|
2722 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2723 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2724 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2725 -START => $self->{'start'},
|
|
2726 -END => $self->{'seq_region_length'},
|
|
2727 -STRAND => $self->{'strand'},
|
|
2728 -ADAPTOR => $self->adaptor() );
|
|
2729
|
|
2730 my $sl2 =
|
|
2731 Bio::EnsEMBL::CircularSlice->new(
|
|
2732 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2733 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2734 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2735 -START => 1,
|
|
2736 -END => $self->{'end'},
|
|
2737 -STRAND => $self->{'strand'},
|
|
2738 -ADAPTOR => $self->adaptor() );
|
|
2739
|
|
2740 my ( @arr, @arr1, @arr2 );
|
|
2741
|
|
2742 if (@arraynames) {
|
|
2743 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) };
|
|
2744 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) };
|
|
2745 } else {
|
|
2746 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) };
|
|
2747 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) };
|
|
2748 }
|
|
2749 push @arr, @arr1, @arr2;
|
|
2750 return \@arr;
|
|
2751 ## circular EOF
|
|
2752
|
|
2753 # if ( @arraynames ) {
|
|
2754 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
|
|
2755 # } else {
|
|
2756 # $features = $fa->fetch_all_by_Slice( $self );
|
|
2757 # }
|
|
2758 # return $features;
|
|
2759 } ## end sub get_all_AffyFeatures
|
|
2760
|
|
2761 =head2 get_all_OligoFeatures
|
|
2762
|
|
2763 Args : (optional) list of strings - array names
|
|
2764 Example : $slice->get_all_OligoFeatures();
|
|
2765 Description: Retrieves all OligoFeature objects which overlap this slice.
|
|
2766 Optionally just retrieve OligoFeature objects generated by
|
|
2767 probes from named arrays.
|
|
2768 Returntype : listref of Bio::EnsEMBL::OligoFeature objects
|
|
2769 Exceptions : none
|
|
2770 Caller : general
|
|
2771 Status : Stable
|
|
2772
|
|
2773 =cut
|
|
2774
|
|
2775 sub get_all_OligoFeatures {
|
|
2776 my $self = shift;
|
|
2777 my @arraynames = @_;
|
|
2778
|
|
2779 my $sa = $self->adaptor();
|
|
2780 if ( !$sa ) {
|
|
2781 warning("Cannot retrieve features without attached adaptor.");
|
|
2782 }
|
|
2783 my $fa = $sa->db()->get_OligoFeatureAdaptor();
|
|
2784 my $features;
|
|
2785
|
|
2786 ## circular BOF
|
|
2787 my $sl1 =
|
|
2788 Bio::EnsEMBL::CircularSlice->new(
|
|
2789 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2790 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2791 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2792 -START => $self->{'start'},
|
|
2793 -END => $self->{'seq_region_length'},
|
|
2794 -STRAND => $self->{'strand'},
|
|
2795 -ADAPTOR => $self->adaptor() );
|
|
2796
|
|
2797 my $sl2 =
|
|
2798 Bio::EnsEMBL::CircularSlice->new(
|
|
2799 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2800 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2801 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2802 -START => 1,
|
|
2803 -END => $self->{'end'},
|
|
2804 -STRAND => $self->{'strand'},
|
|
2805 -ADAPTOR => $self->adaptor() );
|
|
2806
|
|
2807 my ( @arr, @arr1, @arr2 );
|
|
2808
|
|
2809 if (@arraynames) {
|
|
2810 @arr1 = @{ $fa->fetch_all_by_Slice_arrayname( $sl1, @arraynames ) };
|
|
2811 @arr2 = @{ $fa->fetch_all_by_Slice_arrayname( $sl2, @arraynames ) };
|
|
2812 } else {
|
|
2813 @arr1 = @{ $fa->fetch_all_by_Slice($sl1) };
|
|
2814 @arr2 = @{ $fa->fetch_all_by_Slice($sl2) };
|
|
2815 }
|
|
2816 push @arr, @arr1, @arr2;
|
|
2817 return \@arr;
|
|
2818 ## circular EOF
|
|
2819
|
|
2820 # if ( @arraynames ) {
|
|
2821 # $features = $fa->fetch_all_by_Slice_arrayname( $self, @arraynames );
|
|
2822 # } else {
|
|
2823 # $features = $fa->fetch_all_by_Slice( $self );
|
|
2824 # }
|
|
2825 # return $features;
|
|
2826 } ## end sub get_all_OligoFeatures
|
|
2827
|
|
2828 =head2 get_all_OligoFeatures_by_type
|
|
2829
|
|
2830 Arg [1] : string - type of array (e.g. AFFY or OLIGO)
|
|
2831 Arg [2] : (optional) string - logic name
|
|
2832 Example : $slice->get_all_OligoFeatures_by_type('OLIGO');
|
|
2833 Description: Retrieves all OligoFeature objects which overlap this slice and
|
|
2834 were created by probes from the specified type of array.
|
|
2835 Returntype : listref of Bio::EnsEMBL::OligoFeature objects
|
|
2836 Exceptions : throws if no type
|
|
2837 Caller : general
|
|
2838 Status : Stable
|
|
2839
|
|
2840 =cut
|
|
2841
|
|
2842 sub get_all_OligoFeatures_by_type {
|
|
2843 my ( $self, $type, $logic_name ) = @_;
|
|
2844
|
|
2845 throw('Need type as parameter') if !$type;
|
|
2846
|
|
2847 my $sa = $self->adaptor();
|
|
2848 if ( !$sa ) {
|
|
2849 warning("Cannot retrieve features without attached adaptor.");
|
|
2850 }
|
|
2851 my $fa = $sa->db()->get_OligoFeatureAdaptor();
|
|
2852
|
|
2853 ## circular BOF
|
|
2854 my $sl1 =
|
|
2855 Bio::EnsEMBL::CircularSlice->new(
|
|
2856 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2857 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2858 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2859 -START => $self->{'start'},
|
|
2860 -END => $self->{'seq_region_length'},
|
|
2861 -STRAND => $self->{'strand'},
|
|
2862 -ADAPTOR => $self->adaptor() );
|
|
2863
|
|
2864 my $sl2 =
|
|
2865 Bio::EnsEMBL::CircularSlice->new(
|
|
2866 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2867 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2868 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2869 -START => 1,
|
|
2870 -END => $self->{'end'},
|
|
2871 -STRAND => $self->{'strand'},
|
|
2872 -ADAPTOR => $self->adaptor() );
|
|
2873
|
|
2874 my ( @arr, @arr1, @arr2 );
|
|
2875 @arr1 = @{ $fa->fetch_all_by_Slice_type( $sl1, $type, $logic_name ) };
|
|
2876 @arr2 = @{ $fa->fetch_all_by_Slice_type( $sl2, $type, $logic_name ) };
|
|
2877 push @arr, @arr1, @arr2;
|
|
2878 return \@arr;
|
|
2879 ## circular EOF
|
|
2880
|
|
2881 #my $features = $fa->fetch_all_by_Slice_type( $self, $type, $logic_name );
|
|
2882 #return $features;
|
|
2883 } ## end sub get_all_OligoFeatures_by_type
|
|
2884
|
|
2885 =head2 get_all_MarkerFeatures
|
|
2886
|
|
2887 Arg [1] : (optional) string logic_name
|
|
2888 The logic name of the marker features to retrieve
|
|
2889 Arg [2] : (optional) int $priority
|
|
2890 Lower (exclusive) priority bound of the markers to retrieve
|
|
2891 Arg [3] : (optional) int $map_weight
|
|
2892 Upper (exclusive) priority bound of the markers to retrieve
|
|
2893 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
|
|
2894 Description: Retrieves all markers which lie on this slice fulfilling the
|
|
2895 specified map_weight and priority parameters (if supplied).
|
|
2896 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
|
|
2897 Exceptions : none
|
|
2898 Caller : contigview, general
|
|
2899 Status : Stable
|
|
2900
|
|
2901 =cut
|
|
2902
|
|
2903 sub get_all_MarkerFeatures {
|
|
2904 my ( $self, $logic_name, $priority, $map_weight ) = @_;
|
|
2905
|
|
2906 if ( !$self->adaptor() ) {
|
|
2907 warning('Cannot retrieve MarkerFeatures without attached adaptor.');
|
|
2908 return [];
|
|
2909 }
|
|
2910
|
|
2911 my $ma = $self->adaptor->db->get_MarkerFeatureAdaptor;
|
|
2912
|
|
2913 ## circular BOF
|
|
2914 my $sl1 =
|
|
2915 Bio::EnsEMBL::CircularSlice->new(
|
|
2916 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2917 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2918 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2919 -START => $self->{'start'},
|
|
2920 -END => $self->{'seq_region_length'},
|
|
2921 -STRAND => $self->{'strand'},
|
|
2922 -ADAPTOR => $self->adaptor() );
|
|
2923
|
|
2924 my $sl2 =
|
|
2925 Bio::EnsEMBL::CircularSlice->new(
|
|
2926 -COORD_SYSTEM => $self->{'coord_system'},
|
|
2927 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
2928 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
2929 -START => 1,
|
|
2930 -END => $self->{'end'},
|
|
2931 -STRAND => $self->{'strand'},
|
|
2932 -ADAPTOR => $self->adaptor() );
|
|
2933
|
|
2934 my ( @arr, @arr1, @arr2 );
|
|
2935 @arr1 = @{
|
|
2936 $ma->fetch_all_by_Slice_and_priority( $sl1, $priority, $map_weight,
|
|
2937 $logic_name ) };
|
|
2938 @arr2 = @{
|
|
2939 $ma->fetch_all_by_Slice_and_priority( $sl2, $priority, $map_weight,
|
|
2940 $logic_name ) };
|
|
2941 push @arr, @arr1, @arr2;
|
|
2942 return \@arr;
|
|
2943 ## circular EOF
|
|
2944
|
|
2945 # my $feats = $ma->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
|
|
2946 # return $feats;
|
|
2947 } ## end sub get_all_MarkerFeatures
|
|
2948
|
|
2949 =head2 get_all_compara_DnaAlignFeatures
|
|
2950
|
|
2951 Arg [1] : string $qy_species
|
|
2952 The name of the species to retrieve similarity features from
|
|
2953 Arg [2] : string $qy_assembly
|
|
2954 The name of the assembly to retrieve similarity features from
|
|
2955 Arg [3] : string $type
|
|
2956 The type of the alignment to retrieve similarity features from
|
|
2957 Arg [4] : <optional> compara dbadptor to use.
|
|
2958 Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
|
|
2959 'MGSC3',
|
|
2960 'WGA');
|
|
2961 Description: Retrieves a list of DNA-DNA Alignments to the species specified
|
|
2962 by the $qy_species argument.
|
|
2963 The compara database must be attached to the core database
|
|
2964 for this call to work correctly. As well the compara database
|
|
2965 must have the core dbadaptors for both this species, and the
|
|
2966 query species added to function correctly.
|
|
2967 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
|
|
2968 Exceptions : warning if compara database is not available
|
|
2969 Caller : contigview
|
|
2970 Status : Stable
|
|
2971
|
|
2972 =cut
|
|
2973
|
|
2974 sub get_all_compara_DnaAlignFeatures {
|
|
2975 my ( $self, $qy_species, $qy_assembly, $alignment_type, $compara_db )
|
|
2976 = @_;
|
|
2977
|
|
2978 if ( !$self->adaptor() ) {
|
|
2979 warning(
|
|
2980 "Cannot retrieve DnaAlignFeatures without attached adaptor");
|
|
2981 return [];
|
|
2982 }
|
|
2983
|
|
2984 unless ( $qy_species && $alignment_type # && $qy_assembly
|
|
2985 )
|
|
2986 {
|
|
2987 throw(
|
|
2988 "Query species and assembly and alignmemt type arguments are required"
|
|
2989 );
|
|
2990 }
|
|
2991
|
|
2992 if ( !defined($compara_db) ) {
|
|
2993 $compara_db =
|
|
2994 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" );
|
|
2995 }
|
|
2996 unless ($compara_db) {
|
|
2997 warning(
|
|
2998 "Compara database must be attached to core database or passed "
|
|
2999 . "as an argument to "
|
|
3000 . "retrieve compara information" );
|
|
3001 return [];
|
|
3002 }
|
|
3003
|
|
3004 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
|
|
3005 ## circular BOF
|
|
3006 my $sl1 =
|
|
3007 Bio::EnsEMBL::CircularSlice->new(
|
|
3008 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3009 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3010 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3011 -START => $self->{'start'},
|
|
3012 -END => $self->{'seq_region_length'},
|
|
3013 -STRAND => $self->{'strand'},
|
|
3014 -ADAPTOR => $self->adaptor() );
|
|
3015
|
|
3016 my $sl2 =
|
|
3017 Bio::EnsEMBL::CircularSlice->new(
|
|
3018 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3019 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3020 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3021 -START => 1,
|
|
3022 -END => $self->{'end'},
|
|
3023 -STRAND => $self->{'strand'},
|
|
3024 -ADAPTOR => $self->adaptor() );
|
|
3025
|
|
3026 my ( @arr, @arr1, @arr2 );
|
|
3027 @arr1 = @{
|
|
3028 $dafa->fetch_all_by_Slice( $sl1, $qy_species, $qy_assembly,
|
|
3029 $alignment_type ) };
|
|
3030 @arr2 = @{
|
|
3031 $dafa->fetch_all_by_Slice( $sl2, $qy_species, $qy_assembly,
|
|
3032 $alignment_type ) };
|
|
3033 push @arr, @arr1, @arr2;
|
|
3034 return \@arr;
|
|
3035 ## circular EOF
|
|
3036 #return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
|
|
3037 } ## end sub get_all_compara_DnaAlignFeatures
|
|
3038
|
|
3039 =head2 get_all_compara_Syntenies
|
|
3040
|
|
3041 Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
|
|
3042 Arg [2] : string $method_link_type, default is "SYNTENY"
|
|
3043 Arg [3] : <optional> compara dbadaptor to use.
|
|
3044 Description: gets all the compara syntenyies for a specfic species
|
|
3045 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
|
|
3046 Status : Stable
|
|
3047
|
|
3048 =cut
|
|
3049
|
|
3050 sub get_all_compara_Syntenies {
|
|
3051 my ( $self, $qy_species, $method_link_type, $compara_db ) = @_;
|
|
3052
|
|
3053 if ( !$self->adaptor() ) {
|
|
3054 warning("Cannot retrieve features without attached adaptor");
|
|
3055 return [];
|
|
3056 }
|
|
3057
|
|
3058 unless ($qy_species) {
|
|
3059 throw("Query species and assembly arguments are required");
|
|
3060 }
|
|
3061
|
|
3062 unless ( defined $method_link_type ) {
|
|
3063 $method_link_type = "SYNTENY";
|
|
3064 }
|
|
3065
|
|
3066 if ( !defined($compara_db) ) {
|
|
3067 $compara_db =
|
|
3068 Bio::EnsEMBL::Registry->get_DBAdaptor( "compara", "compara" );
|
|
3069 }
|
|
3070 unless ($compara_db) {
|
|
3071 warning(
|
|
3072 "Compara database must be attached to core database or passed "
|
|
3073 . "as an argument to "
|
|
3074 . "retrieve compara information" );
|
|
3075 return [];
|
|
3076 }
|
|
3077 my $gdba = $compara_db->get_GenomeDBAdaptor();
|
|
3078 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
|
|
3079 my $dfa = $compara_db->get_DnaFragAdaptor();
|
|
3080 my $sra = $compara_db->get_SyntenyRegionAdaptor();
|
|
3081
|
|
3082 my $this_gdb =
|
|
3083 $gdba->fetch_by_core_DBAdaptor( $self->adaptor()->db() );
|
|
3084 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
|
|
3085 my $mlss =
|
|
3086 $mlssa->fetch_by_method_link_type_GenomeDBs( $method_link_type,
|
|
3087 [ $this_gdb, $query_gdb ] );
|
|
3088
|
|
3089 my $cs = $self->coord_system()->name();
|
|
3090 my $sr = $self->seq_region_name();
|
|
3091 my ($dnafrag) =
|
|
3092 @{ $dfa->fetch_all_by_GenomeDB_region( $this_gdb, $cs, $sr ) };
|
|
3093
|
|
3094 ## circular BOF
|
|
3095 my $sl1 =
|
|
3096 Bio::EnsEMBL::CircularSlice->new(
|
|
3097 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3098 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3099 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3100 -START => $self->{'start'},
|
|
3101 -END => $self->{'seq_region_length'},
|
|
3102 -STRAND => $self->{'strand'},
|
|
3103 -ADAPTOR => $self->adaptor() );
|
|
3104
|
|
3105 my $sl2 =
|
|
3106 Bio::EnsEMBL::CircularSlice->new(
|
|
3107 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3108 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3109 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3110 -START => 1,
|
|
3111 -END => $self->{'end'},
|
|
3112 -STRAND => $self->{'strand'},
|
|
3113 -ADAPTOR => $self->adaptor() );
|
|
3114
|
|
3115 my ( @arr, @arr1, @arr2 );
|
|
3116 @arr1 = @{
|
|
3117 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag,
|
|
3118 $sl1->start, $sl1->end )
|
|
3119 };
|
|
3120 @arr2 = @{
|
|
3121 $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag( $mlss, $dnafrag,
|
|
3122 $sl2->start, $sl2->end )
|
|
3123 };
|
|
3124 push @arr, @arr1, @arr2;
|
|
3125 return \@arr;
|
|
3126 ## circular EOF
|
|
3127 #return $sra->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag, $self->start, $self->end);
|
|
3128 } ## end sub get_all_compara_Syntenies
|
|
3129
|
|
3130 =head2 get_all_Haplotypes
|
|
3131
|
|
3132 Arg [1] : (optional) boolean $lite_flag
|
|
3133 if true lightweight haplotype objects are used
|
|
3134 Example : @haplotypes = $slice->get_all_Haplotypes;
|
|
3135 Description: Retrieves all of the haplotypes on this slice. Only works
|
|
3136 if the haplotype adaptor has been attached to the core adaptor
|
|
3137 via $dba->add_db_adaptor('haplotype', $hdba);
|
|
3138 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
|
|
3139 Exceptions : warning is Haplotype database is not available
|
|
3140 Caller : contigview, general
|
|
3141 Status : Stable
|
|
3142
|
|
3143 =cut
|
|
3144
|
|
3145 sub get_all_Haplotypes {
|
|
3146 my ( $self, $lite_flag ) = @_;
|
|
3147
|
|
3148 if ( !$self->adaptor() ) {
|
|
3149 warning("Cannot retrieve features without attached adaptor");
|
|
3150 return [];
|
|
3151 }
|
|
3152
|
|
3153 my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
|
|
3154
|
|
3155 unless ($haplo_db) {
|
|
3156 warning( "Haplotype database must be attached to core database to "
|
|
3157 . "retrieve haplotype information" );
|
|
3158 return [];
|
|
3159 }
|
|
3160
|
|
3161 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
|
|
3162
|
|
3163 ## circular BOF
|
|
3164 my $sl1 =
|
|
3165 Bio::EnsEMBL::CircularSlice->new(
|
|
3166 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3167 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3168 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3169 -START => $self->{'start'},
|
|
3170 -END => $self->{'seq_region_length'},
|
|
3171 -STRAND => $self->{'strand'},
|
|
3172 -ADAPTOR => $self->adaptor() );
|
|
3173
|
|
3174 my $sl2 =
|
|
3175 Bio::EnsEMBL::CircularSlice->new(
|
|
3176 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3177 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3178 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3179 -START => 1,
|
|
3180 -END => $self->{'end'},
|
|
3181 -STRAND => $self->{'strand'},
|
|
3182 -ADAPTOR => $self->adaptor() );
|
|
3183
|
|
3184 my ( @arr, @arr1, @arr2 );
|
|
3185 @arr1 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl1, $lite_flag ) };
|
|
3186 @arr2 = @{ $haplo_adaptor->fetch_all_by_Slice( $sl2, $lite_flag ) };
|
|
3187 push @arr, @arr1, @arr2;
|
|
3188 return \@arr;
|
|
3189 ## circular EOF
|
|
3190
|
|
3191 #my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
|
|
3192 #return $haplotypes;
|
|
3193 } ## end sub get_all_Haplotypes
|
|
3194
|
|
3195 sub get_all_DASFactories {
|
|
3196 my $self = shift;
|
|
3197 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
|
|
3198 }
|
|
3199
|
|
3200
|
|
3201 sub get_all_DASFeatures_dsn {
|
|
3202 my ( $self, $source_type, $dsn ) = @_;
|
|
3203
|
|
3204 if ( !$self->adaptor() ) {
|
|
3205 warning("Cannot retrieve features without attached adaptor");
|
|
3206 return [];
|
|
3207 }
|
|
3208 my @X =
|
|
3209 grep { $_->adaptor->dsn eq $dsn }
|
|
3210 $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3211
|
|
3212 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
|
|
3213 }
|
|
3214
|
|
3215 sub get_all_DAS_Features {
|
|
3216 my ($self) = @_;
|
|
3217
|
|
3218 $self->{_das_features} ||= {}; # Cache
|
|
3219 $self->{_das_styles} ||= {}; # Cache
|
|
3220 $self->{_das_segments} ||= {}; # Cache
|
|
3221 my %das_features;
|
|
3222 my %das_styles;
|
|
3223 my %das_segments;
|
|
3224 my $slice = $self;
|
|
3225
|
|
3226 foreach my $dasfact ( @{ $self->get_all_DASFactories } ) {
|
|
3227 my $dsn = $dasfact->adaptor->dsn;
|
|
3228 my $name = $dasfact->adaptor->name;
|
|
3229 # my $type = $dasfact->adaptor->type;
|
|
3230 my $url = $dasfact->adaptor->url;
|
|
3231
|
|
3232 my ($type) = $dasfact->adaptor->mapping;
|
|
3233 if ( ref $type eq 'ARRAY' ) {
|
|
3234 $type = shift @$type;
|
|
3235 }
|
|
3236 $type ||= $dasfact->adaptor->type;
|
|
3237 # Construct a cache key : SOURCE_URL/TYPE
|
|
3238 # Need the type to handle sources that serve multiple types of features
|
|
3239
|
|
3240 my $key = join( '/', $name, $type );
|
|
3241 if ( $self->{_das_features}->{$key} ) { # Use cached
|
|
3242 $das_features{$name} = $self->{_das_features}->{$key};
|
|
3243 $das_styles{$name} = $self->{_das_styles}->{$key};
|
|
3244 $das_segments{$name} = $self->{_das_segments}->{$key};
|
|
3245 } else { # Get fresh data
|
|
3246 my ( $featref, $styleref, $segref ) =
|
|
3247 $dasfact->fetch_all_Features( $slice, $type );
|
|
3248 $self->{_das_features}->{$key} = $featref;
|
|
3249 $self->{_das_styles}->{$key} = $styleref;
|
|
3250 $self->{_das_segments}->{$key} = $segref;
|
|
3251 $das_features{$name} = $featref;
|
|
3252 $das_styles{$name} = $styleref;
|
|
3253 $das_segments{$name} = $segref;
|
|
3254 }
|
|
3255 } ## end foreach my $dasfact ( @{ $self...})
|
|
3256
|
|
3257 return ( \%das_features, \%das_styles, \%das_segments );
|
|
3258 } ## end sub get_all_DAS_Features
|
|
3259
|
|
3260 =head2 get_all_DASFeatures
|
|
3261
|
|
3262 Arg [1] : none
|
|
3263 Example : $features = $slice->get_all_DASFeatures;
|
|
3264 Description: Retreives a hash reference to a hash of DAS feature
|
|
3265 sets, keyed by the DNS, NOTE the values of this hash
|
|
3266 are an anonymous array containing:
|
|
3267 (1) a pointer to an array of features;
|
|
3268 (2) a pointer to the DAS stylesheet
|
|
3269 Returntype : hashref of Bio::SeqFeatures
|
|
3270 Exceptions : ?
|
|
3271 Caller : webcode
|
|
3272 Status : Stable
|
|
3273
|
|
3274 =cut
|
|
3275 sub get_all_DASFeatures {
|
|
3276 my ( $self, $source_type ) = @_;
|
|
3277
|
|
3278 if ( !$self->adaptor() ) {
|
|
3279 warning("Cannot retrieve features without attached adaptor");
|
|
3280 return [];
|
|
3281 }
|
|
3282
|
|
3283 my %genomic_features = map {
|
|
3284 ( $_->adaptor->dsn =>
|
|
3285 [ $_->fetch_all_Features( $self, $source_type ) ] )
|
|
3286 } $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3287 return \%genomic_features;
|
|
3288
|
|
3289 }
|
|
3290
|
|
3291 sub old_get_all_DASFeatures {
|
|
3292 my ( $self, @args ) = @_;
|
|
3293
|
|
3294 if ( !$self->adaptor() ) {
|
|
3295 warning("Cannot retrieve features without attached adaptor");
|
|
3296 return [];
|
|
3297 }
|
|
3298
|
|
3299 my %genomic_features =
|
|
3300 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
|
|
3301 $self->adaptor()->db()->_each_DASFeatureFactory;
|
|
3302 return \%genomic_features;
|
|
3303
|
|
3304 }
|
|
3305
|
|
3306 =head2 get_all_ExternalFeatures
|
|
3307
|
|
3308 Arg [1] : (optional) string $track_name
|
|
3309 If specified only features from ExternalFeatureAdaptors with
|
|
3310 the track name $track_name are retrieved.
|
|
3311 If not set, all features from every ExternalFeatureAdaptor are
|
|
3312 retrieved.
|
|
3313 Example : @x_features = @{$slice->get_all_ExternalFeatures}
|
|
3314 Description: Retrieves features on this slice from external feature adaptors
|
|
3315 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
|
|
3316 coordinates
|
|
3317 Exceptions : none
|
|
3318 Caller : general
|
|
3319 Status : Stable
|
|
3320
|
|
3321 =cut
|
|
3322
|
|
3323 sub get_all_ExternalFeatures {
|
|
3324 my ( $self, $track_name ) = @_;
|
|
3325 if ( !$self->adaptor() ) {
|
|
3326 warning("Cannot retrieve features without attached adaptor");
|
|
3327 return [];
|
|
3328 }
|
|
3329 my $features = [];
|
|
3330 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
|
|
3331 my @xf_adaptors = ();
|
|
3332 if ($track_name) {
|
|
3333 #use a specific adaptor
|
|
3334 if ( exists $xfa_hash->{$track_name} ) {
|
|
3335 push @xf_adaptors, $xfa_hash->{$track_name};
|
|
3336 }
|
|
3337 } else {
|
|
3338 #use all of the adaptors
|
|
3339 push @xf_adaptors, values %$xfa_hash;
|
|
3340 }
|
|
3341
|
|
3342 ## circular BOF
|
|
3343 my $sl1 =
|
|
3344 Bio::EnsEMBL::CircularSlice->new(
|
|
3345 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3346 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3347 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3348 -START => $self->{'start'},
|
|
3349 -END => $self->{'seq_region_length'},
|
|
3350 -STRAND => $self->{'strand'},
|
|
3351 -ADAPTOR => $self->adaptor() );
|
|
3352
|
|
3353 my $sl2 =
|
|
3354 Bio::EnsEMBL::CircularSlice->new(
|
|
3355 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3356 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3357 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3358 -START => 1,
|
|
3359 -END => $self->{'end'},
|
|
3360 -STRAND => $self->{'strand'},
|
|
3361 -ADAPTOR => $self->adaptor() );
|
|
3362
|
|
3363 foreach my $xfa (@xf_adaptors) {
|
|
3364 push @$features, @{ $xfa->fetch_all_by_Slice($sl1) };
|
|
3365 push @$features, @{ $xfa->fetch_all_by_Slice($sl2) };
|
|
3366 }
|
|
3367 return $features;
|
|
3368 ## circular EOF
|
|
3369 } ## end sub get_all_ExternalFeatures
|
|
3370
|
|
3371 =head2 get_all_DitagFeatures
|
|
3372
|
|
3373 Arg [1] : (optional) string ditag type
|
|
3374 Arg [1] : (optional) string logic_name
|
|
3375 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
|
|
3376 Description: Retrieves the DitagFeatures of a specific type which overlap
|
|
3377 this slice with. If type is not defined, all features are
|
|
3378 retrieved.
|
|
3379 Returntype : listref of Bio::EnsEMBL::DitagFeatures
|
|
3380 Exceptions : warning if slice does not have attached adaptor
|
|
3381 Caller : general
|
|
3382 Status : Stable
|
|
3383
|
|
3384 =cut
|
|
3385
|
|
3386 sub get_all_DitagFeatures {
|
|
3387 my ( $self, $type, $logic_name ) = @_;
|
|
3388
|
|
3389 if ( !$self->adaptor() ) {
|
|
3390 warning('Cannot get DitagFeatures without attached adaptor');
|
|
3391 return [];
|
|
3392 }
|
|
3393
|
|
3394 my $dfa = $self->adaptor->db->get_DitagFeatureAdaptor();
|
|
3395
|
|
3396 ## circular BOF
|
|
3397 my $sl1 =
|
|
3398 Bio::EnsEMBL::CircularSlice->new(
|
|
3399 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3400 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3401 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3402 -START => $self->{'start'},
|
|
3403 -END => $self->{'seq_region_length'},
|
|
3404 -STRAND => $self->{'strand'},
|
|
3405 -ADAPTOR => $self->adaptor() );
|
|
3406
|
|
3407 my $sl2 =
|
|
3408 Bio::EnsEMBL::CircularSlice->new(
|
|
3409 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3410 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3411 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3412 -START => 1,
|
|
3413 -END => $self->{'end'},
|
|
3414 -STRAND => $self->{'strand'},
|
|
3415 -ADAPTOR => $self->adaptor() );
|
|
3416
|
|
3417 my ( @arr, @arr1, @arr2 );
|
|
3418 @arr1 = @{ $dfa->fetch_all_by_Slice( $sl1, $type, $logic_name ) };
|
|
3419 @arr2 = @{ $dfa->fetch_all_by_Slice( $sl2, $type, $logic_name ) };
|
|
3420 push @arr, @arr1, @arr2;
|
|
3421 return \@arr;
|
|
3422 ## circular EOF
|
|
3423 #return $dfa->fetch_all_by_Slice($self, $type, $logic_name);
|
|
3424 } ## end sub get_all_DitagFeatures
|
|
3425
|
|
3426 # GENERIC FEATURES (See DBAdaptor.pm)
|
|
3427
|
|
3428 =head2 get_generic_features
|
|
3429
|
|
3430 Arg [1] : (optional) List of names of generic feature types to return.
|
|
3431 If no feature names are given, all generic features are
|
|
3432 returned.
|
|
3433 Example : my %features = %{$slice->get_generic_features()};
|
|
3434 Description: Gets generic features via the generic feature adaptors that
|
|
3435 have been added via DBAdaptor->add_GenricFeatureAdaptor (if
|
|
3436 any)
|
|
3437 Returntype : Hash of named features.
|
|
3438 Exceptions : none
|
|
3439 Caller : none
|
|
3440 Status : Stable
|
|
3441
|
|
3442 =cut
|
|
3443
|
|
3444 sub get_generic_features {
|
|
3445
|
|
3446 my ( $self, @names ) = @_;
|
|
3447
|
|
3448 if ( !$self->adaptor() ) {
|
|
3449 warning('Cannot retrieve features without attached adaptor');
|
|
3450 return [];
|
|
3451 }
|
|
3452
|
|
3453 my $db = $self->adaptor()->db();
|
|
3454
|
|
3455 my %features = (); # this will hold the results
|
|
3456
|
|
3457 # get the adaptors for each feature
|
|
3458 my %adaptors = %{ $db->get_GenericFeatureAdaptors(@names) };
|
|
3459
|
|
3460 foreach my $adaptor_name ( keys(%adaptors) ) {
|
|
3461
|
|
3462 my $adaptor_obj = $adaptors{$adaptor_name};
|
|
3463 # get the features and add them to the hash
|
|
3464 ## circular BOF
|
|
3465 my $sl1 =
|
|
3466 Bio::EnsEMBL::CircularSlice->new(
|
|
3467 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3468 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3469 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3470 -START => $self->{'start'},
|
|
3471 -END => $self->{'seq_region_length'},
|
|
3472 -STRAND => $self->{'strand'},
|
|
3473 -ADAPTOR => $self->adaptor() );
|
|
3474
|
|
3475 my $sl2 =
|
|
3476 Bio::EnsEMBL::CircularSlice->new(
|
|
3477 -COORD_SYSTEM => $self->{'coord_system'},
|
|
3478 -SEQ_REGION_NAME => $self->{'seq_region_name'},
|
|
3479 -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
|
|
3480 -START => 1,
|
|
3481 -END => $self->{'end'},
|
|
3482 -STRAND => $self->{'strand'},
|
|
3483 -ADAPTOR => $self->adaptor() );
|
|
3484
|
|
3485 my ( @arr1, @arr2 );
|
|
3486 my $features_ref;
|
|
3487 @arr1 = @{ $adaptor_obj->fetch_all_by_Slice($sl1) };
|
|
3488 @arr2 = @{ $adaptor_obj->fetch_all_by_Slice($sl2) };
|
|
3489 push @{$features_ref}, @arr1, @arr2;
|
|
3490 ## circular EOF
|
|
3491 #my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
|
|
3492
|
|
3493 # add each feature to the hash to be returned
|
|
3494 foreach my $feature (@$features_ref) {
|
|
3495 $features{$adaptor_name} = $feature;
|
|
3496 }
|
|
3497 } ## end foreach my $adaptor_name ( ...)
|
|
3498
|
|
3499 return \%features;
|
|
3500
|
|
3501 } ## end sub get_generic_features
|
|
3502
|
|
3503 =head2 project_to_slice
|
|
3504
|
|
3505 Arg [1] : Slice to project to.
|
|
3506 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
|
|
3507 foreach my $segment ( @$chr_projection ){
|
|
3508 $chr_slice = $segment->to_Slice();
|
|
3509 print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
|
|
3510 $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
|
|
3511 '-'.$chr_slice->end().
|
|
3512 $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
|
|
3513 }
|
|
3514 Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
|
|
3515 and we want to state which one to project to.
|
|
3516 Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
|
|
3517 can also be used as [$start,$end,$slice] triplets.
|
|
3518 Exceptions : none
|
|
3519 Caller : none
|
|
3520 Status : At Risk
|
|
3521
|
|
3522 =cut
|
|
3523
|
|
3524 sub project_to_slice {
|
|
3525 my $self = shift;
|
|
3526 my $to_slice = shift;
|
|
3527
|
|
3528 throw('Slice argument is required') if ( !$to_slice );
|
|
3529
|
|
3530 my $slice_adaptor = $self->adaptor();
|
|
3531
|
|
3532 if ( !$slice_adaptor ) {
|
|
3533 warning("Cannot project without attached adaptor.");
|
|
3534 return [];
|
|
3535 }
|
|
3536
|
|
3537 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
|
|
3538
|
|
3539 my $cs = $to_slice->coord_system();
|
|
3540 my $slice_cs = $self->coord_system();
|
|
3541
|
|
3542 my @projection;
|
|
3543 my $current_start = 1;
|
|
3544
|
|
3545 # decompose this slice into its symlinked components.
|
|
3546 # this allows us to handle haplotypes and PARs
|
|
3547 my $normal_slice_proj =
|
|
3548 $slice_adaptor->fetch_normalized_slice_projection($self);
|
|
3549 foreach my $segment (@$normal_slice_proj) {
|
|
3550 my $normal_slice = $segment->[2];
|
|
3551
|
|
3552 $slice_cs = $normal_slice->coord_system();
|
|
3553
|
|
3554 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
|
|
3555 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
|
|
3556
|
|
3557 # perform the mapping between this slice and the requested system
|
|
3558 my @coords;
|
|
3559
|
|
3560 if ( defined $asm_mapper ) {
|
|
3561 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
|
|
3562 $normal_slice->start(),
|
|
3563 $normal_slice->end(),
|
|
3564 $normal_slice->strand(),
|
|
3565 $slice_cs,
|
|
3566 undef,
|
|
3567 $to_slice );
|
|
3568 } else {
|
|
3569 $coords[0] =
|
|
3570 Bio::EnsEMBL::Mapper::Gap->new( $normal_slice->start(),
|
|
3571 $normal_slice->end() );
|
|
3572 }
|
|
3573
|
|
3574 #construct a projection from the mapping results and return it
|
|
3575 foreach my $coord (@coords) {
|
|
3576 my $coord_start = $coord->start();
|
|
3577 my $coord_end = $coord->end();
|
|
3578 my $length = $coord_end - $coord_start + 1;
|
|
3579
|
|
3580 #skip gaps
|
|
3581 if ( $coord->isa('Bio::EnsEMBL::Mapper::Coordinate') ) {
|
|
3582 my $coord_cs = $coord->coord_system();
|
|
3583
|
|
3584 # If the normalised projection just ended up mapping to the
|
|
3585 # same coordinate system we were already in then we should just
|
|
3586 # return the original region. This can happen for example, if we
|
|
3587 # were on a PAR region on Y which refered to X and a projection to
|
|
3588 # 'toplevel' was requested.
|
|
3589 # if($coord_cs->equals($slice_cs)) {
|
|
3590 # # trim off regions which are not defined
|
|
3591 # return $self->_constrain_to_region();
|
|
3592 # }
|
|
3593
|
|
3594 #create slices for the mapped-to coord system
|
|
3595 my $slice =
|
|
3596 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
|
|
3597 $coord_start, $coord_end, $coord->strand() );
|
|
3598
|
|
3599 my $current_end = $current_start + $length - 1;
|
|
3600
|
|
3601 push @projection,
|
|
3602 bless( [ $current_start, $current_end, $slice ],
|
|
3603 "Bio::EnsEMBL::ProjectionSegment" );
|
|
3604 }
|
|
3605
|
|
3606 $current_start += $length;
|
|
3607 } ## end foreach my $coord (@coords)
|
|
3608 } ## end foreach my $segment (@$normal_slice_proj)
|
|
3609
|
|
3610 # delete the cache as we may want to map to different set next time and old
|
|
3611 # results will be cached.
|
|
3612
|
|
3613 $mapper_aptr->delete_cache;
|
|
3614
|
|
3615 return \@projection;
|
|
3616 } ## end sub project_to_slice
|
|
3617
|
|
3618 #
|
|
3619 # Bioperl Bio::PrimarySeqI methods:
|
|
3620 #
|
|
3621
|
|
3622 =head2 id
|
|
3623
|
|
3624 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
|
|
3625
|
|
3626 =cut
|
|
3627
|
|
3628 sub id { name(@_); }
|
|
3629
|
|
3630 =head2 display_id
|
|
3631
|
|
3632 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3633
|
|
3634 =cut
|
|
3635
|
|
3636 sub display_id { name(@_); }
|
|
3637
|
|
3638 =head2 primary_id
|
|
3639
|
|
3640 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3641
|
|
3642 =cut
|
|
3643
|
|
3644 sub primary_id { name(@_); }
|
|
3645
|
|
3646 =head2 desc
|
|
3647
|
|
3648 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3649
|
|
3650 =cut
|
|
3651
|
|
3652 sub desc {
|
|
3653 return $_[0]->coord_system->name() . ' ' . $_[0]->seq_region_name();
|
|
3654 }
|
|
3655
|
|
3656 =head2 moltype
|
|
3657
|
|
3658 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
|
|
3659
|
|
3660 =cut
|
|
3661
|
|
3662 sub moltype { return 'dna'; }
|
|
3663
|
|
3664 =head2 alphabet
|
|
3665
|
|
3666 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3667
|
|
3668 =cut
|
|
3669
|
|
3670 sub alphabet { return 'dna'; }
|
|
3671
|
|
3672 =head2 accession_number
|
|
3673
|
|
3674 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3675
|
|
3676 =cut
|
|
3677
|
|
3678 sub accession_number { name(@_); }
|
|
3679
|
|
3680 =head2 is_circular
|
|
3681
|
|
3682 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
|
|
3683
|
|
3684 =cut
|
|
3685
|
|
3686 sub is_circular {
|
|
3687 my ($self) = @_;
|
|
3688
|
|
3689 if ( !defined( $self->{'circular'} ) ) {
|
|
3690 my @attrs =
|
|
3691 grep { $_ } @{ $self->get_all_Attributes('circular_seq') };
|
|
3692 $self->{'circular'} = @attrs ? 1 : 0;
|
|
3693 }
|
|
3694
|
|
3695 return $self->{'circular'};
|
|
3696 }
|
|
3697
|
|
3698 # sub DEPRECATED METHODS #
|
|
3699 ###############################################################################
|
|
3700
|
|
3701 =head1 DEPRECATED METHODS
|
|
3702
|
|
3703 =cut
|
|
3704
|
|
3705 =head2 get_all_supercontig_Slices
|
|
3706
|
|
3707 DEPRECATED use get_tiling_path("NTcontig") instead
|
|
3708
|
|
3709 =cut
|
|
3710
|
|
3711 sub get_all_supercontig_Slices {
|
|
3712 my $self = shift;
|
|
3713
|
|
3714 deprecate("Use get_tiling_path('NTcontig') instead");
|
|
3715
|
|
3716 my $result = [];
|
|
3717
|
|
3718 if ( $self->adaptor() ) {
|
|
3719 my $superctg_names =
|
|
3720 $self->adaptor()->list_overlapping_supercontigs($self);
|
|
3721
|
|
3722 for my $name (@$superctg_names) {
|
|
3723 my $slice;
|
|
3724 $slice = $self->adaptor()->fetch_by_supercontig_name($name);
|
|
3725 $slice->name($name);
|
|
3726 push( @$result, $slice );
|
|
3727 }
|
|
3728 } else {
|
|
3729 warning(
|
|
3730 "Slice needs to be attached to a database to get supercontigs");
|
|
3731 }
|
|
3732
|
|
3733 return $result;
|
|
3734 }
|
|
3735
|
|
3736 =head2 get_Chromosome
|
|
3737
|
|
3738 DEPRECATED use this instead:
|
|
3739 $slice_adp->fetch_by_region('chromosome',$slice->seq_region_name)
|
|
3740
|
|
3741 =cut
|
|
3742
|
|
3743 sub get_Chromosome {
|
|
3744 my $self = shift @_;
|
|
3745
|
|
3746 deprecate( "Use SliceAdaptor::fetch_by_region('chromosome',"
|
|
3747 . '$slice->seq_region_name) instead' );
|
|
3748
|
|
3749 my $csa = $self->adaptor->db->get_CoordSystemAdaptor();
|
|
3750 my ($top_cs) = @{ $csa->fetch_all() };
|
|
3751
|
|
3752 return
|
|
3753 $self->adaptor->fetch_by_region( $top_cs->name(),
|
|
3754 $self->seq_region_name(),
|
|
3755 undef, undef, undef, $top_cs->version() );
|
|
3756 }
|
|
3757
|
|
3758 =head2 chr_name
|
|
3759
|
|
3760 DEPRECATED use seq_region_name() instead
|
|
3761
|
|
3762 =cut
|
|
3763
|
|
3764 sub chr_name {
|
|
3765 deprecate("Use seq_region_name() instead");
|
|
3766 seq_region_name(@_);
|
|
3767 }
|
|
3768
|
|
3769 =head2 chr_start
|
|
3770
|
|
3771 DEPRECATED use start() instead
|
|
3772
|
|
3773 =cut
|
|
3774
|
|
3775 sub chr_start {
|
|
3776 deprecate('Use start() instead');
|
|
3777 start(@_);
|
|
3778 }
|
|
3779
|
|
3780 =head2 chr_end
|
|
3781
|
|
3782 DEPRECATED use end() instead
|
|
3783
|
|
3784 Returntype : int
|
|
3785 Exceptions : none
|
|
3786 Caller : SliceAdaptor, general
|
|
3787
|
|
3788 =cut
|
|
3789
|
|
3790 sub chr_end {
|
|
3791 deprecate('Use end() instead');
|
|
3792 end(@_);
|
|
3793 }
|
|
3794
|
|
3795 =head2 assembly_type
|
|
3796
|
|
3797 DEPRECATED use version instead
|
|
3798
|
|
3799 =cut
|
|
3800
|
|
3801 sub assembly_type {
|
|
3802 my $self = shift;
|
|
3803 deprecate('Use $slice->coord_system()->version() instead.');
|
|
3804 return $self->coord_system->version();
|
|
3805 }
|
|
3806
|
|
3807 =head2 get_tiling_path
|
|
3808
|
|
3809 DEPRECATED use project instead
|
|
3810
|
|
3811 =cut
|
|
3812
|
|
3813 sub get_tiling_path {
|
|
3814 my $self = shift;
|
|
3815 deprecate('Use $slice->project("seqlevel") instead.');
|
|
3816 return [];
|
|
3817 }
|
|
3818
|
|
3819 =head2 dbID
|
|
3820
|
|
3821 Description: DEPRECATED use SliceAdaptor::get_seq_region_id instead
|
|
3822
|
|
3823 =cut
|
|
3824
|
|
3825 sub dbID {
|
|
3826 my $self = shift;
|
|
3827 deprecate('Use SliceAdaptor::get_seq_region_id instead.');
|
|
3828 if ( !$self->adaptor ) {
|
|
3829 warning('Cannot retrieve seq_region_id without attached adaptor.');
|
|
3830 return 0;
|
|
3831 }
|
|
3832 return $self->adaptor->get_seq_region_id($self);
|
|
3833 }
|
|
3834
|
|
3835 =head2 get_all_MapFrags
|
|
3836
|
|
3837 DEPRECATED use get_all_MiscFeatures instead
|
|
3838
|
|
3839 =cut
|
|
3840
|
|
3841 sub get_all_MapFrags {
|
|
3842 my $self = shift;
|
|
3843 deprecate('Use get_all_MiscFeatures instead');
|
|
3844 return $self->get_all_MiscFeatures(@_);
|
|
3845 }
|
|
3846
|
|
3847 =head2 has_MapSet
|
|
3848
|
|
3849 DEPRECATED use get_all_MiscFeatures instead
|
|
3850
|
|
3851 =cut
|
|
3852
|
|
3853 sub has_MapSet {
|
|
3854 my ( $self, $mapset_name ) = @_;
|
|
3855 deprecate('Use get_all_MiscFeatures instead');
|
|
3856 my $mfs = $self->get_all_MiscFeatures($mapset_name);
|
|
3857 return ( @$mfs > 0 );
|
|
3858 }
|
|
3859
|
|
3860 1;
|