comparison variant_effect_predictor/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::DBSQL::SliceAdaptor - A database aware adaptor responsible for
24 the creation of Slice objects.
25
26 =head1 SYNOPSIS
27
28 use Bio::EnsEMBL::Utils::Slice qw(split_Slices);
29 use Bio::EnsEMBL::Registry;
30
31 Bio::EnsEMBL::Registry->load_registry_from_db(
32 -host => 'ensembldb.ensembl.org',
33 -user => 'anonymous'
34 );
35
36 $slice_adaptor =
37 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
38
39 # get a slice on the entire chromosome X
40 $chr_slice = $slice_adaptor->fetch_by_region( 'chromosome', 'X' );
41
42 # get a slice for each clone in the database
43 foreach $cln_slice ( @{ $slice_adaptor->fetch_all('clone') } ) {
44 # do something with clone
45 }
46
47 # get a slice which is part of NT_004321
48 $spctg_slice =
49 $slice_adaptor->fetch_by_region( 'supercontig', 'NT_004321',
50 200_000, 600_000 );
51
52 # get all non-redundant slices from the highest possible coordinate
53 # systems
54 $slices = $slice_adaptor->fetch_all('toplevel');
55
56 # include non-reference regions
57 $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 1 );
58
59 # include non-duplicate regions
60 $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 0, 1 );
61
62 # split up a list of slices into smaller slices
63 $overlap = 1000;
64 $max_length = 1e6;
65 $slices = split_Slices( $slices, $max_length, $overlap );
66
67 # store a list of slice names in a file
68 open( FILE, ">$filename" ) or die("Could not open file $filename");
69 foreach my $slice (@$slices) {
70 print FILE $slice->name(), "\n";
71 }
72 close FILE;
73
74 # retreive a list of slices from a file
75 open( FILE, $filename ) or die("Could not open file $filename");
76 while ( $name = <FILE> ) {
77 chomp($name);
78 $slice = $slice_adaptor->fetch_by_name($name);
79 # do something with slice
80 }
81
82 =head1 DESCRIPTION
83
84 This module is responsible for fetching Slices representing genomic
85 regions from a database. A Details on how slices can be used are in the
86 Bio::EnsEMBL::Slice module.
87
88 =head1 METHODS
89
90 =cut
91
92
93 package Bio::EnsEMBL::DBSQL::SliceAdaptor;
94 use vars qw(@ISA);
95 use strict;
96
97
98 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
99 use Bio::EnsEMBL::Slice;
100 use Bio::EnsEMBL::CircularSlice;
101 use Bio::EnsEMBL::Mapper;
102 use Bio::EnsEMBL::LRGSlice;
103 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
104 use Scalar::Util qw/looks_like_number/;
105
106 @ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
107
108 sub new {
109 my $caller = shift;
110
111 my $class = ref($caller) || $caller;
112
113 my $self = $class->SUPER::new(@_);
114
115 # use a cache which is shared and also used by the assembly
116 # mapper adaptor
117
118 my $seq_region_cache = $self->db->get_SeqRegionCache();
119
120 $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'};
121 $self->{'sr_id_cache'} = $seq_region_cache->{'id_cache'};
122
123 $self->{'lrg_region_test'} = undef;
124 my $meta_container = $self->db->get_MetaContainer();
125 my @values = $meta_container->list_value_by_key("LRG");
126 if(scalar(@values) and $values[0]->[0]){
127 $self->{'lrg_region_test'} = $values[0]->[0];
128 }
129 return $self;
130 }
131
132
133 =head2 fetch_by_region
134
135 Arg [1] : string $coord_system_name (optional)
136 The name of the coordinate system of the slice to be created
137 This may be a name of an actual coordinate system or an alias
138 to a coordinate system. Valid aliases are 'seqlevel' or
139 'toplevel'.
140 Arg [2] : string $seq_region_name
141 The name of the sequence region that the slice will be
142 created on.
143 Arg [3] : int $start (optional, default = 1)
144 The start of the slice on the sequence region
145 Arg [4] : int $end (optional, default = seq_region length)
146 The end of the slice on the sequence region
147 Arg [5] : int $strand (optional, default = 1)
148 The orientation of the slice on the sequence region
149 Arg [6] : string $version (optional, default = default version)
150 The version of the coordinate system to use (e.g. NCBI33)
151 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
152 If true (non-zero), do not use "fuzzy matching" (see below).
153 Example : $slice = $slice_adaptor->fetch_by_region('chromosome', 'X');
154 $slice = $slice_adaptor->fetch_by_region('clone', 'AC008066.4');
155 Description: Retrieves a slice on the requested region. At a minimum the
156 name the name of the seq_region to fetch must be provided.
157
158 If no coordinate system name is provided than a slice on the
159 highest ranked coordinate system with a matching
160 seq_region_name will be returned. If a version but no
161 coordinate system name is provided, the same behaviour will
162 apply, but only coordinate systems of the appropriate version
163 are considered. The same applies if the 'toplevel' coordinate
164 system is specified, however in this case the version is
165 ignored. The coordinate system should always be specified if
166 it is known, since this is unambiguous and faster.
167
168 Some fuzzy matching is performed if no exact match for
169 the provided name is found. This allows clones to be
170 fetched even when their version is not known. For
171 example fetch_by_region('clone', 'AC008066') will
172 retrieve the sequence_region with name 'AC008066.4'.
173
174 The fuzzy matching can be turned off by setting the
175 $no_fuzz argument to a true value.
176
177 If the requested seq_region is not found in the database undef
178 is returned.
179
180 Returntype : Bio::EnsEMBL::Slice or undef
181 Exceptions : throw if no seq_region_name is provided
182 throw if invalid coord_system_name is provided
183 throw if start > end is provided
184 Caller : general
185 Status : Stable
186
187 =cut
188
189
190 #
191 # ARNE: This subroutine needs simplification!!
192 #
193 sub fetch_by_region {
194 my ( $self, $coord_system_name, $seq_region_name, $start, $end,
195 $strand, $version, $no_fuzz )
196 = @_;
197
198 if ( !defined($start) ) { $start = 1 }
199 if ( !defined($strand) ) { $strand = 1 }
200
201 if ( !defined($seq_region_name) ) {
202 throw('seq_region_name argument is required');
203 }
204
205 my $cs;
206 my $csa = $self->db->get_CoordSystemAdaptor();
207
208 if ( defined($coord_system_name) ) {
209 $cs = $csa->fetch_by_name( $coord_system_name, $version );
210
211 ## REMOVE THESE THREE LINES WHEN STICKLEBACK DB IS FIXED!
212 ## Anne/ap5 (2007-10-09):
213 # The problem was that the stickleback genebuild called the
214 # chromosomes 'groups', which meant they weren't being picked out by
215 # the karyotype drawing code. Apparently they are usually called
216 # 'groups' in the stickleback community, even though they really are
217 # chromosomes!
218
219 if ( !defined($cs) && $coord_system_name eq 'chromosome' ) {
220 $cs = $csa->fetch_by_name( 'group', $version );
221 }
222
223 if ( !defined($cs) ) {
224 throw( sprintf( "Unknown coordinate system:\n"
225 . "name='%s' version='%s'\n",
226 $coord_system_name, $version ) );
227 }
228
229 # fetching by toplevel is same as fetching w/o name or version
230 if ( $cs->is_top_level() ) {
231 $cs = undef;
232 $version = undef;
233 }
234
235 } ## end if ( defined($coord_system_name...))
236
237 my $constraint;
238 my $sql;
239 my @bind_params;
240 my $key;
241
242 if ( defined($cs) ) {
243 $sql = sprintf( "SELECT sr.name, sr.seq_region_id, sr.length, %d "
244 . "FROM seq_region sr ",
245 $cs->dbID() );
246
247 $constraint = "AND sr.coord_system_id = ?";
248 push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] );
249
250 $key = "$seq_region_name:" . $cs->dbID();
251 } else {
252 $sql =
253 "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id "
254 . "FROM seq_region sr, coord_system cs ";
255
256 $constraint = "AND sr.coord_system_id = cs.coord_system_id "
257 . "AND cs.species_id = ? ";
258 push( @bind_params, [ $self->species_id(), SQL_INTEGER ] );
259
260 if ( defined($version) ) {
261 $constraint .= "AND cs.version = ? ";
262 push( @bind_params, [ $version, SQL_VARCHAR ] );
263 }
264
265 $constraint .= "ORDER BY cs.rank ASC";
266 }
267
268 # check the cache so we only go to the db if necessary
269 my $length;
270 my $arr;
271
272 if ( defined($key) ) { $arr = $self->{'sr_name_cache'}->{$key} }
273
274 if ( defined($arr) ) {
275 $length = $arr->[3];
276 } else {
277 my $sth =
278 $self->prepare( $sql . "WHERE sr.name = ? " . $constraint );
279
280 unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] );
281
282 my $pos = 0;
283 foreach my $param (@bind_params) {
284 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
285 }
286
287 $sth->execute();
288
289 if ( $sth->rows() == 0 ) {
290 $sth->finish();
291
292
293 # try synonyms
294 my $syn_sql_sth = $self->prepare("select s.name from seq_region s, seq_region_synonym ss where s.seq_region_id = ss.seq_region_id and ss.synonym = ?");
295 $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR);
296 $syn_sql_sth->execute();
297 my $new_name;
298 $syn_sql_sth->bind_columns( \$new_name);
299 if($syn_sql_sth->fetch){
300 $syn_sql_sth->finish;
301 return $self->fetch_by_region($coord_system_name, $new_name, $start, $end, $strand, $version, $no_fuzz);
302 }
303 $syn_sql_sth->finish;
304
305
306 if ($no_fuzz) { return undef }
307
308 # Do fuzzy matching, assuming that we are just missing a version
309 # on the end of the seq_region name.
310
311 $sth =
312 $self->prepare( $sql . " WHERE sr.name LIKE ? " . $constraint );
313
314 $bind_params[0] =
315 [ sprintf( '%s.%%', $seq_region_name ), SQL_VARCHAR ];
316
317 $pos = 0;
318 foreach my $param (@bind_params) {
319 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
320 }
321
322 $sth->execute();
323
324 my $prefix_len = length($seq_region_name) + 1;
325 my $high_ver = undef;
326 my $high_cs = $cs;
327
328 # Find the fuzzy-matched seq_region with the highest postfix
329 # (which ought to be a version).
330
331 my ( $tmp_name, $id, $tmp_length, $cs_id );
332 $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) );
333
334 my $i = 0;
335
336 while ( $sth->fetch ) {
337 my $tmp_cs =
338 ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) );
339
340 # cache values for future reference
341 my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ];
342 $self->{'sr_name_cache'}->{"$tmp_name:$cs_id"} = $arr;
343 $self->{'sr_id_cache'}->{"$id"} = $arr;
344
345 my $tmp_ver = substr( $tmp_name, $prefix_len );
346
347 # skip versions which are non-numeric and apparently not
348 # versions
349 if ( $tmp_ver !~ /^\d+$/ ) { next }
350
351 # take version with highest num, if two versions match take one
352 # with highest ranked coord system (lowest num)
353 if ( !defined($high_ver)
354 || $tmp_ver > $high_ver
355 || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank )
356 )
357 {
358 $seq_region_name = $tmp_name;
359 $length = $tmp_length;
360 $high_ver = $tmp_ver;
361 $high_cs = $tmp_cs;
362 }
363
364 $i++;
365 } ## end while ( $sth->fetch )
366 $sth->finish();
367
368 # warn if fuzzy matching found more than one result
369 if ( $i > 1 ) {
370 warning(
371 sprintf(
372 "Fuzzy matching of seq_region_name "
373 . "returned more than one result.\n"
374 . "You might want to check whether the returned seq_region\n"
375 . "(%s:%s) is the one you intended to fetch.\n",
376 $high_cs->name(), $seq_region_name ) );
377 }
378
379 $cs = $high_cs;
380
381 # return if we did not find any appropriate match:
382 if ( !defined($high_ver) ) { return undef }
383
384 } else {
385
386 my ( $id, $cs_id );
387 ( $seq_region_name, $id, $length, $cs_id ) =
388 $sth->fetchrow_array();
389 $sth->finish();
390
391 # cache to speed up for future queries
392 my $arr = [ $id, $seq_region_name, $cs_id, $length ];
393 $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
394 $self->{'sr_id_cache'}->{"$id"} = $arr;
395 $cs = $csa->fetch_by_dbID($cs_id);
396 }
397 } ## end else [ if ( defined($arr) ) ]
398
399 if ( !defined($end) ) { $end = $length }
400
401 #If this was given then check if we've got a circular seq region otherwise
402 #let it fall through to the normal Slice method
403 if ( $end + 1 < $start ) {
404 my $cs_id = $cs->dbID();
405 my $seq_region_id = $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"}->[0];
406 if($self->is_circular($seq_region_id)) {
407 my $new_sl =
408 Bio::EnsEMBL::CircularSlice->new(
409 -COORD_SYSTEM => $cs,
410 -SEQ_REGION_NAME => $seq_region_name,
411 -SEQ_REGION_LENGTH => $length,
412 -START => $start,
413 -END => $end,
414 -STRAND => 1,
415 -ADAPTOR => $self );
416
417 return $new_sl;
418 }
419 }
420
421 if ( defined( $self->{'lrg_region_test'} )
422 and substr( $cs->name, 0, 3 ) eq $self->{'lrg_region_test'} )
423 {
424 return
425 Bio::EnsEMBL::LRGSlice->new( -COORD_SYSTEM => $cs,
426 -SEQ_REGION_NAME => $seq_region_name,
427 -SEQ_REGION_LENGTH => $length,
428 -START => $start,
429 -END => $end,
430 -STRAND => $strand,
431 -ADAPTOR => $self );
432 } else {
433 return
434 Bio::EnsEMBL::Slice->new_fast( {
435 'coord_system' => $cs,
436 'seq_region_name' => $seq_region_name,
437 'seq_region_length' => $length,
438 'start' => $start,
439 'end' => $end,
440 'strand' => $strand,
441 'adaptor' => $self } );
442 }
443 } ## end sub fetch_by_region
444
445 =head2 fetch_by_toplevel_location
446
447 Arg [1] : string $location
448 Ensembl formatted location. Can be a format like
449 C<name:start-end>, C<name:start..end>, C<name:start:end>,
450 C<name:start>, C<name>. We can also support strand
451 specification as a +/- or 1/-1.
452
453 Location names must be separated by a C<:>. All others can be
454 separated by C<..>, C<:> or C<->.
455 Arg[2] : boolean $no_warnings
456 Suppress warnings from this method
457 Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
458 my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
459 Description : Converts an Ensembl location/region into the sequence region
460 name, start and end and passes them onto C<fetch_by_region()>.
461 The code assumes that the required slice is on the top level
462 coordinate system. The code assumes that location formatting
463 is not perfect and will perform basic cleanup before parsing.
464 Returntype : Bio::EnsEMBL::Slice
465 Exceptions : If $location is false otherwise see C<fetch_by_region()>
466 Caller : General
467 Status : Beta
468
469 =cut
470
471 sub fetch_by_toplevel_location {
472 my ($self, $location, $no_warnings) = @_;
473
474 my ($seq_region_name, $start, $end, $strand) = $self->parse_location_to_values($location, $no_warnings);
475
476 if(! $seq_region_name) {
477 return;
478 }
479
480 if(defined $start && defined $end && $start > $end) {
481 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end";
482 }
483
484 my $coord_system_name = 'toplevel';
485 my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, undef, 0);
486 return unless $slice;
487
488 my $srl = $slice->seq_region_length();
489 my $name = $slice->seq_region_name();
490 if(defined $start && $start > $srl) {
491 throw "Cannot request a slice whose start ($start) is greater than $srl for $name.";
492 }
493 if(defined $end && $end > $srl) {
494 warning "Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings;
495 $slice->{end} = $srl;
496 }
497
498 return $slice;
499 }
500
501 =head2 parse_location_to_values
502
503 Arg [1] : string $location
504 Ensembl formatted location. Can be a format like
505 C<name:start-end>, C<name:start..end>, C<name:start:end>,
506 C<name:start>, C<name>. We can also support strand
507 specification as a +/- or 1/-1.
508
509 Location names must be separated by a C<:>. All others can be
510 separated by C<..>, C<:> or C<->.
511 Arg[2] : boolean $no_warnings
512 Suppress warnings from this method
513 Example : my ($name, $start, $end, $strand) = $sa->parse_location_to_values('X:1..100:1);
514 Description : Takes in an Ensembl location String and returns the parsed
515 values
516 Returntype : List. Contains name, start, end and strand
517
518 =cut
519
520
521 sub parse_location_to_values {
522 my ($self, $location, $no_warnings, $no_errors) = @_;
523
524 throw 'You must specify a location' if ! $location;
525
526 #cleanup any nomenclature like 1_000 or 1 000 or 1,000
527 my $number_seps_regex = qr/\s+|,|_/;
528 my $separator_regex = qr/(?:-|[.]{2}|\:)?/;
529 my $number_regex = qr/[0-9,_ E]+/xms;
530 my $strand_regex = qr/[+-1]|-1/xms;
531
532 my $regex = qr/^(\w+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
533 my ($seq_region_name, $start, $end, $strand);
534 if(($seq_region_name, $start, $end, $strand) = $location =~ $regex) {
535
536 if(defined $strand) {
537 if(!looks_like_number($strand)) {
538 $strand = ($strand eq '+') ? 1 : -1;
539 }
540 }
541
542 if(defined $start) {
543 $start =~ s/$number_seps_regex//g;
544 if($start < 1) {
545 warning "Start was less than 1 (${start}) which is not allowed. Resetting to 1" if ! $no_warnings;
546 $start = 1;
547 }
548 }
549 if(defined $end) {
550 $end =~ s/$number_seps_regex//g;
551 if($end < 1) {
552 throw "Cannot request negative or 0 end indexes through this interface. Given $end but expected something greater than 0" unless $no_errors;
553 }
554 }
555
556 if(defined $start && defined $end && $start > $end) {
557 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end" unless $no_errors;
558 }
559 }
560
561 return ($seq_region_name, $start, $end, $strand);
562 }
563
564 =head2 fetch_by_region_unique
565
566 Arg [1] : string $coord_system_name (optional)
567 The name of the coordinate system of the slice to be created
568 This may be a name of an actual coordinate system or an alias
569 to a coordinate system. Valid aliases are 'seqlevel' or
570 'toplevel'.
571 Arg [2] : string $seq_region_name
572 The name of the sequence region that the slice will be
573 created on.
574 Arg [3] : int $start (optional, default = 1)
575 The start of the slice on the sequence region
576 Arg [4] : int $end (optional, default = seq_region length)
577 The end of the slice on the sequence region
578 Arg [5] : int $strand (optional, default = 1)
579 The orientation of the slice on the sequence region
580 Arg [6] : string $version (optional, default = default version)
581 The version of the coordinate system to use (e.g. NCBI33)
582 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
583 If true (non-zero), do not use "fuzzy matching" (see below).
584 Example : $slice = $slice_adaptor->fetch_by_region_unique('chromosome', 'HSCHR6_MHC_COX');
585 Description: Retrieves a slice on the requested region but returns only the unique
586 parts of the slice. At a minimum the
587 name the name of the seq_region to fetch must be provided.
588
589 If no coordinate system name is provided than a slice on the
590 highest ranked coordinate system with a matching
591 seq_region_name will be returned. If a version but no
592 coordinate system name is provided, the same behaviour will
593 apply, but only coordinate systems of the appropriate version
594 are considered. The same applies if the 'toplevel' coordinate
595 system is specified, however in this case the version is
596 ignored. The coordinate system should always be specified if
597 it is known, since this is unambiguous and faster.
598
599 Some fuzzy matching is performed if no exact match for
600 the provided name is found. This allows clones to be
601 fetched even when their version is not known. For
602 example fetch_by_region('clone', 'AC008066') will
603 retrieve the sequence_region with name 'AC008066.4'.
604
605 The fuzzy matching can be turned off by setting the
606 $no_fuzz argument to a true value.
607
608 If the requested seq_region is not found in the database undef
609 is returned.
610
611 Returntype : listref Bio::EnsEMBL::Slice
612 Exceptions : throw if no seq_region_name is provided
613 throw if invalid coord_system_name is provided
614 throw if start > end is provided
615 Caller : general
616 Status : Stable
617
618 =cut
619
620 sub fetch_by_region_unique {
621 my $self = shift;
622
623 my @out = ();
624 my $slice = $self->fetch_by_region(@_);
625
626
627 if ( !exists( $self->{'asm_exc_cache'} ) ) {
628 $self->_build_exception_cache();
629 }
630
631 if ( exists(
632 $self->{'asm_exc_cache'}->{ $self->get_seq_region_id($slice) }
633 ) )
634 {
635 # Dereference symlinked assembly regions. Take out any regions
636 # which are symlinked because these are duplicates.
637 my @projection =
638 @{ $self->fetch_normalized_slice_projection($slice) };
639
640 foreach my $segment (@projection) {
641 if ( $segment->[2]->seq_region_name() eq $slice->seq_region_name()
642 && $segment->[2]->coord_system->equals( $slice->coord_system ) )
643 {
644 push( @out, $segment->[2] );
645 }
646 }
647 }
648
649 return \@out;
650 } ## end sub fetch_by_region_unique
651
652 =head2 fetch_by_name
653
654 Arg [1] : string $name
655 Example : $name = 'chromosome:NCBI34:X:1000000:2000000:1';
656 $slice = $slice_adaptor->fetch_by_name($name);
657 $slice2 = $slice_adaptor->fetch_by_name($slice3->name());
658 Description: Fetches a slice using a slice name (i.e. the value returned by
659 the Slice::name method). This is useful if you wish to
660 store a unique identifier for a slice in a file or database or
661 pass a slice over a network.
662 Slice::name allows you to serialise/marshall a slice and this
663 method allows you to deserialise/unmarshal it.
664
665 Returns undef if no seq_region with the provided name exists in
666 the database.
667
668 Returntype : Bio::EnsEMBL::Slice or undef
669 Exceptions : throw if incorrent arg provided
670 Caller : Pipeline
671 Status : Stable
672
673 =cut
674
675 sub fetch_by_name {
676 my $self = shift;
677 my $name = shift;
678
679 if(!$name) {
680 throw("name argument is required");
681 }
682
683 my @array = split(/:/,$name);
684
685 if(scalar(@array) < 3 || scalar(@array) > 6) {
686 throw("Malformed slice name [$name]. Format is " .
687 "coord_system:version:name:start:end:strand");
688 }
689
690 # Rearrange arguments to suit fetch_by_region
691
692 my @targetarray;
693
694 $targetarray[0]=$array[0];
695 $targetarray[5]=(($array[1]&&$array[1] ne "")?$array[1]:undef);
696 $targetarray[1]=(($array[2]&&$array[2] ne "")?$array[2]:undef);
697 $targetarray[2]=(($array[3]&&$array[3] ne "")?$array[3]:undef);
698 $targetarray[3]=(($array[4]&&$array[4] ne "")?$array[4]:undef);
699 $targetarray[4]=(($array[5]&&$array[5] ne "")?$array[5]:undef);
700 return $self->fetch_by_region(@targetarray);
701 }
702
703
704
705 =head2 fetch_by_seq_region_id
706
707 Arg [1] : string $seq_region_id
708 The internal identifier of the seq_region to create this slice
709 on
710 Arg [2] : optional start
711 Arg [3] : optional end
712 Arg [4] : optional strand
713 Example : $slice = $slice_adaptor->fetch_by_seq_region_id(34413);
714 Description: Creates a slice object of an entire seq_region using the
715 seq_region internal identifier to resolve the seq_region.
716 Returns undef if no such slice exists.
717 Returntype : Bio::EnsEMBL::Slice or undef
718 Exceptions : none
719 Caller : general
720 Status : Stable
721
722 =cut
723
724 sub fetch_by_seq_region_id {
725 my ( $self, $seq_region_id, $start, $end, $strand ) = @_;
726
727 my $arr = $self->{'sr_id_cache'}->{$seq_region_id};
728 my ( $name, $length, $cs, $cs_id );
729
730
731 if ( $arr && defined( $arr->[2] ) ) {
732 ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] );
733 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
734 } else {
735 my $sth =
736 $self->prepare( "SELECT sr.name, sr.coord_system_id, sr.length "
737 . "FROM seq_region sr "
738 . "WHERE sr.seq_region_id = ? " );
739
740 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
741 $sth->execute();
742
743 if ( $sth->rows() == 0 ) { return undef }
744
745 ( $name, $cs_id, $length ) = $sth->fetchrow_array();
746 $sth->finish();
747
748 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
749
750 #cache results to speed up repeated queries
751 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
752
753 $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
754 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
755 }
756
757 return
758 Bio::EnsEMBL::Slice->new_fast({
759 'coord_system' => $cs,
760 'seq_region_name' => $name,
761 'seq_region_length'=> $length,
762 'start' => $start || 1,
763 'end' => $end || $length,
764 'strand' => $strand || 1,
765 'adaptor' => $self} );
766 } ## end sub fetch_by_seq_region_id
767
768
769
770 =head2 get_seq_region_id
771
772 Arg [1] : Bio::EnsEMBL::Slice $slice
773 The slice to fetch a seq_region_id for
774 Example : $srid = $slice_adaptor->get_seq_region_id($slice);
775 Description: Retrieves the seq_region id (in this database) given a slice
776 Seq region ids are not stored on the slices themselves
777 because they are intended to be somewhat database independant
778 and seq_region_ids vary accross databases.
779 Returntype : int
780 Exceptions : throw if the seq_region of the slice is not in the db
781 throw if incorrect arg provided
782 Caller : BaseFeatureAdaptor
783 Status : Stable
784
785 =cut
786
787 sub get_seq_region_id {
788 my $self = shift;
789 my $slice = shift;
790
791 if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
792 throw('Slice argument is required');
793 }
794
795 my $seq_region_name = $slice->seq_region_name();
796 my $key = $seq_region_name.":".$slice->coord_system->dbID();
797 my $arr = $self->{'sr_name_cache'}->{"$key"};
798
799 if( $arr ) {
800 return $arr->[0];
801 }
802
803 my $cs_id = $slice->coord_system->dbID();
804
805 my $sth = $self->prepare("SELECT seq_region_id, length " .
806 "FROM seq_region " .
807 "WHERE name = ? AND coord_system_id = ?");
808
809 #force seq_region_name cast to string so mysql cannot treat as int
810 $sth->bind_param(1,"$seq_region_name",SQL_VARCHAR);
811 $sth->bind_param(2,$cs_id,SQL_INTEGER);
812 $sth->execute();
813
814 if($sth->rows() != 1) {
815 throw("Non existant or ambigous seq_region:\n" .
816 " coord_system=[$cs_id],\n" .
817 " name=[$seq_region_name],\n");
818
819 }
820
821 my($seq_region_id, $length) = $sth->fetchrow_array();
822 $sth->finish();
823
824 #cache information for future requests
825 $arr = [ $seq_region_id, $seq_region_name, $cs_id, $length ];
826
827 $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
828 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
829
830 return $seq_region_id;
831 }
832
833
834
835 =head2 fetch_all
836
837 Arg [1] : string $coord_system_name
838 The name of the coordinate system to retrieve slices of.
839 This may be a name of an acutal coordinate system or an alias
840 to a coordinate system. Valid aliases are 'seqlevel' or
841 'toplevel'.
842 Arg [2] : string $coord_system_version (optional)
843 The version of the coordinate system to retrieve slices of
844 Arg [3] : bool $include_non_reference (optional)
845 If this argument is not provided then only reference slices
846 will be returned. If set, both reference and non refeference
847 slices will be rerurned.
848 Arg [4] : int $include_duplicates (optional)
849 If set duplicate regions will be returned.
850
851 NOTE: if you do not use this option and you have a PAR
852 (pseudo-autosomal region) at the beginning of your seq_region
853 then your slice will not start at position 1, so coordinates
854 retrieved from this slice might not be what you expected.
855
856 Arg[5] : bool $include_lrg (optional) (default 0)
857 If set lrg regions will be returned aswell.
858
859
860 Example : @chromos = @{$slice_adaptor->fetch_all('chromosome','NCBI33')};
861 @contigs = @{$slice_adaptor->fetch_all('contig')};
862
863 # get even non-reference regions
864 @slices = @{$slice_adaptor->fetch_all('toplevel',undef,1)};
865
866 # include duplicate regions (such as pseudo autosomal regions)
867 @slices = @{$slice_adaptor->fetch_all('toplevel', undef,0,1)};
868
869 Description: Retrieves slices of all seq_regions for a given coordinate
870 system. This is analagous to the methods fetch_all which were
871 formerly on the ChromosomeAdaptor, RawContigAdaptor and
872 CloneAdaptor classes. Slices fetched span the entire
873 seq_regions and are on the forward strand.
874 If the coordinate system with the provided name and version
875 does not exist an empty list is returned.
876 If the coordinate system name provided is 'toplevel', all
877 non-redundant toplevel slices are returned (note that any
878 coord_system_version argument is ignored in that case).
879
880 Retrieved slices can be broken into smaller slices using the
881 Bio::EnsEMBL::Utils::Slice module.
882
883 Returntype : listref of Bio::EnsEMBL::Slices
884 Exceptions : none
885 Caller : general
886 Status : Stable
887
888 =cut
889
890 sub fetch_all {
891 my $self = shift;
892 my $cs_name = shift;
893 my $cs_version = shift || '';
894
895 my ($include_non_reference, $include_duplicates, $include_lrg) = @_;
896
897 #
898 # verify existance of requested coord system and get its id
899 #
900 my $csa = $self->db->get_CoordSystemAdaptor();
901 my $orig_cs = $csa->fetch_by_name($cs_name, $cs_version);
902
903 return [] if ( !$orig_cs );
904
905 my %bad_vals=();
906
907
908 #
909 # Get a hash of non reference seq regions
910 #
911 if ( !$include_non_reference ) {
912 my $sth =
913 $self->prepare( 'SELECT sr.seq_region_id '
914 . 'FROM seq_region sr, seq_region_attrib sra, '
915 . 'attrib_type at, coord_system cs '
916 . 'WHERE at.code = "non_ref" '
917 . 'AND sra.seq_region_id = sr.seq_region_id '
918 . 'AND at.attrib_type_id = sra.attrib_type_id '
919 . 'AND sr.coord_system_id = cs.coord_system_id '
920 . 'AND cs.species_id = ?' );
921
922 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
923 $sth->execute();
924
925 my ($seq_region_id);
926 $sth->bind_columns( \$seq_region_id );
927
928 while ( $sth->fetch() ) {
929 $bad_vals{$seq_region_id} = 1;
930 }
931 }
932
933 #
934 # if we do not want lrg's then add them to the bad list;
935 #
936 if ( !$include_lrg ) {
937 my $sth =
938 $self->prepare( 'SELECT sr.seq_region_id '
939 . 'FROM seq_region sr, seq_region_attrib sra, '
940 . 'attrib_type at, coord_system cs '
941 . 'WHERE at.code = "LRG" '
942 . 'AND sra.seq_region_id = sr.seq_region_id '
943 . 'AND at.attrib_type_id = sra.attrib_type_id '
944 . 'AND sr.coord_system_id = cs.coord_system_id '
945 . 'AND cs.species_id = ?' );
946
947 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
948 $sth->execute();
949
950 my ($seq_region_id);
951 $sth->bind_columns( \$seq_region_id );
952
953 while ( $sth->fetch() ) {
954 $bad_vals{$seq_region_id} = 1;
955 }
956 }
957
958 #
959 # Retrieve the seq_regions from the database
960 #
961
962 my $sth;
963 if ( $orig_cs->is_top_level() ) {
964 $sth =
965 $self->prepare( 'SELECT sr.seq_region_id, sr.name, '
966 . 'sr.length, sr.coord_system_id '
967 . 'FROM seq_region sr, seq_region_attrib sra, '
968 . 'attrib_type at, coord_system cs '
969 . 'WHERE at.code = "toplevel" '
970 . 'AND at.attrib_type_id = sra.attrib_type_id '
971 . 'AND sra.seq_region_id = sr.seq_region_id '
972 . 'AND sr.coord_system_id = cs.coord_system_id '
973 . 'AND cs.species_id = ?' );
974
975 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
976 $sth->execute();
977 } else {
978 $sth =
979 $self->prepare( 'SELECT sr.seq_region_id, sr.name, '
980 . 'sr.length, sr.coord_system_id '
981 . 'FROM seq_region sr '
982 . 'WHERE sr.coord_system_id = ?' );
983
984 $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER );
985 $sth->execute();
986 }
987
988 my ( $seq_region_id, $name, $length, $cs_id );
989 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
990
991 my $cache_count = 0;
992
993 my @out;
994 while($sth->fetch()) {
995 if(!defined($bad_vals{$seq_region_id})){
996 my $cs = $csa->fetch_by_dbID($cs_id);
997
998 if(!$cs) {
999 throw("seq_region $name references non-existent coord_system $cs_id.");
1000 }
1001
1002 #cache values for future reference, but stop adding to the cache once we
1003 #we know we have filled it up
1004 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
1005 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
1006
1007 $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
1008 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
1009
1010 $cache_count++;
1011 }
1012
1013 my $slice = Bio::EnsEMBL::Slice->new_fast({
1014 'start' => 1,
1015 'end' => $length,
1016 'strand' => 1,
1017 'seq_region_name' => $name,
1018 'seq_region_length'=> $length,
1019 'coord_system' => $cs,
1020 'adaptor' => $self});
1021
1022 if(!defined($include_duplicates) or !$include_duplicates){
1023 # test if this slice *could* have a duplicate (exception) region
1024 $self->_build_exception_cache() if(!exists $self->{'asm_exc_cache'});
1025 if(exists $self->{asm_exc_cache}->{$seq_region_id}) {
1026
1027 # Dereference symlinked assembly regions. Take out
1028 # any regions which are symlinked because these are duplicates
1029 my @projection = @{$self->fetch_normalized_slice_projection($slice)};
1030 foreach my $segment ( @projection) {
1031 if($segment->[2]->seq_region_name() eq $slice->seq_region_name() &&
1032 $segment->[2]->coord_system->equals($slice->coord_system)) {
1033 push @out, $segment->[2];
1034 }
1035 }
1036 } else {
1037 # no duplicate regions
1038 push @out, $slice;
1039 }
1040 } else {
1041 # we want duplicates anyway so do not do any checks
1042 push @out, $slice;
1043 }
1044 }
1045 }
1046
1047 return \@out;
1048 }
1049
1050 =head2 is_toplevel
1051 Arg : int seq_region_id
1052 Example : my $top = $slice_adptor->is_toplevel($seq_region_id)
1053 Description: Returns 1 if slice is a toplevel slice else 0
1054 Returntype : int
1055 Caller : Slice method is_toplevel
1056 Status : At Risk
1057
1058 =cut
1059
1060 sub is_toplevel {
1061 my $self = shift;
1062 my $id = shift;
1063
1064 my $sth = $self->prepare(
1065 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1066 . "WHERE sra.seq_region_id = ? "
1067 . "AND at.attrib_type_id = sra.attrib_type_id "
1068 . "AND at.code = 'toplevel'" );
1069
1070 $sth->bind_param( 1, $id, SQL_INTEGER );
1071 $sth->execute();
1072
1073 my $code;
1074 $sth->bind_columns( \$code );
1075
1076 while ( $sth->fetch ) {
1077 $sth->finish;
1078 return 1;
1079 }
1080
1081 $sth->finish;
1082 return 0;
1083 }
1084
1085 =head2 is_reference
1086 Arg : int seq_region_id
1087 Example : my $reference = $slice_adptor->is_reference($seq_region_id)
1088 Description: Returns 1 if slice is a reference slice else 0
1089 Returntype : int
1090 Caller : Slice method is_reference
1091 Status : At Risk
1092
1093 =cut
1094
1095 sub is_reference {
1096 my $self = shift;
1097 my $id = shift;
1098
1099 my $sth = $self->prepare(
1100 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1101 . "WHERE sra.seq_region_id = ? "
1102 . "AND at.attrib_type_id = sra.attrib_type_id "
1103 . "AND at.code = 'non_ref'" );
1104
1105 $sth->bind_param( 1, $id, SQL_INTEGER );
1106 $sth->execute();
1107
1108 my $code;
1109 $sth->bind_columns( \$code );
1110
1111 while ( $sth->fetch ) {
1112 $sth->finish;
1113 return 0;
1114 }
1115
1116 $sth->finish;
1117 return 1;
1118 }
1119
1120 =head2 is_circular
1121
1122 Arg[1] : int seq_region_id
1123 Example : my $circular = $slice_adptor->is_circular($seq_region_id);
1124 Description : Indicates if the sequence region was circular or not
1125 Returntype : Boolean
1126
1127 =cut
1128
1129 sub is_circular {
1130 my ($self, $id) = @_;
1131
1132 if (! defined $self->{is_circular}) {
1133 $self->_build_circular_slice_cache();
1134 }
1135
1136 return 0 if $self->{is_circular} == 0;
1137 return (exists $self->{circular_sr_id_cache}->{$id}) ? 1 : 0;
1138 }
1139
1140 =head2 fetch_by_band
1141
1142 Title : fetch_by_band
1143 Usage :
1144 Function: Does not work please use fetch_by_chr_band
1145 Example :
1146 Returns : Bio::EnsEMBL::Slice
1147 Args : the band name
1148 Status : AT RISK
1149
1150 =cut
1151
1152 sub fetch_by_band {
1153 my ($self,$band) = @_;
1154
1155 my $sth = $self->dbc->prepare
1156 ("select s.name,max(k.seq_region_id)-min(k.seq_region_id, min(k.seq_region_start), max(k.seq_region_id) " .
1157 "from karyotype as k " .
1158 "where k.band like ? and k.seq_region_id = s.seq_region_id");
1159
1160 $sth->bind_param(1,"$band%",SQL_VARCHAR);
1161 $sth->execute();
1162 my ( $seq_region_name, $discrepancy, $seq_region_start, $seq_region_end) = $sth->fetchrow_array;
1163
1164 if($seq_region_name && $discrepancy>0) {
1165 throw("Band maps to multiple seq_regions");
1166 } else {
1167 return $self->fetch_by_region('toplevel',$seq_region_name,$seq_region_start,$seq_region_end);
1168 }
1169 throw("Band not recognised in database");
1170 }
1171
1172 =head2 fetch_by_chr_band
1173
1174 Title : fetch_by_chr_band
1175 Usage :
1176 Function: create a Slice representing a series of bands
1177 Example :
1178 Returns : Bio::EnsEMBL::Slice
1179 Args : the band name
1180 Status : Stable
1181
1182 =cut
1183
1184 sub fetch_by_chr_band {
1185 my ( $self, $chr, $band ) = @_;
1186
1187 my $chr_slice = $self->fetch_by_region( 'toplevel', $chr );
1188 my $seq_region_id = $self->get_seq_region_id($chr_slice);
1189
1190 my $sth =
1191 $self->prepare( 'SELECT MIN(k.seq_region_start), '
1192 . 'MAX(k.seq_region_end) '
1193 . 'FROM karyotype k '
1194 . 'WHERE k.seq_region_id = ? '
1195 . 'AND k.band LIKE ?' );
1196
1197 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
1198 $sth->bind_param( 2, "$band%", SQL_VARCHAR );
1199 $sth->execute();
1200
1201 my ( $slice_start, $slice_end ) = $sth->fetchrow_array;
1202
1203 if ( defined $slice_start ) {
1204 return
1205 $self->fetch_by_region( 'toplevel', $chr,
1206 $slice_start, $slice_end );
1207 }
1208
1209 throw("Band not recognised in database");
1210 } ## end sub fetch_by_chr_band
1211
1212
1213
1214 =head2 fetch_by_exon_stable_id
1215
1216 Arg [1] : string $exonid
1217 The stable id of the exon around which the slice is
1218 desired
1219 Arg [2] : (optional) int $size
1220 The length of the flanking regions the slice should encompass
1221 on either side of the exon (0 by default)
1222 Example : $slc = $sa->fetch_by_exon_stable_id('ENSE00000302930',10);
1223 Description: Creates a slice around the region of the specified exon.
1224 If a context size is given, the slice is extended by that
1225 number of basepairs on either side of the exon.
1226
1227 The slice will be created in the exon's native coordinate system
1228 and in the forward orientation.
1229 Returntype : Bio::EnsEMBL::Slice
1230 Exceptions : Thrown if the exon is not in the database.
1231 Caller : general
1232 Status : Stable
1233
1234 =cut
1235
1236 sub fetch_by_exon_stable_id{
1237 my ($self,$exonid,$size) = @_;
1238
1239 throw('Exon argument is required.') if(!$exonid);
1240
1241 my $ea = $self->db->get_ExonAdaptor;
1242 my $exon = $ea->fetch_by_stable_id($exonid);
1243
1244 throw("Exon [$exonid] does not exist in DB.") if(!$exon);
1245
1246 return $self->fetch_by_Feature($exon, $size);
1247 }
1248
1249 =head2 fetch_by_transcript_stable_id
1250
1251 Arg [1] : string $transcriptid
1252 The stable id of the transcript around which the slice is
1253 desired
1254 Arg [2] : (optional) int $size
1255 The length of the flanking regions the slice should encompass
1256 on either side of the transcript (0 by default)
1257 Example : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930',10);
1258 Description: Creates a slice around the region of the specified transcript.
1259 If a context size is given, the slice is extended by that
1260 number of basepairs on either side of the
1261 transcript.
1262
1263 The slice will be created in the transcript's native coordinate
1264 system and in the forward orientation.
1265 Returntype : Bio::EnsEMBL::Slice
1266 Exceptions : Thrown if the transcript is not in the database.
1267 Caller : general
1268 Status : Stable
1269
1270 =cut
1271
1272 sub fetch_by_transcript_stable_id{
1273 my ($self,$transcriptid,$size) = @_;
1274
1275 throw('Transcript argument is required.') if(!$transcriptid);
1276
1277 my $ta = $self->db->get_TranscriptAdaptor;
1278 my $transcript = $ta->fetch_by_stable_id($transcriptid);
1279
1280 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
1281
1282 return $self->fetch_by_Feature($transcript, $size);
1283 }
1284
1285
1286
1287 =head2 fetch_by_transcript_id
1288
1289 Arg [1] : int $transcriptid
1290 The unique database identifier of the transcript around which
1291 the slice is desired
1292 Arg [2] : (optional) int $size
1293 The length of the flanking regions the slice should encompass
1294 on either side of the transcript (0 by default)
1295 Example : $slc = $sa->fetch_by_transcript_id(24, 1000);
1296 Description: Creates a slice around the region of the specified transcript.
1297 If a context size is given, the slice is extended by that
1298 number of basepairs on either side of the
1299 transcript.
1300
1301 The slice will be created in the transcript's native coordinate
1302 system and in the forward orientation.
1303 Returntype : Bio::EnsEMBL::Slice
1304 Exceptions : throw on incorrect args
1305 throw if transcript is not in database
1306 Caller : general
1307 Status : Stable
1308
1309 =cut
1310
1311 sub fetch_by_transcript_id {
1312 my ($self,$transcriptid,$size) = @_;
1313
1314 throw('Transcript id argument is required.') if(!$transcriptid);
1315
1316 my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
1317 my $transcript = $transcript_adaptor->fetch_by_dbID($transcriptid);
1318
1319 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
1320
1321 return $self->fetch_by_Feature($transcript, $size);
1322 }
1323
1324
1325
1326 =head2 fetch_by_gene_stable_id
1327
1328 Arg [1] : string $geneid
1329 The stable id of the gene around which the slice is
1330 desired
1331 Arg [2] : (optional) int $size
1332 The length of the flanking regions the slice should encompass
1333 on either side of the gene (0 by default)
1334 Example : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123',10);
1335 Description: Creates a slice around the region of the specified gene.
1336 If a context size is given, the slice is extended by that
1337 number of basepairs on either side of the gene.
1338
1339 The slice will be created in the gene's native coordinate system
1340 and in the forward orientation.
1341 Returntype : Bio::EnsEMBL::Slice
1342 Exceptions : throw on incorrect args
1343 throw if transcript does not exist
1344 Caller : general
1345 Status : Stable
1346
1347 =cut
1348
1349 sub fetch_by_gene_stable_id {
1350 my ($self,$geneid,$size) = @_;
1351
1352 throw('Gene argument is required.') if(!$geneid);
1353
1354 my $gene_adaptor = $self->db->get_GeneAdaptor();
1355 my $gene = $gene_adaptor->fetch_by_stable_id($geneid);
1356
1357 throw("Gene [$geneid] does not exist in DB.") if(!$gene);
1358
1359 return $self->fetch_by_Feature($gene, $size);
1360 }
1361
1362
1363
1364 =head2 fetch_by_Feature
1365
1366 Arg [1] : Bio::EnsEMBL::Feature $feat
1367 The feature to fetch the slice around
1368 Arg [2] : int size (optional)
1369 The desired number of flanking basepairs around the feature.
1370 The size may also be provided as a percentage of the feature
1371 size such as 200% or 80.5%.
1372 Example : $slice = $slice_adaptor->fetch_by_Feature($feat, 100);
1373 Description: Retrieves a slice around a specific feature. All this really
1374 does is return a resized version of the slice that the feature
1375 is already on. Note that slices returned from this method
1376 are always on the forward strand of the seq_region regardless of
1377 the strandedness of the feature passed in.
1378 Returntype : Bio::EnsEMBL::Slice
1379 Exceptions : throw if the feature does not have an attached slice
1380 throw if feature argument is not provided
1381 Caller : fetch_by_gene_stable_id, fetch_by_transcript_stable_id,
1382 fetch_by_gene_id, fetch_by_transcript_id
1383 Status : Stable
1384
1385 =cut
1386
1387 sub fetch_by_Feature{
1388 my ($self, $feature, $size) = @_;
1389
1390 $size ||= 0;
1391
1392 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
1393 throw('Feature argument expected.');
1394 }
1395
1396 my $slice = $feature->slice();
1397 if(!$slice || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice') )) {
1398 throw('Feature must be attached to a valid slice.');
1399 }
1400
1401
1402 my $fstart = $feature->start();
1403 my $fend = $feature->end();
1404 if(!defined($fstart) || !defined($fend)) {
1405 throw('Feature must have defined start and end.');
1406 }
1407
1408 #convert the feature slice coordinates to seq_region coordinates
1409 my $slice_start = $slice->start();
1410 my $slice_end = $slice->end();
1411 my $slice_strand = $slice->strand();
1412 if($slice_start != 1 || $slice_strand != 1) {
1413 if($slice_strand == 1) {
1414 $fstart = $fstart + $slice_start - 1;
1415 $fend = $fend + $slice_start - 1;
1416 } else {
1417 my $tmp_start = $fstart;
1418 $fstart = $slice_end - $fend + 1;
1419 $fend = $slice_end - $tmp_start + 1;
1420 }
1421 }
1422
1423 ## Size may be stored as a %age of the length of the feature
1424 ## Size = 100% gives no context
1425 ## Size = 200% gives context - 50% the size of the feature either side of
1426 ## feature
1427
1428 $size = int( ($1-100)/200 * ($fend-$fstart+1) ) if( $size =~/([\d+\.]+)%/ );
1429
1430 #return a new slice covering the region of the feature
1431 my $S = Bio::EnsEMBL::Slice->new_fast({
1432 'seq_region_name' => $slice->seq_region_name,
1433 'seq_region_length' => $slice->seq_region_length,
1434 'coord_system' => $slice->coord_system,
1435 'start' => $fstart - $size,
1436 'end' => $fend + $size,
1437 'strand' => 1,
1438 'adaptor' => $self});
1439 $S->{'_raw_feature_strand'} = $feature->strand * $slice_strand if $feature->can('strand');
1440 return $S;
1441 }
1442
1443
1444
1445 =head2 fetch_by_misc_feature_attribute
1446
1447 Arg [1] : string $attribute_type
1448 The code of the attribute type
1449 Arg [2] : (optional) string $attribute_value
1450 The value of the attribute to fetch by
1451 Arg [3] : (optional) int $size
1452 The amount of flanking region around the misc feature desired.
1453 Example : $slice = $sa->fetch_by_misc_feature_attribute('superctg',
1454 'NT_030871');
1455 $slice = $sa->fetch_by_misc_feature_attribute('synonym',
1456 'AL00012311',
1457 $flanking);
1458 Description: Fetches a slice around a MiscFeature with a particular
1459 attribute type and value. If no value is specified then
1460 the feature with the particular attribute is used.
1461 If no size is specified then 0 is used.
1462 Returntype : Bio::EnsEMBL::Slice
1463 Exceptions : Throw if no feature with the specified attribute type and value
1464 exists in the database
1465 Warning if multiple features with the specified attribute type
1466 and value exist in the database.
1467 Caller : webcode
1468 Status : Stable
1469
1470 =cut
1471
1472 sub fetch_by_misc_feature_attribute {
1473 my ($self, $attrib_type_code, $attrib_value, $size) = @_;
1474
1475 my $mfa = $self->db()->get_MiscFeatureAdaptor();
1476
1477 my $feats = $mfa->fetch_all_by_attribute_type_value($attrib_type_code,
1478 $attrib_value);
1479
1480 if(@$feats == 0) {
1481 throw("MiscFeature with $attrib_type_code=$attrib_value does " .
1482 "not exist in DB.");
1483 }
1484
1485 if(@$feats > 1) {
1486 warning("MiscFeature with $attrib_type_code=$attrib_value is " .
1487 "ambiguous - using first one found.");
1488 }
1489
1490 my ($feat) = @$feats;
1491
1492 return $self->fetch_by_Feature($feat, $size);
1493 }
1494
1495 =head2 fetch_normalized_slice_projection
1496
1497 Arg [1] : Bio::EnsEMBL::Slice $slice
1498 Example : ( optional )
1499 Description: gives back a project style result. The returned slices
1500 represent the areas to which there are symlinks for the
1501 given slice. start, end show which area on given slice is
1502 symlinked
1503 Returntype : [[start,end,$slice][]]
1504 Exceptions : none
1505 Caller : BaseFeatureAdaptor
1506 Status : Stable
1507
1508 =cut
1509
1510
1511 sub fetch_normalized_slice_projection {
1512 my $self = shift;
1513 my $slice = shift;
1514
1515 my $slice_seq_region_id = $self->get_seq_region_id( $slice );
1516
1517 $self->_build_exception_cache() if(!exists($self->{'asm_exc_cache'}));
1518
1519 my $result = $self->{'asm_exc_cache'}->{$slice_seq_region_id};
1520
1521 $result ||= [];
1522
1523 my (@haps, @pars);
1524
1525 foreach my $row (@$result) {
1526 my ( $seq_region_id, $seq_region_start, $seq_region_end,
1527 $exc_type, $exc_seq_region_id, $exc_seq_region_start,
1528 $exc_seq_region_end ) = @$row;
1529
1530 # need overlapping PAR and all HAPs if any
1531 if( $exc_type eq "PAR" ) {
1532 if( $seq_region_start <= $slice->end() &&
1533 $seq_region_end >= $slice->start() ) {
1534 push( @pars, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1535 $exc_seq_region_start, $exc_seq_region_end ] );
1536 }
1537 } else {
1538 push( @haps, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1539 $exc_seq_region_start, $exc_seq_region_end ] );
1540 }
1541 }
1542
1543 if(!@pars && !@haps) {
1544 #just return this slice, there were no haps or pars
1545 return [bless ( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment")];
1546 }
1547
1548 my @syms;
1549 if( @haps >= 1 ) {
1550 my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps;
1551
1552 my $count =0;
1553 my $chr_start = 1;
1554 my $hap_start = 1;
1555 my $last = 0;
1556
1557 my $seq_reg_slice = $self->fetch_by_seq_region_id($slice_seq_region_id);
1558 my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] );
1559 my $len1 = $seq_reg_slice->length();
1560 my $len2 = $exc_slice->length();
1561 my $max_len = ($len1 > $len2) ? $len1 : $len2;
1562
1563 while($count <= scalar(@sort_haps) and !$last){
1564 my $chr_end;
1565 my $hap_end;
1566 if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){
1567 $hap_end = $sort_haps[$count][0]-1;
1568 $chr_end = $sort_haps[$count][3]-1
1569 }
1570 else{
1571 $last = 1;
1572 $hap_end = $len1;
1573 $chr_end = $len2;
1574 my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start);
1575 if($diff > 0){
1576 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] );
1577 }
1578 elsif($diff < 0){
1579 push( @syms, [ $hap_start, $hap_end - $diff, $sort_haps[0][2], $chr_start, $chr_end] );
1580 }
1581 else{
1582 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1583 }
1584 next;
1585 }
1586 if($hap_end and $hap_start < $len1){ # if hap at start or end of chromosome
1587 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1588 }
1589 $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2;
1590 $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2;
1591 $count++;
1592 }
1593
1594
1595 }
1596
1597
1598 # for now haps and pars should not be both there, but in theory we
1599 # could handle it here by cleverly merging the pars into the existing syms,
1600 # for now just:
1601 push( @syms, @pars );
1602
1603 my $mapper = Bio::EnsEMBL::Mapper->new( "sym", "org" );
1604 my $count = 0;
1605 for my $sym ( @syms ) {
1606 $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1],
1607 1, $sym->[2], $sym->[3], $sym->[4] );
1608 }
1609
1610 my @linked = $mapper->map_coordinates( $slice_seq_region_id,
1611 $slice->start(), $slice->end(),
1612 $slice->strand(), "sym" );
1613
1614 # gaps are regions where there is no mapping to another region
1615 my $rel_start = 1;
1616
1617 #if there was only one coord and it is a gap, we know it is just the
1618 #same slice with no overlapping symlinks
1619 if(@linked == 1 && $linked[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
1620 return [bless( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment" )];
1621 }
1622
1623 my @out;
1624 for my $coord ( @linked ) {
1625 if( $coord->isa( "Bio::EnsEMBL::Mapper::Gap" )) {
1626 my $exc_slice = Bio::EnsEMBL::Slice->new_fast({
1627 'start' => $coord->start(),
1628 'end' => $coord->end(),
1629 'strand' => $slice->strand(),
1630 'coord_system' => $slice->coord_system(),
1631 'adaptor' => $self,
1632 'seq_region_name' => $slice->seq_region_name(),
1633 'seq_region_length' => $slice->seq_region_length()});
1634 push( @out, bless ( [ $rel_start, $coord->length()+$rel_start-1,
1635 $exc_slice ], "Bio::EnsEMBL::ProjectionSegment") );
1636 } else {
1637 my $exc_slice = $self->fetch_by_seq_region_id( $coord->id() );
1638 my $exc2_slice = Bio::EnsEMBL::Slice->new_fast({
1639
1640 'start' => $coord->start(),
1641 'end' => $coord->end(),
1642 'strand' => $coord->strand(),
1643 'seq_region_name' => $exc_slice->seq_region_name(),
1644 'seq_region_length' => $exc_slice->seq_region_length(),
1645 'coord_system' => $exc_slice->coord_system(),
1646 'adaptor' => $self
1647 });
1648
1649 push( @out, bless( [ $rel_start, $coord->length() + $rel_start - 1,
1650 $exc2_slice ], "Bio::EnsEMBL::ProjectionSegment") );
1651 }
1652 $rel_start += $coord->length();
1653 }
1654
1655 return \@out;
1656 }
1657
1658
1659
1660
1661 =head2 store
1662
1663 Arg [1] : Bio::EnsEMBL::Slice $slice
1664 Arg [2] : (optional) $seqref reference to a string
1665 The sequence associated with the slice to be stored.
1666 Example : $slice = Bio::EnsEMBL::Slice->new(...);
1667 $seq_region_id = $slice_adaptor->store($slice, \$sequence);
1668 Description: This stores a slice as a sequence region in the database
1669 and returns the seq region id. The passed in slice must
1670 start at 1, and must have a valid seq_region name and coordinate
1671 system. The attached coordinate system must already be stored in
1672 the database. The sequence region is assumed to start at 1 and
1673 to have a length equalling the length of the slice. The end of
1674 the slice must equal the seq_region_length.
1675 If the slice coordinate system is the sequence level coordinate
1676 system then the seqref argument must also be passed. If the
1677 slice coordinate system is NOT a sequence level coordinate
1678 system then the sequence argument cannot be passed.
1679 Returntype : int
1680 Exceptions : throw if slice has no coord system.
1681 throw if slice coord system is not already stored.
1682 throw if slice coord system is seqlevel and no sequence is
1683 provided.
1684 throw if slice coord system is not seqlevel and sequence is
1685 provided.
1686 throw if slice does not start at 1
1687 throw if sequence is provided and the sequence length does not
1688 match the slice length.
1689 throw if the SQL insert fails (e.g. on duplicate seq region)
1690 throw if slice argument is not passed
1691 throw if the slice end is not equal to seq_region_length
1692 Caller : database loading scripts
1693 Status : Stable
1694
1695 =cut
1696
1697
1698
1699 sub store {
1700 my $self = shift;
1701 my $slice = shift;
1702 my $seqref = shift;
1703
1704 #
1705 # Get all of the sanity checks out of the way before storing anything
1706 #
1707
1708 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
1709 throw('Slice argument is required');
1710 }
1711
1712 my $cs = $slice->coord_system();
1713 throw("Slice must have attached CoordSystem.") if(!$cs);
1714
1715 my $db = $self->db();
1716 if(!$cs->is_stored($db)) {
1717 throw("Slice CoordSystem must already be stored in DB.")
1718 }
1719
1720 if($slice->start != 1 || $slice->strand != 1) {
1721 throw("Slice must have start==1 and strand==1.");
1722 }
1723
1724 if($slice->end() != $slice->seq_region_length()) {
1725 throw("Slice must have end==seq_region_length");
1726 }
1727
1728 my $sr_len = $slice->length();
1729 my $sr_name = $slice->seq_region_name();
1730
1731 if(!$sr_name) {
1732 throw("Slice must have valid seq region name.");
1733 }
1734
1735 if($cs->is_sequence_level()) {
1736 if(!$seqref) {
1737 throw("Must provide sequence for sequence level coord system.");
1738 }
1739 if(ref($seqref) ne 'SCALAR') {
1740 throw("Sequence must be a scalar reference.");
1741 }
1742 my $seq_len = length($$seqref);
1743
1744 if($seq_len != $sr_len) {
1745 throw("Sequence length ($seq_len) must match slice length ($sr_len).");
1746 }
1747 } else {
1748 if($seqref) {
1749 throw("Cannot provide sequence for non-sequence level seq regions.");
1750 }
1751 }
1752
1753 #store the seq_region
1754
1755 my $sth = $db->dbc->prepare("INSERT INTO seq_region " .
1756 "SET name = ?, " .
1757 " length = ?, " .
1758 " coord_system_id = ?" );
1759
1760 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
1761 $sth->bind_param(2,$sr_len,SQL_INTEGER);
1762 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
1763
1764 $sth->execute();
1765
1766 my $seq_region_id = $sth->{'mysql_insertid'};
1767
1768 if(!$seq_region_id) {
1769 throw("Database seq_region insertion failed.");
1770 }
1771
1772 if($cs->is_sequence_level()) {
1773 #store sequence if it was provided
1774 my $seq_adaptor = $db->get_SequenceAdaptor();
1775 $seq_adaptor->store($seq_region_id, $$seqref);
1776 }
1777
1778 #synonyms
1779 if(defined($slice->{'synonym'})){
1780 foreach my $syn (@{$slice->{'synonym'}} ){
1781 $syn->seq_region_id($seq_region_id); # set the seq_region_id
1782 $syn->adaptor->store($syn);
1783 }
1784 }
1785
1786
1787 $slice->adaptor($self);
1788
1789 return $seq_region_id;
1790 }
1791
1792
1793 =head2 store_assembly
1794
1795 Arg [1] : Bio::EnsEMBL::Slice $asm_slice
1796 Arg [2] : Bio::EnsEMBL::Slice $cmp_slice
1797 Example : $asm = $slice_adaptor->store_assembly( $slice1, $slice2 );
1798 Description: Creates an entry in the analysis table based on the
1799 coordinates of the two slices supplied. Returns a string
1800 representation of the assembly that gets created.
1801 Returntype : string
1802 Exceptions : throw if either slice has no coord system (cs).
1803 throw unless the cs rank of the asm_slice is lower than the
1804 cmp_slice.
1805 throw if there is no mapping path between coord systems
1806 throw if the lengths of each slice are not equal
1807 throw if there are existing mappings between either slice
1808 and the oposite cs
1809 Caller : database loading scripts
1810 Status : Experimental
1811
1812 =cut
1813
1814 sub store_assembly{
1815 my $self = shift;
1816 my $asm_slice = shift;
1817 my $cmp_slice = shift;
1818
1819 #
1820 # Get all of the sanity checks out of the way before storing anything
1821 #
1822
1823 if(!ref($asm_slice) || !($asm_slice->isa('Bio::EnsEMBL::Slice') or $asm_slice->isa('Bio::EnsEMBL::LRGSlice'))) {
1824 throw('Assembled Slice argument is required');
1825 }
1826 if(!ref($cmp_slice) || !($cmp_slice->isa('Bio::EnsEMBL::Slice') or $cmp_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
1827 throw('Assembled Slice argument is required');
1828 }
1829
1830 my $asm_cs = $asm_slice->coord_system();
1831 throw("Slice must have attached CoordSystem.") if(!$asm_cs);
1832 my $cmp_cs = $cmp_slice->coord_system();
1833 throw("Slice must have attached CoordSystem.") if(!$cmp_cs);
1834
1835 unless( $asm_cs->rank < $cmp_cs->rank ){
1836 throw("Assembled Slice CoordSystem->rank must be lower than ".
1837 "the component Slice Coord_system" );
1838 }
1839
1840 my @path =
1841 @{ $asm_cs->adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
1842
1843 if ( !@path ) {
1844 throw("No mapping path defined between "
1845 . $asm_cs->name() . " and "
1846 . $cmp_cs->name() );
1847 }
1848
1849 if( $asm_slice->length != $cmp_slice->length ){
1850 throw("The lengths of the assembled and component slices are not equal" );
1851 }
1852
1853 # For now we disallow any existing mappings between the asm slice and cmp
1854 # CoordSystem and vice-versa.
1855 # Some cases of multiple mappings may be allowable by the API, but their
1856 # logic needs to be coded below.
1857
1858 my $asm_proj = $asm_slice->project( $cmp_cs->name, $cmp_cs->version );
1859 if( @$asm_proj ){
1860 throw("Regions of the assembled slice are already assembled ".
1861 "into the component CoordSystem" );
1862 }
1863 my $cmp_proj = $cmp_slice->project( $asm_cs->name, $asm_cs->version );
1864 if( @$cmp_proj ){
1865 throw("Regions of the component slice are already assembled ".
1866 "into the assembled CoordSystem" );
1867 }
1868
1869 #
1870 # Checks complete. Store the data
1871 #
1872 my $sth = $self->db->dbc->prepare
1873 ("INSERT INTO assembly " .
1874 "SET asm_seq_region_id = ?, " .
1875 " cmp_seq_region_id = ?, " .
1876 " asm_start = ?, " .
1877 " asm_end = ?, " .
1878 " cmp_start = ?, " .
1879 " cmp_end = ?, " .
1880 " ori = ?" );
1881
1882 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
1883 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
1884 my $ori = $asm_slice->strand * $cmp_slice->strand;
1885
1886 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
1887 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
1888 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
1889 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
1890 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
1891 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
1892 $sth->bind_param(7,$ori,SQL_INTEGER);
1893
1894 $sth->execute();
1895
1896 #use Data::Dumper qw( Dumper );
1897 #warn Dumper( $self->db->{seq_region_cache} );
1898 #$self->db->{seq_region_cache} = undef;
1899 #$self->_cache_seq_regions();
1900
1901 my $ama = $self->db->get_AssemblyMapperAdaptor();
1902 $ama->delete_cache();
1903
1904
1905 return $asm_slice->name . "<>" . $cmp_slice->name;
1906
1907 }
1908
1909
1910 =head2 prepare
1911
1912 Arg [1] : String $sql
1913 Example : ( optional )
1914 Description: overrides the default adaptor prepare method.
1915 All slice sql will usually use the dna_db.
1916 Returntype : DBD::sth
1917 Exceptions : none
1918 Caller : internal, convenience method
1919 Status : Stable
1920
1921 =cut
1922
1923 sub prepare {
1924 my ( $self, $sql ) = @_;
1925 return $self->db()->dnadb()->dbc->prepare($sql);
1926 }
1927
1928 sub _build_exception_cache {
1929 my $self = shift;
1930
1931 # build up a cache of the entire assembly exception table
1932 # it should be small anyway
1933 my $sth =
1934 $self->prepare( 'SELECT ae.seq_region_id, ae.seq_region_start, '
1935 . 'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, '
1936 . 'ae.exc_seq_region_start, ae.exc_seq_region_end '
1937 . 'FROM assembly_exception ae, '
1938 . 'seq_region sr, coord_system cs '
1939 . 'WHERE sr.seq_region_id = ae.seq_region_id '
1940 . 'AND sr.coord_system_id = cs.coord_system_id '
1941 . 'AND cs.species_id = ?' );
1942
1943 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1944 $sth->execute();
1945
1946 my %hash;
1947 $self->{'asm_exc_cache'} = \%hash;
1948
1949 my $row;
1950 while ( $row = $sth->fetchrow_arrayref() ) {
1951 my @result = @$row;
1952 $hash{ $result[0] } ||= [];
1953 push( @{ $hash{ $result[0] } }, \@result );
1954 }
1955 $sth->finish();
1956 } ## end sub _build_exception_cache
1957
1958 =head2 cache_toplevel_seq_mappings
1959
1960 Args : none
1961 Example : $slice_adaptor->cache_toplevel_seq_mappings();
1962 Description: caches all the assembly mappings needed for genes
1963 Returntype : None
1964 Exceptions : None
1965 Caller : general
1966 Status : At Risk
1967 : New experimental code
1968
1969 =cut
1970
1971 sub cache_toplevel_seq_mappings {
1972 my ($self) = @_;
1973
1974 # Get the sequence level to map too
1975
1976 my $sql = (<<SSQL);
1977 SELECT name
1978 FROM coord_system
1979 WHERE attrib like "%sequence_level%"
1980 AND species_id = ?
1981 SSQL
1982
1983 my $sth = $self->prepare($sql);
1984 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1985 $sth->execute();
1986
1987 my $sequence_level = $sth->fetchrow_array();
1988
1989 $sth->finish();
1990
1991 my $csa = $self->db->get_CoordSystemAdaptor();
1992 my $ama = $self->db->get_AssemblyMapperAdaptor();
1993
1994 my $cs1 = $csa->fetch_by_name($sequence_level);
1995
1996 #get level to map too.
1997
1998 $sql = (<<LSQL);
1999 SELECT DISTINCT(cs.name)
2000 FROM seq_region sr,
2001 seq_region_attrib sra,
2002 attrib_type at,
2003 coord_system cs
2004 WHERE sra.seq_region_id = sr.seq_region_id
2005 AND sra.attrib_type_id = at.attrib_type_id
2006 AND at.code = "toplevel"
2007 AND cs.coord_system_id = sr.coord_system_id
2008 AND cs.species_id = ?
2009 LSQL
2010
2011 $sth = $self->prepare($sql);
2012 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2013 $sth->execute();
2014
2015 while ( my $csn = $sth->fetchrow_array() ) {
2016 if ( $csn eq $sequence_level ) { next }
2017 my $cs2 = $csa->fetch_by_name($csn);
2018 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
2019 $am->register_all();
2020 }
2021
2022 } ## end sub cache_toplevel_seq_mappings
2023
2024
2025 sub _build_circular_slice_cache {
2026 my $self = shift;
2027
2028 # build up a cache of circular sequence region ids
2029 my $sth =
2030 $self->prepare( "SELECT sra.seq_region_id FROM seq_region_attrib sra "
2031 . "INNER JOIN attrib_type at ON sra.attrib_type_id = at.attrib_type_id "
2032 . "INNER JOIN seq_region sr ON sra.seq_region_id = sr.seq_region_id "
2033 . "INNER JOIN coord_system cs ON sr.coord_system_id = cs.coord_system_id "
2034 . "WHERE code = 'circular_seq' and cs.species_id = ?");
2035
2036 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2037 $sth->execute();
2038
2039 my $id;
2040 my %hash;
2041 if ( ($id) = $sth->fetchrow_array() ) {
2042 $self->{'circular_sr_id_cache'} = \%hash;
2043 $self->{'is_circular'} = 1;
2044 $hash{ $id } = $id;
2045 while ( ($id) = $sth->fetchrow_array() ) {
2046 $hash{ $id } = $id;
2047 }
2048 } else {
2049 $self->{'is_circular'} = 0;
2050 }
2051 $sth->finish();
2052 } ## end _build_circular_slice_cache
2053
2054
2055 #####################################
2056 # sub DEPRECATED METHODs
2057 #####################################
2058
2059 =head2 fetch_by_mapfrag
2060
2061 Function: DEPRECATED use fetch_by_misc_feature_attribute('synonym',$mapfrag)
2062
2063 =cut
2064
2065 sub fetch_by_mapfrag{
2066 my ($self,$mymapfrag,$flag,$size) = @_;
2067 deprecate('Use fetch_by_misc_feature_attribute instead');
2068 $flag ||= 'fixed-width'; # alt.. 'context'
2069 $size ||= $flag eq 'fixed-width' ? 100000 : 0;
2070 return $self->fetch_by_misc_feature_attribute('synonym',$mymapfrag,$size);
2071 }
2072
2073
2074
2075 =head2 fetch_by_chr_start_end
2076
2077 Description: DEPRECATED use fetch_by_region instead
2078
2079 =cut
2080
2081 sub fetch_by_chr_start_end {
2082 my ($self,$chr,$start,$end) = @_;
2083 deprecate('Use fetch_by_region() instead');
2084
2085 #assume that by chromosome the user actually meant top-level coord
2086 #system since this is the old behaviour of this deprecated method
2087 my $csa = $self->db->get_CoordSystemAdaptor();
2088 my ($cs) = @{$csa->fetch_all()}; # get the highest coord system
2089
2090 return $self->fetch_by_region($cs->name,$chr,$start,$end,1,$cs->version);
2091 }
2092
2093
2094
2095 =head2 fetch_by_contig_name
2096
2097 Description: Deprecated. Use fetch_by_region(), Slice::project(),
2098 Slice::expand() instead
2099
2100 =cut
2101
2102 sub fetch_by_contig_name {
2103 my ($self, $name, $size) = @_;
2104
2105 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
2106
2107 #previously wanted chromosomal slice on a given contig. Assume this means
2108 #a top-level slice on a given seq_region in the seq_level coord system
2109 my $csa = $self->db()->get_CoordSystemAdaptor();
2110 my $seq_level = $csa->fetch_sequence_level();
2111
2112 my $seq_lvl_slice = $self->fetch_by_region($seq_level->name(), $name);
2113
2114 if(!$seq_lvl_slice) {
2115 return undef;
2116 }
2117
2118 my @projection = @{$seq_lvl_slice->project('toplevel')};
2119
2120 if(@projection != 1) {
2121 warning("$name is mapped to multiple toplevel locations.");
2122 }
2123
2124 return $projection[0]->[2]->expand($size, $size);
2125 }
2126
2127
2128 =head2 fetch_by_clone_accession
2129
2130 Description: DEPRECATED. Use fetch_by_region, Slice::project, Slice::expand
2131 instead.
2132
2133 =cut
2134
2135 sub fetch_by_clone_accession{
2136 my ($self,$name,$size) = @_;
2137
2138 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
2139
2140 my $csa = $self->db()->get_CoordSystemAdaptor();
2141 my $clone_cs = $csa->fetch_by_name('clone');
2142
2143 if(!$clone_cs) {
2144 warning('Clone coordinate system does not exist for this species');
2145 return undef;
2146 }
2147
2148 #this unfortunately needs a version on the end to work
2149 if(! ($name =~ /\./)) {
2150 my $sth =
2151 $self->prepare( "SELECT sr.name "
2152 . "FROM seq_region sr, coord_system cs "
2153 . "WHERE cs.name = 'clone' "
2154 . "AND cs.coord_system_id = sr.coord_system_id "
2155 . "AND sr.name LIKE '$name.%'"
2156 . "AND cs.species_id = ?" );
2157
2158 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2159 $sth->execute();
2160
2161 if(!$sth->rows()) {
2162 $sth->finish();
2163 throw("Clone $name not found in database");
2164 }
2165
2166 ($name) = $sth->fetchrow_array();
2167
2168 $sth->finish();
2169 }
2170
2171 my $clone = $self->fetch_by_region($clone_cs->name(), $name);
2172 return undef if(!$clone);
2173
2174 my @projection = @{$clone->project('toplevel')};
2175
2176 if(@projection != 1) {
2177 warning("$name is mapped to multiple locations.");
2178 }
2179
2180 return $projection[0]->[2]->expand($size, $size);
2181 }
2182
2183
2184 =head2 fetch_by_supercontig_name
2185
2186 Description: DEPRECATED. Use fetch_by_region(), Slice::project() and
2187 Slice::expand() instead
2188
2189 =cut
2190
2191 sub fetch_by_supercontig_name {
2192 my ($self,$name, $size) = @_;
2193
2194 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
2195
2196 my $csa = $self->db()->get_CoordSystemAdaptor();
2197 my $sc_level = $csa->fetch_by_name('supercontig');
2198
2199 if(!$sc_level) {
2200 warning('No supercontig coordinate system exists for this species.');
2201 return undef;
2202 }
2203
2204 my $sc_slice = $self->fetch_by_region($sc_level->name(),$name);
2205
2206 return undef if(!$sc_slice);
2207
2208 my @projection = @{$sc_slice->project('toplevel')};
2209
2210 if(@projection > 1) {
2211 warning("$name is mapped to multiple locations in toplevel");
2212 }
2213
2214 return $projection[0]->[2]->expand($size, $size);
2215 }
2216
2217
2218
2219
2220 =head2 list_overlapping_supercontigs
2221
2222 Description: DEPRECATED use Slice::project instead
2223
2224 =cut
2225
2226 sub list_overlapping_supercontigs {
2227 my ($self,$slice) = @_;
2228
2229 deprecate('Use Slice::project() instead.');
2230
2231 my $csa = $self->db()->get_CoordSystemAdaptor();
2232 my $sc_level = $csa->fetch_by_name('supercontig');
2233
2234 if(!$sc_level) {
2235 warning('No supercontig coordinate system exists for this species.');
2236 return undef;
2237 }
2238
2239 my @out;
2240 foreach my $seg (@{$slice->project($sc_level->name(), $sc_level->version)}){
2241 push @out, $seg->[2]->seq_region_name();
2242 }
2243
2244 return \@out;
2245 }
2246
2247
2248
2249 =head2 fetch_by_chr_name
2250
2251 Description: DEPRECATED. Use fetch by region instead
2252
2253 =cut
2254
2255 sub fetch_by_chr_name{
2256 my ($self,$chr_name) = @_;
2257 deprecate('Use fetch_by_region() instead.');
2258
2259 my $csa = $self->db->get_CoordSystemAdaptor();
2260
2261 my $top_cs = @{$csa->fetch_all()};
2262
2263 return $self->fetch_by_region($top_cs->name(),$chr_name,
2264 undef,undef,undef,$top_cs->version);
2265 }
2266
2267
2268
2269
2270
2271
2272 1;