0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::DBSQL::SliceAdaptor - A database aware adaptor responsible for
|
|
24 the creation of Slice objects.
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 use Bio::EnsEMBL::Utils::Slice qw(split_Slices);
|
|
29 use Bio::EnsEMBL::Registry;
|
|
30
|
|
31 Bio::EnsEMBL::Registry->load_registry_from_db(
|
|
32 -host => 'ensembldb.ensembl.org',
|
|
33 -user => 'anonymous'
|
|
34 );
|
|
35
|
|
36 $slice_adaptor =
|
|
37 Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
|
|
38
|
|
39 # get a slice on the entire chromosome X
|
|
40 $chr_slice = $slice_adaptor->fetch_by_region( 'chromosome', 'X' );
|
|
41
|
|
42 # get a slice for each clone in the database
|
|
43 foreach $cln_slice ( @{ $slice_adaptor->fetch_all('clone') } ) {
|
|
44 # do something with clone
|
|
45 }
|
|
46
|
|
47 # get a slice which is part of NT_004321
|
|
48 $spctg_slice =
|
|
49 $slice_adaptor->fetch_by_region( 'supercontig', 'NT_004321',
|
|
50 200_000, 600_000 );
|
|
51
|
|
52 # get all non-redundant slices from the highest possible coordinate
|
|
53 # systems
|
|
54 $slices = $slice_adaptor->fetch_all('toplevel');
|
|
55
|
|
56 # include non-reference regions
|
|
57 $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 1 );
|
|
58
|
|
59 # include non-duplicate regions
|
|
60 $slices = $slice_adaptor->fetch_all( 'toplevel', undef, 0, 1 );
|
|
61
|
|
62 # split up a list of slices into smaller slices
|
|
63 $overlap = 1000;
|
|
64 $max_length = 1e6;
|
|
65 $slices = split_Slices( $slices, $max_length, $overlap );
|
|
66
|
|
67 # store a list of slice names in a file
|
|
68 open( FILE, ">$filename" ) or die("Could not open file $filename");
|
|
69 foreach my $slice (@$slices) {
|
|
70 print FILE $slice->name(), "\n";
|
|
71 }
|
|
72 close FILE;
|
|
73
|
|
74 # retreive a list of slices from a file
|
|
75 open( FILE, $filename ) or die("Could not open file $filename");
|
|
76 while ( $name = <FILE> ) {
|
|
77 chomp($name);
|
|
78 $slice = $slice_adaptor->fetch_by_name($name);
|
|
79 # do something with slice
|
|
80 }
|
|
81
|
|
82 =head1 DESCRIPTION
|
|
83
|
|
84 This module is responsible for fetching Slices representing genomic
|
|
85 regions from a database. A Details on how slices can be used are in the
|
|
86 Bio::EnsEMBL::Slice module.
|
|
87
|
|
88 =head1 METHODS
|
|
89
|
|
90 =cut
|
|
91
|
|
92
|
|
93 package Bio::EnsEMBL::DBSQL::SliceAdaptor;
|
|
94 use vars qw(@ISA);
|
|
95 use strict;
|
|
96
|
|
97
|
|
98 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
|
|
99 use Bio::EnsEMBL::Slice;
|
|
100 use Bio::EnsEMBL::CircularSlice;
|
|
101 use Bio::EnsEMBL::Mapper;
|
|
102 use Bio::EnsEMBL::LRGSlice;
|
|
103 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
|
|
104 use Scalar::Util qw/looks_like_number/;
|
|
105
|
|
106 @ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
|
|
107
|
|
108 sub new {
|
|
109 my $caller = shift;
|
|
110
|
|
111 my $class = ref($caller) || $caller;
|
|
112
|
|
113 my $self = $class->SUPER::new(@_);
|
|
114
|
|
115 # use a cache which is shared and also used by the assembly
|
|
116 # mapper adaptor
|
|
117
|
|
118 my $seq_region_cache = $self->db->get_SeqRegionCache();
|
|
119
|
|
120 $self->{'sr_name_cache'} = $seq_region_cache->{'name_cache'};
|
|
121 $self->{'sr_id_cache'} = $seq_region_cache->{'id_cache'};
|
|
122
|
|
123 $self->{'lrg_region_test'} = undef;
|
|
124 my $meta_container = $self->db->get_MetaContainer();
|
|
125 my @values = $meta_container->list_value_by_key("LRG");
|
|
126 if(scalar(@values) and $values[0]->[0]){
|
|
127 $self->{'lrg_region_test'} = $values[0]->[0];
|
|
128 }
|
|
129 return $self;
|
|
130 }
|
|
131
|
|
132
|
|
133 =head2 fetch_by_region
|
|
134
|
|
135 Arg [1] : string $coord_system_name (optional)
|
|
136 The name of the coordinate system of the slice to be created
|
|
137 This may be a name of an actual coordinate system or an alias
|
|
138 to a coordinate system. Valid aliases are 'seqlevel' or
|
|
139 'toplevel'.
|
|
140 Arg [2] : string $seq_region_name
|
|
141 The name of the sequence region that the slice will be
|
|
142 created on.
|
|
143 Arg [3] : int $start (optional, default = 1)
|
|
144 The start of the slice on the sequence region
|
|
145 Arg [4] : int $end (optional, default = seq_region length)
|
|
146 The end of the slice on the sequence region
|
|
147 Arg [5] : int $strand (optional, default = 1)
|
|
148 The orientation of the slice on the sequence region
|
|
149 Arg [6] : string $version (optional, default = default version)
|
|
150 The version of the coordinate system to use (e.g. NCBI33)
|
|
151 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
|
|
152 If true (non-zero), do not use "fuzzy matching" (see below).
|
|
153 Example : $slice = $slice_adaptor->fetch_by_region('chromosome', 'X');
|
|
154 $slice = $slice_adaptor->fetch_by_region('clone', 'AC008066.4');
|
|
155 Description: Retrieves a slice on the requested region. At a minimum the
|
|
156 name the name of the seq_region to fetch must be provided.
|
|
157
|
|
158 If no coordinate system name is provided than a slice on the
|
|
159 highest ranked coordinate system with a matching
|
|
160 seq_region_name will be returned. If a version but no
|
|
161 coordinate system name is provided, the same behaviour will
|
|
162 apply, but only coordinate systems of the appropriate version
|
|
163 are considered. The same applies if the 'toplevel' coordinate
|
|
164 system is specified, however in this case the version is
|
|
165 ignored. The coordinate system should always be specified if
|
|
166 it is known, since this is unambiguous and faster.
|
|
167
|
|
168 Some fuzzy matching is performed if no exact match for
|
|
169 the provided name is found. This allows clones to be
|
|
170 fetched even when their version is not known. For
|
|
171 example fetch_by_region('clone', 'AC008066') will
|
|
172 retrieve the sequence_region with name 'AC008066.4'.
|
|
173
|
|
174 The fuzzy matching can be turned off by setting the
|
|
175 $no_fuzz argument to a true value.
|
|
176
|
|
177 If the requested seq_region is not found in the database undef
|
|
178 is returned.
|
|
179
|
|
180 Returntype : Bio::EnsEMBL::Slice or undef
|
|
181 Exceptions : throw if no seq_region_name is provided
|
|
182 throw if invalid coord_system_name is provided
|
|
183 throw if start > end is provided
|
|
184 Caller : general
|
|
185 Status : Stable
|
|
186
|
|
187 =cut
|
|
188
|
|
189
|
|
190 #
|
|
191 # ARNE: This subroutine needs simplification!!
|
|
192 #
|
|
193 sub fetch_by_region {
|
|
194 my ( $self, $coord_system_name, $seq_region_name, $start, $end,
|
|
195 $strand, $version, $no_fuzz )
|
|
196 = @_;
|
|
197
|
|
198 if ( !defined($start) ) { $start = 1 }
|
|
199 if ( !defined($strand) ) { $strand = 1 }
|
|
200
|
|
201 if ( !defined($seq_region_name) ) {
|
|
202 throw('seq_region_name argument is required');
|
|
203 }
|
|
204
|
|
205 my $cs;
|
|
206 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
207
|
|
208 if ( defined($coord_system_name) ) {
|
|
209 $cs = $csa->fetch_by_name( $coord_system_name, $version );
|
|
210
|
|
211 ## REMOVE THESE THREE LINES WHEN STICKLEBACK DB IS FIXED!
|
|
212 ## Anne/ap5 (2007-10-09):
|
|
213 # The problem was that the stickleback genebuild called the
|
|
214 # chromosomes 'groups', which meant they weren't being picked out by
|
|
215 # the karyotype drawing code. Apparently they are usually called
|
|
216 # 'groups' in the stickleback community, even though they really are
|
|
217 # chromosomes!
|
|
218
|
|
219 if ( !defined($cs) && $coord_system_name eq 'chromosome' ) {
|
|
220 $cs = $csa->fetch_by_name( 'group', $version );
|
|
221 }
|
|
222
|
|
223 if ( !defined($cs) ) {
|
|
224 throw( sprintf( "Unknown coordinate system:\n"
|
|
225 . "name='%s' version='%s'\n",
|
|
226 $coord_system_name, $version ) );
|
|
227 }
|
|
228
|
|
229 # fetching by toplevel is same as fetching w/o name or version
|
|
230 if ( $cs->is_top_level() ) {
|
|
231 $cs = undef;
|
|
232 $version = undef;
|
|
233 }
|
|
234
|
|
235 } ## end if ( defined($coord_system_name...))
|
|
236
|
|
237 my $constraint;
|
|
238 my $sql;
|
|
239 my @bind_params;
|
|
240 my $key;
|
|
241
|
|
242 if ( defined($cs) ) {
|
|
243 $sql = sprintf( "SELECT sr.name, sr.seq_region_id, sr.length, %d "
|
|
244 . "FROM seq_region sr ",
|
|
245 $cs->dbID() );
|
|
246
|
|
247 $constraint = "AND sr.coord_system_id = ?";
|
|
248 push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] );
|
|
249
|
|
250 $key = "$seq_region_name:" . $cs->dbID();
|
|
251 } else {
|
|
252 $sql =
|
|
253 "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id "
|
|
254 . "FROM seq_region sr, coord_system cs ";
|
|
255
|
|
256 $constraint = "AND sr.coord_system_id = cs.coord_system_id "
|
|
257 . "AND cs.species_id = ? ";
|
|
258 push( @bind_params, [ $self->species_id(), SQL_INTEGER ] );
|
|
259
|
|
260 if ( defined($version) ) {
|
|
261 $constraint .= "AND cs.version = ? ";
|
|
262 push( @bind_params, [ $version, SQL_VARCHAR ] );
|
|
263 }
|
|
264
|
|
265 $constraint .= "ORDER BY cs.rank ASC";
|
|
266 }
|
|
267
|
|
268 # check the cache so we only go to the db if necessary
|
|
269 my $length;
|
|
270 my $arr;
|
|
271
|
|
272 if ( defined($key) ) { $arr = $self->{'sr_name_cache'}->{$key} }
|
|
273
|
|
274 if ( defined($arr) ) {
|
|
275 $length = $arr->[3];
|
|
276 } else {
|
|
277 my $sth =
|
|
278 $self->prepare( $sql . "WHERE sr.name = ? " . $constraint );
|
|
279
|
|
280 unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] );
|
|
281
|
|
282 my $pos = 0;
|
|
283 foreach my $param (@bind_params) {
|
|
284 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
|
|
285 }
|
|
286
|
|
287 $sth->execute();
|
|
288
|
|
289 if ( $sth->rows() == 0 ) {
|
|
290 $sth->finish();
|
|
291
|
|
292
|
|
293 # try synonyms
|
|
294 my $syn_sql_sth = $self->prepare("select s.name from seq_region s, seq_region_synonym ss where s.seq_region_id = ss.seq_region_id and ss.synonym = ?");
|
|
295 $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR);
|
|
296 $syn_sql_sth->execute();
|
|
297 my $new_name;
|
|
298 $syn_sql_sth->bind_columns( \$new_name);
|
|
299 if($syn_sql_sth->fetch){
|
|
300 $syn_sql_sth->finish;
|
|
301 return $self->fetch_by_region($coord_system_name, $new_name, $start, $end, $strand, $version, $no_fuzz);
|
|
302 }
|
|
303 $syn_sql_sth->finish;
|
|
304
|
|
305
|
|
306 if ($no_fuzz) { return undef }
|
|
307
|
|
308 # Do fuzzy matching, assuming that we are just missing a version
|
|
309 # on the end of the seq_region name.
|
|
310
|
|
311 $sth =
|
|
312 $self->prepare( $sql . " WHERE sr.name LIKE ? " . $constraint );
|
|
313
|
|
314 $bind_params[0] =
|
|
315 [ sprintf( '%s.%%', $seq_region_name ), SQL_VARCHAR ];
|
|
316
|
|
317 $pos = 0;
|
|
318 foreach my $param (@bind_params) {
|
|
319 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
|
|
320 }
|
|
321
|
|
322 $sth->execute();
|
|
323
|
|
324 my $prefix_len = length($seq_region_name) + 1;
|
|
325 my $high_ver = undef;
|
|
326 my $high_cs = $cs;
|
|
327
|
|
328 # Find the fuzzy-matched seq_region with the highest postfix
|
|
329 # (which ought to be a version).
|
|
330
|
|
331 my ( $tmp_name, $id, $tmp_length, $cs_id );
|
|
332 $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) );
|
|
333
|
|
334 my $i = 0;
|
|
335
|
|
336 while ( $sth->fetch ) {
|
|
337 my $tmp_cs =
|
|
338 ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) );
|
|
339
|
|
340 # cache values for future reference
|
|
341 my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ];
|
|
342 $self->{'sr_name_cache'}->{"$tmp_name:$cs_id"} = $arr;
|
|
343 $self->{'sr_id_cache'}->{"$id"} = $arr;
|
|
344
|
|
345 my $tmp_ver = substr( $tmp_name, $prefix_len );
|
|
346
|
|
347 # skip versions which are non-numeric and apparently not
|
|
348 # versions
|
|
349 if ( $tmp_ver !~ /^\d+$/ ) { next }
|
|
350
|
|
351 # take version with highest num, if two versions match take one
|
|
352 # with highest ranked coord system (lowest num)
|
|
353 if ( !defined($high_ver)
|
|
354 || $tmp_ver > $high_ver
|
|
355 || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank )
|
|
356 )
|
|
357 {
|
|
358 $seq_region_name = $tmp_name;
|
|
359 $length = $tmp_length;
|
|
360 $high_ver = $tmp_ver;
|
|
361 $high_cs = $tmp_cs;
|
|
362 }
|
|
363
|
|
364 $i++;
|
|
365 } ## end while ( $sth->fetch )
|
|
366 $sth->finish();
|
|
367
|
|
368 # warn if fuzzy matching found more than one result
|
|
369 if ( $i > 1 ) {
|
|
370 warning(
|
|
371 sprintf(
|
|
372 "Fuzzy matching of seq_region_name "
|
|
373 . "returned more than one result.\n"
|
|
374 . "You might want to check whether the returned seq_region\n"
|
|
375 . "(%s:%s) is the one you intended to fetch.\n",
|
|
376 $high_cs->name(), $seq_region_name ) );
|
|
377 }
|
|
378
|
|
379 $cs = $high_cs;
|
|
380
|
|
381 # return if we did not find any appropriate match:
|
|
382 if ( !defined($high_ver) ) { return undef }
|
|
383
|
|
384 } else {
|
|
385
|
|
386 my ( $id, $cs_id );
|
|
387 ( $seq_region_name, $id, $length, $cs_id ) =
|
|
388 $sth->fetchrow_array();
|
|
389 $sth->finish();
|
|
390
|
|
391 # cache to speed up for future queries
|
|
392 my $arr = [ $id, $seq_region_name, $cs_id, $length ];
|
|
393 $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
|
|
394 $self->{'sr_id_cache'}->{"$id"} = $arr;
|
|
395 $cs = $csa->fetch_by_dbID($cs_id);
|
|
396 }
|
|
397 } ## end else [ if ( defined($arr) ) ]
|
|
398
|
|
399 if ( !defined($end) ) { $end = $length }
|
|
400
|
|
401 #If this was given then check if we've got a circular seq region otherwise
|
|
402 #let it fall through to the normal Slice method
|
|
403 if ( $end + 1 < $start ) {
|
|
404 my $cs_id = $cs->dbID();
|
|
405 my $seq_region_id = $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"}->[0];
|
|
406 if($self->is_circular($seq_region_id)) {
|
|
407 my $new_sl =
|
|
408 Bio::EnsEMBL::CircularSlice->new(
|
|
409 -COORD_SYSTEM => $cs,
|
|
410 -SEQ_REGION_NAME => $seq_region_name,
|
|
411 -SEQ_REGION_LENGTH => $length,
|
|
412 -START => $start,
|
|
413 -END => $end,
|
|
414 -STRAND => 1,
|
|
415 -ADAPTOR => $self );
|
|
416
|
|
417 return $new_sl;
|
|
418 }
|
|
419 }
|
|
420
|
|
421 if ( defined( $self->{'lrg_region_test'} )
|
|
422 and substr( $cs->name, 0, 3 ) eq $self->{'lrg_region_test'} )
|
|
423 {
|
|
424 return
|
|
425 Bio::EnsEMBL::LRGSlice->new( -COORD_SYSTEM => $cs,
|
|
426 -SEQ_REGION_NAME => $seq_region_name,
|
|
427 -SEQ_REGION_LENGTH => $length,
|
|
428 -START => $start,
|
|
429 -END => $end,
|
|
430 -STRAND => $strand,
|
|
431 -ADAPTOR => $self );
|
|
432 } else {
|
|
433 return
|
|
434 Bio::EnsEMBL::Slice->new_fast( {
|
|
435 'coord_system' => $cs,
|
|
436 'seq_region_name' => $seq_region_name,
|
|
437 'seq_region_length' => $length,
|
|
438 'start' => $start,
|
|
439 'end' => $end,
|
|
440 'strand' => $strand,
|
|
441 'adaptor' => $self } );
|
|
442 }
|
|
443 } ## end sub fetch_by_region
|
|
444
|
|
445 =head2 fetch_by_toplevel_location
|
|
446
|
|
447 Arg [1] : string $location
|
|
448 Ensembl formatted location. Can be a format like
|
|
449 C<name:start-end>, C<name:start..end>, C<name:start:end>,
|
|
450 C<name:start>, C<name>. We can also support strand
|
|
451 specification as a +/- or 1/-1.
|
|
452
|
|
453 Location names must be separated by a C<:>. All others can be
|
|
454 separated by C<..>, C<:> or C<->.
|
|
455 Arg[2] : boolean $no_warnings
|
|
456 Suppress warnings from this method
|
|
457 Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
|
|
458 my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
|
|
459 Description : Converts an Ensembl location/region into the sequence region
|
|
460 name, start and end and passes them onto C<fetch_by_region()>.
|
|
461 The code assumes that the required slice is on the top level
|
|
462 coordinate system. The code assumes that location formatting
|
|
463 is not perfect and will perform basic cleanup before parsing.
|
|
464 Returntype : Bio::EnsEMBL::Slice
|
|
465 Exceptions : If $location is false otherwise see C<fetch_by_region()>
|
|
466 Caller : General
|
|
467 Status : Beta
|
|
468
|
|
469 =cut
|
|
470
|
|
471 sub fetch_by_toplevel_location {
|
|
472 my ($self, $location, $no_warnings) = @_;
|
|
473
|
|
474 my ($seq_region_name, $start, $end, $strand) = $self->parse_location_to_values($location, $no_warnings);
|
|
475
|
|
476 if(! $seq_region_name) {
|
|
477 return;
|
|
478 }
|
|
479
|
|
480 if(defined $start && defined $end && $start > $end) {
|
|
481 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end";
|
|
482 }
|
|
483
|
|
484 my $coord_system_name = 'toplevel';
|
|
485 my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, undef, 0);
|
|
486 return unless $slice;
|
|
487
|
|
488 my $srl = $slice->seq_region_length();
|
|
489 my $name = $slice->seq_region_name();
|
|
490 if(defined $start && $start > $srl) {
|
|
491 throw "Cannot request a slice whose start ($start) is greater than $srl for $name.";
|
|
492 }
|
|
493 if(defined $end && $end > $srl) {
|
|
494 warning "Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings;
|
|
495 $slice->{end} = $srl;
|
|
496 }
|
|
497
|
|
498 return $slice;
|
|
499 }
|
|
500
|
|
501 =head2 parse_location_to_values
|
|
502
|
|
503 Arg [1] : string $location
|
|
504 Ensembl formatted location. Can be a format like
|
|
505 C<name:start-end>, C<name:start..end>, C<name:start:end>,
|
|
506 C<name:start>, C<name>. We can also support strand
|
|
507 specification as a +/- or 1/-1.
|
|
508
|
|
509 Location names must be separated by a C<:>. All others can be
|
|
510 separated by C<..>, C<:> or C<->.
|
|
511 Arg[2] : boolean $no_warnings
|
|
512 Suppress warnings from this method
|
|
513 Example : my ($name, $start, $end, $strand) = $sa->parse_location_to_values('X:1..100:1);
|
|
514 Description : Takes in an Ensembl location String and returns the parsed
|
|
515 values
|
|
516 Returntype : List. Contains name, start, end and strand
|
|
517
|
|
518 =cut
|
|
519
|
|
520
|
|
521 sub parse_location_to_values {
|
|
522 my ($self, $location, $no_warnings, $no_errors) = @_;
|
|
523
|
|
524 throw 'You must specify a location' if ! $location;
|
|
525
|
|
526 #cleanup any nomenclature like 1_000 or 1 000 or 1,000
|
|
527 my $number_seps_regex = qr/\s+|,|_/;
|
|
528 my $separator_regex = qr/(?:-|[.]{2}|\:)?/;
|
|
529 my $number_regex = qr/[0-9,_ E]+/xms;
|
|
530 my $strand_regex = qr/[+-1]|-1/xms;
|
|
531
|
|
532 my $regex = qr/^(\w+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
|
|
533 my ($seq_region_name, $start, $end, $strand);
|
|
534 if(($seq_region_name, $start, $end, $strand) = $location =~ $regex) {
|
|
535
|
|
536 if(defined $strand) {
|
|
537 if(!looks_like_number($strand)) {
|
|
538 $strand = ($strand eq '+') ? 1 : -1;
|
|
539 }
|
|
540 }
|
|
541
|
|
542 if(defined $start) {
|
|
543 $start =~ s/$number_seps_regex//g;
|
|
544 if($start < 1) {
|
|
545 warning "Start was less than 1 (${start}) which is not allowed. Resetting to 1" if ! $no_warnings;
|
|
546 $start = 1;
|
|
547 }
|
|
548 }
|
|
549 if(defined $end) {
|
|
550 $end =~ s/$number_seps_regex//g;
|
|
551 if($end < 1) {
|
|
552 throw "Cannot request negative or 0 end indexes through this interface. Given $end but expected something greater than 0" unless $no_errors;
|
|
553 }
|
|
554 }
|
|
555
|
|
556 if(defined $start && defined $end && $start > $end) {
|
|
557 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end" unless $no_errors;
|
|
558 }
|
|
559 }
|
|
560
|
|
561 return ($seq_region_name, $start, $end, $strand);
|
|
562 }
|
|
563
|
|
564 =head2 fetch_by_region_unique
|
|
565
|
|
566 Arg [1] : string $coord_system_name (optional)
|
|
567 The name of the coordinate system of the slice to be created
|
|
568 This may be a name of an actual coordinate system or an alias
|
|
569 to a coordinate system. Valid aliases are 'seqlevel' or
|
|
570 'toplevel'.
|
|
571 Arg [2] : string $seq_region_name
|
|
572 The name of the sequence region that the slice will be
|
|
573 created on.
|
|
574 Arg [3] : int $start (optional, default = 1)
|
|
575 The start of the slice on the sequence region
|
|
576 Arg [4] : int $end (optional, default = seq_region length)
|
|
577 The end of the slice on the sequence region
|
|
578 Arg [5] : int $strand (optional, default = 1)
|
|
579 The orientation of the slice on the sequence region
|
|
580 Arg [6] : string $version (optional, default = default version)
|
|
581 The version of the coordinate system to use (e.g. NCBI33)
|
|
582 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
|
|
583 If true (non-zero), do not use "fuzzy matching" (see below).
|
|
584 Example : $slice = $slice_adaptor->fetch_by_region_unique('chromosome', 'HSCHR6_MHC_COX');
|
|
585 Description: Retrieves a slice on the requested region but returns only the unique
|
|
586 parts of the slice. At a minimum the
|
|
587 name the name of the seq_region to fetch must be provided.
|
|
588
|
|
589 If no coordinate system name is provided than a slice on the
|
|
590 highest ranked coordinate system with a matching
|
|
591 seq_region_name will be returned. If a version but no
|
|
592 coordinate system name is provided, the same behaviour will
|
|
593 apply, but only coordinate systems of the appropriate version
|
|
594 are considered. The same applies if the 'toplevel' coordinate
|
|
595 system is specified, however in this case the version is
|
|
596 ignored. The coordinate system should always be specified if
|
|
597 it is known, since this is unambiguous and faster.
|
|
598
|
|
599 Some fuzzy matching is performed if no exact match for
|
|
600 the provided name is found. This allows clones to be
|
|
601 fetched even when their version is not known. For
|
|
602 example fetch_by_region('clone', 'AC008066') will
|
|
603 retrieve the sequence_region with name 'AC008066.4'.
|
|
604
|
|
605 The fuzzy matching can be turned off by setting the
|
|
606 $no_fuzz argument to a true value.
|
|
607
|
|
608 If the requested seq_region is not found in the database undef
|
|
609 is returned.
|
|
610
|
|
611 Returntype : listref Bio::EnsEMBL::Slice
|
|
612 Exceptions : throw if no seq_region_name is provided
|
|
613 throw if invalid coord_system_name is provided
|
|
614 throw if start > end is provided
|
|
615 Caller : general
|
|
616 Status : Stable
|
|
617
|
|
618 =cut
|
|
619
|
|
620 sub fetch_by_region_unique {
|
|
621 my $self = shift;
|
|
622
|
|
623 my @out = ();
|
|
624 my $slice = $self->fetch_by_region(@_);
|
|
625
|
|
626
|
|
627 if ( !exists( $self->{'asm_exc_cache'} ) ) {
|
|
628 $self->_build_exception_cache();
|
|
629 }
|
|
630
|
|
631 if ( exists(
|
|
632 $self->{'asm_exc_cache'}->{ $self->get_seq_region_id($slice) }
|
|
633 ) )
|
|
634 {
|
|
635 # Dereference symlinked assembly regions. Take out any regions
|
|
636 # which are symlinked because these are duplicates.
|
|
637 my @projection =
|
|
638 @{ $self->fetch_normalized_slice_projection($slice) };
|
|
639
|
|
640 foreach my $segment (@projection) {
|
|
641 if ( $segment->[2]->seq_region_name() eq $slice->seq_region_name()
|
|
642 && $segment->[2]->coord_system->equals( $slice->coord_system ) )
|
|
643 {
|
|
644 push( @out, $segment->[2] );
|
|
645 }
|
|
646 }
|
|
647 }
|
|
648
|
|
649 return \@out;
|
|
650 } ## end sub fetch_by_region_unique
|
|
651
|
|
652 =head2 fetch_by_name
|
|
653
|
|
654 Arg [1] : string $name
|
|
655 Example : $name = 'chromosome:NCBI34:X:1000000:2000000:1';
|
|
656 $slice = $slice_adaptor->fetch_by_name($name);
|
|
657 $slice2 = $slice_adaptor->fetch_by_name($slice3->name());
|
|
658 Description: Fetches a slice using a slice name (i.e. the value returned by
|
|
659 the Slice::name method). This is useful if you wish to
|
|
660 store a unique identifier for a slice in a file or database or
|
|
661 pass a slice over a network.
|
|
662 Slice::name allows you to serialise/marshall a slice and this
|
|
663 method allows you to deserialise/unmarshal it.
|
|
664
|
|
665 Returns undef if no seq_region with the provided name exists in
|
|
666 the database.
|
|
667
|
|
668 Returntype : Bio::EnsEMBL::Slice or undef
|
|
669 Exceptions : throw if incorrent arg provided
|
|
670 Caller : Pipeline
|
|
671 Status : Stable
|
|
672
|
|
673 =cut
|
|
674
|
|
675 sub fetch_by_name {
|
|
676 my $self = shift;
|
|
677 my $name = shift;
|
|
678
|
|
679 if(!$name) {
|
|
680 throw("name argument is required");
|
|
681 }
|
|
682
|
|
683 my @array = split(/:/,$name);
|
|
684
|
|
685 if(scalar(@array) < 3 || scalar(@array) > 6) {
|
|
686 throw("Malformed slice name [$name]. Format is " .
|
|
687 "coord_system:version:name:start:end:strand");
|
|
688 }
|
|
689
|
|
690 # Rearrange arguments to suit fetch_by_region
|
|
691
|
|
692 my @targetarray;
|
|
693
|
|
694 $targetarray[0]=$array[0];
|
|
695 $targetarray[5]=(($array[1]&&$array[1] ne "")?$array[1]:undef);
|
|
696 $targetarray[1]=(($array[2]&&$array[2] ne "")?$array[2]:undef);
|
|
697 $targetarray[2]=(($array[3]&&$array[3] ne "")?$array[3]:undef);
|
|
698 $targetarray[3]=(($array[4]&&$array[4] ne "")?$array[4]:undef);
|
|
699 $targetarray[4]=(($array[5]&&$array[5] ne "")?$array[5]:undef);
|
|
700 return $self->fetch_by_region(@targetarray);
|
|
701 }
|
|
702
|
|
703
|
|
704
|
|
705 =head2 fetch_by_seq_region_id
|
|
706
|
|
707 Arg [1] : string $seq_region_id
|
|
708 The internal identifier of the seq_region to create this slice
|
|
709 on
|
|
710 Arg [2] : optional start
|
|
711 Arg [3] : optional end
|
|
712 Arg [4] : optional strand
|
|
713 Example : $slice = $slice_adaptor->fetch_by_seq_region_id(34413);
|
|
714 Description: Creates a slice object of an entire seq_region using the
|
|
715 seq_region internal identifier to resolve the seq_region.
|
|
716 Returns undef if no such slice exists.
|
|
717 Returntype : Bio::EnsEMBL::Slice or undef
|
|
718 Exceptions : none
|
|
719 Caller : general
|
|
720 Status : Stable
|
|
721
|
|
722 =cut
|
|
723
|
|
724 sub fetch_by_seq_region_id {
|
|
725 my ( $self, $seq_region_id, $start, $end, $strand ) = @_;
|
|
726
|
|
727 my $arr = $self->{'sr_id_cache'}->{$seq_region_id};
|
|
728 my ( $name, $length, $cs, $cs_id );
|
|
729
|
|
730
|
|
731 if ( $arr && defined( $arr->[2] ) ) {
|
|
732 ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] );
|
|
733 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
|
|
734 } else {
|
|
735 my $sth =
|
|
736 $self->prepare( "SELECT sr.name, sr.coord_system_id, sr.length "
|
|
737 . "FROM seq_region sr "
|
|
738 . "WHERE sr.seq_region_id = ? " );
|
|
739
|
|
740 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
|
|
741 $sth->execute();
|
|
742
|
|
743 if ( $sth->rows() == 0 ) { return undef }
|
|
744
|
|
745 ( $name, $cs_id, $length ) = $sth->fetchrow_array();
|
|
746 $sth->finish();
|
|
747
|
|
748 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
|
|
749
|
|
750 #cache results to speed up repeated queries
|
|
751 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
|
|
752
|
|
753 $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
|
|
754 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
|
|
755 }
|
|
756
|
|
757 return
|
|
758 Bio::EnsEMBL::Slice->new_fast({
|
|
759 'coord_system' => $cs,
|
|
760 'seq_region_name' => $name,
|
|
761 'seq_region_length'=> $length,
|
|
762 'start' => $start || 1,
|
|
763 'end' => $end || $length,
|
|
764 'strand' => $strand || 1,
|
|
765 'adaptor' => $self} );
|
|
766 } ## end sub fetch_by_seq_region_id
|
|
767
|
|
768
|
|
769
|
|
770 =head2 get_seq_region_id
|
|
771
|
|
772 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
773 The slice to fetch a seq_region_id for
|
|
774 Example : $srid = $slice_adaptor->get_seq_region_id($slice);
|
|
775 Description: Retrieves the seq_region id (in this database) given a slice
|
|
776 Seq region ids are not stored on the slices themselves
|
|
777 because they are intended to be somewhat database independant
|
|
778 and seq_region_ids vary accross databases.
|
|
779 Returntype : int
|
|
780 Exceptions : throw if the seq_region of the slice is not in the db
|
|
781 throw if incorrect arg provided
|
|
782 Caller : BaseFeatureAdaptor
|
|
783 Status : Stable
|
|
784
|
|
785 =cut
|
|
786
|
|
787 sub get_seq_region_id {
|
|
788 my $self = shift;
|
|
789 my $slice = shift;
|
|
790
|
|
791 if(!$slice || !ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
|
|
792 throw('Slice argument is required');
|
|
793 }
|
|
794
|
|
795 my $seq_region_name = $slice->seq_region_name();
|
|
796 my $key = $seq_region_name.":".$slice->coord_system->dbID();
|
|
797 my $arr = $self->{'sr_name_cache'}->{"$key"};
|
|
798
|
|
799 if( $arr ) {
|
|
800 return $arr->[0];
|
|
801 }
|
|
802
|
|
803 my $cs_id = $slice->coord_system->dbID();
|
|
804
|
|
805 my $sth = $self->prepare("SELECT seq_region_id, length " .
|
|
806 "FROM seq_region " .
|
|
807 "WHERE name = ? AND coord_system_id = ?");
|
|
808
|
|
809 #force seq_region_name cast to string so mysql cannot treat as int
|
|
810 $sth->bind_param(1,"$seq_region_name",SQL_VARCHAR);
|
|
811 $sth->bind_param(2,$cs_id,SQL_INTEGER);
|
|
812 $sth->execute();
|
|
813
|
|
814 if($sth->rows() != 1) {
|
|
815 throw("Non existant or ambigous seq_region:\n" .
|
|
816 " coord_system=[$cs_id],\n" .
|
|
817 " name=[$seq_region_name],\n");
|
|
818
|
|
819 }
|
|
820
|
|
821 my($seq_region_id, $length) = $sth->fetchrow_array();
|
|
822 $sth->finish();
|
|
823
|
|
824 #cache information for future requests
|
|
825 $arr = [ $seq_region_id, $seq_region_name, $cs_id, $length ];
|
|
826
|
|
827 $self->{'sr_name_cache'}->{"$seq_region_name:$cs_id"} = $arr;
|
|
828 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
|
|
829
|
|
830 return $seq_region_id;
|
|
831 }
|
|
832
|
|
833
|
|
834
|
|
835 =head2 fetch_all
|
|
836
|
|
837 Arg [1] : string $coord_system_name
|
|
838 The name of the coordinate system to retrieve slices of.
|
|
839 This may be a name of an acutal coordinate system or an alias
|
|
840 to a coordinate system. Valid aliases are 'seqlevel' or
|
|
841 'toplevel'.
|
|
842 Arg [2] : string $coord_system_version (optional)
|
|
843 The version of the coordinate system to retrieve slices of
|
|
844 Arg [3] : bool $include_non_reference (optional)
|
|
845 If this argument is not provided then only reference slices
|
|
846 will be returned. If set, both reference and non refeference
|
|
847 slices will be rerurned.
|
|
848 Arg [4] : int $include_duplicates (optional)
|
|
849 If set duplicate regions will be returned.
|
|
850
|
|
851 NOTE: if you do not use this option and you have a PAR
|
|
852 (pseudo-autosomal region) at the beginning of your seq_region
|
|
853 then your slice will not start at position 1, so coordinates
|
|
854 retrieved from this slice might not be what you expected.
|
|
855
|
|
856 Arg[5] : bool $include_lrg (optional) (default 0)
|
|
857 If set lrg regions will be returned aswell.
|
|
858
|
|
859
|
|
860 Example : @chromos = @{$slice_adaptor->fetch_all('chromosome','NCBI33')};
|
|
861 @contigs = @{$slice_adaptor->fetch_all('contig')};
|
|
862
|
|
863 # get even non-reference regions
|
|
864 @slices = @{$slice_adaptor->fetch_all('toplevel',undef,1)};
|
|
865
|
|
866 # include duplicate regions (such as pseudo autosomal regions)
|
|
867 @slices = @{$slice_adaptor->fetch_all('toplevel', undef,0,1)};
|
|
868
|
|
869 Description: Retrieves slices of all seq_regions for a given coordinate
|
|
870 system. This is analagous to the methods fetch_all which were
|
|
871 formerly on the ChromosomeAdaptor, RawContigAdaptor and
|
|
872 CloneAdaptor classes. Slices fetched span the entire
|
|
873 seq_regions and are on the forward strand.
|
|
874 If the coordinate system with the provided name and version
|
|
875 does not exist an empty list is returned.
|
|
876 If the coordinate system name provided is 'toplevel', all
|
|
877 non-redundant toplevel slices are returned (note that any
|
|
878 coord_system_version argument is ignored in that case).
|
|
879
|
|
880 Retrieved slices can be broken into smaller slices using the
|
|
881 Bio::EnsEMBL::Utils::Slice module.
|
|
882
|
|
883 Returntype : listref of Bio::EnsEMBL::Slices
|
|
884 Exceptions : none
|
|
885 Caller : general
|
|
886 Status : Stable
|
|
887
|
|
888 =cut
|
|
889
|
|
890 sub fetch_all {
|
|
891 my $self = shift;
|
|
892 my $cs_name = shift;
|
|
893 my $cs_version = shift || '';
|
|
894
|
|
895 my ($include_non_reference, $include_duplicates, $include_lrg) = @_;
|
|
896
|
|
897 #
|
|
898 # verify existance of requested coord system and get its id
|
|
899 #
|
|
900 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
901 my $orig_cs = $csa->fetch_by_name($cs_name, $cs_version);
|
|
902
|
|
903 return [] if ( !$orig_cs );
|
|
904
|
|
905 my %bad_vals=();
|
|
906
|
|
907
|
|
908 #
|
|
909 # Get a hash of non reference seq regions
|
|
910 #
|
|
911 if ( !$include_non_reference ) {
|
|
912 my $sth =
|
|
913 $self->prepare( 'SELECT sr.seq_region_id '
|
|
914 . 'FROM seq_region sr, seq_region_attrib sra, '
|
|
915 . 'attrib_type at, coord_system cs '
|
|
916 . 'WHERE at.code = "non_ref" '
|
|
917 . 'AND sra.seq_region_id = sr.seq_region_id '
|
|
918 . 'AND at.attrib_type_id = sra.attrib_type_id '
|
|
919 . 'AND sr.coord_system_id = cs.coord_system_id '
|
|
920 . 'AND cs.species_id = ?' );
|
|
921
|
|
922 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
923 $sth->execute();
|
|
924
|
|
925 my ($seq_region_id);
|
|
926 $sth->bind_columns( \$seq_region_id );
|
|
927
|
|
928 while ( $sth->fetch() ) {
|
|
929 $bad_vals{$seq_region_id} = 1;
|
|
930 }
|
|
931 }
|
|
932
|
|
933 #
|
|
934 # if we do not want lrg's then add them to the bad list;
|
|
935 #
|
|
936 if ( !$include_lrg ) {
|
|
937 my $sth =
|
|
938 $self->prepare( 'SELECT sr.seq_region_id '
|
|
939 . 'FROM seq_region sr, seq_region_attrib sra, '
|
|
940 . 'attrib_type at, coord_system cs '
|
|
941 . 'WHERE at.code = "LRG" '
|
|
942 . 'AND sra.seq_region_id = sr.seq_region_id '
|
|
943 . 'AND at.attrib_type_id = sra.attrib_type_id '
|
|
944 . 'AND sr.coord_system_id = cs.coord_system_id '
|
|
945 . 'AND cs.species_id = ?' );
|
|
946
|
|
947 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
948 $sth->execute();
|
|
949
|
|
950 my ($seq_region_id);
|
|
951 $sth->bind_columns( \$seq_region_id );
|
|
952
|
|
953 while ( $sth->fetch() ) {
|
|
954 $bad_vals{$seq_region_id} = 1;
|
|
955 }
|
|
956 }
|
|
957
|
|
958 #
|
|
959 # Retrieve the seq_regions from the database
|
|
960 #
|
|
961
|
|
962 my $sth;
|
|
963 if ( $orig_cs->is_top_level() ) {
|
|
964 $sth =
|
|
965 $self->prepare( 'SELECT sr.seq_region_id, sr.name, '
|
|
966 . 'sr.length, sr.coord_system_id '
|
|
967 . 'FROM seq_region sr, seq_region_attrib sra, '
|
|
968 . 'attrib_type at, coord_system cs '
|
|
969 . 'WHERE at.code = "toplevel" '
|
|
970 . 'AND at.attrib_type_id = sra.attrib_type_id '
|
|
971 . 'AND sra.seq_region_id = sr.seq_region_id '
|
|
972 . 'AND sr.coord_system_id = cs.coord_system_id '
|
|
973 . 'AND cs.species_id = ?' );
|
|
974
|
|
975 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
976 $sth->execute();
|
|
977 } else {
|
|
978 $sth =
|
|
979 $self->prepare( 'SELECT sr.seq_region_id, sr.name, '
|
|
980 . 'sr.length, sr.coord_system_id '
|
|
981 . 'FROM seq_region sr '
|
|
982 . 'WHERE sr.coord_system_id = ?' );
|
|
983
|
|
984 $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER );
|
|
985 $sth->execute();
|
|
986 }
|
|
987
|
|
988 my ( $seq_region_id, $name, $length, $cs_id );
|
|
989 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
|
|
990
|
|
991 my $cache_count = 0;
|
|
992
|
|
993 my @out;
|
|
994 while($sth->fetch()) {
|
|
995 if(!defined($bad_vals{$seq_region_id})){
|
|
996 my $cs = $csa->fetch_by_dbID($cs_id);
|
|
997
|
|
998 if(!$cs) {
|
|
999 throw("seq_region $name references non-existent coord_system $cs_id.");
|
|
1000 }
|
|
1001
|
|
1002 #cache values for future reference, but stop adding to the cache once we
|
|
1003 #we know we have filled it up
|
|
1004 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
|
|
1005 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
|
|
1006
|
|
1007 $self->{'sr_name_cache'}->{"$name:$cs_id"} = $arr;
|
|
1008 $self->{'sr_id_cache'}->{"$seq_region_id"} = $arr;
|
|
1009
|
|
1010 $cache_count++;
|
|
1011 }
|
|
1012
|
|
1013 my $slice = Bio::EnsEMBL::Slice->new_fast({
|
|
1014 'start' => 1,
|
|
1015 'end' => $length,
|
|
1016 'strand' => 1,
|
|
1017 'seq_region_name' => $name,
|
|
1018 'seq_region_length'=> $length,
|
|
1019 'coord_system' => $cs,
|
|
1020 'adaptor' => $self});
|
|
1021
|
|
1022 if(!defined($include_duplicates) or !$include_duplicates){
|
|
1023 # test if this slice *could* have a duplicate (exception) region
|
|
1024 $self->_build_exception_cache() if(!exists $self->{'asm_exc_cache'});
|
|
1025 if(exists $self->{asm_exc_cache}->{$seq_region_id}) {
|
|
1026
|
|
1027 # Dereference symlinked assembly regions. Take out
|
|
1028 # any regions which are symlinked because these are duplicates
|
|
1029 my @projection = @{$self->fetch_normalized_slice_projection($slice)};
|
|
1030 foreach my $segment ( @projection) {
|
|
1031 if($segment->[2]->seq_region_name() eq $slice->seq_region_name() &&
|
|
1032 $segment->[2]->coord_system->equals($slice->coord_system)) {
|
|
1033 push @out, $segment->[2];
|
|
1034 }
|
|
1035 }
|
|
1036 } else {
|
|
1037 # no duplicate regions
|
|
1038 push @out, $slice;
|
|
1039 }
|
|
1040 } else {
|
|
1041 # we want duplicates anyway so do not do any checks
|
|
1042 push @out, $slice;
|
|
1043 }
|
|
1044 }
|
|
1045 }
|
|
1046
|
|
1047 return \@out;
|
|
1048 }
|
|
1049
|
|
1050 =head2 is_toplevel
|
|
1051 Arg : int seq_region_id
|
|
1052 Example : my $top = $slice_adptor->is_toplevel($seq_region_id)
|
|
1053 Description: Returns 1 if slice is a toplevel slice else 0
|
|
1054 Returntype : int
|
|
1055 Caller : Slice method is_toplevel
|
|
1056 Status : At Risk
|
|
1057
|
|
1058 =cut
|
|
1059
|
|
1060 sub is_toplevel {
|
|
1061 my $self = shift;
|
|
1062 my $id = shift;
|
|
1063
|
|
1064 my $sth = $self->prepare(
|
|
1065 "SELECT at.code from seq_region_attrib sra, attrib_type at "
|
|
1066 . "WHERE sra.seq_region_id = ? "
|
|
1067 . "AND at.attrib_type_id = sra.attrib_type_id "
|
|
1068 . "AND at.code = 'toplevel'" );
|
|
1069
|
|
1070 $sth->bind_param( 1, $id, SQL_INTEGER );
|
|
1071 $sth->execute();
|
|
1072
|
|
1073 my $code;
|
|
1074 $sth->bind_columns( \$code );
|
|
1075
|
|
1076 while ( $sth->fetch ) {
|
|
1077 $sth->finish;
|
|
1078 return 1;
|
|
1079 }
|
|
1080
|
|
1081 $sth->finish;
|
|
1082 return 0;
|
|
1083 }
|
|
1084
|
|
1085 =head2 is_reference
|
|
1086 Arg : int seq_region_id
|
|
1087 Example : my $reference = $slice_adptor->is_reference($seq_region_id)
|
|
1088 Description: Returns 1 if slice is a reference slice else 0
|
|
1089 Returntype : int
|
|
1090 Caller : Slice method is_reference
|
|
1091 Status : At Risk
|
|
1092
|
|
1093 =cut
|
|
1094
|
|
1095 sub is_reference {
|
|
1096 my $self = shift;
|
|
1097 my $id = shift;
|
|
1098
|
|
1099 my $sth = $self->prepare(
|
|
1100 "SELECT at.code from seq_region_attrib sra, attrib_type at "
|
|
1101 . "WHERE sra.seq_region_id = ? "
|
|
1102 . "AND at.attrib_type_id = sra.attrib_type_id "
|
|
1103 . "AND at.code = 'non_ref'" );
|
|
1104
|
|
1105 $sth->bind_param( 1, $id, SQL_INTEGER );
|
|
1106 $sth->execute();
|
|
1107
|
|
1108 my $code;
|
|
1109 $sth->bind_columns( \$code );
|
|
1110
|
|
1111 while ( $sth->fetch ) {
|
|
1112 $sth->finish;
|
|
1113 return 0;
|
|
1114 }
|
|
1115
|
|
1116 $sth->finish;
|
|
1117 return 1;
|
|
1118 }
|
|
1119
|
|
1120 =head2 is_circular
|
|
1121
|
|
1122 Arg[1] : int seq_region_id
|
|
1123 Example : my $circular = $slice_adptor->is_circular($seq_region_id);
|
|
1124 Description : Indicates if the sequence region was circular or not
|
|
1125 Returntype : Boolean
|
|
1126
|
|
1127 =cut
|
|
1128
|
|
1129 sub is_circular {
|
|
1130 my ($self, $id) = @_;
|
|
1131
|
|
1132 if (! defined $self->{is_circular}) {
|
|
1133 $self->_build_circular_slice_cache();
|
|
1134 }
|
|
1135
|
|
1136 return 0 if $self->{is_circular} == 0;
|
|
1137 return (exists $self->{circular_sr_id_cache}->{$id}) ? 1 : 0;
|
|
1138 }
|
|
1139
|
|
1140 =head2 fetch_by_band
|
|
1141
|
|
1142 Title : fetch_by_band
|
|
1143 Usage :
|
|
1144 Function: Does not work please use fetch_by_chr_band
|
|
1145 Example :
|
|
1146 Returns : Bio::EnsEMBL::Slice
|
|
1147 Args : the band name
|
|
1148 Status : AT RISK
|
|
1149
|
|
1150 =cut
|
|
1151
|
|
1152 sub fetch_by_band {
|
|
1153 my ($self,$band) = @_;
|
|
1154
|
|
1155 my $sth = $self->dbc->prepare
|
|
1156 ("select s.name,max(k.seq_region_id)-min(k.seq_region_id, min(k.seq_region_start), max(k.seq_region_id) " .
|
|
1157 "from karyotype as k " .
|
|
1158 "where k.band like ? and k.seq_region_id = s.seq_region_id");
|
|
1159
|
|
1160 $sth->bind_param(1,"$band%",SQL_VARCHAR);
|
|
1161 $sth->execute();
|
|
1162 my ( $seq_region_name, $discrepancy, $seq_region_start, $seq_region_end) = $sth->fetchrow_array;
|
|
1163
|
|
1164 if($seq_region_name && $discrepancy>0) {
|
|
1165 throw("Band maps to multiple seq_regions");
|
|
1166 } else {
|
|
1167 return $self->fetch_by_region('toplevel',$seq_region_name,$seq_region_start,$seq_region_end);
|
|
1168 }
|
|
1169 throw("Band not recognised in database");
|
|
1170 }
|
|
1171
|
|
1172 =head2 fetch_by_chr_band
|
|
1173
|
|
1174 Title : fetch_by_chr_band
|
|
1175 Usage :
|
|
1176 Function: create a Slice representing a series of bands
|
|
1177 Example :
|
|
1178 Returns : Bio::EnsEMBL::Slice
|
|
1179 Args : the band name
|
|
1180 Status : Stable
|
|
1181
|
|
1182 =cut
|
|
1183
|
|
1184 sub fetch_by_chr_band {
|
|
1185 my ( $self, $chr, $band ) = @_;
|
|
1186
|
|
1187 my $chr_slice = $self->fetch_by_region( 'toplevel', $chr );
|
|
1188 my $seq_region_id = $self->get_seq_region_id($chr_slice);
|
|
1189
|
|
1190 my $sth =
|
|
1191 $self->prepare( 'SELECT MIN(k.seq_region_start), '
|
|
1192 . 'MAX(k.seq_region_end) '
|
|
1193 . 'FROM karyotype k '
|
|
1194 . 'WHERE k.seq_region_id = ? '
|
|
1195 . 'AND k.band LIKE ?' );
|
|
1196
|
|
1197 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
|
|
1198 $sth->bind_param( 2, "$band%", SQL_VARCHAR );
|
|
1199 $sth->execute();
|
|
1200
|
|
1201 my ( $slice_start, $slice_end ) = $sth->fetchrow_array;
|
|
1202
|
|
1203 if ( defined $slice_start ) {
|
|
1204 return
|
|
1205 $self->fetch_by_region( 'toplevel', $chr,
|
|
1206 $slice_start, $slice_end );
|
|
1207 }
|
|
1208
|
|
1209 throw("Band not recognised in database");
|
|
1210 } ## end sub fetch_by_chr_band
|
|
1211
|
|
1212
|
|
1213
|
|
1214 =head2 fetch_by_exon_stable_id
|
|
1215
|
|
1216 Arg [1] : string $exonid
|
|
1217 The stable id of the exon around which the slice is
|
|
1218 desired
|
|
1219 Arg [2] : (optional) int $size
|
|
1220 The length of the flanking regions the slice should encompass
|
|
1221 on either side of the exon (0 by default)
|
|
1222 Example : $slc = $sa->fetch_by_exon_stable_id('ENSE00000302930',10);
|
|
1223 Description: Creates a slice around the region of the specified exon.
|
|
1224 If a context size is given, the slice is extended by that
|
|
1225 number of basepairs on either side of the exon.
|
|
1226
|
|
1227 The slice will be created in the exon's native coordinate system
|
|
1228 and in the forward orientation.
|
|
1229 Returntype : Bio::EnsEMBL::Slice
|
|
1230 Exceptions : Thrown if the exon is not in the database.
|
|
1231 Caller : general
|
|
1232 Status : Stable
|
|
1233
|
|
1234 =cut
|
|
1235
|
|
1236 sub fetch_by_exon_stable_id{
|
|
1237 my ($self,$exonid,$size) = @_;
|
|
1238
|
|
1239 throw('Exon argument is required.') if(!$exonid);
|
|
1240
|
|
1241 my $ea = $self->db->get_ExonAdaptor;
|
|
1242 my $exon = $ea->fetch_by_stable_id($exonid);
|
|
1243
|
|
1244 throw("Exon [$exonid] does not exist in DB.") if(!$exon);
|
|
1245
|
|
1246 return $self->fetch_by_Feature($exon, $size);
|
|
1247 }
|
|
1248
|
|
1249 =head2 fetch_by_transcript_stable_id
|
|
1250
|
|
1251 Arg [1] : string $transcriptid
|
|
1252 The stable id of the transcript around which the slice is
|
|
1253 desired
|
|
1254 Arg [2] : (optional) int $size
|
|
1255 The length of the flanking regions the slice should encompass
|
|
1256 on either side of the transcript (0 by default)
|
|
1257 Example : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930',10);
|
|
1258 Description: Creates a slice around the region of the specified transcript.
|
|
1259 If a context size is given, the slice is extended by that
|
|
1260 number of basepairs on either side of the
|
|
1261 transcript.
|
|
1262
|
|
1263 The slice will be created in the transcript's native coordinate
|
|
1264 system and in the forward orientation.
|
|
1265 Returntype : Bio::EnsEMBL::Slice
|
|
1266 Exceptions : Thrown if the transcript is not in the database.
|
|
1267 Caller : general
|
|
1268 Status : Stable
|
|
1269
|
|
1270 =cut
|
|
1271
|
|
1272 sub fetch_by_transcript_stable_id{
|
|
1273 my ($self,$transcriptid,$size) = @_;
|
|
1274
|
|
1275 throw('Transcript argument is required.') if(!$transcriptid);
|
|
1276
|
|
1277 my $ta = $self->db->get_TranscriptAdaptor;
|
|
1278 my $transcript = $ta->fetch_by_stable_id($transcriptid);
|
|
1279
|
|
1280 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
|
|
1281
|
|
1282 return $self->fetch_by_Feature($transcript, $size);
|
|
1283 }
|
|
1284
|
|
1285
|
|
1286
|
|
1287 =head2 fetch_by_transcript_id
|
|
1288
|
|
1289 Arg [1] : int $transcriptid
|
|
1290 The unique database identifier of the transcript around which
|
|
1291 the slice is desired
|
|
1292 Arg [2] : (optional) int $size
|
|
1293 The length of the flanking regions the slice should encompass
|
|
1294 on either side of the transcript (0 by default)
|
|
1295 Example : $slc = $sa->fetch_by_transcript_id(24, 1000);
|
|
1296 Description: Creates a slice around the region of the specified transcript.
|
|
1297 If a context size is given, the slice is extended by that
|
|
1298 number of basepairs on either side of the
|
|
1299 transcript.
|
|
1300
|
|
1301 The slice will be created in the transcript's native coordinate
|
|
1302 system and in the forward orientation.
|
|
1303 Returntype : Bio::EnsEMBL::Slice
|
|
1304 Exceptions : throw on incorrect args
|
|
1305 throw if transcript is not in database
|
|
1306 Caller : general
|
|
1307 Status : Stable
|
|
1308
|
|
1309 =cut
|
|
1310
|
|
1311 sub fetch_by_transcript_id {
|
|
1312 my ($self,$transcriptid,$size) = @_;
|
|
1313
|
|
1314 throw('Transcript id argument is required.') if(!$transcriptid);
|
|
1315
|
|
1316 my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
|
|
1317 my $transcript = $transcript_adaptor->fetch_by_dbID($transcriptid);
|
|
1318
|
|
1319 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
|
|
1320
|
|
1321 return $self->fetch_by_Feature($transcript, $size);
|
|
1322 }
|
|
1323
|
|
1324
|
|
1325
|
|
1326 =head2 fetch_by_gene_stable_id
|
|
1327
|
|
1328 Arg [1] : string $geneid
|
|
1329 The stable id of the gene around which the slice is
|
|
1330 desired
|
|
1331 Arg [2] : (optional) int $size
|
|
1332 The length of the flanking regions the slice should encompass
|
|
1333 on either side of the gene (0 by default)
|
|
1334 Example : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123',10);
|
|
1335 Description: Creates a slice around the region of the specified gene.
|
|
1336 If a context size is given, the slice is extended by that
|
|
1337 number of basepairs on either side of the gene.
|
|
1338
|
|
1339 The slice will be created in the gene's native coordinate system
|
|
1340 and in the forward orientation.
|
|
1341 Returntype : Bio::EnsEMBL::Slice
|
|
1342 Exceptions : throw on incorrect args
|
|
1343 throw if transcript does not exist
|
|
1344 Caller : general
|
|
1345 Status : Stable
|
|
1346
|
|
1347 =cut
|
|
1348
|
|
1349 sub fetch_by_gene_stable_id {
|
|
1350 my ($self,$geneid,$size) = @_;
|
|
1351
|
|
1352 throw('Gene argument is required.') if(!$geneid);
|
|
1353
|
|
1354 my $gene_adaptor = $self->db->get_GeneAdaptor();
|
|
1355 my $gene = $gene_adaptor->fetch_by_stable_id($geneid);
|
|
1356
|
|
1357 throw("Gene [$geneid] does not exist in DB.") if(!$gene);
|
|
1358
|
|
1359 return $self->fetch_by_Feature($gene, $size);
|
|
1360 }
|
|
1361
|
|
1362
|
|
1363
|
|
1364 =head2 fetch_by_Feature
|
|
1365
|
|
1366 Arg [1] : Bio::EnsEMBL::Feature $feat
|
|
1367 The feature to fetch the slice around
|
|
1368 Arg [2] : int size (optional)
|
|
1369 The desired number of flanking basepairs around the feature.
|
|
1370 The size may also be provided as a percentage of the feature
|
|
1371 size such as 200% or 80.5%.
|
|
1372 Example : $slice = $slice_adaptor->fetch_by_Feature($feat, 100);
|
|
1373 Description: Retrieves a slice around a specific feature. All this really
|
|
1374 does is return a resized version of the slice that the feature
|
|
1375 is already on. Note that slices returned from this method
|
|
1376 are always on the forward strand of the seq_region regardless of
|
|
1377 the strandedness of the feature passed in.
|
|
1378 Returntype : Bio::EnsEMBL::Slice
|
|
1379 Exceptions : throw if the feature does not have an attached slice
|
|
1380 throw if feature argument is not provided
|
|
1381 Caller : fetch_by_gene_stable_id, fetch_by_transcript_stable_id,
|
|
1382 fetch_by_gene_id, fetch_by_transcript_id
|
|
1383 Status : Stable
|
|
1384
|
|
1385 =cut
|
|
1386
|
|
1387 sub fetch_by_Feature{
|
|
1388 my ($self, $feature, $size) = @_;
|
|
1389
|
|
1390 $size ||= 0;
|
|
1391
|
|
1392 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
|
|
1393 throw('Feature argument expected.');
|
|
1394 }
|
|
1395
|
|
1396 my $slice = $feature->slice();
|
|
1397 if(!$slice || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice') )) {
|
|
1398 throw('Feature must be attached to a valid slice.');
|
|
1399 }
|
|
1400
|
|
1401
|
|
1402 my $fstart = $feature->start();
|
|
1403 my $fend = $feature->end();
|
|
1404 if(!defined($fstart) || !defined($fend)) {
|
|
1405 throw('Feature must have defined start and end.');
|
|
1406 }
|
|
1407
|
|
1408 #convert the feature slice coordinates to seq_region coordinates
|
|
1409 my $slice_start = $slice->start();
|
|
1410 my $slice_end = $slice->end();
|
|
1411 my $slice_strand = $slice->strand();
|
|
1412 if($slice_start != 1 || $slice_strand != 1) {
|
|
1413 if($slice_strand == 1) {
|
|
1414 $fstart = $fstart + $slice_start - 1;
|
|
1415 $fend = $fend + $slice_start - 1;
|
|
1416 } else {
|
|
1417 my $tmp_start = $fstart;
|
|
1418 $fstart = $slice_end - $fend + 1;
|
|
1419 $fend = $slice_end - $tmp_start + 1;
|
|
1420 }
|
|
1421 }
|
|
1422
|
|
1423 ## Size may be stored as a %age of the length of the feature
|
|
1424 ## Size = 100% gives no context
|
|
1425 ## Size = 200% gives context - 50% the size of the feature either side of
|
|
1426 ## feature
|
|
1427
|
|
1428 $size = int( ($1-100)/200 * ($fend-$fstart+1) ) if( $size =~/([\d+\.]+)%/ );
|
|
1429
|
|
1430 #return a new slice covering the region of the feature
|
|
1431 my $S = Bio::EnsEMBL::Slice->new_fast({
|
|
1432 'seq_region_name' => $slice->seq_region_name,
|
|
1433 'seq_region_length' => $slice->seq_region_length,
|
|
1434 'coord_system' => $slice->coord_system,
|
|
1435 'start' => $fstart - $size,
|
|
1436 'end' => $fend + $size,
|
|
1437 'strand' => 1,
|
|
1438 'adaptor' => $self});
|
|
1439 $S->{'_raw_feature_strand'} = $feature->strand * $slice_strand if $feature->can('strand');
|
|
1440 return $S;
|
|
1441 }
|
|
1442
|
|
1443
|
|
1444
|
|
1445 =head2 fetch_by_misc_feature_attribute
|
|
1446
|
|
1447 Arg [1] : string $attribute_type
|
|
1448 The code of the attribute type
|
|
1449 Arg [2] : (optional) string $attribute_value
|
|
1450 The value of the attribute to fetch by
|
|
1451 Arg [3] : (optional) int $size
|
|
1452 The amount of flanking region around the misc feature desired.
|
|
1453 Example : $slice = $sa->fetch_by_misc_feature_attribute('superctg',
|
|
1454 'NT_030871');
|
|
1455 $slice = $sa->fetch_by_misc_feature_attribute('synonym',
|
|
1456 'AL00012311',
|
|
1457 $flanking);
|
|
1458 Description: Fetches a slice around a MiscFeature with a particular
|
|
1459 attribute type and value. If no value is specified then
|
|
1460 the feature with the particular attribute is used.
|
|
1461 If no size is specified then 0 is used.
|
|
1462 Returntype : Bio::EnsEMBL::Slice
|
|
1463 Exceptions : Throw if no feature with the specified attribute type and value
|
|
1464 exists in the database
|
|
1465 Warning if multiple features with the specified attribute type
|
|
1466 and value exist in the database.
|
|
1467 Caller : webcode
|
|
1468 Status : Stable
|
|
1469
|
|
1470 =cut
|
|
1471
|
|
1472 sub fetch_by_misc_feature_attribute {
|
|
1473 my ($self, $attrib_type_code, $attrib_value, $size) = @_;
|
|
1474
|
|
1475 my $mfa = $self->db()->get_MiscFeatureAdaptor();
|
|
1476
|
|
1477 my $feats = $mfa->fetch_all_by_attribute_type_value($attrib_type_code,
|
|
1478 $attrib_value);
|
|
1479
|
|
1480 if(@$feats == 0) {
|
|
1481 throw("MiscFeature with $attrib_type_code=$attrib_value does " .
|
|
1482 "not exist in DB.");
|
|
1483 }
|
|
1484
|
|
1485 if(@$feats > 1) {
|
|
1486 warning("MiscFeature with $attrib_type_code=$attrib_value is " .
|
|
1487 "ambiguous - using first one found.");
|
|
1488 }
|
|
1489
|
|
1490 my ($feat) = @$feats;
|
|
1491
|
|
1492 return $self->fetch_by_Feature($feat, $size);
|
|
1493 }
|
|
1494
|
|
1495 =head2 fetch_normalized_slice_projection
|
|
1496
|
|
1497 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
1498 Example : ( optional )
|
|
1499 Description: gives back a project style result. The returned slices
|
|
1500 represent the areas to which there are symlinks for the
|
|
1501 given slice. start, end show which area on given slice is
|
|
1502 symlinked
|
|
1503 Returntype : [[start,end,$slice][]]
|
|
1504 Exceptions : none
|
|
1505 Caller : BaseFeatureAdaptor
|
|
1506 Status : Stable
|
|
1507
|
|
1508 =cut
|
|
1509
|
|
1510
|
|
1511 sub fetch_normalized_slice_projection {
|
|
1512 my $self = shift;
|
|
1513 my $slice = shift;
|
|
1514
|
|
1515 my $slice_seq_region_id = $self->get_seq_region_id( $slice );
|
|
1516
|
|
1517 $self->_build_exception_cache() if(!exists($self->{'asm_exc_cache'}));
|
|
1518
|
|
1519 my $result = $self->{'asm_exc_cache'}->{$slice_seq_region_id};
|
|
1520
|
|
1521 $result ||= [];
|
|
1522
|
|
1523 my (@haps, @pars);
|
|
1524
|
|
1525 foreach my $row (@$result) {
|
|
1526 my ( $seq_region_id, $seq_region_start, $seq_region_end,
|
|
1527 $exc_type, $exc_seq_region_id, $exc_seq_region_start,
|
|
1528 $exc_seq_region_end ) = @$row;
|
|
1529
|
|
1530 # need overlapping PAR and all HAPs if any
|
|
1531 if( $exc_type eq "PAR" ) {
|
|
1532 if( $seq_region_start <= $slice->end() &&
|
|
1533 $seq_region_end >= $slice->start() ) {
|
|
1534 push( @pars, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
|
|
1535 $exc_seq_region_start, $exc_seq_region_end ] );
|
|
1536 }
|
|
1537 } else {
|
|
1538 push( @haps, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
|
|
1539 $exc_seq_region_start, $exc_seq_region_end ] );
|
|
1540 }
|
|
1541 }
|
|
1542
|
|
1543 if(!@pars && !@haps) {
|
|
1544 #just return this slice, there were no haps or pars
|
|
1545 return [bless ( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment")];
|
|
1546 }
|
|
1547
|
|
1548 my @syms;
|
|
1549 if( @haps >= 1 ) {
|
|
1550 my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps;
|
|
1551
|
|
1552 my $count =0;
|
|
1553 my $chr_start = 1;
|
|
1554 my $hap_start = 1;
|
|
1555 my $last = 0;
|
|
1556
|
|
1557 my $seq_reg_slice = $self->fetch_by_seq_region_id($slice_seq_region_id);
|
|
1558 my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] );
|
|
1559 my $len1 = $seq_reg_slice->length();
|
|
1560 my $len2 = $exc_slice->length();
|
|
1561 my $max_len = ($len1 > $len2) ? $len1 : $len2;
|
|
1562
|
|
1563 while($count <= scalar(@sort_haps) and !$last){
|
|
1564 my $chr_end;
|
|
1565 my $hap_end;
|
|
1566 if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){
|
|
1567 $hap_end = $sort_haps[$count][0]-1;
|
|
1568 $chr_end = $sort_haps[$count][3]-1
|
|
1569 }
|
|
1570 else{
|
|
1571 $last = 1;
|
|
1572 $hap_end = $len1;
|
|
1573 $chr_end = $len2;
|
|
1574 my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start);
|
|
1575 if($diff > 0){
|
|
1576 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] );
|
|
1577 }
|
|
1578 elsif($diff < 0){
|
|
1579 push( @syms, [ $hap_start, $hap_end - $diff, $sort_haps[0][2], $chr_start, $chr_end] );
|
|
1580 }
|
|
1581 else{
|
|
1582 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
|
|
1583 }
|
|
1584 next;
|
|
1585 }
|
|
1586 if($hap_end and $hap_start < $len1){ # if hap at start or end of chromosome
|
|
1587 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
|
|
1588 }
|
|
1589 $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2;
|
|
1590 $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2;
|
|
1591 $count++;
|
|
1592 }
|
|
1593
|
|
1594
|
|
1595 }
|
|
1596
|
|
1597
|
|
1598 # for now haps and pars should not be both there, but in theory we
|
|
1599 # could handle it here by cleverly merging the pars into the existing syms,
|
|
1600 # for now just:
|
|
1601 push( @syms, @pars );
|
|
1602
|
|
1603 my $mapper = Bio::EnsEMBL::Mapper->new( "sym", "org" );
|
|
1604 my $count = 0;
|
|
1605 for my $sym ( @syms ) {
|
|
1606 $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1],
|
|
1607 1, $sym->[2], $sym->[3], $sym->[4] );
|
|
1608 }
|
|
1609
|
|
1610 my @linked = $mapper->map_coordinates( $slice_seq_region_id,
|
|
1611 $slice->start(), $slice->end(),
|
|
1612 $slice->strand(), "sym" );
|
|
1613
|
|
1614 # gaps are regions where there is no mapping to another region
|
|
1615 my $rel_start = 1;
|
|
1616
|
|
1617 #if there was only one coord and it is a gap, we know it is just the
|
|
1618 #same slice with no overlapping symlinks
|
|
1619 if(@linked == 1 && $linked[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
|
|
1620 return [bless( [1,$slice->length, $slice], "Bio::EnsEMBL::ProjectionSegment" )];
|
|
1621 }
|
|
1622
|
|
1623 my @out;
|
|
1624 for my $coord ( @linked ) {
|
|
1625 if( $coord->isa( "Bio::EnsEMBL::Mapper::Gap" )) {
|
|
1626 my $exc_slice = Bio::EnsEMBL::Slice->new_fast({
|
|
1627 'start' => $coord->start(),
|
|
1628 'end' => $coord->end(),
|
|
1629 'strand' => $slice->strand(),
|
|
1630 'coord_system' => $slice->coord_system(),
|
|
1631 'adaptor' => $self,
|
|
1632 'seq_region_name' => $slice->seq_region_name(),
|
|
1633 'seq_region_length' => $slice->seq_region_length()});
|
|
1634 push( @out, bless ( [ $rel_start, $coord->length()+$rel_start-1,
|
|
1635 $exc_slice ], "Bio::EnsEMBL::ProjectionSegment") );
|
|
1636 } else {
|
|
1637 my $exc_slice = $self->fetch_by_seq_region_id( $coord->id() );
|
|
1638 my $exc2_slice = Bio::EnsEMBL::Slice->new_fast({
|
|
1639
|
|
1640 'start' => $coord->start(),
|
|
1641 'end' => $coord->end(),
|
|
1642 'strand' => $coord->strand(),
|
|
1643 'seq_region_name' => $exc_slice->seq_region_name(),
|
|
1644 'seq_region_length' => $exc_slice->seq_region_length(),
|
|
1645 'coord_system' => $exc_slice->coord_system(),
|
|
1646 'adaptor' => $self
|
|
1647 });
|
|
1648
|
|
1649 push( @out, bless( [ $rel_start, $coord->length() + $rel_start - 1,
|
|
1650 $exc2_slice ], "Bio::EnsEMBL::ProjectionSegment") );
|
|
1651 }
|
|
1652 $rel_start += $coord->length();
|
|
1653 }
|
|
1654
|
|
1655 return \@out;
|
|
1656 }
|
|
1657
|
|
1658
|
|
1659
|
|
1660
|
|
1661 =head2 store
|
|
1662
|
|
1663 Arg [1] : Bio::EnsEMBL::Slice $slice
|
|
1664 Arg [2] : (optional) $seqref reference to a string
|
|
1665 The sequence associated with the slice to be stored.
|
|
1666 Example : $slice = Bio::EnsEMBL::Slice->new(...);
|
|
1667 $seq_region_id = $slice_adaptor->store($slice, \$sequence);
|
|
1668 Description: This stores a slice as a sequence region in the database
|
|
1669 and returns the seq region id. The passed in slice must
|
|
1670 start at 1, and must have a valid seq_region name and coordinate
|
|
1671 system. The attached coordinate system must already be stored in
|
|
1672 the database. The sequence region is assumed to start at 1 and
|
|
1673 to have a length equalling the length of the slice. The end of
|
|
1674 the slice must equal the seq_region_length.
|
|
1675 If the slice coordinate system is the sequence level coordinate
|
|
1676 system then the seqref argument must also be passed. If the
|
|
1677 slice coordinate system is NOT a sequence level coordinate
|
|
1678 system then the sequence argument cannot be passed.
|
|
1679 Returntype : int
|
|
1680 Exceptions : throw if slice has no coord system.
|
|
1681 throw if slice coord system is not already stored.
|
|
1682 throw if slice coord system is seqlevel and no sequence is
|
|
1683 provided.
|
|
1684 throw if slice coord system is not seqlevel and sequence is
|
|
1685 provided.
|
|
1686 throw if slice does not start at 1
|
|
1687 throw if sequence is provided and the sequence length does not
|
|
1688 match the slice length.
|
|
1689 throw if the SQL insert fails (e.g. on duplicate seq region)
|
|
1690 throw if slice argument is not passed
|
|
1691 throw if the slice end is not equal to seq_region_length
|
|
1692 Caller : database loading scripts
|
|
1693 Status : Stable
|
|
1694
|
|
1695 =cut
|
|
1696
|
|
1697
|
|
1698
|
|
1699 sub store {
|
|
1700 my $self = shift;
|
|
1701 my $slice = shift;
|
|
1702 my $seqref = shift;
|
|
1703
|
|
1704 #
|
|
1705 # Get all of the sanity checks out of the way before storing anything
|
|
1706 #
|
|
1707
|
|
1708 if(!ref($slice) || !($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
|
|
1709 throw('Slice argument is required');
|
|
1710 }
|
|
1711
|
|
1712 my $cs = $slice->coord_system();
|
|
1713 throw("Slice must have attached CoordSystem.") if(!$cs);
|
|
1714
|
|
1715 my $db = $self->db();
|
|
1716 if(!$cs->is_stored($db)) {
|
|
1717 throw("Slice CoordSystem must already be stored in DB.")
|
|
1718 }
|
|
1719
|
|
1720 if($slice->start != 1 || $slice->strand != 1) {
|
|
1721 throw("Slice must have start==1 and strand==1.");
|
|
1722 }
|
|
1723
|
|
1724 if($slice->end() != $slice->seq_region_length()) {
|
|
1725 throw("Slice must have end==seq_region_length");
|
|
1726 }
|
|
1727
|
|
1728 my $sr_len = $slice->length();
|
|
1729 my $sr_name = $slice->seq_region_name();
|
|
1730
|
|
1731 if(!$sr_name) {
|
|
1732 throw("Slice must have valid seq region name.");
|
|
1733 }
|
|
1734
|
|
1735 if($cs->is_sequence_level()) {
|
|
1736 if(!$seqref) {
|
|
1737 throw("Must provide sequence for sequence level coord system.");
|
|
1738 }
|
|
1739 if(ref($seqref) ne 'SCALAR') {
|
|
1740 throw("Sequence must be a scalar reference.");
|
|
1741 }
|
|
1742 my $seq_len = length($$seqref);
|
|
1743
|
|
1744 if($seq_len != $sr_len) {
|
|
1745 throw("Sequence length ($seq_len) must match slice length ($sr_len).");
|
|
1746 }
|
|
1747 } else {
|
|
1748 if($seqref) {
|
|
1749 throw("Cannot provide sequence for non-sequence level seq regions.");
|
|
1750 }
|
|
1751 }
|
|
1752
|
|
1753 #store the seq_region
|
|
1754
|
|
1755 my $sth = $db->dbc->prepare("INSERT INTO seq_region " .
|
|
1756 "SET name = ?, " .
|
|
1757 " length = ?, " .
|
|
1758 " coord_system_id = ?" );
|
|
1759
|
|
1760 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
|
|
1761 $sth->bind_param(2,$sr_len,SQL_INTEGER);
|
|
1762 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
|
|
1763
|
|
1764 $sth->execute();
|
|
1765
|
|
1766 my $seq_region_id = $sth->{'mysql_insertid'};
|
|
1767
|
|
1768 if(!$seq_region_id) {
|
|
1769 throw("Database seq_region insertion failed.");
|
|
1770 }
|
|
1771
|
|
1772 if($cs->is_sequence_level()) {
|
|
1773 #store sequence if it was provided
|
|
1774 my $seq_adaptor = $db->get_SequenceAdaptor();
|
|
1775 $seq_adaptor->store($seq_region_id, $$seqref);
|
|
1776 }
|
|
1777
|
|
1778 #synonyms
|
|
1779 if(defined($slice->{'synonym'})){
|
|
1780 foreach my $syn (@{$slice->{'synonym'}} ){
|
|
1781 $syn->seq_region_id($seq_region_id); # set the seq_region_id
|
|
1782 $syn->adaptor->store($syn);
|
|
1783 }
|
|
1784 }
|
|
1785
|
|
1786
|
|
1787 $slice->adaptor($self);
|
|
1788
|
|
1789 return $seq_region_id;
|
|
1790 }
|
|
1791
|
|
1792
|
|
1793 =head2 store_assembly
|
|
1794
|
|
1795 Arg [1] : Bio::EnsEMBL::Slice $asm_slice
|
|
1796 Arg [2] : Bio::EnsEMBL::Slice $cmp_slice
|
|
1797 Example : $asm = $slice_adaptor->store_assembly( $slice1, $slice2 );
|
|
1798 Description: Creates an entry in the analysis table based on the
|
|
1799 coordinates of the two slices supplied. Returns a string
|
|
1800 representation of the assembly that gets created.
|
|
1801 Returntype : string
|
|
1802 Exceptions : throw if either slice has no coord system (cs).
|
|
1803 throw unless the cs rank of the asm_slice is lower than the
|
|
1804 cmp_slice.
|
|
1805 throw if there is no mapping path between coord systems
|
|
1806 throw if the lengths of each slice are not equal
|
|
1807 throw if there are existing mappings between either slice
|
|
1808 and the oposite cs
|
|
1809 Caller : database loading scripts
|
|
1810 Status : Experimental
|
|
1811
|
|
1812 =cut
|
|
1813
|
|
1814 sub store_assembly{
|
|
1815 my $self = shift;
|
|
1816 my $asm_slice = shift;
|
|
1817 my $cmp_slice = shift;
|
|
1818
|
|
1819 #
|
|
1820 # Get all of the sanity checks out of the way before storing anything
|
|
1821 #
|
|
1822
|
|
1823 if(!ref($asm_slice) || !($asm_slice->isa('Bio::EnsEMBL::Slice') or $asm_slice->isa('Bio::EnsEMBL::LRGSlice'))) {
|
|
1824 throw('Assembled Slice argument is required');
|
|
1825 }
|
|
1826 if(!ref($cmp_slice) || !($cmp_slice->isa('Bio::EnsEMBL::Slice') or $cmp_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
|
|
1827 throw('Assembled Slice argument is required');
|
|
1828 }
|
|
1829
|
|
1830 my $asm_cs = $asm_slice->coord_system();
|
|
1831 throw("Slice must have attached CoordSystem.") if(!$asm_cs);
|
|
1832 my $cmp_cs = $cmp_slice->coord_system();
|
|
1833 throw("Slice must have attached CoordSystem.") if(!$cmp_cs);
|
|
1834
|
|
1835 unless( $asm_cs->rank < $cmp_cs->rank ){
|
|
1836 throw("Assembled Slice CoordSystem->rank must be lower than ".
|
|
1837 "the component Slice Coord_system" );
|
|
1838 }
|
|
1839
|
|
1840 my @path =
|
|
1841 @{ $asm_cs->adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
|
|
1842
|
|
1843 if ( !@path ) {
|
|
1844 throw("No mapping path defined between "
|
|
1845 . $asm_cs->name() . " and "
|
|
1846 . $cmp_cs->name() );
|
|
1847 }
|
|
1848
|
|
1849 if( $asm_slice->length != $cmp_slice->length ){
|
|
1850 throw("The lengths of the assembled and component slices are not equal" );
|
|
1851 }
|
|
1852
|
|
1853 # For now we disallow any existing mappings between the asm slice and cmp
|
|
1854 # CoordSystem and vice-versa.
|
|
1855 # Some cases of multiple mappings may be allowable by the API, but their
|
|
1856 # logic needs to be coded below.
|
|
1857
|
|
1858 my $asm_proj = $asm_slice->project( $cmp_cs->name, $cmp_cs->version );
|
|
1859 if( @$asm_proj ){
|
|
1860 throw("Regions of the assembled slice are already assembled ".
|
|
1861 "into the component CoordSystem" );
|
|
1862 }
|
|
1863 my $cmp_proj = $cmp_slice->project( $asm_cs->name, $asm_cs->version );
|
|
1864 if( @$cmp_proj ){
|
|
1865 throw("Regions of the component slice are already assembled ".
|
|
1866 "into the assembled CoordSystem" );
|
|
1867 }
|
|
1868
|
|
1869 #
|
|
1870 # Checks complete. Store the data
|
|
1871 #
|
|
1872 my $sth = $self->db->dbc->prepare
|
|
1873 ("INSERT INTO assembly " .
|
|
1874 "SET asm_seq_region_id = ?, " .
|
|
1875 " cmp_seq_region_id = ?, " .
|
|
1876 " asm_start = ?, " .
|
|
1877 " asm_end = ?, " .
|
|
1878 " cmp_start = ?, " .
|
|
1879 " cmp_end = ?, " .
|
|
1880 " ori = ?" );
|
|
1881
|
|
1882 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
|
|
1883 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
|
|
1884 my $ori = $asm_slice->strand * $cmp_slice->strand;
|
|
1885
|
|
1886 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
|
|
1887 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
|
|
1888 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
|
|
1889 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
|
|
1890 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
|
|
1891 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
|
|
1892 $sth->bind_param(7,$ori,SQL_INTEGER);
|
|
1893
|
|
1894 $sth->execute();
|
|
1895
|
|
1896 #use Data::Dumper qw( Dumper );
|
|
1897 #warn Dumper( $self->db->{seq_region_cache} );
|
|
1898 #$self->db->{seq_region_cache} = undef;
|
|
1899 #$self->_cache_seq_regions();
|
|
1900
|
|
1901 my $ama = $self->db->get_AssemblyMapperAdaptor();
|
|
1902 $ama->delete_cache();
|
|
1903
|
|
1904
|
|
1905 return $asm_slice->name . "<>" . $cmp_slice->name;
|
|
1906
|
|
1907 }
|
|
1908
|
|
1909
|
|
1910 =head2 prepare
|
|
1911
|
|
1912 Arg [1] : String $sql
|
|
1913 Example : ( optional )
|
|
1914 Description: overrides the default adaptor prepare method.
|
|
1915 All slice sql will usually use the dna_db.
|
|
1916 Returntype : DBD::sth
|
|
1917 Exceptions : none
|
|
1918 Caller : internal, convenience method
|
|
1919 Status : Stable
|
|
1920
|
|
1921 =cut
|
|
1922
|
|
1923 sub prepare {
|
|
1924 my ( $self, $sql ) = @_;
|
|
1925 return $self->db()->dnadb()->dbc->prepare($sql);
|
|
1926 }
|
|
1927
|
|
1928 sub _build_exception_cache {
|
|
1929 my $self = shift;
|
|
1930
|
|
1931 # build up a cache of the entire assembly exception table
|
|
1932 # it should be small anyway
|
|
1933 my $sth =
|
|
1934 $self->prepare( 'SELECT ae.seq_region_id, ae.seq_region_start, '
|
|
1935 . 'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, '
|
|
1936 . 'ae.exc_seq_region_start, ae.exc_seq_region_end '
|
|
1937 . 'FROM assembly_exception ae, '
|
|
1938 . 'seq_region sr, coord_system cs '
|
|
1939 . 'WHERE sr.seq_region_id = ae.seq_region_id '
|
|
1940 . 'AND sr.coord_system_id = cs.coord_system_id '
|
|
1941 . 'AND cs.species_id = ?' );
|
|
1942
|
|
1943 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
1944 $sth->execute();
|
|
1945
|
|
1946 my %hash;
|
|
1947 $self->{'asm_exc_cache'} = \%hash;
|
|
1948
|
|
1949 my $row;
|
|
1950 while ( $row = $sth->fetchrow_arrayref() ) {
|
|
1951 my @result = @$row;
|
|
1952 $hash{ $result[0] } ||= [];
|
|
1953 push( @{ $hash{ $result[0] } }, \@result );
|
|
1954 }
|
|
1955 $sth->finish();
|
|
1956 } ## end sub _build_exception_cache
|
|
1957
|
|
1958 =head2 cache_toplevel_seq_mappings
|
|
1959
|
|
1960 Args : none
|
|
1961 Example : $slice_adaptor->cache_toplevel_seq_mappings();
|
|
1962 Description: caches all the assembly mappings needed for genes
|
|
1963 Returntype : None
|
|
1964 Exceptions : None
|
|
1965 Caller : general
|
|
1966 Status : At Risk
|
|
1967 : New experimental code
|
|
1968
|
|
1969 =cut
|
|
1970
|
|
1971 sub cache_toplevel_seq_mappings {
|
|
1972 my ($self) = @_;
|
|
1973
|
|
1974 # Get the sequence level to map too
|
|
1975
|
|
1976 my $sql = (<<SSQL);
|
|
1977 SELECT name
|
|
1978 FROM coord_system
|
|
1979 WHERE attrib like "%sequence_level%"
|
|
1980 AND species_id = ?
|
|
1981 SSQL
|
|
1982
|
|
1983 my $sth = $self->prepare($sql);
|
|
1984 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
1985 $sth->execute();
|
|
1986
|
|
1987 my $sequence_level = $sth->fetchrow_array();
|
|
1988
|
|
1989 $sth->finish();
|
|
1990
|
|
1991 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
1992 my $ama = $self->db->get_AssemblyMapperAdaptor();
|
|
1993
|
|
1994 my $cs1 = $csa->fetch_by_name($sequence_level);
|
|
1995
|
|
1996 #get level to map too.
|
|
1997
|
|
1998 $sql = (<<LSQL);
|
|
1999 SELECT DISTINCT(cs.name)
|
|
2000 FROM seq_region sr,
|
|
2001 seq_region_attrib sra,
|
|
2002 attrib_type at,
|
|
2003 coord_system cs
|
|
2004 WHERE sra.seq_region_id = sr.seq_region_id
|
|
2005 AND sra.attrib_type_id = at.attrib_type_id
|
|
2006 AND at.code = "toplevel"
|
|
2007 AND cs.coord_system_id = sr.coord_system_id
|
|
2008 AND cs.species_id = ?
|
|
2009 LSQL
|
|
2010
|
|
2011 $sth = $self->prepare($sql);
|
|
2012 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
2013 $sth->execute();
|
|
2014
|
|
2015 while ( my $csn = $sth->fetchrow_array() ) {
|
|
2016 if ( $csn eq $sequence_level ) { next }
|
|
2017 my $cs2 = $csa->fetch_by_name($csn);
|
|
2018 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
|
|
2019 $am->register_all();
|
|
2020 }
|
|
2021
|
|
2022 } ## end sub cache_toplevel_seq_mappings
|
|
2023
|
|
2024
|
|
2025 sub _build_circular_slice_cache {
|
|
2026 my $self = shift;
|
|
2027
|
|
2028 # build up a cache of circular sequence region ids
|
|
2029 my $sth =
|
|
2030 $self->prepare( "SELECT sra.seq_region_id FROM seq_region_attrib sra "
|
|
2031 . "INNER JOIN attrib_type at ON sra.attrib_type_id = at.attrib_type_id "
|
|
2032 . "INNER JOIN seq_region sr ON sra.seq_region_id = sr.seq_region_id "
|
|
2033 . "INNER JOIN coord_system cs ON sr.coord_system_id = cs.coord_system_id "
|
|
2034 . "WHERE code = 'circular_seq' and cs.species_id = ?");
|
|
2035
|
|
2036 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
2037 $sth->execute();
|
|
2038
|
|
2039 my $id;
|
|
2040 my %hash;
|
|
2041 if ( ($id) = $sth->fetchrow_array() ) {
|
|
2042 $self->{'circular_sr_id_cache'} = \%hash;
|
|
2043 $self->{'is_circular'} = 1;
|
|
2044 $hash{ $id } = $id;
|
|
2045 while ( ($id) = $sth->fetchrow_array() ) {
|
|
2046 $hash{ $id } = $id;
|
|
2047 }
|
|
2048 } else {
|
|
2049 $self->{'is_circular'} = 0;
|
|
2050 }
|
|
2051 $sth->finish();
|
|
2052 } ## end _build_circular_slice_cache
|
|
2053
|
|
2054
|
|
2055 #####################################
|
|
2056 # sub DEPRECATED METHODs
|
|
2057 #####################################
|
|
2058
|
|
2059 =head2 fetch_by_mapfrag
|
|
2060
|
|
2061 Function: DEPRECATED use fetch_by_misc_feature_attribute('synonym',$mapfrag)
|
|
2062
|
|
2063 =cut
|
|
2064
|
|
2065 sub fetch_by_mapfrag{
|
|
2066 my ($self,$mymapfrag,$flag,$size) = @_;
|
|
2067 deprecate('Use fetch_by_misc_feature_attribute instead');
|
|
2068 $flag ||= 'fixed-width'; # alt.. 'context'
|
|
2069 $size ||= $flag eq 'fixed-width' ? 100000 : 0;
|
|
2070 return $self->fetch_by_misc_feature_attribute('synonym',$mymapfrag,$size);
|
|
2071 }
|
|
2072
|
|
2073
|
|
2074
|
|
2075 =head2 fetch_by_chr_start_end
|
|
2076
|
|
2077 Description: DEPRECATED use fetch_by_region instead
|
|
2078
|
|
2079 =cut
|
|
2080
|
|
2081 sub fetch_by_chr_start_end {
|
|
2082 my ($self,$chr,$start,$end) = @_;
|
|
2083 deprecate('Use fetch_by_region() instead');
|
|
2084
|
|
2085 #assume that by chromosome the user actually meant top-level coord
|
|
2086 #system since this is the old behaviour of this deprecated method
|
|
2087 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
2088 my ($cs) = @{$csa->fetch_all()}; # get the highest coord system
|
|
2089
|
|
2090 return $self->fetch_by_region($cs->name,$chr,$start,$end,1,$cs->version);
|
|
2091 }
|
|
2092
|
|
2093
|
|
2094
|
|
2095 =head2 fetch_by_contig_name
|
|
2096
|
|
2097 Description: Deprecated. Use fetch_by_region(), Slice::project(),
|
|
2098 Slice::expand() instead
|
|
2099
|
|
2100 =cut
|
|
2101
|
|
2102 sub fetch_by_contig_name {
|
|
2103 my ($self, $name, $size) = @_;
|
|
2104
|
|
2105 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
|
|
2106
|
|
2107 #previously wanted chromosomal slice on a given contig. Assume this means
|
|
2108 #a top-level slice on a given seq_region in the seq_level coord system
|
|
2109 my $csa = $self->db()->get_CoordSystemAdaptor();
|
|
2110 my $seq_level = $csa->fetch_sequence_level();
|
|
2111
|
|
2112 my $seq_lvl_slice = $self->fetch_by_region($seq_level->name(), $name);
|
|
2113
|
|
2114 if(!$seq_lvl_slice) {
|
|
2115 return undef;
|
|
2116 }
|
|
2117
|
|
2118 my @projection = @{$seq_lvl_slice->project('toplevel')};
|
|
2119
|
|
2120 if(@projection != 1) {
|
|
2121 warning("$name is mapped to multiple toplevel locations.");
|
|
2122 }
|
|
2123
|
|
2124 return $projection[0]->[2]->expand($size, $size);
|
|
2125 }
|
|
2126
|
|
2127
|
|
2128 =head2 fetch_by_clone_accession
|
|
2129
|
|
2130 Description: DEPRECATED. Use fetch_by_region, Slice::project, Slice::expand
|
|
2131 instead.
|
|
2132
|
|
2133 =cut
|
|
2134
|
|
2135 sub fetch_by_clone_accession{
|
|
2136 my ($self,$name,$size) = @_;
|
|
2137
|
|
2138 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
|
|
2139
|
|
2140 my $csa = $self->db()->get_CoordSystemAdaptor();
|
|
2141 my $clone_cs = $csa->fetch_by_name('clone');
|
|
2142
|
|
2143 if(!$clone_cs) {
|
|
2144 warning('Clone coordinate system does not exist for this species');
|
|
2145 return undef;
|
|
2146 }
|
|
2147
|
|
2148 #this unfortunately needs a version on the end to work
|
|
2149 if(! ($name =~ /\./)) {
|
|
2150 my $sth =
|
|
2151 $self->prepare( "SELECT sr.name "
|
|
2152 . "FROM seq_region sr, coord_system cs "
|
|
2153 . "WHERE cs.name = 'clone' "
|
|
2154 . "AND cs.coord_system_id = sr.coord_system_id "
|
|
2155 . "AND sr.name LIKE '$name.%'"
|
|
2156 . "AND cs.species_id = ?" );
|
|
2157
|
|
2158 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
|
|
2159 $sth->execute();
|
|
2160
|
|
2161 if(!$sth->rows()) {
|
|
2162 $sth->finish();
|
|
2163 throw("Clone $name not found in database");
|
|
2164 }
|
|
2165
|
|
2166 ($name) = $sth->fetchrow_array();
|
|
2167
|
|
2168 $sth->finish();
|
|
2169 }
|
|
2170
|
|
2171 my $clone = $self->fetch_by_region($clone_cs->name(), $name);
|
|
2172 return undef if(!$clone);
|
|
2173
|
|
2174 my @projection = @{$clone->project('toplevel')};
|
|
2175
|
|
2176 if(@projection != 1) {
|
|
2177 warning("$name is mapped to multiple locations.");
|
|
2178 }
|
|
2179
|
|
2180 return $projection[0]->[2]->expand($size, $size);
|
|
2181 }
|
|
2182
|
|
2183
|
|
2184 =head2 fetch_by_supercontig_name
|
|
2185
|
|
2186 Description: DEPRECATED. Use fetch_by_region(), Slice::project() and
|
|
2187 Slice::expand() instead
|
|
2188
|
|
2189 =cut
|
|
2190
|
|
2191 sub fetch_by_supercontig_name {
|
|
2192 my ($self,$name, $size) = @_;
|
|
2193
|
|
2194 deprecate('Use fetch_by_region(), Slice::project() and Slice::expand().');
|
|
2195
|
|
2196 my $csa = $self->db()->get_CoordSystemAdaptor();
|
|
2197 my $sc_level = $csa->fetch_by_name('supercontig');
|
|
2198
|
|
2199 if(!$sc_level) {
|
|
2200 warning('No supercontig coordinate system exists for this species.');
|
|
2201 return undef;
|
|
2202 }
|
|
2203
|
|
2204 my $sc_slice = $self->fetch_by_region($sc_level->name(),$name);
|
|
2205
|
|
2206 return undef if(!$sc_slice);
|
|
2207
|
|
2208 my @projection = @{$sc_slice->project('toplevel')};
|
|
2209
|
|
2210 if(@projection > 1) {
|
|
2211 warning("$name is mapped to multiple locations in toplevel");
|
|
2212 }
|
|
2213
|
|
2214 return $projection[0]->[2]->expand($size, $size);
|
|
2215 }
|
|
2216
|
|
2217
|
|
2218
|
|
2219
|
|
2220 =head2 list_overlapping_supercontigs
|
|
2221
|
|
2222 Description: DEPRECATED use Slice::project instead
|
|
2223
|
|
2224 =cut
|
|
2225
|
|
2226 sub list_overlapping_supercontigs {
|
|
2227 my ($self,$slice) = @_;
|
|
2228
|
|
2229 deprecate('Use Slice::project() instead.');
|
|
2230
|
|
2231 my $csa = $self->db()->get_CoordSystemAdaptor();
|
|
2232 my $sc_level = $csa->fetch_by_name('supercontig');
|
|
2233
|
|
2234 if(!$sc_level) {
|
|
2235 warning('No supercontig coordinate system exists for this species.');
|
|
2236 return undef;
|
|
2237 }
|
|
2238
|
|
2239 my @out;
|
|
2240 foreach my $seg (@{$slice->project($sc_level->name(), $sc_level->version)}){
|
|
2241 push @out, $seg->[2]->seq_region_name();
|
|
2242 }
|
|
2243
|
|
2244 return \@out;
|
|
2245 }
|
|
2246
|
|
2247
|
|
2248
|
|
2249 =head2 fetch_by_chr_name
|
|
2250
|
|
2251 Description: DEPRECATED. Use fetch by region instead
|
|
2252
|
|
2253 =cut
|
|
2254
|
|
2255 sub fetch_by_chr_name{
|
|
2256 my ($self,$chr_name) = @_;
|
|
2257 deprecate('Use fetch_by_region() instead.');
|
|
2258
|
|
2259 my $csa = $self->db->get_CoordSystemAdaptor();
|
|
2260
|
|
2261 my $top_cs = @{$csa->fetch_all()};
|
|
2262
|
|
2263 return $self->fetch_by_region($top_cs->name(),$chr_name,
|
|
2264 undef,undef,undef,$top_cs->version);
|
|
2265 }
|
|
2266
|
|
2267
|
|
2268
|
|
2269
|
|
2270
|
|
2271
|
|
2272 1;
|