annotate variant_effect_predictor/Bio/EnsEMBL/Mapper/RangeRegistry.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::Mapper::RangeRegistry
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 use Bio::EnsEMBL::Mapper::RangeRegistry;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 $rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 # Get a fixed width chunk around the range of intereset. This
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 # will be used if any registration is actually necessary.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 $chunk_start = ( $start >> 20 ) << 20 + 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 $chunk_end = ( ( $end >> 20 ) + 1 ) << 20;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # Check if any registration is necessary for the range. If it is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # register a large chunked area instead and return a listref of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 # unregistered areas that need to be loaded.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 if (
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 $pairs = $rr->check_and_register(
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 $id, $start, $end, $chunk_start, $chunk_end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 ) )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 foreach my $pair (@$pairs) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 my ( $pair_start, $pair_end ) = @$pair;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 # Fetch mappings for these regions from the assembly table and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 # load them into the mapper.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 ...;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 # The range ($start - $end) is already registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 ...;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 # Check if any registration is necessary. If it is register the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 # region and return a listref of pairs that need to be loaded.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 if ( $pairs = $rr->check_and_register( $id, $start, $end ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 ...;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 This module maintains an internal list of registered regions and is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 used to quickly ascertain if and what regions of a provided range need
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 registration.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 package Bio::EnsEMBL::Mapper::RangeRegistry;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 use Bio::EnsEMBL::Utils::Exception qw(throw);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 use integer;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 Arg [1] : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 Example : my $rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 Description: Creates a new RangeRegistry object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 Returntype : Bio::EnsEMBL::Mapper::RangeRegistry
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 Caller : AssemblyMapperAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 sub new {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 my ($proto) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 my $class = ref($proto) || $proto;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 return bless( { 'registry' => {} }, $class );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 sub flush {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 my ($self) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 $self->{'registry'} = {};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 =head2 check_and_register
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 Arg [1] : string $id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 The id of the range to be checked/registered (e.g. a sequenceid)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 Arg [2] : int $start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 The start of the range to be checked
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 Arg [3] : int $end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 The end of the range to be checked
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 Arg [4] : (optional) int $rstart
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 The start of the range to be registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 if the checked range was not fully registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114 Arg [5] : (optional) int $rend
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 The end of the range to be registerd
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 if the checked range was not fully registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 Example : $ranges=$rr->check_and_register('X',500,600, 1,1_000_000));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 Description: Checks the range registry to see if the entire range
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 denoted by ($id : $start-$end) is already registered. If
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 it already is, then undef is returned. If it is not then
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 the range specified by $rstart and $rend is registered, and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 a list of regions that were required to completely register
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 $rstart-$rend is returned. If $rstart and $rend are not
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 defined they default to $start and $end respectively.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 The reason there is a single call to do both the checking and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 registering is to reduce the overhead. Much of the work to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 check if a range is registered is the same as registering a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 region around that range.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 Returntype : undef or listref of [start,end] range pairs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 Exceptions : throw if rstart is greater than start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 throw if rend is less than end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 throw if end is less than start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 throw if id, start, or end are not defined
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 Caller : AssemblyMapperAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 #"constants"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141 my $START = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 my $END = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 sub check_and_register {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145 my ( $self, $id, $start, $end, $rstart, $rend ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 $rstart = $start if ( !defined($rstart) );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 $rend = $end if ( !defined($rend) );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 # Sanity checks
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 if ( !defined($id) || !defined($start) || !defined($end) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 throw("ID, start, end arguments are required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 # The following was commented out due to Ensembl Genomes requirements
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 # for bacterial genomes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 ## if ( $start > $end ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 ## throw( "start argument [$start] must be less than "
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 ## . "(or equal to) end argument [$end]" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 ## }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 ##
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 ## if ( $rstart > $rend ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 ## throw( "rstart argument [$rstart] must be less than "
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 ## . "(or equal to) rend argument [$rend] argument" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 ## }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 if ( $rstart > $start ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 throw("rstart [$rstart] must be less than or equal to start [$start]");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 if ( $rend < $end ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 throw("rend [$rend] must be greater than or equal to end [$end]");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 my $reg = $self->{'registry'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 my $list = $reg->{$id} ||= [];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 my @gap_pairs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 my $len = scalar(@$list);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 if ( $len == 0 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 # this is the first request for this id, return a gap pair for the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 # entire range and register it as seen
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 $list->[0] = [ $rstart, $rend ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 return [ [ $rstart, $rend ] ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 #####
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193 # loop through the list of existing ranges recording any "gaps" where
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 # the existing range does not cover part of the requested range
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 my $start_idx = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 my $end_idx = $#$list;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 my ( $mid_idx, $range );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 # binary search the relevant pairs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 # helps if the list is big
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 while ( ( $end_idx - $start_idx ) > 1 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 $mid_idx = ( $start_idx + $end_idx ) >> 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 $range = $list->[$mid_idx];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 if ( $range->[1] < $rstart ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 $start_idx = $mid_idx;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 $end_idx = $mid_idx;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 my ( $gap_start, $gap_end, $r_idx, $rstart_idx, $rend_idx );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215 $gap_start = $rstart;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218 my ( $pstart, $pend ) = @{ $list->[$CUR] };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 # no work needs to be done at all if we find a range pair that
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221 # entirely overlaps the requested region
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222 if ( $pstart <= $start && $pend >= $end ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 return undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226 # find adjacent or overlapping regions already registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 if ( $pend >= ( $rstart - 1 ) && $pstart <= ( $rend + 1 ) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228 if ( !defined($rstart_idx) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 $rstart_idx = $CUR;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 $rend_idx = $CUR;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 if ( $pstart > $rstart ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 $gap_end = ( $rend < $pstart ) ? $rend : $pstart - 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 push @gap_pairs, [ $gap_start, $gap_end ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 $gap_start = ( $rstart > $pend ) ? $rstart : $pend + 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 # if($pstart > $rend && !defined($r_idx)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 if ( $pend >= $rend && !defined($r_idx) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 $r_idx = $CUR;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 } ## end for ( my $CUR = $start_idx...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248 # do we have to make another gap?
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 if ( $gap_start <= $rend ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 push @gap_pairs, [ $gap_start, $rend ];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 # Merge the new range into the registered list
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256 if ( defined($rstart_idx) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 my ( $new_start, $new_end );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259 if ( $rstart < $list->[$rstart_idx]->[0] ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 $new_start = $rstart;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 $new_start = $list->[$rstart_idx]->[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265 if ( $rend > $list->[$rend_idx]->[1] ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 $new_end = $rend;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268 $new_end = $list->[$rend_idx]->[1];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271 splice( @$list, $rstart_idx,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 $rend_idx - $rstart_idx + 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 [ $new_start, $new_end ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 } elsif ( defined($r_idx) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276 splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278 push( @$list, [ $rstart, $rend ] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281 return \@gap_pairs;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282 } ## end sub check_and_register
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 # overlap size is just added to make RangeRegistry generally more useful
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286 =head2 overlap_size
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 Arg [1] : string $id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 Arg [2] : int $start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 Arg [3] : int $end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 Example : my $overlap_size = $rr->( "chr1", 1, 100_000_000 )
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 Description: finds out how many bases of the given range are registered
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 Returntype : int
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 sub overlap_size {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301 my ( $self, $id, $start, $end ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 my $overlap = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 if ( $start > $end ) { return 0 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 my $reg = $self->{'registry'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 my $list = $reg->{$id} ||= [];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 my $len = scalar(@$list);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 if ( $len == 0 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 # this is the first request for this id, return a gap pair for the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 # entire range and register it as seen
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315 return 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 #####
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 # loop through the list of existing ranges recording any "gaps" where
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 # the existing range does not cover part of the requested range
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 my $start_idx = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 my $end_idx = $#$list;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 my ( $mid_idx, $range );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 # binary search the relevant pairs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 # helps if the list is big
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 while ( ( $end_idx - $start_idx ) > 1 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 $mid_idx = ( $start_idx + $end_idx ) >> 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 $range = $list->[$mid_idx];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 if ( $range->[1] < $start ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333 $start_idx = $mid_idx;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 $end_idx = $mid_idx;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 my ( $pstart, $pend ) = @{ $list->[$CUR] };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 if ( $pstart > $end ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 # No more interesting ranges here.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 # no work needs to be done at all if we find a range pair that
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 # entirely overlaps the requested region
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 if ( $pstart <= $start && $pend >= $end ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 $overlap = $end - $start + 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 last;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 my $mstart = ( $start < $pstart ? $pstart : $start );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 my $mend = ( $end < $pend ? $end : $pend );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 if ( $mend - $mstart >= 0 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 $overlap += ( $mend - $mstart + 1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 return $overlap;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 } ## end sub overlap_size
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 # low level function to access the ranges
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 # only use for read access
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 sub get_ranges {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 my ( $self, $id ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 return $self->{'registry'}->{$id};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376