annotate variant_effect_predictor/Bio/EnsEMBL/Mapper/RangeRegistry.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Mapper::RangeRegistry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use Bio::EnsEMBL::Mapper::RangeRegistry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 # Get a fixed width chunk around the range of intereset. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 # will be used if any registration is actually necessary.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $chunk_start = ( $start >> 20 ) << 20 + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $chunk_end = ( ( $end >> 20 ) + 1 ) << 20;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # Check if any registration is necessary for the range. If it is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 # register a large chunked area instead and return a listref of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 # unregistered areas that need to be loaded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 if (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 $pairs = $rr->check_and_register(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 $id, $start, $end, $chunk_start, $chunk_end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 foreach my $pair (@$pairs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my ( $pair_start, $pair_end ) = @$pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 # Fetch mappings for these regions from the assembly table and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 # load them into the mapper.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 ...;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 # The range ($start - $end) is already registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 ...;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 # Check if any registration is necessary. If it is register the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 # region and return a listref of pairs that need to be loaded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 if ( $pairs = $rr->check_and_register( $id, $start, $end ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 ...;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 This module maintains an internal list of registered regions and is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 used to quickly ascertain if and what regions of a provided range need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 registration.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 package Bio::EnsEMBL::Mapper::RangeRegistry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 use Bio::EnsEMBL::Utils::Exception qw(throw);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 use integer;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 Arg [1] : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Example : my $rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Description: Creates a new RangeRegistry object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Returntype : Bio::EnsEMBL::Mapper::RangeRegistry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Caller : AssemblyMapperAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 my ($proto) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 my $class = ref($proto) || $proto;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 return bless( { 'registry' => {} }, $class );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 sub flush {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 $self->{'registry'} = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 =head2 check_and_register
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 Arg [1] : string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 The id of the range to be checked/registered (e.g. a sequenceid)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 Arg [2] : int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 The start of the range to be checked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 Arg [3] : int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 The end of the range to be checked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Arg [4] : (optional) int $rstart
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 The start of the range to be registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 if the checked range was not fully registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Arg [5] : (optional) int $rend
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 The end of the range to be registerd
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 if the checked range was not fully registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Example : $ranges=$rr->check_and_register('X',500,600, 1,1_000_000));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 Description: Checks the range registry to see if the entire range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 denoted by ($id : $start-$end) is already registered. If
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 it already is, then undef is returned. If it is not then
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 the range specified by $rstart and $rend is registered, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 a list of regions that were required to completely register
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 $rstart-$rend is returned. If $rstart and $rend are not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 defined they default to $start and $end respectively.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 The reason there is a single call to do both the checking and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 registering is to reduce the overhead. Much of the work to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 check if a range is registered is the same as registering a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 region around that range.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Returntype : undef or listref of [start,end] range pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 Exceptions : throw if rstart is greater than start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 throw if rend is less than end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 throw if end is less than start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 throw if id, start, or end are not defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Caller : AssemblyMapperAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 #"constants"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 my $START = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 my $END = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 sub check_and_register {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 my ( $self, $id, $start, $end, $rstart, $rend ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 $rstart = $start if ( !defined($rstart) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 $rend = $end if ( !defined($rend) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 # Sanity checks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 if ( !defined($id) || !defined($start) || !defined($end) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 throw("ID, start, end arguments are required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 # The following was commented out due to Ensembl Genomes requirements
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 # for bacterial genomes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 ## if ( $start > $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 ## throw( "start argument [$start] must be less than "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 ## . "(or equal to) end argument [$end]" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 ## }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 ##
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 ## if ( $rstart > $rend ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 ## throw( "rstart argument [$rstart] must be less than "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 ## . "(or equal to) rend argument [$rend] argument" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 ## }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 if ( $rstart > $start ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 throw("rstart [$rstart] must be less than or equal to start [$start]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 if ( $rend < $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 throw("rend [$rend] must be greater than or equal to end [$end]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 my $reg = $self->{'registry'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 my $list = $reg->{$id} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 my @gap_pairs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 my $len = scalar(@$list);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 if ( $len == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 # this is the first request for this id, return a gap pair for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 # entire range and register it as seen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 $list->[0] = [ $rstart, $rend ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 return [ [ $rstart, $rend ] ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 #####
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 # loop through the list of existing ranges recording any "gaps" where
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 # the existing range does not cover part of the requested range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 my $start_idx = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 my $end_idx = $#$list;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 my ( $mid_idx, $range );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 # binary search the relevant pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 # helps if the list is big
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 while ( ( $end_idx - $start_idx ) > 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 $mid_idx = ( $start_idx + $end_idx ) >> 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 $range = $list->[$mid_idx];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 if ( $range->[1] < $rstart ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 $start_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 $end_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 my ( $gap_start, $gap_end, $r_idx, $rstart_idx, $rend_idx );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 $gap_start = $rstart;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 my ( $pstart, $pend ) = @{ $list->[$CUR] };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 # no work needs to be done at all if we find a range pair that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 # entirely overlaps the requested region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 if ( $pstart <= $start && $pend >= $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 # find adjacent or overlapping regions already registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 if ( $pend >= ( $rstart - 1 ) && $pstart <= ( $rend + 1 ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 if ( !defined($rstart_idx) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 $rstart_idx = $CUR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 $rend_idx = $CUR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 if ( $pstart > $rstart ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 $gap_end = ( $rend < $pstart ) ? $rend : $pstart - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 push @gap_pairs, [ $gap_start, $gap_end ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 $gap_start = ( $rstart > $pend ) ? $rstart : $pend + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 # if($pstart > $rend && !defined($r_idx)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 if ( $pend >= $rend && !defined($r_idx) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 $r_idx = $CUR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 } ## end for ( my $CUR = $start_idx...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 # do we have to make another gap?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 if ( $gap_start <= $rend ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 push @gap_pairs, [ $gap_start, $rend ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 # Merge the new range into the registered list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 if ( defined($rstart_idx) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 my ( $new_start, $new_end );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 if ( $rstart < $list->[$rstart_idx]->[0] ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $new_start = $rstart;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $new_start = $list->[$rstart_idx]->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 if ( $rend > $list->[$rend_idx]->[1] ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 $new_end = $rend;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $new_end = $list->[$rend_idx]->[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 splice( @$list, $rstart_idx,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 $rend_idx - $rstart_idx + 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 [ $new_start, $new_end ] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 } elsif ( defined($r_idx) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 push( @$list, [ $rstart, $rend ] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 return \@gap_pairs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 } ## end sub check_and_register
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 # overlap size is just added to make RangeRegistry generally more useful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 =head2 overlap_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 Arg [1] : string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 Arg [2] : int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Arg [3] : int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 Example : my $overlap_size = $rr->( "chr1", 1, 100_000_000 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 Description: finds out how many bases of the given range are registered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 sub overlap_size {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 my ( $self, $id, $start, $end ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 my $overlap = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 if ( $start > $end ) { return 0 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 my $reg = $self->{'registry'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 my $list = $reg->{$id} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 my $len = scalar(@$list);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 if ( $len == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 # this is the first request for this id, return a gap pair for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 # entire range and register it as seen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 #####
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 # loop through the list of existing ranges recording any "gaps" where
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 # the existing range does not cover part of the requested range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 my $start_idx = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 my $end_idx = $#$list;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 my ( $mid_idx, $range );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 # binary search the relevant pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 # helps if the list is big
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 while ( ( $end_idx - $start_idx ) > 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 $mid_idx = ( $start_idx + $end_idx ) >> 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 $range = $list->[$mid_idx];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 if ( $range->[1] < $start ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 $start_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 $end_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 my ( $pstart, $pend ) = @{ $list->[$CUR] };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 if ( $pstart > $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 # No more interesting ranges here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 # no work needs to be done at all if we find a range pair that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 # entirely overlaps the requested region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 if ( $pstart <= $start && $pend >= $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $overlap = $end - $start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 my $mstart = ( $start < $pstart ? $pstart : $start );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 my $mend = ( $end < $pend ? $end : $pend );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 if ( $mend - $mstart >= 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 $overlap += ( $mend - $mstart + 1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 return $overlap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 } ## end sub overlap_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 # low level function to access the ranges
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 # only use for read access
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 sub get_ranges {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 my ( $self, $id ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 return $self->{'registry'}->{$id};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376