annotate variant_effect_predictor/Bio/EnsEMBL/Mapper.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $map = Bio::EnsEMBL::Mapper->new( 'rawcontig', 'chromosome' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 # add a coordinate mapping - supply two pairs of coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $map->add_map_coordinates(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 $contig_id, $contig_start, $contig_end, $contig_ori,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 $chr_name, $chr_start, $chr_end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 # map from one coordinate system to another
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 my @coordlist =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $map->map_coordinates( 627012, 2, 5, -1, "rawcontig" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 Generic mapper to provide coordinate transforms between two disjoint
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 coordinate systems. This mapper is intended to be 'context neutral' - in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 that it does not contain any code relating to any particular coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 system. This is provided in, for example, Bio::EnsEMBL::AssemblyMapper.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 package Bio::EnsEMBL::Mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use integer;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use Bio::EnsEMBL::Mapper::Pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::EnsEMBL::Mapper::IndelPair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use Bio::EnsEMBL::Mapper::Unit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::EnsEMBL::Mapper::Coordinate;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 use Bio::EnsEMBL::Mapper::IndelCoordinate;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use Bio::EnsEMBL::Mapper::Gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 use Bio::EnsEMBL::Utils::Exception qw(throw);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 # use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 Arg [1] : string $from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 The name of the 'from' coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 Arg [2] : string $to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 The name of the 'to' coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 Arg [3] : (optional) Bio::EnsEMBL::CoordSystem $from_cs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 The 'from' coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 Arg [4] : (optional) Bio::EnsEMBL::CoordSystem $to_cs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Example : my $mapper = Bio::EnsEMBL::Mapper->new('FROM', 'TO');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 Description: Constructor. Creates a new Bio::EnsEMBL::Mapper object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 Returntype : Bio::EnsEMBL::Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 my ( $proto, $from, $to, $from_cs, $to_cs ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 if ( !defined($to) || !defined($from) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 throw("Must supply 'to' and 'from' tags");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 my $class = ref($proto) || $proto;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 my $self = bless( { "_pair_$from" => {},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 "_pair_$to" => {},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 'pair_count' => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 'to' => $to,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 'from' => $from,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 'to_cs' => $to_cs,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 'from_cs' => $from_cs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 $class );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 # do sql to get any component with multiple assemblies.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 =head2 flush
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Example : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Description: removes all cached information out of this mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Caller : AssemblyMapper, ChainedAssemblyMapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 sub flush {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 my $from = $self->from();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 my $to = $self->to();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 $self->{"_pair_$from"} = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 $self->{"_pair_$to"} = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 $self->{'pair_count'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 =head2 map_coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 Arg 1 string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 id of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Arg 2 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 start coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 Arg 3 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 end coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 Arg 4 int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 raw contig orientation (+/- 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 Arg 5 string $type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 nature of transform - gives the type of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 coordinates to be transformed *from*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 Function generic map method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Returntype array of Bio::EnsEMBL::Mapper::Coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 and/or Bio::EnsEMBL::Mapper::Gap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 Exceptions none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Caller Bio::EnsEMBL::Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 sub map_coordinates {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 my ( $self, $id, $start, $end, $strand, $type ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 unless ( defined($id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 && defined($start)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 && defined($end)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 && defined($strand)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 && defined($type) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 throw("Expecting 5 arguments");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 # special case for handling inserts:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 if ( $start == $end + 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 return $self->map_insert( $id, $start, $end, $strand, $type );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 if ( !$self->{'_is_sorted'} ) { $self->_sort() }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 my $hash = $self->{"_pair_$type"};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 my ( $from, $to, $cs );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 if ( $type eq $self->{'to'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 $from = 'to';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 $to = 'from';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 $cs = $self->{'from_cs'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 $from = 'from';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 $to = 'to';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 $cs = $self->{'to_cs'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 unless ( defined $hash ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 throw("Type $type is neither to or from coordinate systems");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 if ( !defined $hash->{ uc($id) } ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 # one big gap!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 return $gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 my $last_used_pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my @result;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 my ( $start_idx, $end_idx, $mid_idx, $pair, $self_coord );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 my $lr = $hash->{ uc($id) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 $start_idx = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 $end_idx = $#{$lr};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 # binary search the relevant pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 # helps if the list is big
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 while ( ( $end_idx - $start_idx ) > 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 $mid_idx = ( $start_idx + $end_idx ) >> 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 $pair = $lr->[$mid_idx];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 $self_coord = $pair->{$from};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 if ( $self_coord->{'end'} < $start ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $start_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 $end_idx = $mid_idx;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 my $rank = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 my $orig_start = $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 my $last_target_coord = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 for ( my $i = $start_idx; $i <= $#{$lr}; $i++ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 $pair = $lr->[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 my $self_coord = $pair->{$from};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 my $target_coord = $pair->{$to};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 # But not the case for haplotypes!! need to test for this case???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 # so removing this till a better solution is found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 # if($self_coord->{'start'} < $start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 # $start = $orig_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 # $rank++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 if ( defined($last_target_coord)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 and $target_coord->{'id'} ne $last_target_coord )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 if ( $self_coord->{'start'} < $start )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 { # i.e. the same bit is being mapped to another assembled bit
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 $start = $orig_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 $last_target_coord = $target_coord->{'id'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 # if we haven't even reached the start, move on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 if ( $self_coord->{'end'} < $orig_start ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 # if we have over run, break
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 if ( $self_coord->{'start'} > $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 if ( $start < $self_coord->{'start'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 # gap detected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 $self_coord->{'start'} - 1, $rank );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 push( @result, $gap );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $start = $gap->{'end'} + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 my ( $target_start, $target_end, $target_ori );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 my $res;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 if ( exists $pair->{'indel'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 # When next pair is an IndelPair and not a Coordinate, create the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 # new mapping Coordinate, the IndelCoordinate.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 $target_start = $target_coord->{'start'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 $target_end = $target_coord->{'end'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 #create a Gap object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 ( $self_coord->{'end'} < $end ? $self_coord->{'end'} : $end ) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 #create the Coordinate object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 my $coord =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 $target_start, $target_end, $pair->{'ori'}*$strand, $cs );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 #and finally, the IndelCoordinate object with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 $res = Bio::EnsEMBL::Mapper::IndelCoordinate->new( $gap, $coord );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 # start is somewhere inside the region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 if ( $pair->{'ori'} == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 $target_start =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 $target_coord->{'start'} +
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 ( $start - $self_coord->{'start'} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 $target_end =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 $target_coord->{'end'} - ( $start - $self_coord->{'start'} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 # Either we are enveloping this map or not. If yes, then end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 # point (self perspective) is determined solely by target. If
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 # not we need to adjust.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 if ( $end > $self_coord->{'end'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 # enveloped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 if ( $pair->{'ori'} == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 $target_end = $target_coord->{'end'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 $target_start = $target_coord->{'start'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 # need to adjust end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 if ( $pair->{'ori'} == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 $target_end =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 $target_coord->{'start'} +
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 ( $end - $self_coord->{'start'} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $target_start =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 $target_coord->{'end'} - ( $end - $self_coord->{'start'} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 $res =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 Bio::EnsEMBL::Mapper::Coordinate->new( $target_coord->{'id'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 $target_start, $target_end, $pair->{'ori'}*$strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 $cs, $rank );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 } ## end else [ if ( exists $pair->{'indel'...})]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 push( @result, $res );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 $last_used_pair = $pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 $start = $self_coord->{'end'} + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 } ## end for ( my $i = $start_idx...)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 if ( !defined $last_used_pair ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 my $gap = Bio::EnsEMBL::Mapper::Gap->new( $start, $end );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 push( @result, $gap );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 } elsif ( $last_used_pair->{$from}->{'end'} < $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 # gap at the end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 my $gap =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 Bio::EnsEMBL::Mapper::Gap->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 $last_used_pair->{$from}->{'end'} + 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $end, $rank );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 push( @result, $gap );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 if ( $strand == -1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 @result = reverse(@result);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 return @result;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 } ## end sub map_coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 =head2 map_insert
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 Arg [1] : string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 Arg [2] : int $start - start coord. Since this is an insert should always
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 be one greater than end.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 Arg [3] : int $end - end coord. Since this is an insert should always
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 be one less than start.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 Arg [4] : int $strand (0, 1, -1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 Arg [5] : string $type - the coordinate system name the coords are from.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 Arg [6] : boolean $fastmap - if specified, this is being called from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 the fastmap call. The mapping done is not any faster for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 inserts, but the return value is different.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 Description: This is an internal function which handles the special mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 case for inserts (start = end +1). This function will be called
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 automatically by the map function so there is no reason to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 call it directly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and/or Gap objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Caller : map_coordinates()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# map_insert
#
# Maps a zero-length insert (start == end + 1) by swapping start and end,
# mapping the resulting 2bp region with map_coordinates(), and then turning
# the mapped result back into insert coordinates.
#
# Args    : $id, $start, $end, $strand, $type - as for map_coordinates()
#           $fastmap - when true, return a flat list
#                      (id, start, end, strand, coord_system) instead of
#                      objects, or an empty list when the insert did not
#                      map to exactly one result.
# Returns : list of mapped objects (gaps are removed when the 2bp region
#           maps to two pieces), or the flat list described above.
# Throws  : if map_coordinates() returns neither one nor two results.
sub map_insert {
  my ($self, $id, $start, $end, $strand, $type, $fastmap) = @_;

  # swap start/end and map the resultant 2bp coordinate
  ($start, $end) = ($end, $start);

  my @coords = $self->map_coordinates($id, $start, $end, $strand, $type);

  if(@coords == 1) {
    my $c = $coords[0];
    # swap start and end to convert back into insert
    ($c->{'start'}, $c->{'end'}) = ($c->{'end'}, $c->{'start'});
  } else {
    # Build the message as ONE string: extra positional arguments would not
    # reliably end up in the error text.
    throw("Unexpected: Got " . scalar(@coords) . " expected 2.")
      if(@coords != 2);

    # adjust coordinates, remove gaps
    my ($c1, $c2);
    if($strand == -1) {
      ($c2, $c1) = @coords;
    } else {
      ($c1, $c2) = @coords;
    }
    @coords = ();

    if(ref($c1) eq 'Bio::EnsEMBL::Mapper::Coordinate') {
      # insert is after first coord
      if($c1->{'strand'} * $strand == -1) {
        $c1->{'end'}--;
      } else {
        $c1->{'start'}++;
      }
      @coords = ($c1);
    }
    if(ref($c2) eq 'Bio::EnsEMBL::Mapper::Coordinate') {
      # insert is before second coord
      if($c2->{'strand'} * $strand == -1) {
        $c2->{'start'}++;
      } else {
        $c2->{'end'}--;
      }
      # keep the results in the order map_coordinates() produced them
      if($strand == -1) {
        unshift @coords, $c2;
      } else {
        push @coords, $c2;
      }
    }
  }

  if($fastmap) {
    # Bare return: an empty list in list context (the same failure value
    # fastmap() uses) and undef in scalar context.
    return if(@coords != 1);
    my $c = $coords[0];
    return ($c->{'id'}, $c->{'start'}, $c->{'end'},
            $c->{'strand'}, $c->{'coord_system'});
  }

  return @coords;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 =head2 fastmap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 Arg 1 string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 id of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 Arg 2 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 start coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 Arg 3 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 end coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 Arg 4 int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 raw contig orientation (+/- 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 Arg 5 string $type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 nature of transform - gives the type of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 coordinates to be transformed *from*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 Function inferior map method. Will only do ungapped unsplit mapping.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 Will return id, start, end, strand and coordinate system in a list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 Returntype list of results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 Exceptions none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 Caller Bio::EnsEMBL::AssemblyMapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# fastmap
#
# Quick mapping for the simple case only: the region [$start, $end] must lie
# entirely inside the source side of a single stored pair.
#
# Returns the flat list (id, start, end, strand, coord_system) for the first
# pair that fully contains the region, or an empty list when none does.
# Inserts (start == end + 1) are handed to map_insert() in fastmap mode.
# Throws if no pair table exists for $type.
sub fastmap {
  my ($self, $id, $start, $end, $strand, $type) = @_;

  # zero-length insert: delegate, asking for the flat-list form
  return $self->map_insert($id, $start, $end, $strand, $type, 1)
    if $start == $end + 1;

  $self->_sort() unless $self->{'_is_sorted'};

  # decide which side of each pair is the source and which the target
  my $type_is_to = ($type eq $self->{'to'});
  my ($src_key, $dst_key) = $type_is_to ? ('to', 'from') : ('from', 'to');
  my $dst_cs = $type_is_to ? $self->{'from_cs'} : $self->{'to_cs'};

  my $pairs_by_id = $self->{"_pair_$type"};
  throw("Type $type is neither to or from coordinate systems")
    unless $pairs_by_id;

  # keep the list in its own lexical so iterating never creates a hash entry
  my $candidates = $pairs_by_id->{uc($id)};

  foreach my $pair (@$candidates) {
    my $src = $pair->{$src_key};
    my $dst = $pair->{$dst_key};

    # only super easy mapping is done: region must sit inside this pair
    next if $start < $src->{'start'} || $end > $src->{'end'};

    my $start_offset = $start - $src->{'start'};
    my $end_offset   = $end - $src->{'start'};

    if ($pair->{'ori'} == 1) {
      return ( $dst->{'id'},
               $dst->{'start'} + $start_offset,
               $dst->{'start'} + $end_offset,
               $strand, $dst_cs );
    }

    # opposite orientation: measure back from the target end, flip strand
    return ( $dst->{'id'},
             $dst->{'end'} - $end_offset,
             $dst->{'end'} - $start_offset,
             -$strand, $dst_cs );
  }

  return ();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 =head2 add_map_coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 Arg 1 int $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 id of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 Arg 2 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 start coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 Arg 3 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 end coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 Arg 4 int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 relative orientation of source and target (+/- 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 Arg 5 int $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 id of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 Arg 6 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 start coordinate of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 Arg 7 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 end coordinate of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 Function Stores details of mapping between
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 'source' and 'target' regions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 Returntype none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 Exceptions none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 Caller Bio::EnsEMBL::Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# add_map_coordinates
#
# Records one equal-length mapping between a 'source' region
# ($contig_id, $contig_start, $contig_end) and a 'target' region
# ($chr_name, $chr_start, $chr_end) with relative orientation $contig_ori.
# The same Pair object is filed under both the target id and the source id
# (ids upper-cased), the pair counter is incremented and the mapper is
# flagged as unsorted.
#
# Throws if any of the seven arguments is undefined, or if the two regions
# differ in length.
sub add_map_coordinates {
  my ( $self, $contig_id, $contig_start, $contig_end, $contig_ori,
       $chr_name, $chr_start, $chr_end ) = @_;

  my @required = ( $contig_id, $contig_start, $contig_end, $contig_ori,
                   $chr_name, $chr_start, $chr_end );
  if ( grep { !defined($_) } @required ) {
    throw("7 arguments expected");
  }

  my $source_span = $contig_end - $contig_start;
  my $target_span = $chr_end - $chr_start;
  if ( $source_span != $target_span ) {
    throw("Cannot deal with mis-lengthed mappings so far");
  }

  my $source_unit =
    Bio::EnsEMBL::Mapper::Unit->new( $contig_id, $contig_start, $contig_end );
  my $target_unit =
    Bio::EnsEMBL::Mapper::Unit->new( $chr_name, $chr_start, $chr_end );
  my $pair =
    Bio::EnsEMBL::Mapper::Pair->new( $source_unit, $target_unit, $contig_ori );

  # place into hash on both ids
  my ( $to_type, $from_type ) = @{$self}{ 'to', 'from' };
  push @{ $self->{"_pair_$to_type"}->{ uc($chr_name) } },    $pair;
  push @{ $self->{"_pair_$from_type"}->{ uc($contig_id) } }, $pair;

  $self->{'pair_count'}++;
  # must stay the last statement: it is also the sub's implicit return value
  $self->{'_is_sorted'} = 0;
} ## end sub add_map_coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 =head2 add_indel_coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 Arg 1 int $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 id of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 Arg 2 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 start coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 Arg 3 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 end coordinate of 'source' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 Arg 4 int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 relative orientation of source and target (+/- 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 Arg 5 int $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 id of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 Arg 6 int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 start coordinate of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 Arg 7 int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 end coordinate of 'target' sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 Function stores details of mapping between two regions:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 'source' and 'target'. Returns 1 if the pair was added, 0 if it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 was already in. Used when adding an indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 Returntype int 0,1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 Exceptions none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 Caller Bio::EnsEMBL::Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# add_indel_coordinates
#
# Records an indel mapping between a 'source' region and a 'target' region.
# Unlike add_map_coordinates() no length comparison is made here. The pair
# is stored as a Bio::EnsEMBL::Mapper::IndelPair under both the target id
# and the source id (ids upper-cased).
#
# Returns 1. NOTE: this code performs no duplicate detection, so it never
# returns 0.
# Throws if any of the seven arguments is undefined.
sub add_indel_coordinates{
  my ($self, $contig_id, $contig_start, $contig_end,
      $contig_ori, $chr_name, $chr_start, $chr_end) = @_;

  my @required = ($contig_id, $contig_start, $contig_end, $contig_ori,
                  $chr_name, $chr_start, $chr_end);
  throw("7 arguments expected") if grep { !defined($_) } @required;

  # one IndelPair object is shared by both lookup tables, to and from
  my $source_unit =
    Bio::EnsEMBL::Mapper::Unit->new($contig_id, $contig_start, $contig_end);
  my $target_unit =
    Bio::EnsEMBL::Mapper::Unit->new($chr_name, $chr_start, $chr_end);
  my $indel_pair =
    Bio::EnsEMBL::Mapper::IndelPair->new($source_unit, $target_unit, $contig_ori);

  # place into hash on both ids
  my ($to_type, $from_type) = @{$self}{'to', 'from'};
  push @{ $self->{"_pair_$to_type"}->{uc($chr_name)} },    $indel_pair;
  push @{ $self->{"_pair_$from_type"}->{uc($contig_id)} }, $indel_pair;

  $self->{'pair_count'}++;
  $self->{'_is_sorted'} = 0;

  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 =head2 map_indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 Arg [1] : string $id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 Arg [2] : int $start - start coord. Since this is an indel should always
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 be one greater than end.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 Arg [3] : int $end - end coord. Since this is an indel should always
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 be one less than start.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 Arg [4] : int $strand (0, 1, -1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 Arg [5] : string $type - the coordinate system name the coords are from.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 Example : @coords = $mapper->map_indel();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 Description: This is an internal function which handles the special mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 case for indels (start = end +1). It will be used to map from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 a coordinate system with a gap to another that contains an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 insertion. It will be mainly used by the Variation API.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 Returntype : Bio::EnsEMBL::Mapper::Unit objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# map_indel
#
# Special-case mapping for indels (start == end + 1). Start and end are
# swapped, a binary search over the pairs stored for $id narrows the
# starting index, and the list is then scanned from that index for the
# first pair carrying an 'indel' key.
#
# Returns a list holding one new Bio::EnsEMBL::Mapper::Unit built from that
# pair's target side (id, start, end), or an empty list if no indel pair is
# found. $strand is accepted but not used by this code.
# Throws if no pair table exists for $type.
sub map_indel {
  my ( $self, $id, $start, $end, $strand, $type ) = @_;

  # swap start/end so the indel becomes a 2bp coordinate
  ( $start, $end ) = ( $end, $start );

  $self->_sort() unless $self->{'_is_sorted'};

  my $pairs_by_id = $self->{"_pair_$type"};

  # which side of each pair is the source, which the target
  my ( $src_key, $dst_key ) =
    ( $type eq $self->{'to'} ) ? ( 'to', 'from' ) : ( 'from', 'to' );

  throw("Type $type is neither to or from coordinate systems")
    unless defined $pairs_by_id;

  my $pairs = $pairs_by_id->{ uc($id) };
  my $low   = 0;
  my $high  = $#{$pairs};

  # binary search the relevant pairs
  # helps if the list is big
  while ( ( $high - $low ) > 1 ) {
    my $mid       = ( $low + $high ) >> 1;
    my $mid_pair  = $pairs->[$mid];
    my $mid_coord = $mid_pair->{$src_key};
    if ( $mid_coord->{'end'} <= $start ) {
      $low = $mid;
    } else {
      $high = $mid;
    }
  }

  my @indel_coordinates;

  foreach my $idx ( $low .. $#{$pairs} ) {
    my $pair = $pairs->[$idx];
    next unless exists $pair->{'indel'};

    # need to return unit coordinate
    my $target = $pair->{$dst_key};
    push @indel_coordinates,
      Bio::EnsEMBL::Mapper::Unit->new( $target->{'id'},
                                       $target->{'start'},
                                       $target->{'end'}, );
    last;
  }

  return @indel_coordinates;
} ## end sub map_indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 =head2 add_Mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 Arg 1 Bio::EnsEMBL::Mapper $mapper2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 Example $mapper->add_Mapper($mapper2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 Function add all the map coordinates from $mapper to this mapper.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 This object will contain mapping pairs from both the old
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 object and $mapper2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 Returntype int 0,1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 Exceptions throw if 'to' and 'from' from both Bio::EnsEMBL::Mappers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 are incompatible
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 Caller $mapper->methodname()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# add_Mapper
#
# Appends every mapping pair held by $mapper to this mapper, so that this
# object afterwards contains the pairs of both.
#
# Arg     : $mapper - another mapper whose 'to' and 'from' names equal ours
# Returns : 1 on success
# Throws  : if the 'to'/'from' names differ, or if $mapper holds a different
#           number of pairs in its 'to' table than in its 'from' table.
#
# All validation happens BEFORE anything is appended, so a rejected mapper
# leaves this object exactly as it was (pairs, pair_count and _is_sorted).
sub add_Mapper{
  my ($self, $mapper) = @_;

  my $mapper_to = $mapper->{'to'};
  my $mapper_from = $mapper->{'from'};
  if ($mapper_to ne $self->{'to'} or $mapper_from ne $self->{'from'}) {
    throw("Trying to add an incompatible Mapper");
  }

  # '|| {}' gives an empty table to iterate without creating one in $mapper
  my $incoming_to   = $mapper->{"_pair_$mapper_to"}   || {};
  my $incoming_from = $mapper->{"_pair_$mapper_from"} || {};

  # every pair is filed once per table, so the two totals must agree
  my $count_a = 0;
  $count_a += scalar(@{$_}) foreach values %{$incoming_to};
  my $count_b = 0;
  $count_b += scalar(@{$_}) foreach values %{$incoming_from};

  if ($count_a != $count_b) {
    throw("Trying to add a funny Mapper");
  }

  foreach my $seq_name (keys %{$incoming_to}) {
    push(@{$self->{"_pair_$mapper_to"}->{$seq_name}},
         @{$incoming_to->{$seq_name}});
  }
  foreach my $seq_name (keys %{$incoming_from}) {
    push(@{$self->{"_pair_$mapper_from"}->{$seq_name}},
         @{$incoming_from->{$seq_name}});
  }

  $self->{'pair_count'} += $count_a;

  # appended pairs are not in sorted position
  $self->{'_is_sorted'} = 0;
  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 list_pairs

    Arg  1      int $id
                id of 'source' sequence (looked up case-insensitively:
                the id is upper-cased before the lookup)
    Arg  2      int $start
                start coordinate of 'source' sequence
    Arg  3      int $end
                end coordinate of 'source' sequence
    Arg  4      string $type
                nature of transform - gives the type of
                coordinates to be transformed *from*
    Function    list all pairs of mappings in a region.
                Passing -1 for both $start and $end returns every
                pair stored for $id without any range filtering.
    Returntype  list of Bio::EnsEMBL::Mapper::Pair
                (empty list when nothing is stored for $id)
    Exceptions  throw() is called when $type is missing, when
                $start is greater than $end, or when no pair table
                exists for $type
    Caller      Bio::EnsEMBL::Mapper

=cut

sub list_pairs {
  my ( $self, $id, $start, $end, $type ) = @_;

  # The range scan below stops at the first pair starting beyond $end,
  # so the pair lists have to be ordered before they are consulted.
  $self->_sort() unless $self->{'_is_sorted'};

  throw("Expected 4 arguments") unless defined $type;

  throw(   "Start is greater than end "
         . "for id $id, start $start, end $end\n" )
    if $start > $end;

  my $pairs_by_id = $self->{"_pair_$type"};

  # Side of each pair that holds coordinates of the requested type.
  my $side = ( $type eq $self->{'to'} ) ? 'to' : 'from';

  throw("Type $type is neither to or from coordinate systems")
    unless defined $pairs_by_id;

  my $key = uc($id);
  return () unless exists $pairs_by_id->{$key};

  my @candidates = @{ $pairs_by_id->{$key} };

  # -1/-1 is the "give me everything" request.
  return @candidates if $start == -1 && $end == -1;

  my @overlapping;
  for my $pair (@candidates) {
    next if $pair->{$side}->{'end'} < $start;      # entirely before region
    last if $pair->{$side}->{'start'} > $end;      # entirely after region
    push @overlapping, $pair;
  }

  return @overlapping;
} ## end sub list_pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 to

    Arg  1      (optional) new value for the 'to' attribute;
                an undefined argument leaves the attribute untouched
    Function    getter/setter for the 'to' attribute of the mapper.
                Other methods of this class compare the stored value
                with their $type argument and use it to build the
                "_pair_<name>" key under which pairs are kept.
    Returntype  the current value of the 'to' attribute
    Exceptions  none
    Caller      Bio::EnsEMBL::Mapper

=cut

sub to {
  my $self  = shift;
  my $value = shift;

  $self->{'to'} = $value if defined $value;

  return $self->{'to'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 from

    Arg  1      (optional) new value for the 'from' attribute;
                an undefined argument leaves the attribute untouched
    Function    getter/setter for the 'from' attribute of the mapper.
                Other methods of this class use the stored value to
                build the "_pair_<name>" key under which pairs are kept.
    Returntype  the current value of the 'from' attribute
    Exceptions  none
    Caller      Bio::EnsEMBL::Mapper

=cut

sub from {
  my $self  = shift;
  my $value = shift;

  $self->{'from'} = $value if defined $value;

  return $self->{'from'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _dump
#
#   Arg  1      *FileHandle $fh
#               (optional) handle to print to; STDERR when omitted
#   Function    convenience dump function
#               possibly useful for debugging.
#               Prints, for every id in the 'from' pair table, one
#               line per pair: from start/end, to start/end, to id.
#   Returntype  none
#   Exceptions  none
#   Caller      internal
#

sub _dump {
  my ( $self, $fh ) = @_;

  $fh = \*STDERR unless defined $fh;

  # The sort/merge/list methods of this class keep the pairs under
  # "_pair_<from name>"; the previously hard-coded '_pair_hash_from'
  # key only matched when the from name happened to be 'hash_from'.
  my $from_name   = $self->{'from'};
  my $pairs_by_id =
    defined $from_name ? ( $self->{"_pair_$from_name"} || {} ) : {};

  # Sorted ids make successive dumps comparable.
  foreach my $id ( sort keys %{$pairs_by_id} ) {
    print {$fh} "From Hash $id\n";
    foreach my $pair ( @{ $pairs_by_id->{$id} } ) {
      print {$fh} " ", $pair->from->start, " ", $pair->from->end, ":",
        $pair->to->start, " ", $pair->to->end, " ", $pair->to->id, "\n";
    }
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _sort
#
#   Function    orders every pair list held by the mapper:
#               lists in the 'from' table by the pairs' from-start,
#               lists in the 'to' table by the pairs' to-start.
#               Afterwards calls _merge_pairs() and flags the
#               mapper as sorted.
#   Returntype  none
#   Exceptions  none
#   Caller      internal
#

sub _sort {
  my ($self) = @_;

  # 'from' table first, then 'to' table; each list is re-ordered in
  # place so references held elsewhere to the same arrays stay valid.
  for my $side (qw(from to)) {
    my $slot = "_pair_$self->{$side}";

    for my $id ( keys %{ $self->{$slot} } ) {
      my $pairs = $self->{$slot}->{$id};
      @{$pairs} =
        sort { $a->{$side}->{'start'} <=> $b->{$side}->{'start'} }
        @{$pairs};
    }
  }

  $self->_merge_pairs();
  $self->_is_sorted(1);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _merge_pairs
#
#   Function    walks every pair list in the 'to' table and collapses
#               neighbouring pairs:
#                 - a pair with the same from id and the same to-start
#                   as its predecessor is dropped as a duplicate;
#                 - a pair that directly continues its predecessor
#                   (same from id, same orientation, adjacent on both
#                   the 'to' and the 'from' side) is folded into it.
#               Pairs carrying an 'indel' key are never merged.
#               A dropped pair is also removed from the 'from' table,
#               and 'pair_count' is recomputed from the 'to' table.
#   Returntype  none
#   Exceptions  none
#   Caller      internal (_sort); expects the lists to be ordered
#               by to-start already
#

sub _merge_pairs {
  my $self = shift;

  my $map_to   = $self->{'to'};
  my $map_from = $self->{'from'};
  $self->{'pair_count'} = 0;

  for my $key ( keys %{ $self->{"_pair_$map_to"} } ) {
    my $lr = $self->{"_pair_$map_to"}->{$key};

    # $i is the pair being extended, $next its right-hand neighbour;
    # $length tracks the last valid index as elements are spliced out.
    my $i      = 0;
    my $next   = 1;
    my $length = $#{$lr};

    while ( $next <= $length ) {
      my $current_pair = $lr->[$i];
      my $next_pair    = $lr->[$next];
      my $del_pair;

      if ( exists $current_pair->{'indel'} || exists $next_pair->{'indel'} ) {
        # necessary to modify the merge function to not merge indels
        $next++;
        $i++;
        next;
      }

      # Ids may be non-numeric (sequence names), so they must be
      # compared as strings: with '==' two different names both
      # numify to 0 and would be treated as equal.
      my $same_from_id =
        $current_pair->{'from'}->{'id'} eq $next_pair->{'from'}->{'id'};

      if ( $same_from_id
        && $current_pair->{'to'}->{'start'} == $next_pair->{'to'}->{'start'} )
      {
        # duplicate filter
        $del_pair = $next_pair;
      }
      elsif ( $same_from_id
        && $next_pair->{'ori'} == $current_pair->{'ori'}
        && $next_pair->{'to'}->{'start'} - 1 == $current_pair->{'to'}->{'end'} )
      {
        if ( $current_pair->{'ori'} == 1 ) {
          # forward strand: next must start right after current ends
          if ( $next_pair->{'from'}->{'start'} - 1
            == $current_pair->{'from'}->{'end'} )
          {
            $current_pair->{'to'}->{'end'}   = $next_pair->{'to'}->{'end'};
            $current_pair->{'from'}->{'end'} = $next_pair->{'from'}->{'end'};
            $del_pair                        = $next_pair;
          }
        }
        else {
          # backward strand: next must end right before current starts
          if ( $next_pair->{'from'}->{'end'} + 1
            == $current_pair->{'from'}->{'start'} )
          {
            $current_pair->{'to'}->{'end'} = $next_pair->{'to'}->{'end'};
            $current_pair->{'from'}->{'start'} =
              $next_pair->{'from'}->{'start'};
            $del_pair = $next_pair;
          }
        }
      }

      if ( defined $del_pair ) {
        splice( @{$lr}, $next, 1 );

        # Remove the very same pair object (reference identity) from
        # the 'from' table so both tables stay in step.
        my $lr_from =
          $self->{"_pair_$map_from"}->{ uc( $del_pair->{'from'}->{'id'} ) }
          || [];
        for ( my $j = 0 ; $j <= $#{$lr_from} ; $j++ ) {
          if ( $lr_from->[$j] == $del_pair ) {
            splice( @{$lr_from}, $j, 1 );
            last;
          }
        }

        # Stay on the same $i: the enlarged pair may absorb the
        # element that has just moved into position $next.
        $length--;
        last if $length < $next;
      }
      else {
        $next++;
        $i++;
      }
    }

    $self->{'pair_count'} += scalar( @{$lr} );
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _is_sorted
#
#   Arg  1      int $sorted
#               (optional) new flag value; ignored when undefined
#   Function    getter/setter for the flag recording whether the
#               (internal) map data are sorted
#   Returntype  int - the current value of the flag
#   Exceptions  none
#   Caller      internal
#

sub _is_sorted {
  my $self = shift;
  my ($value) = @_;

  if ( defined $value ) {
    $self->{'_is_sorted'} = $value;
  }

  return $self->{'_is_sorted'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 1;