annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/InternalIdMapper/BaseMapper.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 package Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 use Bio::EnsEMBL::IdMapping::BaseObject;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 use Bio::EnsEMBL::IdMapping::MappingList;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 use constant SIMILAR_SCORE_RATIO => 0.01;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# basic_mapping
#
# Find the highest unambiguous score for all sources and targets in a
# scoring matrix.
#
# Arguments:
#   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#   $mapping_name - base name of the serialised checkpoint file; the file
#                   used is "<mapping_name>.ser" below <basedir>/mapping
#
# Returns the Bio::EnsEMBL::IdMapping::MappingList holding the accepted
# entries. If a serialised list with this name could be auto-loaded it is
# returned as is and the matrix is not examined at all; otherwise the list
# is built from the matrix and written to file before being returned.
#
# Throws if $matrix is missing or of the wrong class, or if no mapping
# name was given.
#
sub basic_mapping {
  my ( $self, $matrix, $mapping_name ) = @_;

  # argument checks
  if ( !$matrix
    or !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
  {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  if ( !$mapping_name ) {
    throw('Need a name for serialising the mapping.');
  }

  # AUTO_LOAD makes the MappingList pick up a previously serialised
  # result for this mapping name, if there is one
  my $dump_path =
    path_append( $self->conf->param('basedir'), 'mapping' );

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => "${mapping_name}.ser",
    -AUTO_LOAD  => 1,
  );

  # checkpoint test: hand back the stored result untouched
  if ( $mappings->loaded ) {
    $self->logger->info(
      "Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  # sources and targets that already have an accepted mapping
  my %source_mapped;
  my %target_mapped;

  # walk the scoring matrix entries from best to worst score
ENTRY:
  foreach my $entry ( sort { $b->score <=> $a->score }
    @{ $matrix->get_all_Entries } )
  {
    my $source = $entry->source;
    my $target = $entry->target;

    # we already found a mapping for either source or target
    next ENTRY if ( $source_mapped{$source} or $target_mapped{$target} );

    # there's a better mapping for either source or target
    next ENTRY
      if (
      $self->higher_score_exists(
        $entry, $matrix, \%source_mapped, \%target_mapped
      ) );

    # ambiguous mappings are not resolved here; an entry is only skipped
    # if at least one of its competitors is still unmapped
    my $rival_sources = [];
    my $rival_targets = [];

    if (
      $self->ambiguous_mapping(
        $entry, $matrix, $rival_sources, $rival_targets
      ) )
    {
      $rival_sources =
        $self->filter_sources( $rival_sources, \%source_mapped );
      $rival_targets =
        $self->filter_targets( $rival_targets, \%target_mapped );

      next ENTRY if ( @{$rival_sources} or @{$rival_targets} );
    }

    # this is the best mapping, add it
    $mappings->add_Entry($entry);

    $source_mapped{$source} = 1;
    $target_mapped{$target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
} ## end sub basic_mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# higher_score_exists
#
# Check whether the source or the target of $entry has a strictly better
# scoring partner that is still available.
#
# Arguments:
#   $entry        - entry providing source(), target() and score()
#   $matrix       - scoring matrix queried for competing sources/targets
#   $sources_done - hashref keyed by sources that are already mapped
#   $targets_done - hashref keyed by targets that are already mapped
#
# Returns 1 if some other source for the same target, or some other
# target for the same source, is not yet marked done and scores higher
# than $entry; 0 otherwise.
#
sub higher_score_exists {
  my ( $self, $entry, $matrix, $sources_done, $targets_done ) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score  = $entry->score;

  # competing sources pointing at the same target
  for my $rival_source ( @{ $matrix->get_sources_for_target($target) } ) {
    next if ( $rival_source == $source );
    next if ( $sources_done->{$rival_source} );

    return 1 if ( $matrix->get_score( $rival_source, $target ) > $score );
  }

  # competing targets reachable from the same source
  for my $rival_target ( @{ $matrix->get_targets_for_source($source) } ) {
    next if ( $rival_target == $target );
    next if ( $targets_done->{$rival_target} );

    return 1 if ( $matrix->get_score( $source, $rival_target ) > $score );
  }

  return 0;
} ## end sub higher_score_exists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# ambiguous_mapping
#
# Find ambiguous mappings (see scores_similar() for the definition of
# "similar").
#
# Arguments:
#   $entry         - entry providing source(), target() and score()
#   $matrix        - scoring matrix queried for competing sources/targets
#   $other_sources - arrayref; competing sources are appended to it
#   $other_targets - arrayref; competing targets are appended to it
#
# A competitor is any other source for the entry's target (or any other
# target for the entry's source) whose score is similar to, or higher
# than, the entry's own score.
#
# Returns 1 if at least one competitor was found, 0 otherwise. The
# competitors themselves are reported through the two arrayrefs.
#
sub ambiguous_mapping {
  my ( $self, $entry, $matrix, $other_sources, $other_targets ) = @_;

  my $source = $entry->source;
  my $target = $entry->target;
  my $score  = $entry->score;

  my $ambiguous = 0;

  # other sources competing for the same target
  for my $rival_source ( @{ $matrix->get_sources_for_target($target) } ) {
    my $rival_score = $matrix->get_score( $rival_source, $target );

    next if ( $rival_source == $source );
    next
      unless ( $self->scores_similar( $score, $rival_score )
      or $score < $rival_score );

    $ambiguous = 1;
    push @{$other_sources}, $rival_source;
  }

  # other targets competing for the same source
  for my $rival_target ( @{ $matrix->get_targets_for_source($source) } ) {
    my $rival_score = $matrix->get_score( $source, $rival_target );

    next if ( $rival_target == $target );
    next
      unless ( $self->scores_similar( $score, $rival_score )
      or $score < $rival_score );

    $ambiguous = 1;
    push @{$other_targets}, $rival_target;
  }

  return $ambiguous;
} ## end sub ambiguous_mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# scores_similar
#
# Decide whether two scores are close enough to be treated as equal
# (rule for similarity taken from java code...).
#
# Arguments:
#   $s1, $s2 - the two scores to compare
#
# Two scores are similar when their relative difference
#   2 * abs(s1 - s2) / (s1 + s2)
# is below SIMILAR_SCORE_RATIO. The check is not symmetric: an exact
# match in $s1 (score 1) is never similar to a lower $s2.
#
# Returns a true value if the scores are similar, a false value if not.
#
sub scores_similar {
  my ( $self, $s1, $s2 ) = @_;

  # always give priority to exact matches over very similar ones
  return 0 if ( $s1 == 1 and $s2 < 1 );

  my $diff = abs( $s1 - $s2 );
  my $sum  = $s1 + $s2;

  # The relative difference is undefined for a zero sum and the division
  # below would die with "Illegal division by zero". Two identical scores
  # (e.g. both 0) are trivially similar; anything else is not.
  return ( $diff == 0 ? 1 : 0 ) if ( $sum == 0 );

  my $pc = 2 * $diff / $sum;

  # parenthesised so the constant can never be parsed as a bareword
  return ( $pc < SIMILAR_SCORE_RATIO() );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# filter_sources
#
# Remove sources that have already been mapped from a list of sources.
#
# Arguments:
#   $other_sources - arrayref of sources
#   $sources_done  - hashref; a true value for a key marks that source
#                    as already mapped
#
# Returns an arrayref of the sources that are not marked as done, in
# their original order. When either the list or the hash is empty there
# is nothing to filter and the arrayref that was passed in is returned
# itself; otherwise a new arrayref is returned.
#
sub filter_sources {
  my ( $self, $other_sources, $sources_done ) = @_;

  # nothing to filter
  return $other_sources
    unless ( scalar( @{$other_sources} )
    and scalar( keys %{$sources_done} ) );

  return [ grep { !$sources_done->{$_} } @{$other_sources} ];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#
# filter_targets
#
# Remove targets that have already been mapped from a list of targets.
#
# Arguments:
#   $other_targets - arrayref of targets
#   $targets_done  - hashref; a true value for a key marks that target
#                    as already mapped
#
# Returns an arrayref of the targets that are not marked as done, in
# their original order. When either the list or the hash is empty there
# is nothing to filter and the arrayref that was passed in is returned
# itself; otherwise a new arrayref is returned.
#
sub filter_targets {
  my ( $self, $other_targets, $targets_done ) = @_;

  # nothing to filter
  return $other_targets
    unless ( scalar( @{$other_targets} )
    and scalar( keys %{$targets_done} ) );

  return [ grep { !$targets_done->{$_} } @{$other_targets} ];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 1;