annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/InternalIdMapper.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 package Bio::EnsEMBL::IdMapping::InternalIdMapper;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 no warnings 'uninitialized';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 use Bio::EnsEMBL::IdMapping::BaseObject;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 use Bio::EnsEMBL::IdMapping::Entry;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 use Bio::EnsEMBL::IdMapping::MappingList;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 use Bio::EnsEMBL::IdMapping::SyntenyFramework;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 # scores are considered the same if (2.0 * (s1-s2))/(s1 + s2) < this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 use constant SIMILAR_SCORE_RATIO => 0.01;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Build (or reload) the internal ID mappings for genes.
#
# Arguments:
#   $gene_scores       - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix (genes)
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#                        (transcripts)
#   $gsb               - Bio::EnsEMBL::IdMapping::GeneScoreBuilder
#
# Returns the Bio::EnsEMBL::IdMapping::MappingList backed by
# 'gene_mappings.ser' below <basedir>/mapping.
#
# Throws if any argument is missing or not of the expected class.
#
sub map_genes {
  my ($self, $gene_scores, $transcript_scores, $gsb) = @_;

  # reject anything that is not the object type we were promised
  throw('Need a gene Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.')
    unless ($gene_scores and
      $gene_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.')
    unless ($transcript_scores and
      $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  throw('Need a Bio::EnsEMBL::IdMapping::GeneScoreBuilder.')
    unless ($gsb and
      $gsb->isa('Bio::EnsEMBL::IdMapping::GeneScoreBuilder'));

  $self->logger->info("== Internal ID mapping for genes...\n\n", 0, 'stamped');

  my $mapping_dir = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'gene_mappings.ser',
  );

  my $cache_file = $mappings->cache_file;

  # a non-empty cache file short-circuits the whole calculation
  if (-s $cache_file) {
    $self->logger->info("Reading gene mappings from file...\n", 0, 'stamped');
    $self->logger->debug("Cache file $cache_file.\n", 1);
    $mappings->read_from_file;
    $self->logger->info("Done.\n\n", 0, 'stamped');

    return $mappings;
  }

  # nothing cached: calculate the gene mappings from scratch
  $self->logger->info("No gene mappings found. Will calculate them now.\n");

  # plugin methods come from the configuration; fall back to the built-in
  # chain when the configuration does not name any
  my @chain = $self->conf->param('plugin_internal_id_mappers_gene');

  unless (defined($chain[0])) {
    @chain = qw(
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::init_basic
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::best_transcript
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::biotype
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::synteny
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric::internal_id
    );
  }

  my $stage_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'gene_mappings0.ser',
  );
  my @stage_results;

  #
  # run the scoring chain: every plugin receives its position in the chain
  # plus the scores and mappings handed back by the previous plugin
  #
  foreach my $step (0 .. $#chain) {
    ($gene_scores, $stage_mappings) = $self->delegate_to_plugin(
      $chain[$step], $step, $gsb, $stage_mappings, $gene_scores,
      $transcript_scores);

    push(@stage_results, $stage_mappings);
  }

  # whatever is still in the score matrix could not be resolved
  my $source_count = $gene_scores->get_source_count;
  my $target_count = $gene_scores->get_target_count;
  $self->logger->info(
    "$source_count source genes are ambiguous with $target_count target genes.\n\n");

  $self->log_ambiguous($gene_scores, 'gene');

  # combine the per-plugin results and persist them
  $mappings->add_all(@stage_results);
  $mappings->write_to_file;

  $mappings->log('gene', $self->conf->param('basedir'))
    if ($self->logger->loglevel eq 'debug');

  $self->logger->info("Done.\n\n", 0, 'stamped');

  return $mappings;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Build (or reload) the internal ID mappings for transcripts.
#
# Arguments:
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#                        (transcripts)
#   $gene_mappings     - Bio::EnsEMBL::IdMapping::MappingList (genes)
#   $tsb               - Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder
#
# Returns the Bio::EnsEMBL::IdMapping::MappingList backed by
# 'transcript_mappings.ser' below <basedir>/mapping.
#
# Throws if any argument is missing or not of the expected class.
#
sub map_transcripts {
  my ($self, $transcript_scores, $gene_mappings, $tsb) = @_;

  # reject anything that is not the object type we were promised
  throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.')
    unless ($transcript_scores and
      $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.')
    unless ($gene_mappings and
      $gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

  throw('Need a Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder.')
    unless ($tsb and
      $tsb->isa('Bio::EnsEMBL::IdMapping::TranscriptScoreBuilder'));

  $self->logger->info("== Internal ID mapping for transcripts...\n\n", 0, 'stamped');

  my $mapping_dir = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'transcript_mappings.ser',
  );

  my $cache_file = $mappings->cache_file;

  # a non-empty cache file short-circuits the whole calculation
  if (-s $cache_file) {
    $self->logger->info("Reading transcript mappings from file...\n", 0,
      'stamped');
    $self->logger->debug("Cache file $cache_file.\n", 1);
    $mappings->read_from_file;
    $self->logger->info("Done.\n\n", 0, 'stamped');

    return $mappings;
  }

  # nothing cached: calculate the transcript mappings from scratch
  $self->logger->info("No transcript mappings found. Will calculate them now.\n");

  # plugin methods come from the configuration; fall back to the built-in
  # chain when the configuration does not name any
  my @chain = $self->conf->param('plugin_internal_id_mappers_transcript');

  unless (defined($chain[0])) {
    @chain = qw(
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::init_basic
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::non_exact_translation
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::biotype
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::mapped_gene
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::single_gene
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric::internal_id
    );
  }

  my $stage_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'transcript_mappings0.ser',
  );
  my @stage_results;

  #
  # run the scoring chain: every plugin receives its position in the chain
  # plus the scores and mappings handed back by the previous plugin
  #
  foreach my $step (0 .. $#chain) {
    ($transcript_scores, $stage_mappings) = $self->delegate_to_plugin(
      $chain[$step], $step, $tsb, $stage_mappings, $transcript_scores,
      $gene_mappings);

    push(@stage_results, $stage_mappings);
  }

  # whatever is still in the score matrix could not be resolved
  my $source_count = $transcript_scores->get_source_count;
  my $target_count = $transcript_scores->get_target_count;
  $self->logger->info(
    "$source_count source transcripts are ambiguous with $target_count target transcripts.\n\n");

  $self->log_ambiguous($transcript_scores, 'transcript');

  # combine the per-plugin results and persist them
  $mappings->add_all(@stage_results);
  $mappings->write_to_file;

  $mappings->log('transcript', $self->conf->param('basedir'))
    if ($self->logger->loglevel eq 'debug');

  $self->logger->info("Done.\n\n", 0, 'stamped');

  return $mappings;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Build (or reload) the internal ID mappings for exons.
#
# Arguments:
#   $exon_scores         - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix (exons)
#   $transcript_mappings - Bio::EnsEMBL::IdMapping::MappingList (transcripts)
#   $esb                 - Bio::EnsEMBL::IdMapping::ExonScoreBuilder
#
# Returns the Bio::EnsEMBL::IdMapping::MappingList backed by
# 'exon_mappings.ser' below <basedir>/mapping.
#
# Throws if any argument is missing or not of the expected class.
#
# NOTE(review): $transcript_mappings is type-checked here but is not handed
# on to the plugin chain below - confirm whether that is intentional.
#
sub map_exons {
  my ($self, $exon_scores, $transcript_mappings, $esb) = @_;

  # reject anything that is not the object type we were promised
  throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of exons.')
    unless ($exon_scores and
      $exon_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  throw('Need a Bio::EnsEMBL::IdMapping::MappingList of transcripts.')
    unless ($transcript_mappings and
      $transcript_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

  throw('Need a Bio::EnsEMBL::IdMapping::ExonScoreBuilder.')
    unless ($esb and
      $esb->isa('Bio::EnsEMBL::IdMapping::ExonScoreBuilder'));

  $self->logger->info("== Internal ID mapping for exons...\n\n", 0, 'stamped');

  my $mapping_dir = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'exon_mappings.ser',
  );

  my $cache_file = $mappings->cache_file;

  # a non-empty cache file short-circuits the whole calculation
  if (-s $cache_file) {
    $self->logger->info("Reading exon mappings from file...\n", 0,
      'stamped');
    $self->logger->debug("Cache file $cache_file.\n", 1);
    $mappings->read_from_file;
    $self->logger->info("Done.\n\n", 0, 'stamped');

    return $mappings;
  }

  # nothing cached: calculate the exon mappings from scratch
  $self->logger->info("No exon mappings found. Will calculate them now.\n");

  # plugin methods come from the configuration; fall back to the built-in
  # chain when the configuration does not name any
  my @chain = $self->conf->param('plugin_internal_id_mappers_exon');

  unless (defined($chain[0])) {
    @chain = qw(
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::init_basic
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::mapped_transcript
      Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric::internal_id
    );
  }

  my $stage_mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $mapping_dir,
    -CACHE_FILE => 'exon_mappings0.ser',
  );
  my @stage_results;

  #
  # run the scoring chain: every plugin receives its position in the chain
  # plus the scores and mappings handed back by the previous plugin
  #
  foreach my $step (0 .. $#chain) {
    ($exon_scores, $stage_mappings) = $self->delegate_to_plugin(
      $chain[$step], $step, $esb, $stage_mappings, $exon_scores);

    push(@stage_results, $stage_mappings);
  }

  # whatever is still in the score matrix could not be resolved
  my $source_count = $exon_scores->get_source_count;
  my $target_count = $exon_scores->get_target_count;
  $self->logger->info(
    "$source_count source exons are ambiguous with $target_count target exons.\n\n");

  $self->log_ambiguous($exon_scores, 'exon');

  # combine the per-plugin results and persist them
  $mappings->add_all(@stage_results);
  $mappings->write_to_file;

  $mappings->log('exon', $self->conf->param('basedir'))
    if ($self->logger->loglevel eq 'debug');

  $self->logger->info("Done.\n\n", 0, 'stamped');

  return $mappings;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Translation mappings are derived directly from the transcript mappings
# instead of going through a plugin: a) the logic is too simple for that and
# b) it is tied to transcripts, so there are no translation scores and no
# score builder.
#
# Arg[1]  : Bio::EnsEMBL::IdMapping::MappingList $transcript_mappings
# Returns : Bio::EnsEMBL::IdMapping::MappingList holding the translation
#           mappings; read back from the cache file when that file is
#           non-empty, otherwise computed here and written to it
# Throws  : if $transcript_mappings is missing or not a MappingList
#
sub map_translations {
  my ($self, $transcript_mappings) = @_;

  # argument checks
  my $is_mapping_list = $transcript_mappings
    && $transcript_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList');
  throw('Need a Bio::EnsEMBL::IdMapping::MappingList of transcripts.')
    unless $is_mapping_list;

  $self->logger->info("== Internal ID mapping for translations...\n\n", 0, 'stamped');

  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => 'translation_mappings.ser',
  );

  my $cache_file = $mappings->cache_file;

  # a non-empty cache file from an earlier run short-circuits the calculation
  if (-s $cache_file) {
    $self->logger->info("Reading translation mappings from file...\n", 0,
                        'stamped');
    $self->logger->debug("Cache file $cache_file.\n", 1);
    $mappings->read_from_file;
    $self->logger->info("Done.\n\n", 0, 'stamped');

    return $mappings;
  }

  # nothing cached: create the translation mappings now
  $self->logger->info("No translation mappings found. Will calculate them now.\n");

  $self->logger->info("Translation mapping...\n", 0, 'stamped');

  # number of transcript mappings where either side has no translation
  my $skipped = 0;

  # map translations for mapped transcripts
  for my $tr_entry (@{ $transcript_mappings->get_all_Entries }) {

    my $source_tl = $self->cache->get_by_key('transcripts_by_id',
      'source', $tr_entry->source)->translation;
    my $target_tl = $self->cache->get_by_key('transcripts_by_id',
      'target', $tr_entry->target)->translation;

    unless ($source_tl and $target_tl) {
      $skipped++;
      next;
    }

    # the translation mapping inherits its score from the transcript mapping
    $mappings->add_Entry(
      Bio::EnsEMBL::IdMapping::Entry->new_fast([
        $source_tl->id, $target_tl->id, $tr_entry->score
      ])
    );
  }

  $self->logger->debug("Skipped transcripts without translation: $skipped\n", 1);
  $self->logger->info("New mappings: ".$mappings->get_entry_count."\n\n");

  $mappings->write_to_file;

  # in debug mode additionally dump the mappings in loggable form
  $mappings->log('translation', $self->conf->param('basedir'))
    if $self->logger->loglevel eq 'debug';

  $self->logger->info("Done.\n\n", 0, 'stamped');

  return $mappings;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Run one mapping step by dispatching to a method of a plugin module.
#
# Arg[1]   : String $plugin - fully qualified method name of the form
#            'Some::Module::method'
# Arg[2]   : $num - sequence number of this method run; passed through to the
#            plugin (used for generating checkpoint files)
# Arg[3]   : Bio::EnsEMBL::IdMapping::ScoreBuilder $score_builder
# Arg[4]   : Bio::EnsEMBL::IdMapping::MappingList $mappings
# Arg[5]   : Bio::EnsEMBL::IdMapping::ScoredMappingMatrix $scores
# Arg[6..] : any further arguments are handed to the plugin method unchanged
#            (these will vary for different object types)
# Returns  : whatever the plugin method returns; callers are expected to feed
#            this (scores and mappings) into the next plugin in the chain
# Throws   : if one of the typed arguments is missing or of the wrong class,
#            or if $plugin cannot be split into a module and a method name
#
sub delegate_to_plugin {
  my $self = shift;
  my $plugin = shift;
  my $num = shift;
  my $score_builder = shift;
  my $mappings = shift;
  my $scores = shift;

  # argument checks
  unless ($score_builder and
          $score_builder->isa('Bio::EnsEMBL::IdMapping::ScoreBuilder')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoreBuilder.');
  }

  unless ($mappings and
          $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw('Need a Bio::EnsEMBL::IdMapping::MappingList.');
  }

  unless ($scores and
          $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  # split plugin name into module and method
  #
  # The captures are taken from the match itself (list context) rather than
  # from $1/$2: those variables are dynamically scoped, so after a failed
  # match they would still hold whatever an earlier match in a caller left
  # behind and a bogus module/method would be used silently.
  $plugin = '' unless defined $plugin;
  my ($module, $method) = $plugin =~ /(.*)::(\w+)$/;

  unless ($module and $method) {
    throw("Unable to determine module and method name from $plugin.\n");
  }

  # instantiate the plugin unless we already have an instance
  my $plugin_instance;
  if ($self->has_plugin($module)) {

    # re-use an existing plugin instance
    $plugin_instance = $self->get_plugin($module);

  } else {

    # inject and instantiate the plugin module
    inject($module);
    $plugin_instance = $module->new(
      -LOGGER => $self->logger,
      -CONF   => $self->conf,
      -CACHE  => $self->cache
    );
    $self->add_plugin($plugin_instance);

  }

  # run the method on the plugin, passing in the sequence number, the score
  # builder, the mappings and scores so far, and all other arguments passed to
  # this method
  return $plugin_instance->$method($num, $score_builder, $mappings, $scores, @_);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Tell whether a plugin instance is registered under the given module name.
#
# Arg[1]  : String $module - plugin class name
# Returns : 1 if a defined entry exists for $module, 0 otherwise
#
sub has_plugin {
  my ($self, $module) = @_;

  return defined($self->{'_plugins'}->{$module}) ? 1 : 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Look up a registered plugin instance by module name.
#
# Arg[1]  : String $module - plugin class name
# Returns : the instance stored under $module, or undef if there is none
#
sub get_plugin {
  my ($self, $module) = @_;

  my $registry = $self->{'_plugins'};
  return $registry->{$module};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Register a plugin instance, keyed by the class it is blessed into.
#
# Arg[1]  : Object $plugin_instance
# Returns : the instance that was stored
#
sub add_plugin {
  my ($self, $plugin_instance) = @_;

  my $class = ref($plugin_instance);
  return $self->{'_plugins'}->{$class} = $plugin_instance;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Write a report of ambiguous mappings found in a score matrix to
# <basedir>/debug/ambiguous_<type>.txt.
#
# The matrix entries are walked twice, once grouped by source id and once
# grouped by target id. Within each group the entries are split into low
# scoring (score < 0.5) and high scoring ones, and every completed group is
# handed to write_ambiguous(), which decides whether it gets reported.
#
# Arg[1]  : Bio::EnsEMBL::IdMapping::ScoredMappingMatrix $matrix
# Arg[2]  : String $type - object type, used in the file name and the report
# Returns : true on success (result of closing the log file)
# Throws  : if $matrix is missing or of the wrong class, or if the debug
#           directory or the log file cannot be created, opened or closed
#
sub log_ambiguous {
  my $self = shift;
  my $matrix = shift;
  my $type = shift;

  unless ($matrix and
          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  # create dump directory if it doesn't exist
  my $debug_path = $self->conf->param('basedir').'/debug';
  unless (-d $debug_path) {
    # list form bypasses the shell, so whitespace or shell metacharacters in
    # the configured basedir cannot alter the command
    system('mkdir', '-p', $debug_path) == 0 or
      throw("Unable to create directory $debug_path.\n");
  }

  my $logfile = "$debug_path/ambiguous_${type}.txt";

  open(my $fh, '>', $logfile) or
    throw("Unable to open $logfile for writing: $!");

  my @entries = @{ $matrix->get_all_Entries };

  # log by source first, then do the same by target
  foreach my $db_type (qw(source target)) {

    my @low_scoring = ();
    my @high_scoring = ();
    my $last_id;

    foreach my $entry (sort { $a->$db_type <=> $b->$db_type } @entries) {

      my $id = $entry->$db_type;

      # the id changed, so the previous group is complete: flush it
      # (write_ambiguous() empties both buffers)
      if (defined($last_id) and $last_id != $id) {
        $self->write_ambiguous($type, $db_type, $fh, \@low_scoring,
                               \@high_scoring);
      }
      $last_id = $id;

      if ($entry->score < 0.5) {
        push @low_scoring, $entry;
      } else {
        push @high_scoring, $entry;
      }
    }

    # write the last group of this pass
    $self->write_ambiguous($type, $db_type, $fh, \@low_scoring,
                           \@high_scoring);
  }

  # buffered write errors only surface when the handle is closed
  close($fh) or throw("Unable to close $logfile: $!");
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
#
# Report one group of mappings that share a source (or target) id.
#
# Arg[1]  : String $type - object type, printed in the heading
# Arg[2]  : String $db_type - 'source' or 'target': the side the group shares
# Arg[3]  : filehandle $fh to print to
# Arg[4]  : Arrayref $low - low scoring entries of the group
# Arg[5]  : Arrayref $high - high scoring entries of the group
#
# Both arrays are always empty when this method returns. A group with at
# most one entry is not ambiguous from this perspective and is discarded
# without output (it gets logged from the other perspective if needed).
#
sub write_ambiguous {
  my $self = shift;
  my ($type, $db_type, $fh, $low, $high) = @_;

  # nothing to report unless the group has at least two members
  unless (@$low + @$high > 1) {
    @$low = ();
    @$high = ();
    return;
  }

  # id shared by the group, taken from its first member
  my $first_entry = @$low ? $low->[0] : $high->[0];
  my $first_id = $first_entry->$db_type;

  # the ids listed below come from the opposite side of each mapping
  my $other_db_type = ($db_type eq 'source') ? 'target' : 'source';

  print {$fh} "$db_type $type $first_id scores ambiguously:\n";

  # high scorers: one per line, with score
  if (@$high) {
    print {$fh} " high scoring ${other_db_type}s\n";

    foreach my $hit (splice(@$high)) {
      print {$fh} " ", $hit->$other_db_type, " ", $hit->score, "\n";
    }
  }

  # low scorers: ids only, comma separated, line break before every 10th
  if (@$low) {
    print {$fh} " low scoring ${other_db_type}s\n ";

    my $seen = 0;

    foreach my $hit (splice(@$low)) {
      $seen++;
      print {$fh} "\n " if $seen % 10 == 0;
      print {$fh} $hit->$other_db_type, ", ";
    }
  }

  print {$fh} "\n";
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652