comparison variant_effect_predictor/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 =head1 SYNOPSIS
24
25 =head1 DESCRIPTION
26
27 =head1 METHODS
28
29 =cut
30
31
32 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
33
34 use strict;
35 use warnings;
36 no warnings 'uninitialized';
37
38 use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
39 our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
40
41 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
42 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
43
44
45 #
46 # basic mapping
47 #
sub init_basic {
  # Runs the plain basic mapping over the transcript scores and then asks
  # the score builder for a shrunken matrix based on the resulting mappings.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb
  # (object providing create_shrinked_matrix), $mappings (overwritten here,
  # the incoming value is not read) and $transcript_scores.
  #
  # Returns the list ($new_scores, $mappings).
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');

  # The mappings are named with the incoming suffix; the shrunken matrix is
  # named with the suffix after it has been incremented.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
65
66
67 #
68 # handle cases with exact match but different translation
69 #
sub non_exact_translation {
  # Mapping pass for the "different translation" case: unless the score
  # matrix reports itself as loaded, it is rescored through
  # $tsb->different_translation_rescore and written out; the basic mapping
  # and the shrunken matrix are then produced from it.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb,
  # $mappings (overwritten here, the incoming value is not read) and
  # $transcript_scores.
  #
  # Returns the list ($new_scores, $mappings).
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->different_translation_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  # Mappings use the incoming suffix, the shrunken matrix the incremented one.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
92
93
94 #
95 # reduce score for mappings of transcripts which do not belong to mapped
96 # genes
97 #
sub mapped_gene {
  # Mapping pass that takes gene mappings into account: unless the score
  # matrix reports itself as loaded, it is rescored through
  # $tsb->non_mapped_gene_rescore (which receives $gene_mappings) and written
  # out; the basic mapping and the shrunken matrix are then produced from it.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb,
  # $mappings (overwritten here, the incoming value is not read),
  # $transcript_scores and $gene_mappings.
  #
  # Returns the list ($new_scores, $mappings).
  my ($self, $num, $tsb, $mappings, $transcript_scores, $gene_mappings) = @_;

  $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
    $transcript_scores->write_to_file;
  }

  # Mappings use the incoming suffix, the shrunken matrix the incremented one.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
121
122 #
123 # rescore by penalising scores between transcripts with different biotypes
124 #
sub biotype {
  # Mapping pass with biotype-based rescoring: unless the score matrix
  # reports itself as loaded, it is rescored through
  # $tsb->biotype_transcript_rescore and written out; the basic mapping and
  # the shrunken matrix are then produced from it.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb, a
  # mappings argument that this method does not use, and $transcript_scores.
  #
  # Returns the list ($new_scores, $new_mappings).
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  if (!$transcript_scores->loaded()) {
    $tsb->biotype_transcript_rescore($transcript_scores);
    $transcript_scores->write_to_file();
  }

  # Mappings use the incoming suffix, the shrunken matrix the incremented one.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  my $new_mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $new_mappings,
                                 $matrix_name);

  return ($new_scores, $new_mappings);
}
149
150 #
151 # selectively rescore by penalising scores between transcripts with
152 # different internalIDs
153 #
sub internal_id {
  # Mapping pass with internal-ID-based rescoring: unless the score matrix
  # reports itself as loaded, it is rescored through
  # $tsb->internal_id_rescore and written out; the basic mapping and the
  # shrunken matrix are then produced from it.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb,
  # $mappings (overwritten here, the incoming value is not read) and
  # $transcript_scores.
  #
  # Returns the list ($new_scores, $mappings).
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $tsb->internal_id_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  # Mappings use the incoming suffix, the shrunken matrix the incremented one.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  $mappings = $self->basic_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
176
177
178 #
179 # handle ambiguities between transcripts in single genes
180 #
sub single_gene {
  # Mapping pass that delegates to same_gene_transcript_mapping instead of
  # the basic mapper. No rescoring is done here; the score matrix is only
  # written out when it does not report itself as loaded.
  #
  # Arguments (positional): $num (suffix used in the cache names), $tsb,
  # $mappings (overwritten here, the incoming value is not read) and
  # $transcript_scores.
  #
  # Returns the list ($new_scores, $mappings).
  my ($self, $num, $tsb, $mappings, $transcript_scores) = @_;

  $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');

  if (!$transcript_scores->loaded) {
    $transcript_scores->write_to_file;
  }

  # Mappings use the incoming suffix, the shrunken matrix the incremented one.
  my $mappings_name = "transcript_mappings$num";
  $num++;
  my $matrix_name = "transcript_matrix$num";

  $mappings =
    $self->same_gene_transcript_mapping($transcript_scores, $mappings_name);

  my $new_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
202
203
204 #
205 # modified basic mapper that maps transcripts that are ambiguous within one gene
206 #
sub same_gene_transcript_mapping {
  # Builds a MappingList from a scored matrix, walking the entries from the
  # highest score down. An entry is added when ambiguous_mapping does not
  # flag it, or when it is flagged but the entry and all of its remaining
  # competitors belong to exactly one source gene and exactly one target
  # gene.
  #
  # Arguments (positional):
  #   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
  #   $mapping_name - base name of the serialised mapping file
  #
  # Returns the MappingList. If one was loaded from "<mapping_name>.ser" it
  # is returned as is; otherwise the newly built list is written out first.
  #
  # Throws if $matrix is missing or of the wrong class, or if $mapping_name
  # is false.
  my ($self, $matrix, $mapping_name) = @_;

  if (!$matrix
      or !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  if (!$mapping_name) {
    throw('Need a name for serialising the mapping.');
  }

  # AUTO_LOAD is requested so that a previously serialised list is picked up.
  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => "${mapping_name}.ser",
    -AUTO_LOAD  => 1,
  );

  # Checkpoint: nothing to compute if the list came back from disk.
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  # Source and target IDs that have already been dealt with.
  my %source_seen;
  my %target_seen;

  # Looks up the gene for one transcript ID; $db_type is 'source' or 'target'.
  my $gene_of = sub {
    my ($db_type, $transcript_id) = @_;
    return $self->cache->get_by_key('genes_by_transcript_id', $db_type,
                                    $transcript_id);
  };

  # Highest score first; ties are broken by ascending source, then target.
  my @queue = sort {
         $b->score  <=> $a->score
      || $a->source <=> $b->source
      || $a->target <=> $b->target
  } @{ $matrix->get_all_Entries };

  while (my $entry = shift @queue) {

    # Skip entries whose source or target has already been handled.
    next if $source_seen{ $entry->source } or $target_seen{ $entry->target };

    my $rival_sources = [];
    my $rival_targets = [];

    # An entry that is not flagged as ambiguous is the best mapping: keep it.
    my $keep = 1;

    if ($self->ambiguous_mapping($entry, $matrix, $rival_sources,
                                 $rival_targets)) {

      $rival_sources = $self->filter_sources($rival_sources, \%source_seen);
      $rival_targets = $self->filter_targets($rival_targets, \%target_seen);

      # Collect the genes of this entry and of every remaining competitor.
      my %source_genes;
      my %target_genes;

      $source_genes{ $gene_of->('source', $entry->source) } = 1;
      $target_genes{ $gene_of->('target', $entry->target) } = 1;

      foreach my $source_id (@{ $rival_sources }) {
        $source_genes{ $gene_of->('source', $source_id) } = 1;
      }

      foreach my $target_id (@{ $rival_targets }) {
        $target_genes{ $gene_of->('target', $target_id) } = 1;
      }

      # Keep only if a single source gene and a single target gene are
      # involved.
      $keep = (keys(%source_genes) == 1 && keys(%target_genes) == 1);
    }

    $mappings->add_Entry($entry) if $keep;

    # Both ends are marked as handled whether or not the entry was added.
    $source_seen{ $entry->source } = 1;
    $target_seen{ $entry->target } = 1;
  }

  # Create the checkpoint.
  $mappings->write_to_file;

  return $mappings;
}
302
303
304 1;
305