annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblTranscriptGeneric.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 # basic mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# init_basic($num, $tsb, $mappings, $transcript_scores)
#
# Logs a progress message, runs the inherited basic_mapping() on the score
# matrix under the name "transcript_mappings<num>", then asks the score
# builder ($tsb) for a shrinked matrix named "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub init_basic {
  # The fourth positional argument is accepted but never read: the mappings
  # are always produced afresh by basic_mapping() below.
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');

  my $mapping_name = "transcript_mappings$num";
  my $mappings = $self->basic_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 # handle cases with exact match but different translation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# non_exact_translation($num, $tsb, $mappings, $transcript_scores)
#
# Unless the score matrix reports itself as loaded, lets the score builder
# apply different_translation_rescore() to it and writes it to file. Then
# maps with basic_mapping() under "transcript_mappings<num>" and builds a
# shrinked matrix named "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub non_exact_translation {
  # The fourth positional argument is accepted but never read.
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');

  # NOTE(review): "loaded" presumably means the matrix was restored from an
  # earlier run, in which case the rescoring is skipped -- confirm.
  if (!$transcript_scores->loaded) {
    $tsb->different_translation_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $mapping_name = "transcript_mappings$num";
  my $mappings = $self->basic_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 # reduce score for mappings of transcripts which do not belong to mapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 # genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# mapped_gene($num, $tsb, $mappings, $transcript_scores, $gene_mappings)
#
# Unless the score matrix reports itself as loaded, lets the score builder
# apply non_mapped_gene_rescore() to it (using the gene mappings) and writes
# it to file. Then maps with basic_mapping() under
# "transcript_mappings<num>" and builds a shrinked matrix named
# "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub mapped_gene {
  # The fourth positional argument is accepted but never read.
  my ($self, $num, $tsb, undef, $transcript_scores, $gene_mappings) = @_;

  $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');

  # NOTE(review): "loaded" presumably means the matrix was restored from an
  # earlier run, in which case the rescoring is skipped -- confirm.
  if (!$transcript_scores->loaded) {
    $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
    $transcript_scores->write_to_file;
  }

  my $mapping_name = "transcript_mappings$num";
  my $mappings = $self->basic_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 # rescore by penalising scores between transcripts with different biotypes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# biotype($num, $tsb, $mappings, $transcript_scores)
#
# Unless the score matrix reports itself as loaded, lets the score builder
# apply biotype_transcript_rescore() to it and writes it to file. Then maps
# with basic_mapping() under "transcript_mappings<num>" and builds a
# shrinked matrix named "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub biotype {
  # The fourth positional argument is accepted but never read.
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  # NOTE(review): "loaded" presumably means the matrix was restored from an
  # earlier run, in which case the rescoring is skipped -- confirm.
  if (!$transcript_scores->loaded()) {
    $tsb->biotype_transcript_rescore($transcript_scores);
    $transcript_scores->write_to_file();
  }

  my $mapping_name = "transcript_mappings$num";
  my $fresh_mappings = $self->basic_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $fresh_mappings,
                                 $matrix_name);

  return ($shrunk_scores, $fresh_mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 # selectively rescore by penalising scores between transcripts with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 # different internalIDs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# internal_id($num, $tsb, $mappings, $transcript_scores)
#
# Unless the score matrix reports itself as loaded, lets the score builder
# apply internal_id_rescore() to it and writes it to file. Then maps with
# basic_mapping() under "transcript_mappings<num>" and builds a shrinked
# matrix named "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub internal_id {
  # The fourth positional argument is accepted but never read.
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  # NOTE(review): "loaded" presumably means the matrix was restored from an
  # earlier run, in which case the rescoring is skipped -- confirm.
  if (!$transcript_scores->loaded) {
    $tsb->internal_id_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $mapping_name = "transcript_mappings$num";
  my $mappings = $self->basic_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 # handle ambiguities between transcripts in single genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# single_gene($num, $tsb, $mappings, $transcript_scores)
#
# Writes the score matrix to file unless it reports itself as loaded (no
# rescoring happens in this step), maps with same_gene_transcript_mapping()
# under "transcript_mappings<num>", and builds a shrinked matrix named
# "transcript_matrix<num+1>".
#
# Returns the list (shrinked matrix, mappings).
sub single_gene {
  # The fourth positional argument is accepted but never read.
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');

  $transcript_scores->write_to_file unless $transcript_scores->loaded;

  my $mapping_name = "transcript_mappings$num";
  my $mappings =
    $self->same_gene_transcript_mapping($transcript_scores, $mapping_name);

  # the shrinked matrix is named after the incremented counter
  $num++;
  my $matrix_name = "transcript_matrix$num";
  my $shrunk_scores =
    $tsb->create_shrinked_matrix($transcript_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 # modified basic mapper that maps transcripts that are ambiguous within one gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# same_gene_transcript_mapping($matrix, $mapping_name)
#
# Arguments:
#   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#   $mapping_name - name used for the serialised mapping ("<name>.ser")
#
# Walks the matrix entries from highest to lowest score. An entry whose
# source or target has already been handled is skipped. A non-ambiguous
# entry is added to the mapping list. An ambiguous entry is added only if
# the entry plus all its remaining competitors involve exactly one source
# gene and exactly one target gene.
#
# Returns the MappingList (either read back from an existing checkpoint or
# freshly built and written to file).
# Throws if $matrix is missing or of the wrong class, or if $mapping_name is
# false.
sub same_gene_transcript_mapping {
  my ($self, $matrix, $mapping_name) = @_;

  # argument checks
  if (!$matrix
      || !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  if (!$mapping_name) {
    throw('Need a name for serialising the mapping.');
  }

  # Build the MappingList with AUTO_LOAD so that a previously serialised
  # mapping is picked up if one is found.
  my $dump_path  = path_append($self->conf->param('basedir'), 'mapping');
  my $cache_file = $mapping_name . '.ser';

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => $cache_file,
    -AUTO_LOAD  => 1,
  );

  # checkpoint test: hand back a previously stored MappingList
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${cache_file}.\n");
    return $mappings;
  }

  my $sources_done = {};
  my $targets_done = {};

  # highest score first; ties are broken by ascending source, then target
  my @sorted_entries =
    sort {
         $b->score  <=> $a->score
      || $a->source <=> $b->source
      || $a->target <=> $b->target
    } @{ $matrix->get_all_Entries };

  while (my $entry = shift @sorted_entries) {

    # a mapping decision was already made for this source or this target
    next if $sources_done->{$entry->source};
    next if $targets_done->{$entry->target};

    # NOTE(review): ambiguous_mapping() is handed these two array refs,
    # presumably to fill them with the competing sources and targets --
    # confirm against the base class.
    my $rival_sources = [];
    my $rival_targets = [];

    my $accept = 1;

    if ($self->ambiguous_mapping($entry, $matrix, $rival_sources,
                                 $rival_targets)) {

      $rival_sources = $self->filter_sources($rival_sources, $sources_done);
      $rival_targets = $self->filter_targets($rival_targets, $targets_done);

      # collect the genes of this entry and of every remaining rival
      my %source_genes;
      my %target_genes;

      my $entry_source_gene = $self->cache->get_by_key(
        'genes_by_transcript_id', 'source', $entry->source);
      $source_genes{$entry_source_gene} = 1;

      my $entry_target_gene = $self->cache->get_by_key(
        'genes_by_transcript_id', 'target', $entry->target);
      $target_genes{$entry_target_gene} = 1;

      foreach my $rival (@{$rival_sources}) {
        my $gene = $self->cache->get_by_key(
          'genes_by_transcript_id', 'source', $rival);
        $source_genes{$gene} = 1;
      }

      foreach my $rival (@{$rival_targets}) {
        my $gene = $self->cache->get_by_key(
          'genes_by_transcript_id', 'target', $rival);
        $target_genes{$gene} = 1;
      }

      # only accept the mapping if a single source gene and a single target
      # gene are involved
      my $source_gene_count = keys %source_genes;
      my $target_gene_count = keys %target_genes;
      $accept = ($source_gene_count == 1 && $target_gene_count == 1);
    }

    # unambiguous entries are the best mapping and are always added
    $mappings->add_Entry($entry) if $accept;

    # source and target are marked as done even when the entry was rejected
    $sources_done->{$entry->source} = 1;
    $targets_done->{$entry->target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305