annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric - default Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 InternalIdMapper implementation for genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 # basic mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Run the first, plain mapping pass over the gene scoring matrix.
#
# Arguments (positional):
#   $num         - suffix for the names handed to the helpers: the mapping
#                  call gets "gene_mappings$num", the matrix call gets
#                  "gene_matrix" followed by $num after one increment
#   $gsb         - object providing create_shrinked_matrix()
#   $mappings    - occupies the same position as in the other mapping steps
#                  of this package; the incoming value is never read and is
#                  replaced by the result of basic_mapping()
#   $gene_scores - gene scoring matrix passed to basic_mapping()
#
# Returns the list ($new_scores, $mappings).
sub init_basic {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Basic gene mapping...\n", 0, 'stamped');

  # The mappings name uses $num as passed in; the matrix name uses the
  # incremented value.
  my $mappings_name = "gene_mappings$num";
  $num++;
  my $matrix_name = "gene_matrix$num";

  $mappings = $self->basic_mapping($gene_scores, $mappings_name);

  my $new_scores =
    $gsb->create_shrinked_matrix($gene_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 # build the synteny from unambiguous mappings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Rescore the gene matrix with a synteny framework built from the mappings
# found so far, then run another mapping pass.
#
# Arguments (positional):
#   $num         - suffix for the names handed to the helpers: the mapping
#                  call gets "gene_mappings$num", the matrix call gets
#                  "gene_matrix" followed by $num after one increment
#   $gsb         - object providing create_shrinked_matrix()
#   $mappings    - mappings passed to SyntenyFramework::build_synteny()
#   $gene_scores - gene scoring matrix; must provide loaded() and
#                  write_to_file()
#
# Returns the list ($new_scores, $new_mappings).
#
# The rescoring part only runs when $gene_scores->loaded is false.
# NOTE(review): presumably a true value means the matrix was restored from a
# previous checkpoint - confirm against the matrix class.
sub synteny {
  my $self        = shift;
  my $num         = shift;
  my $gsb         = shift;
  my $mappings    = shift;
  my $gene_scores = shift;

  unless ($gene_scores->loaded) {
    # This file does not load SyntenyFramework at the top, so load it here
    # instead of relying on another module having pulled it in already.
    # require is a no-op when the module is already in %INC.
    require Bio::EnsEMBL::IdMapping::SyntenyFramework;

    $self->logger->info("Synteny Framework building...\n", 0, 'stamped');

    my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
    my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
      -DUMP_PATH  => $dump_path,
      -CACHE_FILE => 'synteny_framework.ser',
      -LOGGER     => $self->logger,
      -CONF       => $self->conf,
      -CACHE      => $self->cache,
    );
    $sf->build_synteny($mappings);

    # use it to rescore the genes
    $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
    $gene_scores = $sf->rescore_gene_matrix_lsf($gene_scores);

    # checkpoint
    $gene_scores->write_to_file;
  }

  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
  $num++;
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    "gene_matrix$num");

  return ($new_scores, $new_mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 # rescore with simple scoring function and try again
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Rescore the gene matrix with the simple transcript-based scoring function
# and run another mapping pass.
#
# Arguments (positional):
#   $num               - suffix for the names handed to the helpers: the
#                        mapping call gets "gene_mappings$num", the matrix
#                        call gets "gene_matrix" followed by $num after one
#                        increment
#   $gsb               - object providing simple_gene_rescore() and
#                        create_shrinked_matrix()
#   $mappings          - not used by this step; kept so the positional
#                        signature stays unchanged
#   $gene_scores       - gene scoring matrix; must provide loaded() and
#                        write_to_file()
#   $transcript_scores - passed through to simple_gene_rescore()
#
# Returns the list ($new_scores, $new_mappings).
sub best_transcript {
  my ($self, $num, $gsb, $mappings, $gene_scores, $transcript_scores) = @_;

  $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');

  # Rescore and write the matrix out only when it does not report itself
  # as loaded.
  if (!$gene_scores->loaded) {
    $gsb->simple_gene_rescore($gene_scores, $transcript_scores);
    $gene_scores->write_to_file;
  }

  # The mappings name uses $num as passed in; the matrix name uses the
  # incremented value.
  my $mappings_name = "gene_mappings$num";
  $num++;
  my $matrix_name = "gene_matrix$num";

  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);
  my $new_scores =
    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);

  return ($new_scores, $new_mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 # rescore by penalising scores between genes with different biotypes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Rescore the gene matrix via the biotype rescoring of the score builder
# and run another mapping pass.
#
# Arguments (positional):
#   $num         - suffix for the names handed to the helpers: the mapping
#                  call gets "gene_mappings$num", the matrix call gets
#                  "gene_matrix" followed by $num after one increment
#   $gsb         - object providing biotype_gene_rescore() and
#                  create_shrinked_matrix()
#   $mappings    - not used by this step; kept so the positional signature
#                  stays unchanged
#   $gene_scores - gene scoring matrix; must provide loaded() and
#                  write_to_file()
#
# Returns the list ($new_scores, $new_mappings).
sub biotype {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  # Rescore and write the matrix out only when it does not report itself
  # as loaded.
  if (!$gene_scores->loaded) {
    $gsb->biotype_gene_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  # The mappings name uses $num as passed in; the matrix name uses the
  # incremented value.
  my $mappings_name = "gene_mappings$num";
  $num++;
  my $matrix_name = "gene_matrix$num";

  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);
  my $new_scores =
    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);

  return ($new_scores, $new_mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 # selectively rescore by penalising scores between genes with different
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 # internalIDs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Rescore the gene matrix via the internal ID rescoring of the score builder
# and run another mapping pass.
#
# Arguments (positional):
#   $num         - suffix for the names handed to the helpers: the mapping
#                  call gets "gene_mappings$num", the matrix call gets
#                  "gene_matrix" followed by $num after one increment
#   $gsb         - object providing internal_id_rescore() and
#                  create_shrinked_matrix()
#   $mappings    - not used by this step; kept so the positional signature
#                  stays unchanged
#   $gene_scores - gene scoring matrix; must provide loaded() and
#                  write_to_file()
#
# Returns the list ($new_scores, $new_mappings).
sub internal_id {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  # Rescore and write the matrix out only when it does not report itself
  # as loaded.
  if (!$gene_scores->loaded) {
    $gsb->internal_id_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  # The mappings name uses $num as passed in; the matrix name uses the
  # incremented value.
  my $mappings_name = "gene_mappings$num";
  $num++;
  my $matrix_name = "gene_matrix$num";

  my $new_mappings = $self->basic_mapping($gene_scores, $mappings_name);
  my $new_scores =
    $gsb->create_shrinked_matrix($gene_scores, $new_mappings, $matrix_name);

  return ($new_scores, $new_mappings);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188