annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/StableIdMapper.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 package Bio::EnsEMBL::IdMapping::StableIdMapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 use Bio::EnsEMBL::IdMapping::BaseObject;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 use POSIX qw(strftime);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 # file-scoped lexical shared by every object of this class (not per-instance state)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 my %debug_mappings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Constructor.
#
# Delegates object construction to the parent class, then loads the stable ID
# generator class and attaches an instance of it to the new object.
#
# The generator class name is read from the 'plugin_stable_id_generator'
# configuration parameter; when that is not set,
# Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric is used.
#
# Custom generators should extend Bio::EnsEMBL::IdMapping::BaseObject and
# implement initial_stable_id(), increment_stable_id() and
# calculate_version().
#
# Returns the new object.
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;
  my $self  = $class->SUPER::new(@_);

  # pick the generator plugin, falling back to the generic Ensembl one
  my $generator_class = $self->conf->param('plugin_stable_id_generator');
  $generator_class ||=
    'Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric';

  $self->logger->debug("Using $generator_class to generate stable Ids.\n");

  # load the plugin class at runtime
  inject($generator_class);

  # the generator shares this object's logger, configuration and cache
  my @generator_args = (
    -LOGGER => $self->logger,
    -CONF   => $self->conf,
    -CACHE  => $self->cache,
  );
  my $generator = $generator_class->new(@generator_args);

  $self->stable_id_generator($generator);

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Create the mapping session for this run and dump the mapping_session table.
#
# Runs at most once per object: returns immediately when mapping_session_date
# is already set. Otherwise it
#   - stores the current time as mapping_session_date and the same time,
#     formatted as "%Y-%m-%d %T" local time, as mapping_session_date_fmt;
#   - takes mapping_session_id from the 'mapping_session_id' configuration
#     parameter or, when that is not set, from MAX(mapping_session_id) in the
#     source database plus one;
#   - writes every existing mapping_session row of the source database,
#     followed by one row describing the new session, as tab-separated lines
#     to the handle returned by get_filehandle('mapping_session.txt',
#     'tables').
#
# Missing release/assembly meta values are only logged; the new row is still
# written.
#
# Throws if the output handle cannot be closed cleanly.
sub generate_mapping_session {
  my $self = shift;

  # only run this method once
  return if ($self->mapping_session_date);

  $self->logger->info("Generating new mapping_session...\n");

  $self->mapping_session_date(time);
  $self->mapping_session_date_fmt(strftime("%Y-%m-%d %T",
    localtime($self->mapping_session_date)));

  my $s_dba = $self->cache->get_DBAdaptor('source');
  my $s_dbh = $s_dba->dbc->db_handle;
  my $t_dba = $self->cache->get_DBAdaptor('target');
  my $t_dbh = $t_dba->dbc->db_handle;

  # check if mapping_session_id was manually set by the configuration
  my $mapping_session_id = $self->conf->param('mapping_session_id');

  if ($mapping_session_id) {

    $self->logger->debug("Using manually configured mapping_session_id $mapping_session_id\n", 1);

  } else {

    # calculate mapping_session_id from db
    my $sql = qq(SELECT MAX(mapping_session_id) FROM mapping_session);
    $mapping_session_id = $self->fetch_value_from_db($s_dbh, $sql);

    unless ($mapping_session_id) {
      $self->logger->debug("No previous mapping_session found.\n", 1);
    }

    # increment last mapping_session_id (an empty result becomes 1)
    $mapping_session_id++;

    $self->logger->debug("Using mapping_session_id $mapping_session_id\n", 1);
  }

  $self->mapping_session_id($mapping_session_id);

  # write old mapping_session table to a file
  my $i;
  my $fh = $self->get_filehandle('mapping_session.txt', 'tables');

  my $sth1 = $s_dbh->prepare("SELECT * FROM mapping_session");
  $sth1->execute;

  while (my @row = $sth1->fetchrow_array) {
    $i++;
    print $fh join("\t", @row);
    print $fh "\n";
  }

  $sth1->finish;

  # append the new mapping_session to the file
  my $release_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'schema_version'
  );
  my $old_release = $self->fetch_value_from_db($s_dbh, $release_sql);
  my $new_release = $self->fetch_value_from_db($t_dbh, $release_sql);

  my $assembly_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'
  );
  my $old_assembly = $self->fetch_value_from_db($s_dbh, $assembly_sql);
  my $new_assembly = $self->fetch_value_from_db($t_dbh, $assembly_sql);

  unless ($old_release and $new_release and $old_assembly and $new_assembly) {
    $self->logger->warning("Not all data for new mapping_session found:\n", 1);
    $self->logger->info("old_release: $old_release, new_release: $new_release");
    $self->logger->info("old_assembly: $old_assembly, new_assembly $new_assembly\n", 2);
  }

  print $fh join("\t",
                 $mapping_session_id,
                 $self->conf->param('sourcedbname'),
                 $self->conf->param('targetdbname'),
                 $old_release,
                 $new_release,
                 $old_assembly,
                 $new_assembly,
                 $self->mapping_session_date_fmt);

  print $fh "\n";

  # Buffered write errors (e.g. a full disk) are only reported when the handle
  # is closed; fail loudly rather than leave a truncated table dump behind.
  close($fh)
    or throw("Unable to close mapping_session.txt after writing: $!");

  # $i counted the old rows; the pre-increment adds the row for the new session
  $self->logger->info("Done writing ".++$i." mapping_session entries.\n\n");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Assign stable IDs to all cached target objects of one type.
#
# Arguments:
#   $mappings - Bio::EnsEMBL::IdMapping::MappingList of source/target pairs
#   $type     - object type name; used to build the "${type}s_by_id" cache
#               name and compared against 'exon', 'gene' and 'transcript'
#
# Mapped targets take stable ID and created_date from their source object and
# a version computed by the stable ID generator. Unmapped targets receive the
# next generated stable ID at version 1. Sources without a mapping are counted
# as lost. For every type except 'exon', event keys are handed to
# add_stable_id_event(); lost genes and transcripts are also listed in
# "${type}s_lost.txt". At the end the targets and the statistics are passed to
# write_stable_ids_to_file() and generate_mapping_stats().
#
# Throws if $mappings is not a MappingList. Returns early when no source
# objects of this type are cached.
sub map_stable_ids {
  my ($self, $mappings, $type) = @_;

  throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.")
    unless ($mappings and
            $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

  # make sure a mapping_session exists (and its table dump is written)
  $self->generate_mapping_session;

  $self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');

  # nothing to do if there are no source objects of this type at all
  my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') };
  my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') };
  if (!%all_sources) {
    $self->logger->info("No cached ${type}s found.\n\n");
    return;
  }

  # $type never changes below, so decide once what applies to it
  my $is_exon  = ($type eq 'exon');
  my $log_lost = ($type eq 'gene' or $type eq 'transcript');

  my %stats;
  @stats{qw(mapped_known mapped_novel new lost_known lost_novel)} = (0) x 5;

  # lookup tables derived from the mapping list
  my (%sources_mapped, %targets_mapped, %scores_by_target);
  for my $entry (@{ $mappings->get_all_Entries }) {
    $sources_mapped{$entry->source}   = $entry->target;
    $targets_mapped{$entry->target}   = $entry->source;
    $scores_by_target{$entry->target} = $entry->score;
  }

  # first stable ID to hand out to unmapped targets
  my $new_stable_id = $self->stable_id_generator->initial_stable_id($type);

  #
  # assign mapped and new stable IDs
  #
  for my $target_id (keys %all_targets) {

    my $target    = $all_targets{$target_id};
    my $source_id = $targets_mapped{$target_id};

    # no mapping was found: this target gets a brand new stable ID
    if (!$source_id) {

      $target->stable_id($new_stable_id);
      $target->version(1);
      $target->created_date($self->mapping_session_date);
      $target->modified_date($self->mapping_session_date);

      # creation event (not for exons)
      if (!$is_exon) {
        $self->add_stable_id_event('new',
          join("\t",
               '\N',
               0,
               $target->stable_id,
               $target->version,
               $self->mapping_session_id,
               $type,
               0));
      }

      # advance to the ID for the next unmapped object
      $new_stable_id =
        $self->stable_id_generator->increment_stable_id($new_stable_id);

      $stats{'new'}++;
      next;
    }

    # a mapping exists: carry the stable ID over from the source
    my $source = $all_sources{$source_id};

    $target->stable_id($source->stable_id);
    $target->created_date($source->created_date);

    $target->version(
      $self->stable_id_generator->calculate_version($source, $target));

    # modified_date only moves to this session when the version changed
    $target->modified_date(
      $source->version == $target->version
        ? $source->modified_date
        : $self->mapping_session_date);

    # mapping event (not for exons), skipped when nothing changed, i.e. same
    # stable ID, same version and a score above 0.9999
    if (!$is_exon) {
      unless ($source->stable_id eq $target->stable_id
              && $source->version == $target->version
              && $scores_by_target{$target_id} > 0.9999)
      {
        $self->add_stable_id_event('new',
          join("\t",
               $source->stable_id, $source->version,
               $target->stable_id, $target->version,
               $self->mapping_session_id, $type,
               $scores_by_target{$target_id}));
      }
    }

    # remember the mapping for debugging
    push @{ $debug_mappings{$type} },
      [ $source_id, $target_id, $target->stable_id ];

    $stats{ $source->is_known ? 'mapped_known' : 'mapped_novel' }++;
  }

  #
  # deletion events for lost sources
  #
  my $lost_fh;
  $lost_fh = $self->get_filehandle("${type}s_lost.txt", 'debug') if $log_lost;

  for my $source_id (keys %all_sources) {

    # only sources without a mapping are lost
    next if $sources_mapped{$source_id};

    my $source = $all_sources{$source_id};

    # deletion event (not for exons)
    if (!$is_exon) {
      $self->add_stable_id_event('new',
        join("\t",
             $source->stable_id,
             $source->version,
             '\N',
             0,
             $self->mapping_session_id,
             $type,
             0));
    }

    my $status = $source->is_known ? 'known' : 'novel';
    $stats{"lost_$status"}++;

    # lost genes and transcripts are listed in a debug file as well
    if ($log_lost) {
      print {$lost_fh} $source->stable_id, "\t$status\n";
    }
  }

  close($lost_fh) if (defined($lost_fh));

  # write stable IDs to file
  $self->write_stable_ids_to_file($type, \%all_targets);

  # also generate and write stats to file
  $self->generate_mapping_stats($type, \%stats);

  $self->logger->info("Done.\n\n");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 sub generate_similarity_events {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 my ( $self, $mappings, $scores, $type ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 # argument checks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 unless ( $mappings and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 unless ( $scores and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 throw("Need a type (gene|transcript|translation).") unless ($type);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 my $mapped;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 # add similarities for mapped entries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 foreach my $e ( @{ $mappings->get_all_Entries } ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 # create lookup hash for mapped sources and targets; we'll need this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 # later
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 $mapped->{'source'}->{ $e->source } = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 $mapped->{'target'}->{ $e->target } = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 # loop over all other entries which contain either source or target;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 # add similarity if score is within 1.5% of this entry (which is the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 # top scorer)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 my @others = @{ $scores->get_Entries_for_target( $e->target ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 push @others, @{ $scores->get_Entries_for_source( $e->source ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 while ( my $e2 = shift(@others) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 # skip self
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 if ( ( $e->source eq $e2->source ) and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 ( $e->target eq $e2->target ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 if ( $e2->score > ( $e->score*0.985 ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 my $s_obj =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 $self->cache->get_by_key( "${type}s_by_id", 'source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 $e2->source );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 my $t_obj =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 $self->cache->get_by_key( "${type}s_by_id", 'target',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 $e2->target );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 my $key = join( "\t",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 $s_obj->stable_id, $s_obj->version,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 $t_obj->stable_id, $t_obj->version,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 $self->mapping_session_id, $type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 $e2->score );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 $self->add_stable_id_event( 'similarity', $key );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 # [todo] add overlap hack here? (see Java code)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 # probably better solution: let synteny rescoring affect this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 # decision
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 } ## end while ( my $e2 = shift(@others...))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 } ## end foreach my $e ( @{ $mappings...})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 # similarities for other entries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 foreach my $dbtype ( keys %$mapped ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 # note: $dbtype will be either 'source' or 'target'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 my $m1 = "get_all_${dbtype}s";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 my $m2 = "get_Entries_for_${dbtype}";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 foreach my $id ( @{ $scores->$m1 } ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 # skip if this is a mapped source/target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 if ( $mapped->{$dbtype}->{$id} ) { next }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 my @entries =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 sort { $b->score <=> $a->score } @{ $scores->$m2($id) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 unless (@entries) { next }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 # skip if top score < 0.75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 my $top_score = $entries[0]->score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 if ( $top_score < 0.75 ) { next }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 # add similarities for all entries within 5% of top scorer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 while ( my $e = shift(@entries) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 if ( $e->score > ( $top_score*0.95 ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 my $s_obj =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 $self->cache->get_by_key( "${type}s_by_id", 'source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 $e->source );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 my $t_obj =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 $self->cache->get_by_key( "${type}s_by_id", 'target',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 $e->target );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 my $key = join( "\t",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 $s_obj->stable_id, $s_obj->version,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 $t_obj->stable_id, $t_obj->version,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 $self->mapping_session_id, $type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 $e->score );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 $self->add_stable_id_event( 'similarity', $key );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 } ## end foreach my $id ( @{ $scores...})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 } ## end foreach my $dbtype ( keys %$mapped)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 } ## end sub generate_similarity_events
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Copy a transcript scoring matrix, leaving out same-gene entries.
#
# Arg[1]  : $transcript_scores - must be a
#           Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Returns : a newly created ScoredMappingMatrix holding the entries that
#           were kept
# Throws  : if $transcript_scores is missing or of the wrong class
#
# An entry is left out when the genes looked up for its source and target
# transcript report the same stable ID AND the source transcript's stable ID
# is also present among the target transcripts. Entries whose source
# transcript stable ID is absent from the target set are always kept.
sub filter_same_gene_transcript_similarities {
  my ($self, $transcript_scores) = @_;

  # argument check
  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of transcripts.');
  }

  # destination matrix for everything that survives the filter
  my $kept_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'filtered_transcript_scores.ser',
  );

  # stable IDs of every target transcript, for constant-time membership tests
  my %is_target_stable_id =
    map { $_->stable_id => 1 }
    values %{ $self->cache->get_by_name("transcripts_by_id", 'target') };

  my $skipped = 0;

  ENTRY:
  for my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $source_tr = $self->cache->get_by_key('transcripts_by_id', 'source',
      $entry->source);
    my $source_gene = $self->cache->get_by_key('genes_by_transcript_id',
      'source', $entry->source);
    my $target_gene = $self->cache->get_by_key('genes_by_transcript_id',
      'target', $entry->target);

    # same gene on both sides and the source transcript still exists in the
    # target set: not a similarity worth recording
    if (($source_gene->stable_id eq $target_gene->stable_id) and
        $is_target_stable_id{ $source_tr->stable_id }) {
      $skipped++;
      next ENTRY;
    }

    $kept_scores->add_Entry($entry);
  }

  $self->logger->debug("Skipped $skipped same gene transcript mappings.\n");

  return $kept_scores;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Generate similarity events for translations from transcript scores.
#
# Arg[1]  : $mappings - must be a Bio::EnsEMBL::IdMapping::MappingList
# Arg[2]  : $transcript_scores - must be a
#           Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Returns : whatever generate_similarity_events() returns
# Throws  : if either argument is missing or of the wrong class
#
# A stand-in translation matrix is filled with the transcript score of every
# entry whose source and target transcript both have a translation. That
# matrix is then passed to generate_similarity_events() with type
# 'translation'.
sub generate_translation_similarity_events {
  my ($self, $mappings, $transcript_scores) = @_;

  # argument checks
  if (!$mappings or
      !$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  # stand-in scoring matrix keyed by translation IDs
  my $translation_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'translation_scores.ser',
  );

  for my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $source_tr = $self->cache->get_by_key('transcripts_by_id', 'source',
      $entry->source);
    my $source_tl = $source_tr->translation;

    my $target_tr = $self->cache->get_by_key('transcripts_by_id', 'target',
      $entry->target);
    my $target_tl = $target_tr->translation;

    # only transcript pairs with a translation on both sides contribute
    next unless ($source_tl and $target_tl);

    # reuse the transcript score for the translation pair
    $translation_scores->add_score($source_tl->id, $target_tl->id,
      $entry->score);
  }

  # the generic event generator does the rest
  return $self->generate_similarity_events($mappings, $translation_scores,
    'translation');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write one tab-separated row per target object to "<type>_stable_id.txt".
#
# Arg[1]  : $type - used in the file name and in the log messages
# Arg[2]  : $all_targets - hashref of target objects; rows are written in
#           ascending numeric order of the hash keys
#
# Each row holds: id, stable_id, version, created date, modified date.
# Dates are formatted as "%Y-%m-%d %T" in local time. An object with a false
# created_date or modified_date gets $self->mapping_session_date instead.
# The filehandle comes from $self->get_filehandle(<file name>, 'tables').
sub write_stable_ids_to_file {
  my ($self, $type, $all_targets) = @_;

  $self->logger->info("Writing ${type} stable IDs to file...\n");

  my $fh = $self->get_filehandle("${type}_stable_id.txt", 'tables');

  # numeric ordering of the keys fixes the row order
  my @ordered_keys = sort { $a <=> $b } keys %$all_targets;

  for my $key (@ordered_keys) {
    my $target = $all_targets->{$key};

    # fall back to the mapping session date when a date is missing
    my $created  = $target->created_date;
    $created = $self->mapping_session_date if (!$created);

    my $modified = $target->modified_date;
    $modified = $self->mapping_session_date if (!$modified);

    my @fields = (
      $target->id,
      $target->stable_id,
      $target->version,
      strftime("%Y-%m-%d %T", localtime($created)),
      strftime("%Y-%m-%d %T", localtime($modified)),
    );

    print {$fh} join("\t", @fields) . "\n";
  }

  close($fh);

  $self->logger->info("Done writing ".scalar(@ordered_keys)." entries.\n\n");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Format mapping statistics as a small table, log it and write it to
# "<type>_mapping_stats.txt".
#
# Arg[1]  : $type - object type; used in the heading and the file name.
#           For 'exon' only the 'total' row is printed.
# Arg[2]  : $stats - hashref with the counters 'mapped_known',
#           'mapped_novel', 'lost_known' and 'lost_novel'; a missing counter
#           is treated as 0
#
# Every percentage is reported as 0 when its denominator is 0, so a run in
# which nothing was mapped or lost still produces a report instead of dying
# with "Illegal division by zero".
# The filehandle comes from $self->get_filehandle(<file name>, 'stats').
sub generate_mapping_stats {
  my $self = shift;
  my $type = shift;
  my $stats = shift;

  my $result = ucfirst($type)." mapping results:\n\n";

  my $fmt1 = "%-10s%-10s%-10s%-10s\n";
  my $fmt2 = "%-10s%6.0f %6.0f %4.2f%%\n";

  $result .= sprintf($fmt1, qw(TYPE MAPPED LOST PERCENTAGE));
  $result .= ('-'x40)."\n";

  # local copies of the counters so that absent ones count as 0
  my $mapped_known = $stats->{'mapped_known'} || 0;
  my $mapped_novel = $stats->{'mapped_novel'} || 0;
  my $lost_known   = $stats->{'lost_known'}   || 0;
  my $lost_novel   = $stats->{'lost_novel'}   || 0;

  my $mapped_total = $mapped_known + $mapped_novel;
  my $lost_total   = $lost_known + $lost_novel;
  my $known_total  = $mapped_known + $lost_known;
  my $novel_total  = $mapped_novel + $lost_novel;
  my $grand_total  = $known_total + $novel_total;

  # no split into known and novel for exons
  unless ( $type eq 'exon' ) {
    $result .= sprintf( $fmt2,
      'known',
      $mapped_known,
      $lost_known,
      ($known_total ? $mapped_known/$known_total*100 : 0)
    );

    $result .= sprintf( $fmt2,
      'novel',
      $mapped_novel,
      $lost_novel,
      ($novel_total ? $mapped_novel/$novel_total*100 : 0)
    );
  }

  # guarded like the rows above: the grand total is 0 when there were no
  # objects at all
  $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
    ($grand_total ? $mapped_total/$grand_total*100 : 0));

  # log result
  $self->logger->info($result."\n");

  # write result to file
  my $fh = $self->get_filehandle("${type}_mapping_stats.txt", 'stats');
  print $fh $result;
  close($fh);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write the collected debug mappings to one file per type.
#
# For every key of %debug_mappings a file "<type>_mappings.txt" is requested
# via $self->get_filehandle(<file name>, 'debug'). Each stored row (an
# arrayref) is written as one tab-separated line.
sub dump_debug_mappings {
  my ($self) = @_;

  for my $mapping_type (keys %debug_mappings) {

    $self->logger->debug("Writing $mapping_type mappings to debug/${mapping_type}_mappings.txt...\n");

    my $out = $self->get_filehandle("${mapping_type}_mappings.txt", 'debug');

    for my $fields (@{ $debug_mappings{$mapping_type} }) {
      print {$out} join("\t", @{$fields});
      print {$out} "\n";
    }

    close($out);

    $self->logger->debug("Done.\n");
  }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write all stored stable ID events of one type to
# "stable_id_event_<event_type>.txt", one event per line.
#
# Arg[1]  : $event_type - event type, e.g. 'new' or 'similarity'
# Throws  : if $event_type is false
#
# Events are read from $self->get_all_stable_id_events($event_type). The
# filehandle comes from $self->get_filehandle(<file name>, 'tables').
sub write_stable_id_events {
  my ($self, $event_type) = @_;

  if (!$event_type) {
    throw("Need an event type (new|similarity).");
  }

  $self->logger->debug("Writing $event_type stable_id_events to file...\n");

  my $out = $self->get_filehandle("stable_id_event_${event_type}.txt", 'tables');
  my $written = 0;

  for my $line (@{ $self->get_all_stable_id_events($event_type) }) {
    print {$out} "$line\n";
    $written++;
  }

  close($out);

  $self->logger->debug("Done writing $written entries.\n");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Record a stable ID event under the given event type.
#
# Arg[1]  : $type - event type, e.g. 'new' or 'similarity'
# Arg[2]  : $event - the event string; used as a hash key, so recording the
#           same string twice stores it only once
# Returns : 1
# Throws  : if $type is false
sub add_stable_id_event {
  my $self  = shift;
  my $type  = shift;
  my $event = shift;

  # argument check
  if (!$type) {
    throw("Need an event type (new|similarity).");
  }

  return $self->{'stable_id_events'}{$type}{$event} = 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return every event string recorded for the given event type.
#
# Arg[1]  : $type - event type, e.g. 'new' or 'similarity'
# Returns : arrayref of event strings, in hash-key order (i.e. unordered);
#           an empty arrayref when nothing was recorded for $type
# Throws  : if $type is false
sub get_all_stable_id_events {
  my $self = shift;
  my $type = shift;

  # argument check
  if (!$type) {
    throw("Need an event type (new|similarity).");
  }

  my @events = keys %{ $self->{'stable_id_events'}{$type} };

  return \@events;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the value kept in $self->{'_mapping_session_id'}.
#
# Arg[1]  : (optional) new value; any supplied argument, even undef, is stored
# Returns : the currently stored value
sub mapping_session_id {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_id'} = $new_value[0];
  }

  return $self->{'_mapping_session_id'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the value kept in $self->{'_mapping_session_date'}.
#
# Arg[1]  : (optional) new value; any supplied argument, even undef, is stored
# Returns : the currently stored value
sub mapping_session_date {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date'} = $new_value[0];
  }

  return $self->{'_mapping_session_date'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the value kept in $self->{'_mapping_session_date_fmt'}.
#
# Arg[1]  : (optional) new value; any supplied argument, even undef, is stored
# Returns : the currently stored value
sub mapping_session_date_fmt {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date_fmt'} = $new_value[0];
  }

  return $self->{'_mapping_session_date_fmt'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the value kept in $self->{'_stable_id_generator'}.
#
# Arg[1]  : (optional) new value; any supplied argument, even undef, is stored
# Returns : the currently stored value
sub stable_id_generator {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_stable_id_generator'} = $new_value[0];
  }

  return $self->{'_stable_id_generator'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762