0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 =head1 SYNOPSIS
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 =head1 METHODS
|
|
28
|
|
29 =cut
|
|
30
|
|
31 package Bio::EnsEMBL::IdMapping::StableIdMapper;
|
|
32
|
|
33 use strict;
|
|
34 use warnings;
|
|
35 no warnings 'uninitialized';
|
|
36
|
|
37 use Bio::EnsEMBL::IdMapping::BaseObject;
|
|
38 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
|
|
39
|
|
40 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
41 use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append);
|
|
42 use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
|
|
43 use POSIX qw(strftime);
|
|
44
|
|
45
|
|
# file-scoped storage shared by all objects of this class (not per-instance state)
|
|
47 my %debug_mappings;
|
|
48
|
|
49
|
|
# Constructor.
#
# Builds the object through the parent class constructor (all arguments are
# passed through unchanged), then sets up the stable ID generator plugin and
# stores an instance of it on the object via stable_id_generator().
#
# The generator class name is read from the 'plugin_stable_id_generator'
# configuration parameter; when that is not set,
# Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric is used.
#
# If you write your own generators, make sure they extend
# Bio::EnsEMBL::IdMapping::BaseObject and additionally implement these three
# methods: initial_stable_id(), increment_stable_id() and calculate_version().
#
# Returns the newly created object.
sub new {
  my ($proto, @ctor_args) = @_;

  my $class = ref($proto) || $proto;
  my $self  = $class->SUPER::new(@ctor_args);

  # pick the configured generator class, falling back to the generic one
  my $generator_class = $self->conf->param('plugin_stable_id_generator');
  $generator_class ||=
    'Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric';

  $self->logger->debug("Using $generator_class to generate stable Ids.\n");

  # inject() comes from Bio::EnsEMBL::Utils::ScriptUtils; presumably it loads
  # the named class at runtime -- confirm against that module
  inject($generator_class);

  # the generator shares this object's logger, configuration and cache
  my $generator = $generator_class->new(
    -LOGGER => $self->logger,
    -CONF   => $self->conf,
    -CACHE  => $self->cache
  );
  $self->stable_id_generator($generator);

  return $self;
}
|
|
75
|
|
76
|
|
# Create the mapping session for this run and dump the mapping_session table.
#
# Does nothing when a session date is already recorded on the object, so the
# work is only done on the first call. Otherwise it
#   - records the current time as the session date (raw and formatted),
#   - determines the mapping_session_id: the 'mapping_session_id'
#     configuration parameter if set, else MAX(mapping_session_id) + 1 taken
#     from the source database,
#   - writes every existing mapping_session row of the source database,
#     followed by one row describing the new session, to the filehandle
#     obtained from get_filehandle('mapping_session.txt', 'tables').
#
# Takes no arguments besides the object itself.
sub generate_mapping_session {
  my ($self) = @_;

  # only run this method once
  if ($self->mapping_session_date) {
    return;
  }

  $self->logger->info("Generating new mapping_session...\n");

  # remember when the session started, both raw and formatted
  $self->mapping_session_date(time);
  my @session_time = localtime($self->mapping_session_date);
  $self->mapping_session_date_fmt(strftime("%Y-%m-%d %T", @session_time));

  my $source_dbh = $self->cache->get_DBAdaptor('source')->dbc->db_handle;
  my $target_dbh = $self->cache->get_DBAdaptor('target')->dbc->db_handle;

  # a manually configured mapping_session_id takes precedence
  my $mapping_session_id = $self->conf->param('mapping_session_id');

  if (!$mapping_session_id) {

    # calculate mapping_session_id from db
    my $max_id_sql = qq(SELECT MAX(mapping_session_id) FROM mapping_session);
    $mapping_session_id =
      $self->fetch_value_from_db($source_dbh, $max_id_sql);

    if (!$mapping_session_id) {
      $self->logger->debug("No previous mapping_session found.\n", 1);
    }

    # the new session gets the next free id
    $mapping_session_id++;

    $self->logger->debug("Using mapping_session_id $mapping_session_id\n", 1);

  }
  else {

    $self->logger->debug("Using manually configured mapping_session_id $mapping_session_id\n", 1);

  }

  $self->mapping_session_id($mapping_session_id);

  # write old mapping_session table to a file
  my $existing_rows = 0;
  my $out = $self->get_filehandle('mapping_session.txt', 'tables');

  my $sth = $source_dbh->prepare("SELECT * FROM mapping_session");
  $sth->execute;

  while (my @row = $sth->fetchrow_array) {
    $existing_rows++;
    print $out join("\t", @row) . "\n";
  }

  $sth->finish;

  # collect release and assembly of both databases for the new session row
  my $release_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'schema_version'
  );
  my $old_release = $self->fetch_value_from_db($source_dbh, $release_sql);
  my $new_release = $self->fetch_value_from_db($target_dbh, $release_sql);

  my $assembly_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'
  );
  my $old_assembly = $self->fetch_value_from_db($source_dbh, $assembly_sql);
  my $new_assembly = $self->fetch_value_from_db($target_dbh, $assembly_sql);

  # incomplete data is reported but does not stop the run
  if (!$old_release or !$new_release or !$old_assembly or !$new_assembly) {
    $self->logger->warning("Not all data for new mapping_session found:\n", 1);
    $self->logger->info("old_release: $old_release, new_release: $new_release");
    $self->logger->info("old_assembly: $old_assembly, new_assembly $new_assembly\n", 2);
  }

  # append the new mapping_session to the file
  my @new_session_row = (
    $mapping_session_id,
    $self->conf->param('sourcedbname'),
    $self->conf->param('targetdbname'),
    $old_release,
    $new_release,
    $old_assembly,
    $new_assembly,
    $self->mapping_session_date_fmt
  );
  print $out join("\t", @new_session_row) . "\n";
  close($out);

  # the reported count includes the row just added for the new session
  my $total_rows = $existing_rows + 1;
  $self->logger->info("Done writing ".$total_rows." mapping_session entries.\n\n");
}
|
|
168
|
|
169
|
|
# Assign stable IDs to all cached target objects of one type.
#
# Arguments:
#   $mappings - a Bio::EnsEMBL::IdMapping::MappingList holding the
#               source/target mappings for this type
#   $type     - object type name; it is used to build the cache name
#               "${type}s_by_id" and the output file names
#
# Throws if $mappings is missing or not a MappingList.
#
# What it does, in order:
#   - makes sure a mapping session exists (generate_mapping_session()),
#   - returns early (after logging) if no source objects are cached,
#   - for every target: copies stable ID and created date from the mapped
#     source and lets the stable ID generator calculate the version, or, if
#     the target is unmapped, assigns the next new stable ID with version 1,
#   - records 'new' stable_id_events for changes, creations and deletions
#     (never for exons),
#   - logs lost genes/transcripts to "${type}s_lost.txt",
#   - writes the stable IDs and the mapping statistics to file.
sub map_stable_ids {
  my ($self, $mappings, $type) = @_;

  if (!$mappings or
      !$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.");
  }

  # generate a new mapping_session and write all mapping_session data to a file
  $self->generate_mapping_session;

  $self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');

  # check if there are any objects of this type at all
  my %source_by_id = %{ $self->cache->get_by_name("${type}s_by_id", 'source') };
  my %target_by_id = %{ $self->cache->get_by_name("${type}s_by_id", 'target') };

  if (!%source_by_id) {
    $self->logger->info("No cached ${type}s found.\n\n");
    return;
  }

  my %stats = (
    mapped_known => 0,
    mapped_novel => 0,
    new          => 0,
    lost_known   => 0,
    lost_novel   => 0,
  );

  # lookup tables derived from the mapping list
  my %target_of_source;
  my %source_of_target;
  my %score_of_target;

  foreach my $entry (@{ $mappings->get_all_Entries }) {
    my $entry_source = $entry->source;
    my $entry_target = $entry->target;

    $target_of_source{$entry_source} = $entry_target;
    $source_of_target{$entry_target} = $entry_source;
    $score_of_target{$entry_target}  = $entry->score;
  }

  # first stable ID to hand out to an unmapped target
  my $next_new_stable_id = $self->stable_id_generator->initial_stable_id($type);

  #
  # assign mapped and new stable IDs
  #
  foreach my $tid (keys %target_by_id) {

    my $t_obj = $target_by_id{$tid};
    my $sid   = $source_of_target{$tid};

    if ($sid) {

      # a mapping exists, the target inherits from its source
      my $s_obj = $source_by_id{$sid};

      $t_obj->stable_id($s_obj->stable_id);
      $t_obj->created_date($s_obj->created_date);

      # the generator decides whether the version needs to change
      my $generator = $self->stable_id_generator;
      $t_obj->version($generator->calculate_version($s_obj, $t_obj));

      # modified_date only moves to the session date if the version changed
      my $version_unchanged = ($s_obj->version == $t_obj->version);
      $t_obj->modified_date(
        $version_unchanged ? $s_obj->modified_date
                           : $self->mapping_session_date);

      # create a stable_id_event entry (not for exons)
      if ($type ne 'exon') {
        my $score = $score_of_target{$tid};

        # nothing changed means: same stable ID, same version and a
        # (near) perfect mapping score
        my $nothing_changed = ($s_obj->stable_id eq $t_obj->stable_id &&
                               $s_obj->version == $t_obj->version &&
                               $score > 0.9999);

        if (!$nothing_changed) {
          my $key = join("\t",
            $s_obj->stable_id, $s_obj->version,
            $t_obj->stable_id, $t_obj->version,
            $self->mapping_session_id, $type,
            $score);
          $self->add_stable_id_event('new', $key);
        }
      }

      # add to debug hash
      push @{ $debug_mappings{$type} }, [ $sid, $tid, $t_obj->stable_id ];

      # stats
      $stats{ $s_obj->is_known ? 'mapped_known' : 'mapped_novel' }++;

    }
    else {

      # no mapping was found, assign a new stable ID
      $t_obj->stable_id($next_new_stable_id);
      $t_obj->version(1);
      $t_obj->created_date($self->mapping_session_date);
      $t_obj->modified_date($self->mapping_session_date);

      # create a stable_id_event entry (not for exons); '\N' stands in for
      # the missing old stable ID
      if ($type ne 'exon') {
        my $key = join("\t",
          '\N',
          0,
          $t_obj->stable_id,
          $t_obj->version,
          $self->mapping_session_id,
          $type,
          0
        );
        $self->add_stable_id_event('new', $key);
      }

      # increment the stable Id (to be assigned to the next unmapped object)
      $next_new_stable_id =
        $self->stable_id_generator->increment_stable_id($next_new_stable_id);

      # stats
      $stats{'new'}++;

    }

  }

  #
  # deletion events for lost sources
  #
  my $log_lost = ($type eq 'gene' || $type eq 'transcript');
  my $lost_fh  = $log_lost
    ? $self->get_filehandle("${type}s_lost.txt", 'debug')
    : undef;

  foreach my $sid (keys %source_by_id) {

    my $s_obj = $source_by_id{$sid};

    # mapped sources are not lost
    next if ($target_of_source{$sid});

    # no mapping exists, add deletion event; '\N' stands in for the missing
    # new stable ID
    if ($type ne 'exon') {
      my $key = join("\t",
        $s_obj->stable_id,
        $s_obj->version,
        '\N',
        0,
        $self->mapping_session_id,
        $type,
        0
      );
      $self->add_stable_id_event('new', $key);
    }

    # stats
    my $status = $s_obj->is_known ? 'known' : 'novel';
    $stats{"lost_$status"}++;

    # log lost genes and transcripts (for debug purposes)
    #
    # The Java app did this with a separate method
    # (StableIdMapper.dumpLostGeneAndTranscripts()) which also claims to log
    # losses due to merge. Since that data isn't available at this point,
    # the logging can be done much more efficiently here.
    if ($log_lost) {
      print $lost_fh $s_obj->stable_id, "\t$status\n";
    }
  }

  close($lost_fh) if (defined($lost_fh));

  #
  # write stable IDs to file
  #
  $self->write_stable_ids_to_file($type, \%target_by_id);

  # also generate and write stats to file
  $self->generate_mapping_stats($type, \%stats);

  $self->logger->info("Done.\n\n");
}
|
|
357
|
|
358
|
|
# Record 'similarity' stable_id_events for one object type.
#
# Arguments:
#   $mappings - Bio::EnsEMBL::IdMapping::MappingList with the accepted
#               mappings
#   $scores   - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix with all scored
#               source/target pairs
#   $type     - object type name (gene|transcript|translation); used to build
#               the cache name "${type}s_by_id"
#
# Throws if any of the three arguments is missing or of the wrong class.
#
# Two passes are made:
#   1. For every mapping, all other scored pairs sharing its source or its
#      target get an event if their score is within 1.5% of the mapping's
#      score.
#   2. For every source and every target in the matrix that is not part of
#      any mapping, all pairs scoring within 5% of that object's best pair
#      get an event, provided the best score is at least 0.75.
#
# The second pass always covers both 'source' and 'target'. Earlier it
# iterated over the keys of the lookup hash filled in pass 1, which meant
# that with an empty mapping list neither side was visited and no events
# were produced for unmapped objects.
sub generate_similarity_events {
  my ( $self, $mappings, $scores, $type ) = @_;

  # argument checks
  unless ( $mappings and
           $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
  {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  unless ( $scores and
           $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
  {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  throw("Need a type (gene|transcript|translation).") unless ($type);

  # lookup of mapped internal IDs per side; both sides always exist so the
  # second pass below never depends on what the first pass happened to see
  my %mapped = ( 'source' => {}, 'target' => {} );

  #
  # add similarities for mapped entries
  #
  foreach my $e ( @{ $mappings->get_all_Entries } ) {

    $mapped{'source'}->{ $e->source } = 1;
    $mapped{'target'}->{ $e->target } = 1;

    # all other entries which contain either this source or this target
    my @others = ( @{ $scores->get_Entries_for_target( $e->target ) },
                   @{ $scores->get_Entries_for_source( $e->source ) } );

    foreach my $e2 (@others) {

      # skip self
      next if ( $e->source eq $e2->source and $e->target eq $e2->target );

      # add similarity if score is within 1.5% of this entry (which is the
      # top scorer)
      if ( $e2->score > ( $e->score*0.985 ) ) {
        _add_similarity_event( $self, $type, $e2 );
      }

      # [todo] add overlap hack here? (see Java code)
      # probably better solution: let synteny rescoring affect this
      # decision
    }
  }

  #
  # similarities for other entries
  #
  foreach my $dbtype (qw(source target)) {

    my $m1 = "get_all_${dbtype}s";
    my $m2 = "get_Entries_for_${dbtype}";

    foreach my $id ( @{ $scores->$m1 } ) {

      # skip if this is a mapped source/target
      next if ( $mapped{$dbtype}->{$id} );

      # best scoring entry first
      my @entries =
        sort { $b->score <=> $a->score } @{ $scores->$m2($id) };

      next unless (@entries);

      # skip if top score < 0.75
      my $top_score = $entries[0]->score;
      next if ( $top_score < 0.75 );

      # add similarities for all entries within 5% of top scorer; the list
      # is sorted by descending score, so the first entry below the cutoff
      # ends the scan
      foreach my $e (@entries) {
        last unless ( $e->score > ( $top_score*0.95 ) );
        _add_similarity_event( $self, $type, $e );
      }
    }
  }

  return;
} ## end sub generate_similarity_events

# Private helper: build the tab-separated event key for one scored entry
# (old stable ID, old version, new stable ID, new version, mapping session
# ID, type, score) and register it as a 'similarity' event.
sub _add_similarity_event {
  my ( $self, $type, $entry ) = @_;

  my $s_obj =
    $self->cache->get_by_key( "${type}s_by_id", 'source', $entry->source );
  my $t_obj =
    $self->cache->get_by_key( "${type}s_by_id", 'target', $entry->target );

  my $key = join( "\t",
                  $s_obj->stable_id, $s_obj->version,
                  $t_obj->stable_id, $t_obj->version,
                  $self->mapping_session_id, $type,
                  $entry->score );
  $self->add_stable_id_event( 'similarity', $key );

  return;
}
|
|
478
|
|
479
|
|
# Build a copy of a transcript scoring matrix without same-gene pairs.
#
# Argument:
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of
#                        transcripts
#
# Throws if the argument is missing or not a ScoredMappingMatrix.
#
# An entry is dropped when the genes of its source and target transcript have
# the same stable ID AND the source transcript's stable ID is also found among
# the target transcripts. Entries whose source transcript no longer exists in
# the target are therefore kept even when the gene is the same.
#
# Returns the new, filtered ScoredMappingMatrix.
sub filter_same_gene_transcript_similarities {
  my ($self, $transcript_scores) = @_;

  # argument checks
  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of transcripts.');
  }

  # the matrix receiving the entries that survive the filter
  my $kept = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'filtered_transcript_scores.ser',
  );

  # stable IDs of all target transcripts
  my %is_target_stable_id = map { $_->stable_id => 1 }
    values %{ $self->cache->get_by_name("transcripts_by_id", 'target') };

  my $skipped = 0;

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $s_tr = $self->cache->get_by_key('transcripts_by_id', 'source',
      $entry->source);
    my $s_gene = $self->cache->get_by_key('genes_by_transcript_id', 'source',
      $entry->source);
    my $t_gene = $self->cache->get_by_key('genes_by_transcript_id', 'target',
      $entry->target);

    # skip if source and target transcript are in same gene, BUT keep events
    # for deleted transcripts
    my $same_gene = ($s_gene->stable_id eq $t_gene->stable_id);

    if ($same_gene and $is_target_stable_id{ $s_tr->stable_id }) {
      $skipped++;
    }
    else {
      $kept->add_Entry($entry);
    }
  }

  $self->logger->debug("Skipped $skipped same gene transcript mappings.\n");

  return $kept;
}
|
|
532
|
|
533
|
|
# Record similarity events for translations, based on transcript scores.
#
# Arguments:
#   $mappings          - Bio::EnsEMBL::IdMapping::MappingList handed on
#                        unchanged to generate_similarity_events()
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of
#                        transcripts
#
# Throws if either argument is missing or of the wrong class.
#
# Translations have no scores of their own here: a stand-in matrix is built
# in which every transcript pair whose source and target both have a
# translation contributes one entry, keyed by the translation IDs and
# carrying the transcript score. That matrix is then passed to
# generate_similarity_events() with type 'translation'.
sub generate_translation_similarity_events {
  my ($self, $mappings, $transcript_scores) = @_;

  # argument checks
  if (!$mappings or
      !$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  if (!$transcript_scores or
      !$transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  # create a fake translation scoring matrix
  my $translation_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'translation_scores.ser',
  );

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $source_transcript =
      $self->cache->get_by_key('transcripts_by_id', 'source', $entry->source);
    my $source_translation = $source_transcript->translation;

    my $target_transcript =
      $self->cache->get_by_key('transcripts_by_id', 'target', $entry->target);
    my $target_translation = $target_transcript->translation;

    # only transcript pairs with a translation on both sides contribute
    next unless ($source_translation and $target_translation);

    # the translation pair inherits the score of its transcripts
    $translation_scores->add_score($source_translation->id,
      $target_translation->id, $entry->score);
  }

  # now generate similarity events using this fake scoring matrix
  $self->generate_similarity_events($mappings, $translation_scores,
    'translation');
}
|
|
574
|
|
575
|
|
# Write the stable ID table rows for one object type to file.
#
# Arguments:
#   $type        - object type name; the output file is
#                  "${type}_stable_id.txt", requested from get_filehandle()
#                  with the 'tables' argument
#   $all_targets - hashref of target objects keyed by internal ID
#
# One tab-separated row is written per object, ordered numerically by hash
# key: id, stable_id, version, created date, modified date. Dates are
# formatted as "%Y-%m-%d %T" in local time; an object without a created or
# modified date gets the mapping session date instead.
sub write_stable_ids_to_file {
  my ($self, $type, $all_targets) = @_;

  $self->logger->info("Writing ${type} stable IDs to file...\n");

  my $out = $self->get_filehandle("${type}_stable_id.txt", 'tables');

  # numeric order of the internal IDs determines the row order
  my @ordered_ids = sort { $a <=> $b } keys %$all_targets;

  foreach my $internal_id (@ordered_ids) {

    my $obj = $all_targets->{$internal_id};

    # fall back to the session date for missing created and modified dates
    my $created_date  = $obj->created_date  || $self->mapping_session_date;
    my $modified_date = $obj->modified_date || $self->mapping_session_date;

    my @columns = (
      $obj->id,
      $obj->stable_id,
      $obj->version,
      strftime("%Y-%m-%d %T", localtime($created_date)),
      strftime("%Y-%m-%d %T", localtime($modified_date)),
    );

    print $out join("\t", @columns) . "\n";
  }

  close($out);

  $self->logger->info("Done writing ".scalar(@ordered_ids)." entries.\n\n");
}
|
|
620
|
|
621
|
|
# Format the mapping statistics for one object type, log them and write them
# to file.
#
# Arguments:
#   $type  - object type name; used in the report title and in the output
#            file name "${type}_mapping_stats.txt" (requested from
#            get_filehandle() with the 'stats' argument)
#   $stats - hashref with the counters mapped_known, mapped_novel,
#            lost_known and lost_novel
#
# The report has one line each for 'known' and 'novel' objects (left out for
# exons) and a 'total' line; each shows mapped count, lost count and the
# percentage mapped. A percentage is reported as 0 when the corresponding
# total is 0. This now also holds for the 'total' line, which previously
# died with a division by zero when all four counters were 0.
sub generate_mapping_stats {
  my ($self, $type, $stats) = @_;

  my $result = ucfirst($type)." mapping results:\n\n";

  my $fmt1 = "%-10s%-10s%-10s%-10s\n";
  my $fmt2 = "%-10s%6.0f %6.0f %4.2f%%\n";

  $result .= sprintf($fmt1, qw(TYPE MAPPED LOST PERCENTAGE));
  $result .= ('-'x40)."\n";

  my $mapped_total = $stats->{'mapped_known'} + $stats->{'mapped_novel'};
  my $lost_total   = $stats->{'lost_known'} + $stats->{'lost_novel'};
  my $known_total  = $stats->{'mapped_known'} + $stats->{'lost_known'};
  my $novel_total  = $stats->{'mapped_novel'} + $stats->{'lost_novel'};
  my $grand_total  = $known_total + $novel_total;

  # percentage of $part in $whole; 0 if there is nothing to divide by
  my $percentage = sub {
    my ($part, $whole) = @_;
    return $whole ? $part/$whole*100 : 0;
  };

  # no split into known and novel for exons
  unless ( $type eq 'exon' ) {
    $result .= sprintf( $fmt2,
                        'known',
                        $stats->{'mapped_known'},
                        $stats->{'lost_known'},
                        $percentage->($stats->{'mapped_known'}, $known_total)
    );

    $result .= sprintf( $fmt2,
                        'novel',
                        $stats->{'mapped_novel'},
                        $stats->{'lost_novel'},
                        $percentage->($stats->{'mapped_novel'}, $novel_total)
    );
  }

  $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
    $percentage->($mapped_total, $grand_total));

  # log result
  $self->logger->info($result."\n");

  # write result to file
  my $fh = $self->get_filehandle("${type}_mapping_stats.txt", 'stats');
  print $fh $result;
  close($fh);
}
|
|
668
|
|
669
|
|
# Write the mappings collected in %debug_mappings to debug files.
#
# For every object type present in %debug_mappings a file
# "${type}_mappings.txt" is requested from get_filehandle() with the 'debug'
# argument, and one tab-separated line is written per collected row.
sub dump_debug_mappings {
  my ($self) = @_;

  foreach my $object_type (keys %debug_mappings) {

    $self->logger->debug("Writing $object_type mappings to debug/${object_type}_mappings.txt...\n");

    my $out = $self->get_filehandle("${object_type}_mappings.txt", 'debug');

    # each collected row is an arrayref of column values
    for my $columns (@{ $debug_mappings{$object_type} }) {
      print $out join("\t", @{$columns}) . "\n";
    }

    close($out);

    $self->logger->debug("Done.\n");
  }
}
|
|
689
|
|
690
|
|
# Write all collected stable_id_events of one event type to file.
#
# Argument:
#   $event_type - event type name (new|similarity); the output file is
#                 "stable_id_event_${event_type}.txt", requested from
#                 get_filehandle() with the 'tables' argument
#
# Throws if no event type is given.
#
# Every event returned by get_all_stable_id_events() is written on a line of
# its own.
sub write_stable_id_events {
  my ($self, $event_type) = @_;

  if (!$event_type) {
    throw("Need an event type (new|similarity).");
  }

  $self->logger->debug("Writing $event_type stable_id_events to file...\n");

  my $out = $self->get_filehandle("stable_id_event_${event_type}.txt", 'tables');

  my $events  = $self->get_all_stable_id_events($event_type);
  my $written = 0;

  for my $event_line (@{$events}) {
    print $out "$event_line\n";
    $written++;
  }

  close($out);

  $self->logger->debug("Done writing $written entries.\n");
}
|
|
711
|
|
712
|
|
# Register one stable_id_event.
#
# Arguments:
#   $type  - event type name (new|similarity)
#   $event - the event itself, a string; it is used as a hash key, so adding
#            the same event twice stores it only once
#
# Throws if no event type is given.
sub add_stable_id_event {
  my $self  = shift;
  my $type  = shift;
  my $event = shift;

  # argument check
  if (!$type) {
    throw("Need an event type (new|similarity).");
  }

  $self->{'stable_id_events'}{$type}{$event} = 1;
}
|
|
721
|
|
722
|
|
# Fetch all registered stable_id_events of one event type.
#
# Argument:
#   $type - event type name (new|similarity)
#
# Throws if no event type is given.
#
# Returns an arrayref of the event strings, in no particular order; it is
# empty when nothing was registered for this type.
sub get_all_stable_id_events {
  my $self = shift;
  my $type = shift;

  # argument check
  if (!$type) {
    throw("Need an event type (new|similarity).");
  }

  my @events = keys %{ $self->{'stable_id_events'}{$type} };

  return \@events;
}
|
|
731
|
|
732
|
|
# Getter/setter for the ID of the current mapping session.
#
# With an argument, stores it; always returns the stored value.
sub mapping_session_id {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_id'} = $new_value[0];
  }

  return $self->{'_mapping_session_id'};
}
|
|
738
|
|
739
|
|
# Getter/setter for the date of the current mapping session.
#
# With an argument, stores it; always returns the stored value.
sub mapping_session_date {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date'} = $new_value[0];
  }

  return $self->{'_mapping_session_date'};
}
|
|
745
|
|
746
|
|
# Getter/setter for the formatted date string of the current mapping session.
#
# With an argument, stores it; always returns the stored value.
sub mapping_session_date_fmt {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_mapping_session_date_fmt'} = $new_value[0];
  }

  return $self->{'_mapping_session_date_fmt'};
}
|
|
752
|
|
753
|
|
# Getter/setter for the stable ID generator object used by this mapper.
#
# With an argument, stores it; always returns the stored value.
sub stable_id_generator {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_stable_id_generator'} = $new_value[0];
  }

  return $self->{'_stable_id_generator'};
}
|
|
759
|
|
760
|
|
761 1;
|
|
762
|