Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/IdMapping/StableIdMapper.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 =head1 SYNOPSIS | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 =head1 METHODS | |
28 | |
29 =cut | |
30 | |
31 package Bio::EnsEMBL::IdMapping::StableIdMapper; | |
32 | |
33 use strict; | |
34 use warnings; | |
35 no warnings 'uninitialized'; | |
36 | |
37 use Bio::EnsEMBL::IdMapping::BaseObject; | |
38 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject); | |
39 | |
40 use Bio::EnsEMBL::Utils::Exception qw(throw warning); | |
41 use Bio::EnsEMBL::Utils::ScriptUtils qw(inject path_append); | |
42 use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix; | |
43 use POSIX qw(strftime); | |
44 | |
45 | |
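46 # file-scoped lexical shared by all instances (not per-object state); collects per-type mapping rows for dump_debug_mappings() | |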
47 my %debug_mappings; | |
48 | |
49 | |
# Constructor.
#
# Delegates object construction to the parent class, then loads a stable ID
# generator plugin and stores an instance of it on the new object.
#
# The plugin class name is read from the 'plugin_stable_id_generator'
# configuration parameter; if that is not set,
# Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric is used.
#
# If you write your own generators, make sure they extend
# Bio::EnsEMBL::IdMapping::BaseObject and additionally implement these three
# methods: initial_stable_id(), increment_stable_id() and calculate_version().
#
# Returns the newly created object.
sub new {
  my ($caller, @args) = @_;

  my $class = ref($caller) || $caller;
  my $self  = $class->SUPER::new(@args);

  # pick the generator class, falling back to the generic Ensembl one
  my $stable_id_generator = $self->conf->param('plugin_stable_id_generator');
  $stable_id_generator ||=
    'Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric';

  $self->logger->debug("Using $stable_id_generator to generate stable Ids.\n");

  # load the plugin class at runtime
  inject($stable_id_generator);

  # instantiate the generator with our logger, configuration and cache, and
  # keep it for later use
  $self->stable_id_generator(
    $stable_id_generator->new(
      -LOGGER => $self->logger,
      -CONF   => $self->conf,
      -CACHE  => $self->cache,
    )
  );

  return $self;
}
75 | |
76 | |
# Sets up the mapping session for this run and dumps the mapping_session
# table.
#
# The method is a no-op when mapping_session_date() is already set, so it
# only does its work once per object. Otherwise it:
#   - records the current time (raw and formatted) as the session date,
#   - determines the mapping_session_id, either from the 'mapping_session_id'
#     configuration parameter or as MAX(mapping_session_id) + 1 read from the
#     source database,
#   - writes all existing mapping_session rows from the source database,
#     followed by one row for the new session, to the filehandle obtained via
#     get_filehandle('mapping_session.txt', 'tables').
#
# Takes no arguments.
sub generate_mapping_session {
  my ($self) = @_;

  # the session date doubles as the "already done" marker
  return if ($self->mapping_session_date);

  $self->logger->info("Generating new mapping_session...\n");

  # remember when this session started, raw and human-readable
  $self->mapping_session_date(time);
  my $session_time = $self->mapping_session_date;
  $self->mapping_session_date_fmt(
    strftime("%Y-%m-%d %T", localtime($session_time)));

  # database handles for source and target
  my $s_dbh = $self->cache->get_DBAdaptor('source')->dbc->db_handle;
  my $t_dbh = $self->cache->get_DBAdaptor('target')->dbc->db_handle;

  # a manually configured mapping_session_id takes precedence
  my $mapping_session_id = $self->conf->param('mapping_session_id');

  if ($mapping_session_id) {
    $self->logger->debug("Using manually configured mapping_session_id $mapping_session_id\n", 1);
  } else {
    # otherwise continue numbering after the highest id in the source db
    $mapping_session_id = $self->fetch_value_from_db($s_dbh,
      qq(SELECT MAX(mapping_session_id) FROM mapping_session));

    $self->logger->debug("No previous mapping_session found.\n", 1)
      unless ($mapping_session_id);

    $mapping_session_id++;

    $self->logger->debug("Using mapping_session_id $mapping_session_id\n", 1);
  }

  $self->mapping_session_id($mapping_session_id);

  # copy the existing mapping_session rows to the output file
  my $fh = $self->get_filehandle('mapping_session.txt', 'tables');

  my $sth = $s_dbh->prepare("SELECT * FROM mapping_session");
  $sth->execute;

  my $i;
  while (my @row = $sth->fetchrow_array) {
    $i++;
    print $fh join("\t", @row)."\n";
  }

  $sth->finish;

  # collect release and assembly of both databases for the new session row
  my $release_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'schema_version'
  );
  my $old_release = $self->fetch_value_from_db($s_dbh, $release_sql);
  my $new_release = $self->fetch_value_from_db($t_dbh, $release_sql);

  my $assembly_sql = qq(
    SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'
  );
  my $old_assembly = $self->fetch_value_from_db($s_dbh, $assembly_sql);
  my $new_assembly = $self->fetch_value_from_db($t_dbh, $assembly_sql);

  # incomplete meta data is reported but does not stop the run
  my $complete = ($old_release and $new_release and
                  $old_assembly and $new_assembly);
  if (!$complete) {
    $self->logger->warning("Not all data for new mapping_session found:\n", 1);
    $self->logger->info("old_release: $old_release, new_release: $new_release");
    $self->logger->info("old_assembly: $old_assembly, new_assembly $new_assembly\n", 2);
  }

  # append the row describing the new mapping_session
  print $fh join("\t",
    $mapping_session_id,
    $self->conf->param('sourcedbname'),
    $self->conf->param('targetdbname'),
    $old_release,
    $new_release,
    $old_assembly,
    $new_assembly,
    $self->mapping_session_date_fmt)."\n";

  close($fh);

  # the count includes the row just appended
  $self->logger->info("Done writing ".++$i." mapping_session entries.\n\n");
}
168 | |
169 | |
# Assigns stable IDs, versions and dates to all cached target objects of one
# type, based on a list of source/target mappings.
#
# Arguments:
#   $mappings - a Bio::EnsEMBL::IdMapping::MappingList (required; throws
#               otherwise)
#   $type     - object type name; used to build the cache name
#               "${type}s_by_id" and the output file names. For 'exon' no
#               stable_id_event entries are created.
#
# For every target object:
#   - if a mapping exists, stable ID and created_date are copied from the
#     mapped source object, the version comes from the stable ID generator's
#     calculate_version(), and modified_date is the source's when the version
#     is unchanged or the mapping session date otherwise;
#   - if no mapping exists, the next new stable ID from the generator is
#     assigned with version 1 and both dates set to the mapping session date.
# For every source object without a mapping a deletion event is recorded, and
# for genes and transcripts the lost stable ID is written to a debug file.
#
# Finally the target stable IDs and the mapping statistics are written out
# via write_stable_ids_to_file() and generate_mapping_stats().
#
# Returns early (after logging) when no source objects of this type are
# cached.
sub map_stable_ids {
  my ($self, $mappings, $type) = @_;

  throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.")
    unless ($mappings and
            $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

  # make sure the mapping_session exists; this does nothing after the first
  # call
  $self->generate_mapping_session;

  $self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');

  # copies of the cached id => object lookups for both databases
  my $cache_name  = "${type}s_by_id";
  my %all_sources = %{ $self->cache->get_by_name($cache_name, 'source') };
  my %all_targets = %{ $self->cache->get_by_name($cache_name, 'target') };

  # nothing to do if there are no objects of this type at all
  unless (%all_sources) {
    $self->logger->info("No cached ${type}s found.\n\n");
    return;
  }

  # counters for the final statistics
  my %stats;
  @stats{qw(mapped_known mapped_novel new lost_known lost_novel)} = (0) x 5;

  # lookups derived from the mapping list
  my (%sources_mapped, %targets_mapped, %scores_by_target);

  foreach my $entry (@{ $mappings->get_all_Entries }) {
    my $source_id = $entry->source;
    my $target_id = $entry->target;

    $sources_mapped{$source_id}   = $target_id;
    $targets_mapped{$target_id}   = $source_id;
    $scores_by_target{$target_id} = $entry->score;
  }

  # first stable ID to hand out to unmapped targets
  my $generator     = $self->stable_id_generator;
  my $new_stable_id = $generator->initial_stable_id($type);

  my $not_exon = ($type ne 'exon');

  #
  # assign mapped and new stable IDs
  #
  foreach my $tid (keys %all_targets) {

    my $t_obj = $all_targets{$tid};
    my $sid   = $targets_mapped{$tid};

    # no mapping was found, assign a new stable ID
    unless ($sid) {

      $t_obj->stable_id($new_stable_id);
      $t_obj->version(1);
      $t_obj->created_date($self->mapping_session_date);
      $t_obj->modified_date($self->mapping_session_date);

      # creation event (not for exons); '\N' and 0 stand in for the missing
      # source stable ID and version
      if ($not_exon) {
        $self->add_stable_id_event('new', join("\t",
          '\N',
          0,
          $t_obj->stable_id,
          $t_obj->version,
          $self->mapping_session_id,
          $type,
          0
        ));
      }

      # the next unmapped object gets the following stable ID
      $new_stable_id = $generator->increment_stable_id($new_stable_id);

      $stats{'new'}++;

      next;
    }

    # a mapping exists, carry identity over from the source object
    my $s_obj = $all_sources{$sid};

    $t_obj->stable_id($s_obj->stable_id);
    $t_obj->created_date($s_obj->created_date);
    $t_obj->version($generator->calculate_version($s_obj, $t_obj));

    # modified_date only moves when the version changed
    my $same_version = ($s_obj->version == $t_obj->version);
    if ($same_version) {
      $t_obj->modified_date($s_obj->modified_date);
    } else {
      $t_obj->modified_date($self->mapping_session_date);
    }

    # stable_id_event entry (not for exons), and only when something changed:
    # a different stable ID or version, or a score that is not (nearly) 1
    if ($not_exon) {
      my $score     = $scores_by_target{$tid};
      my $unchanged = ($s_obj->stable_id eq $t_obj->stable_id &&
                       $s_obj->version == $t_obj->version &&
                       $score > 0.9999);

      unless ($unchanged) {
        $self->add_stable_id_event('new', join("\t",
          $s_obj->stable_id, $s_obj->version,
          $t_obj->stable_id, $t_obj->version,
          $self->mapping_session_id, $type,
          $score));
      }
    }

    # remember the mapping for dump_debug_mappings()
    push @{ $debug_mappings{$type} }, [ $sid, $tid, $t_obj->stable_id ];

    $stats{ $s_obj->is_known ? 'mapped_known' : 'mapped_novel' }++;
  }

  #
  # deletion events for lost sources
  #
  my $log_lost = ($type eq 'gene' or $type eq 'transcript');

  my $fh;
  $fh = $self->get_filehandle("${type}s_lost.txt", 'debug') if ($log_lost);

  foreach my $sid (keys %all_sources) {

    # mapped sources are not lost
    next if ($sources_mapped{$sid});

    my $s_obj = $all_sources{$sid};

    # deletion event (not for exons); '\N' and 0 stand in for the missing
    # target stable ID and version
    if ($not_exon) {
      $self->add_stable_id_event('new', join("\t",
        $s_obj->stable_id,
        $s_obj->version,
        '\N',
        0,
        $self->mapping_session_id,
        $type,
        0
      ));
    }

    my $status = $s_obj->is_known ? 'known' : 'novel';
    $stats{"lost_$status"}++;

    # log lost genes and transcripts (for debug purposes)
    #
    # The Java app did this with a separate method
    # (StableIdMapper.dumpLostGeneAndTranscripts()) which also claims to log
    # losses due to merge. Since at that point this data isn't available yet
    # the logging can be done much more efficiently here.
    if ($log_lost) {
      print $fh $s_obj->stable_id, "\t$status\n";
    }
  }

  close($fh) if (defined($fh));

  # write stable IDs to file
  $self->write_stable_ids_to_file($type, \%all_targets);

  # also generate and write stats to file
  $self->generate_mapping_stats($type, \%stats);

  $self->logger->info("Done.\n\n");
}
357 | |
358 | |
# Records 'similarity' stable_id_events for near-best alternative matches.
#
# Arguments:
#   $mappings - Bio::EnsEMBL::IdMapping::MappingList of accepted mappings
#   $scores   - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix with all scored
#               source/target pairs
#   $type     - object type name (gene|transcript|translation); used to build
#               the cache name "${type}s_by_id" and stored in the event
# Throws if any argument is missing or of the wrong class.
#
# Two passes are made:
#   1. For every accepted mapping, each other scored entry sharing its source
#      or its target gets an event if its score is within 1.5% of the
#      mapping's score.
#   2. For every source and every target in the matrix that is not part of an
#      accepted mapping, all entries scoring within 5% of that object's top
#      score get an event, provided the top score is at least 0.75.
#
# Events are stored via add_stable_id_event(), so an event produced by both
# the source and the target side of pass 2 is only kept once.
sub generate_similarity_events {
  my ( $self, $mappings, $scores, $type ) = @_;

  # argument checks
  unless ( $mappings and
           $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList') )
  {
    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
  }

  unless ( $scores and
           $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
  {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  throw("Need a type (gene|transcript|translation).") unless ($type);

  # lookup of sources and targets taking part in an accepted mapping. Both
  # sides are always present, so the second pass below runs even when the
  # mapping list is empty.
  my %mapped = ( 'source' => {}, 'target' => {} );

  # builds and stores one similarity event for a scored entry
  my $add_similarity = sub {
    my ($entry) = @_;

    my $s_obj =
      $self->cache->get_by_key( "${type}s_by_id", 'source', $entry->source );
    my $t_obj =
      $self->cache->get_by_key( "${type}s_by_id", 'target', $entry->target );

    my $key = join( "\t",
                    $s_obj->stable_id,         $s_obj->version,
                    $t_obj->stable_id,         $t_obj->version,
                    $self->mapping_session_id, $type,
                    $entry->score );
    $self->add_stable_id_event( 'similarity', $key );
  };

  #
  # add similarities for mapped entries
  #
  foreach my $e ( @{ $mappings->get_all_Entries } ) {

    # remember mapped sources and targets; we'll need this later
    $mapped{'source'}->{ $e->source } = 1;
    $mapped{'target'}->{ $e->target } = 1;

    # all other entries which contain either source or target; add
    # similarity if score is within 1.5% of this entry (which is the top
    # scorer)
    my @others = ( @{ $scores->get_Entries_for_target( $e->target ) },
                   @{ $scores->get_Entries_for_source( $e->source ) } );

    foreach my $e2 (@others) {

      # skip self
      next if ( ( $e->source eq $e2->source ) and
                ( $e->target eq $e2->target ) );

      $add_similarity->($e2) if ( $e2->score > ( $e->score*0.985 ) );

      # [todo] add overlap hack here? (see Java code)
      # probably better solution: let synteny rescoring affect this
      # decision
    }
  }

  #
  # similarities for other entries
  #
  foreach my $dbtype (qw(source target)) {

    my $m1 = "get_all_${dbtype}s";
    my $m2 = "get_Entries_for_${dbtype}";

    foreach my $id ( @{ $scores->$m1 } ) {

      # skip if this is a mapped source/target
      next if ( $mapped{$dbtype}->{$id} );

      my @entries =
        sort { $b->score <=> $a->score } @{ $scores->$m2($id) };

      next unless (@entries);

      # skip if top score < 0.75
      my $top_score = $entries[0]->score;
      next if ( $top_score < 0.75 );

      # add similarities for all entries within 5% of top scorer
      foreach my $e (@entries) {
        $add_similarity->($e) if ( $e->score > ( $top_score*0.95 ) );
      }
    }
  }

  return;
}
478 | |
479 | |
# Builds a copy of a transcript scoring matrix without the entries whose
# source and target transcript belong to the same gene.
#
# Argument:
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of
#                        transcripts (required; throws otherwise)
#
# An entry is dropped when the genes of its source and target transcript have
# the same stable ID AND the source transcript's stable ID is also found
# among the target transcripts. Entries for source transcripts whose stable
# ID no longer exists in the target are kept.
#
# Returns the new, filtered ScoredMappingMatrix.
sub filter_same_gene_transcript_similarities {
  my ($self, $transcript_scores) = @_;

  # argument checks
  throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of transcripts.')
    unless ($transcript_scores and
      $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  # the matrix that will receive the surviving entries
  my $filtered_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'filtered_transcript_scores.ser',
  );

  # stable IDs of all target transcripts
  my $target_transcripts =
    $self->cache->get_by_name("transcripts_by_id", 'target');
  my %all_targets = map { ($_->stable_id => 1) } values %{ $target_transcripts };

  my $i = 0;

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $s_tr   = $self->cache->get_by_key('transcripts_by_id', 'source',
                                          $entry->source);
    my $s_gene = $self->cache->get_by_key('genes_by_transcript_id', 'source',
                                          $entry->source);
    my $t_gene = $self->cache->get_by_key('genes_by_transcript_id', 'target',
                                          $entry->target);

    # skip if source and target transcript are in same gene, BUT keep events
    # for deleted transcripts
    my $same_gene = ($s_gene->stable_id eq $t_gene->stable_id);

    if ($same_gene and $all_targets{$s_tr->stable_id}) {
      $i++;
    } else {
      $filtered_scores->add_Entry($entry);
    }
  }

  $self->logger->debug("Skipped $i same gene transcript mappings.\n");

  return $filtered_scores;
}
532 | |
533 | |
# Generates similarity events for translations.
#
# Arguments:
#   $mappings          - Bio::EnsEMBL::IdMapping::MappingList (required)
#   $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of
#                        transcript scores (required)
# Throws if either argument is missing or of the wrong class.
#
# Translations have no scores of their own here; a stand-in scoring matrix is
# built by giving each source/target translation pair the score of its
# transcripts. That matrix is then handed to generate_similarity_events()
# with type 'translation'.
sub generate_translation_similarity_events {
  my ($self, $mappings, $transcript_scores) = @_;

  # argument checks
  throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.')
    unless ($mappings and
            $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

  throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.')
    unless ($transcript_scores and
      $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

  # stand-in translation scoring matrix
  my $translation_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
    -CACHE_FILE => 'translation_scores.ser',
  );

  foreach my $entry (@{ $transcript_scores->get_all_Entries }) {

    my $s_transcript =
      $self->cache->get_by_key('transcripts_by_id', 'source', $entry->source);
    my $s_tl = $s_transcript->translation;

    my $t_transcript =
      $self->cache->get_by_key('transcripts_by_id', 'target', $entry->target);
    my $t_tl = $t_transcript->translation;

    # only pairs where both transcripts have a translation get a score
    next unless ($s_tl and $t_tl);

    $translation_scores->add_score($s_tl->id, $t_tl->id, $entry->score);
  }

  # now generate similarity events using this stand-in scoring matrix
  $self->generate_similarity_events($mappings, $translation_scores,
    'translation');
}
574 | |
575 | |
# Writes one tab-separated row per target object to the filehandle obtained
# via get_filehandle("${type}_stable_id.txt", 'tables').
#
# Arguments:
#   $type        - object type name, used in the file name and log message
#   $all_targets - hashref of target objects; rows are written in ascending
#                  numeric order of the hash keys
#
# Each row holds: id, stable_id, version, created date, modified date. Dates
# are formatted as "%Y-%m-%d %T" in local time; an object without a created
# or modified date gets the mapping session date instead.
sub write_stable_ids_to_file {
  my ($self, $type, $all_targets) = @_;

  $self->logger->info("Writing ${type} stable IDs to file...\n");

  my $fh = $self->get_filehandle("${type}_stable_id.txt", 'tables');

  # objects ordered numerically by their hash key
  my @sorted_keys    = sort { $a <=> $b } keys %$all_targets;
  my @sorted_targets = @{$all_targets}{@sorted_keys};

  my $date_fmt = "%Y-%m-%d %T";

  foreach my $obj (@sorted_targets) {

    # fall back to the mapping session date for missing dates
    my $created_date  = $obj->created_date  || $self->mapping_session_date;
    my $modified_date = $obj->modified_date || $self->mapping_session_date;

    print $fh join("\t",
      $obj->id,
      $obj->stable_id,
      $obj->version,
      strftime($date_fmt, localtime($created_date)),
      strftime($date_fmt, localtime($modified_date)),
    )."\n";
  }

  close($fh);

  $self->logger->info("Done writing ".scalar(@sorted_targets)." entries.\n\n");
}
620 | |
621 | |
# Formats the mapping statistics for one object type as a small text table,
# logs it and writes it to the filehandle obtained via
# get_filehandle("${type}_mapping_stats.txt", 'stats').
#
# Arguments:
#   $type  - object type name; for 'exon' the known/novel rows are omitted
#   $stats - hashref with the counters mapped_known, mapped_novel,
#            lost_known and lost_novel
#
# Every percentage is mapped/(mapped + lost)*100 and is reported as 0 when
# there is nothing to divide by, so all-zero statistics no longer die with
# "Illegal division by zero" in the total row.
sub generate_mapping_stats {
  my $self = shift;
  my $type = shift;
  my $stats = shift;

  my $result = ucfirst($type)." mapping results:\n\n";

  my $fmt1 = "%-10s%-10s%-10s%-10s\n";
  my $fmt2 = "%-10s%6.0f %6.0f %4.2f%%\n";

  $result .= sprintf($fmt1, qw(TYPE MAPPED LOST PERCENTAGE));
  $result .= ('-'x40)."\n";

  my $mapped_total = $stats->{'mapped_known'} + $stats->{'mapped_novel'};
  my $lost_total = $stats->{'lost_known'} + $stats->{'lost_novel'};
  my $known_total = $stats->{'mapped_known'} + $stats->{'lost_known'};
  my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'};
  my $grand_total = $known_total + $novel_total;

  # percentage of $part in $whole; 0 when $whole is empty
  my $percentage = sub {
    my ($part, $whole) = @_;
    return $whole ? $part/$whole*100 : 0;
  };

  # no split into known and novel for exons
  unless ( $type eq 'exon' ) {
    $result .= sprintf( $fmt2,
                        'known',
                        $stats->{'mapped_known'},
                        $stats->{'lost_known'},
                        $percentage->($stats->{'mapped_known'}, $known_total)
    );

    $result .= sprintf( $fmt2,
                        'novel',
                        $stats->{'mapped_novel'},
                        $stats->{'lost_novel'},
                        $percentage->($stats->{'mapped_novel'}, $novel_total)
    );
  }

  $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
    $percentage->($mapped_total, $grand_total));

  # log result
  $self->logger->info($result."\n");

  # write result to file
  my $fh = $self->get_filehandle("${type}_mapping_stats.txt", 'stats');
  print $fh $result;
  close($fh);
}
668 | |
669 | |
# Writes the mappings collected in %debug_mappings to one debug file per
# object type.
#
# For each type the filehandle is obtained via
# get_filehandle("${type}_mappings.txt", 'debug'); every collected row is
# written as one tab-separated line.
sub dump_debug_mappings {
  my ($self) = @_;

  for my $type (keys %debug_mappings) {

    $self->logger->debug("Writing $type mappings to debug/${type}_mappings.txt...\n");

    my $fh   = $self->get_filehandle("${type}_mappings.txt", 'debug');
    my $rows = $debug_mappings{$type};

    for my $row (@{ $rows }) {
      print $fh join("\t", @{ $row })."\n";
    }

    close($fh);

    $self->logger->debug("Done.\n");
  }
}
689 | |
690 | |
# Writes all recorded stable_id_events of one event type, one per line, to
# the filehandle obtained via
# get_filehandle("stable_id_event_${event_type}.txt", 'tables').
#
# Argument:
#   $event_type - event type (new|similarity); throws if missing
sub write_stable_id_events {
  my ($self, $event_type) = @_;

  $event_type or throw("Need an event type (new|similarity).");

  $self->logger->debug("Writing $event_type stable_id_events to file...\n");

  my $fh     = $self->get_filehandle("stable_id_event_${event_type}.txt", 'tables');
  my $events = $self->get_all_stable_id_events($event_type);

  # number of entries written, for the log message
  my $i = 0;

  for my $event (@{ $events }) {
    print $fh "$event\n";
    $i++;
  }

  close($fh);

  $self->logger->debug("Done writing $i entries.\n");
}
711 | |
712 | |
# Records a stable_id_event on this object.
#
# Arguments:
#   $type  - event type (new|similarity); throws if missing
#   $event - the event itself, stored as a hash key so that identical events
#            are only kept once
sub add_stable_id_event {
  my $self  = shift;
  my $type  = shift;
  my $event = shift;

  # argument check
  $type or throw("Need an event type (new|similarity).");

  $self->{stable_id_events}{$type}{$event} = 1;
}
721 | |
722 | |
# Returns all recorded stable_id_events of one event type.
#
# Argument:
#   $type - event type (new|similarity); throws if missing
#
# Returns an arrayref of event strings (in hash key order); the arrayref is
# empty when no event of that type was recorded.
sub get_all_stable_id_events {
  my $self = shift;
  my $type = shift;

  # argument check
  $type or throw("Need an event type (new|similarity).");

  my @events = keys %{ $self->{stable_id_events}{$type} };

  return \@events;
}
731 | |
732 | |
# Getter/setter for the ID of the current mapping session.
#
# With an argument, stores it (even if undefined); always returns the
# currently stored value.
sub mapping_session_id {
  my ($self, @new_value) = @_;

  $self->{_mapping_session_id} = $new_value[0] if (@new_value);

  return $self->{_mapping_session_id};
}
738 | |
739 | |
# Getter/setter for the date of the current mapping session.
#
# With an argument, stores it (even if undefined); always returns the
# currently stored value.
sub mapping_session_date {
  my ($self, @new_value) = @_;

  $self->{_mapping_session_date} = $new_value[0] if (@new_value);

  return $self->{_mapping_session_date};
}
745 | |
746 | |
# Getter/setter for the formatted (string) date of the current mapping
# session.
#
# With an argument, stores it (even if undefined); always returns the
# currently stored value.
sub mapping_session_date_fmt {
  my ($self, @new_value) = @_;

  $self->{_mapping_session_date_fmt} = $new_value[0] if (@new_value);

  return $self->{_mapping_session_date_fmt};
}
752 | |
753 | |
# Getter/setter for the stable ID generator object used by this mapper.
#
# With an argument, stores it (even if undefined); always returns the
# currently stored value.
sub stable_id_generator {
  my ($self, @new_value) = @_;

  $self->{_stable_id_generator} = $new_value[0] if (@new_value);

  return $self->{_stable_id_generator};
}
759 | |
760 | |
761 1; | |
762 |