0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::IdMapping::ResultAnalyser - analyse stable Id mapping results
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 # get a result analyser
|
|
28 my $analyser = Bio::EnsEMBL::IdMapping::ResultAnalyser->new(
|
|
29 -LOGGER => $logger,
|
|
30 -CONF => $conf,
|
|
31 -CACHE => $cache
|
|
32 );
|
|
33
|
|
34 # analyse results
|
|
35 $analyser->analyse( $gene_mappings,
|
|
36 $stable_id_mapper->get_all_stable_id_events('similarity') );
|
|
37
|
|
38 # write results to file
|
|
39 $analyser->write_results_to_file;
|
|
40
|
|
41 # create click lists
|
|
42 $analyser->create_clicklist;
|
|
43
|
|
44 # mapping_summary
|
|
45 $analyser->create_mapping_summary;
|
|
46
|
|
47 =head1 DESCRIPTION
|
|
48
|
|
49 This is a utility module which analyses the stable Id mapping results
|
|
50 by providing various sorts of mapping statistics. It also creates
|
|
51 clicklists and a mapping summary.
|
|
52
|
|
53 =head1 METHODS
|
|
54
|
|
55 analyse
|
|
56 analyse_db
|
|
57 classify_source_genes_by_type
|
|
58 classify_genes_by_mapping_simple
|
|
59 classify_genes_by_mapping
|
|
60 add
|
|
61 get
|
|
62 get_all_by_subclass
|
|
63 get_all_by_class
|
|
64 get_count_by_subclass
|
|
65 get_count_by_class
|
|
66 get_all_classes
|
|
67 class_key
|
|
68 write_results_to_file
|
|
69 create_clicklist
|
|
70 create_mapping_summary
|
|
71 read_from_file
|
|
72
|
|
73 =cut
|
|
74
|
|
75
|
|
76 package Bio::EnsEMBL::IdMapping::ResultAnalyser;
|
|
77
|
|
78 use strict;
|
|
79 use warnings;
|
|
80 no warnings 'uninitialized';
|
|
81
|
|
82 use Bio::EnsEMBL::IdMapping::BaseObject;
|
|
83 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
|
|
84
|
|
85 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
|
|
86 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
|
|
87
|
|
88
|
|
89 =head2 analyse
|
|
90
|
|
91 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - the gene
|
|
92 mappings to analyse
|
|
93 Arg[2] : Arrayref of Strings - similarity events
|
|
94 Example : $analyser->analyse($gene_mappings,
|
|
95 $stable_id_mapper->get_all_stable_id_events('similarity'));
|
|
96 Description : Analyses the results of a stable Id mapping run.
|
|
97 Return type : none
|
|
98 Exceptions : thrown on wrong or missing arguments
|
|
99 Caller : general
|
|
100 Status : At Risk
|
|
101 : under development
|
|
102
|
|
103 =cut
|
|
104
|
|
sub analyse {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # Validate both arguments up front; each failure raises via throw().
  if (!$gene_mappings or
      !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # Step 1: record every source gene under its type key
  # (status-logic_name-biotype) in the 'all' subclass.
  $self->classify_source_genes_by_type;

  # Step 2: sub-classify the source genes by their mapping status.
  $self->classify_genes_by_mapping($gene_mappings, $similarity_events);
}
|
|
126
|
|
127
|
|
128 =head2 classify_source_genes_by_type
|
|
129
|
|
130 Example : $analyser->classify_source_genes_by_type;
|
|
131 Description : Classifies source genes by type and adds them to the internal
|
|
132 datastructure. For the format of the classification string see
|
|
133 class_key().
|
|
134 Return type : none
|
|
135 Exceptions : none
|
|
136 Caller : internal
|
|
137 Status : At Risk
|
|
138 : under development
|
|
139
|
|
140 =cut
|
|
141
|
|
sub classify_source_genes_by_type {
  my ($self) = @_;

  # All source genes held in the cache, keyed by id; only the gene objects
  # themselves are needed here.
  my $source_genes = $self->cache->get_by_name('genes_by_id', 'source');

  foreach my $gene (values %{ $source_genes }) {
    # Register the gene's stable Id in the 'all' subclass of its type class.
    my $class = $self->class_key($gene);
    $self->add('source', $class, 'all', $gene->stable_id);
  }
}
|
|
149
|
|
150
|
|
151 =head2 classify_genes_by_mapping_simple
|
|
152
|
|
153 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
|
|
154 mappings to classify
|
|
155 Example : $analyser->classify_genes_by_mapping_simple;
|
|
156 Description : Classifies target genes by mapping ('mapped' or 'unmapped').
|
|
157 Return type : none
|
|
158 Exceptions : thrown on wrong or missing argument
|
|
159 Caller : This method is not in use at the moment.
|
|
160 Status : At Risk
|
|
161 : under development
|
|
162
|
|
163 =cut
|
|
164
|
|
sub classify_genes_by_mapping_simple {
  my $self = shift;
  my $gene_mappings = shift;

  # Classifies every target gene in the cache as 'mapped' or 'unmapped'.
  #
  # Arg[1]  : Bio::EnsEMBL::IdMapping::MappingList of gene mappings
  # Returns : nothing meaningful
  # Throws  : if Arg[1] is missing or not a MappingList

  # argument check
  unless ($gene_mappings and
          $gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  # first, create a lookup hash of source genes by target internal ID
  my %source_genes_by_target = ();
  foreach my $e (@{ $gene_mappings->get_all_Entries }) {
    my $s_gene = $self->cache->get_by_key('genes_by_id', 'source', $e->source);
    my $t_gene = $self->cache->get_by_key('genes_by_id', 'target', $e->target);
    $source_genes_by_target{$t_gene->id} = $s_gene;
  }

  # now loop over target genes
  foreach my $t_gene (values %{ $self->cache->get_by_name('genes_by_id', 'target') }) {

    # check if target gene has all required properties set; this only logs a
    # warning, the gene is still classified below
    unless ($t_gene->status and $t_gene->logic_name and $t_gene->biotype) {
      $self->logger->warning("Missing data for target gene: ".
        $t_gene->to_string."\n", 1);
    }

    my $class = $self->class_key($t_gene);

    # classify as 'mapped' (recorded under the source gene's stable ID) if a
    # source gene maps to this target, otherwise as 'unmapped' (recorded under
    # the target gene's own stable ID)
    if (my $s_gene = $source_genes_by_target{$t_gene->id}) {
      $self->add('target', $class, 'mapped', $s_gene->stable_id);
    } else {
      $self->add('target', $class, 'unmapped', $t_gene->stable_id);
    }

  }
}
|
|
205
|
|
206
|
|
207 =head2 classify_genes_by_mapping
|
|
208
|
|
209 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
|
|
210 mappings to classify
|
|
211 Arg[2] : Arrayref of Strings - similarity events
|
|
212 Example : $analyser->classify_genes_by_mapping;
|
|
213 Description : Classifies genes by mapping. Status is
|
|
214 'mapped' => stable Id was mapped
|
|
215 'lost_similar' => stable Id not mapped, but there is a
|
|
216 similarity entry for the source Id
|
|
217 'lost_definite' => not mapped and no similarity
|
|
218 Return type : none
|
|
219 Exceptions : thrown on wrong or missing argument
|
|
220 Caller : internal (called by analyse())
|
|
221 Status : At Risk
|
|
222 : under development
|
|
223
|
|
224 =cut
|
|
225
|
|
sub classify_genes_by_mapping {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # Validate arguments before touching any state.
  if (!$gene_mappings or
      !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # Pass 1: every source gene that appears in a mapping entry is 'mapped'.
  foreach my $entry (@{ $gene_mappings->get_all_Entries }) {
    my $gene = $self->cache->get_by_key('genes_by_id', 'source',
      $entry->source);
    my $class = $self->class_key($gene);
    $self->add('source', $class, 'mapped', $gene->stable_id);
  }

  # Build a lookup of stable Ids that have a similarity event. Each event is
  # a tab-separated string whose first field is the stable Id.
  my %similar = ();
  foreach my $event (@{ $similarity_events }) {
    my @fields = split("\t", $event);
    $similar{ $fields[0] } = 1;
  }

  # Pass 2: anything not marked 'mapped' above is lost; decide which kind.
  foreach my $gene (values %{ $self->cache->get_by_name('genes_by_id', 'source') }) {

    my $stable_id = $gene->stable_id;
    my $class = $self->class_key($gene);

    next if ($self->get('source', $class, 'mapped', $stable_id));

    # 'lost_similar' when a similarity event exists, else 'lost_definite'
    my $subclass = $similar{$stable_id} ? 'lost_similar' : 'lost_definite';
    $self->add('source', $class, $subclass, $stable_id);
  }

}
|
|
274
|
|
275
|
|
276 =head2 add
|
|
277
|
|
278 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
279 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
280 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
|
|
281 Arg[4] : String $stable_id - gene stable Id
|
|
282 Arg[5] : String $val - value (usually 0 or 1)
|
|
283 Example : $analyser->add('source', 'KNOWN-ensembl-protein_coding',
|
|
284 'mapped', 'ENSG00002342', 1);
|
|
285 Description : Add a stable Id / property pair to a name/dbtype lookup hash.
|
|
286
|
|
287 The datastructure is a bit of a bloat, but is general enough to
|
|
288 be used as a lookup hash and to generate statistics (counts by
|
|
289 type) and debug lists (dump by type).
|
|
290 Return type : String - the added value
|
|
291 Exceptions : none
|
|
292 Caller : internal
|
|
293 Status : At Risk
|
|
294 : under development
|
|
295
|
|
296 =cut
|
|
297
|
|
sub add {
  my ($self, $dbtype, $class, $subclass, $stable_id, $val) = @_;

  # Internal helper: arguments are deliberately not validated (hot path).

  # A missing (or undef) value means "flag this stable Id", i.e. store 1.
  defined($val) or $val = 1;

  # Store under dbtype -> class -> subclass -> stable Id; the stored value is
  # also the return value.
  return $self->{$dbtype}->{$class}->{$subclass}->{$stable_id} = $val;
}
|
|
308
|
|
309
|
|
310 =head2 get
|
|
311
|
|
312 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
313 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
314 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
|
|
315 Arg[4] : String $stable_id - gene stable Id
|
|
316 Example : my $mapping_status = $analyser->get('source',
|
|
317 'KNOWN-ensembl-protein_coding', 'mapped', 'ENSG00002342');
|
|
318 Description : Gets a stable Id mapping status from the internal datastructure.
|
|
319 Return type : String
|
|
320 Exceptions : none
|
|
321 Caller : internal
|
|
322 Status : At Risk
|
|
323 : under development
|
|
324
|
|
325 =cut
|
|
326
|
|
sub get {
  my ($self, $dbtype, $class, $subclass, $stable_id) = @_;

  # Looks up the value stored by add() for dbtype/class/subclass/stable Id.
  #
  # Returns the stored value, or undef when any level of the lookup is
  # missing. Internal helper, so the arguments are not validated.
  #
  # The lookup walks one level at a time instead of chaining
  # $self->{..}->{..}->{..}->{..}: the chained form autovivifies the
  # intermediate hashes, so a mere read of an unknown class would create that
  # class and make it show up in get_all_classes().
  my $node = $self;

  foreach my $key ($dbtype, $class, $subclass) {
    $node = $node->{$key};

    # explicit undef (not a bare return) so that list-context callers still
    # receive exactly one element, as with a plain hash element fetch
    return undef unless (ref($node) eq 'HASH');
  }

  return $node->{$stable_id};
}
|
|
334
|
|
335
|
|
336 =head2 get_all_by_subclass
|
|
337
|
|
338 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
339 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
340 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
|
|
341 Example : my @mapped_stable_ids = @{
|
|
342 $analyser->get_all_by_subclass(
|
|
343 'source', 'KNOWN-ensembl-protein_coding',
|
|
344 'mapped'
|
|
345 ) };
|
|
346 Description : Gets a list of stable Id for a given subclass.
|
|
347 Return type : Arrayref of String (stable Ids)
|
|
348 Exceptions : thrown on missing arguments
|
|
349 Caller : internal
|
|
350 Status : At Risk
|
|
351 : under development
|
|
352
|
|
353 =cut
|
|
354
|
|
sub get_all_by_subclass {
  my ($self, $dbtype, $class, $subclass) = @_;

  # Returns an arrayref of the stable Ids stored under
  # dbtype/class/subclass (empty arrayref if there are none; order is
  # whatever keys() yields). Throws if any of the three arguments is missing.

  # argument check
  throw("Need a dbtype (source|target).") unless ($dbtype);
  throw("Need a class.") unless ($class);
  throw("Need a subclass.") unless ($subclass);

  # Descend one level at a time: a chained ->{..}->{..}->{..} lookup would
  # autovivify the dbtype and class hashes, so querying an unknown class
  # would silently add it to the list of known classes.
  my $classes = $self->{$dbtype} || {};
  my $subclasses = $classes->{$class} || {};

  return [ keys %{ $subclasses->{$subclass} || {} } ];
}
|
|
365
|
|
366
|
|
367 =head2 get_all_by_class
|
|
368
|
|
369 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
370 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
371 Example : my @stable_ids = @{
|
|
372 $analyser->get_all_by_class( 'source',
|
|
373 'KNOWN-ensembl-protein_coding' ) };
|
|
374 Description : Gets a list of stable Id for a given class.
|
|
375 Return type : Arrayref of String (stable Ids)
|
|
376 Exceptions : thrown on missing arguments
|
|
377 Caller : internal
|
|
378 Status : At Risk
|
|
379 : under development
|
|
380
|
|
381 =cut
|
|
382
|
|
sub get_all_by_class {
  my ($self, $dbtype, $class) = @_;

  # Both arguments are mandatory.
  $dbtype or throw("Need a dbtype (source|target).");
  $class or throw("Need a class.");

  # Union of the stable Ids of every subclass of this class; a hash is used
  # so that an Id present in several subclasses is reported only once.
  my %seen = ();
  my $subclasses = $self->{$dbtype}->{$class} || {};

  foreach my $ids (values %{ $subclasses }) {
    @seen{ keys %{ $ids } } = ();
  }

  return [ keys %seen ];
}
|
|
400
|
|
401
|
|
402 =head2 get_count_by_subclass
|
|
403
|
|
404 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
405 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
406 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
|
|
407 Example : my $num_mapped = $analyser->get_count_by_subclass('source',
|
|
408 'KNOWN-ensembl-protein_coding', 'mapped');
|
|
409 Description : Gets the number of stable Ids for a given subclass.
|
|
410 Return type : Int
|
|
411 Exceptions : thrown on missing arguments
|
|
412 Caller : internal
|
|
413 Status : At Risk
|
|
414 : under development
|
|
415
|
|
416 =cut
|
|
417
|
|
sub get_count_by_subclass {
  my ($self, $dbtype, $class, $subclass) = @_;

  # Returns the number of stable Ids stored under dbtype/class/subclass
  # (0 if there are none). Throws if any of the three arguments is missing.

  # argument check
  throw("Need a dbtype (source|target).") unless ($dbtype);
  throw("Need a class.") unless ($class);
  throw("Need a subclass.") unless ($subclass);

  # Descend one level at a time: a chained ->{..}->{..}->{..} lookup would
  # autovivify the dbtype and class hashes, so counting an unknown class
  # would silently add it to the list of known classes.
  my $classes = $self->{$dbtype} || {};
  my $subclasses = $classes->{$class} || {};

  return scalar(keys %{ $subclasses->{$subclass} || {} });
}
|
|
428
|
|
429
|
|
430 =head2 get_count_by_class
|
|
431
|
|
432 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
433 Arg[2] : String $class - key identifying a gene type (see class_key())
|
|
434 Example : my $num_mapped = $analyser->get_count_by_class('source',
|
|
435 'KNOWN-ensembl-protein_coding');
|
|
436 Description : Gets the number of stable Ids for a given class.
|
|
437 Return type : Int
|
|
438 Exceptions : thrown on missing arguments
|
|
439 Caller : internal
|
|
440 Status : At Risk
|
|
441 : under development
|
|
442
|
|
443 =cut
|
|
444
|
|
sub get_count_by_class {
  my ($self, $dbtype, $class) = @_;

  # Both arguments are mandatory.
  $dbtype or throw("Need a dbtype (source|target).");
  $class or throw("Need a class.");

  # The count is the size of the de-duplicated Id list for the class.
  my $stable_ids = $self->get_all_by_class($dbtype, $class);

  return scalar(@{ $stable_ids });
}
|
|
454
|
|
455
|
|
456 =head2 get_all_classes
|
|
457
|
|
458 Arg[1] : String $dbtype - db type ('source' or 'target')
|
|
459 Example : foreach my $class (@{ $analyser->get_all_classes('source') }) {
|
|
460 print "$class\n";
|
|
461 }
|
|
462 Description : Gets a list of classes in the ResultAnalyser.
|
|
463 Return type : Arrayref of String
|
|
464 Exceptions : thrown on missing argument
|
|
465 Caller : internal
|
|
466 Status : At Risk
|
|
467 : under development
|
|
468
|
|
469 =cut
|
|
470
|
|
sub get_all_classes {
  my ($self, $dbtype) = @_;

  # The db type is mandatory.
  $dbtype or throw("Need a dbtype (source|target).");

  # Class names are the keys stored under the db type, sorted
  # alphabetically; an unknown db type yields an empty list.
  my $classes = $self->{$dbtype} || {};
  my @sorted = sort keys %{ $classes };

  return \@sorted;
}
|
|
479
|
|
480
|
|
481 =head2 class_key
|
|
482
|
|
483 Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $gene - a gene object
|
|
484 Example : my $class = $analyser->class_key($gene);
|
|
485 Description : Generates a key identifying a gene class. This identifier is
|
|
486 composed from the gene's status, logic name, and biotype.
|
|
487 Return type : String
|
|
488 Exceptions : none
|
|
489 Caller : internal
|
|
490 Status : At Risk
|
|
491 : under development
|
|
492
|
|
493 =cut
|
|
494
|
|
sub class_key {
  my ($self, $gene) = @_;

  # Key format: <status>-<logic_name>-<biotype>
  return join('-', $gene->status, $gene->logic_name, $gene->biotype);
}
|
|
499
|
|
500
|
|
501 =head2 write_results_to_file
|
|
502
|
|
503 Example : $analyser->write_results_to_file;
|
|
504 Description : Writes the results of the result analysis to a file. This is a
|
|
505 human-readable text detailing the mapping statistics.
|
|
506 Return type : none
|
|
507 Exceptions : none
|
|
508 Caller : general
|
|
509 Status : At Risk
|
|
510 : under development
|
|
511
|
|
512 =cut
|
|
513
|
|
sub write_results_to_file {
  my ($self) = @_;

  my $fh = $self->get_filehandle('gene_detailed_mapping_stats.txt', 'stats');

  # Layouts: header row, data row (count plus percentage per status), and the
  # percentage cell itself.
  my $header_fmt = "%-60s%-16s%-16s%-16s\n";
  my $row_fmt = "%-60s%5.0f (%7s) %5.0f (%7s) %5.0f (%7s)\n";
  my $percent_fmt = "%3.2f%%";

  print $fh "Gene detailed mapping results:\n\n";

  print $fh sprintf($header_fmt, "Gene type", "mapped", "lost (similar)",
    "lost (definite)");

  print $fh ('-'x108), "\n";

  # One row per gene class, in the sorted order get_all_classes() returns.
  foreach my $class (@{ $self->get_all_classes('source') }) {
    next if ($class eq 'all');

    my $total = $self->get_count_by_class('source', $class);

    # A zero total would make the percentages below divide by zero.
    if (!$total) {
      $self->logger->warning("No count found for $class.\n", 1);
      next;
    }

    # Collect "count, percentage" pairs in column order.
    my @cells = ();
    foreach my $subclass (qw(mapped lost_similar lost_definite)) {
      my $count = $self->get_count_by_subclass('source', $class, $subclass);
      push(@cells, $count, sprintf($percent_fmt, $count/$total*100));
    }

    print $fh sprintf($row_fmt, $class, @cells);
  }

  close($fh);
}
|
|
555
|
|
556
|
|
557 =head2 create_clicklist
|
|
558
|
|
559 Example : $analyser->create_clicklist;
|
|
560 Description : Writes an html file which contains a list of all lost genes,
|
|
561 with hyperlinks to the appropriate archive website. This is to
|
|
562 manually check lost genes.
|
|
563 Return type : none
|
|
564 Exceptions : none
|
|
565 Caller : general
|
|
566 Status : At Risk
|
|
567 : under development
|
|
568
|
|
569 =cut
|
|
570
|
|
sub create_clicklist {
  my $self = shift;

  # Writes 'genes_lost.html' (in the 'stats' directory): for every source gene
  # class a list of its 'lost_similar' and 'lost_definite' stable Ids, each
  # linked to <urlprefix><stable Id>, preceded by a navigation bar linking to
  # each class/subclass section.

  my $fh = $self->get_filehandle('genes_lost.html', 'stats');

  # start html output
  print $fh qq(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n);
  print $fh qq(<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb" lang="en-gb">);
  print $fh "<head>\n";
  print $fh "<title>Lost genes ";
  print $fh $self->conf->param('sourcedbname'), ' -> ',
    $self->conf->param('targetdbname');
  print $fh "</title>\n";
  print $fh "</head>\n<body>\n";

  my $prefix = $self->conf->param('urlprefix');
  unless ($prefix) {
    $self->logger->warning("No urlprefix set, clicklists might not be useable.\n", 1);
  }

  # both are built up in the loop below and printed afterwards, so that the
  # complete navigation bar precedes the lists
  my $navigation = '';
  my $clicklist = '';

  foreach my $class (@{ $self->get_all_classes('source') }) {
    next if ($class eq 'all');

    $navigation .= "$class ";
    $clicklist .= "<h1>$class</h1>\n";

    foreach my $subclass (qw(lost_similar lost_definite)) {

      # navigation
      $navigation .= qq(<a href="#${class}-$subclass">$subclass</a> );

      # clicklist; the heading carries the id the navigation link above
      # points to (without it the link has no target)
      $clicklist .= qq(<h2 id="${class}-$subclass">$subclass</h2>\n);

      foreach my $stable_id (@{ $self->get_all_by_subclass('source', $class, $subclass) }) {
        $clicklist .= qq(<a href="${prefix}$stable_id">$stable_id</a><br />\n);
      }

    }

    $navigation .= "<br />\n";
  }

  # print navigation and clicklist
  print $fh "$navigation\n\n";
  print $fh "$clicklist\n\n";

  # html footer
  print $fh "</body></html>\n";

  close($fh);
}
|
|
626
|
|
627
|
|
628 =head2 create_mapping_summary
|
|
629
|
|
630 Example : $analyser->create_mapping_summary();
|
|
631 Description : Writes a text file containing a summary of the mapping stats.
|
|
632 This will be emailed to the genebuilder for evaluation (you will
|
|
633 have to manually send the email, using the text in
|
|
634 "mapping_summary.txt" as the template).
|
|
635 Return type : none
|
|
636 Exceptions : none
|
|
637 Caller : general
|
|
638 Status : At Risk
|
|
639 : under development
|
|
640
|
|
641 =cut
|
|
642
|
|
sub create_mapping_summary {
  my $self = shift;

  # Writes 'mapping_summary.txt': run time, the parameters of this run, the
  # contents of the per-type mapping stats files, which data was uploaded to
  # the db, the output directories, and links for the first 10 deleted known
  # genes listed in 'genes_lost.txt'.

  my $fh = $self->get_filehandle('mapping_summary.txt');

  #
  # title
  #
  print $fh qq(Stable ID mapping results\n);
  print $fh qq(=========================\n\n);

  #
  # timing
  #
  print $fh "Run at: ".localtime()."\n";
  print $fh "Runtime: ";
  print $fh $self->logger->runtime, "\n\n";

  #
  # parameters used for this run
  #
  print $fh $self->conf->list_param_values;
  print $fh "\n";

  #
  # mapping stats
  #
  foreach my $type (qw(exon transcript translation gene gene_detailed)) {
    my $filename = "${type}_mapping_stats.txt";

    if ($self->file_exists($filename, 'stats')) {
      print $fh $self->read_from_file($filename, 'stats');
      print $fh "\n\n";
    } else {
      print $fh "No mapping stats found for $type.\n\n";
    }
  }

  #
  # db uploads
  #
  # each entry: [ suffix of the 'upload_*' config flag, label to print ]
  my @uploads = (
    ['stable_ids' => 'Stable IDs'],
    ['events' => 'Stable ID events and mapping session'],
    ['archive' => 'Gene and peptide archive'],
  );

  my $fmt1 = "%-40s%-20s\n";

  print $fh qq(Data uploaded to db:\n);
  print $fh qq(====================\n\n);

  if ($self->conf->param('dry_run')) {

    print $fh "None (dry run).\n";

  } else {

    foreach my $u (@uploads) {
      my $uploaded = 'no';
      $uploaded = 'yes' if ($self->conf->is_true("upload_".$u->[0]));
      print $fh sprintf($fmt1, $u->[1], $uploaded);
    }

  }

  print $fh "\n";

  #
  # stats and clicklist
  #
  # each entry: [ directory name below basedir, description ]
  my @output = (
    ['stats' => 'statistics (including clicklists of deleted IDs)'],
    ['debug' => 'detailed mapping output for debugging'],
    ['tables' => 'data files for db upload'],
  );

  my $fmt2 = "%-20s%-50s\n";

  print $fh qq(\nOutput directories:\n);
  print $fh qq(===================\n\n);

  print $fh sprintf($fmt2, qw(DIRECTORY DESCRIPTION));
  print $fh ('-'x72), "\n";

  print $fh sprintf($fmt2, 'basedir', $self->conf->param('basedir'));

  foreach my $o (@output) {
    # '$basedir' is printed literally (single quotes), as a placeholder
    print $fh sprintf($fmt2, '$basedir/'.$o->[0], $o->[1]);
  }

  print $fh "\n";

  #
  # clicklist of first 10 deleted genes
  #
  print $fh qq(\nFirst 10 deleted known genes:\n);
  print $fh qq(=============================\n\n);

  my $in_fh = $self->get_filehandle('genes_lost.txt', 'debug', '<');
  my $prefix = $self->conf->param('urlprefix');
  my $shown = 0;

  # Each line is read as whitespace-separated "<stable_id> <type>". A lexical
  # line variable is used so the caller's $_ is left untouched.
  while (my $line = <$in_fh>) {
    chomp($line);
    my ($stable_id, $type) = split(/\s+/, $line);

    next unless (defined($type) and $type eq 'known');

    print $fh sprintf($fmt2, $stable_id, "${prefix}$stable_id");

    # count the genes actually listed, not the lines read: otherwise
    # non-'known' lines among the first 10 would shorten the list
    last if (++$shown >= 10);
  }

  close($in_fh);
  close($fh);
}
|
|
760
|
|
761
|
|
762 =head2 read_from_file
|
|
763
|
|
764 Arg[1] : String $filename - name of file to read
|
|
765 Arg[2] : (optional) String $append - directory name to append to basedir
|
|
766 Example : my $stats_text = $analyser->read_from_file('gene_mapping_stats',
|
|
767 'stats');
|
|
768 Description : Reads mapping stats from a file.
|
|
769 Return type : String
|
|
770 Exceptions : none
|
|
771 Caller : internal
|
|
772 Status : At Risk
|
|
773 : under development
|
|
774
|
|
775 =cut
|
|
776
|
|
sub read_from_file {
  my $self = shift;
  my $filename = shift;
  my $append = shift;

  # Reads a whole file and returns its contents as one string.
  #
  # Arg[1]  : name of the file to read
  # Arg[2]  : (optional) directory name, passed through to get_filehandle()
  # Returns : the file contents; undef if no line could be read

  my $in_fh = $self->get_filehandle($filename, $append, '<');

  my $txt;

  # read into a lexical: a bare while (<$in_fh>) assigns to the global $_
  # and would clobber whatever the caller currently has in it
  while (my $line = <$in_fh>) {
    $txt .= $line;
  }

  # release the handle explicitly rather than relying on scope exit
  close($in_fh);

  return $txt;
}
|
|
792
|
|
793 1;
|
|
794
|