annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::IdMapping::ResultAnalyser - analyse stable Id mapping results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 # get a result analyser
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 my $analyser = Bio::EnsEMBL::IdMapping::ResultAnalyser->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 -LOGGER => $logger,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 -CONF => $conf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 -CACHE => $cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 # analyse results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 $analyser->analyse( $gene_mappings,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 $stable_id_mapper->get_all_stable_id_events('similarity') );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 # write results to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 $analyser->write_results_to_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 # create click lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 $analyser->create_clicklist;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 # mapping_summary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 $analyser->create_mapping_summary;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 This is a utility module which analyses the stable Id mapping results
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 by providing various sorts of mapping statistics. It also creates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 clicklists and a mapping summary.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 analyse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 analyse_db
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 classify_source_genes_by_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 classify_genes_by_mapping_simple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 classify_genes_by_mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 add
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 get
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 get_all_by_subclass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 get_all_by_class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 get_count_by_subclass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 get_count_by_class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 get_all_classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 class_key
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 write_results_to_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 create_clicklist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 create_mapping_summary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 read_from_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 package Bio::EnsEMBL::IdMapping::ResultAnalyser;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 use Bio::EnsEMBL::IdMapping::BaseObject;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 =head2 analyse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - the gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 mappings to analyse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Arg[2] : Arrayref of Strings - similarity events
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Example : $analyser->analyse($gene_mappings,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 $stable_id_mapper->get_all_stable_id_events('similarity'));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 Description : Analyses the results of a stable Id mapping run.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 Exceptions : thrown on wrong or missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Entry point of the analysis: validates both arguments, then classifies the
# source genes first by type and afterwards by mapping status.
#
#   $gene_mappings     - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#   $similarity_events - arrayref of similarity event strings
#
# Throws if either argument is missing or of the wrong kind.
sub analyse {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # anything that is not a MappingList is rejected
  if (!$gene_mappings
      or !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  # similarity events have to arrive as a plain array reference
  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # first pass: group source genes by type (see class_key())
  $self->classify_source_genes_by_type;

  # second pass: record the mapping status of the source genes
  $self->classify_genes_by_mapping($gene_mappings, $similarity_events);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 =head2 classify_source_genes_by_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Example : $analyser->classify_source_genes_by_type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 Description : Classifies source genes by type and adds them to the internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 datastructure. For the format of the classification string see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 class_key().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Registers every source gene from the cache under the 'all' subclass of its
# class (the class string is produced by class_key()).
sub classify_source_genes_by_type {
  my ($self) = @_;

  my @source_genes =
    values %{ $self->cache->get_by_name('genes_by_id', 'source') };

  foreach my $gene (@source_genes) {
    $self->add('source', $self->class_key($gene), 'all', $gene->stable_id);
  }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 =head2 classify_genes_by_mapping_simple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 mappings to classify
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 Example : $analyser->classify_genes_by_mapping_simple;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 Description : Classifies target genes by mapping ('mapped' or 'unmapped').
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Exceptions : thrown on wrong or missing argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Caller : This method is not in use at the moment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Classifies every target gene as 'mapped' or 'unmapped'.
#
#   $gene_mappings - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#
# A mapped target gene is stored under the stable Id of the source gene it
# was mapped from; an unmapped one under its own stable Id.
# Throws if $gene_mappings is missing or not a MappingList.
sub classify_genes_by_mapping_simple {
  my ($self, $gene_mappings) = @_;

  # anything that is not a MappingList is rejected
  if (!$gene_mappings
      or !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  # index the mapped source genes by the internal Id of their target gene
  my %source_of_target;
  for my $entry (@{ $gene_mappings->get_all_Entries }) {
    my $source = $self->cache->get_by_key('genes_by_id', 'source', $entry->source);
    my $target = $self->cache->get_by_key('genes_by_id', 'target', $entry->target);
    $source_of_target{ $target->id } = $source;
  }

  # walk over all target genes and look each one up in the index
  for my $target (values %{ $self->cache->get_by_name('genes_by_id', 'target') }) {

    # incomplete genes are reported, but still classified below
    if (!$target->status or !$target->logic_name or !$target->biotype) {
      $self->logger->warning("Missing data for target gene: ".
        $target->to_string."\n", 1);
    }

    my $class = $self->class_key($target);

    # 'mapped' entries carry the source gene's stable Id, 'unmapped'
    # entries the target gene's own stable Id
    my $mapped_from = $source_of_target{ $target->id };
    my ($subclass, $stable_id) = $mapped_from
      ? ('mapped',   $mapped_from->stable_id)
      : ('unmapped', $target->stable_id);

    $self->add('target', $class, $subclass, $stable_id);
  }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 =head2 classify_genes_by_mapping
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 Arg[1] : Bio::EnsEMBL::IdMapping::MappingList $gene_mappings - gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 mappings to classify
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 Arg[2] : Arrayref of Strings - similarity events
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 Example : $analyser->classify_genes_by_mapping($gene_mappings, $similarity_events);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 Description : Classifies genes by mapping. Status is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 'mapped' => stable Id was mapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 'lost_similar' => stable Id not mapped, but there is a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 similarity entry for the source Id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 'lost_definite' => not mapped and no similarity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 Exceptions : thrown on wrong or missing argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 Caller : internal (called by analyse())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Records the mapping status of every source gene:
#   'mapped'        - the gene appears as source of a mapping entry
#   'lost_similar'  - not mapped, but a similarity event starts with its
#                     stable Id
#   'lost_definite' - not mapped and no similarity event for it
#
#   $gene_mappings     - Bio::EnsEMBL::IdMapping::MappingList of gene mappings
#   $similarity_events - arrayref of tab-separated event strings; only the
#                        first field (a stable Id) is looked at here
#
# Throws if either argument is missing or of the wrong kind.
sub classify_genes_by_mapping {
  my ($self, $gene_mappings, $similarity_events) = @_;

  # anything that is not a MappingList is rejected
  if (!$gene_mappings
      or !$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
    throw("Need a Bio::EnsEMBL::IdMapping::MappingList of genes.");
  }

  # similarity events have to arrive as a plain array reference
  if (!$similarity_events or ref($similarity_events) ne 'ARRAY') {
    throw("Need a list of similarity events.");
  }

  # the source gene of every mapping entry counts as 'mapped'
  for my $entry (@{ $gene_mappings->get_all_Entries }) {
    my $mapped_gene =
      $self->cache->get_by_key('genes_by_id', 'source', $entry->source);
    $self->add('source', $self->class_key($mapped_gene), 'mapped',
      $mapped_gene->stable_id);
  }

  # index of stable Ids that have at least one similarity event
  my %has_similarity;
  for my $event (@$similarity_events) {
    my ($event_id) = split("\t", $event);
    $has_similarity{$event_id} = 1;
  }

  # every source gene not flagged as 'mapped' above is considered lost
  for my $gene (values %{ $self->cache->get_by_name('genes_by_id', 'source') }) {
    my $stable_id = $gene->stable_id;
    my $class = $self->class_key($gene);

    next if $self->get('source', $class, 'mapped', $stable_id);

    my $fate = $has_similarity{$stable_id} ? 'lost_similar' : 'lost_definite';
    $self->add('source', $class, $fate, $stable_id);
  }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 =head2 add
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 Arg[4] : String $stable_id - gene stable Id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 Arg[5] : String $val - value (usually 0 or 1)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 Example : $analyser->add('source', 'KNOWN-ensembl-protein_coding',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 'mapped', 'ENSG00002342', 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 Description : Add a stable Id / property pair to a name/dbtype lookup hash.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 The datastructure is a bit of a bloat, but is general enough to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 be used as a lookup hash and to generate statistics (counts by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 type) and debug lists (dump by type).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Return type : String - the added value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Stores a value for a stable Id under dbtype / class / subclass in the
# analyser's nested lookup hash and returns the value that was stored.
#
#   $dbtype    - db type ('source' or 'target')
#   $class     - key identifying a gene type (see class_key())
#   $subclass  - status identifier (e.g. 'mapped')
#   $stable_id - gene stable Id
#   $val       - optional value; 1 is stored when it is not defined
#
# Private method, so arguments are deliberately not checked.
sub add {
  my $self = shift;
  my ($dbtype, $class, $subclass, $stable_id, $val) = @_;

  # an omitted (undefined) value simply flags the stable Id as present
  my $stored = defined($val) ? $val : 1;

  return $self->{$dbtype}->{$class}->{$subclass}->{$stable_id} = $stored;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 =head2 get
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 Arg[4] : String $stable_id - gene stable Id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 Example : my $mapping_status = $analyser->get('source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 'KNOWN-ensembl-protein_coding', 'mapped', 'ENSG00002342');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 Description : Gets a stable Id mapping status from the internal datastructure.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 Return type : String
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Looks up the value stored for a stable Id under dbtype / class / subclass.
#
#   $dbtype    - db type ('source' or 'target')
#   $class     - key identifying a gene type (see class_key())
#   $subclass  - status identifier (e.g. 'mapped')
#   $stable_id - gene stable Id
#
# Returns the stored value, or undef when nothing was stored. Always returns
# exactly one scalar, also in list context.
#
# Private method, so arguments are deliberately not checked.
sub get {
  my ($self, $dbtype, $class, $subclass, $stable_id) = @_;

  # A chained lookup ($self->{a}->{b}->{c}->{d}) would create the missing
  # intermediate hashes as a side effect of reading. Checking the levels one
  # by one keeps this getter from adding empty classes/subclasses to the
  # datastructure.
  my $ids = (exists $self->{$dbtype} && exists $self->{$dbtype}->{$class})
    ? $self->{$dbtype}->{$class}->{$subclass}
    : undef;

  return $ids ? $ids->{$stable_id} : undef;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 =head2 get_all_by_subclass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 Example : my @mapped_stable_ids = @{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 $analyser->get_all_by_subclass(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 'source', 'KNOWN-ensembl-protein_coding',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 'mapped'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 Description : Gets a list of stable Id for a given subclass.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 Return type : Arrayref of String (stable Ids)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 Exceptions : thrown on missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns an arrayref with the stable Ids stored under the given
# dbtype / class / subclass (an empty arrayref if there are none).
#
#   $dbtype   - db type ('source' or 'target')
#   $class    - key identifying a gene type (see class_key())
#   $subclass - status identifier (e.g. 'mapped')
#
# Throws if any of the three arguments is missing.
sub get_all_by_subclass {
  my $self = shift;
  my ($dbtype, $class, $subclass) = @_;

  # all three keys are mandatory
  $dbtype   or throw("Need a dbtype (source|target).");
  $class    or throw("Need a class.");
  $subclass or throw("Need a subclass.");

  # fall back to an empty hash for an unknown subclass
  my $ids = $self->{$dbtype}->{$class}->{$subclass} || {};

  return [ keys %$ids ];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 =head2 get_all_by_class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 Example : my @stable_ids = @{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 $analyser->get_all_by_class( 'source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 'KNOWN-ensembl-protein_coding' ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 Description : Gets a list of stable Id for a given class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 Return type : Arrayref of String (stable Ids)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 Exceptions : thrown on missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return an arrayref of the stable Ids found in any subclass of
# $self->{$dbtype}->{$class}. Ids are collected as hash keys, so an Id that
# is filed under several subclasses is reported only once.
# Throws when $dbtype or $class is missing (false).
sub get_all_by_class {
  my $self = shift;
  my ($dbtype, $class) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");
  $class  or throw("Need a class.");

  my $by_subclass = $self->{$dbtype}->{$class} || {};
  my %seen;

  foreach my $subclass (keys %$by_subclass) {
    # only the keys matter to the caller, so register them with a hash slice
    @seen{ keys %{ $by_subclass->{$subclass} } } = ();
  }

  return [ keys %seen ];
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 =head2 get_count_by_subclass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 Arg[3] : String $subclass - status identifier (e.g. 'mapped', 'lost')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 Example : my $num_mapped = $analyser->get_count_by_subclass('source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 'KNOWN-ensembl-protein_coding', 'mapped');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 Description : Gets the number of stable Ids for a given subclass.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 Return type : Int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 Exceptions : thrown on missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return the number of stable Ids stored under
# $self->{$dbtype}->{$class}->{$subclass} (0 when nothing is stored there).
# Throws when any of the three keys is missing (false).
sub get_count_by_subclass {
  my $self = shift;
  my ($dbtype, $class, $subclass) = @_;

  # argument check
  $dbtype   or throw("Need a dbtype (source|target).");
  $class    or throw("Need a class.");
  $subclass or throw("Need a subclass.");

  my $ids = $self->{$dbtype}->{$class}->{$subclass} || {};

  # keys() in scalar context yields the number of entries
  my $count = keys %$ids;

  return $count;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 =head2 get_count_by_class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 Arg[2] : String $class - key identifying a gene type (see class_key())
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 Example : my $num_mapped = $analyser->get_count_by_class('source',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 'KNOWN-ensembl-protein_coding');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 Description : Gets the number of stable Ids for a given class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 Return type : Int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 Exceptions : thrown on missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return the number of stable Ids that get_all_by_class() reports for the
# given $dbtype and $class. Throws when either argument is missing (false).
sub get_count_by_class {
  my $self = shift;
  my ($dbtype, $class) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");
  $class  or throw("Need a class.");

  my $stable_ids = $self->get_all_by_class($dbtype, $class);

  return scalar @$stable_ids;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 =head2 get_all_classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 Arg[1] : String $dbtype - db type ('source' or 'target')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 Example : foreach my $class (@{ $analyser->get_all_classes('source') }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 print "$class\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 Description : Gets a list of classes in the ResultAnalyser.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 Return type : Arrayref of String
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 Exceptions : thrown on missing argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return an arrayref of the keys of $self->{$dbtype}, sorted as strings
# (empty when nothing is stored for $dbtype).
# Throws when $dbtype is missing (false).
sub get_all_classes {
  my $self = shift;
  my ($dbtype) = @_;

  # argument check
  $dbtype or throw("Need a dbtype (source|target).");

  my $classes = $self->{$dbtype} || {};
  my @sorted  = sort keys %$classes;

  return \@sorted;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 =head2 class_key
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $gene - a gene object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 Example : my $class = $analyser->class_key($gene);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 Description : Generates a key identifying a gene class. This identifier is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 composed from the gene's status, logic name, and biotype.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 Return type : String
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build the class key for $gene: the results of its status(), logic_name()
# and biotype() methods, in that order, joined with '-'.
sub class_key {
  my $self = shift;
  my ($gene) = @_;

  my @parts;

  foreach my $accessor (qw(status logic_name biotype)) {
    # accessors are called in list context, exactly as a map block would
    push @parts, $gene->$accessor;
  }

  return join('-', @parts);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 =head2 write_results_to_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 Example : $analyser->write_results_to_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 Description : Writes the results of the result analysis to a file. This is a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 human-readable text detailing the mapping statistics.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write a per-class table of mapping statistics for the 'source' db type to
# 'gene_detailed_mapping_stats.txt' (opened through get_filehandle() with
# 'stats'). Each row shows the absolute number and the percentage of ids in
# the 'mapped', 'lost_similar' and 'lost_definite' subclasses. The class 'all'
# is skipped, and so is any class without ids (after logging a warning).
sub write_results_to_file {
  my ($self) = @_;

  my $fh = $self->get_filehandle('gene_detailed_mapping_stats.txt', 'stats');

  # layout of the header row, of a data row, and of a single percentage cell
  my $header_fmt  = "%-60s%-16s%-16s%-16s\n";
  my $row_fmt     = "%-60s%5.0f (%7s) %5.0f (%7s) %5.0f (%7s)\n";
  my $percent_fmt = "%3.2f%%";

  print $fh "Gene detailed mapping results:\n\n";

  my @titles = ("Gene type", "mapped", "lost (similar)", "lost (definite)");
  print $fh sprintf($header_fmt, @titles);

  print $fh ('-'x108), "\n";

  CLASS:
  foreach my $class (@{ $self->get_all_classes('source') }) {
    next CLASS if ($class eq 'all');

    my $total = $self->get_count_by_class('source', $class);

    # avoid division by zero error
    if (!$total) {
      $self->logger->warning("No count found for $class.\n", 1);
      next CLASS;
    }

    # one (count, percentage) pair per column, in column order
    my @cells = ($class);

    foreach my $subclass (qw(mapped lost_similar lost_definite)) {
      my $count = $self->get_count_by_subclass('source', $class, $subclass);
      push @cells, $count, sprintf($percent_fmt, $count/$total*100);
    }

    print $fh sprintf($row_fmt, @cells);
  }

  close($fh);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 =head2 create_clicklist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 Example : $analyser->create_clicklist;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 Description : Writes an html file which contains a list of all lost genes,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 with hyperlinks to the appropriate archive website. This is to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 manually check lost genes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write 'genes_lost.html' (opened through get_filehandle() with 'stats'): an
# XHTML page listing, for every 'source' class except 'all', the stable Ids in
# the 'lost_similar' and 'lost_definite' subclasses. Every Id is a hyperlink
# built from the 'urlprefix' configuration parameter followed by the Id.
# A navigation block at the top of the page links to each class/subclass
# section.
sub create_clicklist {
  my $self = shift;

  my $fh = $self->get_filehandle('genes_lost.html', 'stats');

  # start html output
  print $fh qq(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n);
  print $fh qq(<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb" lang="en-gb">);
  print $fh "<head>\n";
  print $fh "<title>Lost genes ";
  print $fh $self->conf->param('sourcedbname'), ' -&gt; ',
            $self->conf->param('targetdbname');
  print $fh "</title>\n";
  print $fh "</head>\n<body>\n";

  my $prefix = $self->conf->param('urlprefix');
  unless ($prefix) {
    $self->logger->warning("No urlprefix set, clicklists might not be useable.\n", 1);
  }

  # an unset prefix must not be interpolated as undef into every link
  $prefix = '' unless (defined $prefix);

  # start from empty strings so the page can be printed even when there are
  # no classes to report
  my $navigation = '';
  my $clicklist = '';

  foreach my $class (@{ $self->get_all_classes('source') }) {
    next if ($class eq 'all');

    $navigation .= "$class ";
    $clicklist .= "<h1>$class</h1>\n";

    foreach my $subclass (qw(lost_similar lost_definite)) {

      # the navigation link and the section heading share this fragment id
      my $anchor = "${class}-$subclass";

      # navigation
      $navigation .= qq(<a href="#$anchor">$subclass</a> );

      # clicklist; the id attribute is the target of the navigation link
      $clicklist .= qq(<h2 id="$anchor">$subclass</h2>\n);

      foreach my $stable_id (@{ $self->get_all_by_subclass('source', $class, $subclass) }) {
        $clicklist .= qq(<a href="${prefix}$stable_id">$stable_id</a><br />\n);
      }

    }

    $navigation .= "<br />\n";
  }

  # print navigation and clicklist
  print $fh "$navigation\n\n";
  print $fh "$clicklist\n\n";

  # html footer
  print $fh "</body></html>\n";

  close($fh);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 =head2 create_mapping_summary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 Example : $analyser->create_mapping_summary();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 Description : Writes a text file containing a summary of the mapping stats.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 This will be emailed to the genebuilder for evaluation (you will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 have to manually send the email, using the text in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 "mapping_summary.txt" as the template).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 Return type : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write 'mapping_summary.txt' (opened through get_filehandle()). The report
# contains, in this order: a title, the time of the run and the runtime
# reported by the logger, the configuration parameter listing, the contents
# of the per-type '*_mapping_stats.txt' files found in 'stats', the db upload
# flags, a table of output directories, and links for the first 10 genes
# marked 'known' in 'genes_lost.txt' (read from 'debug').
sub create_mapping_summary {
  my $self = shift;

  my $fh = $self->get_filehandle('mapping_summary.txt');

  #
  # title
  #
  print $fh qq(Stable ID mapping results\n);
  print $fh qq(=========================\n\n);

  #
  # timing
  #
  print $fh "Run at: ".localtime()."\n";
  print $fh "Runtime: ";
  print $fh $self->logger->runtime, "\n\n";

  #
  # parameters used for this run
  #
  print $fh $self->conf->list_param_values;
  print $fh "\n";

  #
  # mapping stats
  #
  foreach my $type (qw(exon transcript translation gene gene_detailed)) {
    my $filename = "${type}_mapping_stats.txt";

    if ($self->file_exists($filename, 'stats')) {
      print $fh $self->read_from_file($filename, 'stats');
      print $fh "\n\n";
    } else {
      print $fh "No mapping stats found for $type.\n\n";
    }
  }

  #
  # db uploads
  #
  my @uploads = (
    ['stable_ids' => 'Stable IDs'],
    ['events' => 'Stable ID events and mapping session'],
    ['archive' => 'Gene and peptide archive'],
  );

  my $fmt1 = "%-40s%-20s\n";

  print $fh qq(Data uploaded to db:\n);
  print $fh qq(====================\n\n);

  if ($self->conf->param('dry_run')) {

    print $fh "None (dry run).\n";

  } else {

    foreach my $u (@uploads) {
      # each upload is governed by its own 'upload_<name>' parameter
      my $uploaded = 'no';
      $uploaded = 'yes' if ($self->conf->is_true("upload_".$u->[0]));
      print $fh sprintf($fmt1, $u->[1], $uploaded);
    }

  }

  print $fh "\n";

  #
  # stats and clicklist
  #
  my @output = (
    ['stats' => 'statistics (including clicklists of deleted IDs)'],
    ['debug' => 'detailed mapping output for debugging'],
    ['tables' => 'data files for db upload'],
  );

  my $fmt2 = "%-20s%-50s\n";

  print $fh qq(\nOutput directories:\n);
  print $fh qq(===================\n\n);

  print $fh sprintf($fmt2, qw(DIRECTORY DESCRIPTION));
  print $fh ('-'x72), "\n";

  print $fh sprintf($fmt2, 'basedir', $self->conf->param('basedir'));

  foreach my $o (@output) {
    # '$basedir/' is printed literally (single quotes), it is not interpolated
    print $fh sprintf($fmt2, '$basedir/'.$o->[0], $o->[1]);
  }

  print $fh "\n";

  #
  # clicklist of first 10 deleted known genes
  #
  print $fh qq(\nFirst 10 deleted known genes:\n);
  print $fh qq(=============================\n\n);

  my $in_fh = $self->get_filehandle('genes_lost.txt', 'debug', '<');

  # an unset prefix must not be interpolated as undef into every link
  my $prefix = $self->conf->param('urlprefix');
  $prefix = '' unless (defined $prefix);

  # count the genes actually listed, not the lines read: entries of other
  # types must not use up the quota of 10
  my $listed = 0;

  while (my $line = <$in_fh>) {
    chomp $line;
    my ($stable_id, $type) = split(/\s+/, $line);

    # lines without a second column carry no type and are skipped
    next unless (defined $type and $type eq 'known');

    print $fh sprintf($fmt2, $stable_id, "${prefix}$stable_id");

    last if (++$listed >= 10);
  }

  close($in_fh);
  close($fh);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 =head2 read_from_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 Arg[1] : String $filename - name of file to read
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 Arg[2] : (optional) String $append - directory name to append to basedir
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 Example : my $stats_text = $analyser->read_from_file('gene_mapping_stats',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 'stats');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 Description : Reads mapping stats from a file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 Return type : String
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Read a whole file and return its contents as one string.
#
#   $filename - name of the file to read
#   $append   - (optional) passed on to get_filehandle() as its second
#               argument
#
# The file is opened through get_filehandle() with mode '<'. An empty file
# yields the empty string, so the return value is always defined.
sub read_from_file {
  my ($self, $filename, $append) = @_;

  my $in_fh = $self->get_filehandle($filename, $append, '<');

  # reading in list context leaves the caller's $_ untouched, and joining
  # the (possibly empty) list of lines always gives a defined string
  my $txt = join('', <$in_fh>);

  close($in_fh);

  return $txt;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794