annotate variant_effect_predictor/Bio/EnsEMBL/Utils/Collector.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Utils::Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 # Inherit this base module in your feature specific Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 # instance:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 package Bio::EnsEMBL::Funcgen::Collector::ResultFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 use base('Bio::EnsEMBL::Utils::Collector');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # ... and define package config variables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $Bio::EnsEMBL::Funcgen::Collector::bin_model = 'SIMPLE';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 $Bio::EnsEMBL::Funcgen::Collector::window_sizes =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 [ 30, 65, 130, 260, 450, 648, 950, 1296 ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 # Could replace 30 with 0 here for low density data at natural resolution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 $Bio::EnsEMBL::Utils::Collector::bin_method =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 'count'; # only used by collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 $Bio::EnsEMBL::Utils::Collector::packed_size = 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 # ... or simply use this module in a script either defining package
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # config variables, or passing as parameters to the constructor:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 my $collector =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 Bio::EnsEMBL::Utils::Collector->new( -pack_template => 'v' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 $Bio::EnsEMBL::Funcgen::Collector::pack_template = 'v';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 # Config variables can also be over-ridden by passing a config hash to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 # the store_window_bins_by_Slice() method:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 $collector->store_window_bins_by_Slice( $slice, (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 -pack_template => 'v',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 -packed_size => 2 ) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 # NOTE: Over-riding default config variables can cause problems when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 # storing or fetching data. e.g. Fetch may revert to using defaults or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 # table partitions may not match window sizes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 This package is the base Collector class which contains generic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 getter/setter methods along with the main 'collecting' methods which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 perform the majority of the work in generating compressed data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 collections optimised for web display. The bins produced are aimed at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 storage in a BLOB representing an entire seq_region i.e. even bins with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 no features/null data are encoded as a 0 score. Non-BLOB collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 are currently not supported.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 If your Collection class defines a Bio::EnsEMBL::Feature, then its
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 adaptor should inherit from the relevant Collection class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 The minimum prerequisites of the input features/data are that they have
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 a start() and end() method. For instance, Bio::EnsEMBL::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 generated from a database or parsed from a flat file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 NOTE: This Collector does not have a lightweight mode previously used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 for dynamic/on the fly collecting i.e. it does not take advantage of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 bypassing object creation via the related BaseFeatureAdaptor method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 package Bio::EnsEMBL::Utils::Collector;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 use Bio::EnsEMBL::Utils::Argument ('rearrange');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 use Bio::EnsEMBL::Utils::Exception ('throw');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 ### Global package config vars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 # Defaults
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 our $max_view_width = 1000000; # Max bp width in location/detailed view
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 our $max_data_type_size = 16777216; # Default is 16MB for long BLOB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 # This is really a guide value as this should be set in the inheriting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 # Collector class by deducting the rest of the row size from this value.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 # It is up to the inheritor to handle checking whether this size has been
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 # exceeded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 # NOTE: Theoretically the min window size is: slice_length/(16777216/2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 # So for human chr1: 249,250,621/(16777216/2) = 29.7 => 30. However,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 # this size does not seem to directly translate to the MySQL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 # max_allowed_packet_size. Increasing max_allowed_packet_size to 64MB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 # solves this issue, and substr operation doesn't appear to incur any of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 # the potential memory(4*) usage issues.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 # Other global package variables which are set in the inheriting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 # Collector class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 our ( $bin_model, $bin_method, $pack_template,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 $packed_size, $window_sizes );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 my $collector = Bio::EnsEMBL::XXX::Collector::FEATURE->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 $collector->store_window_bins_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 # Where XXX is, e.g. Compara, FuncGen etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Description: Simple new method to enable use of collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 when not inherited by a descendant of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 Returntype : Bio::EnsEMBL::XXX::Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 Caller : Collector script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Minimal constructor so that the Collector can be used directly, i.e.
# when it is not inherited by a BaseFeatureAdaptor descendant.
#
# Arg [1]    : class name, or an existing object whose class is reused
# Returntype : empty hash ref blessed into the resolved class
# Exceptions : None
#
# Do not set any attributes here: this class will not be first in @ISA
# for FeatureAdaptors, hence this constructor is not guaranteed to be
# called.
sub new {
    my ($caller) = @_;

    # Accept both Class->new() and $object->new(). Passing a reference
    # straight to bless() dies with "Attempt to bless into a reference".
    my $class = ref($caller) || $caller;

    return bless {}, $class;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 =head2 new_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Args : optional - string assembly version e.g. GRCh37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 Example : $collector->new_assembly('GRCh37');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 Description: Getter/Setter for new assembly version which should be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 used to project only 0 wsize Collections.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 Caller : store_window_bins_by_Slice() or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 write_collection() in inheriting Collector class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the new assembly version, stored on the object under
# the 'new_assembly' key.
#
# Arg [1]    : optional - string assembly version, e.g. GRCh37
# Returntype : string (undef if never set)
# Exceptions : None
sub new_assembly {
    my $self    = shift;
    my $version = shift;

    # Only a defined argument overwrites the stored value, so a plain
    # getter call never clears it.
    $self->{'new_assembly'} = $version if defined $version;

    return $self->{'new_assembly'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 ### Setter/Getter methods for basic/mandatory config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 # Can also be set using package variables in the inheriting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 # Collector/Adaptor or run script. Allows over-riding of defaults set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 # in Adaptor/Collector.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 # Package variables used here instead of attrs to enable easy
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 # default config in inheriting class/script method. Provided
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 # for easy/standardised fetch access outside of this package
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 # i.e. Collectors/Adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 =head2 max_data_type_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 Args : optional - int Maximum size of collection in bytes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 Example : $collector->max_data_type_size($new_max_size);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 Description: Getter/Setter for max_data_type_size, default is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 currently set in this class as 16777216 (16MB), for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 long BLOB. This is used by the write_collection()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 method to determine when to build and store a compressed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 collection.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 Caller : bins_per_record() and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 write_collection() in inheriting Collector class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the maximum collection size in bytes. The value is
# held in the package variable $max_data_type_size, so it is not stored
# per object.
#
# Arg [1]    : optional - int, new maximum size in bytes
# Returntype : int, the current maximum size
# Exceptions : Throws if the argument is not an unsigned integer, or if
#              no argument is passed and the package variable is undefined
sub max_data_type_size {
    my ( $self, $size ) = @_;

    if ( defined($size) ) {
        # Anchor with \A...\z rather than ^...$: '$' also matches before a
        # trailing newline, which let values such as "123\n" through.
        # [0-9] keeps the check to ASCII digits.
        if ( $size !~ /\A[0-9]+\z/ ) {
            throw("max_data_type_size must be a integer of bytes, not $size");
        }
        $max_data_type_size = $size;
    } elsif ( !defined($max_data_type_size) ) {
        # This should never happen as we have defaults in this module.
        throw( 'You must define a '
            . '$Bio::EnsEMBL::Utils::Collector::max_data_type_size '
            . 'or pass -max_data_type_size config' );
    }

    return $max_data_type_size;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 =head2 max_view_width
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 Args : optional - int Maximum width of view
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 Example : $collector->max_view_width($new_max_width);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 Description: Getter/Setter for max_view_width, default is currently
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 set in this class as 1000000bp, for maximum level of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 zoom permitted by location view.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the maximum view width in bp. The value is held in
# the package variable $max_view_width, so it is not stored per object.
#
# Arg [1]    : optional - int, new maximum width
# Returntype : int, the current maximum width
# Exceptions : Throws if the argument is not an unsigned integer, or if
#              no argument is passed and the package variable is undefined
sub max_view_width {
    my ( $self, $size ) = @_;

    if ( defined($size) ) {
        # Anchor with \A...\z rather than ^...$: '$' also matches before a
        # trailing newline, which let values such as "500\n" through.
        # [0-9] keeps the check to ASCII digits.
        if ( $size !~ /\A[0-9]+\z/ ) {
            throw("max_view_width must be a integer, not $size");
        }
        $max_view_width = $size;
    } elsif ( !defined $max_view_width ) {
        # This should never happen as we have defaults in this module.
        throw( 'You must define a '
            . '$Bio::EnsEMBL::Utils::Collector::max_view_width '
            . 'or pass -max_view_width config' );
    }

    return $max_view_width;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 =head2 bin_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 Args[0] : optional - string name of bin method e.g. 'max_magnitude'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 Args[1] : optional - Bio::EnsEMBL::Funcgen::Parsers::InputSet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 Example : my $bin_method = $self->bin_method();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 Description: Getter/Setter for bin_method, default is normally set in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 the inheriting Collector class either by package variable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 or by passing a config hash via the store methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 Exceptions : Throws if cannot set by package variable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the binning method name, held in the package
# variable $bin_method.
#
# Arg [1]    : optional - string name of bin method, e.g. 'max_magnitude'
# Arg [2]    : optional - config passed through to the _set_up_<method>
#              hook, if the object provides one
# Returntype : string, the current bin method
# Exceptions : Throws if no bin method is passed or already defined, or if
#              the object has no matching _calculate_<method> method
sub bin_method {
    my ( $self, $bmethod, $config ) = @_;

    # Validate a candidate first so that a rejected name never replaces
    # the current package-level setting.
    my $candidate = defined($bmethod) ? $bmethod : $bin_method;

    if ( !defined($candidate) ) {
        throw( 'You must define a '
            . '$Bio::EnsEMBL::Utils::Collector::bin_method '
            . 'or pass -bin_method config' );
    }

    if ( !$self->can( "_calculate_" . $candidate ) ) {
        throw("$candidate is not a valid/available binning method");
    }

    # Store before running the set-up hook, as the original did.
    $bin_method = $candidate;

    # Optional per-method set-up hook; runs on every call, including
    # plain getter calls.
    my $set_up_method = "_set_up_" . $bin_method;
    if ( $self->can($set_up_method) ) {
        $self->$set_up_method($config);
    }

    return $bin_method;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 =head2 bin_model
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Args : optional - string bin model e.g. SIMPLE or COMPLEX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 Example : my $bin_model = $self->bin_model;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 Description: Getter/Setter for bin_model, default should be set in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 inheriting Collector class. Currently only supports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 'SIMPLE' bin model.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 Exceptions : Throws if bin_model is not SIMPLE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the bin model, held in the package variable
# $bin_model. Only the 'SIMPLE' model is accepted.
#
# Arg [1]    : optional - string bin model
# Returntype : string, the current bin model
# Exceptions : Throws if no bin model is passed or already defined, or if
#              the bin model is not 'SIMPLE'
sub bin_model {
    my ( $self, $bmodel ) = @_;

    # Validate a candidate first so that an unsupported model never
    # replaces the current package-level setting.
    my $candidate = defined($bmodel) ? $bmodel : $bin_model;

    if ( !defined($candidate) ) {
        throw( 'You must define a '
            . '$Bio::EnsEMBL::Utils::Collector::bin_model '
            . 'or pass -bin_model config' );
    }

    if ( $candidate ne 'SIMPLE' ) {
        throw( 'Bio::EnsEMBL::Utils::Collector does not yet support '
            . 'non-SIMPLE bin models' );
    }

    $bin_model = $candidate;

    return $bin_model;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 =head2 window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 Args : optional - arrayref of window sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 foreach my $wsize ( @{ $collector->window_sizes } )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 { # Do some collecting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 Description: Getter/Setter for window_sizes. Default should be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 in inheriting Collector (if the config is dynamic),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 FeatureAdaptor class or script using package variable or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 this method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 NOTE: Redefining these may cause a mismatch with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 table partition definition.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 Returntype : arrayref of ints
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 Exceptions : Throws if cannot set a valid array of int window sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 Status : At Risk - rename bin_sizes?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the window sizes, held in the package variable
# $window_sizes.
#
# Arg [1]    : optional - arrayref of window sizes
# Returntype : arrayref, the current window sizes
# Exceptions : Throws unless the passed or already defined value is a
#              non-empty (unblessed) array ref. Individual elements are
#              not checked here.
sub window_sizes {
    my ( $self, $sizes ) = @_;

    # Validate a candidate first so that a bad argument never replaces
    # the current package-level setting.
    my $candidate = defined($sizes) ? $sizes : $window_sizes;

    # ref() returns '' for undef and for non-references, so this single
    # test covers "not a reference" as well as "wrong reference type".
    my $is_valid =
      ( ref($candidate) eq 'ARRAY' ) && ( scalar( @{$candidate} ) > 0 );

    if ( !$is_valid ) {
        throw('Must pass -windows_sizes in the config '
            . 'or define $Bio::EnsEMBL::Utils::Collector::window_sizes '
            . 'in your Collector as an array ref of integer window_sizes' );
    }

    $window_sizes = $candidate;

    return $window_sizes;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 =head2 has_window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Args : int - window size to validate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 Example : if( $collector->has_window_size('30') ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 #Do something with respect to the 30bp window size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 Description: Simple utility method to validate whether this Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 has a given window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 Returntype : Boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 Exceptions : Throws if window size not specified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 sub has_window_size{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 my ( $self, $size ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 if(! defined $size){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 throw('You must pass a window size to validate');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 return grep(/$size/, @$window_sizes);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 ### Getter/Setters for BLOB collection config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 # NOTE: Overriding the defaults here may cause a mismatch when the data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 # is retrieved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 pack_template

  Args       : optional - string perl 'pack' template
  Example    : $self->pack_template('v');
  Description: Getter/Setter for pack_template. Default should be set
               in inheriting Collector (if the config is dynamic),
               FeatureAdaptor class or script using package variable or
               this method.
  Returntype : string
  Exceptions : Throws if no template is passed and the pack_template
               package variable is not defined
  Caller     : FeatureAdaptor::_obj_from_sth
  Status     : At Risk

=cut

sub pack_template {
  my ( $self, $template ) = @_;

  # A defined argument replaces the package-level template.
  $pack_template = $template if defined $template;

  defined $pack_template
    or throw( 'Must pass a per score -pack_template in the config '
            . 'or define $Bio::EnsEMBL::Utils::Collector::pack_template '
            . 'in your Collector' );

  return $pack_template;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 packed_size

  Args       : optional - int size of perl 'pack' template in bytes
  Example    : $self->packed_size(2);
  Description: Getter/Setter for packed_size. Default should be set
               in inheriting Collector (if the config is dynamic),
               FeatureAdaptor class or script using package variable or
               this method. A new value is validated before it is
               stored, so a rejected argument leaves the previous
               packed_size untouched.
  Returntype : int
  Exceptions : Throws if no size is passed and the packed_size package
               variable is not defined
               Throws if the size is not a non-negative integer
  Caller     : current_packed_size() and
               FeatureAdaptor::_obj_from_sth()
  Status     : At Risk

=cut

sub packed_size {
  my ( $self, $size ) = @_;

  # Validate the value which is about to be used (the argument if given,
  # otherwise the current package variable) before committing it, so
  # that a bad argument cannot corrupt the shared package variable.
  my $candidate = defined($size) ? $size : $packed_size;

  if ( !defined($candidate) ) {
    throw( 'Must pass -packed_size(wrt to pack_template) config '
         . 'or define $Bio::EnsEMBL::Utils::Collector::packed_size '
         . 'in your Collector' );
  }

  # \A and \z anchor the whole string; '$' would accept a trailing
  # newline.
  if ( $candidate !~ /\A\d+\z/ ) {
    throw( "$candidate is not an integer, "
         . "must pass a size integer for packed_size "
         . "which specifies size of pack_template:\t"
         . ( defined($pack_template) ? $pack_template : '' ) );
  }

  $packed_size = $candidate;

  return $packed_size;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 bins_per_record

  Example    : my $bins_per_record = $self->bins_per_record;
  Description: Simple method to calculate the max number of bins
               allowed per record given the current config, i.e. the
               max_data_type_size package variable divided by the
               packed_size package variable, rounded down.
  Returntype : int
  Exceptions : None thrown explicitly; the package variables are used
               as they are, without validation
  Caller     :
  Status     : At Risk

=cut

sub bins_per_record {
  # Both operands are package variables; no arguments are read.
  my $bins = $max_data_type_size / $packed_size;

  return int($bins);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 current_packed_size

  Arg[0]     : int - window size
  Example    : my $cps = $self->current_packed_size($wsize);
  Description: Simple method to calculate the packed size of the scores
               currently held in the score cache for the given window
               size, i.e. the number of cached scores multiplied by
               the packed_size package variable.
  Returntype : int
  Exceptions : None thrown here; score_cache() is called with the given
               window size
  Caller     :
  Status     : At Risk

=cut

sub current_packed_size {
  my ( $self, $wsize ) = @_;

  my $cached_scores = $self->score_cache($wsize);

  return scalar( @{$cached_scores} ) * $packed_size;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 score_cache

  Arg[0]     : int - window size
  Arg[1]     : optional arrayref - scores to append to the cache for
               this window size
  Example    : my $scores = $self->score_cache($wsize);
  Description: Handles caching of bin scores for each window size
  Returntype : arrayref
  Exceptions : Throws if no window size defined
  Caller     : current_packed_size() and store_collection()
               methods
  Status     : At Risk

=cut

sub score_cache {
  my ( $self, $wsize, $scores ) = @_;

  throw('Must pass a window size argument') unless defined $wsize;

  # Create the per window cache on first use; the same array ref is
  # handed back on every call.
  my $cache = ( $self->{'score_cache'}{$wsize} ||= [] );

  push( @{$cache}, @{$scores} ) if defined $scores;

  return $cache;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 collection_start

  Arg[0]     : int - window_size
  Arg[1]     : optional int - seq_region_start
  Example    : $self->collection_start($wsize, 150);
               my $coll_start = $self->collection_start($wsize);
  Description: Getter/Setter collection seq_region_start
  Returntype : int
  Exceptions : Throws if no window size defined
  Caller     : store_window_bin_by_Slice() and write_collection()
  Status     : At Risk

=cut

sub collection_start {
  my ( $self, $wsize, $sr_start ) = @_;

  throw('Must pass a window size argument') unless defined $wsize;

  # Only a defined start overwrites the stored value.
  $self->{'collection_start'}{$wsize} = $sr_start if defined $sr_start;

  return $self->{'collection_start'}{$wsize};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 collection_end

  Arg[0]     : int - window_size
  Arg[1]     : optional int - seq_region_end
  Example    : $self->collection_end($wsize, 150);
               my $coll_end = $self->collection_end($wsize);
  Description: Getter/Setter collection seq_region_end
  Returntype : int
  Exceptions : Throws if no window size defined
  Caller     : inheriting Collector write_collection method
  Status     : At Risk

=cut

sub collection_end {
  my ( $self, $wsize, $sr_end ) = @_;

  if ( !defined($wsize) ) {
    throw('Must pass a window size argument');
  }

  if ( defined($sr_end) ) {
    $self->{'collection_end'}{$wsize} = $sr_end;
  }

  # Always return explicitly, as collection_start() and
  # collection_strand() do, rather than relying on the implicit value of
  # a trailing if statement when setting.
  return $self->{'collection_end'}{$wsize};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 collection_strand

  Arg[0]     : int - window_size
  Arg[1]     : optional int - seq_region_strand
  Example    : $self->collection_strand($wsize, 0);
               my $coll_strand = $self->collection_strand($wsize);
  Description: Getter/Setter collection seq_region_strand
  Returntype : int
  Exceptions : Throws if no window size defined
  Caller     : inheriting Collector write_collection method
  Status     : At Risk - Collections are currently strandless

=cut

sub collection_strand {
  my ( $self, $wsize, $strand ) = @_;

  defined($wsize) or throw('Must pass a window size argument');

  # A defined strand (including 0) overwrites the stored value.
  if ( defined($strand) ) {
    $self->{'collection_strand'}{$wsize} = $strand;
  }

  return $self->{'collection_strand'}{$wsize};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 ### Here follows the actual working methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 =head2 _get_Slice_chunks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 Description: Defines the optimal set of slice chunks to use for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 generating collections such that redundant fetches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 are minimized.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 Returntype : hashref of window_size chunk size pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 Exceptions : Throws if no window sizes or max_view_width defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 Caller : store_window_bin_by_Slice()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 sub _get_Slice_chunks {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 if ( !defined($window_sizes) || !defined($max_view_width) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 throw( 'You must pass both a window_size array ref '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 . 'and max_view_width arguments' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 if ( !defined( $self->{'_slice_chunks'} ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 # Calculate sensible slice length based on window sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 my @wsizes = sort { $a <=> $b } @$window_sizes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 # Handle calculating only 0 wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 if ( scalar(@wsizes) == 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 && $wsizes[0] == 0 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 return { $max_view_width => [0] };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 my $multiplier = int( $max_view_width/$wsizes[$#wsizes] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 my $chunk_length = $multiplier*$wsizes[$#wsizes];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 my $not_divisible = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 my %chunk_windows; # Registry of chunk lengths to run with windows
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 my %workable_chunks = map { $_ => {} } @wsizes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 # get rid of natural resolution as this will always work
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 delete $workable_chunks{'0'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 while ( $not_divisible && $chunk_length != 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 $not_divisible = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 foreach my $wsize (@wsizes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 if ( $wsize == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 # Special wsize for normal data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 # Set not divisible if modulus is true
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 if ( $chunk_length % $wsize ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 $not_divisible = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 $workable_chunks{$wsize}{$chunk_length} = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 # Gradually shrink the length until we find a workable slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 # length for all windows.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 if ($not_divisible) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 $chunk_length -= $wsizes[$#wsizes];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 my %chunk_sets;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 if ( $chunk_length == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 print "Could not find chunk length "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 . "for all window sizes, "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 . "attempting to subset windows "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 . "using alternate slice length\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 foreach my $wsize ( keys(%workable_chunks) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 # Loop through windows, seeing if they are workable in the other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 # windows.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 foreach my $chunk ( keys( %{ $workable_chunks{$wsize} } ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 foreach my $other_wsize ( keys %workable_chunks ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 next if $wsize == $other_wsize;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 if ( exists( $workable_chunks{$other_wsize}{$chunk} ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 # only push it onto the other wsize, as we will do the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 # reverse later
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 $chunk_sets{$chunk}{$wsize} = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 # %chunk_sets represents co-occurrence of wsizes with respect to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 # chunks. Take the set which has the most windows and the longest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 # chunk. Then get the largest which handles the rest.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 # define possible set lengths
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 my $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 my %set_lengths;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 map { $set_lengths{$i} = []; $i++ } @wsizes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 # get rid of natural resolution as this will always work
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 delete $set_lengths{'0'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 # Store chunks lengths for each set size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 foreach my $chunk ( keys(%chunk_sets) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 my $set_size = scalar( values( %{ $chunk_sets{$chunk} } ) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 push( @{ $set_lengths{$set_size} }, $chunk );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 # Get the biggest set with the longest length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 # Scalar here as we are disregarding natural resolution of 0 in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 # loop.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 my $largest_size = scalar(@wsizes);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 my $found_largest_set = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 while ( !$found_largest_set ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 $largest_size--;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 if ( scalar( @{ $set_lengths{$largest_size} } ) > 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 $found_largest_set = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 my ($largest_chunk) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 sort { $b <=> $a } @{ $set_lengths{$largest_size} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 my @largest_windows = keys %{ $chunk_sets{$largest_chunk} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 @{ $chunk_windows{$largest_chunk} } = @largest_windows;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 print "Largest chunk $largest_chunk($largest_size) "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 . "contains windows: @largest_windows\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 my %remaining_windows = map { $_ => {} } @wsizes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 # get rid of natural resolution as this will always work
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 delete $remaining_windows{'0'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 map { delete $remaining_windows{$_} } @largest_windows;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 my $remaining_set_size = scalar( keys(%remaining_windows) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 # Use array here for practicality, would need to maintain hash if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 # we need to iterate.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 my @rwindows = keys(%remaining_windows);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 # Could be one window, but this will not be in the co-occurrence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 # hash %chunk_sets.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 my $next_chunk;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 if ( scalar(@rwindows) == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 my ($last_window) = @rwindows;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 # Find a suitably large chunk for this one window.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 $multiplier = int( 500000/$last_window );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 $next_chunk = $multiplier*$last_window;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 foreach my $chunk ( sort { $b <=> $a }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 @{ $set_lengths{$remaining_set_size} } )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 my $seen_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 foreach my $rwindow (@rwindows) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 if ( grep /$rwindow/,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 ( values( %{ $chunk_sets{$chunk} } ) ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 $seen_count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 if ( $seen_count == $remaining_set_size ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 $next_chunk = $chunk;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 @{ $chunk_windows{$next_chunk} } = @rwindows;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 if ( defined($next_chunk) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 print "Found next chunk length $next_chunk "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 . "contains remaining windows:\t@rwindows\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 warn "Need to write iterative method for set definition";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 throw( 'Could not find workable slice length '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 . 'for remaining windows: '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 . join( ', ', @rwindows ) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 @{ $chunk_windows{$chunk_length} } = keys(%workable_chunks);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 print "Found workable chunk length $chunk_length "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 . "for all window sizes:\t"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 . join( ' ', @{ $chunk_windows{$chunk_length} } ) . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 $self->{'_slice_chunks'} = \%chunk_windows;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 } ## end if ( !defined( $self->...))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 return $self->{'_slice_chunks'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 } ## end sub _get_Slice_chunks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 =head2 set_config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 Arg[0] : optional hash - parameter hash(see above methods for more info):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 WINDOW_SIZES => array ref - subset of defined window
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 BIN_METHOD => string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 MAX_VIEW_WIDTH => int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 MAX_DATA_TYPE_SIZE => int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 PACK_TEMPLATE => string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 PACKED_SIZE => int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 BIN_MODEL => string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 NEW_ASSEMBLY => string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 METHOD_CONFIG => hash of method specific config params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 SKIP_ZERO_WINDOW => boolean - skips generation of 0 wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 this is used if already generated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 from an assembly projection.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 NOTE: Over-riding any of the default config may cause
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 problems when storing or retrieving Collection data,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 except sub sets of default window sizes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 Description: This method replaces the constructor as new will not be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 called for Adaptor based Collectors.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 Separating this from the store method is currently
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 redundant as jobs are normally submitted in Slice based
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 jobs. However, this will be required if the store method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 is further separated into fetch/generate and store methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 Exceptions : Throws if no window sizes or max_view_width defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 Caller : Inheritor Collector e.g. Bio::EnsEMBL::Funcgen::Collector::ResultFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 or script.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub set_config {
  my ( $self, %config ) = @_;

  # Applies the supplied config to this Collector through its accessor
  # methods, then derives the '_store_natural' and '_only_natural' flags.
  # Returns nothing; throws on incompatible window/assembly options.

  # Pull the recognised options out of the config hash.
  my ( $cfg_wsizes, $cfg_method, $view_width, $type_size,
       $template,   $pk_size,    $cfg_model,  $new_assm,
       $skip_zero,  $method_config )
    = rearrange( [ qw( WINDOW_SIZES     BIN_METHOD
                       MAX_VIEW_WIDTH   MAX_DATA_TYPE_SIZE
                       PACK_TEMPLATE    PACKED_SIZE
                       BIN_MODEL        NEW_ASSEMBLY
                       SKIP_ZERO_WINDOW METHOD_CONFIG ) ],
                 %config );

  ### Validate/set vars/config

  # Attributes consulted within this method.
  $self->bin_method( $cfg_method, $method_config );
  $self->bin_model($cfg_model);
  $self->window_sizes($cfg_wsizes);

  # Attributes consumed by the other (store) methods.
  $self->pack_template($template);
  $self->packed_size($pk_size);
  $self->max_data_type_size($type_size);
  $self->max_view_width($view_width);

  # Remaining state.
  $self->new_assembly($new_assm);
  $self->{'_only_natural'} = 0;

  # NOTE(review): $window_sizes is not declared in this sub; it is
  # presumably a file-level variable kept in step by window_sizes() --
  # confirm against the rest of the file.
  # '_store_natural' holds the number of entries that are exactly "0".
  $self->{'_store_natural'} = grep { /^0$/ } @{$window_sizes};

  ### Set window_sizes

  if ( $self->new_assembly() ) {
    print "Assembly projection may cause problems "
      . "for large Collections, "
      . "defaulting to window_sizes = (0)\n";

    if ($skip_zero) {
      throw(   "You cannot -skip_zero_window or "
             . "omit 0 from -window_sizes "
             . "when projecting to a new assembly($new_assm) "
             . "which should only be generated using window_size=0" );
    }

    # The bins are later built on the projected 0 level single Features,
    # so an explicitly passed window_sizes is only accepted when it is
    # exactly the one-element list (0).
    if ( defined($cfg_wsizes)
         && ( scalar( @{$cfg_wsizes} ) != 1 || $cfg_wsizes->[0] != 0 ) )
    {
      throw(   "You have set window_sizes config "
             . "which are not safe when projecting to "
             . "a new assembly($new_assm), "
             . "please omit window_sizes config or set to 0" );
    }

    $self->window_sizes( [0] );
  }
  elsif ( $cfg_wsizes && $skip_zero && ( grep { /^0$/ } @{$cfg_wsizes} ) )
  {
    # Only the passed parameters are tested here, not the default config.
    throw(   "You have specied skip_zero_window "
           . "and window_size 0 in your parameters, "
           . "please remove one of these" );
  }
  elsif ( defined($window_sizes)
          && !( grep { /^0$/ } @{$window_sizes} ) )
  {
    # No 0 window present: flag it as skipped for storage, but put it
    # back at the front of the list because it is needed to build the
    # collections.
    $skip_zero = 1;
    unshift( @{$window_sizes}, 0 );
  }

  # Natural resolution is the only level when 0 is stored and it is the
  # sole window size.
  if ( $self->{'_store_natural'} && scalar( @{$window_sizes} ) == 1 ) {
    $self->{'_only_natural'} = 1;
  }

  # Skipping the zero window always disables storing natural resolution.
  $self->{'_store_natural'} = 0 if $skip_zero;

  return;
} ## end sub set_config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 =head2 store_window_bins_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 Arg[0] : Bio::EnsEMBL::Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 Example : $collector->store_window_bins_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 Description: This is the main run method, it loops through
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 optimal slice chunks from _get_Slice_chunks,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 calls _bin_features_by_Slice_window_sizes as appropriate and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 calls write_collection in the inheriting Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 class/script.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 Exceptions : Throws if Bio::EnsEMBL::Slice is not defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977 Caller : store methods in inheriting Collector class/script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 sub store_window_bins_by_Slice {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 my ( $self, $slice ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 warn "Need to be careful here "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 . "about cleaning start end strand caches between "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 . "serially run slices";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989 if ( !( defined($slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 && ref($slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 && $slice->isa('Bio::EnsEMBL::Slice') ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 throw('You must pass a valid Bio::EnsEMBL::Slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 # Rollback previously stored features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 # Change 'can' to empty method stub with pod ???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 if ( $self->can('rollback_Features_by_Slice') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 $self->rollback_Features_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 warn ref($self)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002 . " cannot rollback_Features_by_Slice. "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003 . "This may result in storage failure "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 . "or duplicate Collections if there is pre-existing data";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 ### PROCESS CHUNKS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 my %chunk_windows = %{ $self->_get_Slice_chunks };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 my (%counts);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 my $store_natural = $self->{'_store_natural'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 my $only_natural = $self->{'_only_natural'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 $counts{0} = 0; # Set natural res count to 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 my $slice_end = $slice->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 my $orig_start = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 my $region = $slice->coord_system_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 my $version = $slice->coord_system->version;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 my $seq_region_name = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018 my $strand = $slice->strand;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 # Warn if this is not a full slice. Version needed in case we are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021 # projecting from a non-default version slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022 my $full_slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 $slice->adaptor->fetch_by_region( $region, $seq_region_name, undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 undef, undef, $version );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026 if ( ( $full_slice->start() != $orig_start )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 || ( $full_slice->end() != $slice_end ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 warn "Generating collections using sub-Slices "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 . "can result in data issues/artifacts";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 # Last chunk might not be the correct window length. Test
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 # slices less than chunk length can cause failures in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 # _bin_features_by_window_sizes others?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 # Set the initial collection_start to orig_start. This is not the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 # case for 0 wsize where it must always be the true feature start.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 for my $wsize (@$window_sizes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 if ( $wsize == 0 ) { next }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 $self->collection_start( $wsize, $orig_start );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 # Also reset collection end and score cache in case we are running
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 # serially.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 $self->{collection_end}{$wsize} = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 $self->{'score_cache'}{$wsize} = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048 my $first_chunk_length = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 foreach my $chunk_length ( sort keys %chunk_windows ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 print "Processing windows "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 . join( ', ', @{ $chunk_windows{$chunk_length} } )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 . " with chunk length $chunk_length\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 # Set window counts to 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 map $counts{$_} = 0, @{ $chunk_windows{$chunk_length} };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 # May need to reset flat file parser handle or other caches via
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 # inheriting Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 if ( !$first_chunk_length ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 # Change 'can' to empty method stub with pod???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 if ( $self->can('reinitialise_input') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 $self->reinitialise_input();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 $first_chunk_length = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 # Now walk through slice using slice length chunks and build all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 # windows in each chunk.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 my $in_slice = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 my $start_adj = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 my ( $sub_slice, $sub_end, $features, $bins );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 my $sub_start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075 my $slice_length = $slice->length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 # Always create in local coords for fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078 # Then change to seq_region coords for store if required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 while ($in_slice) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 $sub_start += $start_adj;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 $sub_end = $sub_start + $chunk_length - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 if ( $sub_end >= $slice_length ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085 # Surplus bins are removed in store/write_collection in caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086 $in_slice = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 $sub_slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 $slice->adaptor->fetch_by_region( $region, $seq_region_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091 $sub_start + $orig_start - 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092 $sub_end + $orig_start - 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 $strand, $version );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 # Can't subslice as this will not clip if we go over the length of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096 # the slice, unlike normal slice fetching. Will clipping the end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 # to the slice end cause any problems here? How will this affect
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 # bin clipping?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100 ### Grab features and shift chunk coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 $features = $self->get_Features_by_Slice($sub_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 # warn "Binning "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 # . scalar(@$features)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 # . " Features for chunk length $chunk_length, on Slice "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 # . $sub_slice->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 if ( ( @{$features} )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 && ref( $features->[0] ) =~ /Bio::EnsEMBL::Utils::Collection/ )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 # Would need to create base module with generic methods:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 # window_size, ...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 # Check that the returned feature/collections support window_size.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115 # All Collections should be able to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 if ( $features->[0]->can('window_size') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 if ( $features->[0]->window_size != 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 throw( "You are trying to generated Collections from "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 . "a non-zero window sized Collection:\t"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 . $features->[1]->{'window_size'} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 # This should never happen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 # if ( !$skip_zero_window ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 # throw( 'You have retrieved data from a Collection '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 # . 'which without using -skip_zero_window '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 # . 'i.e. you are trying to generate overwrite '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129 # . 'the data you are generating the Collections from' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 throw( 'Something is wrong, '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 . 'the Collection you have retrieved '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135 . 'does not support the method window_size' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 } ## end if ( ( @{$features} ) ...)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 # Set collection start here for 0 window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 if ( @{$features}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 && $store_natural
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142 && !defined( $self->collection_start(0) ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144 $self->collection_start( 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 $features->[0]->start + $sub_start );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 if ($in_slice) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 $start_adj = $chunk_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 # Collect features into wsize bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 if ( !$only_natural ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 # Get hashref of wsize=>bin array pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155 $bins =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156 $self->_bin_features_by_Slice_window_sizes(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157 -slice => $sub_slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 -window_sizes => $chunk_windows{$chunk_length},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159 -features => $features, );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 # Handle 0 wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163 if ($store_natural) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164 foreach my $feature ( @{$features} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165 $counts{0}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167 if ( $bin_model eq 'SIMPLE' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168 $self->collection_start( 0, $feature->start + $sub_start );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 $self->write_collection(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 $slice, # Pass Slice to sub-slice when storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173 $feature->end + $sub_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 $feature->strand, # Need to pass strand for 0 resolution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175 $feature->scores, );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179 print "Window size 0 (natural resolution) has "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 . scalar( @{$features} )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181 . " feature bins for:\t"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182 . $sub_slice->name . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185 # Now store collections for wsizes >0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186 my $num_bins;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188 foreach my $wsize ( sort keys( %{$bins} ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189 $num_bins = scalar( @{ $bins->{$wsize} } );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190 $counts{$wsize} += $num_bins;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192 if ( $bin_model eq 'SIMPLE' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193 $self->write_collection(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194 $wsize,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196 #$sub_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197 $sub_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 $slice->strand, # This is most likely 1!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199 # Override this woth 0 in descendant Collector if required.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200 $bins->{$wsize}, );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203 throw( 'Bio::EnsEMBL::Utils::Collector '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 . 'does not yet support non-SIMPLE bin models' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205 # i.e. More than one score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208 } ## end while ($in_slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 # Turn off storing of natural resolution for next chunk length sets
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211 $store_natural = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212 } ## end foreach my $chunk_length ( ...)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 # Write last collections for each wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 foreach my $wsize (@$window_sizes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 if ( ( $wsize == 0 && !$store_natural )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 || ( $wsize != 0 && $only_natural ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 print "Writing final $wsize window_size collection, "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225 . "this may result in slightly different "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 . "bin numbers from counts due to removing "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227 . "overhanging bins past end of slice\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 $self->write_collection( $wsize, $slice );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231 # Print some counts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232 foreach my $wsize ( sort ( keys %counts ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 print "Generated "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234 . $counts{$wsize}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235 . " bins for window size $wsize for "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236 . $slice->name . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237 # Some may have failed to store if we are projecting to a new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238 # assembly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 } ## end sub store_window_bins_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _bin_features_by_Slice_window_sizes

  Arg [-SLICE]        : Slice object providing a length() method,
                        e.g. Bio::EnsEMBL::Slice
  Arg [-WINDOW_SIZES] : ARRAYREF of non-zero window sizes
  Arg [-FEATURES]     : ARRAYREF of features with start and end methods,
                        e.g. Bio::EnsEMBL::Feature
  Example             :

      $bins =
        $self->_bin_features_by_Slice_window_sizes(
                      -slice        => $slice,
                      -window_sizes => $chunk_windows{$chunk_length},
                      -features     => $features, );

  Description: Bins features for a given list of window sizes using the
               '_calculate_<bin_method>' method, then applies the
               '_post_process_<bin_method>' method if the object
               supports it.  Bins are 0-indexed; every bin and every
               bin count starts at 0.  Features overhanging the start
               or end of the slice are clipped to the first/last bin.
  Returntype : HASHREF of window size => ARRAYREF of bin values
  Exceptions : None thrown explicitly.  A window size of 0 is not
               supported here (it would be used as a divisor).
  Caller     : store_window_bins_by_Slice
  Status     : At Risk

=cut

sub _bin_features_by_Slice_window_sizes {
  my ( $self, @args ) = @_;

  my ( $slice, $wsizes, $features ) =
    rearrange( [ 'SLICE', 'WINDOW_SIZES', 'FEATURES' ], @args );

  # Generate these once in caller?
  # $bin_method is not declared in this sub; it is expected to be in
  # scope from elsewhere in the file.
  my $calc_method = '_calculate_' . $bin_method;
  my $post_method = '_post_process_' . $bin_method;

  # Do this conditional on the Collection type i.e. is
  # collection seq_region blob then no else yes Would need
  # $Bio::EnsEMBL::Utils::Collector::collection_format=BLOB|STANDARD
  # if ( !defined($features) || !@{$features} ) { return {} }

  # Per window size: bin values, index of the last bin, per-bin counts
  my ( %bins, %last_bin, %bin_counts );
  my $slice_length = $slice->length();

  foreach my $wsize ( @{$wsizes} ) {
    # int rounds down, giving the index of the last (possibly partial)
    # bin, unless the slice length is an exact multiple of the window
    # size, in which case there is no partial bin at the end.
    my $last = int( $slice_length/$wsize );
    if ( !( $slice_length % $wsize ) ) { $last-- }
    $last_bin{$wsize} = $last;

    # One zeroed slot per bin for both the values and the counts.
    # The counts used to be indexed by bin *value* (always 0), which
    # only ever initialised the first element.
    $bins{$wsize}       = [ (0) x ( $last + 1 ) ];
    $bin_counts{$wsize} = [ (0) x ( $last + 1 ) ];
  }

  foreach my $feature ( @{$features} ) {

    foreach my $wsize ( @{$wsizes} ) {
      # NOTE(review): a position that is an exact multiple of $wsize
      # lands in the following bin with this arithmetic; confirm this
      # is intended for the coordinate system of the features.
      my $start_bin = int( $feature->start()/$wsize );
      my $end_bin   = int( $feature->end()/$wsize );

      # A feature overhanging the slice start by a window or more gives
      # a negative bin; used as an array subscript that would address
      # bins from the END of the array, so clip it to the first bin.
      if ( $start_bin < 0 ) {
        $start_bin = 0;
      }

      # Likewise clip features overhanging the end of the slice
      if ( $end_bin > $last_bin{$wsize} ) {
        $end_bin = $last_bin{$wsize};
      }

      $self->$calc_method( $feature, $start_bin, $end_bin,
                           $wsize, \%bins, \%bin_counts );
    }

  }

  # Now do post processing of bins if required
  if ( $self->can($post_method) ) {
    $self->$post_method( \%bins, \%bin_counts );
  }

  return \%bins;
} ## end sub _bin_features_by_Slice_window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336 # end sub _bin_features_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 ### Here follow the bin methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340 # These may also be defined in the inheriting Collector class. No tests
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 # as these are internal and require speed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _calculate_count

  Args[0]    : feature e.g. Bio::EnsEMBL::Feature (not inspected here)
  Args[1]    : int - start bin
  Args[2]    : int - end bin
  Args[3]    : int - window_size
  Args[4]    : hashref - score bins, keyed by window size
  Example    : $self->$calc_method( $feature, $start_bin, $end_bin,
                                    $wsize, \%bins, \%bin_counts );
  Description: Increments the count of every bin from the start bin to
               the end bin (inclusive) for the given window size.
               Nothing is changed if the end bin precedes the start bin.
  Returntype : None
  Exceptions : None
  Caller     : _bin_features_by_Slice_window_sizes
  Status     : At Risk

=cut

sub _calculate_count {
  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;

  # One hit per overlapped bin; the feature itself carries no weight
  $bins_ref->{$wsize}->[$_]++ for ( $start_bin .. $end_bin );

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _calculate_average_score

  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
  Args[1]    : int - start bin
  Args[2]    : int - end bin
  Args[3]    : int - window_size
  Args[4]    : hashref - score bins, keyed by window size
  Args[5]    : hashref - count bins, keyed by window size
  Example    : $self->$calc_method( $feature, $start_bin, $end_bin,
                                    $wsize, \%bins, \%bin_counts );
  Description: Adds the score of the feature (as returned by
               get_score_by_Feature) to every bin from the start bin
               to the end bin (inclusive) and increments the matching
               count bins.  The sums are turned into averages by
               _post_process_average_score.
  Returntype : None
  Exceptions : None
  Caller     : _bin_features_by_Slice_window_sizes
  Status     : At Risk

=cut


sub _calculate_average_score {
  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref,
       $bin_counts_ref )
    = @_;

  # Plain average of the scores of all features overlapping a bin:
  # neither the extent of the overlap nor the feature length is used
  # as a weight.
  my $feature_score = $self->get_score_by_Feature($feature);

  # We should really push onto an array here so we can have median or
  # mean.
  foreach my $bin ( $start_bin .. $end_bin ) {
    $bins_ref->{$wsize}->[$bin] += $feature_score;
    $bin_counts_ref->{$wsize}->[$bin]++;
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _post_process_average_score

  Args[0]    : hashref - score bins, keyed by window size
  Args[1]    : hashref - count bins, keyed by window size
  Example    : $self->$post_method( \%bins, \%bin_counts );
  Description: Post processes bins to calculate the average score:
               each summed score is divided in place by the count for
               the same bin.  Bins with no (or a zero) count are left
               untouched.
  Returntype : None
  Exceptions : None
  Caller     : _bin_features_by_Slice_window_sizes
  Status     : At Risk

=cut

sub _post_process_average_score {
  my ( $self, $bins_ref, $bin_counts_ref ) = @_;

  for my $window ( keys %{$bins_ref} ) {
    my $last_bin = $#{ $bins_ref->{$window} };

    for my $bin ( 0 .. $last_bin ) {
      my $hits = $bin_counts_ref->{$window}->[$bin];

      # Nothing was added to this bin, so there is nothing to average
      # (this also avoids dividing by zero).
      next if !$hits;

      $bins_ref->{$window}->[$bin] /= $hits;
    }
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _calculate_max_magnitude

  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
  Args[1]    : int - start bin
  Args[2]    : int - end bin
  Args[3]    : int - window_size
  Args[4]    : hashref - score bins, keyed by window size
  Example    : $self->$calc_method( $feature, $start_bin, $end_bin,
                                    $wsize, \%bins, \%bin_counts );
  Description: Records the lowest -ve and highest +ve score seen for
               every bin from the start bin to the end bin (inclusive).
               Each touched bin becomes a two element listref
               [ lowest -ve, highest +ve ], both starting at 0; the
               pair is collapsed to a single value by
               _post_process_max_magnitude.
  Returntype : None
  Exceptions : None
  Caller     : _bin_features_by_Slice_window_sizes
  Status     : At Risk

=cut

sub _calculate_max_magnitude {
  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;

  my $feature_score = $self->get_score_by_Feature($feature);

  # Max magnitude
  # Both extremes are tracked here; choosing between them has to wait
  # until every feature has been seen, so it is left to post processing.
  foreach my $bin ( $start_bin .. $end_bin ) {

    # An untouched (false) bin is replaced by a fresh pair
    my $extremes = ( $bins_ref->{$wsize}->[$bin] ||= [ 0, 0 ] );  #-ve, +ve

    if ( $feature_score < $extremes->[0] ) {
      $extremes->[0] = $feature_score;
    }
    elsif ( $feature_score > $extremes->[1] ) {
      $extremes->[1] = $feature_score;
    }
  }

  return;
} ## end sub _calculate_max_magnitude
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 _post_process_max_magnitude

  Args[0]    : hashref - score bins, keyed by window size
  Args[1]    : hashref - count bins (passed by the caller, not used)
  Example    : $self->$post_method( \%bins, \%bin_counts );
  Description: Post processes bins to pick the largest magnitude score:
               each [ lowest -ve, highest +ve ] pair is replaced in
               place by the -ve score if its magnitude is strictly
               greater than the +ve score, else by the +ve score.
               Bins holding a false value (nothing seen) are left as
               they are.
  Returntype : None
  Exceptions : None
  Caller     : _bin_features_by_Slice_window_sizes
  Status     : At Risk

=cut

sub _post_process_max_magnitude {
  my ( $self, $bins_ref ) = @_;

  for my $window ( keys %{$bins_ref} ) {
    my $last_bin = $#{ $bins_ref->{$window} };

    for my $bin ( 0 .. $last_bin ) {
      my $extremes = $bins_ref->{$window}->[$bin];

      # No listref in this bin means no feature overlapped it; the
      # default value already stored there is kept.
      next if !$extremes;

      # Compare magnitudes: flip the sign of the -ve extreme
      my $negative_magnitude = -$extremes->[0];

      $bins_ref->{$window}->[$bin] =
        ( $negative_magnitude > $extremes->[1] )
        ? $extremes->[0]
        : $extremes->[1];
    }
  }

  return;
} ## end sub _post_process_max_magnitude
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1537 =head2 _calculate_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1539 Args[0] : feature e.g. Bio::EnsEMBL::Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1540 Args[1] : int - start bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1541 Args[2] : int - end bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1542 Args[3] : int - window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543 Args[4] : hashref - score bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544 Example : $self->$calc_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545 Description: Stores counts used to calculate Reads Per Kb per Million (RPKM)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 Caller : _bin_features_by_window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _calculate_RPKM {
  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;

  # The per-feature step of RPKM is a plain count: gather the arguments
  # unchanged and hand them to _calculate_count. The conversion of those
  # counts into RPKM values is done by a separate post-processing step.
  my @count_args = ( $feature, $start_bin, $end_bin, $wsize, $bins_ref );

  $self->_calculate_count(@count_args);

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 =head2 _post_process_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 Args[0] : hashref - score bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566 Args[1] : hashref - count bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567 Example : $self->$post_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568 Description: Post processes bins to convert the stored read counts into RPKM values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1569 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1570 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1571 Caller : _bin_features_by_window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1572 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _post_process_RPKM {
  my ( $self, $bins_ref ) = @_;

  # Converts every bin, in place, from a raw count into
  #
  #   ( 10^9 * C ) / ( F * B )
  #
  # where
  #   C = reads overlapping the bin (the value currently stored),
  #   F = the factor registered for this window size via _RPKM_factor,
  #   B = the window size, i.e. the length of each bin.
  #
  # Non-ref/haplotype regions and gender are deliberately not accounted
  # for here; the difference is expected to be minuscule.

  for my $window_size ( keys %{$bins_ref} ) {

    for my $idx ( 0 .. $#{ $bins_ref->{$window_size} } ) {

      # Work through a reference to the slot so it is read and then
      # overwritten without repeating the lookup.
      my $slot = \$bins_ref->{$window_size}->[$idx];

      my $scaled_count = ( 10**9 ) * ${$slot};
      my $denominator  = ( $self->_RPKM_factor($window_size) ) * $window_size;

      ${$slot} = $scaled_count / $denominator;
    }
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601 =head2 _set_up_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603 Args[0] : hashref - method config e.g
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605 DNADB => Bio::EnsEMBL::DBSQL::DBAdaptor,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606 TOTAL_FEATURES => $total_feature_count,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609 Example : $self->$set_up_method($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 Description: Sets the RPKM factor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612 Exceptions : Throws if required config params are not set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613 Caller : bin_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1614 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1616 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1618
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _set_up_RPKM {
  my ( $self, $config ) = @_;

  # Pull the two settings this method needs out of the method config hash.
  my ( $dnadb, $total_features ) =
    rearrange( [ 'DNADB', 'TOTAL_FEATURES' ], %{$config} );

  # Checked explicitly so the message points the caller at the config hash.
  # A total of 0 is rejected as well as a missing value.
  throw(   "For RPKM you must pass a valid 'total_features' "
         . "as part of the method config hash." )
    unless $total_features;

  throw("For RPKM you must pass 'dnadb' as part of the method config hash.")
    unless $dnadb;

  # Register one factor per window size: window size * total features.
  # The window size should never be 0 here.
  for my $window_size ( @{ $self->window_sizes } ) {
    my $factor = $window_size * $total_features;

    $self->_RPKM_factor( $window_size, $factor );

    # Report the value actually stored, read back through the accessor.
    warn "setting $window_size RPKM factor($window_size * $total_features) to "
      . $self->_RPKM_factor($window_size);
  }

  return;
} ## end sub _set_up_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646 =head2 _RPKM_factor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 Args[0] : int - window size; Args[1] : int - optional RPKM factor to set i.e. (Total reads in the experiment *
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649 Genome length)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650 Example : $self->_RPKM_factor($wsize, $factor);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 Description: Gets/Sets the RPKM factor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654 Caller : _set_up_RPKM, _post_process_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _RPKM_factor {
  my ( $self, $wsize, $factor ) = @_;

  # Getter/setter for the RPKM factor of a single window size.
  #   $wsize  - window size the factor belongs to (required)
  #   $factor - when defined, stored as the new factor for $wsize
  # Returns the factor currently stored for $wsize.
  # Throws when $wsize is undefined, or when used as a getter for a
  # window size that has never been given a factor.

  throw("You must pass at least window_size to get or set the RPKM factor")
    unless defined $wsize;

  if ( !defined $factor ) {

    # Pure getter: this should never fail unless the window sizes are
    # redefined after initialisation.
    if ( !exists $self->{'RPKM_factor'}{$wsize} ) {
      throw( "You have requested an RPKM factor for a window_size"
           . " which has not been set:\t$wsize" );
    }
  }
  else {
    $self->{'RPKM_factor'}{$wsize} = $factor;
  }

  return $self->{'RPKM_factor'}{$wsize};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1679 =head2 get_diploid_genome_length_by_gender
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1681 Args[0] : Bio::EnsEMBL::DBSQL::DBAdaptor - core database adaptor (used to compute the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1682 Genome length)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1683 Args[1] : string - gender e.g. male or female
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1684 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1685
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1686 my $glength =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1687 $self->get_diploid_genome_length_by_gender( $dnadb, $gender );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689 Description: Gets the gender specific diploid genome length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690 including non-ref but not including haplotypes. Only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691 handles species with X/Y sex chromosomes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 Returntype : int
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 Caller : _set_up_RPKM, _post_process_RPKM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695 Status : At Risk - Move to and export from generic Utils Slice module???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_diploid_genome_length_by_gender {

  # Returns the gender specific diploid genome length in base pairs.
  #
  #   $dnadb  - Bio::EnsEMBL::DBSQL::DBAdaptor whose group() is 'core'
  #   $gender - exactly 'male' or 'female'
  #
  # May be called as a plain function ($dnadb, $gender) or, as in the
  # POD example, as a method ($self->...( $dnadb, $gender )); in the
  # latter case the invocant is discarded.
  #
  # Every toplevel slice is counted twice, except:
  #   * non-reference 'chromosome' slices (haplotypes) and 'lrg' slices
  #     are skipped, as they are redundant sequence;
  #   * for 'male', X and Y are each counted once;
  #   * for 'female', Y is skipped (X is counted twice like any autosome).
  # Only species with X/Y sex chromosomes are handled.
  #
  # Throws if either argument fails validation.

  my @args = @_;
  shift @args if @args > 2;    # drop the invocant of a method-style call
  my ( $dnadb, $gender ) = @args;

  # Sex chromosome name => the gender that it is restricted to counting for.
  my %sex_chrs = ( 'Y' => 'male',
                   'X' => 'female', );

  my $valid_dnadb = ref($dnadb)
    && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')
    && $dnadb->group() eq 'core';

  # Anchored so that only the two exact values are accepted.
  my $valid_gender =
    defined $gender && $gender =~ /\A(?:male|female)\z/;

  if ( !( $valid_dnadb && $valid_gender ) ) {

    # Avoid 'uninitialized' warnings while building the message.
    my $dnadb_text  = defined $dnadb  ? $dnadb  : 'undef';
    my $gender_text = defined $gender ? $gender : 'undef';

    throw(   "Must provide valid "
           . "Bio::EnsEMBL::DBSQL::DBAdaptor($dnadb_text) and "
           . "gender ($gender_text) arguments" );
  }

  my $dip_length = 0;

  # Include non-ref (unassembled) and duplicated regions, so that the
  # result is a true diploid length; haps/lrgs are filtered out below.
  my $slices =
    $dnadb->get_SliceAdaptor->fetch_all( 'toplevel', undef, 1, 1 );

SLICE:
  foreach my $slice ( @{$slices} ) {

    # Skip haps/lrgs
    my $cs_name = $slice->coord_system->name();

    if ( $cs_name eq 'lrg'
         || ( $cs_name eq 'chromosome' && !$slice->is_reference() ) )
    {
      next SLICE;
    }

    my $sr_name = $slice->seq_region_name();

    if ( exists $sex_chrs{$sr_name} ) {

      if ( $gender eq 'male' ) {

        # One copy each of X and Y; must not also be doubled below.
        $dip_length += $slice->length;
        next SLICE;
      }

      # Female: there is no Y to count.
      next SLICE if $sex_chrs{$sr_name} eq 'male';
    }

    $dip_length += 2 * $slice->length;
  }

  return $dip_length;
} ## end sub get_diploid_genome_length_by_gender
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749 1;