annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/Collector.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: Collector.pm,v 1.7 2011/01/10 11:27:34 nj1 Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Copyright (c) 1999-2011 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 developers list at <ensembl-dev@ebi.ac.uk>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 #Your Bio::Ensembl::Collection::Feature defs module should inherit from here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 #This could be a local defs file which you have created and require'd into your script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 #If your collections defs module refers to a Bio::EnsEMBL::Feature,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 #then its adaptor should inherit from the collections defs module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 package Bio::EnsEMBL::Funcgen::Collector;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 #Move this to Bio::EnsEMBL::Utils::Collector for 59?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 use Bio::EnsEMBL::Utils::Argument ('rearrange');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 use Bio::EnsEMBL::Utils::Exception ('throw');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use Bio::EnsEMBL::Funcgen::ResultFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 #use base('Bio::EnsEMBL::Collection');#ISA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 our ($pack_template, $packed_size, @window_sizes); #These get set in the FeatureAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 #Make these constants and remove setter functionality in methods?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 #Only really important for pack template and windows, maybe these if we are going to start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 our $max_data_type_size = 16777216; #Default is 16MB for long blob
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 #we need to deduct the size of the rest of the record here!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 #For a 2byte packet the smallest window size possible is:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 #(slice->length/(16777216/2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 #so int(bin_size)+1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 #Obviously have to use the largest slice here, for human chr1:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 #249,250,621/(16777216/2) = 29.7???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 #We may need to up this slightly to account for larger chrs?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 #Implications on memory usage? Is it 4 times for blob manipulation?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 #Does substr require this manipulation?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 #This max_allowed_packet_size does not seem to translate directly to the size of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 #data being stored e.g. quite a bit more is needed. ISG haven't got to the bottom of this yet.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 #But have simply upped the config to 67108864 to handle the largest human chr.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 our $max_view_width = 500000;#Max width in Region In Detail;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 #our %VALID_BINNING_METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 #Remove this in favour of can->('calculate_.$method) and coderefs?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 #To do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 # 1 DONE Merge in Collection code, (no need to do this, removed inheritance)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 # 2 Write simple BED input to flat file output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 # 3 Separate store method so we can simply get, then wrap store around this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 # 4 Test get method with slice adjusts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 # 5 separate set_config?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 # 6 optimise generate_bin_chunks to handle just one window size for display?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 # 7 Handle packed_size pack_template as methods constants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 # 8 Provide override method in basefeature adaptor which will use package constant in feature adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 # This is because these are really adaptor config, the collector only needs to know the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 # packed_size, and in the absence of an feature adaptor also provides the default methods for both.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 # If we substr in the API then we need to set sensible limits on blob size, otherwise we will have to unpack a lot of data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 # to get at the slice we want.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 # OR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 # Change adaptor to substr in DB based on known blob ranges/window size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 # and stitch together any which cross boundaries. This depends on speed of substr at end of large blob TEST!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 # Load with current code first and test this before making either change!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 # Delete empty (non-0) collections? i.e. For seq_regions which do not have any source features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 # 9 Handle PAR/HAP regions using fetch_normalised_slice_projections This has to be done in the feature adaptor! Then restrict to non_dup regions in calling script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 Example : my $collector = Bio::EnsEMBL::(Funcgen|Compara|Variation::)Collector::FEATURE->new;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 $collector->store_windows_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 Description: Simple new method to enable use of collector when not inherited by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 a descendant of Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 Returntype : Bio::EnsEMBL::Funcgen::Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 Caller : Collector script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub new {
  my ($class) = @_;

  #Deliberately sets no attributes: for feature adaptors this class
  #will not be first in ISA, so this constructor is not guaranteed
  #to be called.
  my $self = {};

  return bless $self, $class;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 #Setter/Getter methods if we don't have a dedicated Collector module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 #to set the package variables in? Also to allow overriding of defaults.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 #This can be used by the write_collection method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 #to determine when to build and store a compressed collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 #Effectively the max size of the data type you are using to store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 #a compressed score. defaults to max for long blob 16MB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 #Generic method, but only ever called by write_collection in descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the assembly to project to.
#Arg [1]  : optional new assembly, only stored when it is a true value
#Returns  : the currently stored assembly (undef if none has been set)
sub new_assembly {
  my ($self, $new_ass) = @_;

  #No validation of the new assembly is done here yet
  $self->{'new_assembly'} = $new_ass if $new_ass;

  return $self->{'new_assembly'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the maximum size (in bytes) of the data type used
#to store a compressed collection.
#Arg [1]  : optional positive integer number of bytes. A false value
#           (undef, 0, '') makes this call a plain getter.
#Returns  : the stored size; on first read without a stored value this
#           is initialised from
#           $Bio::EnsEMBL::Funcgen::Collector::max_data_type_size
#Throws   : if a true value is passed which is not a positive integer
sub max_data_type_size {
  my ($self, $size) = @_;

  if ($size) {

    #int() cannot be used to validate here: int('1.5') and int('12abc')
    #are both true, so test the string form for digits only.
    if ($size !~ /\A[0-9]+\z/ || $size == 0) {
      throw("max_data_type_size must be a integer of bytes, not $size");
    }

    $self->{'max_data_type_size'} = $size;
  }
  elsif (! defined $self->{'max_data_type_size'}) {
    #default set at head of this module
    $self->{'max_data_type_size'} = $Bio::EnsEMBL::Funcgen::Collector::max_data_type_size;
  }

  return $self->{'max_data_type_size'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the maximum view width.
#Arg [1]  : optional positive integer width. A false value
#           (undef, 0, '') makes this call a plain getter.
#Returns  : the stored width; on first read without a stored value this
#           is initialised from
#           $Bio::EnsEMBL::Funcgen::Collector::max_view_width
#Throws   : if a true value is passed which is not a positive integer
sub max_view_width {
  my ($self, $size) = @_;

  if ($size) {

    #int() cannot be used to validate here: int('1.5') and int('12abc')
    #are both true, so test the string form for digits only.
    if ($size !~ /\A[0-9]+\z/ || $size == 0) {
      throw("max_view_width must be a integer, not $size");
    }

    $self->{'max_view_width'} = $size;
  }
  elsif (! defined $self->{'max_view_width'}) {
    #default set at head of this module
    $self->{'max_view_width'} = $Bio::EnsEMBL::Funcgen::Collector::max_view_width;
  }

  return $self->{'max_view_width'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Number of whole packed bins which fit into one record, i.e.
#int(max_data_type_size / packed_size).
#Calculated on every call rather than cached, as either value may be
#redefined.
#No prototype is declared: an empty prototype is ignored for method
#calls and only served to break function-style calls.
sub bins_per_record {
  my ($self) = @_;

  return int($self->max_data_type_size() / $self->packed_size());
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 #The defaults for these should be defined in the feature/format specific Collector descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 #either by specifying the package variables or using config attrs to set methods?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 #general config should be parsed here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 #rename bin_method?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the binning method.
#Arg [1]  : optional method; stored when it is a true value. It is not
#           validated against the available calculate_* methods.
#Returns  : the stored method; when none is stored it is initialised
#           from $Bio::EnsEMBL::Funcgen::Collector::bin_method
#Throws   : if no method is passed or stored and the package variable
#           is undefined
sub bin_method {
  my ($self, $method) = @_;

  #Explicit value always wins
  return $self->{'bin_method'} = $method if $method;

  #Already configured
  return $self->{'bin_method'} if $self->{'bin_method'};

  #Fall back to the package default
  my $default = $Bio::EnsEMBL::Funcgen::Collector::bin_method;

  unless (defined $default) {
    throw('Must pass a bin_method in the config or define $Bio::EnsEMBL::Funcgen::Collector::bin_method in your Collector');
  }

  return $self->{'bin_method'} = $default;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 #We could replace this with a hash of bin_methods and models?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 #This could then be used to validate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 #Although if we are going to commoditise the bin methods, then we need to be able to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 #define this in the child Collector. Could still do this by modifying the method/model
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 #hash from the child Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the bin model. Only the 'SIMPLE' model is supported.
#Arg [1]  : optional bin model; used when it is a true value
#Returns  : the stored bin model; when none is stored it is initialised
#           from $Bio::EnsEMBL::Funcgen::Collector::bin_model
#Throws   : if no model is passed or stored and the package variable is
#           undefined, or if the model to be stored is not 'SIMPLE'
sub bin_model {
  my ($self, $bin_model) = @_;

  if ($bin_model || ! $self->{'bin_model'}) {
    my $candidate = $bin_model;

    if (! $candidate) {

      #Set as global constant defined in descendant Collector
      if (! defined $Bio::EnsEMBL::Funcgen::Collector::bin_model) {
        throw('Must pass -bin_model in the config or define $Bio::EnsEMBL::Funcgen::Collector::bin_model in your Collector');
      }

      $candidate = $Bio::EnsEMBL::Funcgen::Collector::bin_model;
    }

    #Validate before storing, so that a rejected model is never left on
    #the object for a later (unvalidated) getter call to return.
    throw('Bio::EnsEMBL::Funcgen::Collector does not yet support non-SIMPLE bin models') if $candidate ne 'SIMPLE';

    $self->{'bin_model'} = $candidate;
  }

  return $self->{'bin_model'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 #This can be overridden by adaptor method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 #At present this could cause problems as we can pass window sizes in the config, but they will never be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 #as adaptor method is not a setter. Adaptor method should throw if we try and set them as this could cause problems when fetching and not knowing the custom sizes?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the window sizes.
#Arg [1]  : optional arrayref of window sizes; used when it is a true
#           value. The passed reference itself is stored, not a copy.
#Returns  : arrayref of window sizes; when none is stored it is
#           initialised with a copy of
#           @Bio::EnsEMBL::Funcgen::Collector::window_sizes
#Throws   : if no sizes are passed or stored and the package array is
#           empty, or if the value to be stored is not a non-empty
#           arrayref
sub window_sizes {
  my ($self, $sizes) = @_;

  if ($sizes || ! $self->{'window_sizes'}) {
    my $candidate = $sizes;

    if (! $candidate) {

      if (! @Bio::EnsEMBL::Funcgen::Collector::window_sizes) {
        throw('Must pass -windows_sizes in the config or define @Bio::EnsEMBL::Funcgen::Collector::window_sizes in your Collector');
      }

      #Copy, so the object does not share the package array
      $candidate = [ @Bio::EnsEMBL::Funcgen::Collector::window_sizes ];
    }

    #Validate before storing, so that a rejected value is never left on
    #the object for a later (unvalidated) getter call to return.
    if (ref($candidate) ne 'ARRAY' ||
        scalar(@{$candidate}) == 0) {
      throw('window_sizes must be an arrayref of at least one window size');
    }

    $self->{'window_sizes'} = $candidate;
  }

  return $self->{'window_sizes'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 #Optional attrs dependant on whether Collection is packed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 #Can be redefined in the adaptor but be careful never to redefine the actual values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 #As these should really be constants for a given Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 #What is best here? We only want pack methods for storing/fetching compressed collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 #Move this to base feature adaptor and define attrs as constants using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 #package variable? Or directly in new?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 #Then direct modification will be caught.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 #Just leave here for now.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 #Caller _obj_from_sth/store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the per score pack template.
#Arg [1]  : optional template; stored when it is a true value
#Returns  : the stored template; when none is stored it is initialised
#           from $Bio::EnsEMBL::Funcgen::Collector::pack_template
#Throws   : if no template is passed or stored and the package variable
#           is undefined
sub pack_template {
  my ($self, $template) = @_;

  #Explicit value always wins
  return $self->{'pack_template'} = $template if $template;

  unless ($self->{'pack_template'}) {

    #Fall back to the global constant defined in descendant Collector
    my $default = $Bio::EnsEMBL::Funcgen::Collector::pack_template;

    throw('Must pass a per score pack_template in the config or define $Bio::EnsEMBL::Funcgen::Collector::pack_template in your Collector')
      if ! defined $default;

    $self->{'pack_template'} = $default;
  }

  return $self->{'pack_template'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 #Caller _obj_from_sth/store & current_packed_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the packed size of one pack_template.
#Arg [1]  : optional positive integer size. A false value
#           (undef, 0, '') makes this call a plain getter.
#Returns  : the stored size; when none is stored it is initialised
#           from $Bio::EnsEMBL::Funcgen::Collector::packed_size
#Throws   : if a true value is passed which is not a positive integer,
#           or if no size is passed or stored and the package variable
#           is undefined
sub packed_size {
  my ($self, $size) = @_;

  if ($size) {

    #int() cannot be used to validate here: int('1.5') and int('12abc')
    #are both true, so test the string form for digits only.
    if ($size !~ /\A[0-9]+\z/ || $size == 0) {
      throw("$size is not an integer, must pass a size integer for packed_size which specifies size of pack_template:\t".$self->pack_template);
    }

    $self->{'packed_size'} = $size;
  }
  elsif (! $self->{'packed_size'}) {

    #Set as global constant defined in descendant Collector
    if (! defined $Bio::EnsEMBL::Funcgen::Collector::packed_size) {
      throw('Must pass a packed_size(wrt to pack_template) in the config or define $Bio::EnsEMBL::Funcgen::Collector::packed_size in your Collector');
    }

    $self->{'packed_size'} = $Bio::EnsEMBL::Funcgen::Collector::packed_size;
  }

  return $self->{'packed_size'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 #These methods are used by the descendant Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 #For caching info whilst building collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 #This is used to log how big a collection has grown before storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Size of the score cache for a window size once packed, i.e. the number
#of cached scores multiplied by packed_size.
#Arg [1]  : window size, passed through to score_cache
#Returns  : number of cached scores * packed_size
sub current_packed_size {
  my ($self, $wsize) = @_;

  #Derived from the cache on every call rather than tracked separately
  my $cached_scores = $self->score_cache($wsize);
  my $num_scores    = scalar(@{$cached_scores});

  return ($num_scores * $self->packed_size);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Combined appender/getter for the per window size score cache.
#Arg [1]    : window size used as the cache key
#Arg [2]    : optional arrayref of scores to append to the cache
#Returntype : arrayref of cached scores when no scores are passed,
#             otherwise the result of the push (the new cache length)
sub score_cache{
  my ($self, $wsize, $scores) = @_;

  #Always make sure there is an array to hand back or append to
  my $cache = ($self->{'score_cache'}{$wsize} ||= []);

  #Getter: hand back the ref itself rather than a copy
  return $cache if ! defined $scores;

  return push(@{$cache}, @{$scores});
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 #These last methods are only used for the 0 wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 #natural resolution and are wrt the orig_slice passed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 #to store_windows_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the collection start stored against a window size.
#Arg [1]    : window size used as the key
#Arg [2]    : optional start value; only stored if defined
#Returntype : the stored start (undef if never set)
sub collection_start{
  my ($self, $wsize, $sr_start) = @_;

  #Nothing defined to store, so behave as a getter
  return $self->{'collection_start'}{$wsize} if ! defined $sr_start;

  return ($self->{'collection_start'}{$wsize} = $sr_start);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the collection end stored against a window size.
#Arg [1]    : window size used as the key
#Arg [2]    : optional end value; only stored if defined
#Returntype : the stored end (undef if never set)
sub collection_end{
  my ($self, $wsize, $sr_end) = @_;

  my $is_setter = defined $sr_end;

  return $is_setter
    ? ($self->{'collection_end'}{$wsize} = $sr_end)
    : $self->{'collection_end'}{$wsize};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Getter/setter for the collection strand stored against a window size.
#Arg [1]    : window size used as the key
#Arg [2]    : optional strand value; only stored if defined
#Returntype : the stored strand (undef if never set)
sub collection_strand{
  my ($self, $wsize, $strand) = @_;

  my $strand_for = ($self->{'collection_strand'} ||= {});

  unless(defined $strand){
    return $strand_for->{$wsize};
  }

  return ($strand_for->{$wsize} = $strand);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 sub _create_feature {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 my ( $this, $feature_type, $args ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 my $feature = $this->SUPER::_create_feature( $feature_type, $args );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 if ( !$this->_lightweight() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 my ( $phase, $end_phase, $stable_id, $version, $created_date,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 $modified_date, $is_current )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 = rearrange( [ 'PHASE', 'END_PHASE',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 'STABLE_ID', 'VERSION',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 'CREATED_DATE', 'MODIFIED_DATE',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 'IS_CURRENT'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 ],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 %{$args} );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 push( @{$feature},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 $phase, $end_phase, $stable_id, $version, $created_date,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 $modified_date, $is_current );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 return $feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 sub _create_feature_fast {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 my ( $this, $feature_type, $args ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 my $feature =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 $this->SUPER::_create_feature_fast( $feature_type, $args );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 return $feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 #This might not be sensible for Features which are split across tables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 sub _tables {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 my ($this) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 my @tables = $this->SUPER::_tables();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 if ( $this->_lightweight() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 return ( $tables[0] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 return @tables;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 sub _columns {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 my ($this) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 my @columns = $this->SUPER::_columns();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 if ( $this->_lightweight() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 #What is this doing?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 #Probably not sensible for ResultFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 @columns[ 5 .. $#columns ] = map( 1, 5 .. $#columns );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 return @columns;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 #Also not sensible for objects spread across several tables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 sub _default_where_clause {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 my ($this) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 if ( $this->_lightweight() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 return $this->SUPER::_default_where_clause();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 #This needs to be generic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 #Again we need to pass an accessor method/reference?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 #Will be some sort of generic fetch for feature adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 #or while loop for in flat file accessor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 #rollback to be handled in caller?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 # To do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 # 1 Allow variable chunks lengths (so we only have one resolution of windows?)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 # This will allow SNP collections which currently define classification i.e colour
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 # Density of SNPs within window will define shading. Count will be displayed in zmenu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 # This maybe something we have to do in the descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 # 2 Implement collection param definition in/from descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 # return collection config from adaptor fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 # window size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 # fixed width?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 # render/collection style?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 # This could be implemented in BaseFeatureAdaptor::generic_fetch?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 # Or could be done in the calling fetchmethod?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 #need to change this to get_window_bin_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 #to enable generating bins on uncompressed data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 #Need to remove all counts and store based code to store caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 #this would mean removing any pack based code too
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 #separate set_config method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 #Problem here is size of slice?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 #We need to generate bins all in one go, but also need to store at interval
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 #so as not to explode memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 #Do we need to separate the window generation from the bin generation code?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 #Define the optimal way to generate windowed data by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 #finding the most common denominator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
#Works out which chunk (slice) lengths should be used to generate each of
#the requested window sizes, such that every non-zero window size divides
#exactly into the chunk length it is assigned to.
#
#Candidate chunk lengths are the multiples of the largest window size which
#do not exceed the max view size. If one of them is divisible by every
#window size it is used for all windows. Otherwise the windows are split
#into two sets: the largest group of windows sharing a chunk length (longest
#such chunk wins), and the remaining windows which get a second chunk length.
#
#Arg [1]    : arrayref of window sizes; 0 denotes the natural resolution
#Arg [2]    : max view size, the upper bound for candidate chunk lengths
#Returntype : hashref of chunk length => arrayref of window sizes
#Exceptions : throws if no window sizes are passed, if no two window sizes
#             share any candidate chunk length, or if no chunk length can be
#             found for the windows left out of the largest set
#Caller     : general
#Status     : At risk - progress messages are printed to STDOUT
sub _define_window_chunks{
  my ($self, $window_sizes, $max_view_size) = @_;

  ### DEFINE CHUNKS WRT WINDOWS

  #Calculate sensible slice length based on window sizes
  my @wsizes = sort {$a <=> $b} @$window_sizes;

  if(! @wsizes){
    throw('Must provide at least one window size to define window chunks');
  }

  #We need a default when only calculating 0 resolution
  if((scalar(@wsizes) == 1) &&
     $wsizes[0] == 0){
    return { $self->max_view_width() => [0] };
  }

  my $largest_wsize = $wsizes[$#wsizes];
  my $multiplier    = int($max_view_size/$largest_wsize);
  my $chunk_length  = $multiplier * $largest_wsize;
  my $not_divisible = 1;
  my %chunk_windows;#Registry of chunk lengths to run with windows

  #Registry of window size => chunk lengths it divides into exactly
  my %workable_chunks = map { $_ => {} } @wsizes;
  delete $workable_chunks{'0'};#get rid of natural resolution as this will always work

  while($not_divisible && $chunk_length != 0){
    $not_divisible = 0;

    foreach my $wsize(@wsizes){
      next if $wsize == 0;#Special wsize for normal data

      #Set not divisible if modulus is true
      if($chunk_length % $wsize){
        $not_divisible = 1;
      }
      else{
        $workable_chunks{$wsize}{$chunk_length} = [];
      }
    }

    #Gradually shrink the length until we find a workable slice length for all windows
    $chunk_length -= $largest_wsize if $not_divisible;
  }

  if($chunk_length == 0){
    print "Could not find chunk length for all window sizes, attempting to subset windows using alternate slice length\n";

    #Register of co-occurence of wsizes with respect to chunks:
    #chunk length => { window size => undef }
    #Only chunk lengths which work for at least two window sizes are recorded
    my %chunk_sets;

    foreach my $wsize(keys %workable_chunks){

      foreach my $chunk(keys %{$workable_chunks{$wsize}}){

        foreach my $other_wsize(keys %workable_chunks){
          next if $wsize == $other_wsize;

          if(exists $workable_chunks{$other_wsize}{$chunk}){
            #only record this wsize, as the reverse is done when we loop over other_wsize
            $chunk_sets{$chunk}{$wsize} = undef;
          }
        }
      }
    }

    #Store chunk lengths for each possible set size (number of windows in a set)
    my %set_lengths = map { $_ => [] } (1..$#wsizes);

    foreach my $chunk(keys %chunk_sets){
      my $set_size = scalar(keys %{$chunk_sets{$chunk}});
      push @{$set_lengths{$set_size}}, $chunk;
    }

    #Now we get the biggest set with the longest length
    #Bounded at a set size of 1 so we fail cleanly when no windows share a chunk
    my $largest_size      = scalar(@wsizes);
    my $found_largest_set = 0;

    while((! $found_largest_set) && ($largest_size > 1)){
      $largest_size--;

      if(scalar(@{$set_lengths{$largest_size} || []}) > 0){
        $found_largest_set = 1;
      }
    }

    if(! $found_largest_set){
      throw('Could not find a chunk length shared by any subset of window sizes: '.
            join(', ', @wsizes));
    }

    #we could even be selective here, but let's just take the longest one for now
    my ($largest_chunk) = sort {$b<=>$a} @{$set_lengths{$largest_size}};

    my @largest_windows = keys %{$chunk_sets{$largest_chunk}};
    @{$chunk_windows{$largest_chunk}} = @largest_windows;
    print "Largest chunk $largest_chunk($largest_size) contains windows: @largest_windows\n";

    #Work out which non-zero windows are not handled by the largest set
    my %remaining_windows = map { $_ => {} } @wsizes;
    delete $remaining_windows{'0'};#get rid of natural resolution as this will always work
    delete @remaining_windows{@largest_windows};
    my @rwindows = keys %remaining_windows;

    my $next_chunk;

    if(scalar(@rwindows) == 1){
      #A single window will not be in the co-occurence hash %chunk_sets
      #so we just want to find a suitably large chunk for this one window.
      #NOTE(review): 500000 is hard-coded here rather than using $max_view_size - confirm intent
      my ($last_window) = @rwindows;

      $multiplier = int(500000/$last_window);
      $next_chunk = $multiplier * $last_window;
    }
    elsif(scalar(@rwindows) > 1){
      #Take the longest chunk whose set contains every remaining window.
      #All co-occurring chunks are considered: the largest window size is a
      #member of every set, so a set of exactly the remaining size could
      #never hold all of the remaining windows.
      foreach my $chunk(sort {$b<=>$a} keys %chunk_sets){
        my @missing = grep { ! exists $chunk_sets{$chunk}{$_} } @rwindows;

        if(! @missing){
          $next_chunk = $chunk;
          last;
        }
      }
    }

    if($next_chunk){
      #Only register the chunk once we know we actually have one
      @{$chunk_windows{$next_chunk}} = @rwindows;
      print "Found next chunk length $next_chunk contains remaining windows:\t@rwindows\n";
    }
    else{
      warn "Need to write iterative sub for set definition";
      throw('Could not find workable slice length for remaining windows: '.
            join(', ', @rwindows));
    }
  }
  else{
    @{$chunk_windows{$chunk_length}} = keys(%workable_chunks);
    print "Found workable chunk length($chunk_length) for all window sizes:\t".
      join(' ', @{$chunk_windows{$chunk_length}})."\n";
  }

  return \%chunk_windows;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 #Let's concentrate on store function first before we split out into store and fetch methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 #How will this work with the Bed parser?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 #The descendant collector will sort the input and detect the current slice before calling
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 #store_window_bins_by_Slice. This may require some caching of line or seeking as we will see the next slice before we have a chance to set it.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 #This will store as ResultFeature collections, so maybe we need to separate the input from output code?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 #i.e. Bed parser/wrapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 # ResultFeatureAdaptor wrapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 #These
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 #Problem with passing window_sizes here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 #We need to check that they aren't already defined as class variables as this could potentially
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 #screw up retrieval, except for only 0 or all but 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 #Should we remove this config and force the class variable to be set in the 'adaptor'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 #Method is then only used internally, make private or only getter? Set by changing class vars?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 sub store_window_bins_by_Slice{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 my ($self, $slice, %config) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 my ($window_sizes, $logic_name, $bin_method, $fetch_method_ref, $max_view_width,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 $max_data_type_size, $pack_template, $packed_size, $bin_model, $new_assm, $skip_zero_window) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 rearrange( [ 'WINDOW_SIZES', 'LOGIC_NAME', 'BIN_METHOD', 'FETCH_METHOD_REF', 'MAX_VIEW_WIDTH', 'MAX_DATA_TYPE_SIZE', 'PACK_TEMPLATE', 'PACKED_SIZE', 'BIN_MODEL', 'NEW_ASSEMBLY', 'SKIP_ZERO_WINDOW'], %config );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 warn "Need to be careful here about cleaning start end strand caches between serially run slices";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 ### VAILDATE VARS/CONFIG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 #This could be done once in set_config, could then remove setter bahviour from attr methods?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 #All default defs params/methods can be overridden by config params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 #Attrs used in this method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 $bin_method = $self->bin_method($bin_method);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 $bin_model = $self->bin_model($bin_model);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 #$window_sizes = $self->window_sizes($window_sizes);#Now done below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 #Set to undef if we ave empty array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 $window_sizes = undef if (ref($window_sizes) eq 'ARRAY' && scalar(@$window_sizes) == 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 #Attrs used in other (store) methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 $self->pack_template($pack_template);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 $self->packed_size($packed_size);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 $self->max_data_type_size($max_data_type_size);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 $self->max_view_width($max_view_width);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 #Other vars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 $self->new_assembly($new_assm);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 #Need to validate slice here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 warn "temp hack for bin_method validation";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 $bin_method = $self->validate_bin_method($bin_method);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 ### Set window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 if($self->new_assembly){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 print "Assembly projection may cause problems for large Collections, defaulting to window_sizes = (0)\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 #Then build the bins on the projected 0 level single ResultFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 #Test we haven't explicity set window_sizes to be soemthing else
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 if($window_sizes &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 ! ( scalar(@$window_sizes) == 1 && $window_sizes[0] == 0)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 throw("You have set window_sizes config which are not safe when projecting to a new assembly($new_assm), please omit window_sizes config or set to 0");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 $window_sizes = $self->window_sizes([0]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 if($window_sizes && $skip_zero_window && grep/^0$/,@$window_sizes){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 throw("You have specied skip_zero_window and window_size 0 in your config, please remove one of these");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 elsif($window_sizes && ! grep/^0$/,@$window_sizes){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 $skip_zero_window = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 unshift @$window_sizes, 0;#re-add 0 window as we need this to build the collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 $window_sizes = $self->window_sizes($window_sizes);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 #This is already done in the script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 if($skip_zero_window && $new_assm){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 throw("You cannot -skip_zero_window or omit 0 from -window_sizes when projecting to a new assembly($new_assm) which should only be generated using window_size=0");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 ### Rollback previously stored features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 if($self->can('rollback_Features_by_Slice')){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 $self->rollback_Features_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 #This is currently the only warn output we can't get rid off
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 warn ref($self)." cannot rollback_Features_by_Slice. This may result in duplicate Collections being stored if there is pre-existing data";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 ### PROCESS CHUNKS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 #Not lightweight as we will be storing them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 # Temporarily set the collection to be lightweight???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 #my $old_value = $this->_lightweight();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 #if ( defined($lightweight) ) { $this->_lightweight($lightweight) }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 #else { $this->_lightweight(1) }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 my %chunk_windows = %{$self->_define_window_chunks($self->window_sizes, $self->max_view_width)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 my (%counts, $store_natural);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 $store_natural = grep/^0/, @$window_sizes;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 $counts{0}=0;#Set natural res count to 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 my $slice_end = $slice->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 my $orig_slice = $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 my $orig_start = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 #my $slice_adj = $slice->start - 1;#Removed this as we are now generating features local to orig_slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 #start/end conversion will be done in write/store_collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 my $region = $slice->coord_system_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 my $version = $slice->coord_system->version;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 my $seq_region_name = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 my $strand = $slice->strand;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 my $only_natural = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 #my $slice_adj = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 #We need to account for only 0 here when doing projection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 #The chunk window is set to max_view_widht in _define_chunk_windows
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 $only_natural = 1 if $store_natural && scalar(@$window_sizes) == 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 $store_natural = 0 if $skip_zero_window;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 #SHould really test these two, but should already be caught by now
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 #Set the initial collection_start to orig_start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 #Could default to 1, but we may not be starting from 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 #This is not the case for 0 wsize where it must always be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 #The first feature start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 for my $wsize(@{$self->window_sizes}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 next if $wsize == 0;# && $skip_zero_window;#We never want to assume start of 0 window collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 $self->collection_start($wsize, $orig_start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 foreach my $chunk_length(sort keys %chunk_windows){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 print "Processing windows ".join(', ', @{$chunk_windows{$chunk_length}}).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 " with chunk length $chunk_length\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 map $counts{$_} = 0, @{$chunk_windows{$chunk_length}}; #Set window counts to 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 #Now walk through slice using slice length chunks and build all windows in each chunk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 my $in_slice = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 my $start_adj = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 my ($sub_end, $features, $bins);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 my $sub_start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 my $slice_length = $slice->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 #Can we subslice and then exclusivly use bin_start(local to orig_slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896 #Then we never have to deal with sr coord until we store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897 #This should me we never have to do the sr conversion unless we
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 #use a slice which doesn't start at 1(PAR or test)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 #Always create in local coords for fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 #Then change to seq_region coords for store if required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 while($in_slice){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 #$sr_start = $slice_start + $start_adj;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 $sub_start += $start_adj;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 #$slice_start = $sr_start;#Keep for next slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 #$sr_end = $sr_start + $chunk_length - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 $sub_end = $sub_start + $chunk_length - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 #Last chunk might not be the correct window length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 #Hence why we should do this on whole chromosomes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 if($sub_end >= $slice_length){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 #$sub_end = $slice_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 #No longer set to slice end, as we don't want to corrupt the bin definition?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 #Surplus bins are removed in store/write_collection in caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 #We could simply add the largest window the the end of the slice?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 #Then we will only build the minimum of excess bins?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 #This should be okay for bin calcs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 #But may screw up bin trimming in caller as we currently expect $ub_end to be a valid bin end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 #for all wsizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922 #bin trimming should handle this, but this will corrupt the bin definition???
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 #bin definition is depedant on method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 #So this method need to be agnostic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 #And deal with the rest in descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 $in_slice = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 $slice = $slice->adaptor->fetch_by_region($region, $seq_region_name, ($sub_start + $orig_start -1), ($sub_end + $orig_start - 1), $strand, $version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 #Can't subslice as this will not clip if we go over the length of the slice, unlike normal slice fetching
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 #hence we cannot rely on this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 #$slice = $orig_slice->sub_Slice($sub_start, $sub_end, $orig_slice->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 #warn "got sub slice $slice as $sub_start - $sub_end from ".$orig_slice->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 ### Grab features and shift chunk coords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 #features may already be a 0 wsize collection if we have projected from an old assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 #Could move this check to get_Features_by_Slice?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 #e.g. [ $features, \%config ]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 $features = $self->get_Features_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 #next if scalar(@$features) == 0;#We want to store values for all windows
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 if( (@$features) &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 (ref($features->[0]) =~ /Bio::EnsEMBL::Funcgen::Collection/) ){#Change to isa 'Bio::EnsEMBL::Collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 #Check that the returned feature/collections support window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 if($features->[0]->can('window_size')){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 if($features->[0]->window_size != 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953 throw("You are trying to generated Collections from a non-zero window sized Collection:\t".$features->[1]->{'window_size'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 #This should never happen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957 if(! $skip_zero_window){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 throw('You have retrieved data from a Collection which without using -skip_zero_window i.e. you are trying to generate overwrite the data you are generating the Collections from');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 throw('Something si wrong, the Collection you have retrieved does not support the method window_size');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 #Set collection start here for 0 window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 if(@$features && $store_natural && ! defined $self->collection_start(0)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 $self->collection_start(0, ($features->[0]->start + $sub_start));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 $start_adj = $chunk_length if($in_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 #This should return a hash of window size => bin array pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 if(! $only_natural){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 $bins = $self->_bin_features_by_window_sizes(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 -slice => $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 -window_sizes => $chunk_windows{$chunk_length},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 -bin_method => $bin_method,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 -features => $features,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 #my $bin_start = $sr_start + $slice_adj;#This was only required for storing individual bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 #Could calc bin_start + slice_adjust ahere for all features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 #Doing this will break old code for single window collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 #This is sr start and should be local to orig_slice!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003 #We need to handle strandedness of slice!?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 #Store all normal features in result_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 if($store_natural){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 foreach my $feature(@$features){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 $counts{0}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 #warn "storing ".join(', ', ($feature->start, $feature->end, $feature->strand, $feature->scores->[0]));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 #Should we handle bin trimming here for overhanging slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 #Then counts wil be correct and wont have to do in caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019 #We could stop here if the feature seq_region start > orig_slice end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 #Current done in write/store_collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021 #This may mean working in seq_region values rather than slice values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 #write_collection is implemented in descendant e.g. Bio::EnsEMBL::Funcgen::Collector::ResultFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025 #as wrapper to adaptor store method or print to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 #These params need to be generated in a way defined by the descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 if($bin_model eq 'SIMPLE'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 #We need to pass the slice with this so we can sub slice when storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 #the collection and set the start/end to 1 and length of slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 #we still need to store the first start to be able to sub slice correctly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 $self->collection_start(0, ($feature->start + $sub_start));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 #Need to pass strand for 0 resolution
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 $self->write_collection(0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 $orig_slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 #These are now wrt orig_slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 #($feature->start + $sub_start),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 ($feature->end + $sub_start),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 $feature->strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 $feature->scores,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 #We can have problems here if the original score type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048 #does not match the collected score type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 #For max magnitude this is not an issue
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 #as we take the larget value from the bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 #But for other methods this may not be true
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 #e.g. count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 #Hence, if we want to preserve the 0 window
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 #We must account for this in the feature collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 #e.g. set_collection_defs_by_ResultSet_window_size?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 #Just omit 0 window for reads
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 print "Window size 0 (natural resolution) has ".scalar(@{$features})." feature bins for:\t".$slice->name."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 #Now store bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 # my ($bin_end, $bin_scores);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 my $num_bins;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 foreach my $wsize(sort keys %{$bins}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 $num_bins = scalar(@{$bins->{$wsize}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 #warn "$num_bins bin scores for $wsize:\t".join(',', @{$bins->{$wsize}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 #Should we handle bin trimming here for overhanging slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 #Then counts wil be correct and wont have to do in caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 $counts{$wsize}+= $num_bins;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 #We don't need this loop for collections as we can simply push all the scores at once
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 #Just use the slice start and end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 if($bin_model eq 'SIMPLE'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 $self->write_collection($wsize,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085 $orig_slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086 #$sub_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 $sub_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088 $orig_slice->strand,#This is most likely 1!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 #Override this woth 0 in descendant Collector if required.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 $bins->{$wsize},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 throw('Bio::EnsEMBL::Funcgen::Collector does not yet support non-SIMPLE bin models');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096 #i.e. More than one score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 # #Reset start and end for new wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102 # $bin_start = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 # $bin_end = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 # #We don't need this loop for collections as we can simply push all the scores at once
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 # foreach my $bin_index(0..$#{$bins->{$wsize}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 # #default method to handle simple fixed width bin?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115 # #bin_end need to be defined dependant on the bin type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 # #($bin_start) = $self->process_default_bin($bins->{$wsize}->[$bin_index], $wsize);#?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 # #either define default bin method in descendant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 # #Or can we set a process_bin_method var?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 # #No just pass all this info to write collection and handle it there?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 # #Can we have just predefined rotueines handling different bin types?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 # #Simple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 # #Simple compressed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 # #Clipped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 # #This will prevent hanving to make attrs/method for storing persistent start/end/score info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 # #Need validate bin_type method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 # #Could convert these to numbers for speed as with binning methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135 # if($bin_model eq 'SIMPLE'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 # $bin_scores = $bins->{$wsize}->[$bin_index];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 # warn "bin scores is $bin_scores";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142 # #next if ! $bin_score;#No we're no inc'ing the start ends for bins with no scores
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144 # $bin_end += $wsize;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 # #if($bin_score){#Removed this as we always want to write the score even if it is 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 # #This is a little backwards as we are generating the object to store it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 # #If we are aiming for speed the maybe we could also commodotise the store method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 # #store by args arrays? store_fast?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 # #Speed not essential for storing!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 # #Note: list ref passed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155 # #Don't need to pass all this info for fixed width blob collections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156 # #Need to write some default handlers depedant on the collection type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157 # #Simple(original)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 # #Simple compressed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159 # #Multi compressed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 # #Clipped uncompressed?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163 # $self->write_collection($wsize,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164 # $orig_slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165 # ($bin_start + $slice_adj),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166 # ($bin_end + $slice_adj),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167 # $orig_slice->strand,#This is most likely 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168 # $bin_scores,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169 # );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171 # #Only count if we have a stored(projected?) feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 # $count++;#Change this to attr/method?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173 # #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175 # $bin_start += $wsize;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177 # else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 # throw('Bio::EnsEMBL::Funcgen::Collector does not yet support non-SIMPLE bin models');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182 #warn "Window size $wsize has ".scalar(@{$bins->{$wsize}})." bins";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 #$counts{$wsize}+= $count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187 $store_natural = 0; #Turn off storing of natural resolution for next chunk length sets
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190 #Now need to write last collections for each wsize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192 foreach my $wsize(@{$self->window_sizes}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194 next if $wsize == 0 && ! $store_natural;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 next if $wsize != 0 && $only_natural;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197 print "Writing final $wsize window_size collection, this may result in slightly different bin numbers from counts due to removing overhanging bins past end of slice\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199 $self->write_collection($wsize, $orig_slice);#store last collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203 #Print some counts here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 foreach my $wsize(sort (keys %counts)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205 print "Generated ".$counts{$wsize}." bins for window size $wsize for ".$orig_slice->name."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206 #Some may have failed to store if we are projecting to a new assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 #Need collection count here too, but would need methods for this?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 #Return this counts hash so we can print/log from the caller, hence we don't print in here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 =head2 _bin_features_by_window_sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 Args[0] : Bio::EnsEMBL::Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 Args[1] : ARRAYREF of window sizes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 Args[2] : int - bin method, currently defined by validate_bin_methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 Args[3] : ARRAYREF of Bio::EnsEMBL::Features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 Example : $bins = $self->_bin_features_by_window_sizes(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 -slice => $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225 -window_sizes => $chunk_windows{$chunk_length},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 -bin_method => $bin_method,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227 -features => $features,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229 Description: Bins feature scores for a given list of window sizes and predefined method number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 Returntype : HASHREF of scores per bin per window size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231 Exceptions : Throws if bin method not supported
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232 Caller : store_window_bins_by_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238 #To do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239 # 1 Remove Bio::EnsEMBL::Feature dependency? Or just create Features for non adaptor Collectors.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240 # Is there a way we can skip the object generation in the adaptor completely and just
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241 # pass the values we need?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 # 2 Separate methods, so we can define custom methods in descendants?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243 # 3 Expand %bins model to optionally be one of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244 # the following dependent on binning method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245 # Simple: fixed width containing arrays of scores for each window
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246 # Multi: fixed width containing multiple arrays of scores for each window
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247 # Non-simple?: Separate aggregated features, either fixed width or not, not BLOB!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248 # Clipped: default fixed width with option to clip start and end. Needs start/end attrs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249 # Can't store this in a blob due to non-standard start ends?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 # Most likely want more than one score here? Count/Density SNPs?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251 # Removes data skew from standard window bins, would need to store each bin and post
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 # process. Or do in line to avoid 2nd post-processing loop, requires awareness of when
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 # we have moved to a new bin between features. This holds for overlapping and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254 # non-overlapping features. Once we have observed a gap we need to clip the end of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 # last bin and clip the start of the new bin. This requires knowing the greatest end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256 # values from the last bin's feature. What if two overlapping features had the same
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 # start and different end, would we see the longest last? Check default slice_fetch sort
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 sub _bin_features_by_window_sizes{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260 my $this = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 my ( $slice, $window_sizes, $method, $features ) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 rearrange( [ 'SLICE', 'WINDOW_SIZES', 'BIN_METHOD', 'FEATURES' ], @_ );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265 #Do this conditional on the Collection type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 #i.e. is collection seq_region blob then no else yes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267 #if ( !defined($features) || !@{$features} ) { return {} }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269 #warn 'Processing '.scalar(@$features).' features for window sizes '.join(', ',@$window_sizes).' for slice '.$slice->name."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271 #Set up some hashes to store data by window_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1272 my (%bins, %nbins, %bin_counts);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1273 my $slice_start = $slice->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1275 #Default handlers for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1276 #my($first_bin);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1277 #if ( $method == 0 || # 'count' or 'density'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1278 # $method == 3 || # 'fractional_count' or 'weight'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1279 # $method == 4 # 'coverage'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280 # ){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281 # # For binning methods where each bin contain numerical values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282 # $first_bin = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 # else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285 # # For binning methods where each bin does not contain numerical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 # # values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288 # #Remove this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289 # $first_bin = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293 #Set up some bin data for the windows
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 my $slice_length = $slice->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 foreach my $wsize (@$window_sizes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297 #TO DO: Need to modify this block if default 0's are undesirable for collection type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298 #i.e. should it be undef instead? May have problems representing undef in blob
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1300 $nbins{$wsize} = int($slice_length / $wsize); #int rounds down
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1301 #nbins is actually the index of the bin not the 'number'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1302 #Unless slice_Length is a multiple!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1303 $nbins{$wsize}-- if(! ($slice_length % $wsize));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1305 #Create default bins with 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1306 @{$bins{$wsize}} = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307 map {$bins{$wsize}->[$_] = 0} (0 .. $nbins{$wsize});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309 #Set bin counts to 0 for each bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310 @{$bin_counts{$wsize}} = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312 #This is adding an undef to the start of the array!?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313 map { $bin_counts{$wsize}->[($_)] = 0 } @{$bins{$wsize}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315 foreach my $bin(@{$bins{$wsize}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316 $bin_counts{$wsize}->[$bin] = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1317 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320 #warn "bin_counts are :\n".Data::Dumper::Dumper(\%bin_counts);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 #This fails for slices which are smaller than the chunk length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322 my $feature_index = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323 my ($bin_index, @bin_masks);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 foreach my $feature ( @{$features} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326 #Set up the bins for each window size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328 #Omit test for Bio::EnsEMBL::Feature here for speed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1329 #Only needs start/end methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1331 foreach my $wsize (@$window_sizes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333 #We have already highjacked the object creation by here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334 #This is done in core BaseFeatureAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335 #We probably don't want to do this for ResultFeatures as we don't use the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336 #standard feature implementation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 #we already use an array and we don't store the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338 #as this is already known by the caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 #and we always build on top level so we don't need to remap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 #We do however need the slice to store, as we only store local starts when generating
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342 #We need a store by Slice method?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343 #This will remove the need to inherit from Feature.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344 #These will need to be regenerated everytime we import a new build
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 #As we do with the probe_features themselves
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 #This also mean the result_feature status has to be associated with a coord_system_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348 #Which bins do the start and end lie in for this feature?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349 #Already dealing with local starts, so no slice subtraction
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1350 #Could wrap these start/end methods via the descendant Collector
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1351 #to remove the Feature dependancy? Or just create Features when parsing in the caller
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1352 my $start_bin = int(($feature->start ) / $wsize);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1353 my $end_bin = int(($feature->end) / $wsize );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354 $end_bin = $nbins{$wsize} if $end_bin > $nbins{$wsize};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358 #Slightly obfuscated code to match method number(faster)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 #by avoiding string comparisons.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360 #Could call methods directly using coderef set in validate_bin_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361 #Accessor may slow things down, but should be uniform for all methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 #rather than being dependant on position in if/else block below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 #reserve 0 for descendant defined method?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 #There fore always fastest in this block, or use coderefs?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366 if ( $method == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367 # ----------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 # For 'count' and 'density'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370 for ( $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1374 $bins{$wsize}->[$bin_index]++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1376 #warn "setting $wsize bin $bin_index to ". $bins{$wsize}->[$bin_index];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383 } elsif ( $method == 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 # ----------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385 # For 'indices' and 'index'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388 #How is this useful?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389 #Is this not just count per bin?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 #No this is a list of the feature indices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 #So forms a distribution?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 throw('Not implemented for method for index');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 for ( my $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 push( @{ $bins[$bin_index] }, $feature_index );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 ++$feature_index;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403 } elsif ( $method == 2 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404 # ----------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405 # For 'features' and 'feature'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407 throw('Not implemented for method for feature/features');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409 for ( my $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412 push( @{ $bins[$bin_index] }, $feature );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415 } elsif ( $method == 3 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 # ----------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1417 # For 'fractional_count' and 'weight'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1418
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1420 throw('Not implemented for method for fractional_count/weight');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1422 if ( $start_bin == $end_bin ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1423 ++$bins[$start_bin];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1424 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1426 my $feature_length =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1427 $feature->[FEATURE_END] - $feature->[FEATURE_START] + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1429 # The first bin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1430 $bins[$start_bin] +=
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1431 ( ( $start_bin + 1 )*$bin_length -
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1432 ( $feature->[FEATURE_START] - $slice_start ) )/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1433 $feature_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1435 # The intermediate bins (if there are any)...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1436 for ( my $bin_index = $start_bin + 1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1437 $bin_index <= $end_bin - 1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1438 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439 $bins[$bin_index] += $bin_length/$feature_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 # The last bin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443 $bins[$end_bin] +=
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 ( ( $feature->[FEATURE_END] - $slice_start ) -
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445 $end_bin*$bin_length +
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446 1 )/$feature_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1448 } ## end else [ if ( $start_bin == $end_bin)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1450 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1451 elsif ( $method == 4 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452 # ----------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 # For 'coverage'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 #What exactly is this doing?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456 #This is coverage of bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457 #Rather than coverage of feature as in fractional_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1460 # my $feature_start = $feature->[FEATURE_START] - $slice_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1461 # my $feature_end = $feature->[FEATURE_END] - $slice_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1462 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1463 # if ( !defined( $bin_masks[$start_bin] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1464 # || ( defined( $bin_masks[$start_bin] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1465 # && $bin_masks[$start_bin] != 1 ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1466 # # Mask the $start_bin from the start of the feature to the end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1467 # # of the bin, or to the end of the feature (whichever occurs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1468 # # first).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1469 # my $bin_start = int( $start_bin*$bin_length );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1470 # my $bin_end = int( ( $start_bin + 1 )*$bin_length - 1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1471 # for ( my $pos = $feature_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1472 # $pos <= $bin_end && $pos <= $feature_end ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1473 # ++$pos ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1474 # $bin_masks[$start_bin][ $pos - $bin_start ] = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1475 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1476 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1477 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1478 # for ( my $bin_index = $start_bin + 1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1479 # $bin_index <= $end_bin - 1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1480 # ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1481 # # Mark the middle bins between $start_bin and $end_bin as fully
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1482 # # masked out.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483 # $bin_masks[$bin_index] = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486 # if ( $end_bin != $start_bin ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 # if ( !defined( $bin_masks[$end_bin] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 # || ( defined( $bin_masks[$end_bin] )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490 # && $bin_masks[$end_bin] != 1 ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491 # # Mask the $end_bin from the start of the bin to the end of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 # # the feature, or to the end of the bin (whichever occurs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493 # # first).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494 # my $bin_start = int( $end_bin*$bin_length );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495 # my $bin_end = int( ( $end_bin + 1 )*$bin_length - 1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 # for ( my $pos = $bin_start ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497 # $pos <= $feature_end && $pos <= $bin_end ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1498 # ++$pos ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1499 # $bin_masks[$end_bin][ $pos - $bin_start ] = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1500 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1501 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1502 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1503 # } ## end elsif ( $method == 4 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1505 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1508 elsif ( $method == 5 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1509 #$self->$method($bin_index, $start_bin, $end_bin, $wsize, \%bins, \%bin_counts);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1512 #average score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1513 #This is simply an average of all the scores for features which overlap this bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1514 #No weighting with respect to the bin or the feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1516 for ( $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1517 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1518 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1520 #we should really push onto array here so we can have median or mean.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1521 $bins{$wsize}->[$bin_index] += $this->get_score_by_Feature($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1522 $bin_counts{$wsize}->[$bin_index]++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1523 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1524 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1525 elsif( $method == 6){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1526 #Max magnitude
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1527 #Take the highest value +ve or -ve score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528 for ( $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1529 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1530 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1532 #we really need to capture the lowest -ve and highest +ve scores here and post process
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1533 #To pick between them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1535 my $score = $this->get_score_by_Feature($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1536 #Write score method as wrapper to scores?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1537
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1538 $bins{$wsize}->[$bin_index] ||= [0,0]; #-ve, +ve
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1541 #warn "Comparing wsize $wsize bin $bin_index score $score to ". $bins{$wsize}->[$bin_index]->[0].' '.$bins{$wsize}->[$bin_index]->[1]."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543 if($score < $bins{$wsize}->[$bin_index]->[0]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544 #warn "setting -ve bin to $score\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545 $bins{$wsize}->[$bin_index]->[0] = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 elsif($score > $bins{$wsize}->[$bin_index][1]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 #warn "setting +ve bin to $score\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 $bins{$wsize}->[$bin_index]->[1] = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1553 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1554 throw("Only accomodates average score method");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1555 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1560 } ## end foreach my $feature ( @{$features...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 #Now do post processing of bins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567 if ( $method == 4 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1569 # ------------------------------------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1570 # For the 'coverage' method: Finish up by going through @bin_masks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1571 # and sum up the arrays.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573 for ( my $bin_index = 0 ; $bin_index < $nbins ; ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574 if ( defined( $bin_masks[$bin_index] ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575 if ( !ref( $bin_masks[$bin_index] ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1576 $bins[$bin_index] = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1577 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578 $bins[$bin_index] =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579 scalar( grep ( defined($_), @{ $bin_masks[$bin_index] } ) )/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580 $bin_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588 if( $method == 5){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 #For average score, need to divide bins by bin_counts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591 foreach my $wsize(keys %bins){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1593 foreach my $bin_index(0..$#{$bins{$wsize}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1595 if($bin_counts{$wsize}->[$bin_index]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1596 $bins{$wsize}->[$bin_index] /= $bin_counts{$wsize}->[$bin_index];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1597 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1598 #warn "bin_index $wsize:$bin_index has score ".$bins{$wsize}->[$bin_index];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602 elsif( $method == 6){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603 #Max magnitude
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604 #Take the highest value +ve or -ve score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606 foreach my $wsize(keys %bins){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608 foreach my $bin_index(0..$#{$bins{$wsize}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 #So we have the potential that we have no listref in a given bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612 #default value if we haven't seen anything is 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613 #we actually want an array of -ve +ve values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1615 #warn "Are we storing 0 values for absent data?";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1616 #Not for max_magnitude, but maybe for others?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1618 if($bins{$wsize}->[$bin_index]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1619 #warn $wsize.':'.$bin_index.':'.$bins{$wsize}->[$bin_index]->[0].'-'.$bins{$wsize}->[$bin_index]->[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1620 my $tmp_minus = $bins{$wsize}->[$bin_index]->[0] * -1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1622 if($tmp_minus > $bins{$wsize}->[$bin_index]->[1]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1623 $bins{$wsize}->[$bin_index] = $bins{$wsize}->[$bin_index]->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1624 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1625 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1626 $bins{$wsize}->[$bin_index] = $bins{$wsize}->[$bin_index]->[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1627 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1628
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1629 #warn "bin $bin_index now ". $bins{$wsize}->[$bin_index];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1631 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1632 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1633 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1634 elsif($method != 0){#Do no post processing for count(0)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1635 throw('Collector currently only accomodates average_score, count and max magnitude methods');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1636 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1639 #Could return bin_counts too, for summary reporting in zmenu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1640 #Could also do counting of specific type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1641
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1642 #warn "returning bins ".Data::Dumper::Dumper(\%bins);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1643
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644 return \%bins;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645 } ## end sub _bin_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650 #These could potentially be used as code refs to avoid having the if else block
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 #This way we can also define new methods in the descendant Collector?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652 #Would have to have pass args and refs to bin hashes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653 #This would slow things down over direct access here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654 #But speed is no longer that critical as we do not use the Collector for display
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655 #purposes, only to build the Collections which are then used for display directly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657 sub calculate_average_score{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1659
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1660 if ( $method == 5 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661 #average score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662 #This is simply an average of all the scores for features which overlap this bin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663 #No weighting with respect to the bin or the feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665 for ( my $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669 #we should really push onto array here so we can have median or mean.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670 $bins{$wsize}->[$bin_index] += $feature->score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671 $bin_counts{$wsize}->[$bin_index]++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1677 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1680 sub post_process_average_score{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1682 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1685 sub calculate_max_magnitude{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1686 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1687
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688 #Max magnitude
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689 #Take the highest value +ve or -ve score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690 for ( my $bin_index = $start_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691 $bin_index <= $end_bin ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 ++$bin_index ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 #we really need to capture the lowest -ve and highest +ve scores here and post process
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695 #To pick between them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 my $score = $feature->score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698 $bins{$wsize}->[$bin_index] ||= [0,0]; #-ve, +ve
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1700 if($score < $bins{$wsize}->[$bin_index]->[0]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1701 $bins{$wsize}->[$bin_index]->[0] = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1702 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1703 elsif($score > $bins{$wsize}->[$bin_index][1]){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1704 $bins{$wsize}->[$bin_index]->[1] = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1705 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1706 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1707 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1710 sub post_process_max_magnitude{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1712 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1714 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1715
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1716 #separated to allow addition of non-standard methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1717 #Could potentially add these in new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1718 #and put this back in _bin_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1719
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# validate_bin_method
#
# Translates a binning method name into its numeric method index.
#
#   Arg [1]    : String - binning method name. Accepted names are
#                'average_score', 'count' and 'max_magnitude'.
#   Returntype : Int - 5 for 'average_score', 6 for 'max_magnitude',
#                0 for 'count'.
#   Exceptions : Calls throw() with the sorted list of accepted names when
#                the name is unknown or undefined.
#
# NOTE(review): the returned index is presumably the value that
# _bin_features compares $method against (== 5, == 6, != 0) - confirm
# against the caller.
#
# This is kept as a separate method so that non-standard methods can be
# added by overriding it.
# TODO: consider resolving 'calculate_<method>' / 'post_process_<method>'
# via $self->can() instead of returning a numeric index.
sub validate_bin_method {
  my ( $self, $method ) = @_;

  # Deliberately a lexical table. The expression
  # $class::VALID_BINNING_METHODS names a scalar in a package literally
  # called 'class' (it does not interpolate a lexical $class), so it cannot
  # be used to reach a per-class table and only pollutes an unrelated
  # package global.
  my %index_for = (
    'average_score' => 5,
    'max_magnitude' => 6,
    'count'         => 0,
  );

  # Treat an undefined name as the empty string so that the lookup and the
  # error message below do not emit 'uninitialized value' warnings.
  my $name = defined($method) ? $method : '';

  if ( !exists $index_for{$name} ) {
    throw(
      sprintf(
        "Invalid binning method '%s', valid methods are:\n\t%s\n",
        $name,
        join( "\n\t", sort( keys(%index_for) ) ) ) );
  }

  return $index_for{$name};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1767
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1770 1;