diff variant_effect_predictor/Bio/EnsEMBL/Utils/Collector.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Utils/Collector.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1749 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::Collector
+
+=head1 SYNOPSIS
+
+  # Inherit this base module in your feature specific Collector
+  # instance:
+
+  package Bio::EnsEMBL::Funcgen::Collector::ResultFeature;
+  use base('Bio::EnsEMBL::Utils::Collector');
+
+  # ... and define package config variables
+  $Bio::EnsEMBL::Funcgen::Collector::bin_model = 'SIMPLE';
+  $Bio::EnsEMBL::Funcgen::Collector::window_sizes =
+    [ 30, 65, 130, 260, 450, 648, 950, 1296 ];
+  # Could replace 30 with 0 here for low density data at natural resolution
+
+  $Bio::EnsEMBL::Utils::Collector::bin_method =
+    'count';    # only used by collector
+
+  $Bio::EnsEMBL::Utils::Collector::packed_size = 2;
+
+  # ... or simply use this module in a script either defining package
+  # config variables, or passing as parameters to the constructor:
+
+  my $collector =
+    Bio::EnsEMBL::Utils::BaseCollector->new( -pack_template => 'v' );
+
+  $Bio::EnsEMBL::Funcgen::Collector::pack_template = 'v';
+
+  # Config variables can also be over-ridden by passing a config hash to
+  # the store_window_bins_by_Slice() method:
+
+  $collector->store_window_bins_by_Slice( $slice, (
+                                            -pack_template => 'v',
+                                            -packed_size   => 2 ) );
+
+  # NOTE: Over-riding default config variables can cause problems when
+  # storing or fetching data. e.g. Fetch may revert to using defaults or
+  # table partitions may not match window sizes.
+
+=head1 DESCRIPTION
+
+This package is the base Collector class which contains generic
+getter/setter methods along with the main 'collecting' methods which
+perform the majority of the work in generating compressed data
+collections optimised for web display.  The bins produced are aimed at
+storage in a BLOB representing an entire seq_region i.e. even bins with
+no features/null data are encoded as a 0 score.  Non-BLOB collections
+are currently not supported.
+
+If your Collection class defines a Bio::EnsEMBL::Feature, then its
+adaptor should inherit from the relevant Collection class.
+
+The minimum prerequisites of the input features/data are that they have
+a start() and end() method.  For instance a Bio::EnsEMBL::Features
+generated from a database or parsed from a flat file.
+
+NOTE: This Collector does not have a lightweight mode previously used
+for dynamic/on the fly collecting i.e. it does not take advantage of
+bypassing object creation via the related BaseFeatureAdaptor method.
+
+=cut
+
+package Bio::EnsEMBL::Utils::Collector;
+
+use Bio::EnsEMBL::Utils::Argument  ('rearrange');
+use Bio::EnsEMBL::Utils::Exception ('throw');
+use strict;
+use warnings;
+
+### Global package config vars
+
+# Defaults
+our $max_view_width = 1000000;   # Max bp width in location/detailed view
+our $max_data_type_size = 16777216;    # Default is 16MB for long BLOB
+# This is really a guide value as this should be set in the inheriting
+# Collector class by deducting the rest of the row size from this value.
+# Is is upto the inheritor to handle checking whether this size has been 
+# exceeded.
+
+# NOTE: Theoretically the min window size is: slice_length/(16777216/2)
+# So for human chr1:  249,250,621/(16777216/2) = 29.7 => 30.  However,
+# this size does not seem to directly translate to the MySQL
+# max_allowed_packet_size.  Increasing max_allowed_packet_size to 64MB
+# solves this issue, and substr operation doesn't appear to incur any of
+# the potential memory(4*) usage issues.
+
+# Others global package variables which are set in the inheriting
+# Collector class.
+our ( $bin_model,   $bin_method, $pack_template,
+      $packed_size, $window_sizes );
+
+=head2 new
+
+  Args       : None
+  Example    :
+
+    my $collector = Bio::EnsEMBL::XXX::Collector::FEATURE->new();
+    $collector->store_windows_by_Slice($slice);
+
+    # Where XXX is, e.g. Compara, FuncGen etc.
+
+  Description: Simple new method to enable use of collector
+               when not inherited by a descendant of
+               Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
+
+  Returntype : Bio::EnsEMBL::XXX::Collector
+  Exceptions : None
+  Caller     : Collector script
+  Status     : At Risk
+
+=cut
+
+sub new {
+  return bless {}, $_[0];   # Simply blesses this class as an empty hash.
+
+  # Do not set anything here, as will not be first in ISA for
+  # FeatureAdaptors.  Hence, not guaranteed to be called.
+}
+
+=head2 new_assembly
+
+  Args       : optional - string assembly version e.g. GRCh37
+  Example    : $collector->new_assembly('GRCh37');
+  Description: Getter/Setter for new assembly version which should be
+               used to project only 0 wsize Collections.
+  Returntype : string
+  Exceptions : None
+  Caller     : store_window_bins_by_Slice() or
+               write_collection() in inheriting Collector class.
+  Status     : At Risk
+
+=cut
+
+sub new_assembly {
+  my ( $self, $new_asm ) = @_;
+
+  if ( defined($new_asm) ) {
+    $self->{'new_assembly'} = $new_asm;
+  }
+
+  return $self->{'new_assembly'};
+}
+
+### Setter/Getter methods for basic/mandatory config
+#   Can also be set using package variables in the inheriting
+#   Collector/Adaptor or run script.  Allows over-riding of defaults set
+#   in Adaptor/Collector.
+
+# Package variables used here instead of attrs to enable easy
+# default config in inheriting class/script method. Provided
+# for easy/standardised fetch access outside of this package
+# i.e. Collectors/Adaptors
+
+=head2 max_data_type_size
+
+  Args       : optional - int  Maximum size of collection in bytes
+  Example    : $collector->max_data_type_size($new_max_size);
+  Description: Getter/Setter for max_data_type_size, default is
+               currently set at in this class as 16777216 (16MB), for
+               long BLOB.  This is used by the write_collection()
+               method to determine when to build and store a compressed
+               collection.
+  Returntype : int
+  Exceptions : None
+  Caller     : bins_per_record() and
+               write_collection() in inheriting Collector class.
+  Status     : At Risk
+
+=cut
+
+sub max_data_type_size {
+  my ( $self, $size ) = @_;
+
+  # Validate is sensible integer
+
+  if ( defined($size) ) {
+    if ( $size !~ /^\d+$/ ) {
+      throw("max_data_type_size must be a integer of bytes, not $size");
+    }
+    $max_data_type_size = $size;
+  } elsif ( !defined($max_data_type_size) ) {
+    # This should never happen as we have defaults in this module.
+    throw(   'You must define a '
+           . '$Bio::EnsEMBL::Utils::Collector::max_data_type_size '
+           . 'or pass -max_data_type_size config' );
+  }
+
+  return $max_data_type_size;
+}
+
+=head2 max_view_width
+
+  Args       : optional - int  Maximum width of view
+  Example    : $collector->max_view_width($new_max_width);
+  Description: Getter/Setter for max_view_width, default is currently
+               set at in this class as 500000bp, for maximum level of
+               zoom permitted by location view.
+  Returntype : int
+  Exceptions : None
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub max_view_width {
+  my ( $self, $size ) = @_;
+
+  # Validate is sensible integer
+
+  if ( defined($size) ) {
+    if ( $size !~ /^\d+$/ ) {
+      throw("max_view_width must be a integer, not $size");
+    }
+    $max_view_width = $size;
+  } elsif ( !defined $max_view_width ) {
+    # This should never happen as we have defaults in this module.
+    throw(   'You must define a '
+           . '$Bio::EnsEMBL::Utils::Collector::max_view_width '
+           . 'or pass -max_view_width config' );
+  }
+
+  return $max_view_width;
+}
+
+=head2 bin_method
+
+  Args[0]    : optional - string name of bin method e.g. 'max_magnitude'
+  Args[1]    : optional - Bio::EnsEMBL::Funcgen::Parsers::InputSet
+  Example    : my $bin_method = $self->bin_method();
+  Description: Getter/Setter for bin_method, default is normally set in
+               the inheriting Collector class either by package variable
+               or by passing a config hash via the store methods.
+  Returntype : string
+  Exceptions : Throws if cannot set by package variable
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub bin_method {
+  my ( $self, $bmethod, $config ) = @_;
+
+  if ( defined($bmethod) ) {
+    $bin_method = $bmethod;
+  }
+
+  if ( !defined($bin_method) ) {
+    throw(   'You must define a '
+           . '$Bio::EnsEMBL::Utils::Collector::bin_method '
+           . 'or pass -bin_method config' );
+  }
+
+  if ( !$self->can( "_calculate_" . $bin_method ) ) {
+    throw("$bin_method is not a valid/available binning method");
+  }
+
+  my $set_up_method = "_set_up_" . $bin_method;
+  if ( $self->can($set_up_method) ) {
+    $self->$set_up_method($config);
+  }
+
+  return $bin_method;
+}
+
+=head2 bin_model
+
+  Args       : optional - string bin model e.g. SIMPLE or COMPLEX
+  Example    : my $bin_model = $self->bin_model;
+  Description: Getter/Setter for bin_model, default should be set in
+               inheriting Collector class.  Currently only supports
+               'SIMPLE' bin model.
+  Returntype : string
+  Exceptions : Throws if bin_model is not SIMPLE
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+sub bin_model {
+  my ( $self, $bmodel ) = @_;
+
+  if ( defined($bmodel) ) {
+    $bin_model = $bmodel;
+  }
+
+  if ( !defined($bin_model) ) {
+    throw(   'You must define a '
+           . '$Bio::EnsEMBL::Utils::Collector::bin_model '
+           . 'or pass -bin_model config' );
+  }
+
+  if ( $bin_model ne 'SIMPLE' ) {
+    throw(   'Bio::EnsEMBL::Utils::Collector does not yet support '
+           . 'non-SIMPLE bin models' );
+  }
+
+  return $bin_model;
+}
+
+
+=head2 window_sizes
+
+  Args       : optional - arrayref of window sizes
+  Example    :
+
+    foreach my $wsize ( @{ $collector->window_sizes } )
+    {    # Do some collecting
+    }
+
+  Description: Getter/Setter for window_sizes.  Default should be set
+               in inheriting Collector (if the config is dynamic),
+               FeatureAdaptor class or script using package variable or
+               this method.
+               NOTE: Redefining these may cause a mismatch with the
+               table partition definition.
+  Returntype : arrayref of ints
+  Exceptions : Throws if cannot set a valid array of int window sizes
+  Caller     : general
+  Status     : At Risk - rename bin_sizes?
+
+=cut
+
+sub window_sizes {
+  my ( $self, $sizes ) = @_;
+
+  if ( defined($sizes) ) {
+    $window_sizes = $sizes;
+  }
+
+  if ( !(    ref($window_sizes)
+          && ( ref($window_sizes) eq 'ARRAY' )
+          && ( scalar(@$window_sizes) > 0 ) ) )
+  {
+    throw('Must pass -windows_sizes in the config '
+        . 'or define $Bio::EnsEMBL::Utils::Collector::window_sizes '
+        . 'in your Collector as an array ref of integer window_sizes' );
+  }
+
+  return $window_sizes;
+}
+
+
+
+
+=head2 has_window_size
+
+  Args       : int - window size to validate
+  Example    : if( $collector->has_window_size('30') ){
+                   #Do something wrt to 30bp window size
+               }
+
+  Description: Simple utility method to validate whether this Collector
+               has a given window_size
+  Returntype : Boolean
+  Exceptions : Throws if window size not specified
+  Caller     : general
+  Status     : At Risk
+
+=cut
+
+
+sub has_window_size{
+  my ( $self, $size ) = @_;
+
+  if(! defined $size){
+	throw('You must pass a window size to validate');
+  }
+
+  return grep(/$size/, @$window_sizes); 
+}
+
+
+
+
+### Getter/Setters for BLOB collection config
+# NOTE: Overriding the defaults here may cause a mismatch when the data
+# is retrieved.
+
+=head2 pack_template
+
+  Args       : optional - string perl 'pack' template 
+  Example    : $self->pack_template('v');
+  Description: Getter/Setter for pack_template. Default should be set
+               in inheriting Collector (if the config is dynamic),
+               FeatureAdaptor class or script using package variable or
+               this method.
+  Returntype : string
+  Exceptions : Throws if cannot set pack_template from package variable
+  Caller     : FeatureAdaptor::_obj_from_sth
+  Status     : At Risk
+
+=cut
+
+sub pack_template {
+  my ( $self, $template ) = @_;
+
+  if ( defined($template) ) {
+    $pack_template = $template;
+  }
+
+  if ( !defined($pack_template) ) {
+    throw(   'Must pass a per score '
+           . '-pack_template in the config '
+           . 'or define $Bio::EnsEMBL::Utils::Collector::pack_template '
+           . 'in your Collector' );
+  }
+
+  return $pack_template;
+}
+
+=head2 packed_size
+
+  Args       : optional - int size of perl 'pack' template in bytes
+  Example    : $self->packed_size(2);
+  Description: Getter/Setter for packed_size.  Default should be set
+               in inheriting Collector (if the config is dynamic),
+               FeatureAdaptor class or script using package variable or
+               this method.
+  Returntype : string
+  Exceptions : Throws if cannot set pack_template from pacakge variable
+  Caller     : current_packed_size() and
+               FeatureAdaptor::_obj_from_sth()
+  Status     : At Risk
+
+=cut
+
+sub packed_size {
+  my ( $self, $size ) = @_;
+
+  if ( defined($size) ) {
+    $packed_size = $size;
+  }
+
+  if ( !defined($packed_size) ) {
+    throw(   'Must pass -packed_size(wrt to pack_template) config '
+           . 'or define $Bio::EnsEMBL::Utils::Collector::packed_size '
+           . 'in your Collector' );
+  }
+
+  if ( $packed_size !~ /^\d+$/ ) {
+    throw(   "$packed_size is not an integer, "
+           . "must pass a size integer for packed_size "
+           . "which specifies size of pack_template:\t"
+           . $pack_template );
+  }
+
+  return $packed_size;
+}
+
+=head2 bins_per_record
+
+  Example    : my $bin_per_records = $self->bin_per_record
+  Description: Simple method to calculate the max number of bins
+               allowed per record given the current config.
+  Returntype : int
+  Exceptions : None
+  Caller     :
+  Status     : At Risk
+
+=cut
+
+sub bins_per_record {
+  return int( $max_data_type_size/$packed_size );
+}
+
+
+=head2 current_packed_size
+
+  Arg[0]     : int - window size
+  Example    : my $cps = $self->current_packed_size($wsize);
+  Description: Simple method to calculate the max number of bins
+               allowed per record given the current config.
+  Returntype : int
+  Exceptions : None
+  Caller     :
+  Status     : At Risk
+
+=cut
+
+sub current_packed_size {
+  my ( $self, $wsize ) = @_;
+  return ( scalar( @{ $self->score_cache($wsize) } )*$packed_size );
+}
+
+
+=head2 score_cache
+
+  Arg[0]     : int - window size
+  Example    : my $cps = $self->current_packed_size($wsize);
+  Description: Handles caching of bin scores for each window size
+  Returntype : arrayref
+  Exceptions : Throws if no window size defined
+  Caller     : current_packed_size() and store_collection()
+  methods
+  Status     : At Risk
+
+=cut
+
+sub score_cache {
+  my ( $self, $wsize, $scores ) = @_;
+
+  if ( !defined($wsize) ) {
+    throw('Must pass a window size argument');
+  }
+
+  $self->{'score_cache'}{$wsize} ||= [];
+
+  if ( defined($scores) ) {
+    push( @{ $self->{'score_cache'}{$wsize} }, @{$scores} );
+  }
+
+  return $self->{'score_cache'}{$wsize};
+}
+
+
+=head2 collection_start
+
+  Arg[0]     : int - window_size
+  Arg[1]     : optional int - seq_region_start
+  Example    : my $coll_start->(150);
+  Description: Getter/Setter collection seq_region_start
+  Returntype : int
+  Exceptions : Throws if no window size defined
+  Caller     : store_window_bin_by_Slice() and write_collection()
+  Status     : At Risk
+
+=cut
+
+sub collection_start {
+  my ( $self, $wsize, $sr_start ) = @_;
+
+  if ( !defined($wsize) ) {
+    throw('Must pass a window size argument');
+  }
+
+  if ( defined($sr_start) ) {
+    $self->{'collection_start'}{$wsize} = $sr_start;
+  }
+
+  return $self->{'collection_start'}{$wsize};
+}
+
+
+=head2 collection_end
+
+  Arg[0]     : int - window_size
+  Arg[1]     : optional int - seq_region_end
+  Example    : my $coll_end->(150);
+  Description: Getter/Setter collection seq_region_end
+  Returntype : int
+  Exceptions : Throws if no window size defined
+  Caller     : inheriting Collector write_collection method
+  Status     : At Risk
+
+=cut
+
+sub collection_end{
+  my ($self, $wsize, $sr_end) = @_;
+  throw('Must pass a window size argument') if ! defined $wsize;
+
+  if(defined $sr_end){
+	$self->{'collection_end'}{$wsize} = $sr_end;
+  }
+  else{
+	return $self->{'collection_end'}{$wsize};
+  }
+}
+
+
+=head2 collection_strand
+
+  Arg[0]     : int - window_size
+  Arg[1]     : optional int - seq_region_strand
+  Example    : my $coll_start->(0);
+  Description: Getter/Setter collection seq_region_strand
+  Returntype : int
+  Exceptions : Throws if no window size defined
+  Caller     : inheriting Collector write_collection method
+  Status     : At Risk - Collections are currently strandless
+
+=cut
+
+sub collection_strand {
+  my ( $self, $wsize, $strand ) = @_;
+
+  if ( !defined($wsize) ) {
+    throw('Must pass a window size argument');
+  }
+
+  if ( defined $strand ) {
+    $self->{'collection_strand'}{$wsize} = $strand;
+  }
+
+  return $self->{'collection_strand'}{$wsize};
+}
+
+
+### Here follows the actual working methods
+
+=head2 _get_Slice_chunks
+
+  Description: Defines the optimal set of slice chunks to use for
+               generating collections such that redundant fetches
+               are minimized.
+  Returntype : hashref of window_size chunk size pairs
+  Exceptions : Throws if no window sizes or max_view_width defined
+  Caller     : store_window_bin_by_Slice()
+  Status     : At Risk
+
+=cut
+
+sub _get_Slice_chunks {
+  my $self = shift;
+
+  if ( !defined($window_sizes) || !defined($max_view_width) ) {
+    throw(   'You must pass both a window_size array ref '
+           . 'and max_view_width arguments' );
+  }
+
+  if ( !defined( $self->{'_slice_chunks'} ) ) {
+    # Calulate sensible slice length based on window sizes
+    my @wsizes = sort { $a <=> $b } @$window_sizes;
+
+    # Handle calculating only 0 wsize
+    if ( scalar(@wsizes) == 1
+         && $wsizes[0] == 0 )
+    {
+      return { $max_view_width => [0] };
+    }
+
+    my $multiplier = int( $max_view_width/$wsizes[$#wsizes] );
+
+    my $chunk_length  = $multiplier*$wsizes[$#wsizes];
+    my $not_divisible = 1;
+
+    my %chunk_windows;   # Registry of chunk lengths to run with windows
+    my %workable_chunks = map { $_ => {} } @wsizes;
+
+    # get rid of natural resolution as this will always work
+    delete $workable_chunks{'0'};
+
+    while ( $not_divisible && $chunk_length != 0 ) {
+      $not_divisible = 0;
+
+      foreach my $wsize (@wsizes) {
+        if ( $wsize == 0 ) {
+          # Special wsize for normal data
+          next;
+        }
+
+        # Set not divisible if modulus is true
+        if ( $chunk_length % $wsize ) {
+          $not_divisible = 1;
+        } else {
+          $workable_chunks{$wsize}{$chunk_length} = [];
+        }
+      }
+
+      # Gradually shrink the length until we find a workable slice
+      # length for all windows.
+      if ($not_divisible) {
+        $chunk_length -= $wsizes[$#wsizes];
+      }
+    }
+
+    my %chunk_sets;
+
+    if ( $chunk_length == 0 ) {
+      print "Could not find chunk length "
+        . "for all window sizes, "
+        . "attempting to subset windows "
+        . "using alternate slice length\n";
+
+      foreach my $wsize ( keys(%workable_chunks) ) {
+        # Loop through windows, seeing if they are workable in the other
+        # windows.
+
+        foreach my $chunk ( keys( %{ $workable_chunks{$wsize} } ) ) {
+
+          foreach my $other_wsize ( keys %workable_chunks ) {
+            next if $wsize == $other_wsize;
+
+            if ( exists( $workable_chunks{$other_wsize}{$chunk} ) ) {
+              # only push it onto the other wsize, as we will do the
+              # reverse later
+              $chunk_sets{$chunk}{$wsize} = undef;
+            }
+          }
+        }
+      }
+
+      # %chunk_sets represents co-occurence of wsizes with repect to
+      # chunks.  Take the set which has the most windows and the longest
+      # chunk.  Then get the largest which handles the rest.
+
+      # define possible set lengths
+      my $i = 0;
+      my %set_lengths;
+
+      map { $set_lengths{$i} = []; $i++ } @wsizes;
+
+      # get rid of natural resolution as this will always work
+      delete $set_lengths{'0'};
+
+      # Store chunks lengths for each set size
+      foreach my $chunk ( keys(%chunk_sets) ) {
+        my $set_size = scalar( values( %{ $chunk_sets{$chunk} } ) );
+        push( @{ $set_lengths{$set_size} }, $chunk );
+      }
+
+      # Get the biggest set with the longest length;
+
+      # Scalar here as we are disregarding natural resolution of 0 in
+      # loop.
+      my $largest_size      = scalar(@wsizes);
+      my $found_largest_set = 0;
+
+      while ( !$found_largest_set ) {
+        $largest_size--;
+
+        if ( scalar( @{ $set_lengths{$largest_size} } ) > 0 ) {
+          $found_largest_set = 1;
+        }
+      }
+
+      my ($largest_chunk) =
+        sort { $b <=> $a } @{ $set_lengths{$largest_size} };
+
+      my @largest_windows = keys %{ $chunk_sets{$largest_chunk} };
+      @{ $chunk_windows{$largest_chunk} } = @largest_windows;
+
+      print "Largest chunk $largest_chunk($largest_size) "
+        . "contains windows: @largest_windows\n";
+
+      my %remaining_windows = map { $_ => {} } @wsizes;
+
+      # get rid of natural resolution as this will always work
+      delete $remaining_windows{'0'};
+
+      map { delete $remaining_windows{$_} } @largest_windows;
+      my $remaining_set_size = scalar( keys(%remaining_windows) );
+
+      # Use array here for practicality, would need to maintain hash if
+      # we need to iterate.
+      my @rwindows = keys(%remaining_windows);
+
+      # Could be one window, but this will not be in the co-occurence
+      # hash %chunk_sets.
+      my $next_chunk;
+
+      if ( scalar(@rwindows) == 1 ) {
+        my ($last_window) = @rwindows;
+        # Find a suitably large chunk for this one window.
+        $multiplier = int( 500000/$last_window );
+        $next_chunk = $multiplier*$last_window;
+      } else {
+
+        foreach my $chunk ( sort { $b <=> $a }
+                            @{ $set_lengths{$remaining_set_size} } )
+        {
+          my $seen_count = 0;
+
+          foreach my $rwindow (@rwindows) {
+            if ( grep /$rwindow/,
+                 ( values( %{ $chunk_sets{$chunk} } ) ) )
+            {
+              $seen_count++;
+            }
+          }
+
+          if ( $seen_count == $remaining_set_size ) {
+            $next_chunk = $chunk;
+            last;
+          }
+        }
+
+      }
+
+      @{ $chunk_windows{$next_chunk} } = @rwindows;
+
+      if ( defined($next_chunk) ) {
+        print "Found next chunk length $next_chunk "
+          . "contains remaining windows:\t@rwindows\n";
+      } else {
+        warn "Need to write iterative method for set definition";
+        throw(   'Could not find workable slice length '
+               . 'for remaining windows: '
+               . join( ', ', @rwindows ) );
+      }
+    } else {
+      @{ $chunk_windows{$chunk_length} } = keys(%workable_chunks);
+      print "Found workable chunk length $chunk_length  "
+        . "for all window sizes:\t"
+        . join( ' ', @{ $chunk_windows{$chunk_length} } ) . "\n";
+    }
+
+    $self->{'_slice_chunks'} = \%chunk_windows;
+  } ## end if ( !defined( $self->...))
+
+  return $self->{'_slice_chunks'};
+} ## end sub _get_Slice_chunks
+
+
+
+
+=head2 set_config
+
+  Arg[0]     : optional hash - parameter hash(see above methods for more info):
+
+        WINDOW_SIZES        => array ref - subset of defined window
+                               sizes
+        BIN_METHOD          => string
+        MAX_VIEW_WIDTH      => int
+        MAX_DATA_TYPE_SIZE  => int
+        PACK_TEMPLATE       => string
+        PACKED_SIZE         => int
+        BIN_MODEL           => string
+        NEW_ASSEMBLY        => string
+        METHOD_CONFIG       => hash of method specific config params
+        SKIP_ZERO_WINDOW    => boolean - skips generation of 0 wsize
+                               this is used if already generated
+                               from an assembly projection.
+
+               NOTE: Over-riding any of the default config may cause
+               problems when storing or retrieving Collection data,
+               except sub sets of default window sizes.
+
+  Description: This method replaces the constructor as new will not be
+               called for Adaptor based Collectors.
+               Separating this from the store method is currently 
+               redundant as jobs are normally submitetd in Slice based 
+               jobs. However, this will be required if the store method 
+               is further seaprated into fetch/generate and store methods
+  Returntype : None
+  Exceptions : Throws if no window sizes or max_view_width defined
+  Caller     : Inheritor Collector e.g. Bio::EnsEMBL::Funcgen:Collector::ResultFeature
+               or script.
+  Status     : At Risk
+
+=cut
+
+sub set_config {
+  my ( $self, %config ) = @_;
+
+  my ( $wsizes,       $bmethod,  $mv_width,
+       $md_type_size, $template, $psize,
+       $bmodel,       $new_assm, $skip_zero_window,
+       $method_config )
+    = rearrange( [ 'WINDOW_SIZES',     'BIN_METHOD',
+                   'MAX_VIEW_WIDTH',   'MAX_DATA_TYPE_SIZE',
+                   'PACK_TEMPLATE',    'PACKED_SIZE',
+                   'BIN_MODEL',        'NEW_ASSEMBLY',
+                   'SKIP_ZERO_WINDOW', 'METHOD_CONFIG' ],
+                 %config );
+
+  ### VAILDATE/SET VARS/CONFIG
+
+  # Attrs used in this method
+  $self->bin_method( $bmethod, $method_config );
+  $self->bin_model($bmodel);
+  $self->window_sizes($wsizes);
+
+  # Set to undef if we have empty array? To change this we need to
+  # pass the config hash -window_sizes conditionally
+  # This currently overwrite the defaults!
+  #  if ( ref($window_sizes) eq 'ARRAY'
+  #       && scalar( @{$window_sizes} ) == 0 )
+  #  {
+  #    $window_sizes = undef;
+  #  }
+
+  # Attrs used in other (store) methods
+  $self->pack_template($template);
+  $self->packed_size($psize);
+  $self->max_data_type_size($md_type_size);
+  $self->max_view_width($mv_width);
+
+  # Other vars
+  $self->new_assembly($new_assm);
+  $self->{'_only_natural'} = 0;
+  $self->{'_store_natural'} = grep /^0$/, @$window_sizes;
+
+  ### Set window_sizes
+
+  if ( $self->new_assembly() ) {
+    print "Assembly projection may cause problems "
+      . "for large Collections, "
+      . "defaulting to window_sizes = (0)\n";
+
+
+	if ( $skip_zero_window ) {
+	  throw(   "You cannot -skip_zero_window or "
+			   . "omit 0 from -window_sizes "
+			   . "when projecting to a new assembly($new_assm) "
+			   . "which should only be generated using window_size=0" );
+	}
+	
+
+
+
+    # Then build the bins on the projected 0 level single Features
+
+    # Test we haven't explicity set window_sizes to be something else
+    if ( defined($wsizes)
+         && !( scalar(@$wsizes) == 1 && $wsizes->[0] == 0 ) )
+    {
+      throw(   "You have set window_sizes config "
+             . "which are not safe when projecting to "
+             . "a new assembly($new_assm), "
+             . "please omit window_sizes config or set to 0" );
+    }
+
+    $self->window_sizes( [0] );
+  } else {
+
+    if ( $wsizes && $skip_zero_window && 
+		 ( grep /^0$/, @$wsizes )) {
+	  #Only test passed params not default config
+
+      throw(   "You have specied skip_zero_window "
+             . "and window_size 0 in your parameters, "
+             . "please remove one of these" );
+    } 
+	elsif ( defined($window_sizes) && !grep /^0$/, @$window_sizes ) {
+      $skip_zero_window = 1;
+      # re-add 0 window as we need this to build the collections
+	  # see ...
+      unshift( @{$window_sizes}, 0 );
+    }
+  }
+
+ 
+  if ( $self->{'_store_natural'} && scalar( @{$window_sizes} ) == 1 ) {
+    $self->{'_only_natural'} = 1;
+  }
+  if ($skip_zero_window) {
+    $self->{'_store_natural'} = 0;
+  }
+
+  return;
+} ## end sub set_config
+
+=head2 store_window_bins_by_Slice
+
+  Arg[0]     : Bio::EnsEMBL:Slice
+  Example    : $collector->store_window_bins_by_Slice($slice);
+  Description: This is the main run method, it loops through
+               optimal slice chunks from _define_window_chunks,
+               calls _bin_features_by_Slice as appropriate and
+               calls write_collection in the inheriting Collector
+               class/script.
+  Returntype : None
+  Exceptions : Throws if Bio::EnsEMBL::Slice is not defined
+  Caller     : store methods in inheriting Collector class/script
+  Status     : At Risk
+
+=cut
+
+sub store_window_bins_by_Slice {
+  my ( $self, $slice ) = @_;
+
+  warn "Need to be careful here "
+    . "about cleaning start end strand caches between "
+    . "serially run slices";
+
+  if ( !(    defined($slice)
+          && ref($slice)
+          && $slice->isa('Bio::EnsEMBL::Slice') ) )
+  {
+    throw('You must pass a valid Bio::EnsEMBL::Slice');
+  }
+
+  # Rollback previously stored features.
+  # Change 'can' to empty method stubb with pod ???
+  if ( $self->can('rollback_Features_by_Slice') ) {
+    $self->rollback_Features_by_Slice($slice);
+  } else {
+    warn ref($self)
+      . " cannot rollback_Features_by_Slice. "
+      . "This may result in storage failure "
+      . "or duplicate Collections if there is pre-existing data";
+  }
+
+  ### PROCESS CHUNKS
+  my %chunk_windows = %{ $self->_get_Slice_chunks };
+  my (%counts);
+  my $store_natural = $self->{'_store_natural'};
+  my $only_natural  = $self->{'_only_natural'};
+  $counts{0} = 0;    # Set natural res count to 0
+  my $slice_end       = $slice->end;
+  my $orig_start      = $slice->start;
+  my $region          = $slice->coord_system_name;
+  my $version         = $slice->coord_system->version;
+  my $seq_region_name = $slice->seq_region_name;
+  my $strand          = $slice->strand;
+
+  # Warn if this is not a full slice.  Version needed in case we are
+  # projecting from a non-default version slice
+  my $full_slice =
+    $slice->adaptor->fetch_by_region( $region, $seq_region_name, undef,
+                                      undef, undef, $version );
+
+  if (    ( $full_slice->start() != $orig_start )
+       || ( $full_slice->end() != $slice_end ) )
+  {
+    warn "Generating collections using sub-Slices "
+      . "can result in data issues/artifacts";
+    # Last chunk might not be the correct window length.  Test
+    # slices less than chunk length can cause failures in
+    # _bin_features_by_window_sizes others?
+  }
+
+  # Set the initial collection_start to orig_start.  This is not the
+  # case for 0 wsize where it must always be the true feature start.
+  for my $wsize (@$window_sizes) {
+    if ( $wsize == 0 ) { next }
+    $self->collection_start( $wsize, $orig_start );
+
+    # Also reset collection end and score cache in case we are running
+    # serially.
+    $self->{collection_end}{$wsize} = undef;
+    $self->{'score_cache'}{$wsize} = [];
+  }
+
+  my $first_chunk_length = 1;
+
+  foreach my $chunk_length ( sort keys %chunk_windows ) {
+    print "Processing windows "
+      . join( ', ', @{ $chunk_windows{$chunk_length} } )
+      . " with chunk length $chunk_length\n";
+
+    # Set window counts to 0
+    map $counts{$_} = 0, @{ $chunk_windows{$chunk_length} };
+
+    # May need to reset flat file parser handle or other caches via
+    # inheriting Collector
+    if ( !$first_chunk_length ) {
+      # Change 'can' to empty method stubb with pod???
+      if ( $self->can('reinitialise_input') ) {
+        $self->reinitialise_input();
+      }
+    }
+
+    $first_chunk_length = 0;
+
+    # Now walk through slice using slice length chunks and build all
+    # windows in each chunk.
+    my $in_slice  = 1;
+    my $start_adj = 0;
+    my ( $sub_slice, $sub_end, $features, $bins );
+    my $sub_start    = 1;
+    my $slice_length = $slice->length();
+
+    # Always create in local coords for fetch
+    # Then change to seq_region coords for store if required
+
+    while ($in_slice) {
+      $sub_start += $start_adj;
+      $sub_end = $sub_start + $chunk_length - 1;
+
+      if ( $sub_end >= $slice_length ) {
+        # Surplus bins are removed in store/write_collection in caller
+        $in_slice = 0;
+      }
+
+      $sub_slice =
+        $slice->adaptor->fetch_by_region( $region, $seq_region_name,
+                                          $sub_start + $orig_start - 1,
+                                          $sub_end + $orig_start - 1,
+                                          $strand, $version );
+
+      # Can't subslice as this will not clip if we go over the length of
+      # the slice, unlike normal slice fetching.  Will clipping the end
+      # to the slice end cause any problems here?  How will this affect
+      # bin clipping?
+
+      ### Grab features and shift chunk coords
+      $features = $self->get_Features_by_Slice($sub_slice);
+
+      # warn "Binning "
+      #   . scalar(@$features)
+      #   . " Features for chunk length $chunk_length, on Slice "
+      #   . $sub_slice->name;
+
+      if ( ( @{$features} )
+         && ref( $features->[0] ) =~ /Bio::EnsEMBL::Utils::Collection/ )
+      {
+        # Would need to create base module with generic methods:
+        # window_size, ...
+
+      # Check that the returned feature/collections support window_size.
+      # All Collections should be able to
+
+        if ( $features->[0]->can('window_size') ) {
+          if ( $features->[0]->window_size != 0 ) {
+            throw(   "You are trying to generated Collections from "
+                   . "a non-zero window sized Collection:\t"
+                   . $features->[1]->{'window_size'} );
+          }
+
+        # This should never happen
+        # if ( !$skip_zero_window ) {
+        #   throw( 'You have retrieved data from a Collection '
+        #        . 'which without using -skip_zero_window '
+        #        . 'i.e. you are trying to generate overwrite '
+        #        . 'the data you are generating the Collections from' );
+        # }
+
+        } else {
+          throw(   'Something is wrong, '
+                 . 'the Collection you have retrieved '
+                 . 'does not support the method window_size' );
+        }
+      } ## end if ( ( @{$features} ) ...)
+
+      # Set collection start here for 0 window_size
+      if (    @{$features}
+           && $store_natural
+           && !defined( $self->collection_start(0) ) )
+      {
+        $self->collection_start( 0,
+                                 $features->[0]->start + $sub_start );
+      }
+
+      if ($in_slice) {
+        $start_adj = $chunk_length;
+      }
+
+      # Collect features into wsize bins
+      if ( !$only_natural ) {
+        # Get hashref of wsize=>bin array pairs
+        $bins =
+          $self->_bin_features_by_Slice_window_sizes(
+                         -slice        => $sub_slice,
+                         -window_sizes => $chunk_windows{$chunk_length},
+                         -features     => $features, );
+      }
+
+      # Handle 0 wsize
+      if ($store_natural) {
+        foreach my $feature ( @{$features} ) {
+          $counts{0}++;
+
+          if ( $bin_model eq 'SIMPLE' ) {
+            $self->collection_start( 0, $feature->start + $sub_start );
+
+            $self->write_collection(
+              0,
+              $slice,    # Pass Slice to sub-slice when storing
+              $feature->end + $sub_start,
+              $feature->strand,   # Need to pass strand for 0 resolution
+              $feature->scores, );
+          }
+        }
+
+        print "Window size 0 (natural resolution) has "
+          . scalar( @{$features} )
+          . " feature bins for:\t"
+          . $sub_slice->name . "\n";
+      }
+
+      # Now store collections for wsizes >0
+      my $num_bins;
+
+      foreach my $wsize ( sort keys( %{$bins} ) ) {
+        $num_bins = scalar( @{ $bins->{$wsize} } );
+        $counts{$wsize} += $num_bins;
+
+        if ( $bin_model eq 'SIMPLE' ) {
+          $self->write_collection(
+            $wsize,
+            $slice,
+            #$sub_start,
+            $sub_end,
+            $slice->strand,    # This is most likely 1!
+             # Override this woth 0 in descendant Collector if required.
+            $bins->{$wsize}, );
+
+        } else {
+          throw(   'Bio::EnsEMBL::Utils::Collector '
+                 . 'does not yet support non-SIMPLE bin models' );
+          # i.e. More than one score
+        }
+      }
+    } ## end while ($in_slice)
+
+    # Turn off storing of natural resolution for next chunk length sets
+    $store_natural = 0;
+  } ## end foreach my $chunk_length ( ...)
+
+  # Write last collections for each wsize
+
+  foreach my $wsize (@$window_sizes) {
+
+    if (    ( $wsize == 0 && !$store_natural )
+         || ( $wsize != 0 && $only_natural ) )
+    {
+      next;
+    }
+
+    print "Writing final $wsize window_size collection, "
+      . "this may result in slightly different "
+      . "bin numbers from counts due to removing "
+      . "overhanging bins past end of slice\n";
+    $self->write_collection( $wsize, $slice );
+  }
+
+  # Print some counts
+  foreach my $wsize ( sort ( keys %counts ) ) {
+    print "Generated "
+      . $counts{$wsize}
+      . " bins for window size $wsize for "
+      . $slice->name . "\n";
+    # Some may have failed to store if we are projecting to a new
+    # assembly.
+  }
+
+  return;
+} ## end sub store_window_bins_by_Slice
+
+=head2 _bin_features_by_Slice_window_sizes
+
+  Args[0]    : Bio::EnsEMBL::Slice
+  Args[1]    : ARRAYREF of window sizes
+  Args[2]    : ARRAYREF of features with start and end method
+               e.g. Bio::EnsEMBL::Features
+  Example    :
+
+      $bins =
+        $self->_bin_features_by_window_sizes(
+                         -slice        => $slice,
+                         -window_sizes => $chunk_windows{$chunk_length},
+                         -features     => $features, );
+
+  Description: Bins feature scores for a given list of window sizes and
+               predefined method.
+  Returntype : HASHREF of scores per bin per window size
+  Exceptions : None
+  Caller     : store_window_bins_by_Slice
+  Status     : At Risk
+
+=cut
+
+sub _bin_features_by_Slice_window_sizes {
+  my ( $self, @args ) = @_;
+
+  my ( $slice, $wsizes, $features ) =
+    rearrange( [ 'SLICE', 'WINDOW_SIZES', 'FEATURES' ], @args );
+
+  # Generate these once in caller?
+  my $calc_method = '_calculate_' . $bin_method;
+  my $post_method = '_post_process_' . $bin_method;
+
+  # Do this conditional on the Collection type i.e. is
+  # collection seq_region blob then no else yes Would need
+  # $Bio::EnsEMBL::Utils::Collector::collection_format=BLOB|STANDARD
+  # if ( !defined($features) || !@{$features} ) { return {} }
+
+  # Set up some hashes to store data by window_size
+  my ( %bins, %nbins, %bin_counts );
+  my $slice_start  = $slice->start();
+  my $slice_length = $slice->length();
+
+  # Set up some bin data for the windows
+  foreach my $wsize (@$wsizes) {
+    $nbins{$wsize} = int( $slice_length/$wsize );    # int rounds down
+         # nbins is index of the bin not the 'number'
+         # Unless $slice_length is a multiple!
+    if ( !( $slice_length % $wsize ) ) { $nbins{$wsize}-- }
+
+    # Create default bins with 0
+    $bins{$wsize} = [];
+    map { $bins{$wsize}->[$_] = 0 } ( 0 .. $nbins{$wsize} );
+
+    # Set bin counts to 0 for each bin
+    $bin_counts{$wsize} = [];
+
+    # This is adding an undef to the start of the array!?
+    map { $bin_counts{$wsize}->[ ($_) ] = 0 } @{ $bins{$wsize} };
+
+    foreach my $bin ( @{ $bins{$wsize} } ) {
+      $bin_counts{$wsize}->[$bin] = 0;
+    }
+  }
+
+  my $feature_index = 0;
+  my ( $bin_index, @bin_masks );
+
+  foreach my $feature ( @{$features} ) {
+    # Set up the bins for each window size
+
+    foreach my $wsize (@$wsizes) {
+      my $start_bin = int( ( $feature->start )/$wsize );
+      my $end_bin   = int( ( $feature->end )/$wsize );
+
+      if ( $end_bin > $nbins{$wsize} ) {
+        $end_bin = $nbins{$wsize};
+      }
+
+      $self->$calc_method( $feature, $start_bin, $end_bin,
+                           $wsize,   \%bins,     \%bin_counts );
+    }
+
+  }
+
+  # Now do post processing of bins if required
+  if ( $self->can($post_method) ) {
+    $self->$post_method( \%bins, \%bin_counts );
+  }
+
+  return \%bins;
+} ## end sub _bin_features_by_Slice_window_sizes
+# end sub _bin_features_by_Slice
+
+
+### Here follows the bin methods
+# These may also be defined in the inheriting Collector class.  No tests
+# as these are internal and require speed.
+
+
+=head2 _calculate_count
+
+  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
+  Args[1]    : int - start bin
+  Args[2]    : int - end bin
+  Args[3]    : int - window_size
+  Args[4]    : hashref - score bins
+  Example    : $self->$calc_method
+  Description: Adds count to bins which this feature overlaps
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut
+
+sub _calculate_count {
+  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
+
+  my $bin_index;
+
+  for ( $bin_index = $start_bin; $bin_index <= $end_bin; ++$bin_index )
+  {
+    $bins_ref->{$wsize}->[$bin_index]++;
+  }
+
+  return;
+}
+
+
+=head2 _calculate_average_score
+
+  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
+  Args[1]    : int - start bin
+  Args[2]    : int - end bin
+  Args[3]    : int - window_size
+  Args[4]    : hashref - score bins
+  Example    : $self->$calc_method
+  Description: Adds score to bins which this feature overlaps
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut
+
+
+sub _calculate_average_score {
+  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref,
+       $bin_counts_ref )
+    = @_;
+
+  # This is simple an average of all the scores for features which
+  # overlap this bin.  No weighting with respect to the bin or the
+  # feature.
+
+  my $score = $self->get_score_by_Feature($feature);
+
+  for ( my $bin_index = $start_bin;
+        $bin_index <= $end_bin;
+        ++$bin_index )
+  {
+    # We should really push onto array here so we can have median or
+    # mean.
+
+    $bins_ref->{$wsize}->[$bin_index] += $score;
+    $bin_counts_ref->{$wsize}->[$bin_index]++;
+  }
+
+  return;
+}
+
+
+=head2 _post_process_average_score
+
+  Args[0]    : hashref - score bins
+  Args[1]    : hashref - count bins
+  Example    : $self->$post_method
+  Description: Post processes bins to calculate average score
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut	
+
+sub _post_process_average_score {
+  my ( $self, $bins_ref, $bin_counts_ref ) = @_;
+
+  foreach my $wsize ( keys %{$bins_ref} ) {
+    foreach my $bin_index ( 0 .. $#{ $bins_ref->{$wsize} } ) {
+
+      if ( $bin_counts_ref->{$wsize}->[$bin_index] ) {
+        $bins_ref->{$wsize}->[$bin_index] /=
+          $bin_counts_ref->{$wsize}->[$bin_index];
+      }
+
+    }
+  }
+
+  return;
+}
+
+
+=head2 _calculate_max_magnitude
+
+  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
+  Args[1]    : int - start bin
+  Args[2]    : int - end bin
+  Args[3]    : int - window_size
+  Args[4]    : hashref - score bins
+  Example    : $self->$calc_method
+  Description: Sets max +/-ve scores for bins which this feature overlaps
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut
+
+sub _calculate_max_magnitude {
+  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
+
+  my $score = $self->get_score_by_Feature($feature);
+
+  # Max magnitude
+  # Take the highest value +ve or -ve score
+  for ( my $bin_index = $start_bin;
+        $bin_index <= $end_bin;
+        ++$bin_index )
+  {
+
+    # We really need to capture the lowest -ve and higest +ve scores
+    # here and post process to pick between them.
+
+    $bins_ref->{$wsize}->[$bin_index] ||= [ 0, 0 ];    #-ve, +ve
+
+    if ( $score < $bins_ref->{$wsize}->[$bin_index]->[0] ) {
+      $bins_ref->{$wsize}->[$bin_index]->[0] = $score;
+    } elsif ( $score > $bins_ref->{$wsize}->[$bin_index][1] ) {
+      $bins_ref->{$wsize}->[$bin_index]->[1] = $score;
+    }
+  }
+
+  return;
+} ## end sub _calculate_max_magnitude
+
+
+=head2 _post_process_max_magnitude
+
+  Args[0]    : hashref - score bins
+  Args[1]    : hashref - count bins
+  Example    : $self->$post_method
+  Description: Post processes bins to pick largest +ve or -ve score
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut	
+
+sub _post_process_max_magnitude {
+  my ( $self, $bins_ref ) = @_;
+
+  # Take the highest value +ve or -ve score
+
+  foreach my $wsize ( keys %{$bins_ref} ) {
+    foreach my $bin_index ( 0 .. $#{ $bins_ref->{$wsize} } ) {
+
+      # Have potential for no listref in a given bin
+
+      # default value if we haven't seen anything is 0
+      # Actually want an array of -ve +ve values
+
+      if ( $bins_ref->{$wsize}->[$bin_index] ) {
+        my $tmp_minus = -$bins_ref->{$wsize}->[$bin_index]->[0];
+
+        if ( $tmp_minus > $bins_ref->{$wsize}->[$bin_index]->[1] ) {
+          $bins_ref->{$wsize}->[$bin_index] =
+            $bins_ref->{$wsize}->[$bin_index]->[0];
+        } else {
+          $bins_ref->{$wsize}->[$bin_index] =
+            $bins_ref->{$wsize}->[$bin_index]->[1];
+        }
+
+      }
+
+    }
+  }
+  return;
+} ## end sub _post_process_max_magnitude
+
+
+=head2 _calculate_RPKM
+
+  Args[0]    : feature e.g. Bio::EnsEMBL::Feature
+  Args[1]    : int - start bin
+  Args[2]    : int - end bin
+  Args[3]    : int - window_size
+  Args[4]    : hashref - score bins
+  Example    : $self->$calc_method
+  Description: Stores counts to calculate Read Per Kb per Million(RPKM)
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut
+
+sub _calculate_RPKM {
+  my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
+
+  $self->_calculate_count( $feature, $start_bin, $end_bin,
+                           $wsize,   $bins_ref );
+
+  return;
+}
+
+
+=head2 _post_process_RPKM
+
+  Args[0]    : hashref - score bins
+  Args[1]    : hashref - count bins
+  Example    : $self->$post_method
+  Description: Post processes bins to calculate average score
+  Returntype : None
+  Exceptions : None
+  Caller     : _bin_features_by_window_sizes
+  Status     : At Risk
+
+=cut	
+
+sub _post_process_RPKM {
+  my ( $self, $bins_ref ) = @_;
+
+  #10^9 x C / NGB
+  #C = Reads overlapping bin
+  #N = Total reads in the experiment
+  #G = Length of bin in bps 
+  #(don't really have to account for non-ref/HAPs or gender here 
+  #as should be close enough, CellTypes/gender differences will be miniscule)
+  #B = length of each bin
+
+  foreach my $wsize(keys %{$bins_ref}){
+	
+	foreach my $bin_index(0..$#{$bins_ref->{$wsize}}){
+	  $bins_ref->{$wsize}->[$bin_index] =  
+		((10**9) *  
+		 $bins_ref->{$wsize}->[$bin_index])/(($self->_RPKM_factor($wsize)) * $wsize);
+	}
+  }
+
+  return;
+
+}
+
+
+=head2 _set_up_RPKM
+
+  Args[0]    : hashref - method config e.g
+                 {
+                  DNADB         => Bio::EnsEMBL::DBSQL::DBAdaptor,
+                  TOTAL_FEATURE => $total_feature_count,
+                 }
+
+  Example    : $self->$set_up_method($config);
+  Description: Sets the RPKM factor
+  Returntype : None
+  Exceptions : Throws is required config params are not set
+  Caller     : bin_method
+  Status     : At Risk
+
+=cut	
+
+
+sub _set_up_RPKM{
+  my ($self, $config) = @_;
+
+  my ($dnadb, $total_features) = rearrange([ 'DNADB', 'TOTAL_FEATURES'], %{$config});
+  
+  #Test specifically here to notify about config hash
+  if(! $total_features){
+	throw("For RPKM you must pass a valid 'total_features' ".
+		  "as part of the method config hash.");
+  }
+  
+  if(! $dnadb){
+	throw("For RPKM you must pass 'dnadb' as part of the method config hash.");
+  }
+
+  foreach my $wsize(@{$self->window_sizes}){
+	#Should never have 0 here
+	$self->_RPKM_factor($wsize, ($wsize * $total_features));	#N*G
+
+	warn "setting $wsize RPKM factor($wsize * $total_features) to ".	
+	  $self->_RPKM_factor($wsize);
+  }
+  
+  return;
+} ## end sub _set_up_RPKM
+
+
+=head2 _RPKM_factor
+
+  Args[0]    : int - RPKM factor i.e.  (Total reads in the experiment *
+               Genome length)
+  Example    : $self->_RPKM_factor($wsize, $factor);
+  Description: Gets/Sets the RPKM factor
+  Returntype : int
+  Exceptions : None
+  Caller     : _set_up_RPKM, _post_process_RPKM
+  Status     : At Risk
+
+=cut	
+
+sub _RPKM_factor{
+  my ($self, $wsize, $factor) = @_;
+
+  if (! defined $wsize){
+	throw("You must pass at least window_size to get or set the RPKM factor");
+  }
+
+  if(defined $factor){
+	$self->{'RPKM_factor'}{$wsize} = $factor;
+  }
+  elsif(! exists $self->{'RPKM_factor'}{$wsize}){
+	#This should never happen unless the window sizes 
+	#are redefined after initialisation
+	throw("You have requested an RPKM factor for a window_size".
+		  " which has not been set:\t$wsize");
+  }
+
+  return $self->{'RPKM_factor'}{$wsize};
+}
+
+=head2 get_diploid_genome_length_by_gender
+
+  Args[0]    : string - RPKM factor i.e.  (Total reads in the experiment *
+               Genome length)
+  Args[1]    : string - gender e.g. male or female
+  Example    :
+
+      my $glength =
+        $self->get_diploid_genome_length_by_gender( $dnadb, $gender );
+
+  Description: Gets the gender specific diploid genome length,
+               including non-ref but not including haplotypes.  Only
+               handles species with X/Y sex chromosomes.
+  Returntype : int
+  Exceptions : None
+  Caller     : _set_up_RPKM, _post_process_RPKM
+  Status     : At Risk - Move to and export from generic Utils Slice module???
+
+=cut	
+
+sub get_diploid_genome_length_by_gender {
+  my ( $dnadb, $gender ) = @_;
+
+  my %sex_chrs = ( 'Y' => 'male',
+                   'X' => 'female', );
+
+  my $dip_length = 0;
+
+  if (!(( ref($dnadb) && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') )
+        && $dnadb->grou() eq 'core'
+        && ( defined $gender && $gender =~ /(male|female)/ ) ) )
+  {
+    throw(   "Must provide valid "
+           . "Bio::EnsEMBL::DBSQL::DBAdaptor($dnadb) and "
+           . "gender ($gender) arguments" );
+  }
+
+  my @ref_slices = $dnadb->get_SliceAdaptor->fetch_all('toplevel');
+
+  # Include non-ref(unassembled), but omit haps/lrgs(i.e. redundant)
+
+  foreach my $slice (
+     @{ $dnadb->get_SliceAdaptor->fetch_all( 'toplevel', undef, 1, 1 ) }
+    )
+  {
+    # Include duplicated region for true diploid length
+
+    # Skip haps/lrgs
+    if ( ( $slice->coord_system->name() eq 'chromosome'
+           && !$slice->is_reference() )
+         || $slice->coord_system->name() eq 'lrg' )
+    {
+      next;
+    }
+
+    if ( exists( $sex_chrs{ $slice->seq_region_name() } ) ) {
+      if ( $gender eq 'male' ) {
+        $dip_length += $slice->length;
+      } elsif ( $sex_chrs{ $slice->seq_region_name } eq 'male' ) {
+        next;
+      }
+    }
+
+    $dip_length += 2*$slice->length;
+  }
+
+  return $dip_length;
+} ## end sub get_diploid_genome_length_by_gender
+
+
+1;