0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Utils::Collector
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 # Inherit this base module in your feature specific Collector
|
|
28 # instance:
|
|
29
|
|
30 package Bio::EnsEMBL::Funcgen::Collector::ResultFeature;
|
|
31 use base('Bio::EnsEMBL::Utils::Collector');
|
|
32
|
|
33 # ... and define package config variables
|
|
34 $Bio::EnsEMBL::Funcgen::Collector::bin_model = 'SIMPLE';
|
|
35 $Bio::EnsEMBL::Funcgen::Collector::window_sizes =
|
|
36 [ 30, 65, 130, 260, 450, 648, 950, 1296 ];
|
|
37 # Could replace 30 with 0 here for low density data at natural resolution
|
|
38
|
|
39 $Bio::EnsEMBL::Utils::Collector::bin_method =
|
|
40 'count'; # only used by collector
|
|
41
|
|
42 $Bio::EnsEMBL::Utils::Collector::packed_size = 2;
|
|
43
|
|
44 # ... or simply use this module in a script either defining package
|
|
45 # config variables, or passing as parameters to the constructor:
|
|
46
|
|
47 my $collector =
|
|
48 Bio::EnsEMBL::Utils::BaseCollector->new( -pack_template => 'v' );
|
|
49
|
|
50 $Bio::EnsEMBL::Funcgen::Collector::pack_template = 'v';
|
|
51
|
|
52 # Config variables can also be over-ridden by passing a config hash to
|
|
53 # the store_window_bins_by_Slice() method:
|
|
54
|
|
55 $collector->store_window_bins_by_Slice( $slice, (
|
|
56 -pack_template => 'v',
|
|
57 -packed_size => 2 ) );
|
|
58
|
|
59 # NOTE: Over-riding default config variables can cause problems when
|
|
60 # storing or fetching data. e.g. Fetch may revert to using defaults or
|
|
61 # table partitions may not match window sizes.
|
|
62
|
|
63 =head1 DESCRIPTION
|
|
64
|
|
65 This package is the base Collector class which contains generic
|
|
66 getter/setter methods along with the main 'collecting' methods which
|
|
67 perform the majority of the work in generating compressed data
|
|
68 collections optimised for web display. The bins produced are aimed at
|
|
69 storage in a BLOB representing an entire seq_region i.e. even bins with
|
|
70 no features/null data are encoded as a 0 score. Non-BLOB collections
|
|
71 are currently not supported.
|
|
72
|
|
73 If your Collection class defines a Bio::EnsEMBL::Feature, then its
|
|
74 adaptor should inherit from the relevant Collection class.
|
|
75
|
|
76 The minimum prerequisites of the input features/data are that they have
|
|
77 a start() and end() method. For instance a Bio::EnsEMBL::Features
|
|
78 generated from a database or parsed from a flat file.
|
|
79
|
|
80 NOTE: This Collector does not have a lightweight mode previously used
|
|
81 for dynamic/on the fly collecting i.e. it does not take advantage of
|
|
82 bypassing object creation via the related BaseFeatureAdaptor method.
|
|
83
|
|
84 =cut
|
|
85
|
|
86 package Bio::EnsEMBL::Utils::Collector;
|
|
87
|
|
88 use Bio::EnsEMBL::Utils::Argument ('rearrange');
|
|
89 use Bio::EnsEMBL::Utils::Exception ('throw');
|
|
90 use strict;
|
|
91 use warnings;
|
|
92
|
|
93 ### Global package config vars
|
|
94
|
|
95 # Defaults
|
|
96 our $max_view_width = 1000000; # Max bp width in location/detailed view
|
|
97 our $max_data_type_size = 16777216; # Default is 16MB for long BLOB
|
|
98 # This is really a guide value as this should be set in the inheriting
|
|
99 # Collector class by deducting the rest of the row size from this value.
|
|
100 # Is is upto the inheritor to handle checking whether this size has been
|
|
101 # exceeded.
|
|
102
|
|
103 # NOTE: Theoretically the min window size is: slice_length/(16777216/2)
|
|
104 # So for human chr1: 249,250,621/(16777216/2) = 29.7 => 30. However,
|
|
105 # this size does not seem to directly translate to the MySQL
|
|
106 # max_allowed_packet_size. Increasing max_allowed_packet_size to 64MB
|
|
107 # solves this issue, and substr operation doesn't appear to incur any of
|
|
108 # the potential memory(4*) usage issues.
|
|
109
|
|
110 # Others global package variables which are set in the inheriting
|
|
111 # Collector class.
|
|
112 our ( $bin_model, $bin_method, $pack_template,
|
|
113 $packed_size, $window_sizes );
|
|
114
|
|
115 =head2 new
|
|
116
|
|
117 Args : None
|
|
118 Example :
|
|
119
|
|
120 my $collector = Bio::EnsEMBL::XXX::Collector::FEATURE->new();
|
|
121 $collector->store_windows_by_Slice($slice);
|
|
122
|
|
123 # Where XXX is, e.g. Compara, FuncGen etc.
|
|
124
|
|
125 Description: Simple new method to enable use of collector
|
|
126 when not inherited by a descendant of
|
|
127 Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
|
|
128
|
|
129 Returntype : Bio::EnsEMBL::XXX::Collector
|
|
130 Exceptions : None
|
|
131 Caller : Collector script
|
|
132 Status : At Risk
|
|
133
|
|
134 =cut
|
|
135
|
|
136 sub new {
|
|
137 return bless {}, $_[0]; # Simply blesses this class as an empty hash.
|
|
138
|
|
139 # Do not set anything here, as will not be first in ISA for
|
|
140 # FeatureAdaptors. Hence, not guaranteed to be called.
|
|
141 }
|
|
142
|
|
143 =head2 new_assembly
|
|
144
|
|
145 Args : optional - string assembly version e.g. GRCh37
|
|
146 Example : $collector->new_assembly('GRCh37');
|
|
147 Description: Getter/Setter for new assembly version which should be
|
|
148 used to project only 0 wsize Collections.
|
|
149 Returntype : string
|
|
150 Exceptions : None
|
|
151 Caller : store_window_bins_by_Slice() or
|
|
152 write_collection() in inheriting Collector class.
|
|
153 Status : At Risk
|
|
154
|
|
155 =cut
|
|
156
|
|
157 sub new_assembly {
|
|
158 my ( $self, $new_asm ) = @_;
|
|
159
|
|
160 if ( defined($new_asm) ) {
|
|
161 $self->{'new_assembly'} = $new_asm;
|
|
162 }
|
|
163
|
|
164 return $self->{'new_assembly'};
|
|
165 }
|
|
166
|
|
167 ### Setter/Getter methods for basic/mandatory config
|
|
168 # Can also be set using package variables in the inheriting
|
|
169 # Collector/Adaptor or run script. Allows over-riding of defaults set
|
|
170 # in Adaptor/Collector.
|
|
171
|
|
172 # Package variables used here instead of attrs to enable easy
|
|
173 # default config in inheriting class/script method. Provided
|
|
174 # for easy/standardised fetch access outside of this package
|
|
175 # i.e. Collectors/Adaptors
|
|
176
|
|
177 =head2 max_data_type_size
|
|
178
|
|
179 Args : optional - int Maximum size of collection in bytes
|
|
180 Example : $collector->max_data_type_size($new_max_size);
|
|
181 Description: Getter/Setter for max_data_type_size, default is
|
|
182 currently set at in this class as 16777216 (16MB), for
|
|
183 long BLOB. This is used by the write_collection()
|
|
184 method to determine when to build and store a compressed
|
|
185 collection.
|
|
186 Returntype : int
|
|
187 Exceptions : None
|
|
188 Caller : bins_per_record() and
|
|
189 write_collection() in inheriting Collector class.
|
|
190 Status : At Risk
|
|
191
|
|
192 =cut
|
|
193
|
|
194 sub max_data_type_size {
|
|
195 my ( $self, $size ) = @_;
|
|
196
|
|
197 # Validate is sensible integer
|
|
198
|
|
199 if ( defined($size) ) {
|
|
200 if ( $size !~ /^\d+$/ ) {
|
|
201 throw("max_data_type_size must be a integer of bytes, not $size");
|
|
202 }
|
|
203 $max_data_type_size = $size;
|
|
204 } elsif ( !defined($max_data_type_size) ) {
|
|
205 # This should never happen as we have defaults in this module.
|
|
206 throw( 'You must define a '
|
|
207 . '$Bio::EnsEMBL::Utils::Collector::max_data_type_size '
|
|
208 . 'or pass -max_data_type_size config' );
|
|
209 }
|
|
210
|
|
211 return $max_data_type_size;
|
|
212 }
|
|
213
|
|
214 =head2 max_view_width
|
|
215
|
|
216 Args : optional - int Maximum width of view
|
|
217 Example : $collector->max_view_width($new_max_width);
|
|
218 Description: Getter/Setter for max_view_width, default is currently
|
|
219 set at in this class as 500000bp, for maximum level of
|
|
220 zoom permitted by location view.
|
|
221 Returntype : int
|
|
222 Exceptions : None
|
|
223 Caller : general
|
|
224 Status : At Risk
|
|
225
|
|
226 =cut
|
|
227
|
|
228 sub max_view_width {
|
|
229 my ( $self, $size ) = @_;
|
|
230
|
|
231 # Validate is sensible integer
|
|
232
|
|
233 if ( defined($size) ) {
|
|
234 if ( $size !~ /^\d+$/ ) {
|
|
235 throw("max_view_width must be a integer, not $size");
|
|
236 }
|
|
237 $max_view_width = $size;
|
|
238 } elsif ( !defined $max_view_width ) {
|
|
239 # This should never happen as we have defaults in this module.
|
|
240 throw( 'You must define a '
|
|
241 . '$Bio::EnsEMBL::Utils::Collector::max_view_width '
|
|
242 . 'or pass -max_view_width config' );
|
|
243 }
|
|
244
|
|
245 return $max_view_width;
|
|
246 }
|
|
247
|
|
248 =head2 bin_method
|
|
249
|
|
250 Args[0] : optional - string name of bin method e.g. 'max_magnitude'
|
|
251 Args[1] : optional - Bio::EnsEMBL::Funcgen::Parsers::InputSet
|
|
252 Example : my $bin_method = $self->bin_method();
|
|
253 Description: Getter/Setter for bin_method, default is normally set in
|
|
254 the inheriting Collector class either by package variable
|
|
255 or by passing a config hash via the store methods.
|
|
256 Returntype : string
|
|
257 Exceptions : Throws if cannot set by package variable
|
|
258 Caller : general
|
|
259 Status : At Risk
|
|
260
|
|
261 =cut
|
|
262
|
|
263 sub bin_method {
|
|
264 my ( $self, $bmethod, $config ) = @_;
|
|
265
|
|
266 if ( defined($bmethod) ) {
|
|
267 $bin_method = $bmethod;
|
|
268 }
|
|
269
|
|
270 if ( !defined($bin_method) ) {
|
|
271 throw( 'You must define a '
|
|
272 . '$Bio::EnsEMBL::Utils::Collector::bin_method '
|
|
273 . 'or pass -bin_method config' );
|
|
274 }
|
|
275
|
|
276 if ( !$self->can( "_calculate_" . $bin_method ) ) {
|
|
277 throw("$bin_method is not a valid/available binning method");
|
|
278 }
|
|
279
|
|
280 my $set_up_method = "_set_up_" . $bin_method;
|
|
281 if ( $self->can($set_up_method) ) {
|
|
282 $self->$set_up_method($config);
|
|
283 }
|
|
284
|
|
285 return $bin_method;
|
|
286 }
|
|
287
|
|
288 =head2 bin_model
|
|
289
|
|
290 Args : optional - string bin model e.g. SIMPLE or COMPLEX
|
|
291 Example : my $bin_model = $self->bin_model;
|
|
292 Description: Getter/Setter for bin_model, default should be set in
|
|
293 inheriting Collector class. Currently only supports
|
|
294 'SIMPLE' bin model.
|
|
295 Returntype : string
|
|
296 Exceptions : Throws if bin_model is not SIMPLE
|
|
297 Caller : general
|
|
298 Status : At Risk
|
|
299
|
|
300 =cut
|
|
301
|
|
302 sub bin_model {
|
|
303 my ( $self, $bmodel ) = @_;
|
|
304
|
|
305 if ( defined($bmodel) ) {
|
|
306 $bin_model = $bmodel;
|
|
307 }
|
|
308
|
|
309 if ( !defined($bin_model) ) {
|
|
310 throw( 'You must define a '
|
|
311 . '$Bio::EnsEMBL::Utils::Collector::bin_model '
|
|
312 . 'or pass -bin_model config' );
|
|
313 }
|
|
314
|
|
315 if ( $bin_model ne 'SIMPLE' ) {
|
|
316 throw( 'Bio::EnsEMBL::Utils::Collector does not yet support '
|
|
317 . 'non-SIMPLE bin models' );
|
|
318 }
|
|
319
|
|
320 return $bin_model;
|
|
321 }
|
|
322
|
|
323
|
|
324 =head2 window_sizes
|
|
325
|
|
326 Args : optional - arrayref of window sizes
|
|
327 Example :
|
|
328
|
|
329 foreach my $wsize ( @{ $collector->window_sizes } )
|
|
330 { # Do some collecting
|
|
331 }
|
|
332
|
|
333 Description: Getter/Setter for window_sizes. Default should be set
|
|
334 in inheriting Collector (if the config is dynamic),
|
|
335 FeatureAdaptor class or script using package variable or
|
|
336 this method.
|
|
337 NOTE: Redefining these may cause a mismatch with the
|
|
338 table partition definition.
|
|
339 Returntype : arrayref of ints
|
|
340 Exceptions : Throws if cannot set a valid array of int window sizes
|
|
341 Caller : general
|
|
342 Status : At Risk - rename bin_sizes?
|
|
343
|
|
344 =cut
|
|
345
|
|
346 sub window_sizes {
|
|
347 my ( $self, $sizes ) = @_;
|
|
348
|
|
349 if ( defined($sizes) ) {
|
|
350 $window_sizes = $sizes;
|
|
351 }
|
|
352
|
|
353 if ( !( ref($window_sizes)
|
|
354 && ( ref($window_sizes) eq 'ARRAY' )
|
|
355 && ( scalar(@$window_sizes) > 0 ) ) )
|
|
356 {
|
|
357 throw('Must pass -windows_sizes in the config '
|
|
358 . 'or define $Bio::EnsEMBL::Utils::Collector::window_sizes '
|
|
359 . 'in your Collector as an array ref of integer window_sizes' );
|
|
360 }
|
|
361
|
|
362 return $window_sizes;
|
|
363 }
|
|
364
|
|
365
|
|
366
|
|
367
|
|
368 =head2 has_window_size
|
|
369
|
|
370 Args : int - window size to validate
|
|
371 Example : if( $collector->has_window_size('30') ){
|
|
372 #Do something wrt to 30bp window size
|
|
373 }
|
|
374
|
|
375 Description: Simple utility method to validate whether this Collector
|
|
376 has a given window_size
|
|
377 Returntype : Boolean
|
|
378 Exceptions : Throws if window size not specified
|
|
379 Caller : general
|
|
380 Status : At Risk
|
|
381
|
|
382 =cut
|
|
383
|
|
384
|
|
385 sub has_window_size{
|
|
386 my ( $self, $size ) = @_;
|
|
387
|
|
388 if(! defined $size){
|
|
389 throw('You must pass a window size to validate');
|
|
390 }
|
|
391
|
|
392 return grep(/$size/, @$window_sizes);
|
|
393 }
|
|
394
|
|
395
|
|
396
|
|
397
|
|
398 ### Getter/Setters for BLOB collection config
|
|
399 # NOTE: Overriding the defaults here may cause a mismatch when the data
|
|
400 # is retrieved.
|
|
401
|
|
402 =head2 pack_template
|
|
403
|
|
404 Args : optional - string perl 'pack' template
|
|
405 Example : $self->pack_template('v');
|
|
406 Description: Getter/Setter for pack_template. Default should be set
|
|
407 in inheriting Collector (if the config is dynamic),
|
|
408 FeatureAdaptor class or script using package variable or
|
|
409 this method.
|
|
410 Returntype : string
|
|
411 Exceptions : Throws if cannot set pack_template from package variable
|
|
412 Caller : FeatureAdaptor::_obj_from_sth
|
|
413 Status : At Risk
|
|
414
|
|
415 =cut
|
|
416
|
|
417 sub pack_template {
|
|
418 my ( $self, $template ) = @_;
|
|
419
|
|
420 if ( defined($template) ) {
|
|
421 $pack_template = $template;
|
|
422 }
|
|
423
|
|
424 if ( !defined($pack_template) ) {
|
|
425 throw( 'Must pass a per score '
|
|
426 . '-pack_template in the config '
|
|
427 . 'or define $Bio::EnsEMBL::Utils::Collector::pack_template '
|
|
428 . 'in your Collector' );
|
|
429 }
|
|
430
|
|
431 return $pack_template;
|
|
432 }
|
|
433
|
|
434 =head2 packed_size
|
|
435
|
|
436 Args : optional - int size of perl 'pack' template in bytes
|
|
437 Example : $self->packed_size(2);
|
|
438 Description: Getter/Setter for packed_size. Default should be set
|
|
439 in inheriting Collector (if the config is dynamic),
|
|
440 FeatureAdaptor class or script using package variable or
|
|
441 this method.
|
|
442 Returntype : string
|
|
443 Exceptions : Throws if cannot set pack_template from pacakge variable
|
|
444 Caller : current_packed_size() and
|
|
445 FeatureAdaptor::_obj_from_sth()
|
|
446 Status : At Risk
|
|
447
|
|
448 =cut
|
|
449
|
|
450 sub packed_size {
|
|
451 my ( $self, $size ) = @_;
|
|
452
|
|
453 if ( defined($size) ) {
|
|
454 $packed_size = $size;
|
|
455 }
|
|
456
|
|
457 if ( !defined($packed_size) ) {
|
|
458 throw( 'Must pass -packed_size(wrt to pack_template) config '
|
|
459 . 'or define $Bio::EnsEMBL::Utils::Collector::packed_size '
|
|
460 . 'in your Collector' );
|
|
461 }
|
|
462
|
|
463 if ( $packed_size !~ /^\d+$/ ) {
|
|
464 throw( "$packed_size is not an integer, "
|
|
465 . "must pass a size integer for packed_size "
|
|
466 . "which specifies size of pack_template:\t"
|
|
467 . $pack_template );
|
|
468 }
|
|
469
|
|
470 return $packed_size;
|
|
471 }
|
|
472
|
|
473 =head2 bins_per_record
|
|
474
|
|
475 Example : my $bin_per_records = $self->bin_per_record
|
|
476 Description: Simple method to calculate the max number of bins
|
|
477 allowed per record given the current config.
|
|
478 Returntype : int
|
|
479 Exceptions : None
|
|
480 Caller :
|
|
481 Status : At Risk
|
|
482
|
|
483 =cut
|
|
484
|
|
485 sub bins_per_record {
|
|
486 return int( $max_data_type_size/$packed_size );
|
|
487 }
|
|
488
|
|
489
|
|
490 =head2 current_packed_size
|
|
491
|
|
492 Arg[0] : int - window size
|
|
493 Example : my $cps = $self->current_packed_size($wsize);
|
|
494 Description: Simple method to calculate the max number of bins
|
|
495 allowed per record given the current config.
|
|
496 Returntype : int
|
|
497 Exceptions : None
|
|
498 Caller :
|
|
499 Status : At Risk
|
|
500
|
|
501 =cut
|
|
502
|
|
503 sub current_packed_size {
|
|
504 my ( $self, $wsize ) = @_;
|
|
505 return ( scalar( @{ $self->score_cache($wsize) } )*$packed_size );
|
|
506 }
|
|
507
|
|
508
|
|
509 =head2 score_cache
|
|
510
|
|
511 Arg[0] : int - window size
|
|
512 Example : my $cps = $self->current_packed_size($wsize);
|
|
513 Description: Handles caching of bin scores for each window size
|
|
514 Returntype : arrayref
|
|
515 Exceptions : Throws if no window size defined
|
|
516 Caller : current_packed_size() and store_collection()
|
|
517 methods
|
|
518 Status : At Risk
|
|
519
|
|
520 =cut
|
|
521
|
|
522 sub score_cache {
|
|
523 my ( $self, $wsize, $scores ) = @_;
|
|
524
|
|
525 if ( !defined($wsize) ) {
|
|
526 throw('Must pass a window size argument');
|
|
527 }
|
|
528
|
|
529 $self->{'score_cache'}{$wsize} ||= [];
|
|
530
|
|
531 if ( defined($scores) ) {
|
|
532 push( @{ $self->{'score_cache'}{$wsize} }, @{$scores} );
|
|
533 }
|
|
534
|
|
535 return $self->{'score_cache'}{$wsize};
|
|
536 }
|
|
537
|
|
538
|
|
539 =head2 collection_start
|
|
540
|
|
541 Arg[0] : int - window_size
|
|
542 Arg[1] : optional int - seq_region_start
|
|
543 Example : my $coll_start->(150);
|
|
544 Description: Getter/Setter collection seq_region_start
|
|
545 Returntype : int
|
|
546 Exceptions : Throws if no window size defined
|
|
547 Caller : store_window_bin_by_Slice() and write_collection()
|
|
548 Status : At Risk
|
|
549
|
|
550 =cut
|
|
551
|
|
552 sub collection_start {
|
|
553 my ( $self, $wsize, $sr_start ) = @_;
|
|
554
|
|
555 if ( !defined($wsize) ) {
|
|
556 throw('Must pass a window size argument');
|
|
557 }
|
|
558
|
|
559 if ( defined($sr_start) ) {
|
|
560 $self->{'collection_start'}{$wsize} = $sr_start;
|
|
561 }
|
|
562
|
|
563 return $self->{'collection_start'}{$wsize};
|
|
564 }
|
|
565
|
|
566
|
|
567 =head2 collection_end
|
|
568
|
|
569 Arg[0] : int - window_size
|
|
570 Arg[1] : optional int - seq_region_end
|
|
571 Example : my $coll_end->(150);
|
|
572 Description: Getter/Setter collection seq_region_end
|
|
573 Returntype : int
|
|
574 Exceptions : Throws if no window size defined
|
|
575 Caller : inheriting Collector write_collection method
|
|
576 Status : At Risk
|
|
577
|
|
578 =cut
|
|
579
|
|
580 sub collection_end{
|
|
581 my ($self, $wsize, $sr_end) = @_;
|
|
582 throw('Must pass a window size argument') if ! defined $wsize;
|
|
583
|
|
584 if(defined $sr_end){
|
|
585 $self->{'collection_end'}{$wsize} = $sr_end;
|
|
586 }
|
|
587 else{
|
|
588 return $self->{'collection_end'}{$wsize};
|
|
589 }
|
|
590 }
|
|
591
|
|
592
|
|
593 =head2 collection_strand
|
|
594
|
|
595 Arg[0] : int - window_size
|
|
596 Arg[1] : optional int - seq_region_strand
|
|
597 Example : my $coll_start->(0);
|
|
598 Description: Getter/Setter collection seq_region_strand
|
|
599 Returntype : int
|
|
600 Exceptions : Throws if no window size defined
|
|
601 Caller : inheriting Collector write_collection method
|
|
602 Status : At Risk - Collections are currently strandless
|
|
603
|
|
604 =cut
|
|
605
|
|
606 sub collection_strand {
|
|
607 my ( $self, $wsize, $strand ) = @_;
|
|
608
|
|
609 if ( !defined($wsize) ) {
|
|
610 throw('Must pass a window size argument');
|
|
611 }
|
|
612
|
|
613 if ( defined $strand ) {
|
|
614 $self->{'collection_strand'}{$wsize} = $strand;
|
|
615 }
|
|
616
|
|
617 return $self->{'collection_strand'}{$wsize};
|
|
618 }
|
|
619
|
|
620
|
|
621 ### Here follows the actual working methods
|
|
622
|
|
623 =head2 _get_Slice_chunks
|
|
624
|
|
625 Description: Defines the optimal set of slice chunks to use for
|
|
626 generating collections such that redundant fetches
|
|
627 are minimized.
|
|
628 Returntype : hashref of window_size chunk size pairs
|
|
629 Exceptions : Throws if no window sizes or max_view_width defined
|
|
630 Caller : store_window_bin_by_Slice()
|
|
631 Status : At Risk
|
|
632
|
|
633 =cut
|
|
634
|
|
635 sub _get_Slice_chunks {
|
|
636 my $self = shift;
|
|
637
|
|
638 if ( !defined($window_sizes) || !defined($max_view_width) ) {
|
|
639 throw( 'You must pass both a window_size array ref '
|
|
640 . 'and max_view_width arguments' );
|
|
641 }
|
|
642
|
|
643 if ( !defined( $self->{'_slice_chunks'} ) ) {
|
|
644 # Calulate sensible slice length based on window sizes
|
|
645 my @wsizes = sort { $a <=> $b } @$window_sizes;
|
|
646
|
|
647 # Handle calculating only 0 wsize
|
|
648 if ( scalar(@wsizes) == 1
|
|
649 && $wsizes[0] == 0 )
|
|
650 {
|
|
651 return { $max_view_width => [0] };
|
|
652 }
|
|
653
|
|
654 my $multiplier = int( $max_view_width/$wsizes[$#wsizes] );
|
|
655
|
|
656 my $chunk_length = $multiplier*$wsizes[$#wsizes];
|
|
657 my $not_divisible = 1;
|
|
658
|
|
659 my %chunk_windows; # Registry of chunk lengths to run with windows
|
|
660 my %workable_chunks = map { $_ => {} } @wsizes;
|
|
661
|
|
662 # get rid of natural resolution as this will always work
|
|
663 delete $workable_chunks{'0'};
|
|
664
|
|
665 while ( $not_divisible && $chunk_length != 0 ) {
|
|
666 $not_divisible = 0;
|
|
667
|
|
668 foreach my $wsize (@wsizes) {
|
|
669 if ( $wsize == 0 ) {
|
|
670 # Special wsize for normal data
|
|
671 next;
|
|
672 }
|
|
673
|
|
674 # Set not divisible if modulus is true
|
|
675 if ( $chunk_length % $wsize ) {
|
|
676 $not_divisible = 1;
|
|
677 } else {
|
|
678 $workable_chunks{$wsize}{$chunk_length} = [];
|
|
679 }
|
|
680 }
|
|
681
|
|
682 # Gradually shrink the length until we find a workable slice
|
|
683 # length for all windows.
|
|
684 if ($not_divisible) {
|
|
685 $chunk_length -= $wsizes[$#wsizes];
|
|
686 }
|
|
687 }
|
|
688
|
|
689 my %chunk_sets;
|
|
690
|
|
691 if ( $chunk_length == 0 ) {
|
|
692 print "Could not find chunk length "
|
|
693 . "for all window sizes, "
|
|
694 . "attempting to subset windows "
|
|
695 . "using alternate slice length\n";
|
|
696
|
|
697 foreach my $wsize ( keys(%workable_chunks) ) {
|
|
698 # Loop through windows, seeing if they are workable in the other
|
|
699 # windows.
|
|
700
|
|
701 foreach my $chunk ( keys( %{ $workable_chunks{$wsize} } ) ) {
|
|
702
|
|
703 foreach my $other_wsize ( keys %workable_chunks ) {
|
|
704 next if $wsize == $other_wsize;
|
|
705
|
|
706 if ( exists( $workable_chunks{$other_wsize}{$chunk} ) ) {
|
|
707 # only push it onto the other wsize, as we will do the
|
|
708 # reverse later
|
|
709 $chunk_sets{$chunk}{$wsize} = undef;
|
|
710 }
|
|
711 }
|
|
712 }
|
|
713 }
|
|
714
|
|
715 # %chunk_sets represents co-occurence of wsizes with repect to
|
|
716 # chunks. Take the set which has the most windows and the longest
|
|
717 # chunk. Then get the largest which handles the rest.
|
|
718
|
|
719 # define possible set lengths
|
|
720 my $i = 0;
|
|
721 my %set_lengths;
|
|
722
|
|
723 map { $set_lengths{$i} = []; $i++ } @wsizes;
|
|
724
|
|
725 # get rid of natural resolution as this will always work
|
|
726 delete $set_lengths{'0'};
|
|
727
|
|
728 # Store chunks lengths for each set size
|
|
729 foreach my $chunk ( keys(%chunk_sets) ) {
|
|
730 my $set_size = scalar( values( %{ $chunk_sets{$chunk} } ) );
|
|
731 push( @{ $set_lengths{$set_size} }, $chunk );
|
|
732 }
|
|
733
|
|
734 # Get the biggest set with the longest length;
|
|
735
|
|
736 # Scalar here as we are disregarding natural resolution of 0 in
|
|
737 # loop.
|
|
738 my $largest_size = scalar(@wsizes);
|
|
739 my $found_largest_set = 0;
|
|
740
|
|
741 while ( !$found_largest_set ) {
|
|
742 $largest_size--;
|
|
743
|
|
744 if ( scalar( @{ $set_lengths{$largest_size} } ) > 0 ) {
|
|
745 $found_largest_set = 1;
|
|
746 }
|
|
747 }
|
|
748
|
|
749 my ($largest_chunk) =
|
|
750 sort { $b <=> $a } @{ $set_lengths{$largest_size} };
|
|
751
|
|
752 my @largest_windows = keys %{ $chunk_sets{$largest_chunk} };
|
|
753 @{ $chunk_windows{$largest_chunk} } = @largest_windows;
|
|
754
|
|
755 print "Largest chunk $largest_chunk($largest_size) "
|
|
756 . "contains windows: @largest_windows\n";
|
|
757
|
|
758 my %remaining_windows = map { $_ => {} } @wsizes;
|
|
759
|
|
760 # get rid of natural resolution as this will always work
|
|
761 delete $remaining_windows{'0'};
|
|
762
|
|
763 map { delete $remaining_windows{$_} } @largest_windows;
|
|
764 my $remaining_set_size = scalar( keys(%remaining_windows) );
|
|
765
|
|
766 # Use array here for practicality, would need to maintain hash if
|
|
767 # we need to iterate.
|
|
768 my @rwindows = keys(%remaining_windows);
|
|
769
|
|
770 # Could be one window, but this will not be in the co-occurence
|
|
771 # hash %chunk_sets.
|
|
772 my $next_chunk;
|
|
773
|
|
774 if ( scalar(@rwindows) == 1 ) {
|
|
775 my ($last_window) = @rwindows;
|
|
776 # Find a suitably large chunk for this one window.
|
|
777 $multiplier = int( 500000/$last_window );
|
|
778 $next_chunk = $multiplier*$last_window;
|
|
779 } else {
|
|
780
|
|
781 foreach my $chunk ( sort { $b <=> $a }
|
|
782 @{ $set_lengths{$remaining_set_size} } )
|
|
783 {
|
|
784 my $seen_count = 0;
|
|
785
|
|
786 foreach my $rwindow (@rwindows) {
|
|
787 if ( grep /$rwindow/,
|
|
788 ( values( %{ $chunk_sets{$chunk} } ) ) )
|
|
789 {
|
|
790 $seen_count++;
|
|
791 }
|
|
792 }
|
|
793
|
|
794 if ( $seen_count == $remaining_set_size ) {
|
|
795 $next_chunk = $chunk;
|
|
796 last;
|
|
797 }
|
|
798 }
|
|
799
|
|
800 }
|
|
801
|
|
802 @{ $chunk_windows{$next_chunk} } = @rwindows;
|
|
803
|
|
804 if ( defined($next_chunk) ) {
|
|
805 print "Found next chunk length $next_chunk "
|
|
806 . "contains remaining windows:\t@rwindows\n";
|
|
807 } else {
|
|
808 warn "Need to write iterative method for set definition";
|
|
809 throw( 'Could not find workable slice length '
|
|
810 . 'for remaining windows: '
|
|
811 . join( ', ', @rwindows ) );
|
|
812 }
|
|
813 } else {
|
|
814 @{ $chunk_windows{$chunk_length} } = keys(%workable_chunks);
|
|
815 print "Found workable chunk length $chunk_length "
|
|
816 . "for all window sizes:\t"
|
|
817 . join( ' ', @{ $chunk_windows{$chunk_length} } ) . "\n";
|
|
818 }
|
|
819
|
|
820 $self->{'_slice_chunks'} = \%chunk_windows;
|
|
821 } ## end if ( !defined( $self->...))
|
|
822
|
|
823 return $self->{'_slice_chunks'};
|
|
824 } ## end sub _get_Slice_chunks
|
|
825
|
|
826
|
|
827
|
|
828
|
|
829 =head2 set_config
|
|
830
|
|
831 Arg[0] : optional hash - parameter hash(see above methods for more info):
|
|
832
|
|
833 WINDOW_SIZES => array ref - subset of defined window
|
|
834 sizes
|
|
835 BIN_METHOD => string
|
|
836 MAX_VIEW_WIDTH => int
|
|
837 MAX_DATA_TYPE_SIZE => int
|
|
838 PACK_TEMPLATE => string
|
|
839 PACKED_SIZE => int
|
|
840 BIN_MODEL => string
|
|
841 NEW_ASSEMBLY => string
|
|
842 METHOD_CONFIG => hash of method specific config params
|
|
843 SKIP_ZERO_WINDOW => boolean - skips generation of 0 wsize
|
|
844 this is used if already generated
|
|
845 from an assembly projection.
|
|
846
|
|
847 NOTE: Over-riding any of the default config may cause
|
|
848 problems when storing or retrieving Collection data,
|
|
849 except sub sets of default window sizes.
|
|
850
|
|
851 Description: This method replaces the constructor as new will not be
|
|
852 called for Adaptor based Collectors.
|
|
853 Separating this from the store method is currently
|
|
854 redundant as jobs are normally submitetd in Slice based
|
|
855 jobs. However, this will be required if the store method
|
|
856 is further seaprated into fetch/generate and store methods
|
|
857 Returntype : None
|
|
858 Exceptions : Throws if no window sizes or max_view_width defined
|
|
859 Caller : Inheritor Collector e.g. Bio::EnsEMBL::Funcgen:Collector::ResultFeature
|
|
860 or script.
|
|
861 Status : At Risk
|
|
862
|
|
863 =cut
|
|
864
|
|
865 sub set_config {
|
|
866 my ( $self, %config ) = @_;
|
|
867
|
|
868 my ( $wsizes, $bmethod, $mv_width,
|
|
869 $md_type_size, $template, $psize,
|
|
870 $bmodel, $new_assm, $skip_zero_window,
|
|
871 $method_config )
|
|
872 = rearrange( [ 'WINDOW_SIZES', 'BIN_METHOD',
|
|
873 'MAX_VIEW_WIDTH', 'MAX_DATA_TYPE_SIZE',
|
|
874 'PACK_TEMPLATE', 'PACKED_SIZE',
|
|
875 'BIN_MODEL', 'NEW_ASSEMBLY',
|
|
876 'SKIP_ZERO_WINDOW', 'METHOD_CONFIG' ],
|
|
877 %config );
|
|
878
|
|
879 ### VAILDATE/SET VARS/CONFIG
|
|
880
|
|
881 # Attrs used in this method
|
|
882 $self->bin_method( $bmethod, $method_config );
|
|
883 $self->bin_model($bmodel);
|
|
884 $self->window_sizes($wsizes);
|
|
885
|
|
886 # Set to undef if we have empty array? To change this we need to
|
|
887 # pass the config hash -window_sizes conditionally
|
|
888 # This currently overwrite the defaults!
|
|
889 # if ( ref($window_sizes) eq 'ARRAY'
|
|
890 # && scalar( @{$window_sizes} ) == 0 )
|
|
891 # {
|
|
892 # $window_sizes = undef;
|
|
893 # }
|
|
894
|
|
895 # Attrs used in other (store) methods
|
|
896 $self->pack_template($template);
|
|
897 $self->packed_size($psize);
|
|
898 $self->max_data_type_size($md_type_size);
|
|
899 $self->max_view_width($mv_width);
|
|
900
|
|
901 # Other vars
|
|
902 $self->new_assembly($new_assm);
|
|
903 $self->{'_only_natural'} = 0;
|
|
904 $self->{'_store_natural'} = grep /^0$/, @$window_sizes;
|
|
905
|
|
906 ### Set window_sizes
|
|
907
|
|
908 if ( $self->new_assembly() ) {
|
|
909 print "Assembly projection may cause problems "
|
|
910 . "for large Collections, "
|
|
911 . "defaulting to window_sizes = (0)\n";
|
|
912
|
|
913
|
|
914 if ( $skip_zero_window ) {
|
|
915 throw( "You cannot -skip_zero_window or "
|
|
916 . "omit 0 from -window_sizes "
|
|
917 . "when projecting to a new assembly($new_assm) "
|
|
918 . "which should only be generated using window_size=0" );
|
|
919 }
|
|
920
|
|
921
|
|
922
|
|
923
|
|
924 # Then build the bins on the projected 0 level single Features
|
|
925
|
|
926 # Test we haven't explicity set window_sizes to be something else
|
|
927 if ( defined($wsizes)
|
|
928 && !( scalar(@$wsizes) == 1 && $wsizes->[0] == 0 ) )
|
|
929 {
|
|
930 throw( "You have set window_sizes config "
|
|
931 . "which are not safe when projecting to "
|
|
932 . "a new assembly($new_assm), "
|
|
933 . "please omit window_sizes config or set to 0" );
|
|
934 }
|
|
935
|
|
936 $self->window_sizes( [0] );
|
|
937 } else {
|
|
938
|
|
939 if ( $wsizes && $skip_zero_window &&
|
|
940 ( grep /^0$/, @$wsizes )) {
|
|
941 #Only test passed params not default config
|
|
942
|
|
943 throw( "You have specied skip_zero_window "
|
|
944 . "and window_size 0 in your parameters, "
|
|
945 . "please remove one of these" );
|
|
946 }
|
|
947 elsif ( defined($window_sizes) && !grep /^0$/, @$window_sizes ) {
|
|
948 $skip_zero_window = 1;
|
|
949 # re-add 0 window as we need this to build the collections
|
|
950 # see ...
|
|
951 unshift( @{$window_sizes}, 0 );
|
|
952 }
|
|
953 }
|
|
954
|
|
955
|
|
956 if ( $self->{'_store_natural'} && scalar( @{$window_sizes} ) == 1 ) {
|
|
957 $self->{'_only_natural'} = 1;
|
|
958 }
|
|
959 if ($skip_zero_window) {
|
|
960 $self->{'_store_natural'} = 0;
|
|
961 }
|
|
962
|
|
963 return;
|
|
964 } ## end sub set_config
|
|
965
|
|
966 =head2 store_window_bins_by_Slice
|
|
967
|
|
968 Arg[0] : Bio::EnsEMBL:Slice
|
|
969 Example : $collector->store_window_bins_by_Slice($slice);
|
|
970 Description: This is the main run method, it loops through
|
|
971 optimal slice chunks from _define_window_chunks,
|
|
972 calls _bin_features_by_Slice as appropriate and
|
|
973 calls write_collection in the inheriting Collector
|
|
974 class/script.
|
|
975 Returntype : None
|
|
976 Exceptions : Throws if Bio::EnsEMBL::Slice is not defined
|
|
977 Caller : store methods in inheriting Collector class/script
|
|
978 Status : At Risk
|
|
979
|
|
980 =cut
|
|
981
|
|
982 sub store_window_bins_by_Slice {
|
|
983 my ( $self, $slice ) = @_;
|
|
984
|
|
985 warn "Need to be careful here "
|
|
986 . "about cleaning start end strand caches between "
|
|
987 . "serially run slices";
|
|
988
|
|
989 if ( !( defined($slice)
|
|
990 && ref($slice)
|
|
991 && $slice->isa('Bio::EnsEMBL::Slice') ) )
|
|
992 {
|
|
993 throw('You must pass a valid Bio::EnsEMBL::Slice');
|
|
994 }
|
|
995
|
|
996 # Rollback previously stored features.
|
|
997 # Change 'can' to empty method stubb with pod ???
|
|
998 if ( $self->can('rollback_Features_by_Slice') ) {
|
|
999 $self->rollback_Features_by_Slice($slice);
|
|
1000 } else {
|
|
1001 warn ref($self)
|
|
1002 . " cannot rollback_Features_by_Slice. "
|
|
1003 . "This may result in storage failure "
|
|
1004 . "or duplicate Collections if there is pre-existing data";
|
|
1005 }
|
|
1006
|
|
1007 ### PROCESS CHUNKS
|
|
1008 my %chunk_windows = %{ $self->_get_Slice_chunks };
|
|
1009 my (%counts);
|
|
1010 my $store_natural = $self->{'_store_natural'};
|
|
1011 my $only_natural = $self->{'_only_natural'};
|
|
1012 $counts{0} = 0; # Set natural res count to 0
|
|
1013 my $slice_end = $slice->end;
|
|
1014 my $orig_start = $slice->start;
|
|
1015 my $region = $slice->coord_system_name;
|
|
1016 my $version = $slice->coord_system->version;
|
|
1017 my $seq_region_name = $slice->seq_region_name;
|
|
1018 my $strand = $slice->strand;
|
|
1019
|
|
1020 # Warn if this is not a full slice. Version needed in case we are
|
|
1021 # projecting from a non-default version slice
|
|
1022 my $full_slice =
|
|
1023 $slice->adaptor->fetch_by_region( $region, $seq_region_name, undef,
|
|
1024 undef, undef, $version );
|
|
1025
|
|
1026 if ( ( $full_slice->start() != $orig_start )
|
|
1027 || ( $full_slice->end() != $slice_end ) )
|
|
1028 {
|
|
1029 warn "Generating collections using sub-Slices "
|
|
1030 . "can result in data issues/artifacts";
|
|
1031 # Last chunk might not be the correct window length. Test
|
|
1032 # slices less than chunk length can cause failures in
|
|
1033 # _bin_features_by_window_sizes others?
|
|
1034 }
|
|
1035
|
|
1036 # Set the initial collection_start to orig_start. This is not the
|
|
1037 # case for 0 wsize where it must always be the true feature start.
|
|
1038 for my $wsize (@$window_sizes) {
|
|
1039 if ( $wsize == 0 ) { next }
|
|
1040 $self->collection_start( $wsize, $orig_start );
|
|
1041
|
|
1042 # Also reset collection end and score cache in case we are running
|
|
1043 # serially.
|
|
1044 $self->{collection_end}{$wsize} = undef;
|
|
1045 $self->{'score_cache'}{$wsize} = [];
|
|
1046 }
|
|
1047
|
|
1048 my $first_chunk_length = 1;
|
|
1049
|
|
1050 foreach my $chunk_length ( sort keys %chunk_windows ) {
|
|
1051 print "Processing windows "
|
|
1052 . join( ', ', @{ $chunk_windows{$chunk_length} } )
|
|
1053 . " with chunk length $chunk_length\n";
|
|
1054
|
|
1055 # Set window counts to 0
|
|
1056 map $counts{$_} = 0, @{ $chunk_windows{$chunk_length} };
|
|
1057
|
|
1058 # May need to reset flat file parser handle or other caches via
|
|
1059 # inheriting Collector
|
|
1060 if ( !$first_chunk_length ) {
|
|
1061 # Change 'can' to empty method stubb with pod???
|
|
1062 if ( $self->can('reinitialise_input') ) {
|
|
1063 $self->reinitialise_input();
|
|
1064 }
|
|
1065 }
|
|
1066
|
|
1067 $first_chunk_length = 0;
|
|
1068
|
|
1069 # Now walk through slice using slice length chunks and build all
|
|
1070 # windows in each chunk.
|
|
1071 my $in_slice = 1;
|
|
1072 my $start_adj = 0;
|
|
1073 my ( $sub_slice, $sub_end, $features, $bins );
|
|
1074 my $sub_start = 1;
|
|
1075 my $slice_length = $slice->length();
|
|
1076
|
|
1077 # Always create in local coords for fetch
|
|
1078 # Then change to seq_region coords for store if required
|
|
1079
|
|
1080 while ($in_slice) {
|
|
1081 $sub_start += $start_adj;
|
|
1082 $sub_end = $sub_start + $chunk_length - 1;
|
|
1083
|
|
1084 if ( $sub_end >= $slice_length ) {
|
|
1085 # Surplus bins are removed in store/write_collection in caller
|
|
1086 $in_slice = 0;
|
|
1087 }
|
|
1088
|
|
1089 $sub_slice =
|
|
1090 $slice->adaptor->fetch_by_region( $region, $seq_region_name,
|
|
1091 $sub_start + $orig_start - 1,
|
|
1092 $sub_end + $orig_start - 1,
|
|
1093 $strand, $version );
|
|
1094
|
|
1095 # Can't subslice as this will not clip if we go over the length of
|
|
1096 # the slice, unlike normal slice fetching. Will clipping the end
|
|
1097 # to the slice end cause any problems here? How will this affect
|
|
1098 # bin clipping?
|
|
1099
|
|
1100 ### Grab features and shift chunk coords
|
|
1101 $features = $self->get_Features_by_Slice($sub_slice);
|
|
1102
|
|
1103 # warn "Binning "
|
|
1104 # . scalar(@$features)
|
|
1105 # . " Features for chunk length $chunk_length, on Slice "
|
|
1106 # . $sub_slice->name;
|
|
1107
|
|
1108 if ( ( @{$features} )
|
|
1109 && ref( $features->[0] ) =~ /Bio::EnsEMBL::Utils::Collection/ )
|
|
1110 {
|
|
1111 # Would need to create base module with generic methods:
|
|
1112 # window_size, ...
|
|
1113
|
|
1114 # Check that the returned feature/collections support window_size.
|
|
1115 # All Collections should be able to
|
|
1116
|
|
1117 if ( $features->[0]->can('window_size') ) {
|
|
1118 if ( $features->[0]->window_size != 0 ) {
|
|
1119 throw( "You are trying to generated Collections from "
|
|
1120 . "a non-zero window sized Collection:\t"
|
|
1121 . $features->[1]->{'window_size'} );
|
|
1122 }
|
|
1123
|
|
1124 # This should never happen
|
|
1125 # if ( !$skip_zero_window ) {
|
|
1126 # throw( 'You have retrieved data from a Collection '
|
|
1127 # . 'which without using -skip_zero_window '
|
|
1128 # . 'i.e. you are trying to generate overwrite '
|
|
1129 # . 'the data you are generating the Collections from' );
|
|
1130 # }
|
|
1131
|
|
1132 } else {
|
|
1133 throw( 'Something is wrong, '
|
|
1134 . 'the Collection you have retrieved '
|
|
1135 . 'does not support the method window_size' );
|
|
1136 }
|
|
1137 } ## end if ( ( @{$features} ) ...)
|
|
1138
|
|
1139 # Set collection start here for 0 window_size
|
|
1140 if ( @{$features}
|
|
1141 && $store_natural
|
|
1142 && !defined( $self->collection_start(0) ) )
|
|
1143 {
|
|
1144 $self->collection_start( 0,
|
|
1145 $features->[0]->start + $sub_start );
|
|
1146 }
|
|
1147
|
|
1148 if ($in_slice) {
|
|
1149 $start_adj = $chunk_length;
|
|
1150 }
|
|
1151
|
|
1152 # Collect features into wsize bins
|
|
1153 if ( !$only_natural ) {
|
|
1154 # Get hashref of wsize=>bin array pairs
|
|
1155 $bins =
|
|
1156 $self->_bin_features_by_Slice_window_sizes(
|
|
1157 -slice => $sub_slice,
|
|
1158 -window_sizes => $chunk_windows{$chunk_length},
|
|
1159 -features => $features, );
|
|
1160 }
|
|
1161
|
|
1162 # Handle 0 wsize
|
|
1163 if ($store_natural) {
|
|
1164 foreach my $feature ( @{$features} ) {
|
|
1165 $counts{0}++;
|
|
1166
|
|
1167 if ( $bin_model eq 'SIMPLE' ) {
|
|
1168 $self->collection_start( 0, $feature->start + $sub_start );
|
|
1169
|
|
1170 $self->write_collection(
|
|
1171 0,
|
|
1172 $slice, # Pass Slice to sub-slice when storing
|
|
1173 $feature->end + $sub_start,
|
|
1174 $feature->strand, # Need to pass strand for 0 resolution
|
|
1175 $feature->scores, );
|
|
1176 }
|
|
1177 }
|
|
1178
|
|
1179 print "Window size 0 (natural resolution) has "
|
|
1180 . scalar( @{$features} )
|
|
1181 . " feature bins for:\t"
|
|
1182 . $sub_slice->name . "\n";
|
|
1183 }
|
|
1184
|
|
1185 # Now store collections for wsizes >0
|
|
1186 my $num_bins;
|
|
1187
|
|
1188 foreach my $wsize ( sort keys( %{$bins} ) ) {
|
|
1189 $num_bins = scalar( @{ $bins->{$wsize} } );
|
|
1190 $counts{$wsize} += $num_bins;
|
|
1191
|
|
1192 if ( $bin_model eq 'SIMPLE' ) {
|
|
1193 $self->write_collection(
|
|
1194 $wsize,
|
|
1195 $slice,
|
|
1196 #$sub_start,
|
|
1197 $sub_end,
|
|
1198 $slice->strand, # This is most likely 1!
|
|
1199 # Override this woth 0 in descendant Collector if required.
|
|
1200 $bins->{$wsize}, );
|
|
1201
|
|
1202 } else {
|
|
1203 throw( 'Bio::EnsEMBL::Utils::Collector '
|
|
1204 . 'does not yet support non-SIMPLE bin models' );
|
|
1205 # i.e. More than one score
|
|
1206 }
|
|
1207 }
|
|
1208 } ## end while ($in_slice)
|
|
1209
|
|
1210 # Turn off storing of natural resolution for next chunk length sets
|
|
1211 $store_natural = 0;
|
|
1212 } ## end foreach my $chunk_length ( ...)
|
|
1213
|
|
1214 # Write last collections for each wsize
|
|
1215
|
|
1216 foreach my $wsize (@$window_sizes) {
|
|
1217
|
|
1218 if ( ( $wsize == 0 && !$store_natural )
|
|
1219 || ( $wsize != 0 && $only_natural ) )
|
|
1220 {
|
|
1221 next;
|
|
1222 }
|
|
1223
|
|
1224 print "Writing final $wsize window_size collection, "
|
|
1225 . "this may result in slightly different "
|
|
1226 . "bin numbers from counts due to removing "
|
|
1227 . "overhanging bins past end of slice\n";
|
|
1228 $self->write_collection( $wsize, $slice );
|
|
1229 }
|
|
1230
|
|
1231 # Print some counts
|
|
1232 foreach my $wsize ( sort ( keys %counts ) ) {
|
|
1233 print "Generated "
|
|
1234 . $counts{$wsize}
|
|
1235 . " bins for window size $wsize for "
|
|
1236 . $slice->name . "\n";
|
|
1237 # Some may have failed to store if we are projecting to a new
|
|
1238 # assembly.
|
|
1239 }
|
|
1240
|
|
1241 return;
|
|
1242 } ## end sub store_window_bins_by_Slice
|
|
1243
|
|
1244 =head2 _bin_features_by_Slice_window_sizes
|
|
1245
|
|
1246 Args[0] : Bio::EnsEMBL::Slice
|
|
1247 Args[1] : ARRAYREF of window sizes
|
|
1248 Args[2] : ARRAYREF of features with start and end method
|
|
1249 e.g. Bio::EnsEMBL::Features
|
|
1250 Example :
|
|
1251
|
|
1252 $bins =
|
|
1253 $self->_bin_features_by_window_sizes(
|
|
1254 -slice => $slice,
|
|
1255 -window_sizes => $chunk_windows{$chunk_length},
|
|
1256 -features => $features, );
|
|
1257
|
|
1258 Description: Bins feature scores for a given list of window sizes and
|
|
1259 predefined method.
|
|
1260 Returntype : HASHREF of scores per bin per window size
|
|
1261 Exceptions : None
|
|
1262 Caller : store_window_bins_by_Slice
|
|
1263 Status : At Risk
|
|
1264
|
|
1265 =cut
|
|
1266
|
|
1267 sub _bin_features_by_Slice_window_sizes {
|
|
1268 my ( $self, @args ) = @_;
|
|
1269
|
|
1270 my ( $slice, $wsizes, $features ) =
|
|
1271 rearrange( [ 'SLICE', 'WINDOW_SIZES', 'FEATURES' ], @args );
|
|
1272
|
|
1273 # Generate these once in caller?
|
|
1274 my $calc_method = '_calculate_' . $bin_method;
|
|
1275 my $post_method = '_post_process_' . $bin_method;
|
|
1276
|
|
1277 # Do this conditional on the Collection type i.e. is
|
|
1278 # collection seq_region blob then no else yes Would need
|
|
1279 # $Bio::EnsEMBL::Utils::Collector::collection_format=BLOB|STANDARD
|
|
1280 # if ( !defined($features) || !@{$features} ) { return {} }
|
|
1281
|
|
1282 # Set up some hashes to store data by window_size
|
|
1283 my ( %bins, %nbins, %bin_counts );
|
|
1284 my $slice_start = $slice->start();
|
|
1285 my $slice_length = $slice->length();
|
|
1286
|
|
1287 # Set up some bin data for the windows
|
|
1288 foreach my $wsize (@$wsizes) {
|
|
1289 $nbins{$wsize} = int( $slice_length/$wsize ); # int rounds down
|
|
1290 # nbins is index of the bin not the 'number'
|
|
1291 # Unless $slice_length is a multiple!
|
|
1292 if ( !( $slice_length % $wsize ) ) { $nbins{$wsize}-- }
|
|
1293
|
|
1294 # Create default bins with 0
|
|
1295 $bins{$wsize} = [];
|
|
1296 map { $bins{$wsize}->[$_] = 0 } ( 0 .. $nbins{$wsize} );
|
|
1297
|
|
1298 # Set bin counts to 0 for each bin
|
|
1299 $bin_counts{$wsize} = [];
|
|
1300
|
|
1301 # This is adding an undef to the start of the array!?
|
|
1302 map { $bin_counts{$wsize}->[ ($_) ] = 0 } @{ $bins{$wsize} };
|
|
1303
|
|
1304 foreach my $bin ( @{ $bins{$wsize} } ) {
|
|
1305 $bin_counts{$wsize}->[$bin] = 0;
|
|
1306 }
|
|
1307 }
|
|
1308
|
|
1309 my $feature_index = 0;
|
|
1310 my ( $bin_index, @bin_masks );
|
|
1311
|
|
1312 foreach my $feature ( @{$features} ) {
|
|
1313 # Set up the bins for each window size
|
|
1314
|
|
1315 foreach my $wsize (@$wsizes) {
|
|
1316 my $start_bin = int( ( $feature->start )/$wsize );
|
|
1317 my $end_bin = int( ( $feature->end )/$wsize );
|
|
1318
|
|
1319 if ( $end_bin > $nbins{$wsize} ) {
|
|
1320 $end_bin = $nbins{$wsize};
|
|
1321 }
|
|
1322
|
|
1323 $self->$calc_method( $feature, $start_bin, $end_bin,
|
|
1324 $wsize, \%bins, \%bin_counts );
|
|
1325 }
|
|
1326
|
|
1327 }
|
|
1328
|
|
1329 # Now do post processing of bins if required
|
|
1330 if ( $self->can($post_method) ) {
|
|
1331 $self->$post_method( \%bins, \%bin_counts );
|
|
1332 }
|
|
1333
|
|
1334 return \%bins;
|
|
1335 } ## end sub _bin_features_by_Slice_window_sizes
|
|
1336 # end sub _bin_features_by_Slice
|
|
1337
|
|
1338
|
|
1339 ### Here follows the bin methods
|
|
1340 # These may also be defined in the inheriting Collector class. No tests
|
|
1341 # as these are internal and require speed.
|
|
1342
|
|
1343
|
|
1344 =head2 _calculate_count
|
|
1345
|
|
1346 Args[0] : feature e.g. Bio::EnsEMBL::Feature
|
|
1347 Args[1] : int - start bin
|
|
1348 Args[2] : int - end bin
|
|
1349 Args[3] : int - window_size
|
|
1350 Args[4] : hashref - score bins
|
|
1351 Example : $self->$calc_method
|
|
1352 Description: Adds count to bins which this feature overlaps
|
|
1353 Returntype : None
|
|
1354 Exceptions : None
|
|
1355 Caller : _bin_features_by_window_sizes
|
|
1356 Status : At Risk
|
|
1357
|
|
1358 =cut
|
|
1359
|
|
1360 sub _calculate_count {
|
|
1361 my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
|
|
1362
|
|
1363 my $bin_index;
|
|
1364
|
|
1365 for ( $bin_index = $start_bin; $bin_index <= $end_bin; ++$bin_index )
|
|
1366 {
|
|
1367 $bins_ref->{$wsize}->[$bin_index]++;
|
|
1368 }
|
|
1369
|
|
1370 return;
|
|
1371 }
|
|
1372
|
|
1373
|
|
1374 =head2 _calculate_average_score
|
|
1375
|
|
1376 Args[0] : feature e.g. Bio::EnsEMBL::Feature
|
|
1377 Args[1] : int - start bin
|
|
1378 Args[2] : int - end bin
|
|
1379 Args[3] : int - window_size
|
|
1380 Args[4] : hashref - score bins
|
|
1381 Example : $self->$calc_method
|
|
1382 Description: Adds score to bins which this feature overlaps
|
|
1383 Returntype : None
|
|
1384 Exceptions : None
|
|
1385 Caller : _bin_features_by_window_sizes
|
|
1386 Status : At Risk
|
|
1387
|
|
1388 =cut
|
|
1389
|
|
1390
|
|
1391 sub _calculate_average_score {
|
|
1392 my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref,
|
|
1393 $bin_counts_ref )
|
|
1394 = @_;
|
|
1395
|
|
1396 # This is simple an average of all the scores for features which
|
|
1397 # overlap this bin. No weighting with respect to the bin or the
|
|
1398 # feature.
|
|
1399
|
|
1400 my $score = $self->get_score_by_Feature($feature);
|
|
1401
|
|
1402 for ( my $bin_index = $start_bin;
|
|
1403 $bin_index <= $end_bin;
|
|
1404 ++$bin_index )
|
|
1405 {
|
|
1406 # We should really push onto array here so we can have median or
|
|
1407 # mean.
|
|
1408
|
|
1409 $bins_ref->{$wsize}->[$bin_index] += $score;
|
|
1410 $bin_counts_ref->{$wsize}->[$bin_index]++;
|
|
1411 }
|
|
1412
|
|
1413 return;
|
|
1414 }
|
|
1415
|
|
1416
|
|
1417 =head2 _post_process_average_score
|
|
1418
|
|
1419 Args[0] : hashref - score bins
|
|
1420 Args[1] : hashref - count bins
|
|
1421 Example : $self->$post_method
|
|
1422 Description: Post processes bins to calculate average score
|
|
1423 Returntype : None
|
|
1424 Exceptions : None
|
|
1425 Caller : _bin_features_by_window_sizes
|
|
1426 Status : At Risk
|
|
1427
|
|
1428 =cut
|
|
1429
|
|
1430 sub _post_process_average_score {
|
|
1431 my ( $self, $bins_ref, $bin_counts_ref ) = @_;
|
|
1432
|
|
1433 foreach my $wsize ( keys %{$bins_ref} ) {
|
|
1434 foreach my $bin_index ( 0 .. $#{ $bins_ref->{$wsize} } ) {
|
|
1435
|
|
1436 if ( $bin_counts_ref->{$wsize}->[$bin_index] ) {
|
|
1437 $bins_ref->{$wsize}->[$bin_index] /=
|
|
1438 $bin_counts_ref->{$wsize}->[$bin_index];
|
|
1439 }
|
|
1440
|
|
1441 }
|
|
1442 }
|
|
1443
|
|
1444 return;
|
|
1445 }
|
|
1446
|
|
1447
|
|
1448 =head2 _calculate_max_magnitude
|
|
1449
|
|
1450 Args[0] : feature e.g. Bio::EnsEMBL::Feature
|
|
1451 Args[1] : int - start bin
|
|
1452 Args[2] : int - end bin
|
|
1453 Args[3] : int - window_size
|
|
1454 Args[4] : hashref - score bins
|
|
1455 Example : $self->$calc_method
|
|
1456 Description: Sets max +/-ve scores for bins which this feature overlaps
|
|
1457 Returntype : None
|
|
1458 Exceptions : None
|
|
1459 Caller : _bin_features_by_window_sizes
|
|
1460 Status : At Risk
|
|
1461
|
|
1462 =cut
|
|
1463
|
|
1464 sub _calculate_max_magnitude {
|
|
1465 my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
|
|
1466
|
|
1467 my $score = $self->get_score_by_Feature($feature);
|
|
1468
|
|
1469 # Max magnitude
|
|
1470 # Take the highest value +ve or -ve score
|
|
1471 for ( my $bin_index = $start_bin;
|
|
1472 $bin_index <= $end_bin;
|
|
1473 ++$bin_index )
|
|
1474 {
|
|
1475
|
|
1476 # We really need to capture the lowest -ve and higest +ve scores
|
|
1477 # here and post process to pick between them.
|
|
1478
|
|
1479 $bins_ref->{$wsize}->[$bin_index] ||= [ 0, 0 ]; #-ve, +ve
|
|
1480
|
|
1481 if ( $score < $bins_ref->{$wsize}->[$bin_index]->[0] ) {
|
|
1482 $bins_ref->{$wsize}->[$bin_index]->[0] = $score;
|
|
1483 } elsif ( $score > $bins_ref->{$wsize}->[$bin_index][1] ) {
|
|
1484 $bins_ref->{$wsize}->[$bin_index]->[1] = $score;
|
|
1485 }
|
|
1486 }
|
|
1487
|
|
1488 return;
|
|
1489 } ## end sub _calculate_max_magnitude
|
|
1490
|
|
1491
|
|
1492 =head2 _post_process_max_magnitude
|
|
1493
|
|
1494 Args[0] : hashref - score bins
|
|
1495 Args[1] : hashref - count bins
|
|
1496 Example : $self->$post_method
|
|
1497 Description: Post processes bins to pick largest +ve or -ve score
|
|
1498 Returntype : None
|
|
1499 Exceptions : None
|
|
1500 Caller : _bin_features_by_window_sizes
|
|
1501 Status : At Risk
|
|
1502
|
|
1503 =cut
|
|
1504
|
|
1505 sub _post_process_max_magnitude {
|
|
1506 my ( $self, $bins_ref ) = @_;
|
|
1507
|
|
1508 # Take the highest value +ve or -ve score
|
|
1509
|
|
1510 foreach my $wsize ( keys %{$bins_ref} ) {
|
|
1511 foreach my $bin_index ( 0 .. $#{ $bins_ref->{$wsize} } ) {
|
|
1512
|
|
1513 # Have potential for no listref in a given bin
|
|
1514
|
|
1515 # default value if we haven't seen anything is 0
|
|
1516 # Actually want an array of -ve +ve values
|
|
1517
|
|
1518 if ( $bins_ref->{$wsize}->[$bin_index] ) {
|
|
1519 my $tmp_minus = -$bins_ref->{$wsize}->[$bin_index]->[0];
|
|
1520
|
|
1521 if ( $tmp_minus > $bins_ref->{$wsize}->[$bin_index]->[1] ) {
|
|
1522 $bins_ref->{$wsize}->[$bin_index] =
|
|
1523 $bins_ref->{$wsize}->[$bin_index]->[0];
|
|
1524 } else {
|
|
1525 $bins_ref->{$wsize}->[$bin_index] =
|
|
1526 $bins_ref->{$wsize}->[$bin_index]->[1];
|
|
1527 }
|
|
1528
|
|
1529 }
|
|
1530
|
|
1531 }
|
|
1532 }
|
|
1533 return;
|
|
1534 } ## end sub _post_process_max_magnitude
|
|
1535
|
|
1536
|
|
1537 =head2 _calculate_RPKM
|
|
1538
|
|
1539 Args[0] : feature e.g. Bio::EnsEMBL::Feature
|
|
1540 Args[1] : int - start bin
|
|
1541 Args[2] : int - end bin
|
|
1542 Args[3] : int - window_size
|
|
1543 Args[4] : hashref - score bins
|
|
1544 Example : $self->$calc_method
|
|
1545 Description: Stores counts to calculate Read Per Kb per Million(RPKM)
|
|
1546 Returntype : None
|
|
1547 Exceptions : None
|
|
1548 Caller : _bin_features_by_window_sizes
|
|
1549 Status : At Risk
|
|
1550
|
|
1551 =cut
|
|
1552
|
|
1553 sub _calculate_RPKM {
|
|
1554 my ( $self, $feature, $start_bin, $end_bin, $wsize, $bins_ref ) = @_;
|
|
1555
|
|
1556 $self->_calculate_count( $feature, $start_bin, $end_bin,
|
|
1557 $wsize, $bins_ref );
|
|
1558
|
|
1559 return;
|
|
1560 }
|
|
1561
|
|
1562
|
|
1563 =head2 _post_process_RPKM
|
|
1564
|
|
1565 Args[0] : hashref - score bins
|
|
1566 Args[1] : hashref - count bins
|
|
1567 Example : $self->$post_method
|
|
1568 Description: Post processes bins to calculate average score
|
|
1569 Returntype : None
|
|
1570 Exceptions : None
|
|
1571 Caller : _bin_features_by_window_sizes
|
|
1572 Status : At Risk
|
|
1573
|
|
1574 =cut
|
|
1575
|
|
1576 sub _post_process_RPKM {
|
|
1577 my ( $self, $bins_ref ) = @_;
|
|
1578
|
|
1579 #10^9 x C / NGB
|
|
1580 #C = Reads overlapping bin
|
|
1581 #N = Total reads in the experiment
|
|
1582 #G = Length of bin in bps
|
|
1583 #(don't really have to account for non-ref/HAPs or gender here
|
|
1584 #as should be close enough, CellTypes/gender differences will be miniscule)
|
|
1585 #B = length of each bin
|
|
1586
|
|
1587 foreach my $wsize(keys %{$bins_ref}){
|
|
1588
|
|
1589 foreach my $bin_index(0..$#{$bins_ref->{$wsize}}){
|
|
1590 $bins_ref->{$wsize}->[$bin_index] =
|
|
1591 ((10**9) *
|
|
1592 $bins_ref->{$wsize}->[$bin_index])/(($self->_RPKM_factor($wsize)) * $wsize);
|
|
1593 }
|
|
1594 }
|
|
1595
|
|
1596 return;
|
|
1597
|
|
1598 }
|
|
1599
|
|
1600
|
|
1601 =head2 _set_up_RPKM
|
|
1602
|
|
1603 Args[0] : hashref - method config e.g
|
|
1604 {
|
|
1605 DNADB => Bio::EnsEMBL::DBSQL::DBAdaptor,
|
|
1606 TOTAL_FEATURE => $total_feature_count,
|
|
1607 }
|
|
1608
|
|
1609 Example : $self->$set_up_method($config);
|
|
1610 Description: Sets the RPKM factor
|
|
1611 Returntype : None
|
|
1612 Exceptions : Throws is required config params are not set
|
|
1613 Caller : bin_method
|
|
1614 Status : At Risk
|
|
1615
|
|
1616 =cut
|
|
1617
|
|
1618
|
|
1619 sub _set_up_RPKM{
|
|
1620 my ($self, $config) = @_;
|
|
1621
|
|
1622 my ($dnadb, $total_features) = rearrange([ 'DNADB', 'TOTAL_FEATURES'], %{$config});
|
|
1623
|
|
1624 #Test specifically here to notify about config hash
|
|
1625 if(! $total_features){
|
|
1626 throw("For RPKM you must pass a valid 'total_features' ".
|
|
1627 "as part of the method config hash.");
|
|
1628 }
|
|
1629
|
|
1630 if(! $dnadb){
|
|
1631 throw("For RPKM you must pass 'dnadb' as part of the method config hash.");
|
|
1632 }
|
|
1633
|
|
1634 foreach my $wsize(@{$self->window_sizes}){
|
|
1635 #Should never have 0 here
|
|
1636 $self->_RPKM_factor($wsize, ($wsize * $total_features)); #N*G
|
|
1637
|
|
1638 warn "setting $wsize RPKM factor($wsize * $total_features) to ".
|
|
1639 $self->_RPKM_factor($wsize);
|
|
1640 }
|
|
1641
|
|
1642 return;
|
|
1643 } ## end sub _set_up_RPKM
|
|
1644
|
|
1645
|
|
1646 =head2 _RPKM_factor
|
|
1647
|
|
1648 Args[0] : int - RPKM factor i.e. (Total reads in the experiment *
|
|
1649 Genome length)
|
|
1650 Example : $self->_RPKM_factor($wsize, $factor);
|
|
1651 Description: Gets/Sets the RPKM factor
|
|
1652 Returntype : int
|
|
1653 Exceptions : None
|
|
1654 Caller : _set_up_RPKM, _post_process_RPKM
|
|
1655 Status : At Risk
|
|
1656
|
|
1657 =cut
|
|
1658
|
|
1659 sub _RPKM_factor{
|
|
1660 my ($self, $wsize, $factor) = @_;
|
|
1661
|
|
1662 if (! defined $wsize){
|
|
1663 throw("You must pass at least window_size to get or set the RPKM factor");
|
|
1664 }
|
|
1665
|
|
1666 if(defined $factor){
|
|
1667 $self->{'RPKM_factor'}{$wsize} = $factor;
|
|
1668 }
|
|
1669 elsif(! exists $self->{'RPKM_factor'}{$wsize}){
|
|
1670 #This should never happen unless the window sizes
|
|
1671 #are redefined after initialisation
|
|
1672 throw("You have requested an RPKM factor for a window_size".
|
|
1673 " which has not been set:\t$wsize");
|
|
1674 }
|
|
1675
|
|
1676 return $self->{'RPKM_factor'}{$wsize};
|
|
1677 }
|
|
1678
|
|
1679 =head2 get_diploid_genome_length_by_gender
|
|
1680
|
|
1681 Args[0] : string - RPKM factor i.e. (Total reads in the experiment *
|
|
1682 Genome length)
|
|
1683 Args[1] : string - gender e.g. male or female
|
|
1684 Example :
|
|
1685
|
|
1686 my $glength =
|
|
1687 $self->get_diploid_genome_length_by_gender( $dnadb, $gender );
|
|
1688
|
|
1689 Description: Gets the gender specific diploid genome length,
|
|
1690 including non-ref but not including haplotypes. Only
|
|
1691 handles species with X/Y sex chromosomes.
|
|
1692 Returntype : int
|
|
1693 Exceptions : None
|
|
1694 Caller : _set_up_RPKM, _post_process_RPKM
|
|
1695 Status : At Risk - Move to and export from generic Utils Slice module???
|
|
1696
|
|
1697 =cut
|
|
1698
|
|
1699 sub get_diploid_genome_length_by_gender {
|
|
1700 my ( $dnadb, $gender ) = @_;
|
|
1701
|
|
1702 my %sex_chrs = ( 'Y' => 'male',
|
|
1703 'X' => 'female', );
|
|
1704
|
|
1705 my $dip_length = 0;
|
|
1706
|
|
1707 if (!(( ref($dnadb) && $dnadb->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') )
|
|
1708 && $dnadb->grou() eq 'core'
|
|
1709 && ( defined $gender && $gender =~ /(male|female)/ ) ) )
|
|
1710 {
|
|
1711 throw( "Must provide valid "
|
|
1712 . "Bio::EnsEMBL::DBSQL::DBAdaptor($dnadb) and "
|
|
1713 . "gender ($gender) arguments" );
|
|
1714 }
|
|
1715
|
|
1716 my @ref_slices = $dnadb->get_SliceAdaptor->fetch_all('toplevel');
|
|
1717
|
|
1718 # Include non-ref(unassembled), but omit haps/lrgs(i.e. redundant)
|
|
1719
|
|
1720 foreach my $slice (
|
|
1721 @{ $dnadb->get_SliceAdaptor->fetch_all( 'toplevel', undef, 1, 1 ) }
|
|
1722 )
|
|
1723 {
|
|
1724 # Include duplicated region for true diploid length
|
|
1725
|
|
1726 # Skip haps/lrgs
|
|
1727 if ( ( $slice->coord_system->name() eq 'chromosome'
|
|
1728 && !$slice->is_reference() )
|
|
1729 || $slice->coord_system->name() eq 'lrg' )
|
|
1730 {
|
|
1731 next;
|
|
1732 }
|
|
1733
|
|
1734 if ( exists( $sex_chrs{ $slice->seq_region_name() } ) ) {
|
|
1735 if ( $gender eq 'male' ) {
|
|
1736 $dip_length += $slice->length;
|
|
1737 } elsif ( $sex_chrs{ $slice->seq_region_name } eq 'male' ) {
|
|
1738 next;
|
|
1739 }
|
|
1740 }
|
|
1741
|
|
1742 $dip_length += 2*$slice->length;
|
|
1743 }
|
|
1744
|
|
1745 return $dip_length;
|
|
1746 } ## end sub get_diploid_genome_length_by_gender
|
|
1747
|
|
1748
|
|
1749 1;
|