0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::Utils::Slice - Utility functions for slices
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 use Bio::EnsEMBL::Utils::Slice qw(split_Slices);
|
|
28
|
|
29 # ...
|
|
30
|
|
31 # get all chromosomes in the database
|
|
32 my $slices = $slice_adaptor->fetch_all('chromosome');
|
|
33
|
|
34 # split the chromosomes into equal chunks of size less than 1MB
|
|
35 # with an overlap of 1kb
|
|
36 $slices = split_Slices( $slices, 1e6, 1e3 );
|
|
37
|
|
38 =head1 METHODS
|
|
39
|
|
40 =cut
|
|
41
|
|
42
|
|
43 package Bio::EnsEMBL::Utils::Slice;
|
|
44
|
|
45 use strict;
|
|
46 use warnings;
|
|
47
|
|
48 use Exporter;
|
|
49
|
|
# Export setup: inherit import() from Exporter and offer split_Slices
# for import on request.
our @ISA       = ('Exporter');
our @EXPORT_OK = ('&split_Slices');
|
|
55
|
|
56 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
57 use POSIX;
|
|
58
|
|
=head2 split_Slices

  Arg [1]    : ref to list of slices
  Arg [2]    : int maxlength of sub slices
  Arg [3]    : int overlap length (optional, defaults to 0)
  Example    : my $sub_slices = split_Slices($slices,$maxlen,$overlap)
  Description: Splits every slice of the list into consecutive sub slices
               of near-equal size, none of them longer than maxlength.
               Neighbouring sub slices of the same input slice share
               "overlap" bases. An input slice that is not longer than
               the overlap yields a single sub slice spanning it. All
               sub slices are created with strand 1.
  Returntype : ref to list of slices
  Exceptions : maxlen < 1, overlap < 0 or overlap >= maxlen

=cut

sub split_Slices {
  my ($slice_big, $max_length, $overlap) = @_;

  if (!defined($max_length) or $max_length < 1) {
    throw("maxlength needs to be set and > 0");
  }

  if (!defined($overlap)) {
    $overlap = 0;
  }
  elsif ($overlap < 0) {
    throw("negative overlaps not allowed");
  }

  # Each sub slice must contribute at least one base that is not shared
  # with its predecessor. Without this check the piece count computed
  # below would be a division by zero or a negative number.
  if ($overlap >= $max_length) {
    throw("overlap needs to be smaller than maxlength");
  }

  my @out = ();

  foreach my $slice (@$slice_big) {
    my $start  = $slice->start;
    my $length = $slice->length;

    # Default: one sub slice covering the whole input slice. This is what
    # is produced when the slice is not longer than the overlap.
    my $number = 1;         # number of sub slices to create
    my $step   = $length;   # bases each sub slice advances by
    my $extra  = 0;         # leading sub slices advancing one base more

    if ($length > $overlap) {
      # A sub slice is "advance + overlap" bases long, so the advances of
      # all pieces have to add up to length - overlap, and no advance may
      # exceed max_length - overlap.
      my $span = $length - $overlap;

      # Fewest pieces that keep every piece within max_length.
      $number = ceil($span / ($max_length - $overlap));

      # Equal advance per piece; the remainder (always < $number) is spread
      # one base at a time over the first pieces so that no piece grows
      # beyond max_length.
      $step  = floor($span / $number);
      $extra = $span - $step * $number;
    }

    for (my $i = 0; $i < $number; $i++) {
      my $advance = $step + ($i < $extra ? 1 : 0);

      # The last piece is pinned to the end of the input slice; every
      # other piece reaches $overlap bases into its successor.
      my $end = ($i == $number - 1)
        ? $slice->end
        : $start + $advance - 1 + $overlap;

      push @out, Bio::EnsEMBL::Slice->new
        (-START             => $start,
         -END               => $end,
         -STRAND            => 1,
         -SEQ_REGION_NAME   => $slice->seq_region_name,
         -SEQ_REGION_LENGTH => $slice->seq_region_length,
         -COORD_SYSTEM      => $slice->coord_system,
         -ADAPTOR           => $slice->adaptor);

      $start += $advance;
    }
  }

  return \@out;
}
|
|
134
|
|
135
|
|
136
|
|
137 1;
|