Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/Slice.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::Utils::Slice - Utility functions for slices | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 use Bio::EnsEMBL::Utils::Slice qw(split_Slices); | |
28 | |
29 # ... | |
30 | |
31 # get all chromosomes in the database | |
32 my $slices = $slice_adaptor->fetch_all('chromosome'); | |
33 | |
34 # split the chromosomes into equal chunks of size less than 1MB | |
35 # with an overlap of 1kb | |
36 $slices = split_Slices( $slices, 1e6, 1e3 ); | |
37 | |
38 =head1 METHODS | |
39 | |
40 =cut | |
41 | |
42 | |
43 package Bio::EnsEMBL::Utils::Slice; | |
44 | |
45 use strict; | |
46 use warnings; | |
47 | |
48 use Exporter; | |
49 | |
50 use vars qw(@ISA @EXPORT_OK); | |
51 | |
52 @ISA = qw(Exporter); | |
53 | |
54 @EXPORT_OK = qw(&split_Slices); | |
55 | |
56 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
57 use POSIX; | |
58 | |
=head2 split_Slices

  Arg [1]    : ref to list of slices
  Arg [2]    : int maxlength of sub slices
  Arg [3]    : int overlap length (optional, defaults to 0)
  Example    : my $sub_slices = split_Slices($slices,$maxlen,$overlap)
  Description: Splits every slice of the list into the smallest number of
               similarly sized pieces that are each no longer than
               maxlength. Consecutive pieces of the same input slice share
               exactly overlap bases. A slice that is not longer than
               maxlength yields a single piece covering the same range.
               All returned slices are created on strand 1.
  Returntype : ref to list of slices
  Exceptions : maxlen < 1, overlap < 0 or overlap >= maxlen

=cut

sub split_Slices {
  my ($slice_big, $max_length, $overlap) = @_;

  if (!defined($max_length) or $max_length < 1) {
    throw("maxlength needs to be set and > 0");
  }

  if (!defined($overlap)) {
    $overlap = 0;
  }
  elsif ($overlap < 0) {
    throw("negative overlaps not allowed");
  }

  # Every piece must advance by at least one base beyond the overlap.
  # Without this check the piece count below divides by zero (overlap ==
  # maxlength) or becomes negative (overlap > maxlength), which silently
  # drops the slice from the result.
  if ($overlap >= $max_length) {
    throw("overlap must be smaller than maxlength");
  }

  # NOTE(review): Bio::EnsEMBL::Slice is not loaded by the visible preamble
  # of this module; presumably callers have already loaded it - confirm.

  my @out = ();

  foreach my $slice (@$slice_big) {
    my $slice_start = $slice->start;
    my $slice_end   = $slice->end;
    my $length      = $slice->length;

    # Default: one piece spanning the whole slice.
    my $number = 1;
    my $step   = $length;

    if ($length > $max_length) {
      # Each piece contributes ($piece_length - $overlap) new bases, and the
      # final $overlap bases are covered by the last piece only.
      my $span = $length - $overlap;

      # Smallest number of pieces that keeps every piece <= max_length.
      $number = ceil($span / ($max_length - $overlap));

      # Distance between consecutive piece starts. Rounding up keeps
      # $step + $overlap <= $max_length while still guaranteeing that the
      # last piece starts inside the slice and is longer than the overlap.
      $step = ceil($span / $number);
    }

    for my $i (0 .. $number - 1) {
      my $start = $slice_start + $i * $step;

      # The last piece always stops at the end of the input slice, so it
      # may be slightly shorter than the others but never longer.
      my $end = ($i == $number - 1)
        ? $slice_end
        : $start + $step + $overlap - 1;

      push @out, Bio::EnsEMBL::Slice->new
        (-START             => $start,
         -END               => $end,
         -STRAND            => 1,
         -SEQ_REGION_NAME   => $slice->seq_region_name,
         -SEQ_REGION_LENGTH => $slice->seq_region_length,
         -COORD_SYSTEM      => $slice->coord_system,
         -ADAPTOR           => $slice->adaptor);
    }
  }

  return \@out;
}
134 | |
135 | |
136 | |
137 1; |