annotate variant_effect_predictor/Bio/EnsEMBL/RepeatMaskedSlice.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::RepeatMaskedSlice - Arbitrary Slice of a genome
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 $sa = $db->get_SliceAdaptor();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 $slice =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 $repeat_masked_slice = $slice->get_repeatmasked_seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 # get repeat masked sequence:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 my $dna = $repeat_masked_slice->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 $dna = $repeat_masked_slice->subseq( 1, 1000 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 This is a specialised Bio::EnsEMBL::Slice class that is used to retrieve
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 repeat masked genomic sequence rather than normal genomic sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 package Bio::EnsEMBL::RepeatMaskedSlice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 use Bio::EnsEMBL::Slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 use Bio::EnsEMBL::Utils::Exception;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Inherit all generic slice behaviour from Bio::EnsEMBL::Slice.
our @ISA = ('Bio::EnsEMBL::Slice');

# BLOCK_PWR is the base-2 logarithm of the block size used by subseq() when
# repeat features are retrieved. Feature requests are widened to boundaries
# that are multiples of 2**BLOCK_PWR, so repeated calls are likely to ask for
# the same slices, which should hopefully create cache hits and less database
# traffic.
my $BLOCK_PWR = 18;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 Arg [-REPEAT_MASK] : The logic name of the repeats to be used for masking.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 If not provided, all repeats in the database are used.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 Arg [...] : Named superclass arguments. See B<Bio::EnsEMBL::Slice>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 Example : my $slice = Bio::EnsEMBL::RepeatMaskedSlice->new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 (-START => $start,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 -END => $end,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 -STRAND => $strand,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 -SEQ_REGION_NAME => $seq_region,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 -SEQ_REGION_LENGTH => $seq_region_length,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 -COORD_SYSTEM => $cs,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 -ADAPTOR => $adaptor,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 -REPEAT_MASK => ['repeat_masker'],
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 -SOFT_MASK => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 -NOT_DEFAULT_MASKING_CASES => {"repeat_class_SINE/MIR" => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 "repeat_name_AluSp" => 0});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 Description: Creates a Slice which behaves exactly as a normal slice but
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 that returns repeat masked sequence from the seq method.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 Caller : RawComputes (PredictionTranscript creation code).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Constructor. Accepts the named arguments of Bio::EnsEMBL::Slice->new plus
# -REPEAT_MASK (array ref of logic names), -SOFT_MASK (boolean) and
# -NOT_DEFAULT_MASKING_CASES (hash ref). Throws if -REPEAT_MASK is given
# but is not an array reference.
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # Pick out the arguments specific to this subclass; the complete
  # argument list is still handed to the superclass constructor below.
  my @own_keys = ('REPEAT_MASK', 'SOFT_MASK', 'NOT_DEFAULT_MASKING_CASES');
  my ($logic_names, $soft_mask, $not_default_masking_cases) =
    rearrange(\@own_keys, @_);

  my $self = $class->SUPER::new(@_);

  # No logic names supplied: fall back to a single empty logic name.
  $logic_names = [''] unless $logic_names;

  throw("Reference to list of logic names argument expected.")
    unless ref($logic_names) eq 'ARRAY';

  $self->{'repeat_mask_logic_names'}   = $logic_names;
  $self->{'soft_mask'}                 = $soft_mask;
  # Always keep a hash ref here, even when no special cases were passed.
  $self->{'not_default_masking_cases'} = $not_default_masking_cases || {};

  return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119 =head2 repeat_mask_logic_names
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 Arg [1] : reference to list of strings $logic_names (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122 Example : $rm_slice->repeat_mask_logic_names(['repeat_masker']);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123 Description: Getter/Setter for the logic_names of the repeats that are used
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124 to mask this slice's sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 Returntype : reference to list of strings
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 Caller : seq() method
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Getter/setter for the logic names of the repeats used for masking.
#
# With no argument, returns the stored array reference. With one argument,
# the argument must be an array reference; it is validated and then stored
# before being returned. Previously the setter validated the argument but
# silently discarded it, so the stored value never changed.
#
# Throws if the argument is not an array reference; the stored value is
# left untouched in that case.
sub repeat_mask_logic_names {
  my $self = shift;

  if (@_) {
    my $logic_names = shift;
    if (ref($logic_names) ne 'ARRAY') {
      throw('Reference to list of logic names argument expected.');
    }
    # Only reached for a valid array ref: actually perform the "set".
    $self->{'repeat_mask_logic_names'} = $logic_names;
  }

  return $self->{'repeat_mask_logic_names'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 =head2 soft_mask
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 Arg [1] : boolean $soft_mask (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 Example : $rm_slice->soft_mask(0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150 Description: Getter/Setter which is used to turn on/off softmasking of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 sequence returned by seq.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 Returntype : boolean
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 Caller : seq() method
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Getter/setter for the soft masking flag. When called with an argument the
# flag is replaced by it; the return value is the stored flag, or 0 when
# the stored flag is false or unset.
sub soft_mask {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'soft_mask'} = $new_value[0];
  }

  return $self->{'soft_mask'} || 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 =head2 not_default_masking_cases
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 Arg [1] : hash reference $not_default_masking_cases (optional, default is {})
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168 The values are 0 or 1 for hard and soft masking respectively
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 The keys of the hash should be of 2 forms
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 "repeat_class_" . $repeat_consensus->repeat_class,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 e.g. "repeat_class_SINE/MIR"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 "repeat_name_" . $repeat_consensus->name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173 e.g. "repeat_name_MIR"
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 depending on which base you want to apply the not default masking either
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 the repeat_class or repeat_name. Both can be specified in the same hash
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176 at the same time, but in that case, repeat_name setting has priority over
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177 repeat_class. For example, you may have hard masking as default, and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178 you may want soft masking of all repeat_class SINE/MIR,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 but hard masking of repeat_name AluSp (which is also of repeat_class SINE/MIR)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180 Example : $rm_slice->not_default_masking_cases({"repeat_class_SINE/MIR" => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 "repeat_name_AluSp" => 0});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 Description: Getter/Setter which is used to escape some repeat class or name from the default
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 masking in place.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 Returntype : hash reference
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 Caller : seq() and subseq() methods
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Getter/setter for the hash reference of masking exceptions. When called
# with an argument the stored value is replaced by it; the stored value is
# returned in either case.
sub not_default_masking_cases {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'not_default_masking_cases'} = $new_value[0];
  }

  return $self->{'not_default_masking_cases'};
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 =head2 seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 Arg [1] : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200 Example : print $rmslice->seq(), "\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 Description: Retrieves the entire repeat masked sequence for this slice.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202 See also the B<Bio::EnsEMBL::Slice> implementation of this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 method.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Returns the sequence of this whole slice with repeats masked.
# Any arguments are forwarded unchanged to the superclass seq().
sub seq {
  my $self = shift;

  # Repeat features covering this slice, plus the masking settings.
  my $repeat_features = $self->_get_repeat_features($self);
  my $use_soft_mask   = $self->soft_mask();
  my $special_cases   = $self->not_default_masking_cases();

  # Unmasked sequence from the superclass implementation.
  my $sequence = $self->SUPER::seq(@_);

  # _mask_features() receives a reference and masks the string in place.
  $self->_mask_features(
    \$sequence, $repeat_features, $use_soft_mask, $special_cases
  );

  return $sequence;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 =head2 subseq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 Arg [1] : $start, $end and (optionally) $strand of the subregion, passed on to sub_Slice()
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236 Example : print $rmslice->subseq(1, 1000);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 Description: Retrieves a repeat masked sequence from a specified subregion
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 of this slice. See also the B<Bio::EnsEMBL::Slice>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 implementation of this method.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Returns the repeat masked sequence of a subregion of this slice.
# $start, $end and $strand are handed to sub_Slice() to build the slice
# whose sequence is returned.
sub subseq {
  my ($self, $start, $end, $strand) = @_;

  # The slice whose sequence is actually returned.
  my $target_slice = $self->sub_Slice($start, $end, $strand);

  # Frequent subseq calls would otherwise trigger a feature query each time.
  # When caching is enabled, features are therefore fetched for a wider
  # slice whose boundaries are aligned to blocks of 2**$BLOCK_PWR, so that
  # nearby requests ask for the same slice and are more likely to hit the
  # cache. Without caching the requested slice itself is used.
  my $feature_slice;
  if ($self->adaptor()->db()->no_cache()) {
    $feature_slice = $target_slice;
  }
  else {
    my $region_slice = $self->seq_region_Slice();

    # Zero-based indices of the blocks holding the first and last base.
    my $first_block = ($target_slice->start() - 1) >> $BLOCK_PWR;
    my $last_block  = ($target_slice->end() - 1) >> $BLOCK_PWR;

    # Convert block indices back to 1-based coordinates.
    my $block_start = ($first_block << $BLOCK_PWR) + 1;
    my $block_end   = ($last_block + 1) << $BLOCK_PWR;

    # Do not run past the end of the sequence region.
    $block_end = $region_slice->length
      if $block_end > $region_slice->length;

    $feature_slice = $region_slice->sub_Slice($block_start, $block_end);
  }

  my $repeat_features = $self->_get_repeat_features($feature_slice);
  my $use_soft_mask   = $self->soft_mask();
  my $special_cases   = $self->not_default_masking_cases();

  # Unmasked sequence of the requested region, then masked in place.
  my $sequence = $target_slice->SUPER::seq();
  $target_slice->_mask_features(
    \$sequence, $repeat_features, $use_soft_mask, $special_cases
  );

  return $sequence;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 =head2 _get_repeat_features
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 Args [1] : Bio::EnsEMBL::Slice to fetch features for
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 Description : Gets repeat features for the given slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 Returntype : ArrayRef[Bio::EnsEMBL::RepeatFeature] array of repeats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# Collects the repeat features of $slice for every configured logic name
# and returns them as one array reference, in logic-name order.
sub _get_repeat_features {
  my ($self, $slice) = @_;

  my @collected;
  for my $logic_name (@{ $self->repeat_mask_logic_names() }) {
    my $features = $slice->get_all_RepeatFeatures($logic_name);
    push @collected, @{$features};
  }

  return \@collected;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 1;