annotate variant_effect_predictor/Bio/EnsEMBL/RepeatMaskedSlice.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::RepeatMaskedSlice - Arbitrary Slice of a genome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $sa = $db->get_SliceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 $repeat_masked_slice = $slice->get_repeatmasked_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 # get repeat masked sequence:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 my $dna = $repeat_masked_slice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 $dna = $repeat_masked_slice->subseq( 1, 1000 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 This is a specialised Bio::EnsEMBL::Slice class that is used to retrieve
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 repeat masked genomic sequence rather than normal genomic sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 package Bio::EnsEMBL::RepeatMaskedSlice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use Bio::EnsEMBL::Slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use Bio::EnsEMBL::Utils::Exception;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 @ISA = ('Bio::EnsEMBL::Slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 # The BLOCK_PWR is the log base 2 of the chunk size in which you want your repeat features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 # to be retrieved. This will create repeat feature retrieval calls that are likely
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 # to be on the same slice and hopefully create cache hits and less database traffic
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 my $BLOCK_PWR = 18;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 Arg [-REPEAT_MASK] : The logic name of the repeats to be used for masking.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 If not provided, all repeats in the database are used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 Arg [...] : Named superclass arguments. See B<Bio::EnsEMBL::Slice>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 Example : my $slice = Bio::EnsEMBL::RepeatMaskedSlice->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 (-START => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 -END => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 -STRAND => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 -SEQ_REGION_NAME => $seq_region,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 -SEQ_REGION_LENGTH => $seq_region_length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 -COORD_SYSTEM => $cs,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 -ADAPTOR => $adaptor,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 -REPEAT_MASK => ['repeat_masker'],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 -SOFT_MASK => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 -NOT_DEFAULT_MASKING_CASES => {"repeat_class_SINE/MIR" => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 "repeat_name_AluSp" => 0});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Description: Creates a Slice which behaves exactly as a normal slice but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 that returns repeat masked sequence from the seq method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 Returntype : Bio::EnsEMBL::RepeatMaskedSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 Caller : RawComputes (PredictionTranscript creation code).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Constructor. Builds the object through the superclass constructor and then
# records the repeat-masking options (-REPEAT_MASK, -SOFT_MASK,
# -NOT_DEFAULT_MASKING_CASES) on it. All arguments, including the masking
# ones, are forwarded unchanged to the superclass.
# Throws if -REPEAT_MASK is supplied but is not an array reference.
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # Pull out only the options this subclass understands.
  my ($logic_names, $soft_mask, $not_default_masking_cases) =
    rearrange([qw(REPEAT_MASK SOFT_MASK NOT_DEFAULT_MASKING_CASES)], @_);

  my $self = $class->SUPER::new(@_);

  # No logic names given: fall back to a single empty logic name
  # (the POD documents this as "all repeats in the database").
  $logic_names = [''] unless $logic_names;

  throw("Reference to list of logic names argument expected.")
    unless ref($logic_names) eq 'ARRAY';

  $self->{'repeat_mask_logic_names'}   = $logic_names;
  $self->{'soft_mask'}                 = $soft_mask;
  $self->{'not_default_masking_cases'} = $not_default_masking_cases || {};

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 =head2 repeat_mask_logic_names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 Arg [1] : reference to list of strings $logic_names (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 Example : $rm_slice->repeat_mask_logic_names(['repeat_masker']);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 Description: Getter/Setter for the logic_names of the repeats that are used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 to mask this slice's sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Returntype : reference to list of strings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 Caller : seq() method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the logic names of the repeats used for masking.
#   Arg [1]    : (optional) reference to a list of logic-name strings
#   Returntype : reference to the currently stored list of logic names
#   Exceptions : throws if an argument is given that is not an array reference
sub repeat_mask_logic_names {
  my $self = shift;

  if (@_) {
    my $logic_names = shift;
    if (ref($logic_names) ne 'ARRAY') {
      throw('Reference to list of logic names argument expected.');
    }
    # Store the validated value. Previously the argument was checked and
    # then dropped, so calling this method as a setter had no effect.
    $self->{'repeat_mask_logic_names'} = $logic_names;
  }

  return $self->{'repeat_mask_logic_names'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 =head2 soft_mask
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Arg [1] : boolean $soft_mask (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Example : $rm_slice->soft_mask(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Description: Getter/Setter which is used to turn on/off softmasking of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 sequence returned by seq.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 Returntype : boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 Caller : seq() method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the soft-masking flag.
# When called with an argument, the first argument is stored as the flag.
# Returns the stored flag, or 0 when the stored value is false or unset.
sub soft_mask {
  my ($self, @value) = @_;

  if (@value) {
    $self->{'soft_mask'} = $value[0];
  }

  return $self->{'soft_mask'} ? $self->{'soft_mask'} : 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 =head2 not_default_masking_cases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 Arg [1] : hash reference $not_default_masking_cases (optional, default is {})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 The values are 0 or 1 for hard and soft masking respectively
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 The keys of the hash should be of 2 forms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 "repeat_class_" . $repeat_consensus->repeat_class,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 e.g. "repeat_class_SINE/MIR"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 "repeat_name_" . $repeat_consensus->name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 e.g. "repeat_name_MIR"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 depending on which base you want to apply the not default masking either
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 the repeat_class or repeat_name. Both can be specified in the same hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 at the same time, but in that case, repeat_name setting has priority over
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 repeat_class. For example, you may have hard masking as default, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 you may want soft masking of all repeat_class SINE/MIR,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 but hard masking of repeat_name AluSp (which are also from repeat_class SINE/MIR)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 Example : $rm_slice->not_default_masking_cases({"repeat_class_SINE/MIR" => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 "repeat_name_AluSp" => 0});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 Description: Getter/Setter which is used to escape some repeat class or name from the default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 masking in place.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 Returntype : hash reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Caller : seq() and subseq() methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Getter/setter for the hash reference of masking exceptions.
# When called with an argument, the first argument replaces the stored value
# (no validation is performed). Returns the stored value.
sub not_default_masking_cases {
  my ($self, @value) = @_;

  if (@value) {
    $self->{'not_default_masking_cases'} = $value[0];
  }

  return $self->{'not_default_masking_cases'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 =head2 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 Arg [1] : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 Example : print $rmslice->seq(), "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 Description: Retrieves the entire repeat masked sequence for this slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 See also the B<Bio::EnsEMBL::Slice> implementation of this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the sequence of this slice with repeats masked.
# Repeat features are fetched for the slice itself, the raw sequence comes
# from the superclass seq() (remaining arguments are forwarded to it), and
# the masking is applied in place by _mask_features().
sub seq {
  my ($self, @args) = @_;

  # Gather the features and the masking settings first.
  my $repeat_features = $self->_get_repeat_features($self);
  my $use_soft_mask   = $self->soft_mask();
  my $exceptions      = $self->not_default_masking_cases();

  # Unmasked sequence from the parent class.
  my $sequence = $self->SUPER::seq(@args);

  # _mask_features receives a reference and modifies $sequence directly.
  $self->_mask_features(\$sequence, $repeat_features, $use_soft_mask,
                        $exceptions);

  return $sequence;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 =head2 subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 Arg [1-3] : $start, $end and $strand of the subregion (passed on to sub_Slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 Example : print $rmslice->subseq(1, 1000);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 Description: Retrieves a repeat masked sequence from a specified subregion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 of this slice. See also the B<Bio::EnsEMBL::Slice>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 implementation of this method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns the repeat-masked sequence of a subregion of this slice.
#   Arg [1-3]  : $start, $end, $strand - handed unchanged to sub_Slice()
#   Returntype : string
sub subseq {
  my ($self, $start, $end, $strand) = @_;

  my $target_slice = $self->sub_Slice($start, $end, $strand);

  # Frequent subseq calls would otherwise each trigger a feature fetch.
  # When caching is enabled, features are instead fetched for a slice
  # aligned to fixed 2**$BLOCK_PWR boundaries, so that repeated requests are
  # more likely to ask for the same slice. Without caching, the features are
  # fetched for exactly the requested subregion.
  my $feature_slice = $target_slice;
  unless ($self->adaptor()->db()->no_cache()) {
    my $region_slice = $self->seq_region_Slice();

    # Zero-based indices of the first and last block touched by the request.
    my $first_block = ($target_slice->start() - 1) >> $BLOCK_PWR;
    my $last_block  = ($target_slice->end() - 1) >> $BLOCK_PWR;

    # One-based, inclusive coordinates covering those whole blocks.
    my $block_start = ($first_block << $BLOCK_PWR) + 1;
    my $block_end   = ($last_block + 1) << $BLOCK_PWR;

    # Do not run past the end of the sequence region.
    $block_end = $region_slice->length
      if $block_end > $region_slice->length;

    $feature_slice = $region_slice->sub_Slice($block_start, $block_end);
  }

  my $repeat_features = $self->_get_repeat_features($feature_slice);
  my $use_soft_mask   = $self->soft_mask();
  my $exceptions      = $self->not_default_masking_cases();

  # Raw sequence via the parent class seq(), then mask it in place.
  my $sequence = $target_slice->SUPER::seq();
  $target_slice->_mask_features(\$sequence, $repeat_features, $use_soft_mask,
                                $exceptions);

  return $sequence;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 =head2 _get_repeat_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Args [1] : Bio::EnsEMBL::Slice to fetch features for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 Description : Gets repeat features for the given slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 Returntype : ArrayRef[Bio::EnsEMBL::RepeatFeature] array of repeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Collects the repeat features of the given slice for every configured
# logic name.
#   Arg [1]    : slice object to fetch features from (must provide
#                get_all_RepeatFeatures)
#   Returntype : reference to a single flat list of all features found
sub _get_repeat_features {
  my $self  = shift;
  my $slice = shift;

  my @collected = ();
  for my $logic_name (@{ $self->repeat_mask_logic_names() }) {
    my $features = $slice->get_all_RepeatFeatures($logic_name);
    push(@collected, @{$features});
  }

  return \@collected;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 1;