0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::SeqEdit - A class representing a post transcriptional edit to a
|
|
24 sequence.
|
|
25
|
|
26 =head1 SYNOPSIS
|
|
27
|
|
28 use Bio::EnsEMBL::SeqEdit;
|
|
29 use Bio::EnsEMBL::Attribute;
|
|
30
|
|
31 # construct a SeqEdit object using a Transcript attribute
|
|
32
|
|
33 ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') };
|
|
34
|
|
35 $seq_edit = Bio::EnsEMBL::SeqEdit->new( -ATTRIB => $attribute );
|
|
36
|
|
37 print $seq_edit->start(), "\n";
|
|
38 print $seq_edit->end(), "\n";
|
|
39 print $seq_edit->alt_seq(), "\n";
|
|
40
|
|
41 # apply the edit to some sequence
|
|
42 $seq = $transcript->spliced_seq();
|
|
43 print "Before modification: $seq\n";
|
|
44
|
|
45 $seq_edit->apply_edit( \$seq );
|
|
46 print "After modification: $seq\n";
|
|
47
|
|
48 # construct an attribute object from a SeqEdit and add it to a
|
|
49 # translation
|
|
50
|
|
51 $seq_edit = Bio::EnsEMBL::SeqEdit->new(
|
|
52 -CODE => '_selenocysteine',
|
|
53 -NAME => 'Selenocysteine',
|
|
54 -DESCRIPTION => 'Selenocysteine',
|
|
55 -START => 10,
|
|
56 -END => 10,
|
|
57 -ALT_SEQ => 'U'
|
|
58 );
|
|
59
|
|
60 $attribute = $seq_edit->get_Attribute();
|
|
61 $translation->add_Attributes($attribute);
|
|
62
|
|
63 =head1 DESCRIPTION
|
|
64
|
|
65 This is a class used to represent post transcriptional
|
|
66 modifications to sequences. SeqEdit objects are stored as ordinary
|
|
67 Bio::EnsEMBL::Attributes with a parseable value and can be used to
|
|
68 represent RNA editing, selenocysteines etc.
|
|
69
|
|
70 Also see B<Bio::EnsEMBL::Attribute>
|
|
71
|
|
72 =head1 METHODS
|
|
73
|
|
74 =cut
|
|
75
|
|
76 package Bio::EnsEMBL::SeqEdit;
|
|
77
|
|
78 use strict;
|
|
79 use warnings;
|
|
80
|
|
81 use Bio::EnsEMBL::Attribute;
|
|
82 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
83 use Bio::EnsEMBL::Utils::Exception qw(throw);
|
|
84
|
|
85
|
|
86 =head2 new
|
|
87
|
|
88 Arg [-ATTRIB] : Bio::EnsEMBL::Attribute
|
|
89 Constructs a new SeqEdit from an Attribute.
|
|
90 Can only be provided if no other constructor arguments
|
|
91 are provided.
|
|
92 Arg [-START] : The start position of the edit.
|
|
93 Arg [-END] : The end position of the edit.
|
|
94 Arg [-ALT_SEQ] : The alternate sequence
|
|
95 Arg [-CODE] : A code for this SeqEdit
|
|
96 Arg [-NAME] : A name for this SeqEdit
|
|
97 Arg [-DESCRIPTION] : A description for this SeqEdit
|
|
98 Example : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib);
|
|
99 my $sea = Bio::EnsEMBL::SeqEdit->new
|
|
100 (-START => 10,
|
|
101 -END => 12,
|
|
102 -ALT_SEQ => 'ACG',
|
|
103 -CODE => '_rna_edit',
|
|
104 -NAME => 'RNA Edit',
|
|
105 -DESCRIPTION => 'RNA edit');
|
|
106 Description: Constructs a SeqEdit representing a single edit to a
|
|
107 sequence, such as an rna modification or a selenocysteine.
|
|
108 Returntype : Bio::EnsEMBL::SeqEdit
|
|
109 Exceptions : throws if attribute set and other args as well
|
|
110 throws if start and end not set correctly or attribute cannot be parsed
|
|
111 Caller : general
|
|
112 Status : Stable
|
|
113
|
|
114 =cut
|
|
115
|
|
# Constructor. Builds a SeqEdit either from an existing Attribute (-ATTRIB,
# whose value is parsed as "start end alt_seq") or from the explicit -START,
# -END, -ALT_SEQ, -NAME, -DESCRIPTION and -CODE arguments.
#
# Throws if -ATTRIB is combined with any other argument, if -ATTRIB is not a
# Bio::EnsEMBL::Attribute, if the attribute value cannot be parsed, or if the
# coordinates are inconsistent (start < 1, end < 0 or start > end + 1).
sub new {
  my $class = shift;

  my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) =
    rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_);

  if($attrib) {
    if(defined($start) || defined($end) || defined($alt_seq) ||
       defined($name) || defined($desc) || defined($code)) {
      throw("Cannot specify -ATTRIB argument with additional arguments.");
    }

    # The eval guards against unblessed references, on which ->isa would die
    # with a raw Perl error rather than the exception below.
    if(!ref($attrib) || !eval { $attrib->isa('Bio::EnsEMBL::Attribute') }) {
      throw('Bio::EnsEMBL::Attribute argument expected.');
    }

    my $value = $attrib->value();
    $value = '' if(!defined($value));

    ($start, $end, $alt_seq) = split(/\s+/, $value);

    # Both coordinates must be present and consist solely of digits; the
    # anchors reject partial matches such as "12abc".
    if(!defined($start) || !defined($end) ||
       $start !~ /\A\d+\z/ || $end !~ /\A\d+\z/) {
      throw('Could not parse value of attribute: '.$value);
    }

    $name = $attrib->name();
    $code = $attrib->code();
    $desc = $attrib->description();
  }

  # start == end + 1 is allowed: it denotes an insertion before 'start'.
  if(defined($end) && defined($start) && $start > $end+1) {
    throw("start must be less than or equal to end + 1");
  }

  if(defined($start) && $start < 1) {
    throw("start must be greater than or equal to 1");
  }

  if(defined($end) && $end < 0) {
    throw("end must be greater than or equal to 0");
  }

  # A missing (or otherwise false) replacement sequence is stored as the
  # empty string, which represents a deletion.
  $alt_seq ||= '';

  return bless {'start'       => $start,
                'end'         => $end,
                'alt_seq'     => $alt_seq,
                'description' => $desc,
                'name'        => $name,
                'code'        => $code}, $class;
}
|
|
168
|
|
169
|
|
170
|
|
171 =head2 start
|
|
172
|
|
173 Arg [1] : (optional) int $start - the new start position
|
|
174 Example : $start = $se_attrib->start();
|
|
175 Description: Getter/Setter for the start position of the region replaced
|
|
176 by the alt_seq.
|
|
177
|
|
178 Coordinates are inclusive and one-based, which means that
|
|
179 inserts are unusually represented by a start 1bp higher than
|
|
180 the end.
|
|
181
|
|
182 E.g. start = 1, end = 1 is a replacement of the first base but
|
|
183 start = 1, end = 0 is an insert BEFORE the first base.
|
|
184 Returntype : int
|
|
185 Exceptions : throws if start < 1
|
|
186 Caller : Transcript, Translation
|
|
187 Status : Stable
|
|
188
|
|
189 =cut
|
|
190
|
|
# Getter/setter for the start coordinate of the edited region.
# When called with an argument the value is stored (undef is accepted);
# a defined value below 1 raises an exception. Always returns the
# currently stored start.
sub start {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    my $new_start = $args[0];

    throw("start must be greater than or equal to 1")
      if(defined($new_start) && $new_start < 1);

    $self->{'start'} = $new_start;
  }

  return $self->{'start'};
}
|
|
204
|
|
205
|
|
206 =head2 end
|
|
207
|
|
208 Arg [1] : (optional) int $end - the new end position
|
|
209 Example : $end = $se_attrib->end();
|
|
210 Description: Getter/Setter for the end position of the region replaced
|
|
211 by the alt_seq.
|
|
212
|
|
213 Coordinates are inclusive and one-based, which means that
|
|
214 inserts are unusually represented by a start 1bp higher than
|
|
215 the end.
|
|
216
|
|
217 E.g. start = 1, end = 1 is a replacement of the first base but
|
|
218 start = 1, end = 0 is an insert BEFORE the first base.
|
|
219 Returntype : int
|
|
220 Exceptions : throws if end < 0
|
|
221 Caller : Transcript, Translation
|
|
222 Status : Stable
|
|
223
|
|
224 =cut
|
|
225
|
|
# Getter/setter for the end coordinate of the edited region.
# When called with an argument the value is stored (undef is accepted);
# a defined negative value raises an exception. An end of 0 is legal, since
# it is needed to express an insertion before the first position.
# Always returns the currently stored end.
sub end {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    my $new_end = $args[0];

    throw("end must be greater than or equal to 0")
      if(defined($new_end) && $new_end < 0);

    $self->{'end'} = $new_end;
  }

  return $self->{'end'};
}
|
|
239
|
|
240
|
|
241 =head2 alt_seq
|
|
242
|
|
243 Arg [1] : (optional) string $alt_seq
|
|
244 Example : my $alt_seq = $se_attrib->alt_seq();
|
|
245 Description: Getter/Setter for the replacement sequence used by this edit.
|
|
246 The sequence may either be a string of amino acids or
|
|
247 nucleotides depending on the context in which this edit is
|
|
248 used.
|
|
249
|
|
250 In the case of a deletion the replacement sequence is an empty
|
|
251 string.
|
|
252 Returntype : string
|
|
253 Exceptions : none
|
|
254 Caller : Transcript, Translation
|
|
255 Status : Stable
|
|
256
|
|
257 =cut
|
|
258
|
|
# Getter/setter for the replacement sequence.
# When called with an argument, any false value (undef, '', 0) is stored as
# the empty string; otherwise the value is stored unchanged.
# Always returns the currently stored replacement sequence.
sub alt_seq {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    my $replacement = $args[0];
    $self->{'alt_seq'} = $replacement ? $replacement : '';
  }

  return $self->{'alt_seq'};
}
|
|
264
|
|
265
|
|
266 =head2 length_diff
|
|
267
|
|
268 Arg [1] : none
|
|
269 Example : my $diff = $sea->length_diff();
|
|
270 Description: Returns the difference in length caused by applying this
|
|
271 edit to a sequence. This may be negative (deletion),
|
|
272 positive (insertion) or 0 (replacement).
|
|
273
|
|
274 If either start or end are not defined 0 is returned.
|
|
275 Returntype : int
|
|
276 Exceptions : none
|
|
277 Caller : general
|
|
278 Status : Stable
|
|
279
|
|
280 =cut
|
|
281
|
|
# Returns the change in sequence length that applying this edit causes:
# length of the replacement sequence minus the length of the replaced
# region (end - start + 1). Returns 0 when start or end is undefined.
sub length_diff {
  my $self = shift;

  my ($first, $last) = @{$self}{'start', 'end'};

  return 0 unless(defined($first) && defined($last));

  my $replaced_length = $last - $first + 1;

  return length($self->{'alt_seq'}) - $replaced_length;
}
|
|
289
|
|
290
|
|
291
|
|
292 =head2 name
|
|
293
|
|
294 Arg [1] : (optional) string $name
|
|
295 Example : my $name = $seqedit->name();
|
|
296 Description: Getter/Setter for the name of this SeqEdit
|
|
297 Returntype : string
|
|
298 Exceptions : none
|
|
299 Caller : general
|
|
300 Status : Stable
|
|
301
|
|
302 =cut
|
|
303
|
|
# Getter/setter for the name of this SeqEdit.
# Stores the first argument when one is supplied (undef included) and
# returns the currently stored name.
sub name {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    $self->{'name'} = $args[0];
  }

  return $self->{'name'};
}
|
|
309
|
|
310
|
|
311
|
|
312
|
|
313 =head2 code
|
|
314
|
|
315 Arg [1] : (optional) string $code
|
|
316 Example : my $code = $seqedit->code();
|
|
317 Description: Getter/Setter for the code of this SeqEdit
|
|
318 Returntype : string
|
|
319 Exceptions : none
|
|
320 Caller : general
|
|
321 Status : Stable
|
|
322
|
|
323 =cut
|
|
324
|
|
# Getter/setter for the code of this SeqEdit.
# Stores the first argument when one is supplied (undef included) and
# returns the currently stored code.
sub code {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    $self->{'code'} = $args[0];
  }

  return $self->{'code'};
}
|
|
330
|
|
331
|
|
332
|
|
333 =head2 description
|
|
334
|
|
335 Arg [1] : (optional) string $desc
|
|
336 Example : my $desc = $seqedit->description();
|
|
337 Description: Getter/Setter for the description of this SeqEdit
|
|
338 Returntype : string
|
|
339 Exceptions : none
|
|
340 Caller : general
|
|
341 Status : Stable
|
|
342
|
|
343 =cut
|
|
344
|
|
# Getter/setter for the description of this SeqEdit.
# Stores the first argument when one is supplied (undef included) and
# returns the currently stored description.
sub description {
  my ($self, @args) = @_;

  if(scalar(@args)) {
    $self->{'description'} = $args[0];
  }

  return $self->{'description'};
}
|
|
350
|
|
351
|
|
352
|
|
353 =head2 get_Attribute
|
|
354
|
|
355 Arg [1] : none
|
|
356 Example : my $attrib = $seqedit->get_Attribute();
|
|
357 $transcript->add_Attributes($attrib);
|
|
358 Description: Converts a SeqEdit object into an Attribute object. This
|
|
359 allows the SeqEdit to be stored as any other attribute in the
|
|
360 ensembl database. The start/end and alt_seq properties
|
|
361 should be set before calling this method.
|
|
362 Returntype : Bio::EnsEMBL::Attribute
|
|
363 Exceptions : warning if start/end or alt_seq properties are not defined
|
|
364 Caller : general
|
|
365 Status : Stable
|
|
366
|
|
367 =cut
|
|
368
|
|
# Converts this SeqEdit into a Bio::EnsEMBL::Attribute carrying the same
# code, name and description. The attribute value is "start end alt_seq"
# joined by single spaces; if any of the three is undefined a warning is
# issued and the value is the empty string.
sub get_Attribute {
  my $self = shift;

  my $start   = $self->start();
  my $end     = $self->end();
  my $alt_seq = $self->alt_seq();

  my $value;

  if(defined($start) && defined($end) && defined($alt_seq)) {
    $value = join(' ', $start, $end, $alt_seq);
  } else {
    # warning() is called by its fully qualified name because this file
    # imports only throw() from Bio::EnsEMBL::Utils::Exception; a bare
    # warning() call would die with "Undefined subroutine".
    Bio::EnsEMBL::Utils::Exception::warning(
      'Attribute value cannot be created unless start, end and alt_seq ' .
      'properties are defined');
    $value = '';
  }

  return Bio::EnsEMBL::Attribute->new(-CODE        => $self->code(),
                                      -VALUE       => $value,
                                      -NAME        => $self->name(),
                                      -DESCRIPTION => $self->description());
}
|
|
391
|
|
392
|
|
393 =head2 apply_edit
|
|
394
|
|
395 Arg [1] : reference to string $seqref
|
|
396 Example : $sequence = 'ACTGAATATTTAAGGCA';
|
|
397 $seqedit->apply_edit(\$sequence);
|
|
398 print $sequence, "\n";
|
|
399 Description: Applies this edit directly to a sequence which is
|
|
400 passed by reference. The coordinates of this SeqEdit
|
|
401 are assumed to be relative to the start of the sequence
|
|
402 argument.
|
|
403 If either the start or end of this SeqEdit are not defined
|
|
404 this function will not do anything to the passed sequence.
|
|
405 Returntype : reference to the same sequence that was passed in
|
|
406 Exceptions : throws if the argument is not a reference to a scalar
|
|
407 Caller : Transcript, Translation
|
|
408 Status : Stable
|
|
409
|
|
410 =cut
|
|
411
|
|
# Applies this edit in place to the string referenced by the argument and
# returns that same reference. Coordinates are 1-based and inclusive, so the
# replaced region begins at offset start - 1 and spans end - start + 1
# characters (zero characters for an insertion, where start == end + 1).
# The string is left untouched when start or end is undefined.
# Throws if the argument is not a reference to a scalar.
sub apply_edit {
  my ($self, $seqref) = @_;

  throw("Reference to scalar argument expected")
    unless(ref($seqref) eq 'SCALAR');

  my $from = $self->{'start'};
  my $to   = $self->{'end'};

  if(defined($from) && defined($to)) {
    my $span = $to - $from + 1;
    substr($$seqref, $from - 1, $span) = $self->{'alt_seq'};
  }

  return $seqref;
}
|
|
429
|
|
430
|
|
431 1;
|