comparison variant_effect_predictor/Bio/EnsEMBL/SeqEdit.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::SeqEdit - A class representing a post-transcriptional edit to a
24 sequence.
25
26 =head1 SYNOPSIS
27
28 use Bio::EnsEMBL::SeqEdit;
29 use Bio::EnsEMBL::Attribute;
30
31 # construct a SeqEdit object using a Transcript attribute
32
33 ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') };
34
35 $seq_edit = Bio::EnsEMBL::SeqEdit->new( -ATTRIB => $attribute );
36
37 print $seq_edit->start(), "\n";
38 print $seq_edit->end(), "\n";
39 print $seq_edit->alt_seq(), "\n";
40
41 # apply the edit to some sequence
42 $seq = $transcript->spliced_seq();
43 print "Before modification: $seq\n";
44
45 $seq_edit->apply_edit( \$seq );
46 print "After modification: $seq\n";
47
48 # construct an attribute object from a SeqEdit and add it to a
49 # translation
50
51 $seq_edit = Bio::EnsEMBL::SeqEdit->new(
52 -CODE => '_selenocysteine',
53 -NAME => 'Selenocysteine',
54 -DESCRIPTION => 'Selenocysteine',
55 -START => 10,
56 -END => 10,
57 -ALT_SEQ => 'U'
58 );
59
60 $attribute = $seq_edit->get_Attribute();
61 $translation->add_Attributes($attribute);
62
63 =head1 DESCRIPTION
64
65 This is a class used to represent post transcriptional
66 modifications to sequences. SeqEdit objects are stored as ordinary
67 Bio::EnsEMBL::Attributes with a parseable value and can be used to
68 represent RNA editing, selenocysteines etc.
69
70 Also see B<Bio::EnsEMBL::Attribute>
71
72 =head1 METHODS
73
74 =cut
75
76 package Bio::EnsEMBL::SeqEdit;
77
78 use strict;
79 use warnings;
80
81 use Bio::EnsEMBL::Attribute;
82 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
83 use Bio::EnsEMBL::Utils::Exception qw(throw);
84
85
86 =head2 new
87
88 Arg [-ATTRIB] : Bio::EnsEMBL::Attribute
89 Constructs a new SeqEdit from an Attribute.
90 Can only be provided if no other constructor arguments
91 are provided.
92 Arg [-START] : The start position of the edit.
93 Arg [-END] : The end position of the edit.
94 Arg [-ALT_SEQ] : The alternate sequence
95 Arg [-CODE] : A code for this SeqEdit
96 Arg [-NAME] : A name for this SeqEdit
97 Arg [-DESCRIPTION] : A description for this SeqEdit
98 Example : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib);
99 my $sea = Bio::EnsEMBL::SeqEdit->new
100 (-START => 10,
101 -END => 12,
102 -ALT_SEQ => 'ACG',
103 -CODE => '_rna_edit',
104 -NAME => 'RNA Edit',
105 -DESCRIPTION => 'RNA edit');
106 Description: Constructs a SeqEdit representing a single edit to a
107 sequence, such as an rna modification or a selenocysteine.
108 Returntype : Bio::EnsEMBL::SeqEdit
109 Exceptions : throws if -ATTRIB is given together with other arguments
110 throws if start/end are out of range or the attribute value cannot be parsed
111 Caller : general
112 Status : Stable
113
114 =cut
115
sub new {
  my $class = shift;

  # rearrange() maps the named (-KEY => value) arguments onto positional
  # variables in the order listed here.
  my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) =
    rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_);

  if($attrib) {
    # -ATTRIB is an alternative to the individual arguments, never an addition.
    if(defined($start) || defined($end) || defined($alt_seq) ||
       defined($name) || defined($desc) || defined($code)) {
      throw("Cannot specify -ATTRIB argument with additional arguments.");
    }

    # ref() rejects plain strings; the eval rejects unblessed references, on
    # which ->isa() would otherwise die with an unrelated error message.
    my $is_attribute = ref($attrib)
      && do { local $@; eval { $attrib->isa('Bio::EnsEMBL::Attribute') } };
    if(!$is_attribute) {
      throw('Bio::EnsEMBL::Attribute argument expected.');
    }

    # The attribute value is expected to look like "<start> <end> [<alt_seq>]".
    my $value = $attrib->value();
    $value = '' if(!defined($value));

    ($start, $end, $alt_seq) = split(/\s+/, $value);

    # Both coordinates must be present and consist of digits only.  The
    # pattern is anchored so that values such as "1a" are rejected rather
    # than silently coerced in the numeric comparisons below.
    if(!defined($start) || !defined($end) ||
       $start !~ /\A\d+\z/ || $end !~ /\A\d+\z/) {
      throw('Could not parse value of attribute: '.$value);
    }

    $name = $attrib->name();
    $code = $attrib->code();
    $desc = $attrib->description();
  }

  # start == end + 1 is legal: it denotes an insertion before 'start'.
  if(defined($end) && defined($start) && $start > $end+1) {
    throw("start must be less than or equal to end + 1");
  }

  if(defined($start) && $start < 1) {
    throw("start must be greater than or equal to 1");
  }

  # end may be 0 (insertion before the first base), hence the different bound.
  if(defined($end) && $end < 0) {
    throw("end must be greater than or equal to 0");
  }

  # A missing replacement sequence means a deletion.
  $alt_seq ||= '';

  return bless {'start'       => $start,
                'end'         => $end,
                'alt_seq'     => $alt_seq,
                'description' => $desc,
                'name'        => $name,
                'code'        => $code}, $class;
}
168
169
170
171 =head2 start
172
173 Arg [1] : (optional) int $start - the new start position
174 Example : $start = $se_attrib->start();
175 Description: Getter/Setter for the start position of the region replaced
176 by the alt_seq.
177
178 Coordinates are inclusive and one-based, which means that
179 inserts are unusually represented by a start 1bp higher than
180 the end.
181
182 E.g. start = 1, end = 1 is a replacement of the first base but
183 start = 1, end = 0 is an insert BEFORE the first base.
184 Returntype : int
185 Exceptions : throws if start < 1
186 Caller : Transcript, Translation
187 Status : Stable
188
189 =cut
190
sub start {
  my ($self, @args) = @_;

  # Called without an argument: plain getter.
  return $self->{'start'} unless(@args);

  # Called with an argument (possibly undef): validate, then store.
  my $new_start = $args[0];
  throw("start must be greater than or equal to 1")
    if(defined($new_start) && $new_start < 1);

  $self->{'start'} = $new_start;
  return $self->{'start'};
}
204
205
206 =head2 end
207
208 Arg [1] : (optional) int $end - the new end position
209 Example : $end = $se_attrib->end();
210 Description: Getter/Setter for the end position of the region replaced
211 by the alt_seq.
212
213 Coordinates are inclusive and one-based, which means that
214 inserts are unusually represented by a start 1bp higher than
215 the end.
216
217 E.g. start = 1, end = 1 is a replacement of the first base but
218 start = 1, end = 0 is an insert BEFORE the first base.
219 Returntype : int
220 Exceptions : throws if end < 0
221 Caller : Transcript, Translation
222 Status : Stable
223
224 =cut
225
sub end {
  my ($self, @args) = @_;

  # Called without an argument: plain getter.
  return $self->{'end'} unless(@args);

  # An end of 0 is allowed; together with start = 1 it denotes an insertion
  # before the first base.
  my $new_end = $args[0];
  throw("end must be greater than or equal to 0")
    if(defined($new_end) && $new_end < 0);

  $self->{'end'} = $new_end;
  return $self->{'end'};
}
239
240
241 =head2 alt_seq
242
243 Arg [1] : (optional) string $alt_seq
244 Example : my $alt_seq = $se_attrib->alt_seq();
245 Description: Getter/Setter for the replacement sequence used by this edit.
246 The sequence may either be a string of amino acids or
247 nucleotides depending on the context in which this edit is
248 used.
249
250 In the case of a deletion the replacement sequence is an empty
251 string.
252 Returntype : string
253 Exceptions : none
254 Caller : Transcript, Translation
255 Status : Stable
256
257 =cut
258
sub alt_seq {
  my ($self, @args) = @_;

  if(@args) {
    # Any false value (undef, '', 0, '0') is stored as the empty string,
    # which represents a deletion.
    my $replacement = $args[0];
    $self->{'alt_seq'} = $replacement ? $replacement : '';
  }

  return $self->{'alt_seq'};
}
264
265
266 =head2 length_diff
267
268 Arg [1] : none
269 Example : my $diff = $sea->length_diff();
270 Description: Returns the difference in length caused by applying this
271 edit to a sequence. This may be negative (deletion),
272 positive (insertion) or 0 (replacement).
273
274 If either start or end are not defined 0 is returned.
275 Returntype : int
276 Exceptions : none
277 Caller : general
278 Status : Stable
279
280 =cut
281
sub length_diff {
  my $self = shift;

  my ($from, $to) = @{$self}{'start', 'end'};

  # Without both coordinates the edit cannot be applied, so it changes nothing.
  unless(defined($to) && defined($from)) {
    return 0;
  }

  # Coordinates are inclusive, hence the + 1.
  my $replaced_length = $to - $from + 1;

  return length($self->{'alt_seq'}) - $replaced_length;
}
289
290
291
292 =head2 name
293
294 Arg [1] : (optional) string $name
295 Example : my $name = $seqedit->name();
296 Description: Getter/Setter for the name of this SeqEdit
297 Returntype : string
298 Exceptions : none
299 Caller : general
300 Status : Stable
301
302 =cut
303
sub name {
  my ($self, @args) = @_;

  # Any argument, including undef, replaces the stored name.
  if(@args) {
    $self->{'name'} = $args[0];
  }

  return $self->{'name'};
}
309
310
311
312
313 =head2 code
314
315 Arg [1] : (optional) string $code
316 Example : my $code = $seqedit->code();
317 Description: Getter/Setter for the code of this SeqEdit
318 Returntype : string
319 Exceptions : none
320 Caller : general
321 Status : Stable
322
323 =cut
324
sub code {
  my ($self, @args) = @_;

  # Getter when called without arguments.
  return $self->{'code'} if(!@args);

  # Setter: any argument, including undef, replaces the stored code.
  $self->{'code'} = $args[0];
  return $self->{'code'};
}
330
331
332
333 =head2 description
334
335 Arg [1] : (optional) string $desc
336 Example : my $desc = $seqedit->description();
337 Description: Getter/Setter for the description of this SeqEdit
338 Returntype : string
339 Exceptions : none
340 Caller : general
341 Status : Stable
342
343 =cut
344
sub description {
  my ($self, @args) = @_;

  # Any argument, including undef, replaces the stored description.
  ($self->{'description'}) = @args if(scalar(@args) > 0);

  return $self->{'description'};
}
350
351
352
353 =head2 get_Attribute
354
355 Arg [1] : none
356 Example : my $attrib = $seqedit->get_Attribute();
357 $transcript->add_Attributes($attrib);
358 Description: Converts a SeqEdit object into an Attribute object. This
359 allows the SeqEdit to be stored as any other attribute in the
360 ensembl database. The start/end and alt_seq properties
361 should be set before calling this method.
362 Returntype : Bio::EnsEMBL::Attribute
363 Exceptions : warning if start/end or alt_seq properties are not defined
364 Caller : general
365 Status : Stable
366
367 =cut
368
sub get_Attribute {
  my $self = shift;

  my $start   = $self->start();
  my $end     = $self->end();
  my $alt_seq = $self->alt_seq();

  my $value;

  if(defined($start) && defined($end) && defined($alt_seq)) {
    # Same "<start> <end> <alt_seq>" layout that new() parses from -ATTRIB.
    $value = join(' ', $start, $end, $alt_seq);
  } else {
    # warning() is not among the names this package imports from
    # Bio::EnsEMBL::Utils::Exception, so it is called fully qualified;
    # an unqualified call would die with "Undefined subroutine".
    Bio::EnsEMBL::Utils::Exception::warning(
      'Attribute value cannot be created unless start, end and alt_seq ' .
      'properties are defined');
    $value = '';
  }

  # An attribute is returned even when the value could not be built; in that
  # case its value is the empty string.
  return Bio::EnsEMBL::Attribute->new(-CODE        => $self->code(),
                                      -VALUE       => $value,
                                      -NAME        => $self->name(),
                                      -DESCRIPTION => $self->description());
}
391
392
393 =head2 apply_edit
394
395 Arg [1] : reference to string $seqref
396 Example : $sequence = 'ACTGAATATTTAAGGCA';
397 $seqedit->apply_edit(\$sequence);
398 print $sequence, "\n";
399 Description: Applies this edit directly to a sequence which is
400 passed by reference. The coordinates of this SeqEdit
401 are assumed to be relative to the start of the sequence
402 argument.
403 If either the start or end of this SeqEdit are not defined
404 this function will not do anything to the passed sequence.
405 Returntype : reference to the same sequence that was passed in
406 Exceptions : none
407 Caller : Transcript, Translation
408 Status : Stable
409
410 =cut
411
sub apply_edit {
  my ($self, $seqref) = @_;

  throw("Reference to scalar argument expected")
    if(ref($seqref) ne 'SCALAR');

  my ($from, $to) = @{$self}{'start', 'end'};

  # An edit without both coordinates leaves the sequence untouched.
  if(defined($from) && defined($to)) {
    # Coordinates are one-based and inclusive, substr() offsets are
    # zero-based.  A length of 0 (start == end + 1) is a pure insertion.
    my $replaced_length = $to - $from + 1;
    substr($$seqref, $from - 1, $replaced_length, $self->{'alt_seq'});
  }

  return $seqref;
}
429
430
431 1;