Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/SeqEdit.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::EnsEMBL::SeqEdit - A class representing a post transcriptional edit to a | |
24 sequence. | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 use Bio::EnsEMBL::SeqEdit; | |
29 use Bio::EnsEMBL::Attribute; | |
30 | |
31 # construct a SeqEdit object using a Transcript attribute | |
32 | |
33 ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') }; | |
34 | |
35 $seq_edit = Bio::EnsEMBL::SeqEdit->new( -ATTRIB => $attribute ); | |
36 | |
37 print $seq_edit->start(), "\n"; | |
38 print $seq_edit->end(), "\n"; | |
39 print $seq_edit->alt_seq(), "\n"; | |
40 | |
41 # apply the edit to some sequence | |
42 $seq = $transcript->spliced_seq(); | |
43 print "Before modification: $seq\n"; | |
44 | |
45 $seq_edit->apply_edit( \$seq ); | |
46 print "After modification: $seq\n"; | |
47 | |
48 # construct an attribute object from a SeqEdit and add it to a | |
49 # translation | |
50 | |
51 $seq_edit = Bio::EnsEMBL::SeqEdit->new( | |
52 -CODE => '_selenocysteine', | |
53 -NAME => 'Selenocysteine', | |
54 -DESCRIPTION => 'Selenocysteine', | |
55 -START => 10, | |
56 -END => 10, | |
57 -ALT_SEQ => 'U' | |
58 ); | |
59 | |
60 $attribute = $seq_edit->get_Attribute(); | |
61 $translation->add_Attributes($attribute); | |
62 | |
63 =head1 DESCRIPTION | |
64 | |
65 This is a class used to represent post transcriptional | |
66 modifications to sequences. SeqEdit objects are stored as ordinary | |
67 Bio::EnsEMBL::Attributes with a parseable value and can be used to | |
68 represent RNA editing, selenocysteines etc. | |
69 | |
70 Also see B<Bio::EnsEMBL::Attribute> | |
71 | |
72 =head1 METHODS | |
73 | |
74 =cut | |
75 | |
76 package Bio::EnsEMBL::SeqEdit; | |
77 | |
78 use strict; | |
79 use warnings; | |
80 | |
81 use Bio::EnsEMBL::Attribute; | |
82 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
83 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
84 | |
85 | |
=head2 new

  Arg [-ATTRIB]      : Bio::EnsEMBL::Attribute
                       Constructs a new SeqEdit from an Attribute.
                       Can only be provided if no other constructor
                       arguments are provided.
  Arg [-START]       : The start position of the edit.
  Arg [-END]         : The end position of the edit.
  Arg [-ALT_SEQ]     : The alternate sequence
  Arg [-CODE]        : A code for this SeqEdit
  Arg [-NAME]        : A name for this SeqEdit
  Arg [-DESCRIPTION] : A description for this SeqEdit
  Example    : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib);
               my $sea = Bio::EnsEMBL::SeqEdit->new
                 (-START       => 10,
                  -END         => 12,
                  -ALT_SEQ     => 'ACG',
                  -CODE        => '_rna_edit',
                  -NAME        => 'RNA Edit',
                  -DESCRIPTION => 'RNA edit');
  Description: Constructs a SeqEdit representing a single edit to a
               sequence, such as an rna modification or a selenocysteine.
  Returntype : Bio::EnsEMBL::SeqEdit
  Exceptions : throws if -ATTRIB is given together with any other argument
               throws if -ATTRIB is not a Bio::EnsEMBL::Attribute
               throws if the attribute value does not start with two
               unsigned integers (the start and the end)
               throws if start > end + 1, start < 1 or end < 0
  Caller     : general
  Status     : Stable

=cut

sub new {
  my $class = shift;

  my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) =
    rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_);

  if($attrib) {
    # An attribute fully describes the edit, so mixing it with explicit
    # arguments would be ambiguous.
    if(defined($start) || defined($end) || defined($alt_seq) ||
       defined($name) || defined($desc) || defined($code)) {
      throw("Cannot specify -ATTRIB argument with additional arguments.");
    }

    if(!ref($attrib) || !$attrib->isa('Bio::EnsEMBL::Attribute')) {
      throw('Bio::EnsEMBL::Attribute argument expected.');
    }

    # The attribute value has the form "<start> <end> <alt_seq>".
    # Splitting on ' ' (awk mode) also tolerates leading whitespace.
    my $value = $attrib->value();
    $value = '' if(!defined($value));
    ($start, $end, $alt_seq) = split(' ', $value);

    # Both coordinates must be present and consist of digits only; the
    # anchors reject values such as "1a" that merely contain a digit.
    if(!defined($start) || !defined($end) ||
       $start !~ /\A\d+\z/ || $end !~ /\A\d+\z/) {
      throw('Could not parse value of attribute: '.$value);
    }

    $name = $attrib->name();
    $code = $attrib->code();
    $desc = $attrib->description();
  }

  # start == end + 1 is allowed: that is how an insertion is represented.
  if(defined($end) && defined($start) && $start > $end+1) {
    throw("start must be less than or equal to end + 1");
  }

  if(defined($start) && $start < 1) {
    throw("start must be greater than or equal to 1");
  }

  if(defined($end) && $end < 0) {
    throw("end must be greater than or equal to 0");
  }

  # A missing alternate sequence means a deletion (empty replacement).
  $alt_seq ||= '';

  return bless {'start'       => $start,
                'end'         => $end,
                'alt_seq'     => $alt_seq,
                'description' => $desc,
                'name'        => $name,
                'code'        => $code}, $class;
}
168 | |
169 | |
170 | |
=head2 start

  Arg [1]    : (optional) int $start - the new start position
  Example    : $start = $se_attrib->start();
  Description: Getter/Setter for the start position of the region replaced
               by the alt_seq.

               Coordinates are inclusive and one-based, which means that
               inserts are unusually represented by a start 1bp higher than
               the end.

               E.g. start = 1, end = 1 is a replacement of the first base
               but start = 1, end = 0 is an insert BEFORE the first base.
  Returntype : int
  Exceptions : throws if a defined start smaller than 1 is supplied
  Caller     : Transcript, Translation
  Status     : Stable

=cut

sub start {
  my ($self, @args) = @_;

  # Only act as a setter when an argument was actually passed; an explicit
  # undef is accepted and clears the stored position.
  if(scalar(@args)) {
    my $new_start = $args[0];

    throw("start must be greater than or equal to 1")
      if(defined($new_start) && $new_start < 1);

    $self->{'start'} = $new_start;
  }

  return $self->{'start'};
}
204 | |
205 | |
=head2 end

  Arg [1]    : (optional) int $end - the new end position
  Example    : $end = $se_attrib->end();
  Description: Getter/Setter for the end position of the region replaced
               by the alt_seq.

               Coordinates are inclusive and one-based, which means that
               inserts are unusually represented by a start 1bp higher than
               the end.

               E.g. start = 1, end = 1 is a replacement of the first base
               but start = 1, end = 0 is an insert BEFORE the first base.
  Returntype : int
  Exceptions : throws if a defined end smaller than 0 is supplied
  Caller     : Transcript, Translation
  Status     : Stable

=cut

sub end {
  my ($self, @args) = @_;

  # Only act as a setter when an argument was actually passed; an explicit
  # undef is accepted and clears the stored position.
  if(scalar(@args)) {
    my $new_end = $args[0];

    # 0 is legal: together with start = 1 it denotes an insert before the
    # first base.
    throw("end must be greater than or equal to 0")
      if(defined($new_end) && $new_end < 0);

    $self->{'end'} = $new_end;
  }

  return $self->{'end'};
}
239 | |
240 | |
=head2 alt_seq

  Arg [1]    : (optional) string $alt_seq
  Example    : my $alt_seq = $se_attrib->alt_seq();
  Description: Getter/Setter for the replacement sequence used by this
               edit. The sequence may either be a string of amino acids or
               nucleotides depending on the context in which this edit is
               used.

               In the case of a deletion the replacement sequence is an
               empty string. Any false value passed to the setter is stored
               as the empty string.
  Returntype : string
  Exceptions : none
  Caller     : Transcript, Translation
  Status     : Stable

=cut

sub alt_seq {
  my ($self, @args) = @_;

  if(@args) {
    my $new_seq = $args[0];
    # Normalise every false value (undef, '', 0) to the empty string.
    $self->{'alt_seq'} = $new_seq ? $new_seq : '';
  }

  return $self->{'alt_seq'};
}
264 | |
265 | |
=head2 length_diff

  Arg [1]    : none
  Example    : my $diff = $sea->length_diff();
  Description: Returns the difference in length caused by applying this
               edit to a sequence. This may be negative (deletion),
               positive (insertion) or 0 (replacement).

               If either start or end are not defined 0 is returned.
  Returntype : int
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub length_diff {
  my $self = shift;

  my ($start, $end) = @{$self}{qw(start end)};

  # Without both coordinates the edit has no effect on the length.
  return 0 unless(defined($start) && defined($end));

  # Coordinates are inclusive, hence the + 1.
  my $replaced_length = $end - $start + 1;

  return length($self->{'alt_seq'}) - $replaced_length;
}
289 | |
290 | |
291 | |
=head2 name

  Arg [1]    : (optional) string $name
  Example    : my $name = $seqedit->name();
  Description: Getter/Setter for the name of this SeqEdit
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name {
  my ($self, @args) = @_;

  # Any passed argument, including undef, replaces the stored name.
  if(@args) {
    $self->{'name'} = $args[0];
  }

  return $self->{'name'};
}
309 | |
310 | |
311 | |
312 | |
=head2 code

  Arg [1]    : (optional) string $code
  Example    : my $code = $seqedit->code();
  Description: Getter/Setter for the code of this SeqEdit
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub code {
  my ($self, @args) = @_;

  # Any passed argument, including undef, replaces the stored code.
  if(@args) {
    $self->{'code'} = $args[0];
  }

  return $self->{'code'};
}
330 | |
331 | |
332 | |
=head2 description

  Arg [1]    : (optional) string $desc
  Example    : my $desc = $seqedit->description();
  Description: Getter/Setter for the description of this SeqEdit
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub description {
  my ($self, @args) = @_;

  # Any passed argument, including undef, replaces the stored description.
  if(@args) {
    $self->{'description'} = $args[0];
  }

  return $self->{'description'};
}
350 | |
351 | |
352 | |
=head2 get_Attribute

  Arg [1]    : none
  Example    : my $attrib = $seqedit->get_Attribute();
               $transcript->add_Attributes($attrib);
  Description: Converts a SeqEdit object into an Attribute object. This
               allows the SeqEdit to be stored as any other attribute in
               the ensembl database. The start/end and alt_seq properties
               should be set before calling this method; the attribute
               value is then "<start> <end> <alt_seq>". Otherwise the
               attribute is created with an empty value.
  Returntype : Bio::EnsEMBL::Attribute
  Exceptions : warning if start/end or alt_seq properties are not defined
  Caller     : general
  Status     : Stable

=cut

sub get_Attribute {
  my $self = shift;

  my $start   = $self->start();
  my $end     = $self->end();
  my $alt_seq = $self->alt_seq();

  my $value;

  if(defined($start) && defined($end) && defined($alt_seq)) {
    $value = join(' ', $start, $end, $alt_seq);
  } else {
    # Called by its fully qualified name: this package only imports
    # throw() from Bio::EnsEMBL::Utils::Exception, so a bare warning()
    # would die with "Undefined subroutine".
    Bio::EnsEMBL::Utils::Exception::warning(
      'Attribute value cannot be created unless start, end and alt_seq ' .
      'properties are defined');
    $value = '';
  }

  return Bio::EnsEMBL::Attribute->new(-CODE        => $self->code(),
                                      -VALUE       => $value,
                                      -NAME        => $self->name(),
                                      -DESCRIPTION => $self->description());
}
391 | |
392 | |
=head2 apply_edit

  Arg [1]    : reference to string $seqref
  Example    : $sequence = 'ACTGAATATTTAAGGCA';
               $seqedit->apply_edit(\$sequence);
               print $sequence, "\n";
  Description: Applies this edit directly to a sequence which is
               passed by reference. The coordinates of this SeqEdit
               are assumed to be relative to the start of the sequence
               argument.
               If either the start or end of this SeqEdit are not defined
               this function will not do anything to the passed sequence.
  Returntype : reference to the same sequence that was passed in
  Exceptions : throws if the argument is not a reference to a scalar
  Caller     : Transcript, Translation
  Status     : Stable

=cut

sub apply_edit {
  my $self   = shift;
  my $seqref = shift;

  throw("Reference to scalar argument expected")
    if(ref($seqref) ne 'SCALAR');

  my $start = $self->{'start'};
  my $end   = $self->{'end'};

  # An edit without both coordinates leaves the sequence untouched.
  return $seqref unless(defined($start) && defined($end));

  # Coordinates are one-based and inclusive, substr offsets are zero-based.
  # For an insertion (start == end + 1) the replaced span has length 0.
  my $span = $end - $start + 1;
  substr(${$seqref}, $start - 1, $span) = $self->{'alt_seq'};

  return $seqref;
}
429 | |
430 | |
431 1; |