Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/SeqEdit.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 =head1 LICENSE | |
| 2 | |
| 3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
| 4 Genome Research Limited. All rights reserved. | |
| 5 | |
| 6 This software is distributed under a modified Apache license. | |
| 7 For license details, please see | |
| 8 | |
| 9 http://www.ensembl.org/info/about/code_licence.html | |
| 10 | |
| 11 =head1 CONTACT | |
| 12 | |
| 13 Please email comments or questions to the public Ensembl | |
| 14 developers list at <dev@ensembl.org>. | |
| 15 | |
| 16 Questions may also be sent to the Ensembl help desk at | |
| 17 <helpdesk@ensembl.org>. | |
| 18 | |
| 19 =cut | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::EnsEMBL::SeqEdit - A class representing a post transcriptional edit to a | |
| 24 sequence. | |
| 25 | |
| 26 =head1 SYNOPSIS | |
| 27 | |
| 28 use Bio::EnsEMBL::SeqEdit; | |
| 29 use Bio::EnsEMBL::Attribute; | |
| 30 | |
| 31 # construct a SeqEdit object using a Transcript attribute | |
| 32 | |
| 33 ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') }; | |
| 34 | |
| 35 $seq_edit = Bio::EnsEMBL::SeqEdit->new( -ATTRIB => $attribute ); | |
| 36 | |
| 37 print $seq_edit->start(), "\n"; | |
| 38 print $seq_edit->end(), "\n"; | |
| 39 print $seq_edit->alt_seq(), "\n"; | |
| 40 | |
| 41 # apply the edit to some sequence | |
| 42 $seq = $transcript->spliced_seq(); | |
| 43 print "Before modification: $seq\n"; | |
| 44 | |
| 45 $seq_edit->apply_edit( \$seq ); | |
| 46 print "After modification: $seq\n"; | |
| 47 | |
| 48 # construct an attribute object from a SeqEdit and add it to a | |
| 49 # translation | |
| 50 | |
| 51 $seq_edit = Bio::EnsEMBL::SeqEdit->new( | |
| 52 -CODE => '_selenocysteine', | |
| 53 -NAME => 'Selenocysteine', | |
| 54 -DESCRIPTION => 'Selenocysteine', | |
| 55 -START => 10, | |
| 56 -END => 10, | |
| 57 -ALT_SEQ => 'U' | |
| 58 ); | |
| 59 | |
| 60 $attribute = $seq_edit->get_Attribute(); | |
| 61 $translation->add_Attributes($attribute); | |
| 62 | |
| 63 =head1 DESCRIPTION | |
| 64 | |
| 65 This is a class used to represent post transcriptional | |
| 66 modifications to sequences. SeqEdit objects are stored as ordinary | |
| 67 Bio::EnsEMBL::Attributes with a parseable value and can be used to | |
| 68 represent RNA editing, selenocysteines etc. | |
| 69 | |
| 70 Also see B<Bio::EnsEMBL::Attribute> | |
| 71 | |
| 72 =head1 METHODS | |
| 73 | |
| 74 =cut | |
| 75 | |
| 76 package Bio::EnsEMBL::SeqEdit; | |
| 77 | |
| 78 use strict; | |
| 79 use warnings; | |
| 80 | |
| 81 use Bio::EnsEMBL::Attribute; | |
| 82 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
| 83 use Bio::EnsEMBL::Utils::Exception qw(throw); | |
| 84 | |
| 85 | |
| 86 =head2 new | |
| 87 | |
| 88 Arg [-ATTRIB] : Bio::EnsEMBL::Attribute | |
| 89 Constructs a new SeqEdit from an Attribute. | |
| 90 Can only be provided if no other constructor arguments | |
| 91 are provided. | |
| 92 Arg [-START] : The start position of the edit. | |
| 93 Arg [-END] : The end position of the edit. | |
| 94 Arg [-ALT_SEQ] : The alternate sequence | |
| 95 Arg [-CODE] : A code for this SeqEdit | |
| 96 Arg [-NAME] : A name for this SeqEdit | |
| 97 Arg [-DESCRIPTION] : A description for this SeqEdit | |
| 98 Example : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib); | |
| 99 my $sea = Bio::EnsEMBL::SeqEdit->new | |
| 100 (-START => 10, | |
| 101 -END => 12, | |
| 102 -ALT_SEQ => 'ACG', | |
| 103 -CODE => '_rna_edit', | |
| 104 -NAME => 'RNA Edit', | |
| 105 -DESCRIPTION => 'RNA edit'); | |
| 106 Description: Constructs a SeqEdit representing a single edit to a | |
| 107 sequence, such as an rna modification or a selenocysteine. | |
| 108 Returntype : Bio::EnsEMBL::SeqEdit | |
| 109 Exceptions : throws if attribute set and other args as well | |
| 110 throws if start and end are not set correctly or the attribute cannot be parsed | |
| 111 Caller : general | |
| 112 Status : Stable | |
| 113 | |
| 114 =cut | |
| 115 | |
| 116 sub new { | |
| 117 my $class = shift; | |
| 118 | |
| 119 my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) = | |
| 120 rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_); | |
| 121 | |
| 122 # An attribute, when given, must be the only constructor argument. | |
| 123 if($attrib) { | |
| 124 if(defined($start) || defined($end) || defined($alt_seq) || | |
| 125 defined($name) || defined($desc) || defined($code)) { | |
| 126 throw("Cannot specify -ATTRIB argument with additional arguments."); | |
| 127 } | |
| 128 | |
| 129 if(!ref($attrib) || !$attrib->isa('Bio::EnsEMBL::Attribute')) { | |
| 130 throw('Bio::EnsEMBL::Attribute argument expected.'); | |
| 131 } | |
| 132 | |
| 133 # The attribute value is expected to hold "start end alt_seq". | |
| 134 ($start, $end, $alt_seq) = split(/\s+/, $attrib->value()); | |
| 135 | |
| 136 # Both coordinates must be present and made up of digits only. | |
| 137 if(!defined($start) || !defined($end) || | |
| 138 $start !~ /^\d+$/ || $end !~ /^\d+$/) { | |
| 139 throw('Could not parse value of attribute: '.$attrib->value()); | |
| 140 } | |
| 141 | |
| 142 $name = $attrib->name(); | |
| 143 $code = $attrib->code(); | |
| 144 $desc = $attrib->description(); | |
| 145 } | |
| 146 | |
| 147 if(defined($end) && defined($start) && $start > $end+1) { | |
| 148 throw("start must be less than or equal to end + 1"); | |
| 149 } | |
| 150 | |
| 151 if(defined($start) && $start < 1) { | |
| 152 throw("start must be greater than or equal to 1"); | |
| 153 } | |
| 154 | |
| 155 if(defined($end) && $end < 0) { | |
| 156 throw("end must be greater than or equal to 0"); | |
| 157 } | |
| 158 | |
| 159 $alt_seq ||= ''; | |
| 160 | |
| 161 return bless {'start' => $start, | |
| 162 'end' => $end, | |
| 163 'alt_seq' => $alt_seq, | |
| 164 'description' => $desc, | |
| 165 'name' => $name, | |
| 166 'code' => $code}, $class; | |
| 167 } | |
| 168 | |
| 169 | |
| 170 | |
| 171 =head2 start | |
| 172 | |
| 173 Arg [1] : (optional) int $start - the new start position | |
| 174 Example : $start = $se_attrib->start(); | |
| 175 Description: Getter/Setter for the start position of the region replaced | |
| 176 by the alt_seq. | |
| 177 | |
| 178 Coordinates are inclusive and one-based, which means that | |
| 179 inserts are unusually represented by a start 1bp higher than | |
| 180 the end. | |
| 181 | |
| 182 E.g. start = 1, end = 1 is a replacement of the first base but | |
| 183 start = 1, end = 0 is an insert BEFORE the first base. | |
| 184 Returntype : int | |
| 185 Exceptions : throws if start < 1 | |
| 186 Caller : Transcript, Translation | |
| 187 Status : Stable | |
| 188 | |
| 189 =cut | |
| 190 | |
| 191 sub start { | |
| 192 my $self = shift; | |
| 193 | |
| 194 # Plain getter when called without an argument. | |
| 195 return $self->{'start'} if(!@_); | |
| 196 | |
| 197 my $new_start = shift; | |
| 198 if(defined($new_start) && $new_start < 1) { | |
| 199 throw("start must be greater than or equal to 1"); | |
| 200 } | |
| 201 | |
| 202 return $self->{'start'} = $new_start; | |
| 203 } | |
| 204 | |
| 205 | |
| 206 =head2 end | |
| 207 | |
| 208 Arg [1] : (optional) int $end - the new end position | |
| 209 Example : $end = $se_attrib->end(); | |
| 210 Description: Getter/Setter for the end position of the region replaced | |
| 211 by the alt_seq. | |
| 212 | |
| 213 Coordinates are inclusive and one-based, which means that | |
| 214 inserts are unusually represented by a start 1bp higher than | |
| 215 the end. | |
| 216 | |
| 217 E.g. start = 1, end = 1 is a replacement of the first base but | |
| 218 start = 1, end = 0 is an insert BEFORE the first base. | |
| 219 Returntype : int | |
| 220 Exceptions : throws if end < 0 | |
| 221 Caller : Transcript, Translation | |
| 222 Status : Stable | |
| 223 | |
| 224 =cut | |
| 225 | |
| 226 sub end { | |
| 227 my $self = shift; | |
| 228 | |
| 229 # Plain getter when called without an argument. | |
| 230 return $self->{'end'} if(!@_); | |
| 231 | |
| 232 my $new_end = shift; | |
| 233 if(defined($new_end) && $new_end < 0) { | |
| 234 throw("end must be greater than or equal to 0"); | |
| 235 } | |
| 236 | |
| 237 return $self->{'end'} = $new_end; | |
| 238 } | |
| 239 | |
| 240 | |
| 241 =head2 alt_seq | |
| 242 | |
| 243 Arg [1] : (optional) string $alt_seq | |
| 244 Example : my $alt_seq = $se_attrib->alt_seq(); | |
| 245 Description: Getter/Setter for the replacement sequence used by this edit. | |
| 246 The sequence may either be a string of amino acids or | |
| 247 nucleotides depending on the context in which this edit is | |
| 248 used. | |
| 249 | |
| 250 In the case of a deletion the replacement sequence is an empty | |
| 251 string. | |
| 252 Returntype : string | |
| 253 Exceptions : none | |
| 254 Caller : Transcript, Translation | |
| 255 Status : Stable | |
| 256 | |
| 257 =cut | |
| 258 | |
| 259 sub alt_seq { | |
| 260 my ($self, @args) = @_; | |
| 261 $self->{'alt_seq'} = ($args[0] || '') if(@args); | |
| 262 return $self->{'alt_seq'}; | |
| 263 } | |
| 264 | |
| 265 | |
| 266 =head2 length_diff | |
| 267 | |
| 268 Arg [1] : none | |
| 269 Example : my $diff = $sea->length_diff(); | |
| 270 Description: Returns the difference in length caused by applying this | |
| 271 edit to a sequence. This may be negative (deletion), | |
| 272 positive (insertion) or 0 (replacement). | |
| 273 | |
| 274 If either start or end are not defined 0 is returned. | |
| 275 Returntype : int | |
| 276 Exceptions : none | |
| 277 Caller : general | |
| 278 Status : Stable | |
| 279 | |
| 280 =cut | |
| 281 | |
| 282 sub length_diff { | |
| 283 my $self = shift; | |
| 284 my ($start, $end) = ($self->{'start'}, $self->{'end'}); | |
| 285 return 0 unless(defined($start) && defined($end)); | |
| 286 | |
| 287 return length($self->{'alt_seq'}) - ($end - $start + 1); | |
| 288 } | |
| 289 | |
| 290 | |
| 291 | |
| 292 =head2 name | |
| 293 | |
| 294 Arg [1] : (optional) string $name | |
| 295 Example : my $name = $seqedit->name(); | |
| 296 Description: Getter/Setter for the name of this SeqEdit | |
| 297 Returntype : string | |
| 298 Exceptions : none | |
| 299 Caller : general | |
| 300 Status : Stable | |
| 301 | |
| 302 =cut | |
| 303 | |
| 304 sub name { | |
| 305 my ($self, @args) = @_; | |
| 306 $self->{'name'} = $args[0] if(@args); | |
| 307 return $self->{'name'}; | |
| 308 } | |
| 309 | |
| 310 | |
| 311 | |
| 312 | |
| 313 =head2 code | |
| 314 | |
| 315 Arg [1] : (optional) string $code | |
| 316 Example : my $code = $seqedit->code(); | |
| 317 Description: Getter/Setter for the code of this SeqEdit | |
| 318 Returntype : string | |
| 319 Exceptions : none | |
| 320 Caller : general | |
| 321 Status : Stable | |
| 322 | |
| 323 =cut | |
| 324 | |
| 325 sub code { | |
| 326 my ($self, @args) = @_; | |
| 327 $self->{'code'} = $args[0] if(@args); | |
| 328 return $self->{'code'}; | |
| 329 } | |
| 330 | |
| 331 | |
| 332 | |
| 333 =head2 description | |
| 334 | |
| 335 Arg [1] : (optional) string $desc | |
| 336 Example : my $desc = $seqedit->description(); | |
| 337 Description: Getter/Setter for the description of this SeqEdit | |
| 338 Returntype : string | |
| 339 Exceptions : none | |
| 340 Caller : general | |
| 341 Status : Stable | |
| 342 | |
| 343 =cut | |
| 344 | |
| 345 sub description { | |
| 346 my ($self, @args) = @_; | |
| 347 $self->{'description'} = $args[0] if(@args); | |
| 348 return $self->{'description'}; | |
| 349 } | |
| 350 | |
| 351 | |
| 352 | |
| 353 =head2 get_Attribute | |
| 354 | |
| 355 Arg [1] : none | |
| 356 Example : my $attrib = $seqedit->get_Attribute(); | |
| 357 $transcript->add_Attributes($attrib); | |
| 358 Description: Converts a SeqEdit object into an Attribute object. This | |
| 359 allows the SeqEdit to be stored as any other attribute in the | |
| 360 ensembl database. The start/end and alt_seq properties | |
| 361 should be set before calling this method. | |
| 362 Returntype : Bio::EnsEMBL::Attribute | |
| 363 Exceptions : warning if start/end or alt_seq properties are not defined | |
| 364 Caller : general | |
| 365 Status : Stable | |
| 366 | |
| 367 =cut | |
| 368 | |
| 369 sub get_Attribute { | |
| 370 my $self = shift; | |
| 371 | |
| 372 my $start = $self->start(); | |
| 373 my $end = $self->end(); | |
| 374 my $alt_seq = $self->alt_seq(); | |
| 375 | |
| 376 my $value = ''; | |
| 377 | |
| 378 if(defined($start) && defined($end) && defined($alt_seq)) { | |
| 379 $value = join(' ', $start, $end, $alt_seq); | |
| 380 } else { | |
| 381 # 'warning' is not imported into this package; call it fully qualified. | |
| 382 Bio::EnsEMBL::Utils::Exception::warning('Attribute value cannot be ' . | |
| 383 'created unless start, end and alt_seq properties are defined'); | |
| 384 } | |
| 385 | |
| 386 return Bio::EnsEMBL::Attribute->new(-CODE => $self->code(), | |
| 387 -VALUE => $value, | |
| 388 -NAME => $self->name(), | |
| 389 -DESCRIPTION => $self->description()); | |
| 390 } | |
| 391 | |
| 392 | |
| 393 =head2 apply_edit | |
| 394 | |
| 395 Arg [1] : reference to string $seqref | |
| 396 Example : $sequence = 'ACTGAATATTTAAGGCA'; | |
| 397 $seqedit->apply_edit(\$sequence); | |
| 398 print $sequence, "\n"; | |
| 399 Description: Applies this edit directly to a sequence which is | |
| 400 passed by reference. The coordinates of this SeqEdit | |
| 401 are assumed to be relative to the start of the sequence | |
| 402 argument. | |
| 403 If either the start or end of this SeqEdit are not defined | |
| 404 this function will not do anything to the passed sequence. | |
| 405 Returntype : reference to the same sequence that was passed in | |
| 406 Exceptions : none | |
| 407 Caller : Transcript, Translation | |
| 408 Status : Stable | |
| 409 | |
| 410 =cut | |
| 411 | |
| 412 sub apply_edit { | |
| 413 my ($self, $seqref) = @_; | |
| 414 | |
| 415 throw("Reference to scalar argument expected") | |
| 416 if(ref($seqref) ne 'SCALAR'); | |
| 417 | |
| 418 my ($start, $end) = ($self->{'start'}, $self->{'end'}); | |
| 419 | |
| 420 # An edit lacking either coordinate leaves the sequence untouched. | |
| 421 return $seqref if(!defined($start) || !defined($end)); | |
| 422 | |
| 423 # Coordinates are one-based and inclusive; substr offsets are zero-based. | |
| 424 my $offset = $start - 1; | |
| 425 my $span = $end - $start + 1; | |
| 426 substr($$seqref, $offset, $span) = $self->{'alt_seq'}; | |
| 427 return $seqref; | |
| 428 } | |
| 429 | |
| 430 | |
| 431 1; |
