Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Seq/EncodedSeq.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: EncodedSeq.pm,v 1.5.2.1 2003/04/28 12:11:57 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::Seq::EncodedSeq | |
4 # | |
5 # Cared for by Aaron Mackey | |
6 # | |
7 # Copyright Aaron Mackey | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Seq::EncodedSeq - subtype of L<Bio::LocatableSeq|Bio::LocatableSeq> to store DNA that encodes a protein | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $obj = new Bio::Seq::EncodedSeq(-seq => $dna, | |
20 -encoding => "CCCCCCCIIIIICCCCC", | |
21 -start => 1, | |
22 -strand => 1, | |
23 -length => 17); | |
24 | |
25 # splice out (and possibly revcomp) the coding sequence | |
26 $cds = obj->cds; | |
27 | |
28 # obtain the protein translation of the sequence | |
29 $prot = $obj->translate; | |
30 | |
31 # other access/inspection routines as with Bio::LocatableSeq and | |
32 # Bio::SeqI; note that coordinates are relative only to the DNA | |
33 # sequence, not it's implicit encoded protein sequence. | |
34 | |
35 =head1 DESCRIPTION | |
36 | |
37 Bio::Seq::EncodedSeq is a L<Bio::LocatableSeq|Bio::LocatableSeq> | |
38 object that holds a DNA sequence as well as information about the | |
39 coding potential of that DNA sequence. It is meant to be useful in an | |
40 alignment context, where the DNA may contain frameshifts, gaps and/or | |
41 introns, or in describing the transcript of a gene. An EncodedSeq | |
42 provides the ability to access the "spliced" coding sequence, meaning | |
43 that all introns and gaps are removed, and any frameshifts are | |
44 adjusted to provide a "clean" CDS. | |
45 | |
46 In order to make simultaneous use of either the DNA or the implicit | |
47 encoded protein sequence coordinates, please see | |
48 L<Bio::Coordinate::EncodingPair>. | |
49 | |
50 =head1 ENCODING | |
51 | |
52 We use the term "encoding" here to refer to the series of symbols that | |
53 we use to identify which residues of a DNA sequence are protein-coding | |
54 (i.e. part of a codon), intronic, part of a 5' or 3', frameshift | |
55 "mutations", etc. From this information, a Bio::Seq::EncodedSeq is | |
56 able to "figure out" its translational CDS. There are two sets of | |
57 coding characters, one termed "implicit" and one termed "explicit". | |
58 | |
59 The "implict" encoding is a bit simpler than the "explicit" encoding: | |
60 'C' is used for any nucleotide that's part of a codon, 'U' for any | |
61 UTR, etc. The full list is shown below: | |
62 | |
63 Code Meaning | |
64 ---- ------- | |
65 C coding | |
66 I intronic | |
67 U untranslated | |
68 G gapped (for use in alignments) | |
69 F a "forward", +1 frameshift | |
70 B a "backward", -1 frameshift | |
71 | |
72 The "explicit" encoding is just an expansion of the "implicit" | |
73 encoding, to denote phase: | |
74 | |
75 Code Meaning | |
76 ---- ------- | |
77 C coding, 1st codon position | |
78 D coding, 2nd codon position | |
79 E coding, 3rd codon position | |
80 | |
81 I intronic, phase 0 (relative to intron begin) | |
82 J intronic, phase 1 | |
83 K intronic, phase 2 | |
84 | |
85 U untranslated 3'UTR | |
86 V untranslated 5'UTR | |
87 | |
88 G gapped (for use in alignments) | |
89 F a "forward", +1 frameshift | |
90 B a "backward", -1 frameshift | |
91 | |
92 Note that the explicit coding is meant to provide easy access to | |
93 position/phase specific nucleotides: | |
94 | |
95 $obj = new Bio::Seq::EncodedSeq (-seq => "ACAATCAGACTACG...", | |
96 -encoding => "CCCCCCIII..." | |
97 ); | |
98 | |
99 # fetch arrays of nucleotides at each codon position: | |
100 my @pos1 = $obj->dnaseq(encoding => 'C', explicit => 1); | |
101 my @pos2 = $obj->dnaseq(encoding => 'D'); | |
102 my @pos3 = $obj->dnaseq(encoding => 'E'); | |
103 | |
104 # fetch arrays of "3-1" codon dinucleotides, useful for genomic | |
105 # signature analyses without compounding influences of codon bias: | |
106 my @pairs = $obj->dnaseq(encoding => 'EC'); | |
107 | |
108 =head1 FEEDBACK | |
109 | |
110 =head2 Mailing Lists | |
111 | |
112 User feedback is an integral part of the evolution of this | |
113 and other Bioperl modules. Send your comments and suggestions preferably | |
114 to one of the Bioperl mailing lists. | |
115 Your participation is much appreciated. | |
116 | |
117 bioperl-l@bioperl.org - General discussion | |
118 http://www.bioperl.org/MailList.html - About the mailing lists | |
119 | |
120 =head2 Reporting Bugs | |
121 | |
122 Report bugs to the Bioperl bug tracking system to help us keep track | |
123 the bugs and their resolution. | |
124 Bug reports can be submitted via email or the web: | |
125 | |
126 bioperl-bugs@bio.perl.org | |
127 http://bugzilla.bioperl.org/ | |
128 | |
129 =head1 AUTHOR - Aaron Mackey | |
130 | |
131 Email amackey@virginia.edu | |
132 | |
133 =head1 APPENDIX | |
134 | |
135 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
136 | |
137 =cut | |
138 | |
139 | |
140 # Let the code begin... | |
141 | |
142 package Bio::Seq::EncodedSeq; | |
143 | |
144 use strict; | |
145 use vars qw(@ISA); | |
146 | |
147 use Bio::LocatableSeq; | |
148 | |
149 @ISA = qw(Bio::LocatableSeq); | |
150 | |
151 =head2 new | |
152 | |
153 Title : new | |
154 Usage : $obj = Bio::Seq::EncodedSeq->new(-seq => "AGTACGTGTCATG", | |
155 -encoding => "CCCCCCFCCCCCC", | |
156 -id => "myseq", | |
157 -start => 1, | |
158 -end => 13, | |
159 -strand => 1 | |
160 ); | |
161 Function: creates a new Bio::Seq::EncodedSeq object from a supplied DNA | |
162 sequence | |
163 Returns : a new Bio::Seq::EncodedSeq object | |
164 | |
165 Args : seq - primary nucleotide sequence used to encode the | |
166 protein; note that any positions involved in a | |
167 gap ('G') or backward frameshift ('B') should | |
168 have one or more gap characters; if the encoding | |
169 specifies G or B, but no (or not enough) gap | |
170 characters exist, *they'll be added*; similary, | |
171 if there are gap characters without a | |
172 corresponding G or B encoding, G's will be | |
173 inserted into the encoding. This allows some | |
174 flexibility in specifying your sequence and | |
175 coding without having to calculate a lot of the | |
176 encoding for yourself. | |
177 | |
178 encoding - a string of characters (see Encoding Table) | |
179 describing backwards frameshifts implied by the | |
180 encoding but not present in the sequence will be | |
181 added (as '-'s) to the sequence. If not | |
182 supplied, it will be assumed that all positions | |
183 are coding (C). Encoding may include either | |
184 implicit phase encoding characters (i.e. "CCC") | |
185 and/or explicit encoding characters (i.e. "CDE"). | |
186 Additionally, prefixed numbers may be used to | |
187 denote repetition (i.e. "27C3I28C"). | |
188 | |
189 Alternatively, encoding may be a hashref | |
190 datastructure, with encoding characters as keys | |
191 and Bio::LocationI objects (or arrayrefs of | |
192 Bio::LocationI objects) as values, e.g.: | |
193 | |
194 { C => [ Bio::Location::Simple->new(1,9), | |
195 Bio::Location::Simple->new(11,13) ], | |
196 F => Bio::Location::Simple->new(10,10), | |
197 } # same as "CCCCCCCCCFCCC" | |
198 | |
199 Note that if the location ranges overlap, the | |
200 behavior of the encoding will be undefined | |
201 (well, it will be defined, but only according to | |
202 the order in which the hash keys are read, which | |
203 is basically undefined ... just don't do that). | |
204 | |
205 id, start, end, strand - as with Bio::LocatableSeq; note | |
206 that the coordinates are relative to the | |
207 encoding DNA sequence, not the implicit protein | |
208 sequence. If strand is reversed, then the | |
209 encoding is assumed to be relative to the | |
210 reverse strand as well. | |
211 | |
212 =cut | |
213 | |
214 #' | |
215 | |
216 sub new { | |
217 | |
218 my ($self, @args) = @_; | |
219 $self = $self->SUPER::new(@args, -alphabet => 'dna'); | |
220 my ($enc) = $self->_rearrange([qw(ENCODING)], @args); | |
221 # set the real encoding: | |
222 if ($enc) { | |
223 $self->encoding($enc); | |
224 } else { | |
225 $self->_recheck_encoding; | |
226 } | |
227 return $self; | |
228 } | |
229 | |
230 =head2 encoding | |
231 | |
232 Title : encoding | |
233 Usage : $obj->encoding("CCCCCC"); | |
234 $obj->encoding( -encoding => { I => $location } ); | |
235 $enc = $obj->encoding(-explicit => 1); | |
236 $enc = $obj->encoding("CCCCCC", -explicit => 1); | |
237 $enc = $obj->encoding(-location => $location, | |
238 -explicit => 1, | |
239 -absolute => 1 ); | |
240 Function: get/set the objects encoding, either globally or by location(s). | |
241 Returns : the (possibly new) encoding string. | |
242 Args : encoding - see the encoding argument to the new() function. | |
243 | |
244 explicit - whether or not to return explicit phase | |
245 information in the coding (i.e. "CCC" becomes | |
246 "CDE", "III" becomes "IJK", etc); defaults to 0. | |
247 | |
248 location - optional; location to get/set the encoding. | |
249 Defaults to the entire sequence. | |
250 | |
251 absolute - whether or not the locational elements (either | |
252 in the encoding hashref or the location | |
253 argument) are relative to the absolute start/end | |
254 of the Bio::LocatableSeq, or to the internal, | |
255 relative coordinate system (beginning at 1); | |
256 defaults to 0 (i.e. relative) | |
257 | |
258 =cut | |
259 | |
260 sub encoding { | |
261 | |
262 my ($self, @args) = @_; | |
263 my ($enc, $loc, $exp, $abs) = $self->_rearrange([qw(ENCODING LOCATION EXPLICIT ABSOLUTE)], @args); | |
264 | |
265 if (!$enc) { | |
266 # do nothing; _recheck_encoding will fix for us, if necessary | |
267 } elsif (ref $enc eq 'HASH') { | |
268 $self->throw( -class => 'Bio::Root::NotImplemented', | |
269 -text => "Hashref functionality not yet implemented;\nplease email me if you really need this."); | |
270 #TODO: finish all this | |
271 while (my ($char, $locs) = each %$enc) { | |
272 if (ref $locs eq 'ARRAY') { | |
273 } elsif (UNIVERSAL::isa($locs, "Bio::LocationI")) { | |
274 } else { | |
275 $self->throw("Only a scalar or a ref to a hash; not a ref to a @{{lc ref $enc}}"); | |
276 } | |
277 } | |
278 } elsif (! ref $enc) { | |
279 $enc = uc $enc; | |
280 $exp = 1 if (!defined $exp && $enc =~ m/[DEJKV]/o); | |
281 | |
282 if ($enc =~ m/\d/o) { # numerically "enhanced" encoding | |
283 my $numenc = $enc; | |
284 $enc = ''; | |
285 while ($numenc =~ m/\G(\d*)([CDEIJKUVGFB])/g) { | |
286 my ($num, $char) = ($1, $2); | |
287 $num = 1 unless $num; | |
288 $enc .= $char x $num; | |
289 } | |
290 } | |
291 | |
292 if (defined $exp && $exp == 0 && $enc =~ m/([^CIUGFB])/) { | |
293 $self->throw("Unrecognized character '$1' in implicit encoding"); | |
294 } elsif ($enc =~ m/[^CDEIJKUVGFB]/) { | |
295 $self->throw("Unrecognized character '$1' in explicit encoding"); | |
296 } | |
297 | |
298 if ($loc) { # a global location, over which to apply the specified encoding. | |
299 | |
300 # balk if too many non-gap characters present in encoding: | |
301 my ($ct) = $enc =~ tr/GB/GB/; | |
302 $ct = length($enc) - $ct; | |
303 $self->throw("Location length doesn't match number of coding chars in encoding!") | |
304 if ($loc->location_type eq 'EXACT' && $loc->length != $ct); | |
305 | |
306 my $start = $loc->start; | |
307 my $end = $loc->end; | |
308 | |
309 # strip any encoding that hangs off the ends of the sequence: | |
310 if ($start < $self->start) { | |
311 my $diff = $self->start - $start; | |
312 $start = $self->start; | |
313 $enc = substr($enc, $diff); | |
314 } | |
315 if ($end > $self->end) { | |
316 my $diff = $end - $self->end; | |
317 $end = $self->end; | |
318 $enc = substr($enc, -$diff); | |
319 } | |
320 | |
321 my $currenc = $self->{_encoding}; | |
322 my $currseq = $self->seq; | |
323 | |
324 my ($spanstart, $spanend) = ($self->column_from_residue_number($start), | |
325 $self->column_from_residue_number($end) ); | |
326 | |
327 if ($currseq) { | |
328 # strip any gaps in sequence spanned by this location: | |
329 my ($before, $in, $after) = $currseq =~ m/(.{@{[ $spanstart - ($loc->location_type eq 'IN-BETWEEN' ? 0 : 1) ]}}) | |
330 (.{@{[ $spanend - $spanstart + ($loc->location_type eq 'IN-BETWEEN' ? -1 : 1) ]}}) | |
331 (.*) | |
332 /x; | |
333 $in =~ s/[\.\-]+//g; | |
334 $currseq = $before . $in . $after; | |
335 # change seq without changing the alphabet | |
336 $self->seq($currseq,$self->alphabet()); | |
337 } | |
338 | |
339 $currenc = reverse $currenc if $self->strand < 0; | |
340 substr($currenc, $spanstart, $spanend - $spanstart + ($loc->location_type eq 'IN-BETWEEN' ? -1 : 1), | |
341 $self->strand >= 0 ? $enc : reverse $enc); | |
342 $currenc = reverse $currenc if $self->strand < 0; | |
343 | |
344 $self->{_encoding} = $currenc; | |
345 $self->_recheck_encoding; | |
346 | |
347 $currenc = $self->{_encoding}; | |
348 $currenc = reverse $currenc if $self->strand < 0; | |
349 $enc = substr($currenc, $spanstart, length $enc); | |
350 $enc = reverse $enc if $self->strand < 0; | |
351 | |
352 return $exp ? $enc: $self->_convert2implicit($enc); | |
353 | |
354 } else { | |
355 # presume a global redefinition; strip any current gap | |
356 # characters in the sequence so they don't corrupt the | |
357 # encoding | |
358 my $dna = $self->seq; | |
359 $dna =~ s/[\.\-]//g; | |
360 $self->seq($dna, 'dna'); | |
361 $self->{_encoding} = $enc; | |
362 } | |
363 } else { | |
364 $self->throw("Only a scalar or a ref to a hash; not a ref to a @{{lc ref $enc}}"); | |
365 } | |
366 | |
367 $self->_recheck_encoding(); | |
368 | |
369 return $exp ? $self->{_encoding} : $self->_convert2implicit($self->{_encoding}); | |
370 } | |
371 | |
372 sub _convert2implicit { | |
373 | |
374 my ($self, $enc) = @_; | |
375 | |
376 $enc =~ s/[DE]/C/g; | |
377 $enc =~ s/[JK]/I/g; | |
378 $enc =~ s/V/U/g; | |
379 | |
380 return $enc; | |
381 } | |
382 | |
383 sub _recheck_encoding { | |
384 | |
385 my $self = shift; | |
386 | |
387 my @enc = split //, ($self->{_encoding} || ''); | |
388 | |
389 my @nt = split(//, $self->SUPER::seq); | |
390 @nt = reverse @nt if $self->strand && $self->strand < 0; | |
391 | |
392 # make sure an encoding exists! | |
393 @enc = ('C') x scalar grep { !/[\.\-]/o } @nt | |
394 unless @enc; | |
395 | |
396 # check for gaps to be truly present in the sequence | |
397 # and vice versa | |
398 my $i; | |
399 for ($i = 0 ; $i < @nt && $i < @enc ; $i++) { | |
400 if ($nt[$i] =~ /[\.\-]/o && $enc[$i] !~ m/[GB]/o) { | |
401 splice(@enc, $i, 0, 'G'); | |
402 } elsif ($nt[$i] !~ /[\.\-]/o && $enc[$i] =~ m/[GB]/o) { | |
403 splice(@nt, $i, 0, '-'); | |
404 } | |
405 } | |
406 if ($i < @enc) { | |
407 # extra encoding; presumably all gaps? | |
408 for ( ; $i < @enc ; $i++) { | |
409 if ($enc[$i] =~ m/[GB]/o) { | |
410 push @nt, '-'; | |
411 } else { | |
412 $self->throw("Extraneous encoding info: " . join('', @enc[$i..$#enc])); | |
413 } | |
414 } | |
415 } elsif ($i < @nt) { | |
416 for ( ; $i < @nt ; $i++) { | |
417 if ($nt[$i] =~ m/[\.\-]/o) { | |
418 push @enc, 'G'; | |
419 } else { | |
420 push @enc, 'C'; | |
421 } | |
422 } | |
423 } | |
424 | |
425 my @cde_array = qw(C D E); | |
426 my @ijk_array = qw(I J K); | |
427 # convert any leftover implicit coding into explicit coding | |
428 my ($Cct, $Ict, $Uct, $Vct, $Vwarned) = (0, 0, 0, 0); | |
429 for ($i = 0 ; $i < @enc ; $i++) { | |
430 if ($enc[$i] =~ m/[CDE]/o) { | |
431 my $temp_index = $Cct %3; | |
432 $enc[$i] = $cde_array[$temp_index]; | |
433 $Cct++; $Ict = 0; $Uct = 1; | |
434 $self->warn("3' untranslated encoding (V) seen prior to other coding symbols") | |
435 if ($Vct && !$Vwarned++); | |
436 } elsif ($enc[$i] =~ m/[IJK]/o) { | |
437 $enc[$i] = $ijk_array[$Ict % 3]; | |
438 $Ict++; $Uct = 1; | |
439 $self->warn("3' untranslated encoding (V) seen before other coding symbols") | |
440 if ($Vct && !$Vwarned++); | |
441 } elsif ($enc[$i] =~ m/[UV]/o) { | |
442 if ($Uct == 1) { | |
443 $enc[$i] = 'V'; | |
444 $Vct = 1; | |
445 } | |
446 } elsif ($enc[$i] eq 'B') { | |
447 $Cct++; $Ict++ | |
448 } elsif ($enc[$i] eq 'G') { | |
449 # gap; leave alone | |
450 } | |
451 } | |
452 | |
453 @nt = reverse @nt if $self->strand && $self->strand < 0; | |
454 | |
455 $self->{'seq'} = join('', @nt); | |
456 # $self->seq(join('', @nt), 'dna'); | |
457 $self->{_encoding} = join '', @enc; | |
458 } | |
459 | |
460 =head2 cds | |
461 | |
462 Title : cds | |
463 Usage : $cds = $obj->cds(-nogaps => 1); | |
464 Function: obtain the "spliced" DNA sequence, by removing any | |
465 nucleotides that participate in an UTR, forward frameshift | |
466 or intron, and replacing any unknown nucleotide implied by | |
467 a backward frameshift or gap with N's. | |
468 Returns : a Bio::Seq::EncodedSeq object, with an encoding consisting only | |
469 of "CCCC..". | |
470 Args : nogaps - strip any gap characters (resulting from 'G' or 'B' | |
471 encodings), rather than replacing them with N's. | |
472 | |
473 =cut | |
474 | |
475 sub cds { | |
476 | |
477 my ($self, @args) = @_; | |
478 | |
479 my ($nogaps, $loc) = $self->_rearrange([qw(NOGAPS LOCATION)], @args); | |
480 $nogaps = 0 unless defined $nogaps; | |
481 | |
482 my @nt = split //, $self->strand < 0 ? $self->revcom->seq : $self->seq; | |
483 my @enc = split //, $self->_convert2implicit($self->{_encoding}); | |
484 | |
485 my ($start, $end) = (0, scalar @nt); | |
486 | |
487 if ($loc) { | |
488 $start = $loc->start; | |
489 $start++ if $loc->location_type eq 'IN-BETWEEN'; | |
490 $start = $self->column_from_residue_number($start); | |
491 $start--; | |
492 | |
493 $end = $loc->end; | |
494 $end = $self->column_from_residue_number($end); | |
495 | |
496 ($start, $end) = ($end, $start) if $self->strand < 0; | |
497 $start--; | |
498 } | |
499 | |
500 for (my $i = $start ; $i < $end ; $i++) { | |
501 if ($enc[$i] eq 'I' || $enc[$i] eq 'U' || $enc[$i] eq 'F') { | |
502 # remove introns, untranslated and forward frameshift nucleotides | |
503 $nt[$i] = undef; | |
504 } elsif ($enc[$i] eq 'G' || $enc[$i] eq 'B') { | |
505 # replace gaps and backward frameshifts with N's, unless asked not to. | |
506 $nt[$i] = $nogaps ? undef : 'N'; | |
507 } | |
508 } | |
509 | |
510 return ($self->can_call_new ? ref($self) : __PACKAGE__)->new | |
511 (-seq => join('', grep { defined } @nt[$start..--$end]), | |
512 -start => $self->start, | |
513 -end => $self->end, | |
514 -strand => 1, -alphabet => 'dna'); | |
515 } | |
516 | |
517 =head2 translate | |
518 | |
519 Title : translate | |
520 Usage : $prot = $obj->translate(@args); | |
521 Function: obtain the protein sequence encoded by the underlying DNA | |
522 sequence; same as $obj->cds()->translate(@args). | |
523 Returns : a Bio::PrimarySeq object. | |
524 Args : same as the translate() function of Bio::PrimarySeqI | |
525 | |
526 =cut | |
527 | |
528 sub translate { shift->cds(-nogaps => 1, @_)->SUPER::translate(@_) }; | |
529 | |
530 =head2 protseq | |
531 | |
532 Title : seq | |
533 Usage : $protseq = $obj->protseq(); | |
534 Function: obtain the raw protein sequence encoded by the underlying | |
535 DNA sequence; This is the same as calling | |
536 $obj->translate()->seq(); | |
537 Returns : a string of single-letter amino acid codes | |
538 Args : same as the seq() function of Bio::PrimarySeq; note that this | |
539 function may not be used to set the protein sequence; see | |
540 the dnaseq() function for that. | |
541 | |
542 =cut | |
543 | |
544 sub protseq { shift->cds(-nogaps => 1, @_)->SUPER::translate(@_)->seq }; | |
545 | |
546 =head2 dnaseq | |
547 | |
548 Title : dnaseq | |
549 Usage : $dnaseq = $obj->dnaseq(); | |
550 $obj->dnaseq("ACGTGTCGT", "CCCCCCCCC"); | |
551 $obj->dnaseq(-seq => "ATG", | |
552 -encoding => "CCC", | |
553 -location => $loc ); | |
554 @introns = $obj->$dnaseq(-encoding => 'I') | |
555 Function: get/set the underlying DNA sequence; will overwrite any | |
556 current DNA and/or encoding information present. | |
557 Returns : a string of single-letter nucleotide codes, including any | |
558 gaps implied by the encoding. | |
559 Args : seq - the DNA sequence to be used as a replacement | |
560 encoding - the encoding of the DNA sequence (see the new() | |
561 constructor); defaults to all 'C' if setting a | |
562 new DNA sequence. If no new DNA sequence is | |
563 being provided, then the encoding is used as a | |
564 "filter" for which to return fragments of | |
565 non-overlapping DNA that match the encoding. | |
566 location - optional, the location of the DNA sequence to | |
567 get/set; defaults to the entire sequence. | |
568 | |
569 =cut | |
570 | |
571 sub dnaseq { | |
572 | |
573 my ($self, @args) = @_; | |
574 my ($seq, $enc, $loc) = $self->_rearrange([qw(DNASEQ ENCODING LOCATION)], @args); | |
575 | |
576 $self | |
577 | |
578 } | |
579 | |
580 # need to overload this so that we truncate both the seq and the encoding! | |
581 sub trunc { | |
582 | |
583 my ($self, $start, $end) = @_; | |
584 my $new = $self->SUPER::trunc($start, $end); | |
585 $start--; | |
586 my $enc = $self->{_encoding}; | |
587 $enc = reverse $enc if $self->strand < 0; | |
588 $enc = substr($enc, $start, $end - $start); | |
589 $enc = reverse $enc if $self->strand < 0; | |
590 $new->encoding($enc); | |
591 return $new; | |
592 } |