0
|
1 # $Id: PrimarySeq.pm,v 1.73.2.1 2003/06/29 00:25:27 jason Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::PrimarySeq
|
|
4 #
|
|
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
|
|
6 #
|
|
7 # Copyright Ewan Birney
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::PrimarySeq - Bioperl lightweight Sequence Object
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # The Bio::SeqIO for file reading, Bio::DB::GenBank for
|
|
20 # database reading
|
|
21
|
|
22 use Bio::Seq;
|
|
23 use Bio::SeqIO;
|
|
24 use Bio::DB::GenBank;
|
|
25
|
|
26 #make from memory
|
|
27 $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG',
|
|
28 -id => 'GeneFragment-12',
|
|
29 -accession_number => 'X78121',
|
|
30 -alphabet => 'dna',
|
|
31 -is_circular => 1
|
|
32 );
|
|
33 print "Sequence ", $seqobj->id(), " with accession ",
|
|
34 $seqobj->accession_number, "\n";
|
|
35
|
|
36 # read from file
|
|
37 $inputstream = Bio::SeqIO->new(-file => "myseq.fa",-format => 'Fasta');
|
|
38 $seqobj = $inputstream->next_seq();
|
|
39 print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n";
|
|
40
|
|
41
|
|
42 # to get out parts of the sequence.
|
|
43
|
|
44 print "Sequence ", $seqobj->id(), " with accession ",
|
|
45 $seqobj->accession_number, " and desc ", $seqobj->desc, "\n";
|
|
46
|
|
47 $string = $seqobj->seq();
|
|
48 $string2 = $seqobj->subseq(1,40);
|
|
49
|
|
50
|
|
51 =head1 DESCRIPTION
|
|
52
|
|
53 PrimarySeq is a lightweight Sequence object, storing little more than
|
|
54 the sequence, its name, a computer useful unique name. It does not
|
|
55 contain sequence features or other information. To have a sequence
|
|
56 with sequence features you should use the Seq object which uses this
|
|
57 object - go perldoc Bio::Seq
|
|
58
|
|
59 Although newusers will use Bio::PrimarySeq alot, in general you will
|
|
60 be using it from the Bio::Seq object. For more information on Bio::Seq
|
|
61 go perldoc Bio::Seq. For interest you might like to known that
|
|
62 Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls
|
|
63 to do with sequence to it (the has-a relationship lets us get out of a
|
|
64 otherwise nasty cyclical reference in Perl which would leak memory).
|
|
65
|
|
66 Sequence objects are defined by the Bio::PrimarySeqI interface, and this
|
|
67 object is a pure Perl implementation of the interface (if that's
|
|
68 gibberish to you, don't worry. The take home message is that this
|
|
69 object is the bioperl default sequence object, but other people can
|
|
70 use their own objects as sequences if they so wish). If you are
|
|
71 interested in wrapping your own objects as compliant Bioperl sequence
|
|
72 objects, then you should read the Bio::PrimarySeqI documentation
|
|
73
|
|
74 The documenation of this object is a merge of the Bio::PrimarySeq and
|
|
75 Bio::PrimarySeqI documentation. This allows all the methods which you can
|
|
76 call on sequence objects here.
|
|
77
|
|
78 =head1 FEEDBACK
|
|
79
|
|
80 =head2 Mailing Lists
|
|
81
|
|
82 User feedback is an integral part of the evolution of this and other
|
|
83 Bioperl modules. Send your comments and suggestions preferably to one
|
|
84 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
85
|
|
86 bioperl-l@bioperl.org - General discussion
|
|
87 http://bio.perl.org/MailList.html - About the mailing lists
|
|
88
|
|
89 =head2 Reporting Bugs
|
|
90
|
|
91 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
92 the bugs and their resolution. Bug reports can be submitted via email
|
|
93 or the web:
|
|
94
|
|
95 bioperl-bugs@bio.perl.org
|
|
96 http://bugzilla.bioperl.org/
|
|
97
|
|
98 =head1 AUTHOR - Ewan Birney
|
|
99
|
|
100 Email birney@sanger.ac.uk
|
|
101
|
|
102 Describe contact details here
|
|
103
|
|
104 =head1 APPENDIX
|
|
105
|
|
106 The rest of the documentation details each of the object
|
|
107 methods. Internal methods are usually preceded with a _
|
|
108
|
|
109 =cut
|
|
110
|
|
111
|
|
112 # Let the code begin...
|
|
113
|
|
114
|
|
115 package Bio::PrimarySeq;
|
|
116 use vars qw(@ISA);
|
|
117 use strict;
|
|
118
|
|
119 use Bio::Root::Root;
|
|
120 use Bio::PrimarySeqI;
|
|
121 use Bio::IdentifiableI;
|
|
122 use Bio::DescribableI;
|
|
123
|
|
124 @ISA = qw(Bio::Root::Root Bio::PrimarySeqI
|
|
125 Bio::IdentifiableI Bio::DescribableI);
|
|
126
|
|
127 #
|
|
128 # setup the allowed values for alphabet()
|
|
129 #
|
|
130
|
|
131 my %valid_type = map {$_, 1} qw( dna rna protein );
|
|
132
|
|
133 =head2 new
|
|
134
|
|
135 Title : new
|
|
136 Usage : $seq = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
|
|
137 -id => 'human_id',
|
|
138 -accession_number => 'AL000012',
|
|
139 );
|
|
140
|
|
141 Function: Returns a new primary seq object from
|
|
142 basic constructors, being a string for the sequence
|
|
143 and strings for id and accession_number.
|
|
144
|
|
145 Note that you can provide an empty sequence string. However, in
|
|
146 this case you MUST specify the type of sequence you wish to
|
|
147 initialize by the parameter -alphabet. See alphabet() for possible
|
|
148 values.
|
|
149 Returns : a new Bio::PrimarySeq object
|
|
150 Args : -seq => sequence string
|
|
151 -display_id => display id of the sequence (locus name)
|
|
152 -accession_number => accession number
|
|
153 -primary_id => primary id (Genbank id)
|
|
154 -namespace => the namespace for the accession
|
|
155 -authority => the authority for the namespace
|
|
156 -desc => description text
|
|
157 -alphabet => sequence type (alphabet) (dna|rna|protein)
|
|
158 -id => alias for display id
|
|
159 -is_circular => boolean field for whether or not sequence is circular
|
|
160
|
|
161 =cut
|
|
162
|
|
163
|
|
164 sub new {
|
|
165 my ($class, @args) = @_;
|
|
166 my $self = $class->SUPER::new(@args);
|
|
167
|
|
168 my($seq,$id,$acc,$pid,$ns,$auth,$v,$oid,
|
|
169 $desc,$alphabet,$given_id,$is_circular,$direct,$ref_to_seq,$len) =
|
|
170 $self->_rearrange([qw(SEQ
|
|
171 DISPLAY_ID
|
|
172 ACCESSION_NUMBER
|
|
173 PRIMARY_ID
|
|
174 NAMESPACE
|
|
175 AUTHORITY
|
|
176 VERSION
|
|
177 OBJECT_ID
|
|
178 DESC
|
|
179 ALPHABET
|
|
180 ID
|
|
181 IS_CIRCULAR
|
|
182 DIRECT
|
|
183 REF_TO_SEQ
|
|
184 LENGTH
|
|
185 )],
|
|
186 @args);
|
|
187 if( defined $id && defined $given_id ) {
|
|
188 if( $id ne $given_id ) {
|
|
189 $self->throw("Provided both id and display_id constructor ".
|
|
190 "functions. [$id] [$given_id]");
|
|
191 }
|
|
192 }
|
|
193 if( defined $given_id ) { $id = $given_id; }
|
|
194
|
|
195 # let's set the length before the seq -- if there is one, this length is
|
|
196 # going to be invalidated
|
|
197 defined $len && $self->length($len);
|
|
198
|
|
199 # if alphabet is provided we set it first, so that it won't be guessed
|
|
200 # when the sequence is set
|
|
201 $alphabet && $self->alphabet($alphabet);
|
|
202
|
|
203 # if there is an alphabet, and direct is passed in, assumme the alphabet
|
|
204 # and sequence is ok
|
|
205
|
|
206 if( $direct && $ref_to_seq) {
|
|
207 $self->{'seq'} = $$ref_to_seq;
|
|
208 if( ! $alphabet ) {
|
|
209 $self->_guess_alphabet();
|
|
210 } # else it has been set already above
|
|
211 } else {
|
|
212 # print STDERR "DEBUG: setting sequence to [$seq]\n";
|
|
213 # note: the sequence string may be empty
|
|
214 $self->seq($seq) if defined($seq);
|
|
215 }
|
|
216
|
|
217 $id && $self->display_id($id);
|
|
218 $acc && $self->accession_number($acc);
|
|
219 defined $pid && $self->primary_id($pid);
|
|
220 $desc && $self->desc($desc);
|
|
221 $is_circular && $self->is_circular($is_circular);
|
|
222 $ns && $self->namespace($ns);
|
|
223 $auth && $self->authority($auth);
|
|
224 defined($v) && $self->version($v);
|
|
225 defined($oid) && $self->object_id($oid);
|
|
226
|
|
227 return $self;
|
|
228 }
|
|
229
|
|
230 sub direct_seq_set {
|
|
231 my $obj = shift;
|
|
232 return $obj->{'seq'} = shift if @_;
|
|
233 return undef;
|
|
234 }
|
|
235
|
|
236
|
|
237 =head2 seq
|
|
238
|
|
239 Title : seq
|
|
240 Usage : $string = $obj->seq()
|
|
241 Function: Returns the sequence as a string of letters. The
|
|
242 case of the letters is left up to the implementer.
|
|
243 Suggested cases are upper case for proteins and lower case for
|
|
244 DNA sequence (IUPAC standard), but you should not rely on this
|
|
245 Returns : A scalar
|
|
246 Args : Optionally on set the new value (a string). An optional second
|
|
247 argument presets the alphabet (otherwise it will be guessed).
|
|
248 Both parameters may also be given in named paramater style
|
|
249 with -seq and -alphabet being the names.
|
|
250
|
|
251 =cut
|
|
252
|
|
253 sub seq {
|
|
254 my ($obj,@args) = @_;
|
|
255
|
|
256 if( scalar(@args) == 0 ) {
|
|
257 return $obj->{'seq'};
|
|
258 }
|
|
259
|
|
260 my ($value,$alphabet) = @args;
|
|
261
|
|
262
|
|
263 if(@args) {
|
|
264 if(defined($value) && (! $obj->validate_seq($value))) {
|
|
265 $obj->throw("Attempting to set the sequence to [$value] ".
|
|
266 "which does not look healthy");
|
|
267 }
|
|
268 # if a sequence was already set we make sure that we re-adjust the
|
|
269 # mol.type, otherwise we skip guessing if mol.type is already set
|
|
270 # note: if the new seq is empty or undef, we don't consider that a
|
|
271 # change (we wouldn't have anything to guess on anyway)
|
|
272 my $is_changed_seq =
|
|
273 exists($obj->{'seq'}) && (CORE::length($value || '') > 0);
|
|
274 $obj->{'seq'} = $value;
|
|
275 # new alphabet overridden by arguments?
|
|
276 if($alphabet) {
|
|
277 # yes, set it no matter what
|
|
278 $obj->alphabet($alphabet);
|
|
279 } elsif( # if we changed a previous sequence to a new one
|
|
280 $is_changed_seq ||
|
|
281 # or if there is no alphabet yet at all
|
|
282 (! defined($obj->alphabet()))) {
|
|
283 # we need to guess the (possibly new) alphabet
|
|
284 $obj->_guess_alphabet();
|
|
285 } # else (seq not changed and alphabet was defined) do nothing
|
|
286 # if the seq is changed, make sure we unset a possibly set length
|
|
287 $obj->length(undef) if $is_changed_seq;
|
|
288 }
|
|
289 return $obj->{'seq'};
|
|
290 }
|
|
291
|
|
292 =head2 validate_seq
|
|
293
|
|
294 Title : validate_seq
|
|
295 Usage : if(! $seq->validate_seq($seq_str) ) {
|
|
296 print "sequence $seq_str is not valid for an object of type ",
|
|
297 ref($seq), "\n";
|
|
298 }
|
|
299 Function: Validates a given sequence string. A validating sequence string
|
|
300 must be accepted by seq(). A string that does not validate will
|
|
301 lead to an exception if passed to seq().
|
|
302
|
|
303 The implementation provided here does not take alphabet() into
|
|
304 account. Allowed are all letters (A-Z) and '-','.', '*' and '?'.
|
|
305
|
|
306 Example :
|
|
307 Returns : 1 if the supplied sequence string is valid for the object, and
|
|
308 0 otherwise.
|
|
309 Args : The sequence string to be validated.
|
|
310
|
|
311
|
|
312 =cut
|
|
313
|
|
314 sub validate_seq {
|
|
315 my ($self,$seqstr) = @_;
|
|
316 if( ! defined $seqstr ){ $seqstr = $self->seq(); }
|
|
317 return 0 unless( defined $seqstr);
|
|
318 if((CORE::length($seqstr) > 0) && ($seqstr !~ /^([A-Za-z\-\.\*\?]+)$/)) {
|
|
319 $self->warn("seq doesn't validate, mismatch is " .
|
|
320 ($seqstr =~ /([^A-Za-z\-\.\*\?]+)/g));
|
|
321 return 0;
|
|
322 }
|
|
323 return 1;
|
|
324 }
|
|
325
|
|
326 =head2 subseq
|
|
327
|
|
328 Title : subseq
|
|
329 Usage : $substring = $obj->subseq(10,40);
|
|
330 Function: returns the subseq from start to end, where the first base
|
|
331 is 1 and the number is inclusive, ie 1-2 are the first two
|
|
332 bases of the sequence
|
|
333 Returns : a string
|
|
334 Args : integer for start position
|
|
335 integer for end position
|
|
336 OR
|
|
337 Bio::LocationI location for subseq (strand honored)
|
|
338
|
|
339 =cut
|
|
340
|
|
341 sub subseq {
|
|
342 my ($self,$start,$end,$replace) = @_;
|
|
343
|
|
344 if( ref($start) && $start->isa('Bio::LocationI') ) {
|
|
345 my $loc = $start;
|
|
346 $replace = $end; # do we really use this anywhere? scary. HL
|
|
347 my $seq = "";
|
|
348 foreach my $subloc ($loc->each_Location()) {
|
|
349 my $piece = $self->subseq($subloc->start(),
|
|
350 $subloc->end(), $replace);
|
|
351 if($subloc->strand() < 0) {
|
|
352 $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq();
|
|
353 }
|
|
354 $seq .= $piece;
|
|
355 }
|
|
356 return $seq;
|
|
357 } elsif( defined $start && defined $end ) {
|
|
358 if( $start > $end ){
|
|
359 $self->throw("in subseq, start [$start] has to be ".
|
|
360 "greater than end [$end]");
|
|
361 }
|
|
362 if( $start <= 0 || $end > $self->length ) {
|
|
363 $self->throw("You have to have start positive\n\tand length less ".
|
|
364 "than the total length of sequence [$start:$end] ".
|
|
365 "Total ".$self->length."");
|
|
366 }
|
|
367
|
|
368 # remove one from start, and then length is end-start
|
|
369 $start--;
|
|
370 if( defined $replace ) {
|
|
371 return substr( $self->seq(), $start, ($end-$start), $replace);
|
|
372 } else {
|
|
373 return substr( $self->seq(), $start, ($end-$start));
|
|
374 }
|
|
375 } else {
|
|
376 $self->warn("Incorrect parameters to subseq - must be two integers ".
|
|
377 "or a Bio::LocationI object not ($start,$end)");
|
|
378 }
|
|
379 }
|
|
380
|
|
381 =head2 length
|
|
382
|
|
383 Title : length
|
|
384 Usage : $len = $seq->length();
|
|
385 Function: Get the length of the sequence in number of symbols (bases
|
|
386 or amino acids).
|
|
387
|
|
388 You can also set this attribute, even to a number that does
|
|
389 not match the length of the sequence string. This is useful
|
|
390 if you don''t want to set the sequence too, or if you want
|
|
391 to free up memory by unsetting the sequence. In the latter
|
|
392 case you could do e.g.
|
|
393
|
|
394 $seq->length($seq->length);
|
|
395 $seq->seq(undef);
|
|
396
|
|
397 Note that if you set the sequence to a value other than
|
|
398 undef at any time, the length attribute will be
|
|
399 invalidated, and the length of the sequence string will be
|
|
400 reported again. Also, we won''t let you lie about the length.
|
|
401
|
|
402 Example :
|
|
403 Returns : integer representing the length of the sequence.
|
|
404 Args : Optionally, the value on set
|
|
405
|
|
406 =cut
|
|
407
|
|
408 sub length {
|
|
409 my $self = shift;
|
|
410 my $len = CORE::length($self->seq() || '');
|
|
411
|
|
412 if(@_) {
|
|
413 my $val = shift;
|
|
414 if(defined($val) && $len && ($len != $val)) {
|
|
415 $self->throw("You're trying to lie about the length: ".
|
|
416 "is $len but you say ".$val);
|
|
417 }
|
|
418 $self->{'_seq_length'} = $val;
|
|
419 } elsif(defined($self->{'_seq_length'})) {
|
|
420 return $self->{'_seq_length'};
|
|
421 }
|
|
422 return $len;
|
|
423 }
|
|
424
|
|
425 =head2 display_id
|
|
426
|
|
427 Title : display_id or display_name
|
|
428 Usage : $id_string = $obj->display_id();
|
|
429 Function: returns the display id, aka the common name of the Sequence object.
|
|
430
|
|
431 The semantics of this is that it is the most likely string to
|
|
432 be used as an identifier of the sequence, and likely to have
|
|
433 "human" readability. The id is equivalent to the ID field of
|
|
434 the GenBank/EMBL databanks and the id field of the
|
|
435 Swissprot/sptrembl database. In fasta format, the >(\S+) is
|
|
436 presumed to be the id, though some people overload the id to
|
|
437 embed other information. Bioperl does not use any embedded
|
|
438 information in the ID field, and people are encouraged to use
|
|
439 other mechanisms (accession field for example, or extending
|
|
440 the sequence object) to solve this.
|
|
441
|
|
442 With the new Bio::DescribeableI interface, display_name aliases
|
|
443 to this method.
|
|
444
|
|
445 Returns : A string
|
|
446 Args : None
|
|
447
|
|
448
|
|
449 =cut
|
|
450
|
|
451 sub display_id {
|
|
452 my ($obj,$value) = @_;
|
|
453 if( defined $value) {
|
|
454 $obj->{'display_id'} = $value;
|
|
455 }
|
|
456 return $obj->{'display_id'};
|
|
457
|
|
458 }
|
|
459
|
|
460 =head2 accession_number
|
|
461
|
|
462 Title : accession_number or object_id
|
|
463 Usage : $unique_key = $obj->accession_number;
|
|
464 Function: Returns the unique biological id for a sequence, commonly
|
|
465 called the accession_number. For sequences from established
|
|
466 databases, the implementors should try to use the correct
|
|
467 accession number. Notice that primary_id() provides the
|
|
468 unique id for the implemetation, allowing multiple objects
|
|
469 to have the same accession number in a particular implementation.
|
|
470
|
|
471 For sequences with no accession number, this method should
|
|
472 return "unknown".
|
|
473
|
|
474 [Note this method name is likely to change in 1.3]
|
|
475
|
|
476 With the new Bio::IdentifiableI interface, this is aliased
|
|
477 to object_id
|
|
478
|
|
479 Returns : A string
|
|
480 Args : A string (optional) for setting
|
|
481
|
|
482 =cut
|
|
483
|
|
484 sub accession_number {
|
|
485 my( $obj, $acc ) = @_;
|
|
486
|
|
487 if (defined $acc) {
|
|
488 $obj->{'accession_number'} = $acc;
|
|
489 } else {
|
|
490 $acc = $obj->{'accession_number'};
|
|
491 $acc = 'unknown' unless defined $acc;
|
|
492 }
|
|
493 return $acc;
|
|
494 }
|
|
495
|
|
496 =head2 primary_id
|
|
497
|
|
498 Title : primary_id
|
|
499 Usage : $unique_key = $obj->primary_id;
|
|
500 Function: Returns the unique id for this object in this
|
|
501 implementation. This allows implementations to manage their
|
|
502 own object ids in a way the implementaiton can control
|
|
503 clients can expect one id to map to one object.
|
|
504
|
|
505 For sequences with no natural primary id, this method
|
|
506 should return a stringified memory location.
|
|
507
|
|
508 Returns : A string
|
|
509 Args : A string (optional, for setting)
|
|
510
|
|
511 =cut
|
|
512
|
|
513 sub primary_id {
|
|
514 my ($obj,$value) = @_;
|
|
515 if( defined $value) {
|
|
516 $obj->{'primary_id'} = $value;
|
|
517 }
|
|
518 if( ! exists $obj->{'primary_id'} ) {
|
|
519 return "$obj";
|
|
520 }
|
|
521 return $obj->{'primary_id'};
|
|
522
|
|
523 }
|
|
524
|
|
525
|
|
526 =head2 alphabet
|
|
527
|
|
528 Title : alphabet
|
|
529 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
|
|
530 Function: Returns the type of sequence being one of
|
|
531 'dna', 'rna' or 'protein'. This is case sensitive.
|
|
532
|
|
533 This is not called <type> because this would cause
|
|
534 upgrade problems from the 0.5 and earlier Seq objects.
|
|
535
|
|
536 Returns : a string either 'dna','rna','protein'. NB - the object must
|
|
537 make a call of the type - if there is no type specified it
|
|
538 has to guess.
|
|
539 Args : none
|
|
540
|
|
541
|
|
542 =cut
|
|
543
|
|
544 sub alphabet {
|
|
545 my ($obj,$value) = @_;
|
|
546 if (defined $value) {
|
|
547 $value = lc $value;
|
|
548 unless ( $valid_type{$value} ) {
|
|
549 $obj->throw("Molecular type '$value' is not a valid type (".
|
|
550 join(',', map "'$_'", sort keys %valid_type) .
|
|
551 ") lowercase");
|
|
552 }
|
|
553 $obj->{'alphabet'} = $value;
|
|
554 }
|
|
555 return $obj->{'alphabet'};
|
|
556 }
|
|
557
|
|
558 =head2 desc
|
|
559
|
|
560 Title : desc or description
|
|
561 Usage : $obj->desc($newval)
|
|
562 Function: Get/set description of the sequence.
|
|
563
|
|
564 description is an alias for this for compliance with the
|
|
565 Bio::DescribeableI interface.
|
|
566
|
|
567 Example :
|
|
568 Returns : value of desc (a string)
|
|
569 Args : newvalue (a string or undef, optional)
|
|
570
|
|
571
|
|
572 =cut
|
|
573
|
|
574 sub desc{
|
|
575 my $self = shift;
|
|
576
|
|
577 return $self->{'desc'} = shift if @_;
|
|
578 return $self->{'desc'};
|
|
579 }
|
|
580
|
|
581 =head2 can_call_new
|
|
582
|
|
583 Title : can_call_new
|
|
584 Usage :
|
|
585 Function:
|
|
586 Example :
|
|
587 Returns : true
|
|
588 Args :
|
|
589
|
|
590
|
|
591 =cut
|
|
592
|
|
593 sub can_call_new {
|
|
594 my ($self) = @_;
|
|
595
|
|
596 return 1;
|
|
597
|
|
598 }
|
|
599
|
|
600 =head2 id
|
|
601
|
|
602 Title : id
|
|
603 Usage : $id = $seq->id()
|
|
604 Function: This is mapped on display_id
|
|
605 Example :
|
|
606 Returns :
|
|
607 Args :
|
|
608
|
|
609
|
|
610 =cut
|
|
611
|
|
612 sub id {
|
|
613 return shift->display_id(@_);
|
|
614 }
|
|
615
|
|
616 =head2 is_circular
|
|
617
|
|
618 Title : is_circular
|
|
619 Usage : if( $obj->is_circular) { /Do Something/ }
|
|
620 Function: Returns true if the molecule is circular
|
|
621 Returns : Boolean value
|
|
622 Args : none
|
|
623
|
|
624 =cut
|
|
625
|
|
626 sub is_circular{
|
|
627 my $self = shift;
|
|
628 return $self->{'is_circular'} = shift if @_;
|
|
629 return $self->{'is_circular'};
|
|
630 }
|
|
631
|
|
632 =head1 Methods for Bio::IdentifiableI compliance
|
|
633
|
|
634 =cut
|
|
635
|
|
636 =head2 object_id
|
|
637
|
|
638 Title : object_id
|
|
639 Usage : $string = $obj->object_id()
|
|
640 Function: a string which represents the stable primary identifier
|
|
641 in this namespace of this object. For DNA sequences this
|
|
642 is its accession_number, similarly for protein sequences
|
|
643
|
|
644 This is aliased to accession_number().
|
|
645 Returns : A scalar
|
|
646
|
|
647
|
|
648 =cut
|
|
649
|
|
650 sub object_id {
|
|
651 return shift->accession_number(@_);
|
|
652 }
|
|
653
|
|
654 =head2 version
|
|
655
|
|
656 Title : version
|
|
657 Usage : $version = $obj->version()
|
|
658 Function: a number which differentiates between versions of
|
|
659 the same object. Higher numbers are considered to be
|
|
660 later and more relevant, but a single object described
|
|
661 the same identifier should represent the same concept
|
|
662
|
|
663 Returns : A number
|
|
664
|
|
665 =cut
|
|
666
|
|
667 sub version{
|
|
668 my ($self,$value) = @_;
|
|
669 if( defined $value) {
|
|
670 $self->{'_version'} = $value;
|
|
671 }
|
|
672 return $self->{'_version'};
|
|
673 }
|
|
674
|
|
675
|
|
676 =head2 authority
|
|
677
|
|
678 Title : authority
|
|
679 Usage : $authority = $obj->authority()
|
|
680 Function: a string which represents the organisation which
|
|
681 granted the namespace, written as the DNS name for
|
|
682 organisation (eg, wormbase.org)
|
|
683
|
|
684 Returns : A scalar
|
|
685
|
|
686 =cut
|
|
687
|
|
688 sub authority {
|
|
689 my ($obj,$value) = @_;
|
|
690 if( defined $value) {
|
|
691 $obj->{'authority'} = $value;
|
|
692 }
|
|
693 return $obj->{'authority'};
|
|
694 }
|
|
695
|
|
696 =head2 namespace
|
|
697
|
|
698 Title : namespace
|
|
699 Usage : $string = $obj->namespace()
|
|
700 Function: A string representing the name space this identifier
|
|
701 is valid in, often the database name or the name
|
|
702 describing the collection
|
|
703
|
|
704 Returns : A scalar
|
|
705
|
|
706
|
|
707 =cut
|
|
708
|
|
709 sub namespace{
|
|
710 my ($self,$value) = @_;
|
|
711 if( defined $value) {
|
|
712 $self->{'namespace'} = $value;
|
|
713 }
|
|
714 return $self->{'namespace'} || "";
|
|
715 }
|
|
716
|
|
717 =head1 Methods for Bio::DescribableI compliance
|
|
718
|
|
719 This comprises of display_name and description.
|
|
720
|
|
721 =cut
|
|
722
|
|
723 =head2 display_name
|
|
724
|
|
725 Title : display_name
|
|
726 Usage : $string = $obj->display_name()
|
|
727 Function: A string which is what should be displayed to the user
|
|
728 the string should have no spaces (ideally, though a cautious
|
|
729 user of this interface would not assumme this) and should be
|
|
730 less than thirty characters (though again, double checking
|
|
731 this is a good idea)
|
|
732
|
|
733 This is aliased to display_id().
|
|
734 Returns : A scalar
|
|
735
|
|
736 =cut
|
|
737
|
|
738 sub display_name {
|
|
739 return shift->display_id(@_);
|
|
740 }
|
|
741
|
|
742 =head2 description
|
|
743
|
|
744 Title : description
|
|
745 Usage : $string = $obj->description()
|
|
746 Function: A text string suitable for displaying to the user a
|
|
747 description. This string is likely to have spaces, but
|
|
748 should not have any newlines or formatting - just plain
|
|
749 text. The string should not be greater than 255 characters
|
|
750 and clients can feel justified at truncating strings at 255
|
|
751 characters for the purposes of display
|
|
752
|
|
753 This is aliased to desc().
|
|
754 Returns : A scalar
|
|
755
|
|
756 =cut
|
|
757
|
|
758 sub description {
|
|
759 return shift->desc(@_);
|
|
760 }
|
|
761
|
|
762 =head1 Methods Inherited from Bio::PrimarySeqI
|
|
763
|
|
764 These methods are available on Bio::PrimarySeq, although they are
|
|
765 actually implemented on Bio::PrimarySeqI
|
|
766
|
|
767 =head2 revcom
|
|
768
|
|
769 Title : revcom
|
|
770 Usage : $rev = $seq->revcom()
|
|
771 Function: Produces a new Bio::SeqI implementing object which
|
|
772 is the reversed complement of the sequence. For protein
|
|
773 sequences this throws an exception of
|
|
774 "Sequence is a protein. Cannot revcom"
|
|
775
|
|
776 The id is the same id as the orginal sequence, and the
|
|
777 accession number is also indentical. If someone wants to
|
|
778 track that this sequence has be reversed, it needs to
|
|
779 define its own extensions
|
|
780
|
|
781 To do an inplace edit of an object you can go:
|
|
782
|
|
783 $seqobj = $seqobj->revcom();
|
|
784
|
|
785 This of course, causes Perl to handle the garbage
|
|
786 collection of the old object, but it is roughly speaking as
|
|
787 efficient as an inplace edit.
|
|
788
|
|
789 Returns : A new (fresh) Bio::SeqI object
|
|
790 Args : none
|
|
791
|
|
792 =cut
|
|
793
|
|
794 =head2 trunc
|
|
795
|
|
796 Title : trunc
|
|
797 Usage : $subseq = $myseq->trunc(10,100);
|
|
798 Function: Provides a truncation of a sequence,
|
|
799
|
|
800 Example :
|
|
801 Returns : a fresh Bio::SeqI implementing object
|
|
802 Args :
|
|
803
|
|
804
|
|
805 =cut
|
|
806
|
|
807 =head1 Internal methods
|
|
808
|
|
809 These are internal methods to PrimarySeq
|
|
810
|
|
811 =cut
|
|
812
|
|
813 =head2 _guess_alphabet
|
|
814
|
|
815 Title : _guess_alphabet
|
|
816 Usage :
|
|
817 Function:
|
|
818 Example :
|
|
819 Returns :
|
|
820 Args :
|
|
821
|
|
822
|
|
823 =cut
|
|
824
|
|
825 sub _guess_alphabet {
|
|
826 my ($self) = @_;
|
|
827 my ($str,$str2,$total,$atgc,$u,$type);
|
|
828
|
|
829 $str = $self->seq();
|
|
830 $str =~ s/\-\.\?//g;
|
|
831
|
|
832 $total = CORE::length($str);
|
|
833 if( $total == 0 ) {
|
|
834 $self->throw("Got a sequence with no letters in - ".
|
|
835 "cannot guess alphabet [$str]");
|
|
836 }
|
|
837
|
|
838 $u = ($str =~ tr/Uu//);
|
|
839 $atgc = ($str =~ tr/ATGCNatgcn//);
|
|
840
|
|
841 if( ($atgc / $total) > 0.85 ) {
|
|
842 $type = 'dna';
|
|
843 } elsif( (($atgc + $u) / $total) > 0.85 ) {
|
|
844 $type = 'rna';
|
|
845 } else {
|
|
846 $type = 'protein';
|
|
847 }
|
|
848
|
|
849 $self->alphabet($type);
|
|
850 return $type;
|
|
851 }
|
|
852
|
|
853 ############################################################################
|
|
854 # aliases due to name changes or to compensate for our lack of consistency #
|
|
855 ############################################################################
|
|
856
|
|
857 sub accession {
|
|
858 my $self = shift;
|
|
859
|
|
860 $self->warn(ref($self)."::accession is deprecated, ".
|
|
861 "use accession_number() instead");
|
|
862 return $self->accession_number(@_);
|
|
863 }
|
|
864
|
|
865 1;
|
|
866
|