Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/PrimarySeq.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: PrimarySeq.pm,v 1.73.2.1 2003/06/29 00:25:27 jason Exp $ | |
2 # | |
3 # bioperl module for Bio::PrimarySeq | |
4 # | |
5 # Cared for by Ewan Birney <birney@sanger.ac.uk> | |
6 # | |
7 # Copyright Ewan Birney | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::PrimarySeq - Bioperl lightweight Sequence Object | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # The Bio::SeqIO for file reading, Bio::DB::GenBank for | |
20 # database reading | |
21 | |
22 use Bio::Seq; | |
23 use Bio::SeqIO; | |
24 use Bio::DB::GenBank; | |
25 | |
26 #make from memory | |
27 $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG', | |
28 -id => 'GeneFragment-12', | |
29 -accession_number => 'X78121', | |
30 -alphabet => 'dna', | |
31 -is_circular => 1 | |
32 ); | |
33 print "Sequence ", $seqobj->id(), " with accession ", | |
34 $seqobj->accession_number, "\n"; | |
35 | |
36 # read from file | |
37 $inputstream = Bio::SeqIO->new(-file => "myseq.fa",-format => 'Fasta'); | |
38 $seqobj = $inputstream->next_seq(); | |
39 print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n"; | |
40 | |
41 | |
42 # to get out parts of the sequence. | |
43 | |
44 print "Sequence ", $seqobj->id(), " with accession ", | |
45 $seqobj->accession_number, " and desc ", $seqobj->desc, "\n"; | |
46 | |
47 $string = $seqobj->seq(); | |
48 $string2 = $seqobj->subseq(1,40); | |
49 | |
50 | |
51 =head1 DESCRIPTION | |
52 | |
53 PrimarySeq is a lightweight Sequence object, storing little more than | |
54 the sequence, its name, a computer useful unique name. It does not | |
55 contain sequence features or other information. To have a sequence | |
56 with sequence features you should use the Seq object which uses this | |
57 object - go perldoc Bio::Seq | |
58 | |
59 Although newusers will use Bio::PrimarySeq alot, in general you will | |
60 be using it from the Bio::Seq object. For more information on Bio::Seq | |
61 go perldoc Bio::Seq. For interest you might like to known that | |
62 Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls | |
63 to do with sequence to it (the has-a relationship lets us get out of a | |
64 otherwise nasty cyclical reference in Perl which would leak memory). | |
65 | |
66 Sequence objects are defined by the Bio::PrimarySeqI interface, and this | |
67 object is a pure Perl implementation of the interface (if that's | |
68 gibberish to you, don't worry. The take home message is that this | |
69 object is the bioperl default sequence object, but other people can | |
70 use their own objects as sequences if they so wish). If you are | |
71 interested in wrapping your own objects as compliant Bioperl sequence | |
72 objects, then you should read the Bio::PrimarySeqI documentation | |
73 | |
74 The documenation of this object is a merge of the Bio::PrimarySeq and | |
75 Bio::PrimarySeqI documentation. This allows all the methods which you can | |
76 call on sequence objects here. | |
77 | |
78 =head1 FEEDBACK | |
79 | |
80 =head2 Mailing Lists | |
81 | |
82 User feedback is an integral part of the evolution of this and other | |
83 Bioperl modules. Send your comments and suggestions preferably to one | |
84 of the Bioperl mailing lists. Your participation is much appreciated. | |
85 | |
86 bioperl-l@bioperl.org - General discussion | |
87 http://bio.perl.org/MailList.html - About the mailing lists | |
88 | |
89 =head2 Reporting Bugs | |
90 | |
91 Report bugs to the Bioperl bug tracking system to help us keep track | |
92 the bugs and their resolution. Bug reports can be submitted via email | |
93 or the web: | |
94 | |
95 bioperl-bugs@bio.perl.org | |
96 http://bugzilla.bioperl.org/ | |
97 | |
98 =head1 AUTHOR - Ewan Birney | |
99 | |
100 Email birney@sanger.ac.uk | |
101 | |
102 Describe contact details here | |
103 | |
104 =head1 APPENDIX | |
105 | |
106 The rest of the documentation details each of the object | |
107 methods. Internal methods are usually preceded with a _ | |
108 | |
109 =cut | |
110 | |
111 | |
112 # Let the code begin... | |
113 | |
114 | |
115 package Bio::PrimarySeq; | |
116 use vars qw(@ISA); | |
117 use strict; | |
118 | |
119 use Bio::Root::Root; | |
120 use Bio::PrimarySeqI; | |
121 use Bio::IdentifiableI; | |
122 use Bio::DescribableI; | |
123 | |
124 @ISA = qw(Bio::Root::Root Bio::PrimarySeqI | |
125 Bio::IdentifiableI Bio::DescribableI); | |
126 | |
127 # | |
128 # setup the allowed values for alphabet() | |
129 # | |
130 | |
131 my %valid_type = map {$_, 1} qw( dna rna protein ); | |
132 | |
133 =head2 new | |
134 | |
135 Title : new | |
136 Usage : $seq = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | |
137 -id => 'human_id', | |
138 -accession_number => 'AL000012', | |
139 ); | |
140 | |
141 Function: Returns a new primary seq object from | |
142 basic constructors, being a string for the sequence | |
143 and strings for id and accession_number. | |
144 | |
145 Note that you can provide an empty sequence string. However, in | |
146 this case you MUST specify the type of sequence you wish to | |
147 initialize by the parameter -alphabet. See alphabet() for possible | |
148 values. | |
149 Returns : a new Bio::PrimarySeq object | |
150 Args : -seq => sequence string | |
151 -display_id => display id of the sequence (locus name) | |
152 -accession_number => accession number | |
153 -primary_id => primary id (Genbank id) | |
154 -namespace => the namespace for the accession | |
155 -authority => the authority for the namespace | |
156 -desc => description text | |
157 -alphabet => sequence type (alphabet) (dna|rna|protein) | |
158 -id => alias for display id | |
159 -is_circular => boolean field for whether or not sequence is circular | |
160 | |
161 =cut | |
162 | |
163 | |
164 sub new { | |
165 my ($class, @args) = @_; | |
166 my $self = $class->SUPER::new(@args); | |
167 | |
168 my($seq,$id,$acc,$pid,$ns,$auth,$v,$oid, | |
169 $desc,$alphabet,$given_id,$is_circular,$direct,$ref_to_seq,$len) = | |
170 $self->_rearrange([qw(SEQ | |
171 DISPLAY_ID | |
172 ACCESSION_NUMBER | |
173 PRIMARY_ID | |
174 NAMESPACE | |
175 AUTHORITY | |
176 VERSION | |
177 OBJECT_ID | |
178 DESC | |
179 ALPHABET | |
180 ID | |
181 IS_CIRCULAR | |
182 DIRECT | |
183 REF_TO_SEQ | |
184 LENGTH | |
185 )], | |
186 @args); | |
187 if( defined $id && defined $given_id ) { | |
188 if( $id ne $given_id ) { | |
189 $self->throw("Provided both id and display_id constructor ". | |
190 "functions. [$id] [$given_id]"); | |
191 } | |
192 } | |
193 if( defined $given_id ) { $id = $given_id; } | |
194 | |
195 # let's set the length before the seq -- if there is one, this length is | |
196 # going to be invalidated | |
197 defined $len && $self->length($len); | |
198 | |
199 # if alphabet is provided we set it first, so that it won't be guessed | |
200 # when the sequence is set | |
201 $alphabet && $self->alphabet($alphabet); | |
202 | |
203 # if there is an alphabet, and direct is passed in, assumme the alphabet | |
204 # and sequence is ok | |
205 | |
206 if( $direct && $ref_to_seq) { | |
207 $self->{'seq'} = $$ref_to_seq; | |
208 if( ! $alphabet ) { | |
209 $self->_guess_alphabet(); | |
210 } # else it has been set already above | |
211 } else { | |
212 # print STDERR "DEBUG: setting sequence to [$seq]\n"; | |
213 # note: the sequence string may be empty | |
214 $self->seq($seq) if defined($seq); | |
215 } | |
216 | |
217 $id && $self->display_id($id); | |
218 $acc && $self->accession_number($acc); | |
219 defined $pid && $self->primary_id($pid); | |
220 $desc && $self->desc($desc); | |
221 $is_circular && $self->is_circular($is_circular); | |
222 $ns && $self->namespace($ns); | |
223 $auth && $self->authority($auth); | |
224 defined($v) && $self->version($v); | |
225 defined($oid) && $self->object_id($oid); | |
226 | |
227 return $self; | |
228 } | |
229 | |
230 sub direct_seq_set { | |
231 my $obj = shift; | |
232 return $obj->{'seq'} = shift if @_; | |
233 return undef; | |
234 } | |
235 | |
236 | |
237 =head2 seq | |
238 | |
239 Title : seq | |
240 Usage : $string = $obj->seq() | |
241 Function: Returns the sequence as a string of letters. The | |
242 case of the letters is left up to the implementer. | |
243 Suggested cases are upper case for proteins and lower case for | |
244 DNA sequence (IUPAC standard), but you should not rely on this | |
245 Returns : A scalar | |
246 Args : Optionally on set the new value (a string). An optional second | |
247 argument presets the alphabet (otherwise it will be guessed). | |
248 Both parameters may also be given in named paramater style | |
249 with -seq and -alphabet being the names. | |
250 | |
251 =cut | |
252 | |
253 sub seq { | |
254 my ($obj,@args) = @_; | |
255 | |
256 if( scalar(@args) == 0 ) { | |
257 return $obj->{'seq'}; | |
258 } | |
259 | |
260 my ($value,$alphabet) = @args; | |
261 | |
262 | |
263 if(@args) { | |
264 if(defined($value) && (! $obj->validate_seq($value))) { | |
265 $obj->throw("Attempting to set the sequence to [$value] ". | |
266 "which does not look healthy"); | |
267 } | |
268 # if a sequence was already set we make sure that we re-adjust the | |
269 # mol.type, otherwise we skip guessing if mol.type is already set | |
270 # note: if the new seq is empty or undef, we don't consider that a | |
271 # change (we wouldn't have anything to guess on anyway) | |
272 my $is_changed_seq = | |
273 exists($obj->{'seq'}) && (CORE::length($value || '') > 0); | |
274 $obj->{'seq'} = $value; | |
275 # new alphabet overridden by arguments? | |
276 if($alphabet) { | |
277 # yes, set it no matter what | |
278 $obj->alphabet($alphabet); | |
279 } elsif( # if we changed a previous sequence to a new one | |
280 $is_changed_seq || | |
281 # or if there is no alphabet yet at all | |
282 (! defined($obj->alphabet()))) { | |
283 # we need to guess the (possibly new) alphabet | |
284 $obj->_guess_alphabet(); | |
285 } # else (seq not changed and alphabet was defined) do nothing | |
286 # if the seq is changed, make sure we unset a possibly set length | |
287 $obj->length(undef) if $is_changed_seq; | |
288 } | |
289 return $obj->{'seq'}; | |
290 } | |
291 | |
292 =head2 validate_seq | |
293 | |
294 Title : validate_seq | |
295 Usage : if(! $seq->validate_seq($seq_str) ) { | |
296 print "sequence $seq_str is not valid for an object of type ", | |
297 ref($seq), "\n"; | |
298 } | |
299 Function: Validates a given sequence string. A validating sequence string | |
300 must be accepted by seq(). A string that does not validate will | |
301 lead to an exception if passed to seq(). | |
302 | |
303 The implementation provided here does not take alphabet() into | |
304 account. Allowed are all letters (A-Z) and '-','.', '*' and '?'. | |
305 | |
306 Example : | |
307 Returns : 1 if the supplied sequence string is valid for the object, and | |
308 0 otherwise. | |
309 Args : The sequence string to be validated. | |
310 | |
311 | |
312 =cut | |
313 | |
314 sub validate_seq { | |
315 my ($self,$seqstr) = @_; | |
316 if( ! defined $seqstr ){ $seqstr = $self->seq(); } | |
317 return 0 unless( defined $seqstr); | |
318 if((CORE::length($seqstr) > 0) && ($seqstr !~ /^([A-Za-z\-\.\*\?]+)$/)) { | |
319 $self->warn("seq doesn't validate, mismatch is " . | |
320 ($seqstr =~ /([^A-Za-z\-\.\*\?]+)/g)); | |
321 return 0; | |
322 } | |
323 return 1; | |
324 } | |
325 | |
326 =head2 subseq | |
327 | |
328 Title : subseq | |
329 Usage : $substring = $obj->subseq(10,40); | |
330 Function: returns the subseq from start to end, where the first base | |
331 is 1 and the number is inclusive, ie 1-2 are the first two | |
332 bases of the sequence | |
333 Returns : a string | |
334 Args : integer for start position | |
335 integer for end position | |
336 OR | |
337 Bio::LocationI location for subseq (strand honored) | |
338 | |
339 =cut | |
340 | |
341 sub subseq { | |
342 my ($self,$start,$end,$replace) = @_; | |
343 | |
344 if( ref($start) && $start->isa('Bio::LocationI') ) { | |
345 my $loc = $start; | |
346 $replace = $end; # do we really use this anywhere? scary. HL | |
347 my $seq = ""; | |
348 foreach my $subloc ($loc->each_Location()) { | |
349 my $piece = $self->subseq($subloc->start(), | |
350 $subloc->end(), $replace); | |
351 if($subloc->strand() < 0) { | |
352 $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq(); | |
353 } | |
354 $seq .= $piece; | |
355 } | |
356 return $seq; | |
357 } elsif( defined $start && defined $end ) { | |
358 if( $start > $end ){ | |
359 $self->throw("in subseq, start [$start] has to be ". | |
360 "greater than end [$end]"); | |
361 } | |
362 if( $start <= 0 || $end > $self->length ) { | |
363 $self->throw("You have to have start positive\n\tand length less ". | |
364 "than the total length of sequence [$start:$end] ". | |
365 "Total ".$self->length.""); | |
366 } | |
367 | |
368 # remove one from start, and then length is end-start | |
369 $start--; | |
370 if( defined $replace ) { | |
371 return substr( $self->seq(), $start, ($end-$start), $replace); | |
372 } else { | |
373 return substr( $self->seq(), $start, ($end-$start)); | |
374 } | |
375 } else { | |
376 $self->warn("Incorrect parameters to subseq - must be two integers ". | |
377 "or a Bio::LocationI object not ($start,$end)"); | |
378 } | |
379 } | |
380 | |
381 =head2 length | |
382 | |
383 Title : length | |
384 Usage : $len = $seq->length(); | |
385 Function: Get the length of the sequence in number of symbols (bases | |
386 or amino acids). | |
387 | |
388 You can also set this attribute, even to a number that does | |
389 not match the length of the sequence string. This is useful | |
390 if you don''t want to set the sequence too, or if you want | |
391 to free up memory by unsetting the sequence. In the latter | |
392 case you could do e.g. | |
393 | |
394 $seq->length($seq->length); | |
395 $seq->seq(undef); | |
396 | |
397 Note that if you set the sequence to a value other than | |
398 undef at any time, the length attribute will be | |
399 invalidated, and the length of the sequence string will be | |
400 reported again. Also, we won''t let you lie about the length. | |
401 | |
402 Example : | |
403 Returns : integer representing the length of the sequence. | |
404 Args : Optionally, the value on set | |
405 | |
406 =cut | |
407 | |
408 sub length { | |
409 my $self = shift; | |
410 my $len = CORE::length($self->seq() || ''); | |
411 | |
412 if(@_) { | |
413 my $val = shift; | |
414 if(defined($val) && $len && ($len != $val)) { | |
415 $self->throw("You're trying to lie about the length: ". | |
416 "is $len but you say ".$val); | |
417 } | |
418 $self->{'_seq_length'} = $val; | |
419 } elsif(defined($self->{'_seq_length'})) { | |
420 return $self->{'_seq_length'}; | |
421 } | |
422 return $len; | |
423 } | |
424 | |
425 =head2 display_id | |
426 | |
427 Title : display_id or display_name | |
428 Usage : $id_string = $obj->display_id(); | |
429 Function: returns the display id, aka the common name of the Sequence object. | |
430 | |
431 The semantics of this is that it is the most likely string to | |
432 be used as an identifier of the sequence, and likely to have | |
433 "human" readability. The id is equivalent to the ID field of | |
434 the GenBank/EMBL databanks and the id field of the | |
435 Swissprot/sptrembl database. In fasta format, the >(\S+) is | |
436 presumed to be the id, though some people overload the id to | |
437 embed other information. Bioperl does not use any embedded | |
438 information in the ID field, and people are encouraged to use | |
439 other mechanisms (accession field for example, or extending | |
440 the sequence object) to solve this. | |
441 | |
442 With the new Bio::DescribeableI interface, display_name aliases | |
443 to this method. | |
444 | |
445 Returns : A string | |
446 Args : None | |
447 | |
448 | |
449 =cut | |
450 | |
451 sub display_id { | |
452 my ($obj,$value) = @_; | |
453 if( defined $value) { | |
454 $obj->{'display_id'} = $value; | |
455 } | |
456 return $obj->{'display_id'}; | |
457 | |
458 } | |
459 | |
460 =head2 accession_number | |
461 | |
462 Title : accession_number or object_id | |
463 Usage : $unique_key = $obj->accession_number; | |
464 Function: Returns the unique biological id for a sequence, commonly | |
465 called the accession_number. For sequences from established | |
466 databases, the implementors should try to use the correct | |
467 accession number. Notice that primary_id() provides the | |
468 unique id for the implemetation, allowing multiple objects | |
469 to have the same accession number in a particular implementation. | |
470 | |
471 For sequences with no accession number, this method should | |
472 return "unknown". | |
473 | |
474 [Note this method name is likely to change in 1.3] | |
475 | |
476 With the new Bio::IdentifiableI interface, this is aliased | |
477 to object_id | |
478 | |
479 Returns : A string | |
480 Args : A string (optional) for setting | |
481 | |
482 =cut | |
483 | |
484 sub accession_number { | |
485 my( $obj, $acc ) = @_; | |
486 | |
487 if (defined $acc) { | |
488 $obj->{'accession_number'} = $acc; | |
489 } else { | |
490 $acc = $obj->{'accession_number'}; | |
491 $acc = 'unknown' unless defined $acc; | |
492 } | |
493 return $acc; | |
494 } | |
495 | |
496 =head2 primary_id | |
497 | |
498 Title : primary_id | |
499 Usage : $unique_key = $obj->primary_id; | |
500 Function: Returns the unique id for this object in this | |
501 implementation. This allows implementations to manage their | |
502 own object ids in a way the implementaiton can control | |
503 clients can expect one id to map to one object. | |
504 | |
505 For sequences with no natural primary id, this method | |
506 should return a stringified memory location. | |
507 | |
508 Returns : A string | |
509 Args : A string (optional, for setting) | |
510 | |
511 =cut | |
512 | |
513 sub primary_id { | |
514 my ($obj,$value) = @_; | |
515 if( defined $value) { | |
516 $obj->{'primary_id'} = $value; | |
517 } | |
518 if( ! exists $obj->{'primary_id'} ) { | |
519 return "$obj"; | |
520 } | |
521 return $obj->{'primary_id'}; | |
522 | |
523 } | |
524 | |
525 | |
526 =head2 alphabet | |
527 | |
528 Title : alphabet | |
529 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
530 Function: Returns the type of sequence being one of | |
531 'dna', 'rna' or 'protein'. This is case sensitive. | |
532 | |
533 This is not called <type> because this would cause | |
534 upgrade problems from the 0.5 and earlier Seq objects. | |
535 | |
536 Returns : a string either 'dna','rna','protein'. NB - the object must | |
537 make a call of the type - if there is no type specified it | |
538 has to guess. | |
539 Args : none | |
540 | |
541 | |
542 =cut | |
543 | |
544 sub alphabet { | |
545 my ($obj,$value) = @_; | |
546 if (defined $value) { | |
547 $value = lc $value; | |
548 unless ( $valid_type{$value} ) { | |
549 $obj->throw("Molecular type '$value' is not a valid type (". | |
550 join(',', map "'$_'", sort keys %valid_type) . | |
551 ") lowercase"); | |
552 } | |
553 $obj->{'alphabet'} = $value; | |
554 } | |
555 return $obj->{'alphabet'}; | |
556 } | |
557 | |
558 =head2 desc | |
559 | |
560 Title : desc or description | |
561 Usage : $obj->desc($newval) | |
562 Function: Get/set description of the sequence. | |
563 | |
564 description is an alias for this for compliance with the | |
565 Bio::DescribeableI interface. | |
566 | |
567 Example : | |
568 Returns : value of desc (a string) | |
569 Args : newvalue (a string or undef, optional) | |
570 | |
571 | |
572 =cut | |
573 | |
574 sub desc{ | |
575 my $self = shift; | |
576 | |
577 return $self->{'desc'} = shift if @_; | |
578 return $self->{'desc'}; | |
579 } | |
580 | |
581 =head2 can_call_new | |
582 | |
583 Title : can_call_new | |
584 Usage : | |
585 Function: | |
586 Example : | |
587 Returns : true | |
588 Args : | |
589 | |
590 | |
591 =cut | |
592 | |
593 sub can_call_new { | |
594 my ($self) = @_; | |
595 | |
596 return 1; | |
597 | |
598 } | |
599 | |
600 =head2 id | |
601 | |
602 Title : id | |
603 Usage : $id = $seq->id() | |
604 Function: This is mapped on display_id | |
605 Example : | |
606 Returns : | |
607 Args : | |
608 | |
609 | |
610 =cut | |
611 | |
612 sub id { | |
613 return shift->display_id(@_); | |
614 } | |
615 | |
616 =head2 is_circular | |
617 | |
618 Title : is_circular | |
619 Usage : if( $obj->is_circular) { /Do Something/ } | |
620 Function: Returns true if the molecule is circular | |
621 Returns : Boolean value | |
622 Args : none | |
623 | |
624 =cut | |
625 | |
626 sub is_circular{ | |
627 my $self = shift; | |
628 return $self->{'is_circular'} = shift if @_; | |
629 return $self->{'is_circular'}; | |
630 } | |
631 | |
632 =head1 Methods for Bio::IdentifiableI compliance | |
633 | |
634 =cut | |
635 | |
636 =head2 object_id | |
637 | |
638 Title : object_id | |
639 Usage : $string = $obj->object_id() | |
640 Function: a string which represents the stable primary identifier | |
641 in this namespace of this object. For DNA sequences this | |
642 is its accession_number, similarly for protein sequences | |
643 | |
644 This is aliased to accession_number(). | |
645 Returns : A scalar | |
646 | |
647 | |
648 =cut | |
649 | |
650 sub object_id { | |
651 return shift->accession_number(@_); | |
652 } | |
653 | |
654 =head2 version | |
655 | |
656 Title : version | |
657 Usage : $version = $obj->version() | |
658 Function: a number which differentiates between versions of | |
659 the same object. Higher numbers are considered to be | |
660 later and more relevant, but a single object described | |
661 the same identifier should represent the same concept | |
662 | |
663 Returns : A number | |
664 | |
665 =cut | |
666 | |
667 sub version{ | |
668 my ($self,$value) = @_; | |
669 if( defined $value) { | |
670 $self->{'_version'} = $value; | |
671 } | |
672 return $self->{'_version'}; | |
673 } | |
674 | |
675 | |
676 =head2 authority | |
677 | |
678 Title : authority | |
679 Usage : $authority = $obj->authority() | |
680 Function: a string which represents the organisation which | |
681 granted the namespace, written as the DNS name for | |
682 organisation (eg, wormbase.org) | |
683 | |
684 Returns : A scalar | |
685 | |
686 =cut | |
687 | |
688 sub authority { | |
689 my ($obj,$value) = @_; | |
690 if( defined $value) { | |
691 $obj->{'authority'} = $value; | |
692 } | |
693 return $obj->{'authority'}; | |
694 } | |
695 | |
696 =head2 namespace | |
697 | |
698 Title : namespace | |
699 Usage : $string = $obj->namespace() | |
700 Function: A string representing the name space this identifier | |
701 is valid in, often the database name or the name | |
702 describing the collection | |
703 | |
704 Returns : A scalar | |
705 | |
706 | |
707 =cut | |
708 | |
709 sub namespace{ | |
710 my ($self,$value) = @_; | |
711 if( defined $value) { | |
712 $self->{'namespace'} = $value; | |
713 } | |
714 return $self->{'namespace'} || ""; | |
715 } | |
716 | |
717 =head1 Methods for Bio::DescribableI compliance | |
718 | |
719 This comprises of display_name and description. | |
720 | |
721 =cut | |
722 | |
723 =head2 display_name | |
724 | |
725 Title : display_name | |
726 Usage : $string = $obj->display_name() | |
727 Function: A string which is what should be displayed to the user | |
728 the string should have no spaces (ideally, though a cautious | |
729 user of this interface would not assumme this) and should be | |
730 less than thirty characters (though again, double checking | |
731 this is a good idea) | |
732 | |
733 This is aliased to display_id(). | |
734 Returns : A scalar | |
735 | |
736 =cut | |
737 | |
738 sub display_name { | |
739 return shift->display_id(@_); | |
740 } | |
741 | |
742 =head2 description | |
743 | |
744 Title : description | |
745 Usage : $string = $obj->description() | |
746 Function: A text string suitable for displaying to the user a | |
747 description. This string is likely to have spaces, but | |
748 should not have any newlines or formatting - just plain | |
749 text. The string should not be greater than 255 characters | |
750 and clients can feel justified at truncating strings at 255 | |
751 characters for the purposes of display | |
752 | |
753 This is aliased to desc(). | |
754 Returns : A scalar | |
755 | |
756 =cut | |
757 | |
758 sub description { | |
759 return shift->desc(@_); | |
760 } | |
761 | |
762 =head1 Methods Inherited from Bio::PrimarySeqI | |
763 | |
764 These methods are available on Bio::PrimarySeq, although they are | |
765 actually implemented on Bio::PrimarySeqI | |
766 | |
767 =head2 revcom | |
768 | |
769 Title : revcom | |
770 Usage : $rev = $seq->revcom() | |
771 Function: Produces a new Bio::SeqI implementing object which | |
772 is the reversed complement of the sequence. For protein | |
773 sequences this throws an exception of | |
774 "Sequence is a protein. Cannot revcom" | |
775 | |
776 The id is the same id as the orginal sequence, and the | |
777 accession number is also indentical. If someone wants to | |
778 track that this sequence has be reversed, it needs to | |
779 define its own extensions | |
780 | |
781 To do an inplace edit of an object you can go: | |
782 | |
783 $seqobj = $seqobj->revcom(); | |
784 | |
785 This of course, causes Perl to handle the garbage | |
786 collection of the old object, but it is roughly speaking as | |
787 efficient as an inplace edit. | |
788 | |
789 Returns : A new (fresh) Bio::SeqI object | |
790 Args : none | |
791 | |
792 =cut | |
793 | |
794 =head2 trunc | |
795 | |
796 Title : trunc | |
797 Usage : $subseq = $myseq->trunc(10,100); | |
798 Function: Provides a truncation of a sequence, | |
799 | |
800 Example : | |
801 Returns : a fresh Bio::SeqI implementing object | |
802 Args : | |
803 | |
804 | |
805 =cut | |
806 | |
807 =head1 Internal methods | |
808 | |
809 These are internal methods to PrimarySeq | |
810 | |
811 =cut | |
812 | |
813 =head2 _guess_alphabet | |
814 | |
815 Title : _guess_alphabet | |
816 Usage : | |
817 Function: | |
818 Example : | |
819 Returns : | |
820 Args : | |
821 | |
822 | |
823 =cut | |
824 | |
825 sub _guess_alphabet { | |
826 my ($self) = @_; | |
827 my ($str,$str2,$total,$atgc,$u,$type); | |
828 | |
829 $str = $self->seq(); | |
830 $str =~ s/\-\.\?//g; | |
831 | |
832 $total = CORE::length($str); | |
833 if( $total == 0 ) { | |
834 $self->throw("Got a sequence with no letters in - ". | |
835 "cannot guess alphabet [$str]"); | |
836 } | |
837 | |
838 $u = ($str =~ tr/Uu//); | |
839 $atgc = ($str =~ tr/ATGCNatgcn//); | |
840 | |
841 if( ($atgc / $total) > 0.85 ) { | |
842 $type = 'dna'; | |
843 } elsif( (($atgc + $u) / $total) > 0.85 ) { | |
844 $type = 'rna'; | |
845 } else { | |
846 $type = 'protein'; | |
847 } | |
848 | |
849 $self->alphabet($type); | |
850 return $type; | |
851 } | |
852 | |
853 ############################################################################ | |
854 # aliases due to name changes or to compensate for our lack of consistency # | |
855 ############################################################################ | |
856 | |
857 sub accession { | |
858 my $self = shift; | |
859 | |
860 $self->warn(ref($self)."::accession is deprecated, ". | |
861 "use accession_number() instead"); | |
862 return $self->accession_number(@_); | |
863 } | |
864 | |
865 1; | |
866 |