Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/PrimarySeq.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: PrimarySeq.pm,v 1.73.2.1 2003/06/29 00:25:27 jason Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::PrimarySeq | |
| 4 # | |
| 5 # Cared for by Ewan Birney <birney@sanger.ac.uk> | |
| 6 # | |
| 7 # Copyright Ewan Birney | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::PrimarySeq - Bioperl lightweight Sequence Object | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # The Bio::SeqIO for file reading, Bio::DB::GenBank for | |
| 20 # database reading | |
| 21 | |
| 22 use Bio::Seq; | |
| 23 use Bio::SeqIO; | |
| 24 use Bio::DB::GenBank; | |
| 25 | |
| 26 #make from memory | |
| 27 $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG', | |
| 28 -id => 'GeneFragment-12', | |
| 29 -accession_number => 'X78121', | |
| 30 -alphabet => 'dna', | |
| 31 -is_circular => 1 | |
| 32 ); | |
| 33 print "Sequence ", $seqobj->id(), " with accession ", | |
| 34 $seqobj->accession_number, "\n"; | |
| 35 | |
| 36 # read from file | |
| 37 $inputstream = Bio::SeqIO->new(-file => "myseq.fa",-format => 'Fasta'); | |
| 38 $seqobj = $inputstream->next_seq(); | |
| 39 print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n"; | |
| 40 | |
| 41 | |
| 42 # to get out parts of the sequence. | |
| 43 | |
| 44 print "Sequence ", $seqobj->id(), " with accession ", | |
| 45 $seqobj->accession_number, " and desc ", $seqobj->desc, "\n"; | |
| 46 | |
| 47 $string = $seqobj->seq(); | |
| 48 $string2 = $seqobj->subseq(1,40); | |
| 49 | |
| 50 | |
| 51 =head1 DESCRIPTION | |
| 52 | |
| 53 PrimarySeq is a lightweight Sequence object, storing little more than | |
| 54 the sequence, its name, a computer useful unique name. It does not | |
| 55 contain sequence features or other information. To have a sequence | |
| 56 with sequence features you should use the Seq object which uses this | |
| 57 object - go perldoc Bio::Seq | |
| 58 | |
| 59 Although newusers will use Bio::PrimarySeq alot, in general you will | |
| 60 be using it from the Bio::Seq object. For more information on Bio::Seq | |
| 61 go perldoc Bio::Seq. For interest you might like to known that | |
| 62 Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls | |
| 63 to do with sequence to it (the has-a relationship lets us get out of a | |
| 64 otherwise nasty cyclical reference in Perl which would leak memory). | |
| 65 | |
| 66 Sequence objects are defined by the Bio::PrimarySeqI interface, and this | |
| 67 object is a pure Perl implementation of the interface (if that's | |
| 68 gibberish to you, don't worry. The take home message is that this | |
| 69 object is the bioperl default sequence object, but other people can | |
| 70 use their own objects as sequences if they so wish). If you are | |
| 71 interested in wrapping your own objects as compliant Bioperl sequence | |
| 72 objects, then you should read the Bio::PrimarySeqI documentation | |
| 73 | |
| 74 The documenation of this object is a merge of the Bio::PrimarySeq and | |
| 75 Bio::PrimarySeqI documentation. This allows all the methods which you can | |
| 76 call on sequence objects here. | |
| 77 | |
| 78 =head1 FEEDBACK | |
| 79 | |
| 80 =head2 Mailing Lists | |
| 81 | |
| 82 User feedback is an integral part of the evolution of this and other | |
| 83 Bioperl modules. Send your comments and suggestions preferably to one | |
| 84 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 85 | |
| 86 bioperl-l@bioperl.org - General discussion | |
| 87 http://bio.perl.org/MailList.html - About the mailing lists | |
| 88 | |
| 89 =head2 Reporting Bugs | |
| 90 | |
| 91 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 92 the bugs and their resolution. Bug reports can be submitted via email | |
| 93 or the web: | |
| 94 | |
| 95 bioperl-bugs@bio.perl.org | |
| 96 http://bugzilla.bioperl.org/ | |
| 97 | |
| 98 =head1 AUTHOR - Ewan Birney | |
| 99 | |
| 100 Email birney@sanger.ac.uk | |
| 101 | |
| 102 Describe contact details here | |
| 103 | |
| 104 =head1 APPENDIX | |
| 105 | |
| 106 The rest of the documentation details each of the object | |
| 107 methods. Internal methods are usually preceded with a _ | |
| 108 | |
| 109 =cut | |
| 110 | |
| 111 | |
| 112 # Let the code begin... | |
| 113 | |
| 114 | |
| 115 package Bio::PrimarySeq; | |
| 116 use vars qw(@ISA); | |
| 117 use strict; | |
| 118 | |
| 119 use Bio::Root::Root; | |
| 120 use Bio::PrimarySeqI; | |
| 121 use Bio::IdentifiableI; | |
| 122 use Bio::DescribableI; | |
| 123 | |
| 124 @ISA = qw(Bio::Root::Root Bio::PrimarySeqI | |
| 125 Bio::IdentifiableI Bio::DescribableI); | |
| 126 | |
| 127 # | |
| 128 # setup the allowed values for alphabet() | |
| 129 # | |
| 130 | |
| 131 my %valid_type = map {$_, 1} qw( dna rna protein ); | |
| 132 | |
| 133 =head2 new | |
| 134 | |
| 135 Title : new | |
| 136 Usage : $seq = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | |
| 137 -id => 'human_id', | |
| 138 -accession_number => 'AL000012', | |
| 139 ); | |
| 140 | |
| 141 Function: Returns a new primary seq object from | |
| 142 basic constructors, being a string for the sequence | |
| 143 and strings for id and accession_number. | |
| 144 | |
| 145 Note that you can provide an empty sequence string. However, in | |
| 146 this case you MUST specify the type of sequence you wish to | |
| 147 initialize by the parameter -alphabet. See alphabet() for possible | |
| 148 values. | |
| 149 Returns : a new Bio::PrimarySeq object | |
| 150 Args : -seq => sequence string | |
| 151 -display_id => display id of the sequence (locus name) | |
| 152 -accession_number => accession number | |
| 153 -primary_id => primary id (Genbank id) | |
| 154 -namespace => the namespace for the accession | |
| 155 -authority => the authority for the namespace | |
| 156 -desc => description text | |
| 157 -alphabet => sequence type (alphabet) (dna|rna|protein) | |
| 158 -id => alias for display id | |
| 159 -is_circular => boolean field for whether or not sequence is circular | |
| 160 | |
| 161 =cut | |
| 162 | |
| 163 | |
| 164 sub new { | |
| 165 my ($class, @args) = @_; | |
| 166 my $self = $class->SUPER::new(@args); | |
| 167 | |
| 168 my($seq,$id,$acc,$pid,$ns,$auth,$v,$oid, | |
| 169 $desc,$alphabet,$given_id,$is_circular,$direct,$ref_to_seq,$len) = | |
| 170 $self->_rearrange([qw(SEQ | |
| 171 DISPLAY_ID | |
| 172 ACCESSION_NUMBER | |
| 173 PRIMARY_ID | |
| 174 NAMESPACE | |
| 175 AUTHORITY | |
| 176 VERSION | |
| 177 OBJECT_ID | |
| 178 DESC | |
| 179 ALPHABET | |
| 180 ID | |
| 181 IS_CIRCULAR | |
| 182 DIRECT | |
| 183 REF_TO_SEQ | |
| 184 LENGTH | |
| 185 )], | |
| 186 @args); | |
| 187 if( defined $id && defined $given_id ) { | |
| 188 if( $id ne $given_id ) { | |
| 189 $self->throw("Provided both id and display_id constructor ". | |
| 190 "functions. [$id] [$given_id]"); | |
| 191 } | |
| 192 } | |
| 193 if( defined $given_id ) { $id = $given_id; } | |
| 194 | |
| 195 # let's set the length before the seq -- if there is one, this length is | |
| 196 # going to be invalidated | |
| 197 defined $len && $self->length($len); | |
| 198 | |
| 199 # if alphabet is provided we set it first, so that it won't be guessed | |
| 200 # when the sequence is set | |
| 201 $alphabet && $self->alphabet($alphabet); | |
| 202 | |
| 203 # if there is an alphabet, and direct is passed in, assumme the alphabet | |
| 204 # and sequence is ok | |
| 205 | |
| 206 if( $direct && $ref_to_seq) { | |
| 207 $self->{'seq'} = $$ref_to_seq; | |
| 208 if( ! $alphabet ) { | |
| 209 $self->_guess_alphabet(); | |
| 210 } # else it has been set already above | |
| 211 } else { | |
| 212 # print STDERR "DEBUG: setting sequence to [$seq]\n"; | |
| 213 # note: the sequence string may be empty | |
| 214 $self->seq($seq) if defined($seq); | |
| 215 } | |
| 216 | |
| 217 $id && $self->display_id($id); | |
| 218 $acc && $self->accession_number($acc); | |
| 219 defined $pid && $self->primary_id($pid); | |
| 220 $desc && $self->desc($desc); | |
| 221 $is_circular && $self->is_circular($is_circular); | |
| 222 $ns && $self->namespace($ns); | |
| 223 $auth && $self->authority($auth); | |
| 224 defined($v) && $self->version($v); | |
| 225 defined($oid) && $self->object_id($oid); | |
| 226 | |
| 227 return $self; | |
| 228 } | |
| 229 | |
| 230 sub direct_seq_set { | |
| 231 my $obj = shift; | |
| 232 return $obj->{'seq'} = shift if @_; | |
| 233 return undef; | |
| 234 } | |
| 235 | |
| 236 | |
| 237 =head2 seq | |
| 238 | |
| 239 Title : seq | |
| 240 Usage : $string = $obj->seq() | |
| 241 Function: Returns the sequence as a string of letters. The | |
| 242 case of the letters is left up to the implementer. | |
| 243 Suggested cases are upper case for proteins and lower case for | |
| 244 DNA sequence (IUPAC standard), but you should not rely on this | |
| 245 Returns : A scalar | |
| 246 Args : Optionally on set the new value (a string). An optional second | |
| 247 argument presets the alphabet (otherwise it will be guessed). | |
| 248 Both parameters may also be given in named paramater style | |
| 249 with -seq and -alphabet being the names. | |
| 250 | |
| 251 =cut | |
| 252 | |
| 253 sub seq { | |
| 254 my ($obj,@args) = @_; | |
| 255 | |
| 256 if( scalar(@args) == 0 ) { | |
| 257 return $obj->{'seq'}; | |
| 258 } | |
| 259 | |
| 260 my ($value,$alphabet) = @args; | |
| 261 | |
| 262 | |
| 263 if(@args) { | |
| 264 if(defined($value) && (! $obj->validate_seq($value))) { | |
| 265 $obj->throw("Attempting to set the sequence to [$value] ". | |
| 266 "which does not look healthy"); | |
| 267 } | |
| 268 # if a sequence was already set we make sure that we re-adjust the | |
| 269 # mol.type, otherwise we skip guessing if mol.type is already set | |
| 270 # note: if the new seq is empty or undef, we don't consider that a | |
| 271 # change (we wouldn't have anything to guess on anyway) | |
| 272 my $is_changed_seq = | |
| 273 exists($obj->{'seq'}) && (CORE::length($value || '') > 0); | |
| 274 $obj->{'seq'} = $value; | |
| 275 # new alphabet overridden by arguments? | |
| 276 if($alphabet) { | |
| 277 # yes, set it no matter what | |
| 278 $obj->alphabet($alphabet); | |
| 279 } elsif( # if we changed a previous sequence to a new one | |
| 280 $is_changed_seq || | |
| 281 # or if there is no alphabet yet at all | |
| 282 (! defined($obj->alphabet()))) { | |
| 283 # we need to guess the (possibly new) alphabet | |
| 284 $obj->_guess_alphabet(); | |
| 285 } # else (seq not changed and alphabet was defined) do nothing | |
| 286 # if the seq is changed, make sure we unset a possibly set length | |
| 287 $obj->length(undef) if $is_changed_seq; | |
| 288 } | |
| 289 return $obj->{'seq'}; | |
| 290 } | |
| 291 | |
| 292 =head2 validate_seq | |
| 293 | |
| 294 Title : validate_seq | |
| 295 Usage : if(! $seq->validate_seq($seq_str) ) { | |
| 296 print "sequence $seq_str is not valid for an object of type ", | |
| 297 ref($seq), "\n"; | |
| 298 } | |
| 299 Function: Validates a given sequence string. A validating sequence string | |
| 300 must be accepted by seq(). A string that does not validate will | |
| 301 lead to an exception if passed to seq(). | |
| 302 | |
| 303 The implementation provided here does not take alphabet() into | |
| 304 account. Allowed are all letters (A-Z) and '-','.', '*' and '?'. | |
| 305 | |
| 306 Example : | |
| 307 Returns : 1 if the supplied sequence string is valid for the object, and | |
| 308 0 otherwise. | |
| 309 Args : The sequence string to be validated. | |
| 310 | |
| 311 | |
| 312 =cut | |
| 313 | |
| 314 sub validate_seq { | |
| 315 my ($self,$seqstr) = @_; | |
| 316 if( ! defined $seqstr ){ $seqstr = $self->seq(); } | |
| 317 return 0 unless( defined $seqstr); | |
| 318 if((CORE::length($seqstr) > 0) && ($seqstr !~ /^([A-Za-z\-\.\*\?]+)$/)) { | |
| 319 $self->warn("seq doesn't validate, mismatch is " . | |
| 320 ($seqstr =~ /([^A-Za-z\-\.\*\?]+)/g)); | |
| 321 return 0; | |
| 322 } | |
| 323 return 1; | |
| 324 } | |
| 325 | |
| 326 =head2 subseq | |
| 327 | |
| 328 Title : subseq | |
| 329 Usage : $substring = $obj->subseq(10,40); | |
| 330 Function: returns the subseq from start to end, where the first base | |
| 331 is 1 and the number is inclusive, ie 1-2 are the first two | |
| 332 bases of the sequence | |
| 333 Returns : a string | |
| 334 Args : integer for start position | |
| 335 integer for end position | |
| 336 OR | |
| 337 Bio::LocationI location for subseq (strand honored) | |
| 338 | |
| 339 =cut | |
| 340 | |
| 341 sub subseq { | |
| 342 my ($self,$start,$end,$replace) = @_; | |
| 343 | |
| 344 if( ref($start) && $start->isa('Bio::LocationI') ) { | |
| 345 my $loc = $start; | |
| 346 $replace = $end; # do we really use this anywhere? scary. HL | |
| 347 my $seq = ""; | |
| 348 foreach my $subloc ($loc->each_Location()) { | |
| 349 my $piece = $self->subseq($subloc->start(), | |
| 350 $subloc->end(), $replace); | |
| 351 if($subloc->strand() < 0) { | |
| 352 $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq(); | |
| 353 } | |
| 354 $seq .= $piece; | |
| 355 } | |
| 356 return $seq; | |
| 357 } elsif( defined $start && defined $end ) { | |
| 358 if( $start > $end ){ | |
| 359 $self->throw("in subseq, start [$start] has to be ". | |
| 360 "greater than end [$end]"); | |
| 361 } | |
| 362 if( $start <= 0 || $end > $self->length ) { | |
| 363 $self->throw("You have to have start positive\n\tand length less ". | |
| 364 "than the total length of sequence [$start:$end] ". | |
| 365 "Total ".$self->length.""); | |
| 366 } | |
| 367 | |
| 368 # remove one from start, and then length is end-start | |
| 369 $start--; | |
| 370 if( defined $replace ) { | |
| 371 return substr( $self->seq(), $start, ($end-$start), $replace); | |
| 372 } else { | |
| 373 return substr( $self->seq(), $start, ($end-$start)); | |
| 374 } | |
| 375 } else { | |
| 376 $self->warn("Incorrect parameters to subseq - must be two integers ". | |
| 377 "or a Bio::LocationI object not ($start,$end)"); | |
| 378 } | |
| 379 } | |
| 380 | |
| 381 =head2 length | |
| 382 | |
| 383 Title : length | |
| 384 Usage : $len = $seq->length(); | |
| 385 Function: Get the length of the sequence in number of symbols (bases | |
| 386 or amino acids). | |
| 387 | |
| 388 You can also set this attribute, even to a number that does | |
| 389 not match the length of the sequence string. This is useful | |
| 390 if you don''t want to set the sequence too, or if you want | |
| 391 to free up memory by unsetting the sequence. In the latter | |
| 392 case you could do e.g. | |
| 393 | |
| 394 $seq->length($seq->length); | |
| 395 $seq->seq(undef); | |
| 396 | |
| 397 Note that if you set the sequence to a value other than | |
| 398 undef at any time, the length attribute will be | |
| 399 invalidated, and the length of the sequence string will be | |
| 400 reported again. Also, we won''t let you lie about the length. | |
| 401 | |
| 402 Example : | |
| 403 Returns : integer representing the length of the sequence. | |
| 404 Args : Optionally, the value on set | |
| 405 | |
| 406 =cut | |
| 407 | |
| 408 sub length { | |
| 409 my $self = shift; | |
| 410 my $len = CORE::length($self->seq() || ''); | |
| 411 | |
| 412 if(@_) { | |
| 413 my $val = shift; | |
| 414 if(defined($val) && $len && ($len != $val)) { | |
| 415 $self->throw("You're trying to lie about the length: ". | |
| 416 "is $len but you say ".$val); | |
| 417 } | |
| 418 $self->{'_seq_length'} = $val; | |
| 419 } elsif(defined($self->{'_seq_length'})) { | |
| 420 return $self->{'_seq_length'}; | |
| 421 } | |
| 422 return $len; | |
| 423 } | |
| 424 | |
| 425 =head2 display_id | |
| 426 | |
| 427 Title : display_id or display_name | |
| 428 Usage : $id_string = $obj->display_id(); | |
| 429 Function: returns the display id, aka the common name of the Sequence object. | |
| 430 | |
| 431 The semantics of this is that it is the most likely string to | |
| 432 be used as an identifier of the sequence, and likely to have | |
| 433 "human" readability. The id is equivalent to the ID field of | |
| 434 the GenBank/EMBL databanks and the id field of the | |
| 435 Swissprot/sptrembl database. In fasta format, the >(\S+) is | |
| 436 presumed to be the id, though some people overload the id to | |
| 437 embed other information. Bioperl does not use any embedded | |
| 438 information in the ID field, and people are encouraged to use | |
| 439 other mechanisms (accession field for example, or extending | |
| 440 the sequence object) to solve this. | |
| 441 | |
| 442 With the new Bio::DescribeableI interface, display_name aliases | |
| 443 to this method. | |
| 444 | |
| 445 Returns : A string | |
| 446 Args : None | |
| 447 | |
| 448 | |
| 449 =cut | |
| 450 | |
| 451 sub display_id { | |
| 452 my ($obj,$value) = @_; | |
| 453 if( defined $value) { | |
| 454 $obj->{'display_id'} = $value; | |
| 455 } | |
| 456 return $obj->{'display_id'}; | |
| 457 | |
| 458 } | |
| 459 | |
| 460 =head2 accession_number | |
| 461 | |
| 462 Title : accession_number or object_id | |
| 463 Usage : $unique_key = $obj->accession_number; | |
| 464 Function: Returns the unique biological id for a sequence, commonly | |
| 465 called the accession_number. For sequences from established | |
| 466 databases, the implementors should try to use the correct | |
| 467 accession number. Notice that primary_id() provides the | |
| 468 unique id for the implemetation, allowing multiple objects | |
| 469 to have the same accession number in a particular implementation. | |
| 470 | |
| 471 For sequences with no accession number, this method should | |
| 472 return "unknown". | |
| 473 | |
| 474 [Note this method name is likely to change in 1.3] | |
| 475 | |
| 476 With the new Bio::IdentifiableI interface, this is aliased | |
| 477 to object_id | |
| 478 | |
| 479 Returns : A string | |
| 480 Args : A string (optional) for setting | |
| 481 | |
| 482 =cut | |
| 483 | |
| 484 sub accession_number { | |
| 485 my( $obj, $acc ) = @_; | |
| 486 | |
| 487 if (defined $acc) { | |
| 488 $obj->{'accession_number'} = $acc; | |
| 489 } else { | |
| 490 $acc = $obj->{'accession_number'}; | |
| 491 $acc = 'unknown' unless defined $acc; | |
| 492 } | |
| 493 return $acc; | |
| 494 } | |
| 495 | |
| 496 =head2 primary_id | |
| 497 | |
| 498 Title : primary_id | |
| 499 Usage : $unique_key = $obj->primary_id; | |
| 500 Function: Returns the unique id for this object in this | |
| 501 implementation. This allows implementations to manage their | |
| 502 own object ids in a way the implementaiton can control | |
| 503 clients can expect one id to map to one object. | |
| 504 | |
| 505 For sequences with no natural primary id, this method | |
| 506 should return a stringified memory location. | |
| 507 | |
| 508 Returns : A string | |
| 509 Args : A string (optional, for setting) | |
| 510 | |
| 511 =cut | |
| 512 | |
| 513 sub primary_id { | |
| 514 my ($obj,$value) = @_; | |
| 515 if( defined $value) { | |
| 516 $obj->{'primary_id'} = $value; | |
| 517 } | |
| 518 if( ! exists $obj->{'primary_id'} ) { | |
| 519 return "$obj"; | |
| 520 } | |
| 521 return $obj->{'primary_id'}; | |
| 522 | |
| 523 } | |
| 524 | |
| 525 | |
| 526 =head2 alphabet | |
| 527 | |
| 528 Title : alphabet | |
| 529 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
| 530 Function: Returns the type of sequence being one of | |
| 531 'dna', 'rna' or 'protein'. This is case sensitive. | |
| 532 | |
| 533 This is not called <type> because this would cause | |
| 534 upgrade problems from the 0.5 and earlier Seq objects. | |
| 535 | |
| 536 Returns : a string either 'dna','rna','protein'. NB - the object must | |
| 537 make a call of the type - if there is no type specified it | |
| 538 has to guess. | |
| 539 Args : none | |
| 540 | |
| 541 | |
| 542 =cut | |
| 543 | |
| 544 sub alphabet { | |
| 545 my ($obj,$value) = @_; | |
| 546 if (defined $value) { | |
| 547 $value = lc $value; | |
| 548 unless ( $valid_type{$value} ) { | |
| 549 $obj->throw("Molecular type '$value' is not a valid type (". | |
| 550 join(',', map "'$_'", sort keys %valid_type) . | |
| 551 ") lowercase"); | |
| 552 } | |
| 553 $obj->{'alphabet'} = $value; | |
| 554 } | |
| 555 return $obj->{'alphabet'}; | |
| 556 } | |
| 557 | |
| 558 =head2 desc | |
| 559 | |
| 560 Title : desc or description | |
| 561 Usage : $obj->desc($newval) | |
| 562 Function: Get/set description of the sequence. | |
| 563 | |
| 564 description is an alias for this for compliance with the | |
| 565 Bio::DescribeableI interface. | |
| 566 | |
| 567 Example : | |
| 568 Returns : value of desc (a string) | |
| 569 Args : newvalue (a string or undef, optional) | |
| 570 | |
| 571 | |
| 572 =cut | |
| 573 | |
| 574 sub desc{ | |
| 575 my $self = shift; | |
| 576 | |
| 577 return $self->{'desc'} = shift if @_; | |
| 578 return $self->{'desc'}; | |
| 579 } | |
| 580 | |
| 581 =head2 can_call_new | |
| 582 | |
| 583 Title : can_call_new | |
| 584 Usage : | |
| 585 Function: | |
| 586 Example : | |
| 587 Returns : true | |
| 588 Args : | |
| 589 | |
| 590 | |
| 591 =cut | |
| 592 | |
| 593 sub can_call_new { | |
| 594 my ($self) = @_; | |
| 595 | |
| 596 return 1; | |
| 597 | |
| 598 } | |
| 599 | |
| 600 =head2 id | |
| 601 | |
| 602 Title : id | |
| 603 Usage : $id = $seq->id() | |
| 604 Function: This is mapped on display_id | |
| 605 Example : | |
| 606 Returns : | |
| 607 Args : | |
| 608 | |
| 609 | |
| 610 =cut | |
| 611 | |
| 612 sub id { | |
| 613 return shift->display_id(@_); | |
| 614 } | |
| 615 | |
| 616 =head2 is_circular | |
| 617 | |
| 618 Title : is_circular | |
| 619 Usage : if( $obj->is_circular) { /Do Something/ } | |
| 620 Function: Returns true if the molecule is circular | |
| 621 Returns : Boolean value | |
| 622 Args : none | |
| 623 | |
| 624 =cut | |
| 625 | |
| 626 sub is_circular{ | |
| 627 my $self = shift; | |
| 628 return $self->{'is_circular'} = shift if @_; | |
| 629 return $self->{'is_circular'}; | |
| 630 } | |
| 631 | |
| 632 =head1 Methods for Bio::IdentifiableI compliance | |
| 633 | |
| 634 =cut | |
| 635 | |
| 636 =head2 object_id | |
| 637 | |
| 638 Title : object_id | |
| 639 Usage : $string = $obj->object_id() | |
| 640 Function: a string which represents the stable primary identifier | |
| 641 in this namespace of this object. For DNA sequences this | |
| 642 is its accession_number, similarly for protein sequences | |
| 643 | |
| 644 This is aliased to accession_number(). | |
| 645 Returns : A scalar | |
| 646 | |
| 647 | |
| 648 =cut | |
| 649 | |
| 650 sub object_id { | |
| 651 return shift->accession_number(@_); | |
| 652 } | |
| 653 | |
| 654 =head2 version | |
| 655 | |
| 656 Title : version | |
| 657 Usage : $version = $obj->version() | |
| 658 Function: a number which differentiates between versions of | |
| 659 the same object. Higher numbers are considered to be | |
| 660 later and more relevant, but a single object described | |
| 661 the same identifier should represent the same concept | |
| 662 | |
| 663 Returns : A number | |
| 664 | |
| 665 =cut | |
| 666 | |
| 667 sub version{ | |
| 668 my ($self,$value) = @_; | |
| 669 if( defined $value) { | |
| 670 $self->{'_version'} = $value; | |
| 671 } | |
| 672 return $self->{'_version'}; | |
| 673 } | |
| 674 | |
| 675 | |
| 676 =head2 authority | |
| 677 | |
| 678 Title : authority | |
| 679 Usage : $authority = $obj->authority() | |
| 680 Function: a string which represents the organisation which | |
| 681 granted the namespace, written as the DNS name for | |
| 682 organisation (eg, wormbase.org) | |
| 683 | |
| 684 Returns : A scalar | |
| 685 | |
| 686 =cut | |
| 687 | |
| 688 sub authority { | |
| 689 my ($obj,$value) = @_; | |
| 690 if( defined $value) { | |
| 691 $obj->{'authority'} = $value; | |
| 692 } | |
| 693 return $obj->{'authority'}; | |
| 694 } | |
| 695 | |
| 696 =head2 namespace | |
| 697 | |
| 698 Title : namespace | |
| 699 Usage : $string = $obj->namespace() | |
| 700 Function: A string representing the name space this identifier | |
| 701 is valid in, often the database name or the name | |
| 702 describing the collection | |
| 703 | |
| 704 Returns : A scalar | |
| 705 | |
| 706 | |
| 707 =cut | |
| 708 | |
| 709 sub namespace{ | |
| 710 my ($self,$value) = @_; | |
| 711 if( defined $value) { | |
| 712 $self->{'namespace'} = $value; | |
| 713 } | |
| 714 return $self->{'namespace'} || ""; | |
| 715 } | |
| 716 | |
| 717 =head1 Methods for Bio::DescribableI compliance | |
| 718 | |
| 719 This comprises of display_name and description. | |
| 720 | |
| 721 =cut | |
| 722 | |
| 723 =head2 display_name | |
| 724 | |
| 725 Title : display_name | |
| 726 Usage : $string = $obj->display_name() | |
| 727 Function: A string which is what should be displayed to the user | |
| 728 the string should have no spaces (ideally, though a cautious | |
| 729 user of this interface would not assumme this) and should be | |
| 730 less than thirty characters (though again, double checking | |
| 731 this is a good idea) | |
| 732 | |
| 733 This is aliased to display_id(). | |
| 734 Returns : A scalar | |
| 735 | |
| 736 =cut | |
| 737 | |
| 738 sub display_name { | |
| 739 return shift->display_id(@_); | |
| 740 } | |
| 741 | |
| 742 =head2 description | |
| 743 | |
| 744 Title : description | |
| 745 Usage : $string = $obj->description() | |
| 746 Function: A text string suitable for displaying to the user a | |
| 747 description. This string is likely to have spaces, but | |
| 748 should not have any newlines or formatting - just plain | |
| 749 text. The string should not be greater than 255 characters | |
| 750 and clients can feel justified at truncating strings at 255 | |
| 751 characters for the purposes of display | |
| 752 | |
| 753 This is aliased to desc(). | |
| 754 Returns : A scalar | |
| 755 | |
| 756 =cut | |
| 757 | |
| 758 sub description { | |
| 759 return shift->desc(@_); | |
| 760 } | |
| 761 | |
| 762 =head1 Methods Inherited from Bio::PrimarySeqI | |
| 763 | |
| 764 These methods are available on Bio::PrimarySeq, although they are | |
| 765 actually implemented on Bio::PrimarySeqI | |
| 766 | |
| 767 =head2 revcom | |
| 768 | |
| 769 Title : revcom | |
| 770 Usage : $rev = $seq->revcom() | |
| 771 Function: Produces a new Bio::SeqI implementing object which | |
| 772 is the reversed complement of the sequence. For protein | |
| 773 sequences this throws an exception of | |
| 774 "Sequence is a protein. Cannot revcom" | |
| 775 | |
| 776 The id is the same id as the orginal sequence, and the | |
| 777 accession number is also indentical. If someone wants to | |
| 778 track that this sequence has be reversed, it needs to | |
| 779 define its own extensions | |
| 780 | |
| 781 To do an inplace edit of an object you can go: | |
| 782 | |
| 783 $seqobj = $seqobj->revcom(); | |
| 784 | |
| 785 This of course, causes Perl to handle the garbage | |
| 786 collection of the old object, but it is roughly speaking as | |
| 787 efficient as an inplace edit. | |
| 788 | |
| 789 Returns : A new (fresh) Bio::SeqI object | |
| 790 Args : none | |
| 791 | |
| 792 =cut | |
| 793 | |
| 794 =head2 trunc | |
| 795 | |
| 796 Title : trunc | |
| 797 Usage : $subseq = $myseq->trunc(10,100); | |
| 798 Function: Provides a truncation of a sequence, | |
| 799 | |
| 800 Example : | |
| 801 Returns : a fresh Bio::SeqI implementing object | |
| 802 Args : | |
| 803 | |
| 804 | |
| 805 =cut | |
| 806 | |
| 807 =head1 Internal methods | |
| 808 | |
| 809 These are internal methods to PrimarySeq | |
| 810 | |
| 811 =cut | |
| 812 | |
| 813 =head2 _guess_alphabet | |
| 814 | |
| 815 Title : _guess_alphabet | |
| 816 Usage : | |
| 817 Function: | |
| 818 Example : | |
| 819 Returns : | |
| 820 Args : | |
| 821 | |
| 822 | |
| 823 =cut | |
| 824 | |
| 825 sub _guess_alphabet { | |
| 826 my ($self) = @_; | |
| 827 my ($str,$str2,$total,$atgc,$u,$type); | |
| 828 | |
| 829 $str = $self->seq(); | |
| 830 $str =~ s/\-\.\?//g; | |
| 831 | |
| 832 $total = CORE::length($str); | |
| 833 if( $total == 0 ) { | |
| 834 $self->throw("Got a sequence with no letters in - ". | |
| 835 "cannot guess alphabet [$str]"); | |
| 836 } | |
| 837 | |
| 838 $u = ($str =~ tr/Uu//); | |
| 839 $atgc = ($str =~ tr/ATGCNatgcn//); | |
| 840 | |
| 841 if( ($atgc / $total) > 0.85 ) { | |
| 842 $type = 'dna'; | |
| 843 } elsif( (($atgc + $u) / $total) > 0.85 ) { | |
| 844 $type = 'rna'; | |
| 845 } else { | |
| 846 $type = 'protein'; | |
| 847 } | |
| 848 | |
| 849 $self->alphabet($type); | |
| 850 return $type; | |
| 851 } | |
| 852 | |
| 853 ############################################################################ | |
| 854 # aliases due to name changes or to compensate for our lack of consistency # | |
| 855 ############################################################################ | |
| 856 | |
| 857 sub accession { | |
| 858 my $self = shift; | |
| 859 | |
| 860 $self->warn(ref($self)."::accession is deprecated, ". | |
| 861 "use accession_number() instead"); | |
| 862 return $self->accession_number(@_); | |
| 863 } | |
| 864 | |
| 865 1; | |
| 866 |
