| 0 | 1 # $Id: Seq.pm,v 1.76.2.2 2003/07/03 20:01:32 jason Exp $ | 
|  | 2 # | 
|  | 3 # BioPerl module for Bio::Seq | 
|  | 4 # | 
|  | 5 # Cared for by Ewan Birney <birney@ebi.ac.uk> | 
|  | 6 # | 
|  | 7 # Copyright Ewan Birney | 
|  | 8 # | 
|  | 9 # You may distribute this module under the same terms as perl itself | 
|  | 10 | 
|  | 11 # POD documentation - main docs before the code | 
|  | 12 | 
|  | 13 =head1 NAME | 
|  | 14 | 
|  | 15 Bio::Seq - Sequence object, with features | 
|  | 16 | 
|  | 17 =head1 SYNOPSIS | 
|  | 18 | 
|  | 19     # This is the main sequence object in Bioperl | 
|  | 20 | 
|  | 21     # gets a sequence from a file | 
|  | 22     $seqio  = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat'); | 
|  | 23     $seqobj = $seqio->next_seq(); | 
|  | 24 | 
|  | 25     # SeqIO can both read and write sequences; see Bio::SeqIO | 
|  | 26     # for more information and examples | 
|  | 27 | 
|  | 28     # get from database | 
|  | 29     $db = Bio::DB::GenBank->new(); | 
|  | 30     $seqobj = $db->get_Seq_by_acc('X78121'); | 
|  | 31 | 
|  | 32     # make from strings in script | 
|  | 33     $seqobj = Bio::Seq->new( -display_id => 'my_id', | 
|  | 34 			     -seq => $sequence_as_string); | 
|  | 35 | 
|  | 36     # gets sequence as a string from sequence object | 
|  | 37     $seqstr   = $seqobj->seq(); # actual sequence as a string | 
|  | 38     $seqstr   = $seqobj->subseq(10,50); # slice in biological coordinates | 
|  | 39 | 
|  | 40     # retrieves information from the sequence | 
|  | 41     # features must implement Bio::SeqFeatureI interface | 
|  | 42 | 
|  | 43     @features = $seqobj->get_SeqFeatures(); # just top level | 
|  | 44     foreach my $feat ( @features ) { | 
|  | 45 	print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ", | 
|  | 46 	$feat->end," strand ",$feat->strand,"\n"; | 
|  | 47 | 
|  | 48         # features retain link to underlying sequence object | 
|  | 49         print "Feature sequence is ",$feat->seq->seq(),"\n" | 
|  | 50     } | 
|  | 51 | 
|  | 52     # sequences may have a species | 
|  | 53 | 
|  | 54     if( defined $seq->species ) { | 
|  | 55 	print "Sequence is from ",$species->binomial_name," [",$species->common_name,"]\n"; | 
|  | 56     } | 
|  | 57 | 
|  | 58     # annotation objects are Bio::AnnotationCollectionI's | 
|  | 59     $ann      = $seqobj->annotation(); # annotation object | 
|  | 60 | 
|  | 61     # references is one type of annotations to get. Also get | 
|  | 62     # comment and dblink. Look at Bio::AnnotationCollection for | 
|  | 63     # more information | 
|  | 64 | 
|  | 65     foreach my $ref ( $ann->get_Annotations('reference') ) { | 
|  | 66 	print "Reference ",$ref->title,"\n"; | 
|  | 67     } | 
|  | 68 | 
|  | 69     # you can get truncations, translations and reverse complements, these | 
|  | 70     # all give back Bio::Seq objects themselves, though currently with no | 
|  | 71     # features transfered | 
|  | 72 | 
|  | 73     my $trunc = $seqobj->trunc(100,200); | 
|  | 74     my $rev   = $seqobj->revcom(); | 
|  | 75 | 
|  | 76     # there are many options to translate - check out the docs | 
|  | 77     my $trans = $seqobj->translate(); | 
|  | 78 | 
|  | 79     # these functions can be chained together | 
|  | 80 | 
|  | 81     my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate(); | 
|  | 82 | 
|  | 83 | 
|  | 84 | 
|  | 85 =head1 DESCRIPTION | 
|  | 86 | 
|  | 87 A Seq object is a sequence with sequence features placed on it. The | 
|  | 88 Seq object contains a PrimarySeq object for the actual sequence and | 
|  | 89 also implements its interface. | 
|  | 90 | 
|  | 91 In Bioperl we have 3 main players that people are going to use frequently | 
|  | 92 | 
|  | 93   Bio::PrimarySeq  - just the sequence and its names, nothing else. | 
|  | 94   Bio::SeqFeatureI - a location on a sequence, potentially with a sequence | 
|  | 95                      and annotation. | 
|  | 96   Bio::Seq         - A sequence and a collection of sequence features | 
|  | 97                      (an aggregate) with its own annotation. | 
|  | 98 | 
|  | 99 Although Bioperl is not tied heavily to file formats these distinctions do | 
|  | 100 map to file formats sensibly and for some bioinformaticians this might help | 
|  | 101 | 
|  | 102   Bio::PrimarySeq  - Fasta file of a sequence | 
|  | 103   Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table | 
|  | 104   Bio::Seq         - A single EMBL/GenBank/DDBJ entry | 
|  | 105 | 
|  | 106 By having this split we avoid a lot of nasty circular references | 
|  | 107 (sequence features can hold a reference to a sequence without the sequence | 
|  | 108 holding a reference to the sequence feature). See L<Bio::PrimarySeq> and | 
|  | 109 L<Bio::SeqFeatureI> for more information. | 
|  | 110 | 
|  | 111 Ian Korf really helped in the design of the Seq and SeqFeature system. | 
|  | 112 | 
|  | 113 =head1 EXAMPLES | 
|  | 114 | 
|  | 115 A simple and fundamental block of code | 
|  | 116 | 
|  | 117   use Bio::SeqIO; | 
|  | 118 | 
|  | 119   my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object | 
|  | 120   my $seqobj = $seqIOobj->next_seq;              # get a Seq object | 
|  | 121 | 
|  | 122 With the Seq object in hand one has access to a powerful set of Bioperl | 
|  | 123 methods and Bioperl objects. This next script will take a file of sequences | 
|  | 124 in EMBL format and create a file of the reverse-complemented sequences | 
|  | 125 in Fasta format using Seq objects. It also prints out details about the | 
|  | 126 exons it finds as sequence features in Genbank Flat File format. | 
|  | 127 | 
|  | 128   use Bio::Seq; | 
|  | 129   use Bio::SeqIO; | 
|  | 130 | 
|  | 131   $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | 
|  | 132   $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | 
|  | 133 | 
|  | 134   while((my $seqobj = $seqin->next_seq())) { | 
|  | 135       print "Seen sequence ",$seqobj->display_id,", start of seq ", | 
|  | 136             substr($seqobj->seq,1,10),"\n"; | 
|  | 137       if( $seqobj->alphabet eq 'dna') { | 
|  | 138 	    $rev = $seqobj->revcom; | 
|  | 139 	    $id  = $seqobj->display_id(); | 
|  | 140             $id  = "$id.rev"; | 
|  | 141             $rev->display_id($id); | 
|  | 142             $seqout->write_seq($rev); | 
|  | 143       } | 
|  | 144 | 
|  | 145       foreach $feat ( $seqobj->get_SeqFeatures() ) { | 
|  | 146            if( $feat->primary_tag eq 'exon' ) { | 
|  | 147               print STDOUT "Location ",$feat->start,":", | 
|  | 148                     $feat->end," GFF[",$feat->gff_string,"]\n"; | 
|  | 149 	   } | 
|  | 150       } | 
|  | 151   } | 
|  | 152 | 
|  | 153 Let's examine the script. The lines below import the Bioperl modules. | 
|  | 154 Seq is the main Bioperl sequence object and SeqIO is the Bioperl support | 
|  | 155 for reading sequences from files and to files | 
|  | 156 | 
|  | 157   use Bio::Seq; | 
|  | 158   use Bio::SeqIO; | 
|  | 159 | 
|  | 160 These two lines create two SeqIO streams: one for reading in sequences | 
|  | 161 and one for outputting sequences: | 
|  | 162 | 
|  | 163   $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | 
|  | 164   $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | 
|  | 165 | 
|  | 166 Notice that in the "$seqout" case there is a greater-than sign, | 
|  | 167 indicating the file is being opened for writing. | 
|  | 168 | 
|  | 169 Using the | 
|  | 170 | 
|  | 171   '-argument' => value | 
|  | 172 | 
|  | 173 syntax is common in Bioperl. The file argument is like an argument | 
|  | 174 to open() . You can also pass in filehandles or FileHandle objects by | 
|  | 175 using the -fh argument (see L<Bio::SeqIO> documentation for details). | 
|  | 176 Many formats in Bioperl are handled, including Fasta, EMBL, GenBank, | 
|  | 177 Swissprot (swiss), PIR, and GCG. | 
|  | 178 | 
|  | 179   $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | 
|  | 180   $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | 
|  | 181 | 
|  | 182 This is the main loop which will loop progressively through sequences | 
|  | 183 in a file, and each call to $seqio-E<gt>next_seq() provides a new Seq | 
|  | 184 object from the file: | 
|  | 185 | 
|  | 186   while((my $seqobj = $seqio->next_seq())) { | 
|  | 187 | 
|  | 188 This print line below accesses fields in the Seq object directly. The | 
|  | 189 $seqobj-E<gt>display_id is the way to access the display_id attribute | 
|  | 190 of the Seq object. The $seqobj-E<gt>seq method gets the actual | 
|  | 191 sequence out as string. Then you can do manipulation of this if | 
|  | 192 you want to (there are however easy ways of doing truncation, | 
|  | 193 reverse-complement and translation). | 
|  | 194 | 
|  | 195   print "Seen sequence ",$seqobj->display_id,", start of seq ", | 
|  | 196                substr($seqobj->seq,1,10),"\n"; | 
|  | 197 | 
|  | 198 Bioperl has to guess the alphabet of the sequence, being either 'dna', | 
|  | 199 'rna', or 'protein'. The alphabet attribute is one of these three | 
|  | 200 possibilities. | 
|  | 201 | 
|  | 202   if( $seqobj->alphabet eq 'dna') { | 
|  | 203 | 
|  | 204 The $seqobj-E<gt>revcom method provides the reverse complement of the Seq | 
|  | 205 object as another Seq object. Thus, the $rev variable is a reference to | 
|  | 206 another Seq object. For example, one could repeat the above print line | 
|  | 207 for this Seq object (putting $rev in place of $seqobj). In this | 
|  | 208 case we are going to output the object into the file stream we built | 
|  | 209 earlier on. | 
|  | 210 | 
|  | 211   $rev = $seqobj->revcom; | 
|  | 212 | 
|  | 213 When we output it, we want the id of the outputted object | 
|  | 214 to be changed to "$id.rev", ie, with .rev on the end of the name. The | 
|  | 215 following lines retrieve the id of the sequence object, add .rev | 
|  | 216 to this and then set the display_id of the rev sequence object to | 
|  | 217 this. Notice that to set the display_id attribute you just need | 
|  | 218 call the same method, display_id(), with the new value as an argument. | 
|  | 219 Getting and setting values with the same method is common in Bioperl. | 
|  | 220 | 
|  | 221   $id  = $seqobj->display_id(); | 
|  | 222   $id  = "$id.rev"; | 
|  | 223   $rev->display_id($id); | 
|  | 224 | 
|  | 225 The write_seq method on the SeqIO output object, $seqout, writes the | 
|  | 226 $rev object to the filestream we built at the top of the script. | 
|  | 227 The filestream knows that it is outputting in fasta format, and | 
|  | 228 so it provides fasta output. | 
|  | 229 | 
|  | 230   $seqout->write_seq($rev); | 
|  | 231 | 
|  | 232 This block of code loops over sequence features in the sequence | 
|  | 233 object, trying to find ones who have been tagged as 'exon'. | 
|  | 234 Features have start and end attributes and can be outputted | 
|  | 235 in Genbank Flat File format, GFF, a standarized format for sequence | 
|  | 236 features. | 
|  | 237 | 
|  | 238   foreach $feat ( $seqobj->get_SeqFeatures() ) { | 
|  | 239       if( $feat->primary_tag eq 'exon' ) { | 
|  | 240           print STDOUT "Location ",$feat->start,":", | 
|  | 241              $feat->end," GFF[",$feat->gff_string,"]\n"; | 
|  | 242       } | 
|  | 243   } | 
|  | 244 | 
|  | 245 The code above shows how a few Bio::Seq methods suffice to read, parse, | 
|  | 246 reformat and analyze sequences from a file. A full list of methods | 
|  | 247 available to Bio::Seq objects is shown below. Bear in mind that some of | 
|  | 248 these methods come from PrimarySeq objects, which are simpler | 
|  | 249 than Seq objects, stripped of features (see L<Bio::PrimarySeq> for | 
|  | 250 more information). | 
|  | 251 | 
|  | 252   # these methods return strings, and accept strings in some cases: | 
|  | 253 | 
|  | 254   $seqobj->seq();              # string of sequence | 
|  | 255   $seqobj->subseq(5,10);       # part of the sequence as a string | 
|  | 256   $seqobj->accession_number(); # when there, the accession number | 
|  | 257   $seqobj->moltype();          # one of 'dna','rna',or 'protein' | 
|  | 258   $seqobj->seq_version()       # when there, the version | 
|  | 259   $seqobj->keywords();         # when there, the Keywords line | 
|  | 260   $seqobj->length()            # length | 
|  | 261   $seqobj->desc();             # description | 
|  | 262   $seqobj->primary_id();       # a unique id for this sequence regardless | 
|  | 263                                # of its display_id or accession number | 
|  | 264   $seqobj->display_id();       # the human readable id of the sequence | 
|  | 265 | 
|  | 266 Some of these values map to fields in common formats. For example, The | 
|  | 267 display_id() method returns the LOCUS name of a Genbank entry, | 
|  | 268 the (\S+) following the E<gt> character in a Fasta file, the ID from | 
|  | 269 a SwissProt file, and so on. The desc() method will return the DEFINITION | 
|  | 270 line of a Genbank file, the description following the display_id in a | 
|  | 271 Fasta file, and the DE field in a SwissProt file. | 
|  | 272 | 
|  | 273   # the following methods return new Seq objects, but | 
|  | 274   # do not transfer features across to the new object: | 
|  | 275 | 
|  | 276   $seqobj->trunc(5,10)  # truncation from 5 to 10 as new object | 
|  | 277   $seqobj->revcom       # reverse complements sequence | 
|  | 278   $seqobj->translate    # translation of the sequence | 
|  | 279 | 
|  | 280   # if new() can be called this method returns 1, else 0 | 
|  | 281 | 
|  | 282   $seqobj->can_call_new | 
|  | 283 | 
|  | 284   # the following method determines if the given string will be accepted | 
|  | 285   # by the seq() method - if the string is acceptable then validate() | 
|  | 286   # returns 1, or 0 if not | 
|  | 287 | 
|  | 288   $seqobj->validate_seq($string) | 
|  | 289 | 
|  | 290   # the following method returns or accepts a Species object: | 
|  | 291 | 
|  | 292   $seqobj->species(); | 
|  | 293 | 
|  | 294 Please see L<Bio::Species> for more information on this object. | 
|  | 295 | 
|  | 296   # the following method returns or accepts an Annotation object | 
|  | 297   # which in turn allows access to Annotation::Reference | 
|  | 298   # and Annotation::Comment objects: | 
|  | 299 | 
|  | 300   $seqobj->annotation(); | 
|  | 301 | 
|  | 302 These annotations typically refer to entire sequences, unlike | 
|  | 303 features.  See L<Bio::AnnotationCollectionI>, | 
|  | 304 L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and | 
|  | 305 L<Bio::Annotation::Comment> for details. | 
|  | 306 | 
|  | 307 It is also important to be able to describe defined portions of a | 
|  | 308 sequence. The combination of some description and the corresponding | 
|  | 309 sub-sequence is called a feature - an exon and its coordinates within | 
|  | 310 a gene is an example of a feature, or a domain within a protein. | 
|  | 311 | 
|  | 312   # the following methods return an array of SeqFeatureI objects: | 
|  | 313 | 
|  | 314   $seqobj->get_SeqFeatures # The 'top level' sequence features | 
|  | 315   $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq | 
|  | 316                                # features, such as features in an exon | 
|  | 317 | 
|  | 318   # to find out the number of features use: | 
|  | 319 | 
|  | 320   $seqobj->feature_count | 
|  | 321 | 
|  | 322 Here are just some of the methods available to SeqFeatureI objects: | 
|  | 323 | 
|  | 324   # these methods return numbers: | 
|  | 325 | 
|  | 326   $feat->start          # start position (1 is the first base) | 
|  | 327   $feat->end            # end position (2 is the second base) | 
|  | 328   $feat->strand         # 1 means forward, -1 reverse, 0 not relevant | 
|  | 329 | 
|  | 330   # these methods return or accept strings: | 
|  | 331 | 
|  | 332   $feat->primary_tag    # the name of the sequence feature, eg | 
|  | 333                         # 'exon', 'glycoslyation site', 'TM domain' | 
|  | 334   $feat->source_tag     # where the feature comes from, eg, 'EMBL_GenBank', | 
|  | 335                         # or 'BLAST' | 
|  | 336 | 
|  | 337   # this method returns the more austere PrimarySeq object, not a | 
|  | 338   # Seq object - the main difference is that PrimarySeq objects do not | 
|  | 339   # themselves contain sequence features | 
|  | 340 | 
|  | 341   $feat->seq            # the sequence between start,end on the | 
|  | 342                         # correct strand of the sequence | 
|  | 343 | 
|  | 344 See L<Bio::PrimarySeq> for more details on PrimarySeq objects. | 
|  | 345 | 
|  | 346   # useful methods for feature comparisons, for start/end points | 
|  | 347 | 
|  | 348   $feat->overlaps($other)  # do $feat and $other overlap? | 
|  | 349   $feat->contains($other)  # is $other completely within $feat? | 
|  | 350   $feat->equals($other)    # do $feat and $other completely agree? | 
|  | 351 | 
|  | 352   # one can also add features | 
|  | 353 | 
|  | 354   $seqobj->add_SeqFeature($feat)     # returns 1 if successful | 
|  | 355   $seqobj->add_SeqFeature(@features) # returns 1 if successful | 
|  | 356 | 
|  | 357   # sub features. For complex join() statements, the feature | 
|  | 358   # is one sequence feature with many sub SeqFeatures | 
|  | 359 | 
|  | 360   $feat->sub_SeqFeature  # returns array of sub seq features | 
|  | 361 | 
|  | 362 Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>, | 
|  | 363 for more information on sequence features. | 
|  | 364 | 
|  | 365 It is worth mentioning that one can also retrieve the start and end | 
|  | 366 positions of a feature using a Bio::LocationI object: | 
|  | 367 | 
|  | 368   $location = $feat->location # $location is a Bio::LocationI object | 
|  | 369   $location->start;           # start position | 
|  | 370   $location->end;             # end position | 
|  | 371 | 
|  | 372 This is useful because one needs a Bio::Location::SplitLocationI object | 
|  | 373 in order to retrieve the coordinates inside the Genbank or EMBL join() | 
|  | 374 statements (e.g. "CDS    join(51..142,273..495,1346..1474)"): | 
|  | 375 | 
|  | 376   if ( $feat->location->isa('Bio::Location::SplitLocationI') && | 
|  | 377 	       $feat->primary_tag eq 'CDS' )  { | 
|  | 378     foreach $loc ( $feat->location->sub_Location ) { | 
|  | 379       print $loc->start . ".." . $loc->end . "\n"; | 
|  | 380     } | 
|  | 381   } | 
|  | 382 | 
|  | 383 See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more | 
|  | 384 information. | 
|  | 385 | 
|  | 386 =head1 Implemented Interfaces | 
|  | 387 | 
|  | 388 This class implements the following interfaces. | 
|  | 389 | 
|  | 390 =over 4 | 
|  | 391 | 
|  | 392 =item Bio::SeqI | 
|  | 393 | 
|  | 394 Note that this includes implementing Bio::PrimarySeqI. | 
|  | 395 | 
|  | 396 =item Bio::IdentifiableI | 
|  | 397 | 
|  | 398 =item Bio::DescribableI | 
|  | 399 | 
|  | 400 =item Bio::AnnotatableI | 
|  | 401 | 
|  | 402 =item Bio::FeatureHolderI | 
|  | 403 | 
|  | 404 =back | 
|  | 405 | 
|  | 406 =head1 FEEDBACK | 
|  | 407 | 
|  | 408 | 
|  | 409 =head2 Mailing Lists | 
|  | 410 | 
|  | 411 User feedback is an integral part of the evolution of this and other | 
|  | 412 Bioperl modules. Send your comments and suggestions preferably to one | 
|  | 413 of the Bioperl mailing lists. Your participation is much appreciated. | 
|  | 414 | 
|  | 415   bioperl-l@bioperl.org              - General discussion | 
|  | 416   http://bio.perl.org/MailList.html  - About the mailing lists | 
|  | 417 | 
|  | 418 =head2 Reporting Bugs | 
|  | 419 | 
|  | 420 Report bugs to the Bioperl bug tracking system to help us keep track | 
|  | 421 the bugs and their resolution.  Bug reports can be submitted via email | 
|  | 422 or the web: | 
|  | 423 | 
|  | 424   bioperl-bugs@bioperl.org | 
|  | 425   http://bugzilla.bioperl.org/ | 
|  | 426 | 
|  | 427 =head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects | 
|  | 428 | 
|  | 429 Email birney@ebi.ac.uk | 
|  | 430 | 
|  | 431 =head1 CONTRIBUTORS | 
|  | 432 | 
|  | 433 Jason Stajich E<lt>jason@bioperl.orgE<gt> | 
|  | 434 | 
|  | 435 =head1 APPENDIX | 
|  | 436 | 
|  | 437 | 
|  | 438 The rest of the documentation details each of the object | 
|  | 439 methods. Internal methods are usually preceded with a "_". | 
|  | 440 | 
|  | 441 =cut | 
|  | 442 | 
|  | 443 #' | 
|  | 444 # Let the code begin... | 
|  | 445 | 
|  | 446 | 
|  | 447 package Bio::Seq; | 
|  | 448 use vars qw(@ISA $VERSION); | 
|  | 449 use strict; | 
|  | 450 | 
|  | 451 | 
|  | 452 # Object preamble - inherits from Bio::Root::Object | 
|  | 453 | 
|  | 454 use Bio::Root::Root; | 
|  | 455 use Bio::SeqI; | 
|  | 456 use Bio::Annotation::Collection; | 
|  | 457 use Bio::PrimarySeq; | 
|  | 458 use Bio::IdentifiableI; | 
|  | 459 use Bio::DescribableI; | 
|  | 460 use Bio::AnnotatableI; | 
|  | 461 use Bio::FeatureHolderI; | 
|  | 462 | 
|  | 463 $VERSION = '1.1'; | 
|  | 464 @ISA = qw(Bio::Root::Root Bio::SeqI | 
|  | 465 	  Bio::IdentifiableI Bio::DescribableI | 
|  | 466 	  Bio::AnnotatableI Bio::FeatureHolderI); | 
|  | 467 | 
|  | 468 =head2 new | 
|  | 469 | 
|  | 470  Title   : new | 
|  | 471  Usage   : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | 
|  | 472                                  -id  => 'human_id', | 
|  | 473 				 -accession_number => 'AL000012', | 
|  | 474 			       ); | 
|  | 475 | 
|  | 476  Function: Returns a new Seq object from | 
|  | 477            basic constructors, being a string for the sequence | 
|  | 478            and strings for id and accession_number | 
|  | 479  Returns : a new Bio::Seq object | 
|  | 480 | 
|  | 481 =cut | 
|  | 482 | 
|  | 483 sub new { | 
|  | 484     my($caller,@args) = @_; | 
|  | 485 | 
|  | 486     if( $caller ne 'Bio::Seq') { | 
|  | 487 	$caller = ref($caller) if ref($caller); | 
|  | 488     } | 
|  | 489 | 
|  | 490     # we know our inherietance heirarchy | 
|  | 491     my $self = Bio::Root::Root->new(@args); | 
|  | 492     bless $self,$caller; | 
|  | 493 | 
|  | 494     # this is way too sneaky probably. We delegate the construction of | 
|  | 495     # the Seq object onto PrimarySeq and then pop primary_seq into | 
|  | 496     # our primary_seq slot | 
|  | 497 | 
|  | 498     my $pseq = Bio::PrimarySeq->new(@args); | 
|  | 499 | 
|  | 500     # as we have just made this, we know it is ok to set hash directly | 
|  | 501     # rather than going through the method | 
|  | 502 | 
|  | 503     $self->{'primary_seq'} = $pseq; | 
|  | 504 | 
|  | 505     # setting this array is now delayed until the final | 
|  | 506     # moment, again speed ups for non feature containing things | 
|  | 507     # $self->{'_as_feat'} = []; | 
|  | 508 | 
|  | 509 | 
|  | 510     my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args); | 
|  | 511 | 
|  | 512     # for a number of cases - reading fasta files - these are never set. This | 
|  | 513     # gives a quick optimisation around testing things later on | 
|  | 514 | 
|  | 515     if( defined $ann || defined $pid || defined $feat || defined $species ) { | 
|  | 516 	$pid && $self->primary_id($pid); | 
|  | 517 	$species && $self->species($species); | 
|  | 518 	$ann && $self->annotation($ann); | 
|  | 519 | 
|  | 520 	if( defined $feat ) { | 
|  | 521 	    if( ref($feat) !~ /ARRAY/i ) { | 
|  | 522 		if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) { | 
|  | 523 		    $self->add_SeqFeature($feat); | 
|  | 524 		} else { | 
|  | 525 		    $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self)); | 
|  | 526 		} | 
|  | 527 	    } else { | 
|  | 528 		foreach my $feature ( @$feat ) { | 
|  | 529 		    $self->add_SeqFeature($feature); | 
|  | 530 		} | 
|  | 531 	    } | 
|  | 532 	} | 
|  | 533     } | 
|  | 534 | 
|  | 535     return $self; | 
|  | 536 } | 
|  | 537 | 
|  | 538 =head1 PrimarySeq interface | 
|  | 539 | 
|  | 540 | 
|  | 541 The PrimarySeq interface provides the basic sequence getting | 
|  | 542 and setting methods for on all sequences. | 
|  | 543 | 
|  | 544 These methods implement the Bio::PrimarySeq interface by delegating | 
|  | 545 to the primary_seq inside the object. This means that you | 
|  | 546 can use a Seq object wherever there is a PrimarySeq, and | 
|  | 547 of course, you are free to use these functions anyway. | 
|  | 548 | 
|  | 549 =cut | 
|  | 550 | 
|  | 551 =head2 seq | 
|  | 552 | 
|  | 553  Title   : seq | 
|  | 554  Usage   : $string = $obj->seq() | 
|  | 555  Function: Get/Set the sequence as a string of letters. The | 
|  | 556            case of the letters is left up to the implementer. | 
|  | 557            Suggested cases are upper case for proteins and lower case for | 
|  | 558            DNA sequence (IUPAC standard), | 
|  | 559            but implementations are suggested to keep an open mind about | 
|  | 560            case (some users... want mixed case!) | 
|  | 561  Returns : A scalar | 
|  | 562  Args    : Optionally on set the new value (a string). An optional second | 
|  | 563            argument presets the alphabet (otherwise it will be guessed). | 
|  | 564            Both parameters may also be given in named paramater style | 
|  | 565            with -seq and -alphabet being the names. | 
|  | 566 | 
|  | 567 =cut | 
|  | 568 | 
|  | 569 sub seq { | 
|  | 570     return shift->primary_seq()->seq(@_); | 
|  | 571 } | 
|  | 572 | 
|  | 573 =head2 validate_seq | 
|  | 574 | 
|  | 575  Title   : validate_seq | 
|  | 576  Usage   : if(! $seq->validate_seq($seq_str) ) { | 
|  | 577                 print "sequence $seq_str is not valid for an object of type ", | 
|  | 578 		      ref($seq), "\n"; | 
|  | 579 	   } | 
|  | 580  Function: Validates a given sequence string. A validating sequence string | 
|  | 581            must be accepted by seq(). A string that does not validate will | 
|  | 582            lead to an exception if passed to seq(). | 
|  | 583 | 
|  | 584            The implementation provided here does not take alphabet() into | 
|  | 585            account. Allowed are all letters (A-Z) and '-','.', and '*'. | 
|  | 586 | 
|  | 587  Example : | 
|  | 588  Returns : 1 if the supplied sequence string is valid for the object, and | 
|  | 589            0 otherwise. | 
|  | 590  Args    : The sequence string to be validated. | 
|  | 591 | 
|  | 592 | 
|  | 593 =cut | 
|  | 594 | 
|  | 595 sub validate_seq { | 
|  | 596     return shift->primary_seq()->validate_seq(@_); | 
|  | 597 } | 
|  | 598 | 
|  | 599 =head2 length | 
|  | 600 | 
|  | 601  Title   : length | 
|  | 602  Usage   : $len = $seq->length() | 
|  | 603  Function: | 
|  | 604  Example : | 
|  | 605  Returns : Integer representing the length of the sequence. | 
|  | 606  Args    : None | 
|  | 607 | 
|  | 608 =cut | 
|  | 609 | 
|  | 610 sub length { | 
|  | 611     return shift->primary_seq()->length(@_); | 
|  | 612 } | 
|  | 613 | 
|  | 614 =head1 Methods from the Bio::PrimarySeqI interface | 
|  | 615 | 
|  | 616 =cut | 
|  | 617 | 
|  | 618 =head2 subseq | 
|  | 619 | 
|  | 620  Title   : subseq | 
|  | 621  Usage   : $substring = $obj->subseq(10,40); | 
|  | 622  Function: Returns the subseq from start to end, where the first base | 
|  | 623            is 1 and the number is inclusive, ie 1-2 are the first two | 
|  | 624            bases of the sequence | 
|  | 625 | 
|  | 626            Start cannot be larger than end but can be equal | 
|  | 627 | 
|  | 628  Returns : A string | 
|  | 629  Args    : 2 integers | 
|  | 630 | 
|  | 631 | 
|  | 632 =cut | 
|  | 633 | 
|  | 634 sub subseq { | 
|  | 635     return shift->primary_seq()->subseq(@_); | 
|  | 636 } | 
|  | 637 | 
|  | 638 =head2 display_id | 
|  | 639 | 
|  | 640  Title   : display_id | 
|  | 641  Usage   : $id = $obj->display_id or $obj->display_id($newid); | 
|  | 642  Function: Gets or sets the display id, also known as the common name of | 
|  | 643            the Seq object. | 
|  | 644 | 
|  | 645            The semantics of this is that it is the most likely string | 
|  | 646            to be used as an identifier of the sequence, and likely to | 
|  | 647            have "human" readability.  The id is equivalent to the LOCUS | 
|  | 648            field of the GenBank/EMBL databanks and the ID field of the | 
|  | 649            Swissprot/sptrembl database. In fasta format, the >(\S+) is | 
|  | 650            presumed to be the id, though some people overload the id | 
|  | 651            to embed other information. Bioperl does not use any | 
|  | 652            embedded information in the ID field, and people are | 
|  | 653            encouraged to use other mechanisms (accession field for | 
|  | 654            example, or extending the sequence object) to solve this. | 
|  | 655 | 
|  | 656            Notice that $seq->id() maps to this function, mainly for | 
|  | 657            legacy/convenience issues. | 
|  | 658  Returns : A string | 
|  | 659  Args    : None or a new id | 
|  | 660 | 
|  | 661 | 
|  | 662 =cut | 
|  | 663 | 
|  | 664 sub display_id { | 
|  | 665    return shift->primary_seq->display_id(@_); | 
|  | 666 } | 
|  | 667 | 
|  | 668 | 
|  | 669 | 
|  | 670 =head2 accession_number | 
|  | 671 | 
|  | 672  Title   : accession_number | 
|  | 673  Usage   : $unique_biological_key = $obj->accession_number; | 
|  | 674  Function: Returns the unique biological id for a sequence, commonly | 
|  | 675            called the accession_number. For sequences from established | 
|  | 676            databases, the implementors should try to use the correct | 
|  | 677            accession number. Notice that primary_id() provides the | 
|  | 678            unique id for the implemetation, allowing multiple objects | 
|  | 679            to have the same accession number in a particular implementation. | 
|  | 680 | 
|  | 681            For sequences with no accession number, this method should return | 
|  | 682            "unknown". | 
|  | 683 | 
|  | 684            Can also be used to set the accession number. | 
|  | 685  Example : $key = $seq->accession_number or $seq->accession_number($key) | 
|  | 686  Returns : A string | 
|  | 687  Args    : None or an accession number | 
|  | 688 | 
|  | 689 | 
|  | 690 =cut | 
|  | 691 | 
|  | 692 sub accession_number { | 
|  | 693    return shift->primary_seq->accession_number(@_); | 
|  | 694 } | 
|  | 695 | 
|  | 696 =head2 desc | 
|  | 697 | 
|  | 698  Title   : desc | 
|  | 699  Usage   : $seqobj->desc($string) or $seqobj->desc() | 
|  | 700  Function: Sets or gets the description of the sequence | 
|  | 701  Example : | 
|  | 702  Returns : The description | 
|  | 703  Args    : The description or none | 
|  | 704 | 
|  | 705 | 
|  | 706 =cut | 
|  | 707 | 
|  | 708 sub desc { | 
|  | 709    return shift->primary_seq->desc(@_); | 
|  | 710 } | 
|  | 711 | 
|  | 712 =head2 primary_id | 
|  | 713 | 
|  | 714  Title   : primary_id | 
|  | 715  Usage   : $unique_implementation_key = $obj->primary_id; | 
|  | 716  Function: Returns the unique id for this object in this | 
|  | 717            implementation. This allows implementations to manage | 
|  | 718            their own object ids in a way the implementation can control | 
|  | 719            clients can expect one id to map to one object. | 
|  | 720 | 
|  | 721            For sequences with no natural id, this method should return | 
|  | 722            a stringified memory location. | 
|  | 723 | 
|  | 724            Can also be used to set the primary_id. | 
|  | 725 | 
|  | 726            Also notice that this method is not delegated to the | 
|  | 727            internal Bio::PrimarySeq object | 
|  | 728 | 
|  | 729            [Note this method name is likely to change in 1.3] | 
|  | 730 | 
|  | 731  Example : $id = $seq->primary_id or $seq->primary_id($id) | 
|  | 732  Returns : A string | 
|  | 733  Args    : None or an id | 
|  | 734 | 
|  | 735 | 
|  | 736 =cut | 
|  | 737 | 
|  | 738 sub primary_id { | 
|  | 739    my ($obj,$value) = @_; | 
|  | 740 | 
|  | 741    if( defined $value) { | 
|  | 742       $obj->{'primary_id'} = $value; | 
|  | 743     } | 
|  | 744    if( ! exists $obj->{'primary_id'} ) { | 
|  | 745        return "$obj"; | 
|  | 746    } | 
|  | 747    return $obj->{'primary_id'}; | 
|  | 748 } | 
|  | 749 | 
|  | 750 =head2 can_call_new | 
|  | 751 | 
|  | 752  Title   : can_call_new | 
|  | 753  Usage   : if ( $obj->can_call_new ) { | 
|  | 754              $newobj = $obj->new( %param ); | 
|  | 755 	   } | 
|  | 756  Function: can_call_new returns 1 or 0 depending | 
|  | 757            on whether an implementation allows new | 
|  | 758            constructor to be called. If a new constructor | 
|  | 759            is allowed, then it should take the followed hashed | 
|  | 760            constructor list. | 
|  | 761 | 
|  | 762            $myobject->new( -seq => $sequence_as_string, | 
|  | 763 			   -display_id  => $id | 
|  | 764 			   -accession_number => $accession | 
|  | 765 			   -alphabet => 'dna', | 
|  | 766 			 ); | 
|  | 767  Example : | 
|  | 768  Returns : 1 or 0 | 
|  | 769  Args    : None | 
|  | 770 | 
|  | 771 | 
|  | 772 =cut | 
|  | 773 | 
|  | 774 sub can_call_new { | 
|  | 775     return 1; | 
|  | 776 } | 
|  | 777 | 
|  | 778 =head2 alphabet | 
|  | 779 | 
|  | 780  Title   : alphabet | 
|  | 781  Usage   : if ( $obj->alphabet eq 'dna' ) { /Do Something/ } | 
|  | 782  Function: Returns the type of sequence being one of | 
|  | 783            'dna', 'rna' or 'protein'. This is case sensitive. | 
|  | 784 | 
|  | 785            This is not called <type> because this would cause | 
|  | 786            upgrade problems from the 0.5 and earlier Seq objects. | 
|  | 787 | 
|  | 788  Returns : A string either 'dna','rna','protein'. NB - the object must | 
|  | 789            make a call of the type - if there is no type specified it | 
|  | 790            has to guess. | 
|  | 791  Args    : None | 
|  | 792 | 
|  | 793 | 
|  | 794 =cut | 
|  | 795 | 
|  | 796 sub alphabet { | 
|  | 797    my $self = shift; | 
|  | 798    return $self->primary_seq->alphabet(@_) if @_ && defined $_[0]; | 
|  | 799    return $self->primary_seq->alphabet(); | 
|  | 800 } | 
|  | 801 | 
|  | 802 sub is_circular { shift->primary_seq->is_circular } | 
|  | 803 | 
|  | 804 =head1 Methods for Bio::IdentifiableI compliance | 
|  | 805 | 
|  | 806 =cut | 
|  | 807 | 
|  | 808 =head2 object_id | 
|  | 809 | 
|  | 810  Title   : object_id | 
|  | 811  Usage   : $string    = $obj->object_id() | 
|  | 812  Function: a string which represents the stable primary identifier | 
|  | 813            in this namespace of this object. For DNA sequences this | 
|  | 814            is its accession_number, similarly for protein sequences | 
|  | 815 | 
|  | 816            This is aliased to accession_number(). | 
|  | 817  Returns : A scalar | 
|  | 818 | 
|  | 819 | 
|  | 820 =cut | 
|  | 821 | 
|  | 822 sub object_id { | 
|  | 823     return shift->accession_number(@_); | 
|  | 824 } | 
|  | 825 | 
|  | 826 =head2 version | 
|  | 827 | 
|  | 828  Title   : version | 
|  | 829  Usage   : $version    = $obj->version() | 
|  | 830  Function: a number which differentiates between versions of | 
|  | 831            the same object. Higher numbers are considered to be | 
|  | 832            later and more relevant, but a single object described | 
|  | 833            the same identifier should represent the same concept | 
|  | 834 | 
|  | 835  Returns : A number | 
|  | 836 | 
|  | 837 =cut | 
|  | 838 | 
|  | 839 sub version{ | 
|  | 840     return shift->primary_seq->version(@_); | 
|  | 841 } | 
|  | 842 | 
|  | 843 | 
|  | 844 =head2 authority | 
|  | 845 | 
|  | 846  Title   : authority | 
|  | 847  Usage   : $authority    = $obj->authority() | 
|  | 848  Function: a string which represents the organisation which | 
|  | 849            granted the namespace, written as the DNS name for | 
|  | 850            organisation (eg, wormbase.org) | 
|  | 851 | 
|  | 852  Returns : A scalar | 
|  | 853 | 
|  | 854 =cut | 
|  | 855 | 
|  | 856 sub authority { | 
|  | 857     return shift->primary_seq()->authority(@_); | 
|  | 858 } | 
|  | 859 | 
|  | 860 =head2 namespace | 
|  | 861 | 
|  | 862  Title   : namespace | 
|  | 863  Usage   : $string    = $obj->namespace() | 
|  | 864  Function: A string representing the name space this identifier | 
|  | 865            is valid in, often the database name or the name | 
|  | 866            describing the collection | 
|  | 867 | 
|  | 868  Returns : A scalar | 
|  | 869 | 
|  | 870 | 
|  | 871 =cut | 
|  | 872 | 
|  | 873 sub namespace{ | 
|  | 874     return shift->primary_seq()->namespace(@_); | 
|  | 875 } | 
|  | 876 | 
|  | 877 =head1 Methods for Bio::DescribableI compliance | 
|  | 878 | 
|  | 879 =cut | 
|  | 880 | 
|  | 881 =head2 display_name | 
|  | 882 | 
|  | 883  Title   : display_name | 
|  | 884  Usage   : $string    = $obj->display_name() | 
|  | 885  Function: A string which is what should be displayed to the user | 
|  | 886            the string should have no spaces (ideally, though a cautious | 
|  | 887            user of this interface would not assumme this) and should be | 
|  | 888            less than thirty characters (though again, double checking | 
|  | 889            this is a good idea) | 
|  | 890 | 
|  | 891            This is aliased to display_id(). | 
|  | 892  Returns : A scalar | 
|  | 893 | 
|  | 894 =cut | 
|  | 895 | 
|  | 896 sub display_name { | 
|  | 897     return shift->display_id(@_); | 
|  | 898 } | 
|  | 899 | 
|  | 900 =head2 description | 
|  | 901 | 
|  | 902  Title   : description | 
|  | 903  Usage   : $string    = $obj->description() | 
|  | 904  Function: A text string suitable for displaying to the user a | 
|  | 905            description. This string is likely to have spaces, but | 
|  | 906            should not have any newlines or formatting - just plain | 
|  | 907            text. The string should not be greater than 255 characters | 
|  | 908            and clients can feel justified at truncating strings at 255 | 
|  | 909            characters for the purposes of display | 
|  | 910 | 
|  | 911            This is aliased to desc(). | 
|  | 912  Returns : A scalar | 
|  | 913 | 
|  | 914 =cut | 
|  | 915 | 
|  | 916 sub description { | 
|  | 917     return shift->desc(@_); | 
|  | 918 } | 
|  | 919 | 
|  | 920 =head1 Methods for implementing Bio::AnnotatableI | 
|  | 921 | 
|  | 922 =cut | 
|  | 923 | 
|  | 924 =head2 annotation | 
|  | 925 | 
|  | 926  Title   : annotation | 
|  | 927  Usage   : $ann = $seq->annotation or $seq->annotation($annotation) | 
|  | 928  Function: Gets or sets the annotation | 
|  | 929  Returns : L<Bio::AnnotationCollectionI> object | 
|  | 930  Args    : None or L<Bio::AnnotationCollectionI> object | 
|  | 931 | 
|  | 932 See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection> | 
|  | 933 for more information | 
|  | 934 | 
|  | 935 =cut | 
|  | 936 | 
|  | 937 sub annotation { | 
|  | 938     my ($obj,$value) = @_; | 
|  | 939     if( defined $value ) { | 
|  | 940 	$obj->throw("object of class ".ref($value)." does not implement ". | 
|  | 941 		    "Bio::AnnotationCollectionI. Too bad.") | 
|  | 942 	    unless $value->isa("Bio::AnnotationCollectionI"); | 
|  | 943 	$obj->{'_annotation'} = $value; | 
|  | 944     } elsif( ! defined $obj->{'_annotation'}) { | 
|  | 945 	$obj->{'_annotation'} = new Bio::Annotation::Collection; | 
|  | 946     } | 
|  | 947     return $obj->{'_annotation'}; | 
|  | 948 } | 
|  | 949 | 
|  | 950 =head1 Methods to implement Bio::FeatureHolderI | 
|  | 951 | 
|  | 952 This includes methods for retrieving, adding, and removing features. | 
|  | 953 | 
|  | 954 =cut | 
|  | 955 | 
|  | 956 =head2 get_SeqFeatures | 
|  | 957 | 
|  | 958  Title   : get_SeqFeatures | 
|  | 959  Usage   : | 
|  | 960  Function: Get the feature objects held by this feature holder. | 
|  | 961 | 
|  | 962            Features which are not top-level are subfeatures of one or | 
|  | 963            more of the returned feature objects, which means that you | 
|  | 964            must traverse the subfeature arrays of each top-level | 
|  | 965            feature object in order to traverse all features associated | 
|  | 966            with this sequence. | 
|  | 967 | 
|  | 968            Use get_all_SeqFeatures() if you want the feature tree | 
|  | 969            flattened into one single array. | 
|  | 970 | 
|  | 971  Example : | 
|  | 972  Returns : an array of Bio::SeqFeatureI implementing objects | 
|  | 973  Args    : none | 
|  | 974 | 
|  | 975 At some day we may want to expand this method to allow for a feature | 
|  | 976 filter to be passed in. | 
|  | 977 | 
|  | 978 =cut | 
|  | 979 | 
|  | 980 sub get_SeqFeatures{ | 
|  | 981    my $self = shift; | 
|  | 982 | 
|  | 983    if( !defined $self->{'_as_feat'} ) { | 
|  | 984        $self->{'_as_feat'} = []; | 
|  | 985    } | 
|  | 986 | 
|  | 987    return @{$self->{'_as_feat'}}; | 
|  | 988 } | 
|  | 989 | 
|  | 990 =head2 get_all_SeqFeatures | 
|  | 991 | 
|  | 992  Title   : get_all_SeqFeatures | 
|  | 993  Usage   : @feat_ary = $seq->get_all_SeqFeatures(); | 
|  | 994  Function: Returns the tree of feature objects attached to this | 
|  | 995            sequence object flattened into one single array. Top-level | 
|  | 996            features will still contain their subfeature-arrays, which | 
|  | 997            means that you will encounter subfeatures twice if you | 
|  | 998            traverse the subfeature tree of the returned objects. | 
|  | 999 | 
|  | 1000            Use get_SeqFeatures() if you want the array to contain only | 
|  | 1001            the top-level features. | 
|  | 1002 | 
|  | 1003  Returns : An array of Bio::SeqFeatureI implementing objects. | 
|  | 1004  Args    : None | 
|  | 1005 | 
|  | 1006 | 
|  | 1007 =cut | 
|  | 1008 | 
|  | 1009 # this implementation is inherited from FeatureHolderI | 
|  | 1010 | 
|  | 1011 =head2 feature_count | 
|  | 1012 | 
|  | 1013  Title   : feature_count | 
|  | 1014  Usage   : $seq->feature_count() | 
|  | 1015  Function: Return the number of SeqFeatures attached to a sequence | 
|  | 1016  Returns : integer representing the number of SeqFeatures | 
|  | 1017  Args    : None | 
|  | 1018 | 
|  | 1019 | 
|  | 1020 =cut | 
|  | 1021 | 
|  | 1022 sub feature_count { | 
|  | 1023     my ($self) = @_; | 
|  | 1024 | 
|  | 1025     if (defined($self->{'_as_feat'})) { | 
|  | 1026 	return ($#{$self->{'_as_feat'}} + 1); | 
|  | 1027     } else { | 
|  | 1028 	return 0; | 
|  | 1029     } | 
|  | 1030 } | 
|  | 1031 | 
|  | 1032 =head2 add_SeqFeature | 
|  | 1033 | 
|  | 1034  Title   : add_SeqFeature | 
|  | 1035  Usage   : $seq->add_SeqFeature($feat); | 
|  | 1036            $seq->add_SeqFeature(@feat); | 
|  | 1037  Function: Adds the given feature object (or each of an array of feature | 
|  | 1038            objects to the feature array of this | 
|  | 1039            sequence. The object passed is required to implement the | 
|  | 1040            Bio::SeqFeatureI interface. | 
|  | 1041  Returns : 1 on success | 
|  | 1042  Args    : A Bio::SeqFeatureI implementing object, or an array of such objects. | 
|  | 1043 | 
|  | 1044 | 
|  | 1045 =cut | 
|  | 1046 | 
|  | 1047 sub add_SeqFeature { | 
|  | 1048    my ($self,@feat) = @_; | 
|  | 1049 | 
|  | 1050    $self->{'_as_feat'} = [] unless $self->{'_as_feat'}; | 
|  | 1051 | 
|  | 1052    foreach my $feat ( @feat ) { | 
|  | 1053        if( !$feat->isa("Bio::SeqFeatureI") ) { | 
|  | 1054 	   $self->throw("$feat is not a SeqFeatureI and that's what we expect..."); | 
|  | 1055        } | 
|  | 1056 | 
|  | 1057        # make sure we attach ourselves to the feature if the feature wants it | 
|  | 1058        my $aseq = $self->primary_seq; | 
|  | 1059        $feat->attach_seq($aseq) if $aseq; | 
|  | 1060 | 
|  | 1061        push(@{$self->{'_as_feat'}},$feat); | 
|  | 1062    } | 
|  | 1063    return 1; | 
|  | 1064 } | 
|  | 1065 | 
|  | 1066 =head2 remove_SeqFeatures | 
|  | 1067 | 
|  | 1068  Title   : remove_SeqFeatures | 
|  | 1069  Usage   : $seq->remove_SeqFeatures(); | 
|  | 1070  Function: Flushes all attached SeqFeatureI objects. | 
|  | 1071 | 
|  | 1072            To remove individual feature objects, delete those from the returned | 
|  | 1073            array and re-add the rest. | 
|  | 1074  Example : | 
|  | 1075  Returns : The array of Bio::SeqFeatureI objects removed from this seq. | 
|  | 1076  Args    : None | 
|  | 1077 | 
|  | 1078 | 
|  | 1079 =cut | 
|  | 1080 | 
|  | 1081 sub remove_SeqFeatures { | 
|  | 1082     my $self = shift; | 
|  | 1083 | 
|  | 1084     return () unless $self->{'_as_feat'}; | 
|  | 1085     my @feats = @{$self->{'_as_feat'}}; | 
|  | 1086     $self->{'_as_feat'} = []; | 
|  | 1087     return @feats; | 
|  | 1088 } | 
|  | 1089 | 
|  | 1090 =head1 Methods provided in the Bio::PrimarySeqI interface | 
|  | 1091 | 
|  | 1092 | 
|  | 1093 These methods are inherited from the PrimarySeq interface | 
|  | 1094 and work as one expects, building new Bio::Seq objects | 
|  | 1095 or other information as expected. See L<Bio::PrimarySeq> | 
|  | 1096 for more information. | 
|  | 1097 | 
|  | 1098 Sequence Features are B<not> transfered to the new objects. | 
|  | 1099 This is possibly a mistake. Anyone who feels the urge in | 
|  | 1100 dealing with this is welcome to give it a go. | 
|  | 1101 | 
|  | 1102 =head2 revcom | 
|  | 1103 | 
|  | 1104  Title   : revcom | 
|  | 1105  Usage   : $rev = $seq->revcom() | 
|  | 1106  Function: Produces a new Bio::Seq object which | 
|  | 1107            is the reversed complement of the sequence. For protein | 
|  | 1108            sequences this throws an exception of "Sequence is a protein. | 
|  | 1109            Cannot revcom" | 
|  | 1110 | 
|  | 1111            The id is the same id as the original sequence, and the | 
|  | 1112            accession number is also identical. If someone wants to track | 
|  | 1113            that this sequence has be reversed, it needs to define its own | 
|  | 1114            extensions | 
|  | 1115 | 
|  | 1116            To do an in-place edit of an object you can go: | 
|  | 1117 | 
|  | 1118            $seq = $seq->revcom(); | 
|  | 1119 | 
|  | 1120            This of course, causes Perl to handle the garbage collection of | 
|  | 1121            the old object, but it is roughly speaking as efficient as an | 
|  | 1122            in-place edit. | 
|  | 1123 | 
|  | 1124  Returns : A new (fresh) Bio::Seq object | 
|  | 1125  Args    : None | 
|  | 1126 | 
|  | 1127 | 
|  | 1128 =cut | 
|  | 1129 | 
|  | 1130 =head2 trunc | 
|  | 1131 | 
|  | 1132  Title   : trunc | 
|  | 1133  Usage   : $subseq = $myseq->trunc(10,100); | 
|  | 1134  Function: Provides a truncation of a sequence | 
|  | 1135 | 
|  | 1136  Example : | 
|  | 1137  Returns : A fresh Seq object | 
|  | 1138  Args    : A Seq object | 
|  | 1139 | 
|  | 1140 | 
|  | 1141 =cut | 
|  | 1142 | 
|  | 1143 =head2 id | 
|  | 1144 | 
|  | 1145  Title   : id | 
|  | 1146  Usage   : $id = $seq->id() | 
|  | 1147  Function: This is mapped on display_id | 
|  | 1148  Returns : value of display_id() | 
|  | 1149  Args    : [optional] value to update display_id | 
|  | 1150 | 
|  | 1151 | 
|  | 1152 =cut | 
|  | 1153 | 
|  | 1154 sub  id { | 
|  | 1155     return shift->display_id(@_); | 
|  | 1156 } | 
|  | 1157 | 
|  | 1158 | 
|  | 1159 =head1 Seq only methods | 
|  | 1160 | 
|  | 1161 | 
|  | 1162 These methods are specific to the Bio::Seq object, and not | 
|  | 1163 found on the Bio::PrimarySeq object | 
|  | 1164 | 
|  | 1165 =head2 primary_seq | 
|  | 1166 | 
|  | 1167  Title   : primary_seq | 
|  | 1168  Usage   : $seq->primary_seq or $seq->primary_seq($newval) | 
|  | 1169  Function: Get or set a PrimarySeq object | 
|  | 1170  Example : | 
|  | 1171  Returns : PrimarySeq object | 
|  | 1172  Args    : None or PrimarySeq object | 
|  | 1173 | 
|  | 1174 | 
|  | 1175 =cut | 
|  | 1176 | 
|  | 1177 sub primary_seq { | 
|  | 1178    my ($obj,$value) = @_; | 
|  | 1179 | 
|  | 1180    if( defined $value) { | 
|  | 1181        if( ! ref $value || ! $value->isa('Bio::PrimarySeqI') ) { | 
|  | 1182 	   $obj->throw("$value is not a Bio::PrimarySeq compliant object"); | 
|  | 1183        } | 
|  | 1184 | 
|  | 1185        $obj->{'primary_seq'} = $value; | 
|  | 1186        # descend down over all seqfeature objects, seeing whether they | 
|  | 1187        # want an attached seq. | 
|  | 1188 | 
|  | 1189        foreach my $sf ( $obj->get_SeqFeatures() ) { | 
|  | 1190 	   $sf->attach_seq($value); | 
|  | 1191        } | 
|  | 1192 | 
|  | 1193    } | 
|  | 1194    return $obj->{'primary_seq'}; | 
|  | 1195 | 
|  | 1196 } | 
|  | 1197 | 
|  | 1198 =head2 species | 
|  | 1199 | 
|  | 1200  Title   : species | 
|  | 1201  Usage   : $species = $seq->species() or $seq->species($species) | 
|  | 1202  Function: Gets or sets the species | 
|  | 1203  Returns : L<Bio::Species> object | 
|  | 1204  Args    : None or L<Bio::Species> object | 
|  | 1205 | 
|  | 1206 See L<Bio::Species> for more information | 
|  | 1207 | 
|  | 1208 =cut | 
|  | 1209 | 
|  | 1210 sub species { | 
|  | 1211     my ($self, $species) = @_; | 
|  | 1212     if ($species) { | 
|  | 1213         $self->{'species'} = $species; | 
|  | 1214     } else { | 
|  | 1215         return $self->{'species'}; | 
|  | 1216     } | 
|  | 1217 } | 
|  | 1218 | 
|  | 1219 =head1 Internal methods | 
|  | 1220 | 
|  | 1221 =cut | 
|  | 1222 | 
|  | 1223 # keep AUTOLOAD happy | 
|  | 1224 sub DESTROY { } | 
|  | 1225 | 
|  | 1226 ############################################################################ | 
|  | 1227 # aliases due to name changes or to compensate for our lack of consistency # | 
|  | 1228 ############################################################################ | 
|  | 1229 | 
|  | 1230 # in all other modules we use the object in the singular -- | 
|  | 1231 # lack of consistency sucks | 
|  | 1232 *flush_SeqFeature = \&remove_SeqFeatures; | 
|  | 1233 *flush_SeqFeatures = \&remove_SeqFeatures; | 
|  | 1234 | 
|  | 1235 # this is now get_SeqFeatures() (from FeatureHolderI) | 
|  | 1236 *top_SeqFeatures = \&get_SeqFeatures; | 
|  | 1237 | 
|  | 1238 # this is now get_all_SeqFeatures() in FeatureHolderI | 
|  | 1239 sub all_SeqFeatures{ | 
|  | 1240     return shift->get_all_SeqFeatures(@_); | 
|  | 1241 } | 
|  | 1242 | 
|  | 1243 sub accession { | 
|  | 1244     my $self = shift; | 
|  | 1245     $self->warn(ref($self)."::accession is deprecated, ". | 
|  | 1246 		"use accession_number() instead"); | 
|  | 1247     return $self->accession_number(@_); | 
|  | 1248 } | 
|  | 1249 | 
|  | 1250 1; |