ensembl: variant_effect_predictor/Bio/Seq.pm comparison

comparison variant_effect_predictor/Bio/Seq.pm @ 0:1f6dce3d34e0

Uploaded

author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:1f6dce3d34e0
+# $Id: Seq.pm,v 1.76.2.2 2003/07/03 20:01:32 jason Exp $
+#
+# BioPerl module for Bio::Seq
+#
+# Cared for by Ewan Birney <birney@ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+=head1 NAME
+Bio::Seq - Sequence object, with features
+=head1 SYNOPSIS
+# This is the main sequence object in Bioperl
+# gets a sequence from a file
+$seqio  = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat');
+$seqobj = $seqio->next_seq();
+# SeqIO can both read and write sequences; see Bio::SeqIO
+# for more information and examples
+# get from database
+$db = Bio::DB::GenBank->new();
+$seqobj = $db->get_Seq_by_acc('X78121');
+# make from strings in script
+$seqobj = Bio::Seq->new( -display_id => 'my_id',
+			     -seq => $sequence_as_string);
+# gets sequence as a string from sequence object
+$seqstr   = $seqobj->seq(); # actual sequence as a string
+$seqstr   = $seqobj->subseq(10,50); # slice in biological coordinates
+# retrieves information from the sequence
+# features must implement Bio::SeqFeatureI interface
+@features = $seqobj->get_SeqFeatures(); # just top level
+foreach my $feat ( @features ) {
+	print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ",
+	$feat->end," strand ",$feat->strand,"\n";
+# features retain link to underlying sequence object
+print "Feature sequence is ",$feat->seq->seq(),"\n"
+}
+# sequences may have a species
+if( defined $seqobj->species ) {
+	print "Sequence is from ",$seqobj->species->binomial_name," [",$seqobj->species->common_name,"]\n";
+}
+# annotation objects are Bio::AnnotationCollectionI's
+$ann      = $seqobj->annotation(); # annotation object
+# references is one type of annotations to get. Also get
+# comment and dblink. Look at Bio::AnnotationCollection for
+# more information
+foreach my $ref ( $ann->get_Annotations('reference') ) {
+	print "Reference ",$ref->title,"\n";
+}
+# you can get truncations, translations and reverse complements, these
+# all give back Bio::Seq objects themselves, though currently with no
+# features transferred
+my $trunc = $seqobj->trunc(100,200);
+my $rev   = $seqobj->revcom();
+# there are many options to translate - check out the docs
+my $trans = $seqobj->translate();
+# these functions can be chained together
+my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate();
+=head1 DESCRIPTION
+A Seq object is a sequence with sequence features placed on it. The
+Seq object contains a PrimarySeq object for the actual sequence and
+also implements its interface.
+In Bioperl we have 3 main players that people are going to use frequently
+Bio::PrimarySeq  - just the sequence and its names, nothing else.
+Bio::SeqFeatureI - a location on a sequence, potentially with a sequence
+and annotation.
+Bio::Seq         - A sequence and a collection of sequence features
+(an aggregate) with its own annotation.
+Although Bioperl is not tied heavily to file formats these distinctions do
+map to file formats sensibly and for some bioinformaticians this might help
+Bio::PrimarySeq  - Fasta file of a sequence
+Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table
+Bio::Seq         - A single EMBL/GenBank/DDBJ entry
+By having this split we avoid a lot of nasty circular references
+(sequence features can hold a reference to a sequence without the sequence
+holding a reference to the sequence feature). See L<Bio::PrimarySeq> and
+L<Bio::SeqFeatureI> for more information.
+Ian Korf really helped in the design of the Seq and SeqFeature system.
+=head1 EXAMPLES
+A simple and fundamental block of code
+use Bio::SeqIO;
+my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object
+my $seqobj = $seqIOobj->next_seq;              # get a Seq object
+With the Seq object in hand one has access to a powerful set of Bioperl
+methods and Bioperl objects. This next script will take a file of sequences
+in EMBL format and create a file of the reverse-complemented sequences
+in Fasta format using Seq objects. It also prints out details about the
+exons it finds as sequence features in GFF (General Feature Format).
+use Bio::Seq;
+use Bio::SeqIO;
+$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+while((my $seqobj = $seqin->next_seq())) {
+print "Seen sequence ",$seqobj->display_id,", start of seq ",
+substr($seqobj->seq,1,10),"\n";
+if( $seqobj->alphabet eq 'dna') {
+	    $rev = $seqobj->revcom;
+	    $id  = $seqobj->display_id();
+$id  = "$id.rev";
+$rev->display_id($id);
+$seqout->write_seq($rev);
+}
+foreach $feat ( $seqobj->get_SeqFeatures() ) {
+if( $feat->primary_tag eq 'exon' ) {
+print STDOUT "Location ",$feat->start,":",
+$feat->end," GFF[",$feat->gff_string,"]\n";
+	   }
+}
+}
+Let's examine the script. The lines below import the Bioperl modules.
+Seq is the main Bioperl sequence object and SeqIO is the Bioperl support
+for reading sequences from files and to files
+use Bio::Seq;
+use Bio::SeqIO;
+These two lines create two SeqIO streams: one for reading in sequences
+and one for outputting sequences:
+$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+Notice that in the "$seqout" case there is a greater-than sign,
+indicating the file is being opened for writing.
+Using the
+'-argument' => value
+syntax is common in Bioperl. The file argument is like an argument
+to open() . You can also pass in filehandles or FileHandle objects by
+using the -fh argument (see L<Bio::SeqIO> documentation for details).
+Many formats in Bioperl are handled, including Fasta, EMBL, GenBank,
+Swissprot (swiss), PIR, and GCG.
+$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+This is the main loop which will loop progressively through sequences
+in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq
+object from the file:
+while((my $seqobj = $seqin->next_seq())) {
+This print line below accesses fields in the Seq object directly. The
+$seqobj-E<gt>display_id is the way to access the display_id attribute
+of the Seq object. The $seqobj-E<gt>seq method gets the actual
+sequence out as string. Then you can do manipulation of this if
+you want to (there are however easy ways of doing truncation,
+reverse-complement and translation).
+print "Seen sequence ",$seqobj->display_id,", start of seq ",
+substr($seqobj->seq,1,10),"\n";
+Bioperl has to guess the alphabet of the sequence, being either 'dna',
+'rna', or 'protein'. The alphabet attribute is one of these three
+possibilities.
+if( $seqobj->alphabet eq 'dna') {
+The $seqobj-E<gt>revcom method provides the reverse complement of the Seq
+object as another Seq object. Thus, the $rev variable is a reference to
+another Seq object. For example, one could repeat the above print line
+for this Seq object (putting $rev in place of $seqobj). In this
+case we are going to output the object into the file stream we built
+earlier on.
+$rev = $seqobj->revcom;
+When we output it, we want the id of the outputted object
+to be changed to "$id.rev", ie, with .rev on the end of the name. The
+following lines retrieve the id of the sequence object, add .rev
+to this and then set the display_id of the rev sequence object to
+this. Notice that to set the display_id attribute you just need
+call the same method, display_id(), with the new value as an argument.
+Getting and setting values with the same method is common in Bioperl.
+$id  = $seqobj->display_id();
+$id  = "$id.rev";
+$rev->display_id($id);
+The write_seq method on the SeqIO output object, $seqout, writes the
+$rev object to the filestream we built at the top of the script.
+The filestream knows that it is outputting in fasta format, and
+so it provides fasta output.
+$seqout->write_seq($rev);
+This block of code loops over sequence features in the sequence
+object, trying to find ones that have been tagged as 'exon'.
+Features have start and end attributes and can be outputted
+in GFF (General Feature Format), a standardized format for sequence
+features.
+foreach $feat ( $seqobj->get_SeqFeatures() ) {
+if( $feat->primary_tag eq 'exon' ) {
+print STDOUT "Location ",$feat->start,":",
+$feat->end," GFF[",$feat->gff_string,"]\n";
+}
+}
+The code above shows how a few Bio::Seq methods suffice to read, parse,
+reformat and analyze sequences from a file. A full list of methods
+available to Bio::Seq objects is shown below. Bear in mind that some of
+these methods come from PrimarySeq objects, which are simpler
+than Seq objects, stripped of features (see L<Bio::PrimarySeq> for
+more information).
+# these methods return strings, and accept strings in some cases:
+$seqobj->seq();              # string of sequence
+$seqobj->subseq(5,10);       # part of the sequence as a string
+$seqobj->accession_number(); # when there, the accession number
+$seqobj->alphabet();         # one of 'dna','rna',or 'protein'
+$seqobj->seq_version()       # when there, the version
+$seqobj->keywords();         # when there, the Keywords line
+$seqobj->length()            # length
+$seqobj->desc();             # description
+$seqobj->primary_id();       # a unique id for this sequence regardless
+# of its display_id or accession number
+$seqobj->display_id();       # the human readable id of the sequence
+Some of these values map to fields in common formats. For example, The
+display_id() method returns the LOCUS name of a Genbank entry,
+the (\S+) following the E<gt> character in a Fasta file, the ID from
+a SwissProt file, and so on. The desc() method will return the DEFINITION
+line of a Genbank file, the description following the display_id in a
+Fasta file, and the DE field in a SwissProt file.
+# the following methods return new Seq objects, but
+# do not transfer features across to the new object:
+$seqobj->trunc(5,10)  # truncation from 5 to 10 as new object
+$seqobj->revcom       # reverse complements sequence
+$seqobj->translate    # translation of the sequence
+# if new() can be called this method returns 1, else 0
+$seqobj->can_call_new
+# the following method determines if the given string will be accepted
+# by the seq() method - if the string is acceptable then validate_seq()
+# returns 1, or 0 if not
+$seqobj->validate_seq($string)
+# the following method returns or accepts a Species object:
+$seqobj->species();
+Please see L<Bio::Species> for more information on this object.
+# the following method returns or accepts an Annotation object
+# which in turn allows access to Annotation::Reference
+# and Annotation::Comment objects:
+$seqobj->annotation();
+These annotations typically refer to entire sequences, unlike
+features.  See L<Bio::AnnotationCollectionI>,
+L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and
+L<Bio::Annotation::Comment> for details.
+It is also important to be able to describe defined portions of a
+sequence. The combination of some description and the corresponding
+sub-sequence is called a feature - an exon and its coordinates within
+a gene is an example of a feature, or a domain within a protein.
+# the following methods return an array of SeqFeatureI objects:
+$seqobj->get_SeqFeatures # The 'top level' sequence features
+$seqobj->get_all_SeqFeatures # All sequence features, including sub-seq
+# features, such as features in an exon
+# to find out the number of features use:
+$seqobj->feature_count
+Here are just some of the methods available to SeqFeatureI objects:
+# these methods return numbers:
+$feat->start          # start position (1 is the first base)
+$feat->end            # end position (2 is the second base)
+$feat->strand         # 1 means forward, -1 reverse, 0 not relevant
+# these methods return or accept strings:
+$feat->primary_tag    # the name of the sequence feature, eg
+# 'exon', 'glycosylation site', 'TM domain'
+$feat->source_tag     # where the feature comes from, eg, 'EMBL_GenBank',
+# or 'BLAST'
+# this method returns the more austere PrimarySeq object, not a
+# Seq object - the main difference is that PrimarySeq objects do not
+# themselves contain sequence features
+$feat->seq            # the sequence between start,end on the
+# correct strand of the sequence
+See L<Bio::PrimarySeq> for more details on PrimarySeq objects.
+# useful methods for feature comparisons, for start/end points
+$feat->overlaps($other)  # do $feat and $other overlap?
+$feat->contains($other)  # is $other completely within $feat?
+$feat->equals($other)    # do $feat and $other completely agree?
+# one can also add features
+$seqobj->add_SeqFeature($feat)     # returns 1 if successful
+$seqobj->add_SeqFeature(@features) # returns 1 if successful
+# sub features. For complex join() statements, the feature
+# is one sequence feature with many sub SeqFeatures
+$feat->sub_SeqFeature  # returns array of sub seq features
+Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>,
+for more information on sequence features.
+It is worth mentioning that one can also retrieve the start and end
+positions of a feature using a Bio::LocationI object:
+$location = $feat->location # $location is a Bio::LocationI object
+$location->start;           # start position
+$location->end;             # end position
+This is useful because one needs a Bio::Location::SplitLocationI object
+in order to retrieve the coordinates inside the Genbank or EMBL join()
+statements (e.g. "CDS    join(51..142,273..495,1346..1474)"):
+if ( $feat->location->isa('Bio::Location::SplitLocationI') &&
+	       $feat->primary_tag eq 'CDS' )  {
+foreach $loc ( $feat->location->sub_Location ) {
+print $loc->start . ".." . $loc->end . "\n";
+}
+}
+See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more
+information.
+=head1 Implemented Interfaces
+This class implements the following interfaces.
+=over 4
+=item Bio::SeqI
+Note that this includes implementing Bio::PrimarySeqI.
+=item Bio::IdentifiableI
+=item Bio::DescribableI
+=item Bio::AnnotatableI
+=item Bio::FeatureHolderI
+=back
+=head1 FEEDBACK
+=head2 Mailing Lists
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+bioperl-l@bioperl.org              - General discussion
+http://bio.perl.org/MailList.html  - About the mailing lists
+=head2 Reporting Bugs
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+bioperl-bugs@bioperl.org
+http://bugzilla.bioperl.org/
+=head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects
+Email birney@ebi.ac.uk
+=head1 CONTRIBUTORS
+Jason Stajich E<lt>jason@bioperl.orgE<gt>
+=head1 APPENDIX
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+=cut
+#'
+# Let the code begin...
+package Bio::Seq;
+use vars qw(@ISA $VERSION);
+use strict;
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Root::Root;
+use Bio::SeqI;
+use Bio::Annotation::Collection;
+use Bio::PrimarySeq;
+use Bio::IdentifiableI;
+use Bio::DescribableI;
+use Bio::AnnotatableI;
+use Bio::FeatureHolderI;
+$VERSION = '1.1';
+@ISA = qw(Bio::Root::Root Bio::SeqI
+	  Bio::IdentifiableI Bio::DescribableI
+	  Bio::AnnotatableI Bio::FeatureHolderI);
+=head2 new
+Title   : new
+Usage   : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+-id  => 'human_id',
+				 -accession_number => 'AL000012',
+			       );
+Function: Returns a new Seq object from
+basic constructors, being a string for the sequence
+and strings for id and accession_number
+Returns : a new Bio::Seq object
+=cut
+sub new {
+    # Constructor. Creates the Bio::Root::Root base object, delegates the
+    # sequence-level arguments to a freshly built Bio::PrimarySeq, and then
+    # processes the Seq-specific named arguments -annotation, -primary_id,
+    # -features and -species.
+    # Args    : named parameters; -features accepts either a single
+    #           Bio::SeqFeatureI object or an array reference of them
+    # Returns : the new object, blessed into the caller's class
+    my($caller,@args) = @_;
+    if( $caller ne 'Bio::Seq') {
+        # allow $obj->new(...) in addition to Class->new(...)
+        $caller = ref($caller) if ref($caller);
+    }
+    # we know our inheritance hierarchy
+    my $self = Bio::Root::Root->new(@args);
+    bless $self,$caller;
+    # this is way too sneaky probably. We delegate the construction of
+    # the Seq object onto PrimarySeq and then pop primary_seq into
+    # our primary_seq slot
+    my $pseq = Bio::PrimarySeq->new(@args);
+    # as we have just made this, we know it is ok to set hash directly
+    # rather than going through the method
+    $self->{'primary_seq'} = $pseq;
+    # setting this array is now delayed until the final
+    # moment, again speed ups for non feature containing things
+    # $self->{'_as_feat'} = [];
+    my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args);
+    # for a number of cases - reading fasta files - these are never set. This
+    # gives a quick optimisation around testing things later on
+    if( defined $ann || defined $pid || defined $feat || defined $species ) {
+        $pid && $self->primary_id($pid);
+        $species && $self->species($species);
+        $ann && $self->annotation($ann);
+        if( defined $feat ) {
+            if( ref($feat) eq 'ARRAY' ) {
+                # exact comparison: a pattern match on the ref() string would
+                # also catch objects whose class name merely contains "array"
+                foreach my $feature ( @$feat ) {
+                    $self->add_SeqFeature($feature);
+                }
+            } elsif( ref($feat) &&
+                     do { local $@; eval { $feat->isa('Bio::SeqFeatureI') } } ) {
+                # the eval keeps an unblessed reference from dying inside
+                # isa(), so that it reaches the warning below instead
+                $self->add_SeqFeature($feat);
+            } else {
+                $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self));
+            }
+        }
+    }
+    return $self;
+}
+=head1 PrimarySeq interface
+The PrimarySeq interface provides the basic sequence getting
+and setting methods for all sequences.
+These methods implement the Bio::PrimarySeq interface by delegating
+to the primary_seq inside the object. This means that you
+can use a Seq object wherever there is a PrimarySeq, and
+of course, you are free to use these functions anyway.
+=cut
+=head2 seq
+Title   : seq
+Usage   : $string = $obj->seq()
+Function: Get/Set the sequence as a string of letters. The
+case of the letters is left up to the implementer.
+Suggested cases are upper case for proteins and lower case for
+DNA sequence (IUPAC standard),
+but implementations are suggested to keep an open mind about
+case (some users... want mixed case!)
+Returns : A scalar
+Args    : Optionally on set the new value (a string). An optional second
+argument presets the alphabet (otherwise it will be guessed).
+Both parameters may also be given in named parameter style
+with -seq and -alphabet being the names.
+=cut
+sub seq {
+    # Get/set the sequence string by handing the call, with all of its
+    # arguments, to the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->seq(@args);
+}
+=head2 validate_seq
+Title   : validate_seq
+Usage   : if(! $seq->validate_seq($seq_str) ) {
+print "sequence $seq_str is not valid for an object of type ",
+		      ref($seq), "\n";
+	   }
+Function: Validates a given sequence string. A validating sequence string
+must be accepted by seq(). A string that does not validate will
+lead to an exception if passed to seq().
+The implementation provided here does not take alphabet() into
+account. Allowed are all letters (A-Z) and '-','.', and '*'.
+Example :
+Returns : 1 if the supplied sequence string is valid for the object, and
+0 otherwise.
+Args    : The sequence string to be validated.
+=cut
+sub validate_seq {
+    # Validation is performed by the contained PrimarySeq object; the
+    # arguments are forwarded as given and its answer is returned.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->validate_seq(@args);
+}
+=head2 length
+Title   : length
+Usage   : $len = $seq->length()
+Function:
+Example :
+Returns : Integer representing the length of the sequence.
+Args    : None
+=cut
+sub length {
+    # Returns whatever length() of the contained PrimarySeq object reports.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->length(@args);
+}
+=head1 Methods from the Bio::PrimarySeqI interface
+=cut
+=head2 subseq
+Title   : subseq
+Usage   : $substring = $obj->subseq(10,40);
+Function: Returns the subseq from start to end, where the first base
+is 1 and the number is inclusive, ie 1-2 are the first two
+bases of the sequence
+Start cannot be larger than end but can be equal
+Returns : A string
+Args    : 2 integers
+=cut
+sub subseq {
+    # Forwards the requested coordinates to subseq() of the contained
+    # PrimarySeq object and returns its result.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->subseq(@args);
+}
+=head2 display_id
+Title   : display_id
+Usage   : $id = $obj->display_id or $obj->display_id($newid);
+Function: Gets or sets the display id, also known as the common name of
+the Seq object.
+The semantics of this is that it is the most likely string
+to be used as an identifier of the sequence, and likely to
+have "human" readability.  The id is equivalent to the LOCUS
+field of the GenBank/EMBL databanks and the ID field of the
+Swissprot/sptrembl database. In fasta format, the >(\S+) is
+presumed to be the id, though some people overload the id
+to embed other information. Bioperl does not use any
+embedded information in the ID field, and people are
+encouraged to use other mechanisms (accession field for
+example, or extending the sequence object) to solve this.
+Notice that $seq->id() maps to this function, mainly for
+legacy/convenience issues.
+Returns : A string
+Args    : None or a new id
+=cut
+sub display_id {
+    # Get/set the display id; the value lives in the contained PrimarySeq
+    # object, so the call is passed straight through to it.
+    my ($self, @args) = @_;
+    return $self->primary_seq->display_id(@args);
+}
+=head2 accession_number
+Title   : accession_number
+Usage   : $unique_biological_key = $obj->accession_number;
+Function: Returns the unique biological id for a sequence, commonly
+called the accession_number. For sequences from established
+databases, the implementors should try to use the correct
+accession number. Notice that primary_id() provides the
+unique id for the implementation, allowing multiple objects
+to have the same accession number in a particular implementation.
+For sequences with no accession number, this method should return
+"unknown".
+Can also be used to set the accession number.
+Example : $key = $seq->accession_number or $seq->accession_number($key)
+Returns : A string
+Args    : None or an accession number
+=cut
+sub accession_number {
+    # Get/set the accession number via the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq->accession_number(@args);
+}
+=head2 desc
+Title   : desc
+Usage   : $seqobj->desc($string) or $seqobj->desc()
+Function: Sets or gets the description of the sequence
+Example :
+Returns : The description
+Args    : The description or none
+=cut
+sub desc {
+    # Get/set the description via the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq->desc(@args);
+}
+=head2 primary_id
+Title   : primary_id
+Usage   : $unique_implementation_key = $obj->primary_id;
+Function: Returns the unique id for this object in this
+implementation. This allows implementations to manage
+their own object ids in a way the implementation can control;
+clients can expect one id to map to one object.
+For sequences with no natural id, this method should return
+a stringified memory location.
+Can also be used to set the primary_id.
+Also notice that this method is not delegated to the
+internal Bio::PrimarySeq object
+[Note this method name is likely to change in 1.3]
+Example : $id = $seq->primary_id or $seq->primary_id($id)
+Returns : A string
+Args    : None or an id
+=cut
+sub primary_id {
+    # Get/set the primary id, which is stored on this object itself.
+    # A defined argument is stored first; the stored id is then returned.
+    my ($self, $new_id) = @_;
+    $self->{'primary_id'} = $new_id if defined $new_id;
+    # nothing stored yet: fall back to the stringified object reference
+    return exists $self->{'primary_id'} ? $self->{'primary_id'} : "$self";
+}
+=head2 can_call_new
+Title   : can_call_new
+Usage   : if ( $obj->can_call_new ) {
+$newobj = $obj->new( %param );
+	   }
+Function: can_call_new returns 1 or 0 depending
+on whether an implementation allows new
+constructor to be called. If a new constructor
+is allowed, then it should take the following hashed
+constructor list.
+$myobject->new( -seq => $sequence_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   -alphabet => 'dna',
+			 );
+Example :
+Returns : 1 or 0
+Args    : None
+=cut
+sub can_call_new {
+    # This implementation always allows its constructor to be called,
+    # so the answer is unconditionally true.
+    my $constructor_allowed = 1;
+    return $constructor_allowed;
+}
+=head2 alphabet
+Title   : alphabet
+Usage   : if ( $obj->alphabet eq 'dna' ) { /Do Something/ }
+Function: Returns the type of sequence being one of
+'dna', 'rna' or 'protein'. This is case sensitive.
+This is not called <type> because this would cause
+upgrade problems from the 0.5 and earlier Seq objects.
+Returns : A string either 'dna','rna','protein'. NB - the object must
+make a call of the type - if there is no type specified it
+has to guess.
+Args    : None
+=cut
+sub alphabet {
+    # Get/set the alphabet on the contained PrimarySeq object. Arguments
+    # are only forwarded when the first one is defined; otherwise this is
+    # a plain getter call.
+    my ($self, @args) = @_;
+    my $pseq = $self->primary_seq;
+    if( @args && defined $args[0] ) {
+        return $pseq->alphabet(@args);
+    }
+    return $pseq->alphabet();
+}
+sub is_circular {
+    # Read-only here: asks the contained PrimarySeq object and ignores
+    # any arguments passed by the caller.
+    my $self = shift;
+    return $self->primary_seq->is_circular;
+}
+=head1 Methods for Bio::IdentifiableI compliance
+=cut
+=head2 object_id
+Title   : object_id
+Usage   : $string    = $obj->object_id()
+Function: a string which represents the stable primary identifier
+in this namespace of this object. For DNA sequences this
+is its accession_number, similarly for protein sequences
+This is aliased to accession_number().
+Returns : A scalar
+=cut
+sub object_id {
+    # Alias for accession_number(); arguments are passed along unchanged.
+    my ($self, @args) = @_;
+    return $self->accession_number(@args);
+}
+=head2 version
+Title   : version
+Usage   : $version    = $obj->version()
+Function: a number which differentiates between versions of
+the same object. Higher numbers are considered to be
+later and more relevant, but a single object described
+by the same identifier should represent the same concept
+Returns : A number
+=cut
+sub version{
+    # Get/set the version via the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq->version(@args);
+}
+=head2 authority
+Title   : authority
+Usage   : $authority    = $obj->authority()
+Function: a string which represents the organisation which
+granted the namespace, written as the DNS name for
+organisation (eg, wormbase.org)
+Returns : A scalar
+=cut
+sub authority {
+    # Get/set the authority via the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->authority(@args);
+}
+=head2 namespace
+Title   : namespace
+Usage   : $string    = $obj->namespace()
+Function: A string representing the name space this identifier
+is valid in, often the database name or the name
+describing the collection
+Returns : A scalar
+=cut
+sub namespace{
+    # Get/set the namespace via the contained PrimarySeq object.
+    my ($self, @args) = @_;
+    return $self->primary_seq()->namespace(@args);
+}
+=head1 Methods for Bio::DescribableI compliance
+=cut
+=head2 display_name
+Title   : display_name
+Usage   : $string    = $obj->display_name()
+Function: A string which is what should be displayed to the user
+the string should have no spaces (ideally, though a cautious
+user of this interface would not assume this) and should be
+less than thirty characters (though again, double checking
+this is a good idea)
+This is aliased to display_id().
+Returns : A scalar
+=cut
+sub display_name {
+    # Alias for display_id(); arguments are passed along unchanged.
+    my ($self, @args) = @_;
+    return $self->display_id(@args);
+}
+=head2 description
+Title   : description
+Usage   : $string    = $obj->description()
+Function: A text string suitable for displaying to the user a
+description. This string is likely to have spaces, but
+should not have any newlines or formatting - just plain
+text. The string should not be greater than 255 characters
+and clients can feel justified at truncating strings at 255
+characters for the purposes of display
+This is aliased to desc().
+Returns : A scalar
+=cut
+sub description {
+    # Alias for desc(); arguments are passed along unchanged.
+    my ($self, @args) = @_;
+    return $self->desc(@args);
+}
+=head1 Methods for implementing Bio::AnnotatableI
+=cut
+=head2 annotation
+Title   : annotation
+Usage   : $ann = $seq->annotation or $seq->annotation($annotation)
+Function: Gets or sets the annotation
+Returns : L<Bio::AnnotationCollectionI> object
+Args    : None or L<Bio::AnnotationCollectionI> object
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information
+=cut
+sub annotation {
+    # Get/set the annotation collection of this sequence.
+    # Args    : none, or an object implementing Bio::AnnotationCollectionI
+    # Returns : the stored collection; when none has been set yet, an empty
+    #           Bio::Annotation::Collection is created, stored and returned
+    # Throws  : if a defined argument does not implement
+    #           Bio::AnnotationCollectionI
+    my ($obj,$value) = @_;
+    if( defined $value ) {
+        # isa() is wrapped in eval so that an unblessed reference yields the
+        # descriptive exception below rather than a raw Perl method-call error
+        my $is_collection =
+            do { local $@; eval { $value->isa("Bio::AnnotationCollectionI") } };
+        $obj->throw("object of class ".ref($value)." does not implement ".
+                    "Bio::AnnotationCollectionI. Too bad.")
+            unless $is_collection;
+        $obj->{'_annotation'} = $value;
+    } elsif( ! defined $obj->{'_annotation'}) {
+        # direct method call instead of the ambiguous indirect-object form
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $obj->{'_annotation'};
+}
+=head1 Methods to implement Bio::FeatureHolderI
+This includes methods for retrieving, adding, and removing features.
+=cut
+=head2 get_SeqFeatures
+Title   : get_SeqFeatures
+Usage   :
+Function: Get the feature objects held by this feature holder.
+Features which are not top-level are subfeatures of one or
+more of the returned feature objects, which means that you
+must traverse the subfeature arrays of each top-level
+feature object in order to traverse all features associated
+with this sequence.
+Use get_all_SeqFeatures() if you want the feature tree
+flattened into one single array.
+Example :
+Returns : an array of Bio::SeqFeatureI implementing objects
+Args    : none
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+=cut
+sub get_SeqFeatures{
+    # Returns the top-level features held by this object. The internal
+    # feature array is created on first access if it does not exist yet.
+    my ($self) = @_;
+    $self->{'_as_feat'} = [] unless defined $self->{'_as_feat'};
+    my $features = $self->{'_as_feat'};
+    return @$features;
+}
+=head2 get_all_SeqFeatures
+Title   : get_all_SeqFeatures
+Usage   : @feat_ary = $seq->get_all_SeqFeatures();
+Function: Returns the tree of feature objects attached to this
+sequence object flattened into one single array. Top-level
+features will still contain their subfeature-arrays, which
+means that you will encounter subfeatures twice if you
+traverse the subfeature tree of the returned objects.
+Use get_SeqFeatures() if you want the array to contain only
+the top-level features.
+Returns : An array of Bio::SeqFeatureI implementing objects.
+Args    : None
+=cut
+# this implementation is inherited from FeatureHolderI
+=head2 feature_count
+Title   : feature_count
+Usage   : $seq->feature_count()
+Function: Return the number of SeqFeatures attached to a sequence
+Returns : integer representing the number of SeqFeatures
+Args    : None
+=cut
+sub feature_count {
+    # Number of top-level features currently stored; 0 when the feature
+    # array has never been created.
+    my $self = shift;
+    return 0 unless defined($self->{'_as_feat'});
+    return scalar @{$self->{'_as_feat'}};
+}
+=head2 add_SeqFeature
+Title   : add_SeqFeature
+Usage   : $seq->add_SeqFeature($feat);
+$seq->add_SeqFeature(@feat);
+Function: Adds the given feature object (or each of an array of feature
+objects) to the feature array of this
+sequence. The object passed is required to implement the
+Bio::SeqFeatureI interface.
+Returns : 1 on success
+Args    : A Bio::SeqFeatureI implementing object, or an array of such objects.
+=cut
+sub add_SeqFeature {
+    # Appends each given feature to this object's feature array.
+    # Args    : one or more objects implementing Bio::SeqFeatureI
+    # Returns : 1 on success
+    # Throws  : on the first argument that is not a Bio::SeqFeatureI;
+    #           features preceding it in the list have already been added
+    my ($self,@feat) = @_;
+    $self->{'_as_feat'} = [] unless $self->{'_as_feat'};
+    # the primary sequence is the same for every feature, so fetch it once
+    my $aseq = $self->primary_seq;
+    foreach my $feat ( @feat ) {
+        # eval keeps isa() on an unblessed reference from dying with a raw
+        # Perl error, so that the descriptive exception below is raised
+        my $is_feature = ref($feat) &&
+            do { local $@; eval { $feat->isa("Bio::SeqFeatureI") } };
+        if( !$is_feature ) {
+            $self->throw("$feat is not a SeqFeatureI and that's what we expect...");
+        }
+        # make sure we attach ourselves to the feature if the feature wants it
+        $feat->attach_seq($aseq) if $aseq;
+        push(@{$self->{'_as_feat'}},$feat);
+    }
+    return 1;
+}
+=head2 remove_SeqFeatures
+Title   : remove_SeqFeatures
+Usage   : $seq->remove_SeqFeatures();
+Function: Flushes all attached SeqFeatureI objects.
+To remove individual feature objects, delete those from the returned
+array and re-add the rest.
+Example :
+Returns : The array of Bio::SeqFeatureI objects removed from this seq.
+Args    : None
+=cut
+sub remove_SeqFeatures {
+    # Detaches all features from this object and hands them back to the
+    # caller. Returns the empty list when no feature array exists.
+    my ($self) = @_;
+    my $held = $self->{'_as_feat'};
+    return () unless $held;
+    $self->{'_as_feat'} = [];
+    return @$held;
+}
+=head1 Methods provided in the Bio::PrimarySeqI interface
+These methods are inherited from the PrimarySeq interface
+and work as one expects, building new Bio::Seq objects
+or other information as expected. See L<Bio::PrimarySeq>
+for more information.
+Sequence Features are B<not> transferred to the new objects.
+This is possibly a mistake. Anyone who feels the urge in
+dealing with this is welcome to give it a go.
+=head2 revcom
+Title   : revcom
+Usage   : $rev = $seq->revcom()
+Function: Produces a new Bio::Seq object which
+is the reversed complement of the sequence. For protein
+sequences this throws an exception of "Sequence is a protein.
+Cannot revcom"
+The id is the same id as the original sequence, and the
+accession number is also identical. If someone wants to track
+that this sequence has been reversed, it needs to define its own
+extensions
+To do an in-place edit of an object you can go:
+$seq = $seq->revcom();
+This of course, causes Perl to handle the garbage collection of
+the old object, but it is roughly speaking as efficient as an
+in-place edit.
+Returns : A new (fresh) Bio::Seq object
+Args    : None
+=cut
+=head2 trunc
+Title   : trunc
+Usage   : $subseq = $myseq->trunc(10,100);
+Function: Provides a truncation of a sequence
+Example :
+Returns : A fresh Seq object
+Args    : Two integers: the start and end position of the truncation
+=cut
+=head2 id
+Title   : id
+Usage   : $id = $seq->id()
+Function: This is mapped on display_id
+Returns : value of display_id()
+Args    : [optional] value to update display_id
+=cut
+sub  id {
+    # Convenience alias for display_id(); arguments are passed along.
+    my ($self, @args) = @_;
+    return $self->display_id(@args);
+}
+=head1 Seq only methods
+These methods are specific to the Bio::Seq object, and not
+found on the Bio::PrimarySeq object
+=head2 primary_seq
+Title   : primary_seq
+Usage   : $seq->primary_seq or $seq->primary_seq($newval)
+Function: Get or set a PrimarySeq object
+Example :
+Returns : PrimarySeq object
+Args    : None or PrimarySeq object
+=cut
+sub primary_seq {
+    # Get/set the contained PrimarySeq object. On set, the argument must be
+    # a reference implementing Bio::PrimarySeqI (otherwise an exception is
+    # thrown), and every top-level feature is handed the new sequence via
+    # attach_seq().
+    my ($self, $pseq) = @_;
+    return $self->{'primary_seq'} unless defined $pseq;
+    unless( ref $pseq && $pseq->isa('Bio::PrimarySeqI') ) {
+        $self->throw("$pseq is not a Bio::PrimarySeq compliant object");
+    }
+    $self->{'primary_seq'} = $pseq;
+    # let every seqfeature object see the replacement sequence
+    for my $feature ( $self->get_SeqFeatures() ) {
+        $feature->attach_seq($pseq);
+    }
+    return $self->{'primary_seq'};
+}
+=head2 species
+Title   : species
+Usage   : $species = $seq->species() or $seq->species($species)
+Function: Gets or sets the species
+Returns : L<Bio::Species> object
+Args    : None or L<Bio::Species> object
+See L<Bio::Species> for more information
+=cut
+sub species {
+    # Get/set the species. A true argument is stored and handed back;
+    # without one (or with a false one) the stored value is returned.
+    my ($self, $species) = @_;
+    return $self->{'species'} unless $species;
+    $self->{'species'} = $species;
+    return $species;
+}
+=head1 Internal methods
+=cut
+# keep AUTOLOAD happy
+sub DESTROY {
+    # deliberately empty: having an explicit DESTROY means object
+    # destruction is never routed through an AUTOLOAD handler
+}
+############################################################################
+# aliases due to name changes or to compensate for our lack of consistency #
+############################################################################
+# Aliases kept for callers of the former names: both the singular and the
+# plural flush_* spelling resolve to remove_SeqFeatures().
+*flush_SeqFeature = \&remove_SeqFeatures;
+*flush_SeqFeatures = \&remove_SeqFeatures;
+# top_SeqFeatures is the former name of get_SeqFeatures() (from FeatureHolderI)
+*top_SeqFeatures = \&get_SeqFeatures;
+# this is now get_all_SeqFeatures() in FeatureHolderI
+sub all_SeqFeatures{
+    # Former name of get_all_SeqFeatures(); simply forwards the call.
+    my ($self, @args) = @_;
+    return $self->get_all_SeqFeatures(@args);
+}
+sub accession {
+    # Deprecated spelling of accession_number(): emits a deprecation
+    # warning naming the object's class, then forwards the call.
+    my ($self, @args) = @_;
+    my $class = ref($self);
+    $self->warn($class."::accession is deprecated, ".
+                "use accession_number() instead");
+    return $self->accession_number(@args);
+}
+1;

Mercurial > repos > mahtabm > ensembl

comparison variant_effect_predictor/Bio/Seq.pm @ 0:1f6dce3d34e0