Mercurial > repos > mahtabm > ensembl

diff variant_effect_predictor/Bio/Cluster/UniGene.pm @ 0:1f6dce3d34e0
Uploaded
author: mahtabm
date: Thu, 11 Apr 2013 02:01:53 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Cluster/UniGene.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1396 @@
+# $Id: UniGene.pm,v 1.23.2.2 2003/09/15 01:52:03 andrew Exp $
+#
+# BioPerl module for Bio::Cluster::UniGene.pm
+#
+# Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://meg.otago.ac.nz/
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# April 17, 2002 - Initial implementation by Andrew Macgregor
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::UniGene - UniGene object
+
+=head1 SYNOPSIS
+
+	use Bio::Cluster::UniGene;
+	use Bio::ClusterIO;
+
+	$stream  = Bio::ClusterIO->new('-file' => "Hs.data", 
+                                       '-format' => "unigene");
+	# note: we quote -format to keep older perl's from complaining.
+
+	while ( my $in = $stream->next_cluster() ) {
+		print $in->unigene_id() . "\n";
+		while ( my $sequence = $in->next_seq() ) {
+			print $sequence->accession_number() . "\n";
+		}
+       }
+
+=head1 DESCRIPTION
+
+This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
+for the representation if UniGene clusters in Bioperl. It is returned
+by the L<Bio::ClusterIO> parser for unigene format and contains all
+the data associated with one UniGene record.
+
+This class implements several interfaces and hence can be used
+wherever instances of such interfaces are expected. In particular, the
+interfaces are L<Bio::ClusterI> as the base interface for all cluster
+representations, and in addition L<Bio::IdentifiableI> and
+L<Bio::DescribableI>.
+
+The following lists the UniGene specific methods that are available
+(see below for details). Be aware next_XXX iterators take a snapshot
+of the array property when they are first called, and this snapshot is
+not reset until the iterator is exhausted. Hence, once called you need
+to exhaust the iterator to see any changes that have been made to the
+property in the meantime. You will usually want to use the
+non-iterator equivalents and loop over the elements yourself.
+
+new() - standard new call
+
+unigene_id() - set/get unigene_id
+
+title() - set/get title (description)
+
+gene() - set/get gene
+
+cytoband() - set/get cytoband
+
+mgi() - set/get mgi
+
+locuslink() - set/get locuslink
+
+gnm_terminus() - set/get gnm_terminus
+
+scount() - set/get scount
+
+express() - set/get express, currently takes/returns a reference to an
+array of expressed tissues
+
+next_express() - returns the next tissue expression from the expressed
+tissue array
+
+chromosome() - set/get chromosome, currently takes/returns a reference
+to an array of chromosome lines
+
+next_chromosome() - returns the next chromosome line from the array of
+chromosome lines
+
+sts() - set/get sts, currently takes/returns a reference to an array
+of sts lines
+
+next_sts() - returns the next sts line from the array of sts lines
+
+txmap() - set/get txmap, currently takes/returns a reference to an
+array of txmap lines
+
+next_txmap() - returns the next txmap line from the array of txmap
+lines
+
+protsim() - set/get protsim, currently takes/returns a reference to an
+array of protsim lines
+
+next_protsim() - returns the next protsim line from the array of
+protsim lines
+
+sequences() - set/get sequence, currently takes/returns a reference to
+an array of references to seq info
+
+next_seq() - returns a Seq object that currently only contains an
+accession number
+
+
+=head1 Implemented Interfaces
+
+This class implementes the following interfaces.
+
+=over 4
+
+=item Bio::Cluster::UniGeneI
+
+This includes implementing Bio::ClusterI.
+
+=item Bio::IdentifiableI
+
+=item Bio::DescribableI
+
+=item Bio::AnnotatableI
+
+=item Bio::Factory::SequenceStreamI
+
+=back
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bio.perl.org/MailList.html  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Andrew Macgregor
+
+Email andrew@anatomy.otago.ac.nz
+
+=head1 CONTRIBUTORS
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Cluster::UniGene;
+use vars qw(@ISA);
+use strict;
+
+
+use Bio::Root::Root;
+use Bio::IdentifiableI;
+use Bio::DescribableI;
+use Bio::AnnotatableI;
+use Bio::Annotation::Collection;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::SimpleValue;
+use Bio::Species;
+use Bio::Factory::SequenceStreamI;
+use Bio::Seq::SeqFactory;
+use Bio::Cluster::UniGeneI;
+
+@ISA = qw(Bio::Root::Root Bio::Cluster::UniGeneI
+	  Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI
+	  Bio::Factory::SequenceStreamI);
+
+my %species_map = (
+		   'Aga' => "Anopheles gambiae",
+		   'At'  => "Arabidopsis thaliana",
+		   'Bt'  => "Bos taurus",
+		   'Cel' => "Caenorhabditis elegans",
+		   'Cin' => "Ciona intestinalis",
+		   'Cre' => "Chlamydomonas reinhardtii",
+		   'Ddi'  => "Dictyostelium discoideum",
+		   'Dr'  => "Danio rerio",
+		   'Dm'  => "Drosophila melanogaster",
+		   'Gga' => "Gallus gallus",
+		   'Gma' => "Glycine max",
+		   'Hs'  => "Homo sapiens",
+		   'Hv'  => "Hordeum vulgare",
+		   'Les' => "Lycopersicon esculentum",
+		   'Mtr' => "Medicago truncatula",
+		   'Mm'  => "Mus musculus",
+		   'Os'  => "Oryza sativa",
+		   'Ola' => "Oryzias latipes",
+		   'Rn'  => "Rattus norvegicus",
+		   'Str' => "Silurana tropicalis",
+		   'Sbi' => "Sorghum bicolor",
+		   'Ssc' => "Sus scrofa",
+		   'Ta'  => "Triticum aestivum",
+		   'Xl'  => "Xenopus laevis",
+		   'Zm'  => "Zea mays",
+		   );
+		 
+
+=head2 new
+
+ Title   : new
+ Usage   : used by ClusterIO
+ Returns : a new Bio::Cluster::Unigene object
+
+=cut
+
+sub new {
+    # standard new call..
+    my($caller,@args) = @_;
+    my $self = $caller->SUPER::new(@args);
+
+    my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
+	$self->_rearrange([qw(UNIGENE_ID
+			      DESCRIPTION
+			      MEMBERS
+			      SIZE
+			      SPECIES
+			      DISPLAY_ID
+			      OBJECT_ID
+			      NAMESPACE
+			      AUTHORITY
+			      VERSION
+			      SEQFACTORY
+			      )], @args);
+
+    $self->{'_alphabet'} = 'dna';
+
+    $self->unigene_id($ugid) if $ugid;
+    $self->description($desc) if $desc;
+    $self->sequences($mems) if $mems;
+    $self->size($size) if defined($size);
+    $self->display_id($dispid) if $dispid; # overwrites ugid
+    $self->object_id($id) if $id;          # overwrites dispid
+    $self->namespace($ns || 'UniGene');
+    $self->authority($auth || 'NCBI');
+    $self->version($v) if defined($v);
+    if( ! defined $seqfact ) {
+	$seqfact = new Bio::Seq::SeqFactory
+	    (-verbose => $self->verbose(), 
+	     -type => 'Bio::Seq::RichSeq');
+    }
+    $self->sequence_factory($seqfact);
+    if( (! $species) && (defined $self->unigene_id() && 
+			 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
+	# try set a default one depending on the ID
+	$species = $species_map{$1};
+    }
+    $self->species($species);
+    return $self;
+}
+
+
+=head1 L<Bio::Cluster::UniGeneI> methods
+
+=cut
+
+=head2 unigene_id
+
+ Title   : unigene_id
+ Usage   : unigene_id();
+ Function: Returns the unigene_id associated with the object.
+ Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
+ Returns : A string
+ Args    : None or an id
+
+
+=cut
+
+sub unigene_id {
+	my ($obj,$value) = @_;
+	if( defined $value) {
+		$obj->{'unigene_id'} = $value;
+	}
+	return $obj->{'unigene_id'};
+}
+
+
+
+=head2 title
+
+ Title   : title
+ Usage   : title();
+ Function: Returns the title associated with the object.
+ Example : $title = $unigene->title or $unigene->title($title)
+ Returns : A string
+ Args    : None or a title
+
+
+=cut
+
+sub title {
+	my ($obj,$value) = @_;
+	if( defined $value) {
+		$obj->{'title'} = $value;
+	}
+	return $obj->{'title'};
+}
+
+
+=head2 gene
+
+ Title   : gene
+ Usage   : gene();
+ Function: Returns the gene associated with the object.
+ Example : $gene = $unigene->gene or $unigene->gene($gene)
+ Returns : A string
+ Args    : None or a gene
+
+
+=cut
+
+sub gene {
+    my $self = shift;
+    return $self->_annotation_value('gene_name', @_);
+}
+
+
+=head2 cytoband
+
+ Title   : cytoband
+ Usage   : cytoband();
+ Function: Returns the cytoband associated with the object.
+ Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
+ Returns : A string
+ Args    : None or a cytoband
+
+
+=cut
+
+sub cytoband {
+    my $self = shift;
+    return $self->_annotation_value('cyto_band', @_);
+}
+
+=head2 mgi
+
+ Title   : mgi
+ Usage   : mgi();
+ Function: Returns the mgi associated with the object.
+ Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
+ Returns : A string
+ Args    : None or a mgi
+
+
+=cut
+
+sub mgi {
+    my $self = shift;
+    my $acc;
+
+    if(@_) {
+	# purge first
+	$self->_remove_dblink('dblink','MGI');
+	# then add if a valid value is present
+	if($acc = shift) {
+	    $self->_annotation_dblink('dblink','MGI',$acc);
+	}
+    } else {
+	($acc) = $self->_annotation_dblink('dblink','MGI');
+    }
+    return $acc;
+}
+
+
+=head2 locuslink
+
+ Title   : locuslink
+ Usage   : locuslink();
+ Function: Returns or stores a reference to an array containing locuslink data.
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub locuslink {
+    my ($self,$ll) = @_;
+    
+    if($ll) {
+	# purge first
+	$self->_remove_dblink('dblink','LocusLink');
+	# then add as many accessions as are present
+	foreach my $acc (@$ll) {
+	    $self->_annotation_dblink('dblink','LocusLink',$acc);
+	}
+    } else {
+	my @accs = $self->_annotation_dblink('dblink','LocusLink');
+	$ll = [@accs];
+    }
+    return $ll;
+}
+
+
+=head2 gnm_terminus
+
+ Title   : gnm_terminus
+ Usage   : gnm_terminus();
+ Function: Returns the gnm_terminus associated with the object.
+ Example : $gnm_terminus = $unigene->gnm_terminus or 
+           $unigene->gnm_terminus($gnm_terminus)
+ Returns : A string
+ Args    : None or a gnm_terminus
+
+=cut
+
+sub gnm_terminus {
+    my $self = shift;
+    return $self->_annotation_value('gnm_terminus', @_);
+}
+
+=head2 scount
+
+ Title   : scount
+ Usage   : scount();
+ Function: Returns the scount associated with the object.
+ Example : $scount = $unigene->scount or $unigene->scount($scount)
+ Returns : A string
+ Args    : None or a scount
+
+=cut
+
+sub scount {
+	my ($obj,$value) = @_;
+	if( defined $value) {
+	    $obj->{'scount'} = $value;
+	} elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
+	    $obj->{'scount'} = $obj->size();
+	}
+	return $obj->{'scount'};
+}
+
+
+=head2 express
+
+ Title   : express
+ Usage   : express();
+ Function: Returns or stores a reference to an array containing 
+           tissue expression data
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub express {
+    my $self = shift;
+
+    return $self->_annotation_value_ary('expressed',@_);
+}
+
+
+=head2 chromosome
+
+ Title   : chromosome
+ Usage   : chromosome();
+ Function: Returns or stores a reference to an array containing
+           chromosome lines
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub chromosome {
+    my $self = shift;
+
+    return $self->_annotation_value_ary('chromosome',@_);
+ }
+
+
+=head2 sts
+
+ Title   : sts
+ Usage   : sts();
+ Function: Returns or stores a reference to an array containing sts lines
+
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub sts {
+    my $self = shift;
+
+    return $self->_annotation_value_ary('sts',@_);
+}
+
+
+=head2 txmap
+
+ Title   : txmap
+ Usage   : txmap();
+ Function: Returns or stores a reference to an array containing txmap lines
+
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub txmap {
+    my $self = shift;
+
+    return $self->_annotation_value_ary('txmap',@_);
+}
+
+
+=head2 protsim
+
+ Title   : protsim
+ Usage   : protsim();
+ Function: Returns or stores a reference to an array containing protsim lines
+	   This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub protsim {
+    my $self = shift;
+
+    return $self->_annotation_value_ary('protsim',@_);
+}
+
+
+=head2 sequences
+
+ Title   : sequences
+ Usage   : sequences();
+ Function: Returns or stores a reference to an array containing
+           sequence data.
+
+           This is mostly reserved for ClusterIO parsers. You should
+           use get_members() for get and add_member()/remove_members()
+           for set.
+
+ Returns : An array reference, or undef
+ Args    : None or an array reference or undef
+
+=cut
+
+sub sequences {
+    my $self = shift;
+
+    return $self->{'members'} = shift if @_;
+    return $self->{'members'};
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj->species($newval)
+ Function: Get/set the species object for this Unigene cluster.
+ Example : 
+ Returns : value of species (a L<Bio::Species> object)
+ Args    : on set, new value (a L<Bio::Species> object or 
+           the binomial name, or undef, optional)
+
+
+=cut
+
+sub species{
+    my $self = shift;
+
+    if(@_) {
+	my $species = shift;
+	if($species && (! ref($species))) {
+	    my @class = reverse(split(' ',$species));
+	    $species = Bio::Species->new(-classification => \@class);
+	}
+	return $self->{'species'} = $species;
+    }
+    return $self->{'species'};
+}
+
+
+=head1 L<Bio::ClusterI> methods
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get/set the display name or identifier for the cluster
+
+           This is aliased to unigene_id().
+
+ Returns : a string
+ Args    : optional, on set the display ID ( a string)
+
+=cut
+
+sub display_id{
+    return shift->unigene_id(@_);
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : Bio::ClusterI->description("POLYUBIQUITIN")
+ Function: get/set for the consensus description of the cluster
+
+           This is aliased to title().
+
+ Returns : the description string 
+ Args    : Optional the description string 
+
+=cut
+
+sub description{
+    return shift->title(@_);
+}
+
+=head2 size
+
+ Title   : size
+ Usage   : Bio::ClusterI->size();
+ Function: get for the size of the family, 
+           calculated from the number of members
+
+           This is aliased to scount().
+
+ Returns : the size of the cluster
+ Args    : 
+
+=cut
+
+sub size {
+    my $self = shift;
+
+    # hard-wiring the size is allowed if there are no sequences
+    return $self->scount(@_) unless defined($self->sequences());
+    # but we can't change the number of members through this method
+    my $n = scalar(@{$self->sequences()});
+    if(@_ && ($n != $_[0])) {
+	$self->throw("Cannot change cluster size using size() from $n to ".
+		     $_[0]);
+    }
+    return $n;
+}
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $cluster ->cluster_score(100);
+ Function: get/set for cluster_score which
+           represent the score in which the clustering
+           algorithm assigns to this cluster.
+
+           For UniGene clusters, there really is no cluster score that
+           would come with the data. However, we provide an
+           implementation here so that you can score UniGene clusters
+           if you want to.
+
+ Returns : a number
+ Args    : optionally, on set a number
+
+=cut
+
+sub cluster_score{
+    my $self = shift;
+
+    return $self->{'cluster_score'} = shift if @_;
+    return $self->{'cluster_score'};
+}
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Bio::ClusterI->get_members(($seq1, $seq2));
+ Function: retrieve the members of the family by some criteria
+
+           Will return all members if no criteria are provided.
+
+           At this time this implementation does not support
+           specifying criteria and will always return all members.
+
+ Returns : the array of members
+ Args    : 
+
+=cut
+
+sub get_members {
+    my $self = shift;
+
+    my $mems = $self->sequences() || [];
+    # already objects?
+    if(@$mems && (ref($mems->[0]) eq "HASH")) {
+	# nope, we need to build the object list from scratch
+	my @memlist = ();
+	while(my $seq = $self->next_seq()) {
+	    push(@memlist, $seq);
+	}
+	# we cache this array of objects as the new member list
+	$mems = \@memlist;
+	$self->sequences($mems);
+    }
+    # done
+    return @$mems;
+}
+
+
+=head1 Annotatable view at the object properties
+
+=cut
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($newval)
+ Function: Get/set the L<Bio::AnnotationCollectionI> object for
+           this UniGene cluster.
+
+           Many attributes of this class are actually stored within
+           the annotation collection object as L<Bio::AnnotationI>
+           compliant objects, so you can conveniently access them
+           through the same interface as you would e.g. access
+           L<Bio::SeqI> annotation properties.
+
+           If you call this method in set mode and replace the
+           annotation collection with another one you should know
+           exactly what you are doing.
+
+ Example : 
+ Returns : a L<Bio::AnnotationCollectionI> compliant object
+ Args    : on set, new value (a L<Bio::AnnotationCollectionI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub annotation{
+    my $self = shift;
+
+    if(@_) {
+	return $self->{'annotation'} = shift;
+    } elsif(! exists($self->{'annotation'})) {
+	$self->{'annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $self->{'annotation'};
+}
+
+
+=head1 Implementation specific methods
+
+ These are mostly for adding/removing to array properties, and for
+ methods with special functionality.
+
+=cut
+
+=head2 add_member
+
+ Title   : add_member
+ Usage   :
+ Function: Adds a member object to the list of members.
+ Example :
+ Returns : TRUE if the new member was successfuly added, and FALSE
+           otherwise.
+ Args    : The member to add.
+
+
+=cut
+
+sub add_member{
+    my ($self,@mems) = @_;
+
+    my $memlist = $self->{'members'} || [];
+    # this is an object interface; is the member list already objects?
+    if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
+	# nope, convert to objects
+        $memlist = [$self->get_members()];
+    }
+    # add new member(s)
+    push(@$memlist, @mems);
+    # store if we created this array ref ourselves
+    $self->sequences($memlist);
+    # done
+    return 1;
+}
+
+=head2 remove_members
+
+ Title   : remove_members
+ Usage   :
+ Function: Remove the list of members for this cluster such that the
+           member list is undefined afterwards (as opposed to zero members).
+ Example :
+ Returns : the previous list of members
+ Args    : none
+
+
+=cut
+
+sub remove_members{
+    my $self = shift;
+
+    my @mems = $self->get_members();
+    $self->sequences(undef);
+    return @mems;
+}
+
+
+=head2 next_locuslink
+
+ Title   : next_locuslink
+ Usage   : next_locuslink();
+ Function: Returns the next locuslink from an array referred 
+           to using $obj->{'locuslink'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $locuslink = $in->next_locuslink() ) {
+				print "$locuslink\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_locuslink {
+    my ($obj) = @_;
+
+    return $obj->_next_element("ll","locuslink");
+}
+
+=head2 next_express
+
+ Title   : next_express
+ Usage   : next_express();
+ Function: Returns the next tissue from an array referred 
+           to using $obj->{'express'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $express = $in->next_express() ) {
+				print "$express\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_express {
+    my ($obj) = @_;
+
+    return $obj->_next_element("express","express");
+}
+
+
+=head2 next_chromosome
+
+ Title   : next_chromosome
+ Usage   : next_chromosome();
+ Function: Returns the next chromosome line from an array referred
+           to using $obj->{'chromosome'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $chromosome = $in->next_chromosome() ) {
+				print "$chromosome\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_chromosome {
+    my ($obj) = @_;
+
+    return $obj->_next_element("chr","chromosome");
+}
+
+
+=head2 next_protsim
+
+ Title   : next_protsim
+ Usage   : next_protsim();
+ Function: Returns the next protsim line from an array referred 
+           to using $obj->{'protsim'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $protsim = $in->next_protsim() ) {
+				print "$protsim\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_protsim {
+    my ($obj) = @_;
+
+    return $obj->_next_element("protsim","protsim");
+}
+
+
+=head2 next_sts
+
+ Title   : next_sts
+ Usage   : next_sts();
+ Function: Returns the next sts line from an array referred 
+           to using $obj->{'sts'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $sts = $in->next_sts() ) {
+				print "$sts\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_sts {
+    my ($obj) = @_;
+
+    return $obj->_next_element("sts","sts");
+}
+
+
+=head2 next_txmap
+
+ Title   : next_txmap
+ Usage   : next_txmap();
+ Function: Returns the next txmap line from an array 
+           referred to using $obj->{'txmap'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $tsmap = $in->next_txmap() ) {
+				print "$txmap\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_txmap {
+    my ($obj) = @_;
+
+    return $obj->_next_element("txmap","txmap");
+}
+
+###############################
+# private method
+#
+# args: prefix name for the queue
+#       name of the method from which to re-fill
+# returns: the next element from that queue, or undef if the queue is empty
+###############################
+sub _next_element{
+    my ($self,$queuename,$meth) = @_;
+
+    $queuename = "_".$queuename."_queue";
+    if(! exists($self->{$queuename})) {
+	# re-initialize from array of sequence data
+	$self->{$queuename} = [@{$self->$meth() }];
+    }
+    my $queue = $self->{$queuename};
+    # is queue exhausted (equivalent to end of stream)?
+    if(! @$queue) {
+	# yes, remove queue and signal to the caller
+	delete $self->{$queuename};
+	return undef;
+    }
+    return shift(@$queue);
+}
+
+=head1 L<Bio::IdentifiableI> methods
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to unigene_id().
+
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    return shift->unigene_id(@_);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described
+           the same identifier should represent the same concept
+
+           Unigene clusters usually won''t have a version, so this
+           will be mostly undefined.
+
+ Returns : A number
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub version {
+    my $self = shift;
+
+    return $self->{'version'} = shift if @_;
+    return $self->{'version'};
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for  
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub authority {
+    my $self = shift;
+
+    return $self->{'authority'} = shift if @_;
+    return $self->{'authority'};
+}
+
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection 
+
+ Returns : A scalar
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub namespace {
+    my $self = shift;
+
+    return $self->{'namespace'} = shift if @_;
+    return $self->{'namespace'};
+}
+
+=head1 L<Bio::DescribableI> methods
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assumme this) and should be
+           less than thirty characters (though again, double checking 
+           this is a good idea)
+
+           This is aliased to unigene_id().
+
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub display_name {
+    return shift->unigene_id(@_);
+}
+
+
+=head2 description()
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a 
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+
+           This is already demanded by Bio::ClusterI and hence is
+           present anyway.
+
+ Returns : A scalar
+
+
+=cut
+
+
+=head1 L<Bio::Factory::SequenceStreamI> methods
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : next_seq();
+ Function: Returns the next seq as a Seq object as defined by 
+           $seq->sequence_factory(), 
+           at present an empty Bio::Seq::RichSeq object with 
+           just the accession_number() and pid() set
+
+           This iterator will not exhaust the array of member
+           sequences. If you call next_seq() again after it returned
+           undef, it will re-cycle through the list of member
+           sequences.
+
+ Example :  while ( my $sequence = $in->next_seq() ) {
+             print $sequence->accession_number() . "\n";
+	    }
+ Returns : Bio::PrimarySeqI object
+ Args    : None
+
+=cut
+
+sub next_seq {
+    my ($obj) = @_;
+
+    if(! exists($obj->{'_seq_queue'})) {
+	# re-initialize from array of sequence data
+	$obj->{'_seq_queue'} = [@{$obj->sequences()}];
+    }
+    my $queue = $obj->{'_seq_queue'};
+    # is queue exhausted (equivalent to end of stream)?
+    if(! @$queue) {
+	# yes, remove queue and signal to the caller
+	delete $obj->{'_seq_queue'};
+	return undef;
+    }
+    # no, still data in the queue: get the next one from the queue
+    my $seq_h = shift(@$queue);
+    # if this is not a simple hash ref, it's an object already, and we'll
+    # return just that
+    return $seq_h if(ref($seq_h) ne 'HASH');
+    # nope, we need to assemble this object from scratch
+    #
+    # assemble the annotation collection
+    my $ac = Bio::Annotation::Collection->new();
+    foreach my $k (keys %$seq_h) {
+	next if $k =~ /acc|pid|nid|version/;
+	my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
+						    -value   => $seq_h->{$k});
+	$ac->add_Annotation($ann);
+    }
+    # assemble the initialization parameters and create object
+    my $seqobj = $obj->sequence_factory->create(
+	  -accession_number => $seq_h->{acc},
+	  -pid              => $seq_h->{pid},
+	  # why does NCBI prepend a 'g' to its own identifiers??
+	  -primary_id       => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ?
+				     substr($seq_h->{nid},1) : $seq_h->{nid},
+	  -display_id       => $seq_h->{acc},
+	  -seq_version		=> $seq_h->{version},
+	  -alphabet         => $obj->{'_alphabet'},
+	  -namespace        => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank',
+	  -authority        => $obj->authority(), # default is NCBI
+	  -species          => $obj->species(),
+	  -annotation       => $ac
+	  );
+    return $seqobj;
+}
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get/Set the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : [optional] Bio::Factory::SequenceFactoryI
+
+
+=cut
+
+sub sequence_factory {
+    my ($self,$obj) = @_;   
+    if( defined $obj ) {
+	if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
+	    $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
+	}
+	$self->{'_seqfactory'} = $obj;
+    }
+    $self->{'_seqfactory'};
+}
+
+=head1 Private methods
+
+=cut
+
+=head2 _annotation_value
+
+ Title   : _annotation_value
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : the value (a string)
+ Args    : annotation key (a string)
+           on set, annotation value (a string)
+
+
+=cut
+
+sub _annotation_value{
+    my $self = shift;
+    my $key = shift;
+
+    my ($ann, $val);
+    if(@_) {
+	$val = shift;
+	if(! defined($val)) {
+	    ($ann) = $self->annotation->remove_Annotations($key);
+	    return $ann ? $ann->value() : undef;
+	}
+    }
+    ($ann) = $self->annotation->get_Annotations($key);
+    if($ann && (! $val)) {
+	# get mode and exists
+	$val = $ann->value();
+    } elsif($val) {
+	# set mode
+	if(! $ann) {
+	    $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
+	    $self->annotation->add_Annotation($ann);
+	}
+	$ann->value($val);
+    }
+    return $val;
+}
+
+
+=head2 _annotation_value_ary
+
+ Title   : _annotation_value_ary
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : reference to the array of values
+ Args    : annotation key (a string)
+           on set, reference to an array holding the values
+
+
+=cut
+
+sub _annotation_value_ary{
+    my ($self,$key,$arr) = @_;
+
+    my $ac = $self->annotation;
+    if($arr) {
+	# purge first
+	$ac->remove_Annotations($key);
+	# then add as many values as are present
+	foreach my $val (@$arr) {
+	    my $ann = Bio::Annotation::SimpleValue->new(-value => $val,
+							-tagname => $key
+							);
+	    $ac->add_Annotation($ann);
+	}
+    } else {
+	my @vals = map { $_->value(); } $ac->get_Annotations($key);
+	$arr = [@vals];
+    }
+    return $arr;
+}
+
+
+=head2 _annotation_dblink
+
+ Title   : _annotation_dblink
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : array of accessions for the given database (namespace)
+ Args    : annotation key (a string)
+           dbname (a string) (optional on get, mandatory on set)
+           on set, accession or ID (a string), and version
+
+
+=cut
+
+sub _annotation_dblink{
+    my ($self,$key,$dbname,$acc,$version) = @_;
+
+    if($acc) {
+	# set mode -- this is adding here
+	my $ann = Bio::Annotation::DBLink->new(-tagname    => $key,
+					       -primary_id => $acc,
+					       -database   => $dbname,
+					       -version    => $version);
+	$self->annotation->add_Annotation($ann);
+	return 1;
+    } else {
+	# get mode
+	my @anns = $self->annotation->get_Annotations($key);
+	# filter out those that don't match the requested database
+	if($dbname) {
+	    @anns = grep { $_->database() eq $dbname; } @anns;
+	}
+	return map { $_->primary_id(); } @anns;
+    }
+}
+
+=head2 _remove_dblink
+
+ Title   : _remove_dblink
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : array of accessions for the given database (namespace)
+ Args    : annotation key (a string)
+           dbname (a string) (optional)
+
+
+=cut
+
+sub _remove_dblink{
+    my ($self,$key,$dbname) = @_;
+
+    my $ac = $self->annotation();
+    my @anns = ();
+    if($dbname) {
+	foreach my $ann ($ac->remove_Annotations($key)) {
+	    if($ann->database() eq $dbname) {
+		push(@anns, $ann);
+	    } else {
+		$ac->add_Annotation($ann);
+	    }
+	}
+    } else {
+	@anns = $ac->remove_Annotations($key);
+    }
+    return map { $_->primary_id(); } @anns;
+}
+
+
+#####################################################################
+# aliases for naming consistency or other reasons                   #
+#####################################################################
+
+*sequence = \&sequences;
+
+1;
author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children