comparison variant_effect_predictor/Bio/Cluster/UniGene.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: UniGene.pm,v 1.23.2.2 2003/09/15 01:52:03 andrew Exp $
2 #
3 # BioPerl module for Bio::Cluster::UniGene.pm
4 #
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
6 #
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://meg.otago.ac.nz/
10 #
11 # You may distribute this module under the same terms as perl itself
12 #
13 # _history
14 # April 17, 2002 - Initial implementation by Andrew Macgregor
15 # POD documentation - main docs before the code
16
17 =head1 NAME
18
19 Bio::Cluster::UniGene - UniGene object
20
21 =head1 SYNOPSIS
22
23 use Bio::Cluster::UniGene;
24 use Bio::ClusterIO;
25
26 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
27 '-format' => "unigene");
28 # note: we quote -format to keep older perl's from complaining.
29
30 while ( my $in = $stream->next_cluster() ) {
31 print $in->unigene_id() . "\n";
32 while ( my $sequence = $in->next_seq() ) {
33 print $sequence->accession_number() . "\n";
34 }
35 }
36
37 =head1 DESCRIPTION
38
39 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
40 for the representation if UniGene clusters in Bioperl. It is returned
41 by the L<Bio::ClusterIO> parser for unigene format and contains all
42 the data associated with one UniGene record.
43
44 This class implements several interfaces and hence can be used
45 wherever instances of such interfaces are expected. In particular, the
46 interfaces are L<Bio::ClusterI> as the base interface for all cluster
47 representations, and in addition L<Bio::IdentifiableI> and
48 L<Bio::DescribableI>.
49
50 The following lists the UniGene specific methods that are available
51 (see below for details). Be aware next_XXX iterators take a snapshot
52 of the array property when they are first called, and this snapshot is
53 not reset until the iterator is exhausted. Hence, once called you need
54 to exhaust the iterator to see any changes that have been made to the
55 property in the meantime. You will usually want to use the
56 non-iterator equivalents and loop over the elements yourself.
57
58 new() - standard new call
59
60 unigene_id() - set/get unigene_id
61
62 title() - set/get title (description)
63
64 gene() - set/get gene
65
66 cytoband() - set/get cytoband
67
68 mgi() - set/get mgi
69
70 locuslink() - set/get locuslink
71
72 gnm_terminus() - set/get gnm_terminus
73
74 scount() - set/get scount
75
76 express() - set/get express, currently takes/returns a reference to an
77 array of expressed tissues
78
79 next_express() - returns the next tissue expression from the expressed
80 tissue array
81
82 chromosome() - set/get chromosome, currently takes/returns a reference
83 to an array of chromosome lines
84
85 next_chromosome() - returns the next chromosome line from the array of
86 chromosome lines
87
88 sts() - set/get sts, currently takes/returns a reference to an array
89 of sts lines
90
91 next_sts() - returns the next sts line from the array of sts lines
92
93 txmap() - set/get txmap, currently takes/returns a reference to an
94 array of txmap lines
95
96 next_txmap() - returns the next txmap line from the array of txmap
97 lines
98
99 protsim() - set/get protsim, currently takes/returns a reference to an
100 array of protsim lines
101
102 next_protsim() - returns the next protsim line from the array of
103 protsim lines
104
105 sequences() - set/get sequence, currently takes/returns a reference to
106 an array of references to seq info
107
108 next_seq() - returns a Seq object that currently only contains an
109 accession number
110
111
112 =head1 Implemented Interfaces
113
114 This class implementes the following interfaces.
115
116 =over 4
117
118 =item Bio::Cluster::UniGeneI
119
120 This includes implementing Bio::ClusterI.
121
122 =item Bio::IdentifiableI
123
124 =item Bio::DescribableI
125
126 =item Bio::AnnotatableI
127
128 =item Bio::Factory::SequenceStreamI
129
130 =back
131
132 =head1 FEEDBACK
133
134
135 =head2 Mailing Lists
136
137 User feedback is an integral part of the evolution of this and other
138 Bioperl modules. Send your comments and suggestions preferably to one
139 of the Bioperl mailing lists. Your participation is much appreciated.
140
141 bioperl-l@bioperl.org - General discussion
142 http://bio.perl.org/MailList.html - About the mailing lists
143
144 =head2 Reporting Bugs
145
146 Report bugs to the Bioperl bug tracking system to help us keep track
147 the bugs and their resolution. Bug reports can be submitted via email
148 or the web:
149
150 bioperl-bugs@bioperl.org
151 http://bugzilla.bioperl.org/
152
153 =head1 AUTHOR - Andrew Macgregor
154
155 Email andrew@anatomy.otago.ac.nz
156
157 =head1 CONTRIBUTORS
158
159 Hilmar Lapp, hlapp at gmx.net
160
161 =head1 APPENDIX
162
163
164 The rest of the documentation details each of the object
165 methods. Internal methods are usually preceded with a "_".
166
167 =cut
168
169 # Let the code begin...
170
171
172 package Bio::Cluster::UniGene;
173 use vars qw(@ISA);
174 use strict;
175
176
177 use Bio::Root::Root;
178 use Bio::IdentifiableI;
179 use Bio::DescribableI;
180 use Bio::AnnotatableI;
181 use Bio::Annotation::Collection;
182 use Bio::Annotation::DBLink;
183 use Bio::Annotation::SimpleValue;
184 use Bio::Species;
185 use Bio::Factory::SequenceStreamI;
186 use Bio::Seq::SeqFactory;
187 use Bio::Cluster::UniGeneI;
188
189 @ISA = qw(Bio::Root::Root Bio::Cluster::UniGeneI
190 Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI
191 Bio::Factory::SequenceStreamI);
192
193 my %species_map = (
194 'Aga' => "Anopheles gambiae",
195 'At' => "Arabidopsis thaliana",
196 'Bt' => "Bos taurus",
197 'Cel' => "Caenorhabditis elegans",
198 'Cin' => "Ciona intestinalis",
199 'Cre' => "Chlamydomonas reinhardtii",
200 'Ddi' => "Dictyostelium discoideum",
201 'Dr' => "Danio rerio",
202 'Dm' => "Drosophila melanogaster",
203 'Gga' => "Gallus gallus",
204 'Gma' => "Glycine max",
205 'Hs' => "Homo sapiens",
206 'Hv' => "Hordeum vulgare",
207 'Les' => "Lycopersicon esculentum",
208 'Mtr' => "Medicago truncatula",
209 'Mm' => "Mus musculus",
210 'Os' => "Oryza sativa",
211 'Ola' => "Oryzias latipes",
212 'Rn' => "Rattus norvegicus",
213 'Str' => "Silurana tropicalis",
214 'Sbi' => "Sorghum bicolor",
215 'Ssc' => "Sus scrofa",
216 'Ta' => "Triticum aestivum",
217 'Xl' => "Xenopus laevis",
218 'Zm' => "Zea mays",
219 );
220
221
222 =head2 new
223
224 Title : new
225 Usage : used by ClusterIO
226 Returns : a new Bio::Cluster::Unigene object
227
228 =cut
229
230 sub new {
231 # standard new call..
232 my($caller,@args) = @_;
233 my $self = $caller->SUPER::new(@args);
234
235 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
236 $self->_rearrange([qw(UNIGENE_ID
237 DESCRIPTION
238 MEMBERS
239 SIZE
240 SPECIES
241 DISPLAY_ID
242 OBJECT_ID
243 NAMESPACE
244 AUTHORITY
245 VERSION
246 SEQFACTORY
247 )], @args);
248
249 $self->{'_alphabet'} = 'dna';
250
251 $self->unigene_id($ugid) if $ugid;
252 $self->description($desc) if $desc;
253 $self->sequences($mems) if $mems;
254 $self->size($size) if defined($size);
255 $self->display_id($dispid) if $dispid; # overwrites ugid
256 $self->object_id($id) if $id; # overwrites dispid
257 $self->namespace($ns || 'UniGene');
258 $self->authority($auth || 'NCBI');
259 $self->version($v) if defined($v);
260 if( ! defined $seqfact ) {
261 $seqfact = new Bio::Seq::SeqFactory
262 (-verbose => $self->verbose(),
263 -type => 'Bio::Seq::RichSeq');
264 }
265 $self->sequence_factory($seqfact);
266 if( (! $species) && (defined $self->unigene_id() &&
267 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
268 # try set a default one depending on the ID
269 $species = $species_map{$1};
270 }
271 $self->species($species);
272 return $self;
273 }
274
275
276 =head1 L<Bio::Cluster::UniGeneI> methods
277
278 =cut
279
280 =head2 unigene_id
281
282 Title : unigene_id
283 Usage : unigene_id();
284 Function: Returns the unigene_id associated with the object.
285 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
286 Returns : A string
287 Args : None or an id
288
289
290 =cut
291
292 sub unigene_id {
293 my ($obj,$value) = @_;
294 if( defined $value) {
295 $obj->{'unigene_id'} = $value;
296 }
297 return $obj->{'unigene_id'};
298 }
299
300
301
302 =head2 title
303
304 Title : title
305 Usage : title();
306 Function: Returns the title associated with the object.
307 Example : $title = $unigene->title or $unigene->title($title)
308 Returns : A string
309 Args : None or a title
310
311
312 =cut
313
314 sub title {
315 my ($obj,$value) = @_;
316 if( defined $value) {
317 $obj->{'title'} = $value;
318 }
319 return $obj->{'title'};
320 }
321
322
323 =head2 gene
324
325 Title : gene
326 Usage : gene();
327 Function: Returns the gene associated with the object.
328 Example : $gene = $unigene->gene or $unigene->gene($gene)
329 Returns : A string
330 Args : None or a gene
331
332
333 =cut
334
335 sub gene {
336 my $self = shift;
337 return $self->_annotation_value('gene_name', @_);
338 }
339
340
341 =head2 cytoband
342
343 Title : cytoband
344 Usage : cytoband();
345 Function: Returns the cytoband associated with the object.
346 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
347 Returns : A string
348 Args : None or a cytoband
349
350
351 =cut
352
353 sub cytoband {
354 my $self = shift;
355 return $self->_annotation_value('cyto_band', @_);
356 }
357
358 =head2 mgi
359
360 Title : mgi
361 Usage : mgi();
362 Function: Returns the mgi associated with the object.
363 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
364 Returns : A string
365 Args : None or a mgi
366
367
368 =cut
369
370 sub mgi {
371 my $self = shift;
372 my $acc;
373
374 if(@_) {
375 # purge first
376 $self->_remove_dblink('dblink','MGI');
377 # then add if a valid value is present
378 if($acc = shift) {
379 $self->_annotation_dblink('dblink','MGI',$acc);
380 }
381 } else {
382 ($acc) = $self->_annotation_dblink('dblink','MGI');
383 }
384 return $acc;
385 }
386
387
388 =head2 locuslink
389
390 Title : locuslink
391 Usage : locuslink();
392 Function: Returns or stores a reference to an array containing locuslink data.
393 Returns : An array reference
394 Args : None or an array reference
395
396 =cut
397
398 sub locuslink {
399 my ($self,$ll) = @_;
400
401 if($ll) {
402 # purge first
403 $self->_remove_dblink('dblink','LocusLink');
404 # then add as many accessions as are present
405 foreach my $acc (@$ll) {
406 $self->_annotation_dblink('dblink','LocusLink',$acc);
407 }
408 } else {
409 my @accs = $self->_annotation_dblink('dblink','LocusLink');
410 $ll = [@accs];
411 }
412 return $ll;
413 }
414
415
416 =head2 gnm_terminus
417
418 Title : gnm_terminus
419 Usage : gnm_terminus();
420 Function: Returns the gnm_terminus associated with the object.
421 Example : $gnm_terminus = $unigene->gnm_terminus or
422 $unigene->gnm_terminus($gnm_terminus)
423 Returns : A string
424 Args : None or a gnm_terminus
425
426 =cut
427
428 sub gnm_terminus {
429 my $self = shift;
430 return $self->_annotation_value('gnm_terminus', @_);
431 }
432
433 =head2 scount
434
435 Title : scount
436 Usage : scount();
437 Function: Returns the scount associated with the object.
438 Example : $scount = $unigene->scount or $unigene->scount($scount)
439 Returns : A string
440 Args : None or a scount
441
442 =cut
443
444 sub scount {
445 my ($obj,$value) = @_;
446 if( defined $value) {
447 $obj->{'scount'} = $value;
448 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
449 $obj->{'scount'} = $obj->size();
450 }
451 return $obj->{'scount'};
452 }
453
454
455 =head2 express
456
457 Title : express
458 Usage : express();
459 Function: Returns or stores a reference to an array containing
460 tissue expression data
461 Returns : An array reference
462 Args : None or an array reference
463
464 =cut
465
466 sub express {
467 my $self = shift;
468
469 return $self->_annotation_value_ary('expressed',@_);
470 }
471
472
473 =head2 chromosome
474
475 Title : chromosome
476 Usage : chromosome();
477 Function: Returns or stores a reference to an array containing
478 chromosome lines
479 Returns : An array reference
480 Args : None or an array reference
481
482 =cut
483
484 sub chromosome {
485 my $self = shift;
486
487 return $self->_annotation_value_ary('chromosome',@_);
488 }
489
490
491 =head2 sts
492
493 Title : sts
494 Usage : sts();
495 Function: Returns or stores a reference to an array containing sts lines
496
497 Returns : An array reference
498 Args : None or an array reference
499
500 =cut
501
502 sub sts {
503 my $self = shift;
504
505 return $self->_annotation_value_ary('sts',@_);
506 }
507
508
509 =head2 txmap
510
511 Title : txmap
512 Usage : txmap();
513 Function: Returns or stores a reference to an array containing txmap lines
514
515 Returns : An array reference
516 Args : None or an array reference
517
518 =cut
519
520 sub txmap {
521 my $self = shift;
522
523 return $self->_annotation_value_ary('txmap',@_);
524 }
525
526
527 =head2 protsim
528
529 Title : protsim
530 Usage : protsim();
531 Function: Returns or stores a reference to an array containing protsim lines
532 This should really only be used by ClusterIO, not directly
533 Returns : An array reference
534 Args : None or an array reference
535
536 =cut
537
538 sub protsim {
539 my $self = shift;
540
541 return $self->_annotation_value_ary('protsim',@_);
542 }
543
544
545 =head2 sequences
546
547 Title : sequences
548 Usage : sequences();
549 Function: Returns or stores a reference to an array containing
550 sequence data.
551
552 This is mostly reserved for ClusterIO parsers. You should
553 use get_members() for get and add_member()/remove_members()
554 for set.
555
556 Returns : An array reference, or undef
557 Args : None or an array reference or undef
558
559 =cut
560
561 sub sequences {
562 my $self = shift;
563
564 return $self->{'members'} = shift if @_;
565 return $self->{'members'};
566 }
567
568 =head2 species
569
570 Title : species
571 Usage : $obj->species($newval)
572 Function: Get/set the species object for this Unigene cluster.
573 Example :
574 Returns : value of species (a L<Bio::Species> object)
575 Args : on set, new value (a L<Bio::Species> object or
576 the binomial name, or undef, optional)
577
578
579 =cut
580
581 sub species{
582 my $self = shift;
583
584 if(@_) {
585 my $species = shift;
586 if($species && (! ref($species))) {
587 my @class = reverse(split(' ',$species));
588 $species = Bio::Species->new(-classification => \@class);
589 }
590 return $self->{'species'} = $species;
591 }
592 return $self->{'species'};
593 }
594
595
596 =head1 L<Bio::ClusterI> methods
597
598 =cut
599
600 =head2 display_id
601
602 Title : display_id
603 Usage :
604 Function: Get/set the display name or identifier for the cluster
605
606 This is aliased to unigene_id().
607
608 Returns : a string
609 Args : optional, on set the display ID ( a string)
610
611 =cut
612
613 sub display_id{
614 return shift->unigene_id(@_);
615 }
616
617 =head2 description
618
619 Title : description
620 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
621 Function: get/set for the consensus description of the cluster
622
623 This is aliased to title().
624
625 Returns : the description string
626 Args : Optional the description string
627
628 =cut
629
630 sub description{
631 return shift->title(@_);
632 }
633
634 =head2 size
635
636 Title : size
637 Usage : Bio::ClusterI->size();
638 Function: get for the size of the family,
639 calculated from the number of members
640
641 This is aliased to scount().
642
643 Returns : the size of the cluster
644 Args :
645
646 =cut
647
648 sub size {
649 my $self = shift;
650
651 # hard-wiring the size is allowed if there are no sequences
652 return $self->scount(@_) unless defined($self->sequences());
653 # but we can't change the number of members through this method
654 my $n = scalar(@{$self->sequences()});
655 if(@_ && ($n != $_[0])) {
656 $self->throw("Cannot change cluster size using size() from $n to ".
657 $_[0]);
658 }
659 return $n;
660 }
661
662 =head2 cluster_score
663
664 Title : cluster_score
665 Usage : $cluster ->cluster_score(100);
666 Function: get/set for cluster_score which
667 represent the score in which the clustering
668 algorithm assigns to this cluster.
669
670 For UniGene clusters, there really is no cluster score that
671 would come with the data. However, we provide an
672 implementation here so that you can score UniGene clusters
673 if you want to.
674
675 Returns : a number
676 Args : optionally, on set a number
677
678 =cut
679
680 sub cluster_score{
681 my $self = shift;
682
683 return $self->{'cluster_score'} = shift if @_;
684 return $self->{'cluster_score'};
685 }
686
687 =head2 get_members
688
689 Title : get_members
690 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
691 Function: retrieve the members of the family by some criteria
692
693 Will return all members if no criteria are provided.
694
695 At this time this implementation does not support
696 specifying criteria and will always return all members.
697
698 Returns : the array of members
699 Args :
700
701 =cut
702
703 sub get_members {
704 my $self = shift;
705
706 my $mems = $self->sequences() || [];
707 # already objects?
708 if(@$mems && (ref($mems->[0]) eq "HASH")) {
709 # nope, we need to build the object list from scratch
710 my @memlist = ();
711 while(my $seq = $self->next_seq()) {
712 push(@memlist, $seq);
713 }
714 # we cache this array of objects as the new member list
715 $mems = \@memlist;
716 $self->sequences($mems);
717 }
718 # done
719 return @$mems;
720 }
721
722
723 =head1 Annotatable view at the object properties
724
725 =cut
726
727 =head2 annotation
728
729 Title : annotation
730 Usage : $obj->annotation($newval)
731 Function: Get/set the L<Bio::AnnotationCollectionI> object for
732 this UniGene cluster.
733
734 Many attributes of this class are actually stored within
735 the annotation collection object as L<Bio::AnnotationI>
736 compliant objects, so you can conveniently access them
737 through the same interface as you would e.g. access
738 L<Bio::SeqI> annotation properties.
739
740 If you call this method in set mode and replace the
741 annotation collection with another one you should know
742 exactly what you are doing.
743
744 Example :
745 Returns : a L<Bio::AnnotationCollectionI> compliant object
746 Args : on set, new value (a L<Bio::AnnotationCollectionI>
747 compliant object or undef, optional)
748
749
750 =cut
751
752 sub annotation{
753 my $self = shift;
754
755 if(@_) {
756 return $self->{'annotation'} = shift;
757 } elsif(! exists($self->{'annotation'})) {
758 $self->{'annotation'} = Bio::Annotation::Collection->new();
759 }
760 return $self->{'annotation'};
761 }
762
763
764 =head1 Implementation specific methods
765
766 These are mostly for adding/removing to array properties, and for
767 methods with special functionality.
768
769 =cut
770
771 =head2 add_member
772
773 Title : add_member
774 Usage :
775 Function: Adds a member object to the list of members.
776 Example :
777 Returns : TRUE if the new member was successfuly added, and FALSE
778 otherwise.
779 Args : The member to add.
780
781
782 =cut
783
784 sub add_member{
785 my ($self,@mems) = @_;
786
787 my $memlist = $self->{'members'} || [];
788 # this is an object interface; is the member list already objects?
789 if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
790 # nope, convert to objects
791 $memlist = [$self->get_members()];
792 }
793 # add new member(s)
794 push(@$memlist, @mems);
795 # store if we created this array ref ourselves
796 $self->sequences($memlist);
797 # done
798 return 1;
799 }
800
801 =head2 remove_members
802
803 Title : remove_members
804 Usage :
805 Function: Remove the list of members for this cluster such that the
806 member list is undefined afterwards (as opposed to zero members).
807 Example :
808 Returns : the previous list of members
809 Args : none
810
811
812 =cut
813
814 sub remove_members{
815 my $self = shift;
816
817 my @mems = $self->get_members();
818 $self->sequences(undef);
819 return @mems;
820 }
821
822
823 =head2 next_locuslink
824
825 Title : next_locuslink
826 Usage : next_locuslink();
827 Function: Returns the next locuslink from an array referred
828 to using $obj->{'locuslink'}
829
830 If you call this iterator again after it returned undef, it
831 will re-cycle through the list of elements. Changes in the
832 underlying array property while you loop over this iterator
833 will not be reflected until you exhaust the iterator.
834
835 Example : while ( my $locuslink = $in->next_locuslink() ) {
836 print "$locuslink\n";
837 }
838 Returns : String
839 Args : None
840
841 =cut
842
843 sub next_locuslink {
844 my ($obj) = @_;
845
846 return $obj->_next_element("ll","locuslink");
847 }
848
849 =head2 next_express
850
851 Title : next_express
852 Usage : next_express();
853 Function: Returns the next tissue from an array referred
854 to using $obj->{'express'}
855
856 If you call this iterator again after it returned undef, it
857 will re-cycle through the list of elements. Changes in the
858 underlying array property while you loop over this iterator
859 will not be reflected until you exhaust the iterator.
860
861 Example : while ( my $express = $in->next_express() ) {
862 print "$express\n";
863 }
864 Returns : String
865 Args : None
866
867 =cut
868
869 sub next_express {
870 my ($obj) = @_;
871
872 return $obj->_next_element("express","express");
873 }
874
875
876 =head2 next_chromosome
877
878 Title : next_chromosome
879 Usage : next_chromosome();
880 Function: Returns the next chromosome line from an array referred
881 to using $obj->{'chromosome'}
882
883 If you call this iterator again after it returned undef, it
884 will re-cycle through the list of elements. Changes in the
885 underlying array property while you loop over this iterator
886 will not be reflected until you exhaust the iterator.
887
888 Example : while ( my $chromosome = $in->next_chromosome() ) {
889 print "$chromosome\n";
890 }
891 Returns : String
892 Args : None
893
894 =cut
895
896 sub next_chromosome {
897 my ($obj) = @_;
898
899 return $obj->_next_element("chr","chromosome");
900 }
901
902
903 =head2 next_protsim
904
905 Title : next_protsim
906 Usage : next_protsim();
907 Function: Returns the next protsim line from an array referred
908 to using $obj->{'protsim'}
909
910 If you call this iterator again after it returned undef, it
911 will re-cycle through the list of elements. Changes in the
912 underlying array property while you loop over this iterator
913 will not be reflected until you exhaust the iterator.
914
915 Example : while ( my $protsim = $in->next_protsim() ) {
916 print "$protsim\n";
917 }
918 Returns : String
919 Args : None
920
921 =cut
922
923 sub next_protsim {
924 my ($obj) = @_;
925
926 return $obj->_next_element("protsim","protsim");
927 }
928
929
930 =head2 next_sts
931
932 Title : next_sts
933 Usage : next_sts();
934 Function: Returns the next sts line from an array referred
935 to using $obj->{'sts'}
936
937 If you call this iterator again after it returned undef, it
938 will re-cycle through the list of elements. Changes in the
939 underlying array property while you loop over this iterator
940 will not be reflected until you exhaust the iterator.
941
942 Example : while ( my $sts = $in->next_sts() ) {
943 print "$sts\n";
944 }
945 Returns : String
946 Args : None
947
948 =cut
949
950 sub next_sts {
951 my ($obj) = @_;
952
953 return $obj->_next_element("sts","sts");
954 }
955
956
957 =head2 next_txmap
958
959 Title : next_txmap
960 Usage : next_txmap();
961 Function: Returns the next txmap line from an array
962 referred to using $obj->{'txmap'}
963
964 If you call this iterator again after it returned undef, it
965 will re-cycle through the list of elements. Changes in the
966 underlying array property while you loop over this iterator
967 will not be reflected until you exhaust the iterator.
968
969 Example : while ( my $tsmap = $in->next_txmap() ) {
970 print "$txmap\n";
971 }
972 Returns : String
973 Args : None
974
975 =cut
976
977 sub next_txmap {
978 my ($obj) = @_;
979
980 return $obj->_next_element("txmap","txmap");
981 }
982
983 ###############################
984 # private method
985 #
986 # args: prefix name for the queue
987 # name of the method from which to re-fill
988 # returns: the next element from that queue, or undef if the queue is empty
989 ###############################
990 sub _next_element{
991 my ($self,$queuename,$meth) = @_;
992
993 $queuename = "_".$queuename."_queue";
994 if(! exists($self->{$queuename})) {
995 # re-initialize from array of sequence data
996 $self->{$queuename} = [@{$self->$meth() }];
997 }
998 my $queue = $self->{$queuename};
999 # is queue exhausted (equivalent to end of stream)?
1000 if(! @$queue) {
1001 # yes, remove queue and signal to the caller
1002 delete $self->{$queuename};
1003 return undef;
1004 }
1005 return shift(@$queue);
1006 }
1007
1008 =head1 L<Bio::IdentifiableI> methods
1009
1010 =cut
1011
1012 =head2 object_id
1013
1014 Title : object_id
1015 Usage : $string = $obj->object_id()
1016 Function: a string which represents the stable primary identifier
1017 in this namespace of this object. For DNA sequences this
1018 is its accession_number, similarly for protein sequences
1019
1020 This is aliased to unigene_id().
1021
1022 Returns : A scalar
1023
1024
1025 =cut
1026
1027 sub object_id {
1028 return shift->unigene_id(@_);
1029 }
1030
1031 =head2 version
1032
1033 Title : version
1034 Usage : $version = $obj->version()
1035 Function: a number which differentiates between versions of
1036 the same object. Higher numbers are considered to be
1037 later and more relevant, but a single object described
1038 the same identifier should represent the same concept
1039
1040 Unigene clusters usually won''t have a version, so this
1041 will be mostly undefined.
1042
1043 Returns : A number
1044 Args : on set, new value (a scalar or undef, optional)
1045
1046
1047 =cut
1048
1049 sub version {
1050 my $self = shift;
1051
1052 return $self->{'version'} = shift if @_;
1053 return $self->{'version'};
1054 }
1055
1056
1057 =head2 authority
1058
1059 Title : authority
1060 Usage : $authority = $obj->authority()
1061 Function: a string which represents the organisation which
1062 granted the namespace, written as the DNS name for
1063 organisation (eg, wormbase.org)
1064
1065 Returns : A scalar
1066 Args : on set, new value (a scalar or undef, optional)
1067
1068
1069 =cut
1070
1071 sub authority {
1072 my $self = shift;
1073
1074 return $self->{'authority'} = shift if @_;
1075 return $self->{'authority'};
1076 }
1077
1078
1079 =head2 namespace
1080
1081 Title : namespace
1082 Usage : $string = $obj->namespace()
1083 Function: A string representing the name space this identifier
1084 is valid in, often the database name or the name
1085 describing the collection
1086
1087 Returns : A scalar
1088 Args : on set, new value (a scalar or undef, optional)
1089
1090
1091 =cut
1092
1093 sub namespace {
1094 my $self = shift;
1095
1096 return $self->{'namespace'} = shift if @_;
1097 return $self->{'namespace'};
1098 }
1099
1100 =head1 L<Bio::DescribableI> methods
1101
1102 =cut
1103
1104 =head2 display_name
1105
1106 Title : display_name
1107 Usage : $string = $obj->display_name()
1108 Function: A string which is what should be displayed to the user
1109 the string should have no spaces (ideally, though a cautious
1110 user of this interface would not assumme this) and should be
1111 less than thirty characters (though again, double checking
1112 this is a good idea)
1113
1114 This is aliased to unigene_id().
1115
1116 Returns : A scalar
1117 Status : Virtual
1118
1119 =cut
1120
1121 sub display_name {
1122 return shift->unigene_id(@_);
1123 }
1124
1125
1126 =head2 description()
1127
1128 Title : description
1129 Usage : $string = $obj->description()
1130 Function: A text string suitable for displaying to the user a
1131 description. This string is likely to have spaces, but
1132 should not have any newlines or formatting - just plain
1133 text. The string should not be greater than 255 characters
1134 and clients can feel justified at truncating strings at 255
1135 characters for the purposes of display
1136
1137 This is already demanded by Bio::ClusterI and hence is
1138 present anyway.
1139
1140 Returns : A scalar
1141
1142
1143 =cut
1144
1145
1146 =head1 L<Bio::Factory::SequenceStreamI> methods
1147
1148 =cut
1149
1150 =head2 next_seq
1151
1152 Title : next_seq
1153 Usage : next_seq();
1154 Function: Returns the next seq as a Seq object as defined by
1155 $seq->sequence_factory(),
1156 at present an empty Bio::Seq::RichSeq object with
1157 just the accession_number() and pid() set
1158
1159 This iterator will not exhaust the array of member
1160 sequences. If you call next_seq() again after it returned
1161 undef, it will re-cycle through the list of member
1162 sequences.
1163
1164 Example : while ( my $sequence = $in->next_seq() ) {
1165 print $sequence->accession_number() . "\n";
1166 }
1167 Returns : Bio::PrimarySeqI object
1168 Args : None
1169
1170 =cut
1171
1172 sub next_seq {
1173 my ($obj) = @_;
1174
1175 if(! exists($obj->{'_seq_queue'})) {
1176 # re-initialize from array of sequence data
1177 $obj->{'_seq_queue'} = [@{$obj->sequences()}];
1178 }
1179 my $queue = $obj->{'_seq_queue'};
1180 # is queue exhausted (equivalent to end of stream)?
1181 if(! @$queue) {
1182 # yes, remove queue and signal to the caller
1183 delete $obj->{'_seq_queue'};
1184 return undef;
1185 }
1186 # no, still data in the queue: get the next one from the queue
1187 my $seq_h = shift(@$queue);
1188 # if this is not a simple hash ref, it's an object already, and we'll
1189 # return just that
1190 return $seq_h if(ref($seq_h) ne 'HASH');
1191 # nope, we need to assemble this object from scratch
1192 #
1193 # assemble the annotation collection
1194 my $ac = Bio::Annotation::Collection->new();
1195 foreach my $k (keys %$seq_h) {
1196 next if $k =~ /acc|pid|nid|version/;
1197 my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
1198 -value => $seq_h->{$k});
1199 $ac->add_Annotation($ann);
1200 }
1201 # assemble the initialization parameters and create object
1202 my $seqobj = $obj->sequence_factory->create(
1203 -accession_number => $seq_h->{acc},
1204 -pid => $seq_h->{pid},
1205 # why does NCBI prepend a 'g' to its own identifiers??
1206 -primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ?
1207 substr($seq_h->{nid},1) : $seq_h->{nid},
1208 -display_id => $seq_h->{acc},
1209 -seq_version => $seq_h->{version},
1210 -alphabet => $obj->{'_alphabet'},
1211 -namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank',
1212 -authority => $obj->authority(), # default is NCBI
1213 -species => $obj->species(),
1214 -annotation => $ac
1215 );
1216 return $seqobj;
1217 }
1218
1219 =head2 sequence_factory
1220
1221 Title : sequence_factory
1222 Usage : $seqio->sequence_factory($seqfactory)
1223 Function: Get/Set the Bio::Factory::SequenceFactoryI
1224 Returns : Bio::Factory::SequenceFactoryI
1225 Args : [optional] Bio::Factory::SequenceFactoryI
1226
1227
1228 =cut
1229
1230 sub sequence_factory {
1231 my ($self,$obj) = @_;
1232 if( defined $obj ) {
1233 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1234 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1235 }
1236 $self->{'_seqfactory'} = $obj;
1237 }
1238 $self->{'_seqfactory'};
1239 }
1240
1241 =head1 Private methods
1242
1243 =cut
1244
1245 =head2 _annotation_value
1246
1247 Title : _annotation_value
1248 Usage :
1249 Function: Private method.
1250 Example :
1251 Returns : the value (a string)
1252 Args : annotation key (a string)
1253 on set, annotation value (a string)
1254
1255
1256 =cut
1257
1258 sub _annotation_value{
1259 my $self = shift;
1260 my $key = shift;
1261
1262 my ($ann, $val);
1263 if(@_) {
1264 $val = shift;
1265 if(! defined($val)) {
1266 ($ann) = $self->annotation->remove_Annotations($key);
1267 return $ann ? $ann->value() : undef;
1268 }
1269 }
1270 ($ann) = $self->annotation->get_Annotations($key);
1271 if($ann && (! $val)) {
1272 # get mode and exists
1273 $val = $ann->value();
1274 } elsif($val) {
1275 # set mode
1276 if(! $ann) {
1277 $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
1278 $self->annotation->add_Annotation($ann);
1279 }
1280 $ann->value($val);
1281 }
1282 return $val;
1283 }
1284
1285
1286 =head2 _annotation_value_ary
1287
1288 Title : _annotation_value_ary
1289 Usage :
1290 Function: Private method.
1291 Example :
1292 Returns : reference to the array of values
1293 Args : annotation key (a string)
1294 on set, reference to an array holding the values
1295
1296
1297 =cut
1298
1299 sub _annotation_value_ary{
1300 my ($self,$key,$arr) = @_;
1301
1302 my $ac = $self->annotation;
1303 if($arr) {
1304 # purge first
1305 $ac->remove_Annotations($key);
1306 # then add as many values as are present
1307 foreach my $val (@$arr) {
1308 my $ann = Bio::Annotation::SimpleValue->new(-value => $val,
1309 -tagname => $key
1310 );
1311 $ac->add_Annotation($ann);
1312 }
1313 } else {
1314 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1315 $arr = [@vals];
1316 }
1317 return $arr;
1318 }
1319
1320
1321 =head2 _annotation_dblink
1322
1323 Title : _annotation_dblink
1324 Usage :
1325 Function: Private method.
1326 Example :
1327 Returns : array of accessions for the given database (namespace)
1328 Args : annotation key (a string)
1329 dbname (a string) (optional on get, mandatory on set)
1330 on set, accession or ID (a string), and version
1331
1332
1333 =cut
1334
1335 sub _annotation_dblink{
1336 my ($self,$key,$dbname,$acc,$version) = @_;
1337
1338 if($acc) {
1339 # set mode -- this is adding here
1340 my $ann = Bio::Annotation::DBLink->new(-tagname => $key,
1341 -primary_id => $acc,
1342 -database => $dbname,
1343 -version => $version);
1344 $self->annotation->add_Annotation($ann);
1345 return 1;
1346 } else {
1347 # get mode
1348 my @anns = $self->annotation->get_Annotations($key);
1349 # filter out those that don't match the requested database
1350 if($dbname) {
1351 @anns = grep { $_->database() eq $dbname; } @anns;
1352 }
1353 return map { $_->primary_id(); } @anns;
1354 }
1355 }
1356
1357 =head2 _remove_dblink
1358
1359 Title : _remove_dblink
1360 Usage :
1361 Function: Private method.
1362 Example :
1363 Returns : array of accessions for the given database (namespace)
1364 Args : annotation key (a string)
1365 dbname (a string) (optional)
1366
1367
1368 =cut
1369
1370 sub _remove_dblink{
1371 my ($self,$key,$dbname) = @_;
1372
1373 my $ac = $self->annotation();
1374 my @anns = ();
1375 if($dbname) {
1376 foreach my $ann ($ac->remove_Annotations($key)) {
1377 if($ann->database() eq $dbname) {
1378 push(@anns, $ann);
1379 } else {
1380 $ac->add_Annotation($ann);
1381 }
1382 }
1383 } else {
1384 @anns = $ac->remove_Annotations($key);
1385 }
1386 return map { $_->primary_id(); } @anns;
1387 }
1388
1389
1390 #####################################################################
1391 # aliases for naming consistency or other reasons #
1392 #####################################################################
1393
1394 *sequence = \&sequences;
1395
1396 1;