Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Cluster/UniGene.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: UniGene.pm,v 1.23.2.2 2003/09/15 01:52:03 andrew Exp $ | |
2 # | |
3 # BioPerl module for Bio::Cluster::UniGene.pm | |
4 # | |
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz> | |
6 # | |
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green | |
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago | |
9 # http://meg.otago.ac.nz/ | |
10 # | |
11 # You may distribute this module under the same terms as perl itself | |
12 # | |
13 # _history | |
14 # April 17, 2002 - Initial implementation by Andrew Macgregor | |
15 # POD documentation - main docs before the code | |
16 | |
17 =head1 NAME | |
18 | |
19 Bio::Cluster::UniGene - UniGene object | |
20 | |
21 =head1 SYNOPSIS | |
22 | |
23 use Bio::Cluster::UniGene; | |
24 use Bio::ClusterIO; | |
25 | |
26 $stream = Bio::ClusterIO->new('-file' => "Hs.data", | |
27 '-format' => "unigene"); | |
28 # note: we quote -format to keep older perl's from complaining. | |
29 | |
30 while ( my $in = $stream->next_cluster() ) { | |
31 print $in->unigene_id() . "\n"; | |
32 while ( my $sequence = $in->next_seq() ) { | |
33 print $sequence->accession_number() . "\n"; | |
34 } | |
35 } | |
36 | |
37 =head1 DESCRIPTION | |
38 | |
39 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface | |
40 for the representation if UniGene clusters in Bioperl. It is returned | |
41 by the L<Bio::ClusterIO> parser for unigene format and contains all | |
42 the data associated with one UniGene record. | |
43 | |
44 This class implements several interfaces and hence can be used | |
45 wherever instances of such interfaces are expected. In particular, the | |
46 interfaces are L<Bio::ClusterI> as the base interface for all cluster | |
47 representations, and in addition L<Bio::IdentifiableI> and | |
48 L<Bio::DescribableI>. | |
49 | |
50 The following lists the UniGene specific methods that are available | |
51 (see below for details). Be aware next_XXX iterators take a snapshot | |
52 of the array property when they are first called, and this snapshot is | |
53 not reset until the iterator is exhausted. Hence, once called you need | |
54 to exhaust the iterator to see any changes that have been made to the | |
55 property in the meantime. You will usually want to use the | |
56 non-iterator equivalents and loop over the elements yourself. | |
57 | |
58 new() - standard new call | |
59 | |
60 unigene_id() - set/get unigene_id | |
61 | |
62 title() - set/get title (description) | |
63 | |
64 gene() - set/get gene | |
65 | |
66 cytoband() - set/get cytoband | |
67 | |
68 mgi() - set/get mgi | |
69 | |
70 locuslink() - set/get locuslink | |
71 | |
72 gnm_terminus() - set/get gnm_terminus | |
73 | |
74 scount() - set/get scount | |
75 | |
76 express() - set/get express, currently takes/returns a reference to an | |
77 array of expressed tissues | |
78 | |
79 next_express() - returns the next tissue expression from the expressed | |
80 tissue array | |
81 | |
82 chromosome() - set/get chromosome, currently takes/returns a reference | |
83 to an array of chromosome lines | |
84 | |
85 next_chromosome() - returns the next chromosome line from the array of | |
86 chromosome lines | |
87 | |
88 sts() - set/get sts, currently takes/returns a reference to an array | |
89 of sts lines | |
90 | |
91 next_sts() - returns the next sts line from the array of sts lines | |
92 | |
93 txmap() - set/get txmap, currently takes/returns a reference to an | |
94 array of txmap lines | |
95 | |
96 next_txmap() - returns the next txmap line from the array of txmap | |
97 lines | |
98 | |
99 protsim() - set/get protsim, currently takes/returns a reference to an | |
100 array of protsim lines | |
101 | |
102 next_protsim() - returns the next protsim line from the array of | |
103 protsim lines | |
104 | |
105 sequences() - set/get sequence, currently takes/returns a reference to | |
106 an array of references to seq info | |
107 | |
108 next_seq() - returns a Seq object that currently only contains an | |
109 accession number | |
110 | |
111 | |
112 =head1 Implemented Interfaces | |
113 | |
114 This class implementes the following interfaces. | |
115 | |
116 =over 4 | |
117 | |
118 =item Bio::Cluster::UniGeneI | |
119 | |
120 This includes implementing Bio::ClusterI. | |
121 | |
122 =item Bio::IdentifiableI | |
123 | |
124 =item Bio::DescribableI | |
125 | |
126 =item Bio::AnnotatableI | |
127 | |
128 =item Bio::Factory::SequenceStreamI | |
129 | |
130 =back | |
131 | |
132 =head1 FEEDBACK | |
133 | |
134 | |
135 =head2 Mailing Lists | |
136 | |
137 User feedback is an integral part of the evolution of this and other | |
138 Bioperl modules. Send your comments and suggestions preferably to one | |
139 of the Bioperl mailing lists. Your participation is much appreciated. | |
140 | |
141 bioperl-l@bioperl.org - General discussion | |
142 http://bio.perl.org/MailList.html - About the mailing lists | |
143 | |
144 =head2 Reporting Bugs | |
145 | |
146 Report bugs to the Bioperl bug tracking system to help us keep track | |
147 the bugs and their resolution. Bug reports can be submitted via email | |
148 or the web: | |
149 | |
150 bioperl-bugs@bioperl.org | |
151 http://bugzilla.bioperl.org/ | |
152 | |
153 =head1 AUTHOR - Andrew Macgregor | |
154 | |
155 Email andrew@anatomy.otago.ac.nz | |
156 | |
157 =head1 CONTRIBUTORS | |
158 | |
159 Hilmar Lapp, hlapp at gmx.net | |
160 | |
161 =head1 APPENDIX | |
162 | |
163 | |
164 The rest of the documentation details each of the object | |
165 methods. Internal methods are usually preceded with a "_". | |
166 | |
167 =cut | |
168 | |
169 # Let the code begin... | |
170 | |
171 | |
172 package Bio::Cluster::UniGene; | |
173 use vars qw(@ISA); | |
174 use strict; | |
175 | |
176 | |
177 use Bio::Root::Root; | |
178 use Bio::IdentifiableI; | |
179 use Bio::DescribableI; | |
180 use Bio::AnnotatableI; | |
181 use Bio::Annotation::Collection; | |
182 use Bio::Annotation::DBLink; | |
183 use Bio::Annotation::SimpleValue; | |
184 use Bio::Species; | |
185 use Bio::Factory::SequenceStreamI; | |
186 use Bio::Seq::SeqFactory; | |
187 use Bio::Cluster::UniGeneI; | |
188 | |
189 @ISA = qw(Bio::Root::Root Bio::Cluster::UniGeneI | |
190 Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI | |
191 Bio::Factory::SequenceStreamI); | |
192 | |
193 my %species_map = ( | |
194 'Aga' => "Anopheles gambiae", | |
195 'At' => "Arabidopsis thaliana", | |
196 'Bt' => "Bos taurus", | |
197 'Cel' => "Caenorhabditis elegans", | |
198 'Cin' => "Ciona intestinalis", | |
199 'Cre' => "Chlamydomonas reinhardtii", | |
200 'Ddi' => "Dictyostelium discoideum", | |
201 'Dr' => "Danio rerio", | |
202 'Dm' => "Drosophila melanogaster", | |
203 'Gga' => "Gallus gallus", | |
204 'Gma' => "Glycine max", | |
205 'Hs' => "Homo sapiens", | |
206 'Hv' => "Hordeum vulgare", | |
207 'Les' => "Lycopersicon esculentum", | |
208 'Mtr' => "Medicago truncatula", | |
209 'Mm' => "Mus musculus", | |
210 'Os' => "Oryza sativa", | |
211 'Ola' => "Oryzias latipes", | |
212 'Rn' => "Rattus norvegicus", | |
213 'Str' => "Silurana tropicalis", | |
214 'Sbi' => "Sorghum bicolor", | |
215 'Ssc' => "Sus scrofa", | |
216 'Ta' => "Triticum aestivum", | |
217 'Xl' => "Xenopus laevis", | |
218 'Zm' => "Zea mays", | |
219 ); | |
220 | |
221 | |
222 =head2 new | |
223 | |
224 Title : new | |
225 Usage : used by ClusterIO | |
226 Returns : a new Bio::Cluster::Unigene object | |
227 | |
228 =cut | |
229 | |
230 sub new { | |
231 # standard new call.. | |
232 my($caller,@args) = @_; | |
233 my $self = $caller->SUPER::new(@args); | |
234 | |
235 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) = | |
236 $self->_rearrange([qw(UNIGENE_ID | |
237 DESCRIPTION | |
238 MEMBERS | |
239 SIZE | |
240 SPECIES | |
241 DISPLAY_ID | |
242 OBJECT_ID | |
243 NAMESPACE | |
244 AUTHORITY | |
245 VERSION | |
246 SEQFACTORY | |
247 )], @args); | |
248 | |
249 $self->{'_alphabet'} = 'dna'; | |
250 | |
251 $self->unigene_id($ugid) if $ugid; | |
252 $self->description($desc) if $desc; | |
253 $self->sequences($mems) if $mems; | |
254 $self->size($size) if defined($size); | |
255 $self->display_id($dispid) if $dispid; # overwrites ugid | |
256 $self->object_id($id) if $id; # overwrites dispid | |
257 $self->namespace($ns || 'UniGene'); | |
258 $self->authority($auth || 'NCBI'); | |
259 $self->version($v) if defined($v); | |
260 if( ! defined $seqfact ) { | |
261 $seqfact = new Bio::Seq::SeqFactory | |
262 (-verbose => $self->verbose(), | |
263 -type => 'Bio::Seq::RichSeq'); | |
264 } | |
265 $self->sequence_factory($seqfact); | |
266 if( (! $species) && (defined $self->unigene_id() && | |
267 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) { | |
268 # try set a default one depending on the ID | |
269 $species = $species_map{$1}; | |
270 } | |
271 $self->species($species); | |
272 return $self; | |
273 } | |
274 | |
275 | |
276 =head1 L<Bio::Cluster::UniGeneI> methods | |
277 | |
278 =cut | |
279 | |
280 =head2 unigene_id | |
281 | |
282 Title : unigene_id | |
283 Usage : unigene_id(); | |
284 Function: Returns the unigene_id associated with the object. | |
285 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id) | |
286 Returns : A string | |
287 Args : None or an id | |
288 | |
289 | |
290 =cut | |
291 | |
292 sub unigene_id { | |
293 my ($obj,$value) = @_; | |
294 if( defined $value) { | |
295 $obj->{'unigene_id'} = $value; | |
296 } | |
297 return $obj->{'unigene_id'}; | |
298 } | |
299 | |
300 | |
301 | |
302 =head2 title | |
303 | |
304 Title : title | |
305 Usage : title(); | |
306 Function: Returns the title associated with the object. | |
307 Example : $title = $unigene->title or $unigene->title($title) | |
308 Returns : A string | |
309 Args : None or a title | |
310 | |
311 | |
312 =cut | |
313 | |
314 sub title { | |
315 my ($obj,$value) = @_; | |
316 if( defined $value) { | |
317 $obj->{'title'} = $value; | |
318 } | |
319 return $obj->{'title'}; | |
320 } | |
321 | |
322 | |
323 =head2 gene | |
324 | |
325 Title : gene | |
326 Usage : gene(); | |
327 Function: Returns the gene associated with the object. | |
328 Example : $gene = $unigene->gene or $unigene->gene($gene) | |
329 Returns : A string | |
330 Args : None or a gene | |
331 | |
332 | |
333 =cut | |
334 | |
335 sub gene { | |
336 my $self = shift; | |
337 return $self->_annotation_value('gene_name', @_); | |
338 } | |
339 | |
340 | |
341 =head2 cytoband | |
342 | |
343 Title : cytoband | |
344 Usage : cytoband(); | |
345 Function: Returns the cytoband associated with the object. | |
346 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband) | |
347 Returns : A string | |
348 Args : None or a cytoband | |
349 | |
350 | |
351 =cut | |
352 | |
353 sub cytoband { | |
354 my $self = shift; | |
355 return $self->_annotation_value('cyto_band', @_); | |
356 } | |
357 | |
358 =head2 mgi | |
359 | |
360 Title : mgi | |
361 Usage : mgi(); | |
362 Function: Returns the mgi associated with the object. | |
363 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi) | |
364 Returns : A string | |
365 Args : None or a mgi | |
366 | |
367 | |
368 =cut | |
369 | |
370 sub mgi { | |
371 my $self = shift; | |
372 my $acc; | |
373 | |
374 if(@_) { | |
375 # purge first | |
376 $self->_remove_dblink('dblink','MGI'); | |
377 # then add if a valid value is present | |
378 if($acc = shift) { | |
379 $self->_annotation_dblink('dblink','MGI',$acc); | |
380 } | |
381 } else { | |
382 ($acc) = $self->_annotation_dblink('dblink','MGI'); | |
383 } | |
384 return $acc; | |
385 } | |
386 | |
387 | |
388 =head2 locuslink | |
389 | |
390 Title : locuslink | |
391 Usage : locuslink(); | |
392 Function: Returns or stores a reference to an array containing locuslink data. | |
393 Returns : An array reference | |
394 Args : None or an array reference | |
395 | |
396 =cut | |
397 | |
398 sub locuslink { | |
399 my ($self,$ll) = @_; | |
400 | |
401 if($ll) { | |
402 # purge first | |
403 $self->_remove_dblink('dblink','LocusLink'); | |
404 # then add as many accessions as are present | |
405 foreach my $acc (@$ll) { | |
406 $self->_annotation_dblink('dblink','LocusLink',$acc); | |
407 } | |
408 } else { | |
409 my @accs = $self->_annotation_dblink('dblink','LocusLink'); | |
410 $ll = [@accs]; | |
411 } | |
412 return $ll; | |
413 } | |
414 | |
415 | |
416 =head2 gnm_terminus | |
417 | |
418 Title : gnm_terminus | |
419 Usage : gnm_terminus(); | |
420 Function: Returns the gnm_terminus associated with the object. | |
421 Example : $gnm_terminus = $unigene->gnm_terminus or | |
422 $unigene->gnm_terminus($gnm_terminus) | |
423 Returns : A string | |
424 Args : None or a gnm_terminus | |
425 | |
426 =cut | |
427 | |
428 sub gnm_terminus { | |
429 my $self = shift; | |
430 return $self->_annotation_value('gnm_terminus', @_); | |
431 } | |
432 | |
433 =head2 scount | |
434 | |
435 Title : scount | |
436 Usage : scount(); | |
437 Function: Returns the scount associated with the object. | |
438 Example : $scount = $unigene->scount or $unigene->scount($scount) | |
439 Returns : A string | |
440 Args : None or a scount | |
441 | |
442 =cut | |
443 | |
444 sub scount { | |
445 my ($obj,$value) = @_; | |
446 if( defined $value) { | |
447 $obj->{'scount'} = $value; | |
448 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) { | |
449 $obj->{'scount'} = $obj->size(); | |
450 } | |
451 return $obj->{'scount'}; | |
452 } | |
453 | |
454 | |
455 =head2 express | |
456 | |
457 Title : express | |
458 Usage : express(); | |
459 Function: Returns or stores a reference to an array containing | |
460 tissue expression data | |
461 Returns : An array reference | |
462 Args : None or an array reference | |
463 | |
464 =cut | |
465 | |
466 sub express { | |
467 my $self = shift; | |
468 | |
469 return $self->_annotation_value_ary('expressed',@_); | |
470 } | |
471 | |
472 | |
473 =head2 chromosome | |
474 | |
475 Title : chromosome | |
476 Usage : chromosome(); | |
477 Function: Returns or stores a reference to an array containing | |
478 chromosome lines | |
479 Returns : An array reference | |
480 Args : None or an array reference | |
481 | |
482 =cut | |
483 | |
484 sub chromosome { | |
485 my $self = shift; | |
486 | |
487 return $self->_annotation_value_ary('chromosome',@_); | |
488 } | |
489 | |
490 | |
491 =head2 sts | |
492 | |
493 Title : sts | |
494 Usage : sts(); | |
495 Function: Returns or stores a reference to an array containing sts lines | |
496 | |
497 Returns : An array reference | |
498 Args : None or an array reference | |
499 | |
500 =cut | |
501 | |
502 sub sts { | |
503 my $self = shift; | |
504 | |
505 return $self->_annotation_value_ary('sts',@_); | |
506 } | |
507 | |
508 | |
509 =head2 txmap | |
510 | |
511 Title : txmap | |
512 Usage : txmap(); | |
513 Function: Returns or stores a reference to an array containing txmap lines | |
514 | |
515 Returns : An array reference | |
516 Args : None or an array reference | |
517 | |
518 =cut | |
519 | |
520 sub txmap { | |
521 my $self = shift; | |
522 | |
523 return $self->_annotation_value_ary('txmap',@_); | |
524 } | |
525 | |
526 | |
527 =head2 protsim | |
528 | |
529 Title : protsim | |
530 Usage : protsim(); | |
531 Function: Returns or stores a reference to an array containing protsim lines | |
532 This should really only be used by ClusterIO, not directly | |
533 Returns : An array reference | |
534 Args : None or an array reference | |
535 | |
536 =cut | |
537 | |
538 sub protsim { | |
539 my $self = shift; | |
540 | |
541 return $self->_annotation_value_ary('protsim',@_); | |
542 } | |
543 | |
544 | |
545 =head2 sequences | |
546 | |
547 Title : sequences | |
548 Usage : sequences(); | |
549 Function: Returns or stores a reference to an array containing | |
550 sequence data. | |
551 | |
552 This is mostly reserved for ClusterIO parsers. You should | |
553 use get_members() for get and add_member()/remove_members() | |
554 for set. | |
555 | |
556 Returns : An array reference, or undef | |
557 Args : None or an array reference or undef | |
558 | |
559 =cut | |
560 | |
561 sub sequences { | |
562 my $self = shift; | |
563 | |
564 return $self->{'members'} = shift if @_; | |
565 return $self->{'members'}; | |
566 } | |
567 | |
568 =head2 species | |
569 | |
570 Title : species | |
571 Usage : $obj->species($newval) | |
572 Function: Get/set the species object for this Unigene cluster. | |
573 Example : | |
574 Returns : value of species (a L<Bio::Species> object) | |
575 Args : on set, new value (a L<Bio::Species> object or | |
576 the binomial name, or undef, optional) | |
577 | |
578 | |
579 =cut | |
580 | |
581 sub species{ | |
582 my $self = shift; | |
583 | |
584 if(@_) { | |
585 my $species = shift; | |
586 if($species && (! ref($species))) { | |
587 my @class = reverse(split(' ',$species)); | |
588 $species = Bio::Species->new(-classification => \@class); | |
589 } | |
590 return $self->{'species'} = $species; | |
591 } | |
592 return $self->{'species'}; | |
593 } | |
594 | |
595 | |
596 =head1 L<Bio::ClusterI> methods | |
597 | |
598 =cut | |
599 | |
600 =head2 display_id | |
601 | |
602 Title : display_id | |
603 Usage : | |
604 Function: Get/set the display name or identifier for the cluster | |
605 | |
606 This is aliased to unigene_id(). | |
607 | |
608 Returns : a string | |
609 Args : optional, on set the display ID ( a string) | |
610 | |
611 =cut | |
612 | |
613 sub display_id{ | |
614 return shift->unigene_id(@_); | |
615 } | |
616 | |
617 =head2 description | |
618 | |
619 Title : description | |
620 Usage : Bio::ClusterI->description("POLYUBIQUITIN") | |
621 Function: get/set for the consensus description of the cluster | |
622 | |
623 This is aliased to title(). | |
624 | |
625 Returns : the description string | |
626 Args : Optional the description string | |
627 | |
628 =cut | |
629 | |
630 sub description{ | |
631 return shift->title(@_); | |
632 } | |
633 | |
634 =head2 size | |
635 | |
636 Title : size | |
637 Usage : Bio::ClusterI->size(); | |
638 Function: get for the size of the family, | |
639 calculated from the number of members | |
640 | |
641 This is aliased to scount(). | |
642 | |
643 Returns : the size of the cluster | |
644 Args : | |
645 | |
646 =cut | |
647 | |
648 sub size { | |
649 my $self = shift; | |
650 | |
651 # hard-wiring the size is allowed if there are no sequences | |
652 return $self->scount(@_) unless defined($self->sequences()); | |
653 # but we can't change the number of members through this method | |
654 my $n = scalar(@{$self->sequences()}); | |
655 if(@_ && ($n != $_[0])) { | |
656 $self->throw("Cannot change cluster size using size() from $n to ". | |
657 $_[0]); | |
658 } | |
659 return $n; | |
660 } | |
661 | |
662 =head2 cluster_score | |
663 | |
664 Title : cluster_score | |
665 Usage : $cluster ->cluster_score(100); | |
666 Function: get/set for cluster_score which | |
667 represent the score in which the clustering | |
668 algorithm assigns to this cluster. | |
669 | |
670 For UniGene clusters, there really is no cluster score that | |
671 would come with the data. However, we provide an | |
672 implementation here so that you can score UniGene clusters | |
673 if you want to. | |
674 | |
675 Returns : a number | |
676 Args : optionally, on set a number | |
677 | |
678 =cut | |
679 | |
680 sub cluster_score{ | |
681 my $self = shift; | |
682 | |
683 return $self->{'cluster_score'} = shift if @_; | |
684 return $self->{'cluster_score'}; | |
685 } | |
686 | |
687 =head2 get_members | |
688 | |
689 Title : get_members | |
690 Usage : Bio::ClusterI->get_members(($seq1, $seq2)); | |
691 Function: retrieve the members of the family by some criteria | |
692 | |
693 Will return all members if no criteria are provided. | |
694 | |
695 At this time this implementation does not support | |
696 specifying criteria and will always return all members. | |
697 | |
698 Returns : the array of members | |
699 Args : | |
700 | |
701 =cut | |
702 | |
703 sub get_members { | |
704 my $self = shift; | |
705 | |
706 my $mems = $self->sequences() || []; | |
707 # already objects? | |
708 if(@$mems && (ref($mems->[0]) eq "HASH")) { | |
709 # nope, we need to build the object list from scratch | |
710 my @memlist = (); | |
711 while(my $seq = $self->next_seq()) { | |
712 push(@memlist, $seq); | |
713 } | |
714 # we cache this array of objects as the new member list | |
715 $mems = \@memlist; | |
716 $self->sequences($mems); | |
717 } | |
718 # done | |
719 return @$mems; | |
720 } | |
721 | |
722 | |
723 =head1 Annotatable view at the object properties | |
724 | |
725 =cut | |
726 | |
727 =head2 annotation | |
728 | |
729 Title : annotation | |
730 Usage : $obj->annotation($newval) | |
731 Function: Get/set the L<Bio::AnnotationCollectionI> object for | |
732 this UniGene cluster. | |
733 | |
734 Many attributes of this class are actually stored within | |
735 the annotation collection object as L<Bio::AnnotationI> | |
736 compliant objects, so you can conveniently access them | |
737 through the same interface as you would e.g. access | |
738 L<Bio::SeqI> annotation properties. | |
739 | |
740 If you call this method in set mode and replace the | |
741 annotation collection with another one you should know | |
742 exactly what you are doing. | |
743 | |
744 Example : | |
745 Returns : a L<Bio::AnnotationCollectionI> compliant object | |
746 Args : on set, new value (a L<Bio::AnnotationCollectionI> | |
747 compliant object or undef, optional) | |
748 | |
749 | |
750 =cut | |
751 | |
752 sub annotation{ | |
753 my $self = shift; | |
754 | |
755 if(@_) { | |
756 return $self->{'annotation'} = shift; | |
757 } elsif(! exists($self->{'annotation'})) { | |
758 $self->{'annotation'} = Bio::Annotation::Collection->new(); | |
759 } | |
760 return $self->{'annotation'}; | |
761 } | |
762 | |
763 | |
764 =head1 Implementation specific methods | |
765 | |
766 These are mostly for adding/removing to array properties, and for | |
767 methods with special functionality. | |
768 | |
769 =cut | |
770 | |
771 =head2 add_member | |
772 | |
773 Title : add_member | |
774 Usage : | |
775 Function: Adds a member object to the list of members. | |
776 Example : | |
777 Returns : TRUE if the new member was successfuly added, and FALSE | |
778 otherwise. | |
779 Args : The member to add. | |
780 | |
781 | |
782 =cut | |
783 | |
784 sub add_member{ | |
785 my ($self,@mems) = @_; | |
786 | |
787 my $memlist = $self->{'members'} || []; | |
788 # this is an object interface; is the member list already objects? | |
789 if(@$memlist && (ref($memlist->[0]) eq "HASH")) { | |
790 # nope, convert to objects | |
791 $memlist = [$self->get_members()]; | |
792 } | |
793 # add new member(s) | |
794 push(@$memlist, @mems); | |
795 # store if we created this array ref ourselves | |
796 $self->sequences($memlist); | |
797 # done | |
798 return 1; | |
799 } | |
800 | |
801 =head2 remove_members | |
802 | |
803 Title : remove_members | |
804 Usage : | |
805 Function: Remove the list of members for this cluster such that the | |
806 member list is undefined afterwards (as opposed to zero members). | |
807 Example : | |
808 Returns : the previous list of members | |
809 Args : none | |
810 | |
811 | |
812 =cut | |
813 | |
814 sub remove_members{ | |
815 my $self = shift; | |
816 | |
817 my @mems = $self->get_members(); | |
818 $self->sequences(undef); | |
819 return @mems; | |
820 } | |
821 | |
822 | |
823 =head2 next_locuslink | |
824 | |
825 Title : next_locuslink | |
826 Usage : next_locuslink(); | |
827 Function: Returns the next locuslink from an array referred | |
828 to using $obj->{'locuslink'} | |
829 | |
830 If you call this iterator again after it returned undef, it | |
831 will re-cycle through the list of elements. Changes in the | |
832 underlying array property while you loop over this iterator | |
833 will not be reflected until you exhaust the iterator. | |
834 | |
835 Example : while ( my $locuslink = $in->next_locuslink() ) { | |
836 print "$locuslink\n"; | |
837 } | |
838 Returns : String | |
839 Args : None | |
840 | |
841 =cut | |
842 | |
843 sub next_locuslink { | |
844 my ($obj) = @_; | |
845 | |
846 return $obj->_next_element("ll","locuslink"); | |
847 } | |
848 | |
849 =head2 next_express | |
850 | |
851 Title : next_express | |
852 Usage : next_express(); | |
853 Function: Returns the next tissue from an array referred | |
854 to using $obj->{'express'} | |
855 | |
856 If you call this iterator again after it returned undef, it | |
857 will re-cycle through the list of elements. Changes in the | |
858 underlying array property while you loop over this iterator | |
859 will not be reflected until you exhaust the iterator. | |
860 | |
861 Example : while ( my $express = $in->next_express() ) { | |
862 print "$express\n"; | |
863 } | |
864 Returns : String | |
865 Args : None | |
866 | |
867 =cut | |
868 | |
869 sub next_express { | |
870 my ($obj) = @_; | |
871 | |
872 return $obj->_next_element("express","express"); | |
873 } | |
874 | |
875 | |
876 =head2 next_chromosome | |
877 | |
878 Title : next_chromosome | |
879 Usage : next_chromosome(); | |
880 Function: Returns the next chromosome line from an array referred | |
881 to using $obj->{'chromosome'} | |
882 | |
883 If you call this iterator again after it returned undef, it | |
884 will re-cycle through the list of elements. Changes in the | |
885 underlying array property while you loop over this iterator | |
886 will not be reflected until you exhaust the iterator. | |
887 | |
888 Example : while ( my $chromosome = $in->next_chromosome() ) { | |
889 print "$chromosome\n"; | |
890 } | |
891 Returns : String | |
892 Args : None | |
893 | |
894 =cut | |
895 | |
896 sub next_chromosome { | |
897 my ($obj) = @_; | |
898 | |
899 return $obj->_next_element("chr","chromosome"); | |
900 } | |
901 | |
902 | |
903 =head2 next_protsim | |
904 | |
905 Title : next_protsim | |
906 Usage : next_protsim(); | |
907 Function: Returns the next protsim line from an array referred | |
908 to using $obj->{'protsim'} | |
909 | |
910 If you call this iterator again after it returned undef, it | |
911 will re-cycle through the list of elements. Changes in the | |
912 underlying array property while you loop over this iterator | |
913 will not be reflected until you exhaust the iterator. | |
914 | |
915 Example : while ( my $protsim = $in->next_protsim() ) { | |
916 print "$protsim\n"; | |
917 } | |
918 Returns : String | |
919 Args : None | |
920 | |
921 =cut | |
922 | |
923 sub next_protsim { | |
924 my ($obj) = @_; | |
925 | |
926 return $obj->_next_element("protsim","protsim"); | |
927 } | |
928 | |
929 | |
930 =head2 next_sts | |
931 | |
932 Title : next_sts | |
933 Usage : next_sts(); | |
934 Function: Returns the next sts line from an array referred | |
935 to using $obj->{'sts'} | |
936 | |
937 If you call this iterator again after it returned undef, it | |
938 will re-cycle through the list of elements. Changes in the | |
939 underlying array property while you loop over this iterator | |
940 will not be reflected until you exhaust the iterator. | |
941 | |
942 Example : while ( my $sts = $in->next_sts() ) { | |
943 print "$sts\n"; | |
944 } | |
945 Returns : String | |
946 Args : None | |
947 | |
948 =cut | |
949 | |
950 sub next_sts { | |
951 my ($obj) = @_; | |
952 | |
953 return $obj->_next_element("sts","sts"); | |
954 } | |
955 | |
956 | |
957 =head2 next_txmap | |
958 | |
959 Title : next_txmap | |
960 Usage : next_txmap(); | |
961 Function: Returns the next txmap line from an array | |
962 referred to using $obj->{'txmap'} | |
963 | |
964 If you call this iterator again after it returned undef, it | |
965 will re-cycle through the list of elements. Changes in the | |
966 underlying array property while you loop over this iterator | |
967 will not be reflected until you exhaust the iterator. | |
968 | |
969 Example : while ( my $tsmap = $in->next_txmap() ) { | |
970 print "$txmap\n"; | |
971 } | |
972 Returns : String | |
973 Args : None | |
974 | |
975 =cut | |
976 | |
977 sub next_txmap { | |
978 my ($obj) = @_; | |
979 | |
980 return $obj->_next_element("txmap","txmap"); | |
981 } | |
982 | |
983 ############################### | |
984 # private method | |
985 # | |
986 # args: prefix name for the queue | |
987 # name of the method from which to re-fill | |
988 # returns: the next element from that queue, or undef if the queue is empty | |
989 ############################### | |
990 sub _next_element{ | |
991 my ($self,$queuename,$meth) = @_; | |
992 | |
993 $queuename = "_".$queuename."_queue"; | |
994 if(! exists($self->{$queuename})) { | |
995 # re-initialize from array of sequence data | |
996 $self->{$queuename} = [@{$self->$meth() }]; | |
997 } | |
998 my $queue = $self->{$queuename}; | |
999 # is queue exhausted (equivalent to end of stream)? | |
1000 if(! @$queue) { | |
1001 # yes, remove queue and signal to the caller | |
1002 delete $self->{$queuename}; | |
1003 return undef; | |
1004 } | |
1005 return shift(@$queue); | |
1006 } | |
1007 | |
1008 =head1 L<Bio::IdentifiableI> methods | |
1009 | |
1010 =cut | |
1011 | |
1012 =head2 object_id | |
1013 | |
1014 Title : object_id | |
1015 Usage : $string = $obj->object_id() | |
1016 Function: a string which represents the stable primary identifier | |
1017 in this namespace of this object. For DNA sequences this | |
1018 is its accession_number, similarly for protein sequences | |
1019 | |
1020 This is aliased to unigene_id(). | |
1021 | |
1022 Returns : A scalar | |
1023 | |
1024 | |
1025 =cut | |
1026 | |
1027 sub object_id { | |
1028 return shift->unigene_id(@_); | |
1029 } | |
1030 | |
1031 =head2 version | |
1032 | |
1033 Title : version | |
1034 Usage : $version = $obj->version() | |
1035 Function: a number which differentiates between versions of | |
1036 the same object. Higher numbers are considered to be | |
1037 later and more relevant, but a single object described | |
1038 the same identifier should represent the same concept | |
1039 | |
1040 Unigene clusters usually won''t have a version, so this | |
1041 will be mostly undefined. | |
1042 | |
1043 Returns : A number | |
1044 Args : on set, new value (a scalar or undef, optional) | |
1045 | |
1046 | |
1047 =cut | |
1048 | |
1049 sub version { | |
1050 my $self = shift; | |
1051 | |
1052 return $self->{'version'} = shift if @_; | |
1053 return $self->{'version'}; | |
1054 } | |
1055 | |
1056 | |
1057 =head2 authority | |
1058 | |
1059 Title : authority | |
1060 Usage : $authority = $obj->authority() | |
1061 Function: a string which represents the organisation which | |
1062 granted the namespace, written as the DNS name for | |
1063 organisation (eg, wormbase.org) | |
1064 | |
1065 Returns : A scalar | |
1066 Args : on set, new value (a scalar or undef, optional) | |
1067 | |
1068 | |
1069 =cut | |
1070 | |
1071 sub authority { | |
1072 my $self = shift; | |
1073 | |
1074 return $self->{'authority'} = shift if @_; | |
1075 return $self->{'authority'}; | |
1076 } | |
1077 | |
1078 | |
1079 =head2 namespace | |
1080 | |
1081 Title : namespace | |
1082 Usage : $string = $obj->namespace() | |
1083 Function: A string representing the name space this identifier | |
1084 is valid in, often the database name or the name | |
1085 describing the collection | |
1086 | |
1087 Returns : A scalar | |
1088 Args : on set, new value (a scalar or undef, optional) | |
1089 | |
1090 | |
1091 =cut | |
1092 | |
1093 sub namespace { | |
1094 my $self = shift; | |
1095 | |
1096 return $self->{'namespace'} = shift if @_; | |
1097 return $self->{'namespace'}; | |
1098 } | |
1099 | |
1100 =head1 L<Bio::DescribableI> methods | |
1101 | |
1102 =cut | |
1103 | |
1104 =head2 display_name | |
1105 | |
1106 Title : display_name | |
1107 Usage : $string = $obj->display_name() | |
1108 Function: A string which is what should be displayed to the user | |
1109 the string should have no spaces (ideally, though a cautious | |
1110 user of this interface would not assumme this) and should be | |
1111 less than thirty characters (though again, double checking | |
1112 this is a good idea) | |
1113 | |
1114 This is aliased to unigene_id(). | |
1115 | |
1116 Returns : A scalar | |
1117 Status : Virtual | |
1118 | |
1119 =cut | |
1120 | |
1121 sub display_name { | |
1122 return shift->unigene_id(@_); | |
1123 } | |
1124 | |
1125 | |
1126 =head2 description() | |
1127 | |
1128 Title : description | |
1129 Usage : $string = $obj->description() | |
1130 Function: A text string suitable for displaying to the user a | |
1131 description. This string is likely to have spaces, but | |
1132 should not have any newlines or formatting - just plain | |
1133 text. The string should not be greater than 255 characters | |
1134 and clients can feel justified at truncating strings at 255 | |
1135 characters for the purposes of display | |
1136 | |
1137 This is already demanded by Bio::ClusterI and hence is | |
1138 present anyway. | |
1139 | |
1140 Returns : A scalar | |
1141 | |
1142 | |
1143 =cut | |
1144 | |
1145 | |
1146 =head1 L<Bio::Factory::SequenceStreamI> methods | |
1147 | |
1148 =cut | |
1149 | |
1150 =head2 next_seq | |
1151 | |
1152 Title : next_seq | |
1153 Usage : next_seq(); | |
1154 Function: Returns the next seq as a Seq object as defined by | |
1155 $seq->sequence_factory(), | |
1156 at present an empty Bio::Seq::RichSeq object with | |
1157 just the accession_number() and pid() set | |
1158 | |
1159 This iterator will not exhaust the array of member | |
1160 sequences. If you call next_seq() again after it returned | |
1161 undef, it will re-cycle through the list of member | |
1162 sequences. | |
1163 | |
1164 Example : while ( my $sequence = $in->next_seq() ) { | |
1165 print $sequence->accession_number() . "\n"; | |
1166 } | |
1167 Returns : Bio::PrimarySeqI object | |
1168 Args : None | |
1169 | |
1170 =cut | |
1171 | |
1172 sub next_seq { | |
1173 my ($obj) = @_; | |
1174 | |
1175 if(! exists($obj->{'_seq_queue'})) { | |
1176 # re-initialize from array of sequence data | |
1177 $obj->{'_seq_queue'} = [@{$obj->sequences()}]; | |
1178 } | |
1179 my $queue = $obj->{'_seq_queue'}; | |
1180 # is queue exhausted (equivalent to end of stream)? | |
1181 if(! @$queue) { | |
1182 # yes, remove queue and signal to the caller | |
1183 delete $obj->{'_seq_queue'}; | |
1184 return undef; | |
1185 } | |
1186 # no, still data in the queue: get the next one from the queue | |
1187 my $seq_h = shift(@$queue); | |
1188 # if this is not a simple hash ref, it's an object already, and we'll | |
1189 # return just that | |
1190 return $seq_h if(ref($seq_h) ne 'HASH'); | |
1191 # nope, we need to assemble this object from scratch | |
1192 # | |
1193 # assemble the annotation collection | |
1194 my $ac = Bio::Annotation::Collection->new(); | |
1195 foreach my $k (keys %$seq_h) { | |
1196 next if $k =~ /acc|pid|nid|version/; | |
1197 my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k, | |
1198 -value => $seq_h->{$k}); | |
1199 $ac->add_Annotation($ann); | |
1200 } | |
1201 # assemble the initialization parameters and create object | |
1202 my $seqobj = $obj->sequence_factory->create( | |
1203 -accession_number => $seq_h->{acc}, | |
1204 -pid => $seq_h->{pid}, | |
1205 # why does NCBI prepend a 'g' to its own identifiers?? | |
1206 -primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ? | |
1207 substr($seq_h->{nid},1) : $seq_h->{nid}, | |
1208 -display_id => $seq_h->{acc}, | |
1209 -seq_version => $seq_h->{version}, | |
1210 -alphabet => $obj->{'_alphabet'}, | |
1211 -namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank', | |
1212 -authority => $obj->authority(), # default is NCBI | |
1213 -species => $obj->species(), | |
1214 -annotation => $ac | |
1215 ); | |
1216 return $seqobj; | |
1217 } | |
1218 | |
1219 =head2 sequence_factory | |
1220 | |
1221 Title : sequence_factory | |
1222 Usage : $seqio->sequence_factory($seqfactory) | |
1223 Function: Get/Set the Bio::Factory::SequenceFactoryI | |
1224 Returns : Bio::Factory::SequenceFactoryI | |
1225 Args : [optional] Bio::Factory::SequenceFactoryI | |
1226 | |
1227 | |
1228 =cut | |
1229 | |
1230 sub sequence_factory { | |
1231 my ($self,$obj) = @_; | |
1232 if( defined $obj ) { | |
1233 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) { | |
1234 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()"); | |
1235 } | |
1236 $self->{'_seqfactory'} = $obj; | |
1237 } | |
1238 $self->{'_seqfactory'}; | |
1239 } | |
1240 | |
1241 =head1 Private methods | |
1242 | |
1243 =cut | |
1244 | |
1245 =head2 _annotation_value | |
1246 | |
1247 Title : _annotation_value | |
1248 Usage : | |
1249 Function: Private method. | |
1250 Example : | |
1251 Returns : the value (a string) | |
1252 Args : annotation key (a string) | |
1253 on set, annotation value (a string) | |
1254 | |
1255 | |
1256 =cut | |
1257 | |
1258 sub _annotation_value{ | |
1259 my $self = shift; | |
1260 my $key = shift; | |
1261 | |
1262 my ($ann, $val); | |
1263 if(@_) { | |
1264 $val = shift; | |
1265 if(! defined($val)) { | |
1266 ($ann) = $self->annotation->remove_Annotations($key); | |
1267 return $ann ? $ann->value() : undef; | |
1268 } | |
1269 } | |
1270 ($ann) = $self->annotation->get_Annotations($key); | |
1271 if($ann && (! $val)) { | |
1272 # get mode and exists | |
1273 $val = $ann->value(); | |
1274 } elsif($val) { | |
1275 # set mode | |
1276 if(! $ann) { | |
1277 $ann = Bio::Annotation::SimpleValue->new(-tagname => $key); | |
1278 $self->annotation->add_Annotation($ann); | |
1279 } | |
1280 $ann->value($val); | |
1281 } | |
1282 return $val; | |
1283 } | |
1284 | |
1285 | |
1286 =head2 _annotation_value_ary | |
1287 | |
1288 Title : _annotation_value_ary | |
1289 Usage : | |
1290 Function: Private method. | |
1291 Example : | |
1292 Returns : reference to the array of values | |
1293 Args : annotation key (a string) | |
1294 on set, reference to an array holding the values | |
1295 | |
1296 | |
1297 =cut | |
1298 | |
1299 sub _annotation_value_ary{ | |
1300 my ($self,$key,$arr) = @_; | |
1301 | |
1302 my $ac = $self->annotation; | |
1303 if($arr) { | |
1304 # purge first | |
1305 $ac->remove_Annotations($key); | |
1306 # then add as many values as are present | |
1307 foreach my $val (@$arr) { | |
1308 my $ann = Bio::Annotation::SimpleValue->new(-value => $val, | |
1309 -tagname => $key | |
1310 ); | |
1311 $ac->add_Annotation($ann); | |
1312 } | |
1313 } else { | |
1314 my @vals = map { $_->value(); } $ac->get_Annotations($key); | |
1315 $arr = [@vals]; | |
1316 } | |
1317 return $arr; | |
1318 } | |
1319 | |
1320 | |
1321 =head2 _annotation_dblink | |
1322 | |
1323 Title : _annotation_dblink | |
1324 Usage : | |
1325 Function: Private method. | |
1326 Example : | |
1327 Returns : array of accessions for the given database (namespace) | |
1328 Args : annotation key (a string) | |
1329 dbname (a string) (optional on get, mandatory on set) | |
1330 on set, accession or ID (a string), and version | |
1331 | |
1332 | |
1333 =cut | |
1334 | |
1335 sub _annotation_dblink{ | |
1336 my ($self,$key,$dbname,$acc,$version) = @_; | |
1337 | |
1338 if($acc) { | |
1339 # set mode -- this is adding here | |
1340 my $ann = Bio::Annotation::DBLink->new(-tagname => $key, | |
1341 -primary_id => $acc, | |
1342 -database => $dbname, | |
1343 -version => $version); | |
1344 $self->annotation->add_Annotation($ann); | |
1345 return 1; | |
1346 } else { | |
1347 # get mode | |
1348 my @anns = $self->annotation->get_Annotations($key); | |
1349 # filter out those that don't match the requested database | |
1350 if($dbname) { | |
1351 @anns = grep { $_->database() eq $dbname; } @anns; | |
1352 } | |
1353 return map { $_->primary_id(); } @anns; | |
1354 } | |
1355 } | |
1356 | |
1357 =head2 _remove_dblink | |
1358 | |
1359 Title : _remove_dblink | |
1360 Usage : | |
1361 Function: Private method. | |
1362 Example : | |
1363 Returns : array of accessions for the given database (namespace) | |
1364 Args : annotation key (a string) | |
1365 dbname (a string) (optional) | |
1366 | |
1367 | |
1368 =cut | |
1369 | |
1370 sub _remove_dblink{ | |
1371 my ($self,$key,$dbname) = @_; | |
1372 | |
1373 my $ac = $self->annotation(); | |
1374 my @anns = (); | |
1375 if($dbname) { | |
1376 foreach my $ann ($ac->remove_Annotations($key)) { | |
1377 if($ann->database() eq $dbname) { | |
1378 push(@anns, $ann); | |
1379 } else { | |
1380 $ac->add_Annotation($ann); | |
1381 } | |
1382 } | |
1383 } else { | |
1384 @anns = $ac->remove_Annotations($key); | |
1385 } | |
1386 return map { $_->primary_id(); } @anns; | |
1387 } | |
1388 | |
1389 | |
1390 ##################################################################### | |
1391 # aliases for naming consistency or other reasons # | |
1392 ##################################################################### | |
1393 | |
1394 *sequence = \&sequences; | |
1395 | |
1396 1; |