0
|
1 # POD documentation - main docs before the code
|
|
2 #
|
|
3 #
|
|
4
|
|
5 =head1 NAME
|
|
6
|
|
7 Bio::DB::InMemoryCache - In-memory cache in front of a random access sequence database
|
|
8
|
|
9 =head1 SYNOPSIS
|
|
10
|
|
11 $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
|
|
12 -number => 1000);
|
|
13
|
|
14 #
|
|
15 # get a database object somehow using a concrete class
|
|
16 #
|
|
17
|
|
18 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
|
|
19
|
|
20 #
|
|
21 # $seq is a Bio::Seq object
|
|
22 #
|
|
23
|
|
24 =head1 DESCRIPTION
|
|
25
|
|
26 This is a memory cache system which saves the objects returned by Bio::DB::RandomAccessI in
|
|
27 memory to a hard limit of sequences.
|
|
28
|
|
29 =head1 CONTACT
|
|
30
|
|
31 Ewan Birney
|
|
32
|
|
33 =head2 Reporting Bugs
|
|
34
|
|
35 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
36 of the bugs and their resolution. Bug reports can be submitted via email
|
|
37 or the web:
|
|
38
|
|
39 bioperl-bugs@bio.perl.org
|
|
40 http://bugzilla.bioperl.org/
|
|
41
|
|
42 =head1 APPENDIX
|
|
43
|
|
44 The rest of the documentation details each of the object
|
|
45 methods. Internal methods are usually preceded with a _
|
|
46
|
|
47 =cut
|
|
48
|
|
49
|
|
50 # Let the code begin...
|
|
51
|
|
52 package Bio::DB::InMemoryCache;
|
|
53
|
|
54 use Bio::DB::SeqI;
|
|
55
|
|
56 use vars qw(@ISA);
|
|
57 use strict;
|
|
58
|
|
59 use Bio::Root::Root;
|
|
60 use Bio::Seq;
|
|
61
|
|
62 @ISA = qw(Bio::Root::Root Bio::DB::SeqI);
|
|
63
|
|
64
|
|
# Constructor.
#
# Named arguments (pulled out of @args by _rearrange):
#   SEQDB     : the database to cache in front of; must be an object that
#               isa Bio::DB::RandomAccessI, otherwise an exception is thrown
#   NUMBER    : hard limit on the number of cached sequences (1000 if omitted)
#   AGRESSION : handed to agr(); when true, _load_Seq copies every fetched
#               sequence into a fresh Bio::Seq before caching it
#
# Returns the new cache object with empty lookup tables.
sub new {
    my $class = shift;
    my @args  = @_;

    # Start from a plain Root object and rebless it into the requested class.
    my $self = Bio::Root::Root->new();
    bless $self, $class;

    my ( $seqdb, $number, $agr ) =
        $self->_rearrange( [qw(SEQDB NUMBER AGRESSION)], @args );

    unless ( defined $seqdb
        && ref($seqdb)
        && $seqdb->isa('Bio::DB::RandomAccessI') )
    {
        $self->throw("Must be a randomaccess database not a [$seqdb]");
    }

    $number = 1000 unless defined $number;

    $self->seqdb($seqdb);
    $self->number($number);
    $self->agr($agr);

    # The accession is treated as the primary key of the cache:
    #   _cache_id_hash     : id        -> accession
    #   _cache_acc_hash    : accession -> sequence object
    #   _cache_number_hash : accession -> counter value at last use
    #   _cache_number      : next counter value to hand out
    $self->{'_cache_number_hash'} = {};
    $self->{'_cache_id_hash'}     = {};
    $self->{'_cache_acc_hash'}    = {};
    $self->{'_cache_number'}      = 1;

    return $self;
}
|
|
92
|
|
93
|
|
94
|
|
95 =head2 get_Seq_by_id
|
|
96
|
|
97 Title : get_Seq_by_id
|
|
98 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
|
|
99 Function: Gets a Bio::Seq object by its name
|
|
100 Returns : a Bio::Seq object
|
|
101 Args : the id (as a string) of a sequence
|
|
102 Throws : "id does not exist" exception
|
|
103
|
|
104
|
|
105 =cut
|
|
106
|
|
sub get_Seq_by_id {
    my $self = shift;
    my $id   = shift;

    # The id table maps an id to the accession under which the sequence
    # is actually stored.
    my $acc = $self->{'_cache_id_hash'}->{$id};

    # Cache miss: fetch from the wrapped database (which also caches it).
    return $self->_load_Seq( 'id', $id ) unless defined $acc;

    # Cache hit: stamp the entry with the current counter so it counts as
    # the most recently used one, then hand it back.
    my $seq = $self->{'_cache_acc_hash'}->{$acc};
    $self->{'_cache_number_hash'}->{ $seq->accession } =
        $self->{'_cache_number'}++;

    return $seq;
}
|
|
119
|
|
120 =head2 get_Seq_by_acc
|
|
121
|
|
122 Title : get_Seq_by_acc
|
|
123 Usage : $seq = $db->get_Seq_by_acc('X77802');
|
|
124 Function: Gets a Bio::Seq object by accession number
|
|
125 Returns : A Bio::Seq object
|
|
126 Args : accession number (as a string)
|
|
127 Throws : "acc does not exist" exception
|
|
128
|
|
129
|
|
130 =cut
|
|
131
|
|
sub get_Seq_by_acc {
    my $self = shift;
    my $acc  = shift;

    my $seq = $self->{'_cache_acc_hash'}->{$acc};

    # Cache miss: fetch from the wrapped database (which also caches it).
    return $self->_load_Seq( 'acc', $acc ) unless defined $seq;

    # Cache hit: stamp the entry with the current counter so it counts as
    # the most recently used one.
    $self->{'_cache_number_hash'}->{ $seq->accession } =
        $self->{'_cache_number'}++;

    return $seq;
}
|
|
145
|
|
146
|
|
147
|
|
# Get/set the maximum number of sequences the cache may hold.
# A false argument (undef, 0, '') is treated as "no argument", so the call
# acts as a getter and the stored limit is left unchanged.
sub number {
    my $self  = shift;
    my $limit = shift;

    return $self->{'number'} unless $limit;
    return $self->{'number'} = $limit;
}
|
|
156
|
|
# Get/set the wrapped database that sequences are fetched from on a
# cache miss. A false argument is treated as "no argument" (getter).
sub seqdb {
    my $self = shift;
    my $db   = shift;

    return $self->{'seqdb'} unless $db;
    return $self->{'seqdb'} = $db;
}
|
|
165
|
|
# Get/set the "aggression" level of the cache.
#
# _load_Seq reads this value: any true value makes it copy a fetched
# sequence into a fresh Bio::Seq, and the value 1 additionally copies the
# top-level features and the annotation.
#
# Args    : optional new level; undef (or no argument) leaves the stored
#           value untouched
# Returns : the level stored after the call
#
# The setter tests for definedness rather than truth, so agr(0) really
# switches copying off again instead of being silently ignored.
sub agr {
    my ( $self, $agr ) = @_;

    $self->{'agr'} = $agr if defined $agr;

    return $self->{'agr'};
}
|
|
174
|
|
175
|
|
# Fetch a sequence from the wrapped database and register it in the cache.
#
# Args    : $type - 'id' or 'acc', selecting get_Seq_by_id / get_Seq_by_acc
#                   on the wrapped database
#           $id   - the identifier to look up
# Returns : the sequence that was cached (a fresh Bio::Seq copy when agr()
#           is true), or nothing when the wrapped database returned no
#           sequence
# Throws  : if $type is neither 'id' nor 'acc'
sub _load_Seq {
    my ( $self, $type, $id ) = @_;

    my $seq;
    if ( $type eq 'id' ) {
        $seq = $self->seqdb->get_Seq_by_id($id);
    }
    elsif ( $type eq 'acc' ) {
        $seq = $self->seqdb->get_Seq_by_acc($id);
    }
    else {
        $self->throw("Bad internal error. Don't understand $type");
    }

    # The wrapped database found nothing: there is nothing to copy or to
    # cache, and calling methods on undef below would die.
    return unless defined $seq;

    if ( $self->agr() ) {

        # Pull the data into a plain in-memory Bio::Seq so the cached
        # object no longer depends on the object the database returned.
        my $newseq = Bio::Seq->new(
            -display_id       => $seq->display_id,
            -accession_number => $seq->accession,
            -seq              => $seq->seq,
            -desc             => $seq->desc,
        );

        # Level 1 also carries over top-level features and the annotation.
        if ( $self->agr() == 1 ) {
            foreach my $sf ( $seq->top_SeqFeatures() ) {
                $newseq->add_SeqFeature($sf);
            }

            $newseq->annotation( $seq->annotation );
        }
        $seq = $newseq;
    }

    if ( $self->_number_free() < 1 ) {

        # Cache is full: evict the least recently used entry, i.e. the
        # accession carrying the smallest use counter. Counters are unique,
        # so a single pass finds the same entry a full sort would.
        my $stamp_of = $self->{'_cache_number_hash'};
        my $oldest;
        foreach my $acc ( keys %{$stamp_of} ) {
            if ( !defined $oldest || $stamp_of->{$acc} < $stamp_of->{$oldest} ) {
                $oldest = $acc;
            }
        }

        # $oldest stays undef when the cache is empty (for instance when
        # the limit is unset or 0); then there is nothing to evict.
        if ( defined $oldest ) {
            my $evicted = delete $self->{'_cache_acc_hash'}->{$oldest};
            delete $stamp_of->{$oldest};
            delete $self->{'_cache_id_hash'}->{ $evicted->id }
                if defined $evicted;
        }
    }

    # Register the sequence under its accession and stamp it with the
    # current counter value, which is then advanced.
    my $acc = $seq->accession;
    $self->{'_cache_id_hash'}->{ $seq->id } = $acc;
    $self->{'_cache_acc_hash'}->{$acc}      = $seq;
    $self->{'_cache_number_hash'}->{$acc}   = $self->{'_cache_number'}++;

    return $seq;
}
|
|
226
|
|
227
|
|
# Number of free slots left in the cache: the configured limit minus the
# number of accessions currently tracked in the use-counter table.
sub _number_free {
    my ($self) = @_;

    my $in_use = keys %{ $self->{'_cache_number_hash'} };

    return $self->number() - $in_use;
}
|
|
233
|
|
234
|
|
235
|
|
236
|
|
237 =head2 get_Seq_by_version
|
|
238
|
|
239 Title : get_Seq_by_version
|
|
240 Usage : $seq = $db->get_Seq_by_version('X77802.1');
|
|
241 Function: Gets a Bio::Seq object by sequence version
|
|
242 Returns : A Bio::Seq object
|
|
243 Args : accession.version (as a string)
|
|
244 Throws : "acc.version does not exist" exception
|
|
245
|
|
246 =cut
|
|
247
|
|
248
|
|
sub get_Seq_by_version {
    my $self = shift;

    # Lookup by accession.version is not supported by the cache; any
    # arguments are ignored and an exception is raised.
    return $self->throw("Not implemented it");
}
|
|
253
|
|
254
|
|
255
|
|
256 ## End of Package
|
|
257
|
|
258 1;
|