Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/DB/InMemoryCache.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # POD documentation - main docs before the code | |
| 2 # | |
| 3 # | |
| 4 | |
| 5 =head1 NAME | |
| 6 | |
| 7 Bio::DB::InMemoryCache - In-memory cache in front of a sequence database | |
| 8 | |
| 9 =head1 SYNOPSIS | |
| 10 | |
| 11 $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db, | |
| 12 -number => 1000); | |
| 13 | |
| 14 # | |
| 15 # get a database object somehow using a concrete class | |
| 16 # | |
| 17 | |
| 18 $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN'); | |
| 19 | |
| 20 # | |
| 21 # $seq is a Bio::Seq object | |
| 22 # | |
| 23 | |
| 24 =head1 DESCRIPTION | |
| 25 | |
| 26 This is a memory cache system which keeps the sequence objects returned by a Bio::DB::RandomAccessI database in | |
| 27 memory, up to a hard limit on the number of cached sequences. | |
| 28 | |
| 29 =head1 CONTACT | |
| 30 | |
| 31 Ewan Birney | |
| 32 | |
| 33 =head2 Reporting Bugs | |
| 34 | |
| 35 Report bugs to the Bioperl bug tracking system to help us keep track of | |
| 36 the bugs and their resolution. Bug reports can be submitted via email | |
| 37 or the web: | |
| 38 | |
| 39 bioperl-bugs@bio.perl.org | |
| 40 http://bugzilla.bioperl.org/ | |
| 41 | |
| 42 =head1 APPENDIX | |
| 43 | |
| 44 The rest of the documentation details each of the object | |
| 45 methods. Internal methods are usually preceded with a _ | |
| 46 | |
| 47 =cut | |
| 48 | |
| 49 | |
| 50 # Let the code begin... | |
| 51 | |
| 52 package Bio::DB::InMemoryCache; | |
| 53 | |
| 54 use Bio::DB::SeqI; | |
| 55 | |
| 56 use vars qw(@ISA); | |
| 57 use strict; | |
| 58 | |
| 59 use Bio::Root::Root; | |
| 60 use Bio::Seq; | |
| 61 | |
| 62 @ISA = qw(Bio::Root::Root Bio::DB::SeqI); | |
| 63 | |
| 64 | |
| 65 sub new { | |
| 66 my ($class,@args) = @_; | |
| 67 | |
| 68 my $self = Bio::Root::Root->new(); | |
| 69 bless $self,$class; | |
| 70 | |
| 71 my ($seqdb,$number,$agr) = $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args); | |
| 72 | |
| 73 if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) { | |
| 74 $self->throw("Must be a randomaccess database not a [$seqdb]"); | |
| 75 } | |
| 76 if( !defined $number ) { | |
| 77 $number = 1000; | |
| 78 } | |
| 79 | |
| 80 $self->seqdb($seqdb); | |
| 81 $self->number($number); | |
| 82 $self->agr($agr); | |
| 83 | |
| 84 # we consider acc as the primary id here | |
| 85 $self->{'_cache_number_hash'} = {}; | |
| 86 $self->{'_cache_id_hash'} = {}; | |
| 87 $self->{'_cache_acc_hash'} = {}; | |
| 88 $self->{'_cache_number'} = 1; | |
| 89 | |
| 90 return $self; | |
| 91 } | |
| 92 | |
| 93 | |
| 94 | |
| 95 =head2 get_Seq_by_id | |
| 96 | |
| 97 Title : get_Seq_by_id | |
| 98 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') | |
| 99 Function: Gets a Bio::Seq object by its name | |
| 100 Returns : a Bio::Seq object | |
| 101 Args : the id (as a string) of a sequence | |
| 102 Throws : "id does not exist" exception | |
| 103 | |
| 104 | |
| 105 =cut | |
| 106 | |
| 107 sub get_Seq_by_id{ | |
| 108 my ($self,$id) = @_; | |
| 109 | |
| 110 if( defined $self->{'_cache_id_hash'}->{$id} ) { | |
| 111 my $acc = $self->{'_cache_id_hash'}->{$id}; | |
| 112 my $seq = $self->{'_cache_acc_hash'}->{$acc}; | |
| 113 $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; | |
| 114 return $seq; | |
| 115 } else { | |
| 116 return $self->_load_Seq('id',$id); | |
| 117 } | |
| 118 } | |
| 119 | |
| 120 =head2 get_Seq_by_acc | |
| 121 | |
| 122 Title : get_Seq_by_acc | |
| 123 Usage : $seq = $db->get_Seq_by_acc('X77802'); | |
| 124 Function: Gets a Bio::Seq object by accession number | |
| 125 Returns : A Bio::Seq object | |
| 126 Args : accession number (as a string) | |
| 127 Throws : "acc does not exist" exception | |
| 128 | |
| 129 | |
| 130 =cut | |
| 131 | |
| 132 sub get_Seq_by_acc{ | |
| 133 my ($self,$acc) = @_; | |
| 134 | |
| 135 #print STDERR "In cache get for $acc\n"; | |
| 136 if( defined $self->{'_cache_acc_hash'}->{$acc} ) { | |
| 137 #print STDERR "Returning cached $acc\n"; | |
| 138 my $seq = $self->{'_cache_acc_hash'}->{$acc}; | |
| 139 $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; | |
| 140 return $seq; | |
| 141 } else { | |
| 142 return $self->_load_Seq('acc',$acc); | |
| 143 } | |
| 144 } | |
| 145 | |
| 146 | |
| 147 | |
| 148 sub number { | |
| 149 my ($self, $number) = @_; | |
| 150 if ($number) { | |
| 151 $self->{'number'} = $number; | |
| 152 } else { | |
| 153 return $self->{'number'}; | |
| 154 } | |
| 155 } | |
| 156 | |
| 157 sub seqdb { | |
| 158 my ($self, $seqdb) = @_; | |
| 159 if ($seqdb) { | |
| 160 $self->{'seqdb'} = $seqdb; | |
| 161 } else { | |
| 162 return $self->{'seqdb'}; | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 sub agr { | |
| 167 my ($self, $agr) = @_; | |
| 168 if ($agr) { | |
| 169 $self->{'agr'} = $agr; | |
| 170 } else { | |
| 171 return $self->{'agr'}; | |
| 172 } | |
| 173 } | |
| 174 | |
| 175 | |
| 176 sub _load_Seq { | |
| 177 my ($self,$type,$id) = @_; | |
| 178 | |
| 179 my $seq; | |
| 180 | |
| 181 if( $type eq 'id') { | |
| 182 $seq = $self->seqdb->get_Seq_by_id($id); | |
| 183 }elsif ( $type eq 'acc' ) { | |
| 184 $seq = $self->seqdb->get_Seq_by_acc($id); | |
| 185 } else { | |
| 186 $self->throw("Bad internal error. Don't understand $type"); | |
| 187 } | |
| 188 | |
| 189 if( $self->agr() ) { | |
| 190 #print STDERR "Pulling out into memory\n"; | |
| 191 my $newseq = Bio::Seq->new( -display_id => $seq->display_id, | |
| 192 -accession_number => $seq->accession, | |
| 193 -seq => $seq->seq, | |
| 194 -desc => $seq->desc, | |
| 195 ); | |
| 196 if( $self->agr() == 1 ) { | |
| 197 foreach my $sf ( $seq->top_SeqFeatures() ) { | |
| 198 $newseq->add_SeqFeature($sf); | |
| 199 } | |
| 200 | |
| 201 $newseq->annotation($seq->annotation); | |
| 202 } | |
| 203 $seq = $newseq; | |
| 204 } | |
| 205 | |
| 206 if( $self->_number_free < 1 ) { | |
| 207 # remove the latest thing from the hash | |
| 208 my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=> $self->{'_cache_number_hash'}->{$b} } keys %{$self->{'_cache_number_hash'}}; | |
| 209 | |
| 210 my $acc = shift @accs; | |
| 211 # remove this guy | |
| 212 my $seq = $self->{'_cache_acc_hash'}->{$acc}; | |
| 213 | |
| 214 delete $self->{'_cache_number_hash'}->{$acc}; | |
| 215 delete $self->{'_cache_id_hash'}->{$seq->id}; | |
| 216 delete $self->{'_cache_acc_hash'}->{$acc}; | |
| 217 } | |
| 218 | |
| 219 # up the number, register this sequence into the hash. | |
| 220 $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession; | |
| 221 $self->{'_cache_acc_hash'}->{$seq->accession} = $seq; | |
| 222 $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; | |
| 223 | |
| 224 return $seq; | |
| 225 } | |
| 226 | |
| 227 | |
| 228 sub _number_free { | |
| 229 my $self = shift; | |
| 230 | |
| 231 return $self->number - scalar(keys %{$self->{'_cache_number_hash'}}); | |
| 232 } | |
| 233 | |
| 234 | |
| 235 | |
| 236 | |
| 237 =head2 get_Seq_by_version | |
| 238 | |
| 239 Title : get_Seq_by_version | |
| 240 Usage : $seq = $db->get_Seq_by_version('X77802.1'); | |
| 241 Function: Gets a Bio::Seq object by sequence version | |
| 242 Returns : A Bio::Seq object | |
| 243 Args : accession.version (as a string) | |
| 244 Throws : "acc.version does not exist" exception | |
| 245 | |
| 246 =cut | |
| 247 | |
| 248 | |
| 249 sub get_Seq_by_version{ | |
| 250 my ($self,@args) = @_; | |
| 251 $self->throw("Not implemented it"); | |
| 252 } | |
| 253 | |
| 254 | |
| 255 | |
| 256 ## End of Package | |
| 257 | |
| 258 1; |
