diff variant_effect_predictor/Bio/DB/Universal.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/DB/Universal.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,247 @@
+
+#
+# BioPerl module for Bio::DB::Universal
+#
+# Cared for by Ewan Birney <birney@ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Universal - Artificial database that delegates to specific databases
+
+=head1 SYNOPSIS
+
+    $uni = Bio::DB::Universal->new();
+
+    # by default connects to web databases. We can also
+    # substitute local databases
+
+    $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
+    $uni->use_database('embl',$embl);
+
+    # treat it like a normal database. Recognises strings
+    # like gb|XXXXXX and embl:YYYYYY
+
+    $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
+    $seq2 = $uni->get_Seq_by_acc("gb|A000012");
+
+    # with no separator, tries to guess database. In this case the
+    # _ is considered to be indicative of swissprot
+    $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');
+
+=head1 DESCRIPTION
+
+Artificial database that delegates to specific databases, with a
+"smart" (well, smartish) guessing routine for what the ids. No doubt
+the smart routine can be made smarter.
+
+The hope is that you can make this database and just throw ids at it -
+for most easy cases it will sort you out. Personally, I would be
+making sure I knew where each id came from and putting it into its own
+database first - but this is a quick and dirty solution.
+
+By default this connects to web orientated databases, with all the
+reliability and network bandwidth costs this implies. However you can
+subsistute your own local databases - they could be Bio::Index
+databases (DBM file and flat file) or bioperl-db based (MySQL based)
+or biocorba-based (whatever you like behind the corba interface).
+
+Internally the tags for the databases are
+
+   genbank - ncbi dna database
+   embl    - ebi's dna database (these two share accession number space)
+   swiss   - swissprot + sptrembl (EBI's protein database)
+
+We should extend this for RefSeq and other sequence databases which 
+are out there... ;)
+
+Inspired by Lincoln Stein, written by Ewan Birney.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bio.perl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via email
+or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney@ebi.ac.uk
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Universal;
+use vars qw(@ISA);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::DB::RandomAccessI;
+
+use Bio::DB::GenBank;
+use Bio::DB::SwissProt;
+use Bio::DB::EMBL;
+
+
+@ISA = qw(Bio::DB::RandomAccessI Bio::Root::Root);
+# new() can be inherited from Bio::Root::Root
+
+sub new {
+    my ($class) = @_;
+
+    my $self = {};
+    bless $self,$class;
+
+    $self->{'db_hash'} = {};
+
+    # default databases
+
+    $self->use_database('embl',Bio::DB::EMBL->new);
+    $self->use_database('genbank',Bio::DB::GenBank->new);
+    $self->use_database('swiss',Bio::DB::GenBank->new);
+
+    return $self;
+}
+
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub get_Seq_by_id{
+   my ($self,$str) = @_;
+
+   my ($tag,$id) = $self->guess_id($str);
+
+   return $self->{'db_hash'}->{$tag}->get_Seq_by_id($id);
+}
+
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub get_Seq_by_acc {
+   my ($self,$str) = @_;
+
+   my ($tag,$id) = $self->guess_id($str);
+
+   return $self->{'db_hash'}->{$tag}->get_Seq_by_acc($id);
+}
+
+
+
+=head2 guess_id
+
+ Title   : guess_id
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub guess_id{
+   my ($self,$str) = @_;
+   
+   if( $str =~ /(\S+)[:|\/;](\w+)/ ) {
+       my $tag;
+       my $db = $1;
+       my $id = $2;
+       if( $db =~ /gb/i || $db =~ /genbank/i || $db =~ /ncbi/i ) {
+	   $tag = 'genbank';
+       } elsif ( $db =~ /embl/i || $db =~ /emblbank/ || $db =~ /^em/i ) {
+	   $tag = 'embl';
+       } elsif ( $db =~ /swiss/i || $db =~ /^sw/i || $db =~ /sptr/ ) {
+	   $tag = 'swiss';
+       } else {
+	   # throw for the moment
+	   $self->throw("Could not guess database type $db from $str");
+       }
+       return ($tag,$id);
+
+   } else {
+       my $tag;
+       # auto-guess from just the id
+       if( $str =~ /_/ ) {
+	   $tag = 'swiss';
+       } elsif ( $str =~ /^[QPR]\w+\d$/ ) {
+	   $tag = 'swiss';
+       } elsif ( $str =~ /[A-Z]\d+/ ) {
+	   $tag = 'genbank';
+       } else {
+	   # default genbank...
+	   $tag = 'genbank';
+       }
+       return ($tag,$str);
+   }
+
+   
+}
+
+
+=head2 use_database
+
+ Title   : use_database
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub use_database{
+   my ($self,$name,$database) = @_;
+
+   $self->{'db_hash'}->{$name} = $database;
+}
+
+