Mercurial > repos > willmclaren > ensembl_vep
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/Array.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/Array.pm Fri Aug 03 10:04:48 2012 -0400 @@ -0,0 +1,588 @@ +# +# Ensembl module for Bio::EnsEMBL::Funcgen::Array +# +# You may distribute this module under the same terms as Perl itself + + +=head1 LICENSE + + Copyright (c) 1999-2011 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + + +=head1 NAME + +Bio::EnsEMBL::Funcgen::Array - A module to represent a nucleotide microarray. + +=head1 SYNOPSIS + +use Bio::EnsEMBL::Funcgen::Array; + +my $array = Bio::EnsEMBL::Funcgen::Array->new( + -NAME => 'Array-1', + -FORMAT => 'Tiled', + -SIZE => '1', + -VENDOR => 'Nimblegen', + -DESCRIPTION => $desc, + -TYPE => 'OLIGO', + -CLASS => 'VENDOR_FORMAT' +); + +my $db_adaptor = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(...); +my $array_adaptor = $db_adaptor->get_ArrayAdaptor(); +my $array = $array_adaptor->fetch_by_name($array_name) + +=head1 DESCRIPTION + +An Array object represents a nucleotide (OLIGO, PCR etc.) microarray. The data +(currently the name, format, size, species, vendor and description) are stored +in the array table. + +=cut + + +use strict; +use warnings; + + +package Bio::EnsEMBL::Funcgen::Array; + + +use Bio::EnsEMBL::Utils::Argument qw( rearrange ); +use Bio::EnsEMBL::Utils::Exception qw( throw warning ); +use Bio::EnsEMBL::Funcgen::Storable; + +use vars qw(@ISA);# %VALID_TYPE); +@ISA = qw(Bio::EnsEMBL::Funcgen::Storable); + + +# Possible types for OligoArray objects +#This should match the vendor enum values? +#%VALID_TYPE = ( +# 'AFFY' => 1, +# 'OLIGO' => 1, +#); + + +=head2 new + + Arg [-NAME] : string - the name of this array + Arg [-VENDOR] : string - the vendor of this array (AFFY, NIMBLEGEN etc) + Arg [-TYPE] : string - type of array e.g. OLIGO, PCR + Arg [-FORMAT] : string - the format of this array (TILED, TARGETTED, GENE etc) + Arg [-DESCRIPTION] : strin - description of the array + +#array_chips is array of hashes or design_id and name, dbID will be populated on store, this should be a simple object! + + Example : my $array = Bio::EnsEMBL::Funcgen::Array->new( + -NAME => 'Array-1', + -FORMAT => 'Tiled', + -SIZE => '1', + -VENDOR => 'Nimblegen', + -TYPE => 'OLIGO', + -DESCRIPTION => $desc, + -CLASS => 'VENDOR_FORMAT',#e.g. AFFY_UTR, ILLUMINA_WG + ); + Description: Creates a new Bio::EnsEMBL::Funcgen::Array object. + Returntype : Bio::EnsEMBL::Funcgen::Array + Exceptions : None ? should throw if mandatort params not set/valid + Caller : General + Status : At risk + +=cut + +sub new { + my $caller = shift; + + my $class = ref($caller) || $caller; + my $self = $class->SUPER::new(@_); + + my ($name, $format, $size, $vendor, $type, $desc, $aclass) + = rearrange( ['NAME', 'FORMAT', 'SIZE', 'VENDOR', 'TYPE', 'DESCRIPTION', 'CLASS'], @_ ); + + #mandatory params? + #name, format, vendor + #enum on format? + + my @stack = caller(); + + if($self->dbID() && $stack[0] ne "Bio::EnsEMBL::Funcgen::DBSQL::ArrayAdaptor"){ + throw("You must use the ArrayAdaptor($stack[0]) to generate Arrays with a dbID i.e. from the DB, as this module accomodates updating which may cause incorrect data if the object is not generated form the DB"); + } + + + throw("Must provide a vendor parameter") if ! $vendor; + throw("Must provide a name parameter") if ! $name; + #any others? + + + $self->name($name); + $self->format($format) if defined $format; + + if(defined $format && $format eq 'EXPRESSION' && ! defined $class){ + throw('You must defined a class if you are importing and array with an EXPRESSION format'); + } + + $self->class(uc($aclass)) if defined $aclass; + $self->size($size) if defined $size; + $self->vendor($vendor); + $self->description($desc) if defined $desc; + $self->type($type) if defined $type; + + return $self; +} + +=head2 get_all_Probes + + Args : None + Example : my $probes = $array->get_all_Probes(); + Description: Returns all probes on an array. Needs a database connection. + Returntype : Listref of Bio::EnsEMBL::Funcgen::Probe objects + Exceptions : None + Caller : General + Status : At Risk + +=cut + +sub get_all_Probes { + my $self = shift; + + if ( $self->dbID() && $self->adaptor() ) { + my $opa = $self->adaptor()->db()->get_ProbeAdaptor(); + my $probes = $opa->fetch_all_by_Array($self); + return $probes; + } else { + warning('Need database connection to retrieve Probes'); + return []; + } +} + +=head2 get_all_Probe_dbIDs + + Args : None + Example : my @dbids = @{$array->get_all_Probe_dbIDs}; + Description: Returns an array ref of all the Probe database IDs for this array + Returntype : arrayref of ints + Exceptions : None + Caller : General + Status : At Risk + +=cut + +sub get_all_Probe_dbIDs { + my $self = shift; + + if(! $self->{probe_dbids}){ + #check for adaptor here? + + if(! $self->adaptor){ + throw('Must have set an adaptor to get_all_Probe_dbIDs'); + } + + $self->{probe_dbids} = $self->adaptor->fetch_Probe_dbIDs_by_Array($self); + } + + return $self->{probe_dbids}; +} + + + + +#Nath new get methods + +=head2 get_all_ProbeSets + + Args : None + Example : my $probesets = $array->get_all_ProbeSets(); + Description: Returns all probesets on an array. Needs a database connection. + Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeSets objects + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub get_all_ProbeSets { + my $self = shift; + + if ( $self->dbID() && $self->adaptor() ) { + my $opsa = $self->adaptor()->db()->get_ProbeSetAdaptor(); + my $probesets = $opsa->fetch_all_by_Array($self); + return $probesets; + } else { + warning('Need database connection to retrieve ProbeSets'); + return []; + } +} + + +#All the array_chip methods will be migrated to ArrayChip.pm + +=head2 get_array_chip_ids + + Example : my @ac_ids = @{$array->get_array_chip_ids()}; + Description: Returns all array_chip_ids for this array. + Returntype : Listref of array_chip ids + Exceptions : Throws if none retrieved + Caller : General + Status : At Risk + +=cut + +sub get_array_chip_ids { + my $self = shift; + + my @ac_ids; + + + $self->get_ArrayChips(); + + #should we get_ArrayChips is we have none cached? + #this may cause problem + + + foreach my $achip(values %{$self->{'array_chips'}}){ + push @ac_ids, $achip->dbID(); + } + + if(! @ac_ids){ + throw("No array_chip_ids available"); # should this be warn? + } + + return \@ac_ids; +} + +=head2 get_design_ids + + Example : my @design_ids = @{$array->get_design_ids()}; + Description: Returns a the design_ids for each array_chip contained within this array + Returntype : list + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + + + +sub get_design_ids{ + my $self = shift; + return [keys %{$self->{'array_chips'}}]; +} + + + +=head2 name + + Arg [1] : (optional) string - the name of this array + Example : my $name = $array->name(); + Description: Getter, setter of the name attribute for Array + objects. + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub name{ + my $self = shift; + + $self->{'name'} = shift if @_; + + #do we need this? + #if ( !exists $self->{'name'} && $self->dbID() && $self->adaptor() ) { + # $self->adaptor->fetch_attributes($self); + #} + + return $self->{'name'}; +} + + +=head2 type + + Arg [1] : (optional) string - the type of this array + Example : $array->type('OLIGO'); + Description: Getter, setter of the type attribute for Array + objects. + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub type{ + my $self = shift; + + $self->{'type'} = shift if @_; + + return $self->{'type'}; +} + + +=head2 format + + Arg [1] : (optional) string - the format of the array + Example : my $format = $array->format(); + Description: Getter, setter of format attribute for + Array objects e.g. Tiled, Targetted etc... + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub format { + my $self = shift; + + $self->{'format'} = shift if @_; + + #do we need this? + #if ( !exists $self->{'format'} && $self->dbID() && $self->adaptor() ) { + # $self->adaptor->fetch_attributes($self); + #} + + return $self->{'format'}; +} + +=head2 class + + Arg [1] : (optional) string - the class of the array + Example : my $class = $array->class('AFFY_UTR'); + Description: Getter, setter of class attribute for + Array objects e.g. AFFY_UTR, AFFY_ST + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub class { + my $self = shift; + + $self->{'class'} = shift if @_; + + return $self->{'class'}; +} + + +=head2 size + + Arg [1] : (optional) int - the number of ? in the array + Example : my $size = $array->size(); + Description: Getter of size attribute for Array objects. This + simply counts the constituent ArrayChips + Returntype : int + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub size { + my $self = shift; + + return scalar(keys %{$self->{'array_chips'}}); +} + + +=head2 vendor + + Arg [1] : (optional) string - the name of the array vendor + Example : my $vendor = $array->vendor(); + Description: Getter, setter of vendor attribute for + Array objects. + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub vendor { + my $self = shift; + $self->{'vendor'} = shift if @_; + + #do we need this? + #if ( !exists $self->{'vendor'} && $self->dbID() && $self->adaptor() ) { + # $self->adaptor->fetch_attributes($self); + #} + + return $self->{'vendor'}; +} + +=head2 description + + Arg [1] : (optional) string - the description of the array + Example : my $size = $array->description(); + Description: Getter, setter of description attribute for + Array objects. + Returntype : string + Exceptions : None + Caller : General + Status : Medium Risk + +=cut + +sub description { + my $self = shift; + $self->{'description'} = shift if @_; + + #do we need this? + #if ( !exists $self->{'description'} && $self->dbID() && $self->adaptor() ) { + # $self->adaptor->fetch_attributes($self); + #} + + return $self->{'description'}; +} + +=head2 probe_count + + Example : my $num_probes = $array->probe_count(); + Description: Return number of probes on array + Returntype : string + Exceptions : None + Caller : General + Status : At Risk + +=cut + +sub probe_count { + my ($self) = @_; + #Do we want a distinct flag here? + + if(! defined $self->{'probe_count'}){ + $self->{'probe_count'} = $self->adaptor->fetch_probe_count_by_Array($self); + } + + return $self->{'probe_count'}; +} + + + +=head2 get_ArrayChips + + Example : my @achips = @{$array->get_ArrayChips()}; + Description: Getter, setter and lazy loader of array_chip hashes + Returntype : Arrays of ArrayChip objects + Exceptions : Throws exception if none found for array_id + Caller : General + Status : High Risk - migrate to ArrayChip.pm + +=cut + +sub get_ArrayChips { + my $self = shift; + + #lazy loaded as we won't want this for light DB + #should do meta check and want here + + if ( ! exists $self->{'array_chips'}){ + + if( $self->dbID() && $self->adaptor() ) { + #$self->adaptor->fetch_attributes($self); + #need to do this differently as we're accessing a different table + $self->{'array_chips'} = {}; + + foreach my $achip(@{$self->adaptor->db->get_ArrayChipAdaptor->fetch_all_by_array_id($self->dbID())}){ + $self->{'array_chips'}{$achip->design_id} = $achip; + #%{$self->{'array_chips'}} = %{$self->adaptor->db->get_ArrayAdaptor->_fetch_array_chips_by_array_dbID($self->dbID())}; + } + } + else{ + throw("Need array dbID and DB connection to retrieve array_chips"); + } + } + + return [ values %{$self->{'array_chips'}} ]; +} + +=head2 get_ArrayChip_by_design_id + + Arg [1] : (mandatory) int - design_id + Example : my %ac = %{$array->get_ArrayChip_by_design_id('1234')}; + Description: Getter for array_chip hashes + Returntype : Hashref + Exceptions : Throws exception if no design_id defined, warns if not part of array + Caller : General + Status : At risk + +=cut + +sub get_ArrayChip_by_design_id{ + my ($self, $design_id) = @_; + + + #warn "This needs to get the array chip if not defined?? but we're using it to test whether is has been stored same problem as probe_design?"; + + my ($achip); + throw("Must supply a valid array chip design_id") if (! defined $design_id); + + if(defined $self->{'array_chips'}{$design_id}){ + $achip = $self->{'array_chips'}{$design_id}; + }else{ + #No we use this to check whether it has been stored with the array + #warn("should this throw? Array does not contain ArrayChip:$design_id\n"); + } + + return $achip; +} + +=head2 add_ArrayChip + + Arg [1] : mandatory - Bio::EnsEMBL::Funcgen::ArrayChip + Example : $array->add_ArrayChip($array_chip); + Description: Setter for array chips + Returntype : None + Exceptions : Throws if arg not a Bio::EnsEMBL::Funcgen::ArrayChip, or Array not stored + Caller : General + Status : Ar risk + +=cut + +#This uses previosuly stored array_chips withotu warning +#Need to implement fetch_store method? + +sub add_ArrayChip{ + my ($self, $array_chip) = @_; + + throw("You must supply a stored Bio::EnsEMBL::Funcgen::ArrayChip") if(! ($array_chip && + $array_chip->isa("Bio::EnsEMBL::Funcgen::ArrayChip") && + $array_chip->dbID())); + + if ($self->dbID() && $self->adaptor()){ + $self->get_ArrayChips() if (! $self->{'array_chips'}); + + if(exists $self->{'array_chips'}{$array_chip->design_id}){ + $array_chip = $self->{'array_chips'}{$array_chip->design_id}; + #warn("Array chip for ".$array_chip->design_id()." already exists, using previous stored array chip\n"); + }else{ + $self->{'array_chips'}{$array_chip->design_id} = $array_chip; + } + + }else{ + throw("Array must be stored before adding an array_chip"); + } + + return; +} + + +1; +