diff variant_effect_predictor/Bio/Align/DNAStatistics.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Align/DNAStatistics.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,683 @@
+# $Id: DNAStatistics.pm,v 1.4 2002/10/22 07:45:10 lapp Exp $
+#
+# BioPerl module for Bio::Align::DNAStatistics
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::DNAStatistics - Calculate some statistics for a DNA alignment
+
+=head1 SYNOPSIS
+
+    use Bio::Align::DNAStatistics;
+    use Bio::AlignIO;
+
+    my $stats = new Bio::Align::PairwiseStatistics;
+    my $alignin = new Bio::AlignIO(-format => 'emboss',
+				   -file   => 't/data/insulin.water');
+    my $jc = $stats->distance($aln, 'Jukes-Cantor');
+    foreach my $r ( @$jc )  {
+	print "\t";
+	foreach my $r ( @$d ) {
+	    print "$r\t";
+	} 
+	print "\n";
+    }
+
+=head1 DESCRIPTION
+
+This object contains routines for calculating various statistics and
+distances for DNA alignments.  The routines are not well tested and do
+contain errors at this point.  Work is underway to correct them, but
+do not expect this code to give you the right answer currently!  Use
+dnadist/distmat in the PHLYIP or EMBOSS packages to calculate the
+distances.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  bioperl-bugs@bioperl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason@bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::DNAStatistics;
+use vars qw(@ISA %DNAChanges @Nucleotides %NucleotideIndexes 
+	    $GapChars $SeqCount $DefaultGapPenalty %DistanceMethods);
+use strict;
+use Bio::Align::PairwiseStatistics;
+use Bio::Root::Root;
+
+BEGIN {
+    $GapChars = '(\.|\-)';
+    @Nucleotides = qw(A G T C);
+    $SeqCount = 2;
+    # these values come from EMBOSS distmat implementation
+    %NucleotideIndexes = ( 'A' => 0,
+			   'T' => 1,
+			   'C' => 2,
+			   'G' => 3,
+
+			   'AT' => 0,
+			   'AC' => 1,
+			   'AG' => 2,
+			   'CT' => 3,
+			   'GT' => 4,
+			   'CG' => 5,
+
+# these are wrong now
+#			   'S' => [ 1, 3],
+#			   'W' => [ 0, 4],
+#			   'Y' => [ 2, 3],
+#			   'R' => [ 0, 1],
+#			   'M' => [ 0, 3],
+#			   'K' => [ 1, 2],
+#			   'B' => [ 1, 2, 3],
+#			   'H' => [ 0, 2, 3],
+#			   'V' => [ 0, 1, 3],
+#			   'D' => [ 0, 1, 2],
+			   );
+
+    $DefaultGapPenalty = 0;
+    # could put ambiguities here?
+    %DNAChanges = ( 'Transversions' => { 'A' => [ 'T', 'C'],
+					 'T' => [ 'A', 'G'],
+					 'C' => [ 'A', 'G'],
+					 'G' => [ 'C', 'T'],
+				     },
+		    'Transitions'   => { 'A' => [ 'G' ],
+					 'G' => [ 'A' ],
+					 'C' => [ 'T' ],
+					 'T' => [ 'C' ],
+				     },
+		    );
+    %DistanceMethods = ( 'jc|jukes|jukes-cantor' => 'JukesCantor',
+			 'f81'                   => 'F81',
+			 'k2|k2p|k80|kimura'        => 'Kimura',
+			 't92|tamura|tamura92'   => 'Tamura',
+			 'f84'                   => 'F84',
+			 'tajimanei|tajima-nei'  => 'TajimaNei' );
+}
+
+@ISA = qw( Bio::Root::Root Bio::Align::StatisticsI );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Align::DNAStatistics();
+ Function: Builds a new Bio::Align::DNAStatistics object 
+ Returns : Bio::Align::DNAStatistics
+ Args    : none
+
+
+=cut
+
+sub new { 
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    
+    $self->pairwise_stats( new Bio::Align::PairwiseStatistics());
+
+    return $self;
+}
+
+
+=head2 distance
+
+ Title   : distance
+ Usage   : my $distance_mat = $stats->distance(-align  => $aln, 
+		 			       -method => $method);
+ Function: Calculates a distance matrix for all pairwise distances of
+           sequences in an alignment.
+ Returns : Array ref
+ Args    : -align  => Bio::Align::AlignI object
+           -method => String specifying specific distance method 
+                      (implementing class may assume a default)
+
+=cut
+
+sub distance{
+   my ($self,@args) = @_;
+   my ($aln,$method) = $self->_rearrange([qw(ALIGN METHOD)],@args);
+   if( ! defined $aln || ! ref ($aln) || ! $aln->isa('Bio::Align::AlignI') ) { 
+       $self->throw("Must supply a valid Bio::Align::AlignI for the -align parameter in distance");
+   }
+   $method ||= 'JukesCantor';
+   foreach my $m ( keys %DistanceMethods ) {
+       if(defined $m &&  $method =~ /$m/i ) {
+	   my $mtd = "D_$DistanceMethods{$m}";
+	   return $self->$mtd($aln);
+       }
+   }
+   $self->warn("Unrecognized distance method $method must be one of [".
+	       join(',',$self->available_distance_methods())."]");
+   return undef;
+}
+
+=head2 available_distance_methods
+
+ Title   : available_distance_methods
+ Usage   : my @methods = $stats->available_distance_methods();
+ Function: Enumerates the possible distance methods
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+sub available_distance_methods{
+   my ($self,@args) = @_;
+   return values %DistanceMethods;
+}
+
+=head2 D - distance methods
+
+=cut
+
+=head2 D_JukesCantor
+
+ Title   : D_JukesCantor
+ Usage   : my $d = $stat->D_JukesCantor($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Jukes-Cantor 1 parameter model. 
+ Returns : ArrayRef of all pairwise distances of all sequence pairs in the alignment
+ Args    : Bio::Align::AlignI of DNA sequences
+           double - gap penalty
+
+
+=cut
+
+sub D_JukesCantor{
+   my ($self,$aln,$gappenalty) = @_;
+   return 0 unless $self->_check_arg($aln);
+   $gappenalty = $DefaultGapPenalty unless defined $gappenalty;
+   # ambiguities ignored at this point
+   
+   my (@seqs);
+   foreach my $seq ( $aln->each_seq) {
+       push @seqs, [ split(//,uc $seq->seq())];
+   }
+   my $seqct = scalar @seqs;
+   my @DVals; 
+   for(my $i = 1; $i <= $seqct; $i++ ) {
+       for( my $j = $i+1; $j <= $seqct; $j++ ) {
+	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i-1],
+							       $seqs[$j-1]);
+	   # just want diagonals
+	   my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] + 
+		     $matrix->[2]->[2] + $matrix->[3]->[3] );
+	   my $D = 1 - ( $m / ($aln->length - $gaps + ( $gaps * $gappenalty)));
+	   my $d = (- 3 / 4) * log ( 1 - (4 * $D/ 3));
+	   $DVals[$i]->[$j] = $DVals[$j]->[$i] = $d;
+       }
+   }
+   return \@DVals;
+}
+
+=head2 D_F81
+
+ Title   : D_F81
+ Usage   : my $d = $stat->D_F81($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Felsenstein 1981 distance model. 
+ Returns : ArrayRef of a 2d array of all pairwise distances in the alignment
+ Args    : Bio::Align::AlignI of DNA sequences
+
+
+=cut
+
+sub D_F81{
+   my ($self,$aln) = @_;
+   return 0 unless $self->_check_arg($aln);
+   $self->throw("This isn't implemented yet - sorry");
+}
+
+
+# M Kimura, J. Mol. Evol., 1980, 16, 111.
+
+=head2 D_Kimura
+
+ Title   : D_Kimura
+ Usage   : my $d = $stat->D_Kimura($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Kimura 2 parameter model.
+ Returns : ArrayRef of pairwise distances between all sequences in alignment
+ Args    : Bio::Align::AlignI of DNA sequences
+
+
+=cut
+
+sub D_Kimura{
+   my ($self,$aln) = @_;
+   return 0 unless $self->_check_arg($aln);
+   my $seqct = $aln->no_sequences;
+   my @KVals;
+   for( my $i = 1; $i <= $seqct; $i++ ) {
+       for( my $j = $i+1; $j <= $seqct; $j++ ) {
+	   my $pairwise = $aln->select_noncont($i,$j);
+	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+	   my $P = $self->transitions($pairwise) / $L;
+	   my $Q = $self->transversions($pairwise) / $L;
+	   
+	   my $a = 1 / ( 1 - (2 * $P) - $Q);
+	   my $b = 1 / ( 1 - 2 * $Q );
+	   my $K = (1/2) * log ( $a ) + (1/4) * log($b);
+	   $KVals[$i]->[$j] = $K;
+	   $KVals[$j]->[$i] = $K;
+       }
+   }
+   return \@KVals;
+}
+
+#  K Tamura, Mol. Biol. Evol. 1992, 9, 678.
+
+=head2 D_Tamura
+
+ Title   : D_Tamura
+ Usage   :
+ Function:
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub D_Tamura{
+   my ($self,$aln) = @_;
+   my $seqct = $aln->no_sequences;
+   my @KVals;
+   for( my $i = 1; $i <= $seqct; $i++ ) {
+       for( my $j = $i+1; $j <= $seqct; $j++ ) {
+       }
+   }
+	   my $O = 0.25;
+   my $t = 0;
+   my $a = 0;
+   my $b = 0;
+   
+
+   my $d = 4 * $O * ( 1 - $O ) * $a * $t  + 2 * $b * $t;
+   return $d;
+}
+
+=head2 D_F84
+
+ Title   : D_F84
+ Usage   : my $d = $stat->D_F84($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Felsenstein 1984 distance model. 
+ Returns : Distance value
+ Args    : Bio::Align::AlignI of DNA sequences
+           double - gap penalty
+
+=cut
+
+sub D_F84{
+   my ($self,$aln) = @_;
+   return 0 unless $self->_check_arg($aln);
+}
+
+# Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.
+
+=head2 D_TajimaNei
+
+ Title   : D_TajimaNei
+ Usage   : my $d = $stat->D_TajimaNei($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the TajimaNei 1984 distance model. 
+ Returns : Distance value
+ Args    : Bio::Align::AlignI of DNA sequences
+
+
+=cut
+
+sub D_TajimaNei{
+   my ($self,$aln) = @_;
+   $self->warn("The result from this method is not correct right now");
+   my (@seqs);
+   foreach my $seq ( $aln->each_seq) {
+       push @seqs, [ split(//,uc $seq->seq())];
+   }
+   my $seqct = scalar @seqs;
+   my @DVals; 
+   for(my $i = 1; $i <= $seqct; $i++ ) {
+       for( my $j = $i+1; $j <= $seqct; $j++ ) {
+	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i-1],
+							       $seqs[$j-1]);
+	   my $fij2;
+	   my $slen = $aln->length - $gaps;
+	   for( my $bs = 0; $bs < 4; $bs++ ) {
+	       my $fi = 0;
+	       map {$fi += $matrix->[$bs]->[$_] } 0..3;
+	       my $fj = 0;
+	       map { $fj += $matrix->[$_]->[$bs] } 0..3;
+	       my $fij = ( $fi && $fj ) ? ($fi + $fj) /( 2 * $slen) : 0;
+	       $fij2 += $fij**2;
+	   }
+	   my ($pair,$h) = (0,0);
+	   for( my $bs = 0; $bs < 3; $bs++ ) {
+	       for( my $bs1 = $bs+1; $bs1 <= 3; $bs1++ ) {
+		   my $fij = $pfreq->[$pair++] / $slen;
+		   if( $fij ) {
+		       
+		       my ($ci1,$ci2,$cj1,$cj2) = (0,0,0,0);
+
+		       map { $ci1 += $matrix->[$_]->[$bs] } 0..3;
+		       map { $cj1 += $matrix->[$bs]->[$_] } 0..3;
+		       map { $ci2 += $matrix->[$_]->[$bs1] } 0..3;
+		       map { $cj2 += $matrix->[$bs1]->[$_] } 0..3;
+		       
+		       $h += ( $fij*$fij / 2 ) / 
+			   (  ( ( $ci1 + $cj1 ) / 2 * $slen ) *
+			      ( ( $ci2 + $cj2 ) /2 * $slen ) 
+			      );
+		       $self->debug( "h is $h fij = $fij ci1 =$ci1 cj1=$cj1 ci2=$ci2 cj2=$cj2\n");
+		   }
+	       }
+	   }
+	   # just want diagonals first
+
+	   my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] + 
+		     $matrix->[2]->[2] + $matrix->[3]->[3] );
+	   my $D = 1 - ( $m / $slen);
+
+	   my $b = (1-$fij2+(($D**2)/$h)) / 2;
+	   $self->debug("h is $h fij2 is $fij2 b is $b\n");
+
+	   my $d = (-1 * $b) * log ( 1 - $D/ $b);
+	   $DVals[$i]->[$j] = $DVals[$j]->[$i] = $d;
+       }
+   }
+   return \@DVals;
+
+
+}
+
+# HKY -- HASEGAWA, M., H. KISHINO, and T. YANO. 1985
+# Tamura and Nei 1993?
+# GTR? 
+
+=head2 K - sequence substitution methods
+
+=cut
+
+=head2 K_JukesCantor
+
+ Title   : K_JukesCantor
+ Usage   : my $k = $stats->K_JukesCantor($aln)
+ Function: Calculates K - the number of nucleotide substitutions between 
+           2 seqs - according to the Jukes-Cantor 1 parameter model
+           This only involves the number of changes between two sequences.
+ Returns : double
+ Args    : Bio::Align::AlignI
+
+
+=cut
+
+sub K_JukesCantor{
+   my ($self,$aln) = @_;
+   return 0 unless $self->_check_arg($aln);
+   my $seqct = $aln->no_sequences;
+   my @KVals;
+   for( my $i = 1; $i <= $seqct; $i++ ) {
+       for( my $j = $i+1; $j <= $seqct; $j++ ) {
+	   my $pairwise = $aln->select_noncont($i,$j);
+	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+	   my $N = $self->pairwise_stats->number_of_differences($pairwise);
+	   my $p = $N / $L;   
+	   my $K = - ( 3 / 4) * log ( 1 - (( 4 * $p) / 3 ));
+	   $KVals[$i]->[$j] = $KVals[$j]->[$i] = $K;
+       }
+   }
+   return \@KVals;
+}
+
+=head2 K_TajimaNei
+
+ Title   : K_TajimaNei
+ Usage   : my $k = $stats->K_TajimaNei($aln)
+ Function: Calculates K - the number of nucleotide substitutions between 
+           2 seqs - according to the Kimura 2 parameter model.
+           This does not assume equal frequencies among all the nucleotides.
+ Returns : ArrayRef of 2d matrix which contains pairwise K values for 
+           all sequences in the alignment
+ Args    : Bio::Align::AlignI
+
+=cut
+
+sub K_TajimaNei {
+    my ($self,$aln) = @_;
+    return 0 unless $self->_check_arg($aln);
+
+    my @seqs;
+    foreach my $seq ( $aln->each_seq) {
+	push @seqs, [ split(//,uc $seq->seq())];
+    }
+    my @KVals;
+    my $L = $self->pairwise_stats->number_of_comparable_bases($aln);	    
+    my $seqct = scalar @seqs;
+    for( my $i = 1; $i <= $seqct; $i++ ) {
+	for( my $j = $i+1; $j <= $seqct; $j++ ) {
+	    my (%q,%y);
+	    my ($first,$second) = ($seqs[$i-1],$seqs[$j-1]);
+	    
+	    for (my $k = 0;$k<$aln->length; $k++ ) {	
+		next if( $first->[$k] =~ /^$GapChars$/ ||
+			 $second->[$k]  =~ /^$GapChars$/);
+		
+		$q{$second->[$k]}++;
+		$q{$first->[$k]}++;
+		if( $first->[$k] ne $second->[$k] ) {		
+		    $y{$first->[$k]}->{$second->[$k]}++;
+		}
+	    }
+	    
+	    my $q_sum = 0;
+	    foreach my $let ( @Nucleotides ) {              
+		# ct is the number of sequences compared (2)
+		# L is the length of the alignment without gaps
+		# $ct * $L = total number of nt compared
+		my $avg = $q{$let} / ( $SeqCount * $L );
+		$q_sum += $avg**2;
+	    }
+	    my $b1 = 1 - $q_sum;
+	    my $h = 0;
+	    for( my $i = 0; $i <= 2; $i++ ) {
+		for( my $j = $i+1; $j <= 3; $j++) {	    
+		    $y{$Nucleotides[$i]}->{$Nucleotides[$j]} ||= 0;
+		    $y{$Nucleotides[$j]}->{$Nucleotides[$i]} ||= 0;
+		    my $x = ($y{$Nucleotides[$i]}->{$Nucleotides[$j]} + 
+			     $y{$Nucleotides[$j]}->{$Nucleotides[$i]}) / $L;
+		    $h += ($x ** 2) / ( 2 * $q{$Nucleotides[$i]} * 
+					$q{$Nucleotides[$j]} );
+		}
+	    }
+	    my $N = $self->pairwise_stats->number_of_differences($aln);
+	    my $p = $N / $L;
+	    my $b = ( $b1 + $p ** 2 / $h ) / 2;	    
+	    my $K = - $b * log ( 1 - $p / $b );
+	    $KVals[$i]->[$j] = $KVals[$j]->[$i] = $K;
+	}
+    }
+    return \@KVals;
+}
+
+
+
+=head2 transversions
+
+ Title   : transversions
+ Usage   : my $transversions = $stats->transversion($aln);
+ Function: Calculates the number of transversions between two sequences in 
+           an alignment
+ Returns : integer
+ Args    : Bio::Align::AlignI
+
+
+=cut
+
+sub transversions{
+   my ($self,$aln) = @_;
+   return $self->_trans_count_helper($aln, $DNAChanges{'Transversions'});
+}
+
+=head2 transitions
+
+ Title   : transitions
+ Usage   : my $transitions = Bio::Align::DNAStatistics->transitions($aln);
+ Function: Calculates the number of transitions in a given DNA alignment
+ Returns : integer representing the number of transitions
+ Args    : Bio::Align::AlignI object
+
+
+=cut
+
+sub transitions{
+   my ($self,$aln) = @_;
+   return $self->_trans_count_helper($aln, $DNAChanges{'Transitions'});
+}
+
+
+sub _trans_count_helper {
+    my ($self,$aln,$type) = @_;
+    return 0 unless( $self->_check_arg($aln) );
+    if( ! $aln->is_flush ) { $self->throw("must be flush") }
+    my (@seqs,@tcount);
+    foreach my $seq ( $aln->get_seq_by_pos(1), $aln->get_seq_by_pos(2) ) {
+	push @seqs, [ split(//,$seq->seq())];
+    }
+    my ($first,$second) = @seqs;
+
+    for (my $i = 0;$i<$aln->length; $i++ ) { 
+	next if( $first->[$i]  =~ /^$GapChars$/ ||
+		 $second->[$i]  =~ /^$GapChars$/);	
+	if( $first->[$i] ne $second->[$i] ) {
+	    foreach my $nt ( @{$type->{$first->[$i]}} ) {
+		if( $nt eq $second->[$i]) {
+		    $tcount[$i]++;
+		}
+	    }
+	}
+    }
+    my $sum = 0;
+    map { if( $_) { $sum += $_} } @tcount;
+    return $sum;
+}
+
+# this will generate a matrix which records across the row, the number
+# of DNA subst 
+# 
+sub _build_nt_matrix {
+    my ($self,$seqa,$seqb) = @_;
+    
+
+    my $basect_matrix = [ [ qw(0 0 0 0) ],  # number of bases that match
+			  [ qw(0 0 0 0) ],
+			  [ qw(0 0 0 0) ],
+			  [ qw(0 0 0 0) ] ];
+    my $gaps = 0;                           # number of gaps
+    my $pfreq = [ qw( 0 0 0 0 0 0)];        # matrix for pair frequency
+    
+    for( my $i = 0; $i < scalar @$seqa; $i++) {
+	
+	my ($ti,$tj) = ($seqa->[$i],$seqb->[$i]);
+	$ti =~ tr/U/T/;
+	$tj =~ tr/U/T/;
+
+	if( $ti =~ /^$GapChars$/) { $gaps++; next; }
+	if( $tj =~ /^$GapChars$/) { $gaps++; next }
+
+	my $ti_index = $NucleotideIndexes{$ti};		
+	my $tj_index = $NucleotideIndexes{$tj};	    
+
+	if( ! defined $ti_index ) {
+	    print "ti_index not defined for $ti\n";
+	    next;
+	}
+	
+	$basect_matrix->[$ti_index]->[$tj_index]++;
+	
+	if( $ti ne $tj ) {
+	    $pfreq->[$NucleotideIndexes{join('',sort ($ti,$tj))}]++;
+	}
+    }
+    return ($basect_matrix,$pfreq,$gaps);
+}
+
+sub _check_arg {
+    my($self,$aln ) = @_;
+    if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
+	$self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::DNAStatistics");
+	return 0;
+    } elsif( $aln->get_seq_by_pos(1)->alphabet ne 'dna' ) { 
+	$self->warn("Must provide a DNA alignment to Bio::Align::DNAStatistics, you provided a " . $aln->get_seq_by_pos(1)->alphabet);
+	return 0;
+    }
+    return 1;
+}
+
+=head2 Data Methods
+
+=cut
+
+=head2 pairwise_stats
+
+ Title   : pairwise_stats
+ Usage   : $obj->pairwise_stats($newval)
+ Function: 
+ Returns : value of pairwise_stats
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub pairwise_stats{
+   my ($self,$value) = @_;
+   if( defined $value) {
+      $self->{'_pairwise_stats'} = $value;
+    }
+    return $self->{'_pairwise_stats'};
+
+}
+
+1;