diff variant_effect_predictor/Bio/SearchIO/Writer/ResultTableWriter.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/SearchIO/Writer/ResultTableWriter.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,458 @@
+# $Id: ResultTableWriter.pm,v 1.13 2002/12/05 13:46:35 heikki Exp $
+
+=head1 NAME
+
+Bio::SearchIO::Writer::ResultTableWriter - Outputs tab-delimited data for each Bio::Search::Result::ResultI object.
+
+=head1 SYNOPSIS
+
+=head2 Example 1: Using the default columns
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::ResultTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new();
+
+    my $out = Bio::SearchIO->new( -writer => $writer );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Example 2: Specifying a subset of columns 
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::ResultTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( 
+                                  -columns => [qw(
+                                                  query_name
+                                                  query_length
+                                                  )]  );
+
+    my $out = Bio::SearchIO->new( -writer => $writer,
+				  -file   => ">result.out" );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Custom Labels
+
+You can also specify different column labels if you don't want to use
+the defaults.  Do this by specifying a C<-labels> hash reference
+parameter when creating the ResultTableWriter object.  The keys of the
+hash should be the column number (left-most column = 1) for the label(s)
+you want to specify. Here's an example:
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( 
+                               -columns => [qw( query_name 
+                                                query_length
+                                                query_description )],
+                               -labels  => { 1 => 'QUERY_GI',
+  	                                     2 => 'QUERY_LENGTH' } );
+
+
+=head1 DESCRIPTION
+
+Bio::SearchIO::Writer::ResultTableWriter outputs data in tab-delimited
+format for each search result, one row per search result. This is a very
+coarse-grain level of information since it only includes data
+stored in the Bio::Search::Result::ResultI object itself and does not
+include any information about hits or HSPs.
+
+You most likely will never use this object but instead will use one of
+its subclasses: Bio::SearchIO::Writer::HitTableWriter or
+Bio::SearchIO::Writer::HSPTableWriter.
+
+=head2 Available Columns
+
+Here are the columns that can be specified in the C<-columns>
+parameter when creating a ResultTableWriter object.  If a C<-columns> parameter
+is not specified, this list, in this order, will be used as the default.
+
+    query_name
+    query_length
+    query_description
+
+For more details about these columns, see the documentation for the
+corresponding method in L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+    bioperl-l@bioperl.org              - General discussion
+    http://bio.perl.org/MailList.html  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via email
+or the web:
+
+    bioperl-bugs@bio.perl.org                   
+    http://bugzilla.bioperl.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac@bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
+and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+L<Bio::SearchIO::Writer::HitTableWriter>,
+L<Bio::SearchIO::Writer::HSPTableWriter>
+
+=head1 METHODS
+
+=cut
+
+
+package Bio::SearchIO::Writer::ResultTableWriter;
+
+use strict;
+use Bio::Root::Root;
+use Bio::SearchIO::SearchWriterI;
+
+use vars qw( @ISA );
+@ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );
+
+# Array fields: column, object, method[/argument], printf format, column label
+# Methods are defined in Bio::Search::Result::ResultI.
+# Tech note: If a bogus method is supplied, it will result in all values to be zero.
+#            Don't know why this is.
+my %column_map = (
+                  'query_name'        => ['1', 'result', 'query_name', 's', 'QUERY' ],
+                  'query_length'      => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
+                  'query_description' => ['3', 'result', 'query_description', 's', 'DESC_Q'],
+                 );
+
+sub column_map { return %column_map }
+
+sub new {
+    my ($class, @args) = @_; 
+    my $self = $class->SUPER::new(@args);
+
+    my( $col_spec, $label_spec,
+	$filters ) = $self->_rearrange( [qw(COLUMNS 
+					    LABELS
+					    FILTERS)], @args);
+    
+    $self->_set_cols( $col_spec );
+    $self->_set_labels( $label_spec ) if $label_spec;
+    $self->_set_printf_fmt();
+    $self->_set_row_data_func();
+    $self->_set_column_labels();
+    
+    if( defined $filters ) {
+	if( !ref($filters) =~ /HASH/i ) { 
+	    $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
+	} else { 
+	    while( my ($type,$code) = each %{$filters} ) {
+		$self->filter($type,$code);
+	    }
+	}
+    }
+
+
+    return $self;
+}
+
+
+# Purpose : Stores the column spec internally. Also performs QC on the 
+#           user-supplied column specification.
+#
+sub _set_cols {
+    my ($self, $col_spec_ref) = @_;
+    return if defined $self->{'_cols'};  # only set columns once
+
+    my %map = $self->column_map;
+
+    if( not defined $col_spec_ref) {
+        print STDERR "\nUsing default column map.\n";
+	$col_spec_ref = [ map { $_ } sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map ];
+    }
+
+    if( ref($col_spec_ref) eq 'ARRAY') {
+        # printf "%d columns to process\n", scalar(@$col_spec_ref);
+        my @col_spec = @{$col_spec_ref};
+        while( my $item = lc(shift @col_spec) ) {
+            if( not defined ($map{$item}) ) {
+                $self->throw(-class =>'Bio::Root::BadParameter',
+                             -text => "Unknown column name: $item"
+                            );
+            }
+            push @{$self->{'_cols'}}, $item;
+            #print "pushing on to col $col_num, $inner: $item\n";
+        }
+    }
+    else {
+        $self->throw(-class =>'Bio::Root::BadParameter',
+                     -text => "Can't set columns: not a ARRAY ref",
+                     -value => $col_spec_ref
+                    );
+    }
+}
+
+sub _set_printf_fmt {
+    my ($self) = @_;
+
+    my @cols = $self->columns();
+    my %map = $self->column_map;
+
+    my $printf_fmt = '';
+
+    foreach my $col ( @cols ) {
+	$printf_fmt .= "\%$map{$col}->[3]\t";
+    }
+
+    $printf_fmt =~ s/\\t$//;
+
+    $self->{'_printf_fmt'} = $printf_fmt;
+}
+
+sub printf_fmt { shift->{'_printf_fmt'} }
+
+# Sets the data to be used for the labels.
+sub _set_labels {
+    my ($self, $label_spec) = @_;
+    if( ref($label_spec) eq 'HASH') {
+        foreach my $col ( sort { $a <=> $b } keys %$label_spec ) {
+#            print "LABEL: $col $label_spec->{$col}\n";
+            $self->{'_custom_labels'}->{$col} = $label_spec->{$col};
+        }
+    }
+    else {
+        $self->throw(-class =>'Bio::Root::BadParameter',
+                     -text => "Can't set labels: not a HASH ref: $label_spec"
+                    );
+    }
+}
+
+sub _set_column_labels {
+    my $self = shift;
+
+    my @cols = $self->columns;
+    my %map = $self->column_map;
+    my $printf_fmt = '';
+    my (@data, $label, @underbars);
+
+    my $i = 0;
+    foreach my $col( @cols ) {
+	$i++;
+        $printf_fmt .= "\%s\t";
+
+        if(defined $self->{'_custom_labels'}->{$i}) {
+	    $label = $self->{'_custom_labels'}->{$i};
+        }
+	else {
+	    $label = $map{$col}->[4];
+	}
+	push @data, $label;
+        push @underbars, '-' x length($label);
+
+    }
+    $printf_fmt =~ s/\\t$//;
+
+    my $str = sprintf "$printf_fmt\n", @data;
+
+    $str =~ s/\t\n/\n/;
+    $str .= sprintf "$printf_fmt\n", @underbars;
+
+    $str =~ s/\t\n/\n/gs;
+    $self->{'_column_labels'} = $str;
+}
+
+# Purpose : Generate a function that will call the appropriate
+# methods on the result, hit, and hsp objects to retrieve the column data 
+# specified in the column spec.
+#
+# We should only have to go through the column spec once
+# for a given ResultTableWriter. To permit this, we'll generate code 
+# for a method that returns an array of the data for a row of output
+# given a result, hit, and hsp object as arguments.
+#
+sub _set_row_data_func {
+    my $self = shift;
+
+    # Now we need to generate a string that can be eval'd to get the data.
+    my @cols = $self->columns();
+    my %map = $self->column_map;
+    my @data;
+    while( my $col = shift @cols ) {
+	my $object = $map{$col}->[1];
+	my $method = $map{$col}->[2];
+        my $arg = '';
+        if( $method =~ m!(\w+)/(\w+)! ) {
+            $method = $1;
+            $arg = "\"$2\"";
+        }
+        push @data, "\$$object->$method($arg)";
+    }
+    my $code = join( ",", @data);
+
+    if( $self->verbose > 0 ) {
+## Begin Debugging	
+	$self->debug( "Data to print:\n");
+	foreach( 0..$#data) { $self->debug( " [". ($_+ 1) . "] $data[$_]\n");}
+	$self->debug( "CODE:\n$code\n");
+	$self->debug("Printf format: ". $self->printf_fmt. "\n");
+## End Debugging
+    }
+
+    my $func = sub {
+        my ($result, $hit, $hsp) = @_;
+        my @r = eval $code;
+        # This should reduce the occurrence of those opaque "all zeros" bugs.
+	if( $@ ) { $self->throw("Trouble in ResultTableWriter::_set_row_data_func() eval: $@\n\n"); 
+               }
+	return @r;
+    };
+    $self->{'_row_data_func'} = $func;
+}
+
+sub row_data_func { shift->{'_row_data_func'} }
+
+
+=head2 to_string()
+
+Note: this method is not intended for direct use. The
+SearchIO::write_result() method calls it automatically if the writer
+is hooked up to a SearchIO object as illustrated in L<the SYNOPSIS section | SYNOPSIS>.
+
+ Title     : to_string()
+           :
+ Usage     : print $writer->to_string( $result_obj, [$include_labels] );
+           :
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : $include_labels = boolean, if true column labels are included (default: false)
+           :
+ Returns   : String containing tab-delimited set of data for each hit 
+           : in a ResultI object. Some data is summed across multiple HSPs.
+           :
+ Throws    : n/a
+
+=cut
+
+#----------------
+sub to_string {
+#----------------
+    my ($self, $result, $include_labels) = @_;
+
+    my $str = $include_labels ? $self->column_labels() : '';
+    my $resultfilter = $self->filter('RESULT');
+    if( ! defined $resultfilter ||
+        &{$resultfilter}($result) ) {	
+	my @row_data  = &{$self->{'_row_data_func'}}( $result );
+	$str .= sprintf "$self->{'_printf_fmt'}\n", @row_data;
+	$str =~ s/\t\n/\n/gs;
+    }
+    return $str;
+}
+
+
+
+sub columns {
+    my $self = shift;
+    my @cols;
+    if( ref $self->{'_cols'} ) {
+        @cols = @{$self->{'_cols'}};
+    }
+    else {
+        my %map = $self->column_map;
+        @cols = sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map;
+   }
+    return @cols;
+}
+
+
+=head2 column_labels
+
+ Usage     : print $result_obj->column_labels();
+ Purpose   : Get column labels for to_string().
+ Returns   : String containing column labels. Tab-delimited.
+ Argument  : n/a
+ Throws    : n/a
+
+=cut
+
+sub column_labels { shift->{'_column_labels'} }
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document.  Nothing for
+           a text message.
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report {
+    return '';
+}
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+
+# Is this really needed?
+#=head2 signif_format
+#
+# Usage     : $writer->signif_format( [FMT] );
+# Purpose   : Allows retrieval of the P/Expect exponent values only
+#           : or as a two-element list (mantissa, exponent).
+# Usage     : $writer->signif_format('exp');
+#           : $writer->signif_format('parts');
+# Returns   : String or '' if not set.
+# Argument  : String, FMT = 'exp' (return the exponent only)
+#           :             = 'parts'(return exponent + mantissa in 2-elem list)
+#           :              = undefined (return the raw value)
+# Comments  : P/Expect values are still stored internally as the full,
+#           : scientific notation value.
+#
+#=cut
+#
+##-------------
+#sub signif_format {
+##-------------
+#    my $self = shift;
+#    if(@_) { $self->{'_signif_format'} = shift; }
+#    return $self->{'_signif_format'};
+#}
+
+1;