diff variant_effect_predictor/Bio/EnsEMBL/Utils/ConfParser.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Utils/ConfParser.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,618 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2012 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <dev@ensembl.org>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::ConfParser - configuration parser for perl scripts
+
+=head1 SYNOPSIS
+
+  my $conf = new Bio::EnsEMBL::Utils::ConfParser(
+    -SERVERROOT   => "/path/to/ensembl",
+    -DEFAULT_CONF => "my.default.conf"
+  );
+
+  # parse options from configuration file and commandline
+  $conf->parse_options(
+    'mandatory_string_opt=s' => 1,
+    'optional_numeric_opt=n' => 0,
+  );
+
+  # get a paramter value
+  my $val = $conf->param('manadatory_string_op');
+
+=head1 DESCRIPTION
+
+This module parses a configuration file and the commandline options
+passed to a script (the latter superseed the former). Configuration
+files contain ini-file style name-value pairs, and the commandline
+options are passed to Getopt::Long for parsing.
+
+The parameter values are consequently accessible via the param()
+method. You can also create a commandline string of all current
+parameters and their values to pass to another script.
+
+=cut
+
+package Bio::EnsEMBL::Utils::ConfParser;
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+use Getopt::Long;
+use Text::Wrap;
+use Cwd qw(abs_path);
+use Pod::Usage qw(pod2usage);
+use Bio::EnsEMBL::Utils::Argument qw(rearrange);
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use Bio::EnsEMBL::Utils::ScriptUtils qw(user_proceed);
+
+
+=head2 new
+
+  Arg [SERVERROOT] :
+                String $serverroot - root directory of your ensembl code
+  Arg [DEFAULT_CONF] :
+                String $default_conf - default configuration file
+  Example     : my $conf = new Bio::EnsEMBL::Utils::ConfParser(
+                  -SERVERROOT => '/path/to/ensembl',
+                  -DEFAULT_CONF => 'my.default.conf'
+                );
+  Description : object constructor
+  Return type : Bio::EnsEMBL::Utils::ConfParser object
+  Exceptions  : thrown if no serverroot is provided
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub new {
+  my $caller = shift;
+  my $class = ref($caller) || $caller;
+
+  my ($serverroot, $default_conf) =
+    rearrange([qw(SERVERROOT DEFAULT_CONF)], @_);
+
+  throw("You must supply a serverroot.") unless ($serverroot);
+
+  my $self = {};
+  bless ($self, $class);
+
+  $self->serverroot($serverroot);
+  $self->default_conf($default_conf || "$ENV{HOME}/.ensembl_script.conf");
+
+  return $self;
+}
+
+
+=head2 parse_options
+
+  Arg[1..n]   : pairs of option definitions and mandatory flag (see below for
+                details)
+  Example     : $conf->parse_options(
+                  'mandatory_string_opt=s' => 1,
+                  'optional_numeric_opt=n' => 0,
+                );
+  Description : This method reads options from an (optional) configuration file
+                and parses the commandline options supplied by the user.
+                Commandline options will superseed config file settings. The
+                string "$SERVERROOT" in the configuration entries will be
+                replaced by  the appropriate value.
+
+                The arguments passed to this method are pairs of a Getopt::Long
+                style option definition (in fact it will be passed to
+                GetOptions() directly) and a flag indicating whether this
+                option is mandatory (1) or optional (0).
+
+                In addition to these user-defined options, a set of common
+                options is always parsed. See _common_options() for details.
+                
+                If you run your script with --interactive the user will be
+                asked to confirm the parameters after parsing.
+                
+                All parameters will then be accessible via $self->param('name').
+  Return type : true on success 
+  Exceptions  : thrown if configuration file can't be opened
+                thrown on missing mandatory parameters
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub parse_options {
+  my ($self, @params) = @_;
+
+  # add common options to user supplied list
+  push @params, $self->_common_options;
+
+  # read common commandline options
+  my %h;
+  my %params = @params;
+
+  Getopt::Long::Configure('pass_through');
+  &GetOptions(\%h, keys %params);
+
+  # reads config file
+  my $conffile = $h{'conffile'} || $self->default_conf;
+  $conffile = abs_path($conffile);
+
+  if (-e $conffile) {
+    open(CONF, $conffile) or throw( 
+        "Unable to open configuration file $conffile for reading: $!");
+
+    my $serverroot = $self->serverroot;
+    my $last;
+
+    while (my $line = <CONF>) {
+      chomp $line;
+      
+      # remove leading and trailing whitespace
+      $line =~ s/^\s*//;
+      $line =~ s/\s*$//;
+
+      # join with next line if terminated with backslash (this is to allow
+      # multiline configuration settings
+      $line = $last . $line;
+      if ($line =~ /\\$/) {
+        $line =~ s/\\$//;
+        $last = $line;
+        next;
+      } else {
+        $last = undef;
+      }
+
+      # remove comments
+      $line =~ s/^[#;].*//;
+      $line =~ s/\s+[;].*$//;
+
+      # read options into internal parameter datastructure
+      next unless ($line =~ /(\w\S*)\s*=\s*(.*)/);
+      my $name = $1;
+      my $val = $2;
+
+      # strip optional quotes from parameter values
+      $val =~ s/^["'](.*)["']/$1/;
+
+      # replace $SERVERROOT with value
+      if ($val =~ /\$SERVERROOT/) {
+        $val =~ s/\$SERVERROOT/$serverroot/g;
+        $val = abs_path($val);
+      }
+      $self->param($name, $val);
+    }
+
+    $self->param('conffile', $conffile);
+  }
+
+  # override configured parameter with commandline options
+  map { $self->param($_, $h{$_}) } keys %h;
+
+  # check for required params, convert comma to list, maintain an ordered
+  # list of parameters and list of 'flag' type params
+  my @missing = ();
+  my $i = 0;
+
+  foreach my $param (@params) {
+    next if ($i++ % 2);
+
+    my $required = $params{$param};
+    my ($list, $flag);
+    $list = 1 if ($param =~ /\@$/);
+    $flag = 1 if ($param =~ /!$/);
+    $param =~ s/(^\w+).*/$1/;
+    
+    $self->comma_to_list($param) if ($list);
+
+    push @missing, $param if ($required and !$self->param($param));
+    push @{ $self->{'_ordered_params'} }, $param;
+    $self->{'_flag_params'}->{$param} = 1 if ($flag);
+  }
+  
+  if (@missing) {
+    throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
+  }
+
+  # error handling and --help
+  pod2usage(1) if ($self->param('help'));
+
+  # ask user to confirm parameters to proceed
+  $self->confirm_params;
+
+  return(1);
+}
+
+
+#
+# Commonly used options. These are parsed by default even if they are not
+# passed to parse_options() explicitely.
+#
+sub _common_options {
+  my $self = shift;
+  return (
+    'conffile|conf=s' => 0,
+    'logfile|log=s' => 0,
+    'logauto!' => 0,
+    'logautobase=s' => 0,
+    'logautoid=s' => 0,
+    'logpath=s' => 0,
+    'logappend|log_append|log-append!' => 0,
+    'loglevel=s' => 0,
+    'is_component|is-component!' => 0,
+    'interactive|i!' => 0,
+    'dry_run|dry-run|dry|n!' => 0,
+    'help|h|?' => 0,
+  );
+}
+
+
+=head2 confirm_params
+
+  Example     : $conf->confirm_params;
+  Description : If the script is run with the --interactive switch, this method
+                prints a table of all parameters and their values and asks user
+                to confirm if he wants to proceed.
+  Return type : true on success
+  Exceptions  : none
+  Caller      : parse_options()
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub confirm_params {
+  my $self = shift;
+
+  if ($self->param('interactive')) {
+    # print parameter table
+    print "Running script with these parameters:\n\n";
+    print $self->list_param_values;
+
+    # ask user if he wants to proceed
+    exit unless user_proceed("Continue?", 1, 'n');
+  }
+  
+  return(1);
+}
+
+
+=head2 param
+
+  Arg[1]      : Parameter name
+  Arg[2..n]   : (optional) List of values to set
+  Example     : # getter
+                my $dbname = $conf->param('dbname');
+
+                # setter
+                $conf->param('port', 3306);
+                $conf->param('chromosomes', 1, 6, 'X');
+  Description : Getter/setter for parameters. Accepts single-value params and
+                list params.
+  Return type : Scalar value for single-value parameters, array of values for
+                list parameters
+  Exceptions  : thrown if no parameter name is supplied
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub param {
+  my $self = shift;
+  my $name = shift or throw("You must supply a parameter name");
+
+  # setter
+  if (@_) {
+    if (scalar(@_) == 1) {
+      # single value
+      $self->{'_param'}->{$name} = shift;
+    } else {
+      # list of values
+      undef $self->{'_param'}->{$name};
+      @{ $self->{'_param'}->{$name} } = @_;
+    }
+  }
+
+  # getter
+  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
+    # list parameter
+    return @{ $self->{'_param'}->{$name} };
+  } elsif (defined($self->{'_param'}->{$name})) {
+    # single-value parameter
+    return $self->{'_param'}->{$name};
+  } else {
+    return undef;
+  }
+}
+
+
+=head2 is_true
+
+  Arg[1]      : Parameter name
+  Example     : unless ($conf->is_true('upload')) {
+                  print "Won't upload data.\n";
+                  next;
+                }
+  Description : Checks whether a param value is set to 'true', which is defined
+                here as TRUE (in the Perl sense) but not the string 'no'.
+  Return type : Boolean
+  Exceptions  : thrown if no parameter name is supplied
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub is_true {
+  my $self = shift;
+  my $name = shift or throw("You must supply a parameter name");
+
+  my $param = $self->param($name);
+
+  if ($param and !($param =~ /^no$/i)) {
+    return(1);
+  } else {
+    return(0);
+  }
+}
+
+
+=head2 list_params
+
+  Example     : print "Current parameter names:\n";
+                foreach my $param (@{ $conf->list_params }) {
+                  print "  $param\n";
+                }
+  Description : Returns a list of the currently available parameter names. The
+                list will be in the same order as option definitions were
+                passed to the new() method.
+  Return type : Arrayref of parameter names
+  Exceptions  : none
+  Caller      : list_param_values(), create_commandline_options()
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub list_params {
+  my $self = shift;
+  return $self->{'_ordered_params'} || [];
+}
+
+
+=head2 list_param_values
+
+  Example     : print LOG $conf->list_param_values;
+  Description : prints a table of the parameters used in the script
+  Return type : String - the table to print
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub list_param_values {
+  my $self = shift;
+
+  $Text::Wrap::colums = 72;
+
+  my $txt = sprintf "    %-20s%-40s\n", qw(PARAMETER VALUE);
+  $txt .= "    " . "-"x70 . "\n";
+
+  foreach my $key (@{ $self->list_params }) {
+    my $val;
+    if (defined($self->param($key))) {
+      $txt .= Text::Wrap::wrap(sprintf('    %-19s ', $key), ' 'x24,
+        join(", ", $self->param($key)))."\n";
+    }
+  }
+
+  $txt .= "\n";
+
+  return $txt;
+}
+
+
+=head2 create_commandline_options
+
+  Arg[1..n]   : param/value pairs which should be added to or override the
+                currently defined parameters
+  Example     : $conf->create_commandline_options(
+                    'dbname' => 'homo_sapiens_vega_33_35e',
+                    'interactive' => 0
+                );
+  Description : Creates a commandline options string of all current paramters
+                that can be passed to another script.
+  Return type : String - commandline options string
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub create_commandline_options {
+  my ($self, %replace) = @_;
+
+  my %param_hash;
+
+  # deal with list values
+  foreach my $param (@{ $self->list_params }) {
+    my ($first, @rest) = $self->param($param);
+    next unless (defined($first));
+
+    if (@rest) {
+      $first = join(",", $first, @rest);
+    }
+    $param_hash{$param} = $first;
+  }
+
+  # replace values
+  foreach my $key (keys %replace) {
+    $param_hash{$key} = $replace{$key};
+  }
+
+  # create the commandline options string
+  my $options_string;
+  foreach my $param (keys %param_hash) {
+
+    my $val = $param_hash{$param};
+
+    # deal with 'flag' type params correctly
+    if ($self->{'_flag_params'}->{$param}) {
+      # change 'myparam' to 'nomyparam' if no value set
+      $param = 'no'.$param unless ($val);
+
+      # unset value (this is how flags behave)
+      $val = undef;
+    } else {
+      # don't add the param if it's not a flag param and no value is set
+      next unless (defined($val));
+
+      # quote the value if it contains blanks
+      if ($val =~ /\s+/) {
+        # use an appropriate quoting style
+        ($val =~ /'/) ? ($val = qq("$val")) : ($val = qq('$val'));
+      }
+    }
+    
+    $options_string .= sprintf(qq(--%s %s ), $param, $val);
+  }
+
+  return $options_string;
+}
+
+
+=head2 comma_to_list
+
+  Arg[1..n]   : list of parameter names to parse
+  Example     : $conf->comma_to_list('chromosomes');
+  Description : Transparently converts comma-separated lists into arrays (to
+                allow different styles of commandline options, see perldoc
+                Getopt::Long for details). Parameters are converted in place
+                (accessible through $self->param('name')).
+  Return type : true on success
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub comma_to_list {
+  my $self = shift;
+  
+  foreach my $param (@_) {
+    $self->param($param, split (/,/, join (',', $self->param($param))));
+  }
+  
+  return(1);
+}
+
+
+=head2 list_or_file
+
+  Arg[1]      : Name of parameter to parse
+  Example     : $conf->list_or_file('gene');
+  Description : Determines whether a parameter holds a list or it is a filename
+                to read the list entries from.
+  Return type : true on success
+  Exceptions  : thrown if list file can't be opened
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub list_or_file {
+  my ($self, $param) = @_;
+  
+  my @vals = $self->param($param);
+  return unless (@vals);
+
+  my $firstval = $vals[0];
+  
+  if (scalar(@vals) == 1 && -e $firstval) {
+    # we didn't get a list of values, but a file to read values from
+    @vals = ();
+    
+    open(IN, $firstval) or throw("Cannot open $firstval for reading: $!");
+    
+    while(<IN>){
+      chomp;
+      push(@vals, $_);
+    }
+    
+    close(IN);
+    
+    $self->param($param, @vals);
+  }
+  
+  $self->comma_to_list($param);
+  
+  return(1);
+}
+
+
+=head2 serverroot
+
+  Arg[1]      : (optional) String - root directory of your ensembl checkout
+  Example     : my $serverroot = $conf->serverroot;
+  Description : Getter/setter for the root directory of your ensembl checkout.
+  Return type : String
+  Exceptions  : none
+  Caller      : new(), general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub serverroot {
+  my $self = shift;
+  $self->{'_serverroot'} = shift if (@_);
+  return $self->{'_serverroot'};
+}
+
+
+=head2 default_conf
+
+  Arg[1]      : (optional) String - default configuration file
+  Example     : $conf->default_conf('my.default.conf');
+  Description : Getter/setter for the default configuration file.
+  Return type : String
+  Exceptions  : none
+  Caller      : new(), general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub default_conf {
+  my $self = shift;
+  $self->{'_default_conf'} = shift if (@_);
+  return $self->{'_default_conf'};
+}
+
+
+1;
+