Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/Root/Utilities.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Root/Utilities.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,1202 @@ +#----------------------------------------------------------------------------- +# PACKAGE : Bio::Root::Utilities.pm +# PURPOSE : Provides general-purpose utilities of potential interest to any Perl script. +# AUTHOR : Steve Chervitz (sac@bioperl.org) +# CREATED : Feb 1996 +# REVISION: $Id: Utilities.pm,v 1.21 2002/10/22 07:38:37 lapp Exp $ +# STATUS : Alpha +# +# This module manages file compression and uncompression using gzip or +# the UNIX compress programs (see the compress() and uncompress() methods). +# Also, it can create filehandles from gzipped files. If you want to use a +# different compression utility (such as zip, pkzip, stuffit, etc.) you +# are on your own. +# +# If you manage to incorporate an alternate compression utility into this +# module, please post a note to the bio.perl.org mailing list +# bioperl-l@bioperl.org +# +# TODO : Configure $GNU_PATH during installation. +# Improve documentation (POD). +# Make use of Date::Manip and/or Date::DateCalc as appropriate. +# +# MODIFICATIONS: See bottom of file. +# +# Copyright (c) 1996-2000 Steve Chervitz. All Rights Reserved. +# This module is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +# +#----------------------------------------------------------------------------- + +package Bio::Root::Utilities; +use strict; + +BEGIN { + use vars qw($Loaded_POSIX $Loaded_IOScalar); + $Loaded_POSIX = 1; + unless( eval "require POSIX" ) { + $Loaded_POSIX = 0; + } +} + +use Bio::Root::Global qw(:data :std $TIMEOUT_SECS); +use Bio::Root::Object (); +use Exporter (); +#use AutoLoader; +#*AUTOLOAD = \&AutoLoader::AUTOLOAD; + +use vars qw( @ISA @EXPORT_OK %EXPORT_TAGS ); +@ISA = qw( Bio::Root::Root Exporter); +@EXPORT_OK = qw($Util); +%EXPORT_TAGS = ( obj => [qw($Util)], + std => [qw($Util)],); + +use vars qw($ID $VERSION $Util $GNU_PATH $DEFAULT_NEWLINE); + +$ID = 'Bio::Root::Utilities'; +$VERSION = 0.05; + +# $GNU_PATH points to the directory containing the gzip and gunzip +# executables. It may be required for executing gzip/gunzip +# in some situations (e.g., when $ENV{PATH} doesn't contain this dir. +# Customize $GNU_PATH for your site if the compress() or +# uncompress() functions are generating exceptions. +$GNU_PATH = ''; +#$GNU_PATH = '/tools/gnu/bin/'; + +$DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason) + +## Static UTIL object. +$Util = {}; +bless $Util, $ID; +$Util->{'_name'} = 'Static Utilities object'; + +## POD Documentation: + +=head1 NAME + +Bio::Root::Utilities - General-purpose utility module + +=head1 SYNOPSIS + +=head2 Object Creation + + use Bio::Root::Utilities qw(:obj); + +There is no need to create a new Bio::Root::Utilities.pm object when +the C<:obj> tag is used. This tag will import the static $Util +object created by Bio::Root::Utilities.pm into your name space. This +saves you from having to call C<new Bio::Root::Utilities>. + +You are free to not use the :obj tag and create the object as you +like, but a Bio::Root::Utilities object is not configurable; any given +script only needs a single copy. + + $date_stamp = $Util->date_format('yyy-mm-dd'); + + $clean = $Util->untaint($dirty); + + $Util->mail_authority("Something you should know about..."); + + ...and other methods. See below. + +=head1 INSTALLATION + +This module is included with the central Bioperl distribution: + + http://bio.perl.org/Core/Latest + ftp://bio.perl.org/pub/DIST + +Follow the installation instructions included in the README file. + +=head1 DESCRIPTION + +Provides general-purpose utilities of potential interest to any Perl script. +Scripts and modules are expected to use the static $Util object exported by +this package with the C<:obj> tag. + +=head1 DEPENDENCIES + +B<Bio::Root::Utilities.pm> inherits from B<Bio::Root::Object.pm>. +It also relies on the GNU gzip program for file compression/uncompression. + +=head1 SEE ALSO + + Bio::Root::Object.pm - Core object + Bio::Root::Global.pm - Manages global variables/constants + + http://bio.perl.org/Projects/modules.html - Online module documentation + http://bio.perl.org/ - Bioperl Project Homepage + + FileHandle.pm (included in the Perl distribution or CPAN). + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other Bioperl modules. +Send your comments and suggestions preferably to one of the Bioperl mailing lists. +Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track the bugs and +their resolution. Bug reports can be submitted via email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR + +Steve Chervitz E<lt>sac@bioperl.orgE<gt> + +See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments. + +=head1 VERSION + +Bio::Root::Utilities.pm, 0.042 + +=head1 ACKNOWLEDGEMENTS + +This module was developed under the auspices of the Saccharomyces Genome +Database: + http://genome-www.stanford.edu/Saccharomyces + +=head1 COPYRIGHT + +Copyright (c) 1997-98 Steve Chervitz. All Rights Reserved. +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +=cut + +# +## +### +#### END of main POD documentation. +### +## +#' + + +=head1 APPENDIX + +Methods beginning with a leading underscore are considered private +and are intended for internal use by this module. They are +B<not> considered part of the public interface and are described here +for documentation purposes only. + +=cut + + +############################################################################ +## INSTANCE METHODS ## +############################################################################ + +=head2 date_format + + Title : date_format + Usage : $Util->date_format( [FMT], [DATE]) + Purpose : -- Get a string containing the formated date or time + : taken when this routine is invoked. + : -- Provides a way to avoid using `date`. + : -- Provides an interface to localtime(). + : -- Interconverts some date formats. + : + : (For additional functionality, use Date::Manip or + : Date::DateCalc available from CPAN). + Example : $Util->date_format(); + : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92'); + Returns : String (unless 'list' is provided as argument, see below) + : + : 'yyyy-mm-dd' = 1996-05-03 # default format. + : 'yyyy-dd-mm' = 1996-03-05 + : 'yyyy-mmm-dd' = 1996-May-03 + : 'd-m-y' = 3-May-1996 + : 'd m y' = 3 May 1996 + : 'dmy' = 3may96 + : 'mdy' = May 3, 1996 + : 'ymd' = 96may3 + : 'md' = may3 + : 'year' = 1996 + : 'hms' = 23:01:59 # 'hms' can be tacked on to any of the above options + : # to add the time stamp: eg 'dmyhms' + : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998 + : 'list' = the contents of localtime(time) in an array. + Argument : (all are optional) + : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd | + : mdy | ymd | md | d-m-y | hms | hm + : ('hms' may be appended to any of these to + : add a time stamp) + : + : DATE = String containing date to be converted. + : Acceptable input formats: + : 12/1/97 (for 1 December 1997) + : 1997-12-01 + : 1997-Dec-01 + Throws : + Comments : Relies on the $BASE_YEAR constant exported by Bio:Root::Global.pm. + : + : If you don't care about formatting or using backticks, you can + : always use: $date = `date`; + : + : For more features, use Date::Manip.pm, (which I should + : probably switch to...) + +See Also : L<file_date>(), L<month2num>() + +=cut + +#---------------' +sub date_format { +#--------------- + my $self = shift; + my $option = shift; + my $date = shift; # optional date to be converted. + + my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst); + + $option ||= 'yyyy-mm-dd'; + + my ($month_txt, $day_txt, $month_num, $fullYear); + my (@date); + + # Load a supplied date for conversion: + if(defined($date) && ($date =~ /[\D-]+/)) { + if( $date =~ /\//) { + ($mon,$mday,$year) = split(/\//, $date); + } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) { + ($year,$mon,$mday) = ($1, $2, $3); + } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) { + ($year,$mon,$mday) = ($1, $2, $3); + $mon = $self->month2num($2); + } else { + print STDERR "\n*** Unsupported input date format: $date\n"; + } + if(length($year) == 4) { $year = substr $year, 2; } + $mon -= 1; + } else { + ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date = + localtime(($date ? $date : time())); + return @date if $option =~ /list/i; + } + $month_txt = $MONTHS[$mon]; + $day_txt = $DAYS[$wday] if defined $wday; + $month_num = $mon+1; + $fullYear = $BASE_YEAR+$year; + +# print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>; + + if( $option =~ /yyyy-mm-dd/i ) { + $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday; + } elsif( $option =~ /yyyy-dd-mm/i ) { + $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num; + } elsif( $option =~ /yyyy-mmm-dd/i ) { + $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday; + } elsif( $option =~ /full|unix/i ) { + $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear; + } elsif( $option =~ /mdy/i ) { + $date = "$month_txt $mday, $fullYear"; + } elsif( $option =~ /ymd/i ) { + $date = $year."\l$month_txt$mday"; + } elsif( $option =~ /dmy/i ) { + $date = $mday."\l$month_txt$year"; + } elsif( $option =~ /md/i ) { + $date = "\l$month_txt$mday"; + } elsif( $option =~ /d-m-y/i ) { + $date = "$mday-$month_txt-$fullYear"; + } elsif( $option =~ /d m y/i ) { + $date = "$mday $month_txt $fullYear"; + } elsif( $option =~ /year/i ) { + $date = $fullYear; + } elsif( $option =~ /dmy/i ) { + $date = $mday.'-'.$month_txt.'-'.$fullYear; + } elsif($option and $option !~ /hms/i) { + print STDERR "\n*** Unrecognized date format request: $option\n"; + } + + if( $option =~ /hms/i) { + $date .= " $hour:$min:$sec" if $date; + $date ||= "$hour:$min:$sec"; + } + + return $date || join(" ", @date); +} + + +=head2 month2num + + Title : month2num + Purpose : Converts a string containing a name of a month to integer + : representing the number of the month in the year. + Example : $Util->month2num("march"); # returns 3 + Argument : The string argument must contain at least the first + : three characters of the month's name. Case insensitive. + Throws : Exception if the conversion fails. + +=cut + +#--------------' +sub month2num { +#-------------- + + my ($self, $str) = @_; + + # Get string in proper format for conversion. + $str = substr($str, 0, 3); + for(0..$#MONTHS) { + return $_+1 if $str =~ /$MONTHS[$_]/i; + } + $self->throw("Invalid month name: $str"); +} + +=head2 num2month + + Title : num2month + Purpose : Does the opposite of month2num. + : Converts a number into a string containing a name of a month. + Example : $Util->num2month(3); # returns 'Mar' + Throws : Exception if supplied number is out of range. + +=cut + +#------------- +sub num2month { +#------------- + my ($self, $num) = @_; + + $self->throw("Month out of range: $num") if $num < 1 or $num > 12; + return $MONTHS[$num]; +} + +=head2 compress + + Title : compress + Usage : $Util->compress(filename, [tmp]); + Purpose : Compress a file to conserve disk space. + Example : $Util->compress("/usr/people/me/data.txt"); + Returns : String (name of compressed file, full path). + Argument : filename = String (name of file to be compressed, full path). + : If the supplied filename ends with '.gz' or '.Z', + : that extension will be removed before attempting to compress. + : tmp = boolean, + : If true, (or if user is not the owner of the file) + : the file is compressed to a tmp file + : If false, file is clobbered with the compressed version. + Throws : Exception if file cannot be compressed + : If user is not owner of the file, generates a warning + : and compresses to a tmp file. + : To avoid this warning, use the -o file test operator + : and call this function with a true second argument. + Comments : Attempts to compress using gzip (default compression level). + : If that fails, will attempt to use compress. + : In some situations, the full path to the gzip executable + : may be required. This can be specified with the $GNU_PATH + : package global variable. When installed, $GNU_PATH is an + : empty string. + +See Also : L<uncompress>() + +=cut + +#------------' +sub compress { +#------------ + my $self = shift; + my $fileName = shift; + my $tmp = shift || 0; + + if($fileName =~ /(\.gz|\.Z)$/) { $fileName =~ s/$1$//; }; + $DEBUG && print STDERR "gzipping file $fileName"; + + my ($compressed, @args); + + if($tmp or not -o $fileName) { + if($Loaded_POSIX) { + $compressed = POSIX::tmpnam; + } else { + $compressed = _get_pseudo_tmpnam(); + } + $compressed .= ".tmp.bioperl"; + $compressed .= '.gz'; + @args = ($GNU_PATH."gzip -f < $fileName > $compressed"); + not $tmp and + $self->warn("Not owner of file $fileName\nCompressing to tmp file $compressed."); + $tmp = 1; + } else { + $compressed = "$fileName.gz"; + @args = ($GNU_PATH.'gzip', '-f', $fileName); + } + + if(system(@args) != 0) { + # gzip may not be present. Try compress. + $compressed = "$fileName.Z"; + if($tmp) { + @args = ("/usr/bin/compress -f < $fileName > $compressed"); + } else { + @args = ('/usr/bin/compress', '-f', $fileName); + } + system(@args) == 0 or + $self->throw("Failed to gzip/compress file $fileName: $!", + "Confirm current \$GNU_PATH: $GNU_PATH", + "Edit \$GNU_PATH in Bio::Root::Utilities.pm if necessary."); + } + + return $compressed; +} + + +=head2 uncompress + + Title : uncompress + Usage : $Util->uncompress(filename, [tmp]); + Purpose : Uncompress a file. + Example : $Util->uncompress("/usr/people/me/data.txt.gz"); + Returns : String (name of uncompressed file, full path). + Argument : filename = String (name of file to be uncompressed, full path). + : If the supplied filename does not end with '.gz' or '.Z' + : a '.gz' will be appended before attempting to uncompress. + : tmp = boolean, + : If true, (or if user is not the owner of the file) + : the file is uncompressed to a tmp file + : If false, file is clobbered with the uncompressed version. + Throws : Exception if file cannot be uncompressed + : If user is not owner of the file, generates a warning + : and uncompresses to a tmp file. + : To avoid this warning, use the -o file test operator + : and call this function with a true second argument. + Comments : Attempts to uncompress using gunzip. + : If that fails, will use uncompress. + : In some situations, the full path to the gzip executable + : may be required. This can be specified with the $GNU_PATH + : package global variable. When installed, $GNU_PATH is an + : empty string. + +See Also : L<compress>() + +=cut + +#--------------- +sub uncompress { +#--------------- + my $self = shift; + my $fileName = shift; + my $tmp = shift || 0; + + if(not $fileName =~ /(\.gz|\.Z)$/) { $fileName .= '.gz'; } + $DEBUG && print STDERR "gunzipping file $fileName"; + + my($uncompressed, @args); + + if($tmp or not -o $fileName) { + if($Loaded_POSIX) { + $uncompressed = POSIX::tmpnam; + } else { + $uncompressed = _get_pseudo_tmpnam(); + } + $uncompressed .= ".tmp.bioperl"; + @args = ($GNU_PATH."gunzip -f < $fileName > $uncompressed"); + not $tmp and $self->verbose > 0 and + $self->warn("Not owner of file $fileName\nUncompressing to tmp file $uncompressed."); + $tmp = 1; + } else { + @args = ($GNU_PATH.'gunzip', '-f', $fileName); + ($uncompressed = $fileName) =~ s/(\.gz|\.Z)$//; + } + +# $ENV{'PATH'} = '/tools/gnu/bin'; + + if(system(@args) != 0) { + # gunzip may not be present. Try uncompress. + ($uncompressed = $fileName) =~ s/(\.gz|\.Z)$//; + if($tmp) { + @args = ("/usr/bin/uncompress -f < $fileName > $uncompressed"); + } else { + @args = ('/usr/bin/uncompress', '-f', $fileName); + } + system(@args) == 0 or + $self->throw("Failed to gunzip/uncompress file $fileName: $!", + "Confirm current \$GNU_PATH: $GNU_PATH", + "Edit \$GNU_PATH in Bio::Root::Utilities.pm if necessary."); + } + + return $uncompressed; +} + + +=head2 file_date + + Title : file_date + Usage : $Util->file_date( filename [,date_format]) + Purpose : Obtains the date of a given file. + : Provides flexible formatting via date_format(). + Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15) + Argument : filename = string, full path name for file + : date_format = string, desired format for date (see date_format()). + : Default = yyyy-mm-dd + Thows : Exception if no file is provided or does not exist. + Comments : Uses the mtime field as obtained by stat(). + +=cut + +#-------------- +sub file_date { +#-------------- + my ($self, $file, $fmt) = @_; + + $self->throw("No such file: $file") if not $file or not -e $file; + + $fmt ||= 'yyyy-mm-dd'; + + my @file_data = stat($file); + return $self->date_format($fmt, $file_data[9]); # mtime field +} + + +=head2 untaint + + Title : untaint + Purpose : To remove nasty shell characters from untrusted data + : and allow a script to run with the -T switch. + : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r + : Accept only the first block of contiguous characters: + : Default allowed chars = "-\w.', ()" + : If $relax is true = "-\w.', ()\/=%:^<>*" + Usage : $Util->untaint($value, $relax) + Returns : String containing the untained data. + Argument: $value = string + : $relax = boolean + Comments: + This general untaint() function may not be appropriate for every situation. + To allow only a more restricted subset of special characters + (for example, untainting a regular expression), then using a custom + untainting mechanism would permit more control. + + Note that special trusted vars (like $0) require untainting. + +=cut + +#------------` +sub untaint { +#------------ + my($self,$value,$relax) = @_; + $relax ||= 0; + my $untainted; + + $DEBUG and print STDERR "\nUNTAINT: $value\n"; + + defined $value || return; + + if( $relax ) { + $value =~ /([-\w.\', ()\/=%:^<>*]+)/; + $untainted = $1 +# } elsif( $relax == 2 ) { # Could have several degrees of relax. +# $value =~ /([-\w.\', ()\/=%:^<>*]+)/; +# $untainted = $1 + } else { + $value =~ /([-\w.\', ()]+)/; + $untainted = $1 + } + + $DEBUG and print STDERR "UNTAINTED: $untainted\n"; + + $untainted; +} + + +=head2 mean_stdev + + Title : mean_stdev + Usage : ($mean, $stdev) = $Util->mean_stdev( @data ) + Purpose : Calculates the mean and standard deviation given a list of numbers. + Returns : 2-element list (mean, stdev) + Argument : list of numbers (ints or floats) + Thows : n/a + +=cut + +#--------------- +sub mean_stdev { +#--------------- + my ($self, @data) = @_; + my $mean = 0; + foreach (@data) { $mean += $_; } + $mean /= scalar @data; + my $sum_diff_sqd = 0; + foreach (@data) { $sum_diff_sqd += ($mean - $_) * ($mean - $_); } + my $stdev = sqrt(abs($sum_diff_sqd/(scalar @data)-1)); + return ($mean, $stdev); +} + + +=head2 count_files + + Title : count_files + Purpose : Counts the number of files/directories within a given directory. + : Also reports the number of text and binary files in the dir + : as well as names of these files and directories. + Usage : count_files(\%data) + : $data{-DIR} is the directory to be analyzed. Default is ./ + : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0). + Argument : Hash reference (empty) + Returns : n/a; + : Modifies the hash ref passed in as the sole argument. + : $$href{-TOTAL} scalar + : $$href{-NUM_TEXT_FILES} scalar + : $$href{-NUM_BINARY_FILES} scalar + : $$href{-NUM_DIRS} scalar + : $$href{-T_FILE_NAMES} array ref + : $$href{-B_FILE_NAMES} array ref + : $$href{-DIRNAMES} array ref + +=cut + +#---------------- +sub count_files { +#---------------- + my $self = shift; + my $href = shift; # Reference to an empty hash. + my( $name, @fileLine); + my $dir = $$href{-DIR} || './'; + my $print = $$href{-PRINT} || 0; + + ### Make sure $dir ends with / + $dir !~ /\/$/ and do{ $dir .= '/'; $$href{-DIR} = $dir; }; + + open ( PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!"); + + ### Initialize the hash data. + $$href{-TOTAL} = 0; + $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = $$href{-NUM_DIRS} = 0; + $$href{-T_FILE_NAMES} = []; + $$href{-B_FILE_NAMES} = []; + $$href{-DIR_NAMES} = []; + while( <PIPE> ) { + chomp(); + $$href{-TOTAL}++; + if( -T $dir.$_ ) { + $$href{-NUM_TEXT_FILES}++; push @{$$href{-T_FILE_NAMES}}, $_; } + if( -B $dir.$_ and not -d $dir.$_) { + $$href{-NUM_BINARY_FILES}++; push @{$$href{-B_FILE_NAMES}}, $_; } + if( -d $dir.$_ ) { + $$href{-NUM_DIRS}++; push @{$$href{-DIR_NAMES}}, $_; } + } + close PIPE; + + if( $print) { + printf( "\n%4d %s\n", $$href{-TOTAL}, "total files+dirs in $dir"); + printf( "%4d %s\n", $$href{-NUM_TEXT_FILES}, "text files"); + printf( "%4d %s\n", $$href{-NUM_BINARY_FILES}, "binary files"); + printf( "%4d %s\n", $$href{-NUM_DIRS}, "directories"); + } +} + + +#=head2 file_info +# +# Title : file_info +# Purpose : Obtains a variety of date for a given file. +# : Provides an interface to Perl's stat(). +# Status : Under development. Not ready. Don't use! +# +#=cut + +#-------------- +sub file_info { +#-------------- + my ($self, %param) = @_; + my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param); + $get ||= 'all'; + $fmt ||= 'yyyy-mm-dd'; + + my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, + $atime, $mtime, $ctime, $blksize, $blocks) = stat $file; + + if($get =~ /date/i) { + ## I can get the elapsed time since the file was modified but + ## it's not so straightforward to get the date in a nice format... + ## Think about using a standard CPAN module for this, like + ## Date::Manip or Date::DateCalc. + + my $date = $mtime; + my $elsec = time - $mtime; + printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN>; + my $days = sprintf "%.0f", $elsec/(3600*24); + } elsif($get eq 'all') { + return stat $file; + } +} + + +#------------ +sub delete { +#------------ + my $self = shift; + my $fileName = shift; + if(not -e $fileName) { + $self->throw("Can't delete file $fileName: Does not exist."); + } elsif(not -o $fileName) { + $self->throw("Can't delete file $fileName: Not owner."); + } + my $ulval = unlink($fileName) > 0 or + $self->throw("Failed to delete file $fileName: $!"); +} + + +=head2 create_filehandle + + Usage : $object->create_filehandle(<named parameters>); + Purpose : Create a FileHandle object from a file or STDIN. + : Mainly used as a helper method by read() and get_newline(). + Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt') + Argument : Named parameters (case-insensitive): + : (all optional) + : -CLIENT => object reference for the object submitting + : the request. This facilitates use by + : Bio::Root::IOManager::read(). Default = $Util. + : -FILE => string (full path to file) or a reference + : to a FileHandle object or typeglob. This is an + : optional parameter (if not defined, STDIN is used). + Returns : Reference to a FileHandle object. + Throws : Exception if cannot open a supplied file or if supplied with a + : reference that is not a FileHandle ref. + Comments : If given a FileHandle reference, this method simply returns it. + : This method assumes the user wants to read ascii data. So, if + : the file is binary, it will be treated as a compressed (gzipped) + : file and access it using gzip -ce. The problem here is that not + : all binary files are necessarily compressed. Therefore, + : this method should probably have a -mode parameter to + : specify ascii or binary. + +See Also : L<get_newline>(), L<Bio::Root::IOManager::read>(), + +=cut + +#--------------------- +sub create_filehandle { +#--------------------- + my($self, @param) = @_; + my($client, $file, $handle) = + $self->_rearrange([qw( CLIENT FILE HANDLE )], @param); + + if(not ref $client) { $client = $self; } + $file ||= $handle; + if( $client->can('file')) { + $file = $client->file($file); + } + + my $FH; # = new FileHandle; + + my ($handle_ref); + + if($handle_ref = ref($file)) { + if($handle_ref eq 'FileHandle') { + $FH = $file; + $client->{'_input_type'} = "FileHandle"; + } elsif($handle_ref eq 'GLOB') { + $FH = $file; + $client->{'_input_type'} = "Glob"; + } else { + $self->throw("Can't read from $file: Not a FileHandle or GLOB ref."); + } + $self->verbose > 0 and printf STDERR "$ID: reading data from FileHandle\n"; + + } elsif($file) { + $client->{'_input_type'} = "FileHandle for $file"; + + # Use gzip -cd to access compressed data. + if( -B $file ) { + $client->{'_input_type'} .= " (compressed)"; + $file = "${GNU_PATH}gzip -cd $file |" + } + + $FH = new FileHandle; + open ($FH, $file) || $self->throw("Can't access data file: $file", + "$!"); + $self->verbose > 0 and printf STDERR "$ID: reading data from file $file\n"; + + } else { + # Read from STDIN. + $FH = \*STDIN; + $self->verbose > 0 and printf STDERR "$ID: reading data from STDIN\n"; + $client->{'_input_type'} = "STDIN"; + } + + return $FH; + } + +=head2 get_newline + + Usage : $object->get_newline(<named parameters>); + Purpose : Determine the character(s) used for newlines in a given file or + : input stream. Delegates to Bio::Root::Utilities::get_newline() + Example : $data = $object->get_newline(-CLIENT => $anObj, + : -FILE =>'usr/people/me/data.txt') + Argument : Same arguemnts as for create_filehandle(). + Returns : Reference to a FileHandle object. + Throws : Propogates and exceptions thrown by Bio::Root::Utilities::get_newline(). + +See Also : L<taste_file>(), L<create_filehandle>() + +=cut + +#----------------- +sub get_newline { +#----------------- + my($self, @param) = @_; + + return $NEWLINE if defined $NEWLINE; + + my($client ) = + $self->_rearrange([qw( CLIENT )], @param); + + my $FH = $self->create_filehandle(@param); + + if(not ref $client) { $client = $self; } + + if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) { + # Can't taste from STDIN since we can't seek 0 on it. + # Are other non special Glob refs seek-able? + # Attempt to guess newline based on platform. + # Not robust since we could be reading Unix files on a Mac, e.g. + if(defined $ENV{'MACPERL'}) { + $NEWLINE = "\015"; # \r + } else { + $NEWLINE = "\012"; # \n + } + } else { + $NEWLINE = $self->taste_file($FH); + } + + close ($FH) unless ($client->{'_input_type'} eq 'STDIN' || + $client->{'_input_type'} eq 'FileHandle' || + $client->{'_input_type'} eq 'Glob' ); + + delete $client->{'_input_type'}; + + return $NEWLINE || $DEFAULT_NEWLINE; + } + + +=head2 taste_file + + Usage : $object->taste_file( <FileHandle> ); + : Mainly a utility method for get_newline(). + Purpose : Sample a filehandle to determine the character(s) used for a newline. + Example : $char = $Util->taste_file($FH) + Argument : Reference to a FileHandle object. + Returns : String containing an octal represenation of the newline character string. + : Unix = "\012" ("\n") + : Win32 = "\012\015" ("\r\n") + : Mac = "\015" ("\r") + Throws : Exception if no input is read within $TIMEOUT_SECS seconds. + : Exception if argument is not FileHandle object reference. + : Warning if cannot determine neewline char(s). + Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com). + +See Also : L<get_newline>() + +=cut + +#--------------- +sub taste_file { +#--------------- + my ($self, $FH) = @_; + my $BUFSIZ = 256; # Number of bytes read from the file handle. + my ($buffer, $octal, $str, $irs, $i); + my $wait = $TIMEOUT_SECS; + + ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref"); + + $buffer = ''; + + # this is a quick hack to check for availability of alarm(); just copied + # from Bio/Root/IOManager.pm HL 02/19/01 + my $alarm_available = 1; + eval { + alarm(0); + }; + if($@) { + # alarm() not available (ActiveState perl for win32 doesn't have it. + # See jitterbug PR#98) + $alarm_available = 0; + } + $SIG{ALRM} = sub { die "Timed out!"; }; + my $result; + eval { + $alarm_available && alarm( $wait ); + $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file + $alarm_available && alarm(0); + }; + if($@ =~ /Timed out!/) { + $self->throw("Timed out while waiting for input.", + "Timeout period = $wait seconds.\nFor longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Global.pm."); + + } elsif(not $result) { + my $err = $@; + $self->throw("read taste failed to read from FileHandle.", $err); + + } elsif($@ =~ /\S/) { + my $err = $@; + $self->throw("Unexpected error during read: $err"); + } + + seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle."); + + my @chars = split(//, $buffer); + + for ($i = 0; $i <$BUFSIZ; $i++) { + if (($chars[$i] eq "\012")) { + unless ($chars[$i-1] eq "\015") { + # Unix + $octal = "\012"; + $str = '\n'; + $irs = "^J"; + last; + } + } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) { + # DOS + $octal = "\015\012"; + $str = '\r\n'; + $irs = "^M^J"; + last; + } elsif (($chars[$i] eq "\015")) { + # Mac + $octal = "\015"; + $str = '\r'; + $irs = "^M"; + last; + } + } + if (not $octal) { + $self->warn("Could not determine newline char. Using '\012'"); + $octal = "\012"; + } else { +# print STDERR "NEWLINE CHAR = $irs\n"; + } + return($octal); +} + +###################################### +##### Mail Functions ######## +###################################### + +=head2 mail_authority + + Title : mail_authority + Usage : $Util->mail_authority( $message ) + Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY + +See Also : L<send_mail>() + +=cut + +sub mail_authority { + + my( $self, $message ) = @_; + my $script = $self->untaint($0,1); + + send_mail( -TO=>$AUTHORITY, -SUBJ=>$script, -MSG=>$message); + +} + + +=head2 send_mail + + Title : send_mail + Usage : $Util->send_mail( named_parameters ) + Purpose : Provides an interface to /usr/lib/sendmail + Returns : n/a + Argument : Named parameters: (case-insensitive) + : -TO => e-mail address to send to + : -SUBJ => subject for message (optional) + : -MSG => message to be sent (optional) + : -CC => cc: e-mail address (optional) + Thows : Exception if TO: address appears bad or is missing + Comments : Based on TomC's tip at: + : http://www.perl.com/CPAN-local/doc/FMTEYEWTK/safe_shellings + : + : Using default 'From:' information. + : sendmail options used: + : -t: ignore the address given on the command line and + : get To:address from the e-mail header. + : -oi: prevents send_mail from ending the message if it + : finds a period at the start of a line. + +See Also : L<mail_authority>() + +=cut + + +#-------------' +sub send_mail { +#------------- + my( $self, @param) = @_; + my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param); + + $self->throw("Invalid or missing e-mail address: $recipient") + if not $recipient =~ /\S+\@\S+/; + + $cc ||= ''; $subj ||= ''; $message ||= ''; + + open (SENDMAIL, "|/usr/lib/sendmail -oi -t") || + $self->throw("Can't send mail: sendmail cannot fork: $!"); + +print SENDMAIL <<QQ_EOF_QQ; +To: $recipient +Subject: $subj +Cc: $cc + +$message + +QQ_EOF_QQ + + close(SENDMAIL); + if ($?) { warn "sendmail didn't exit nicely: $?" } +} + + +###################################### +### Interactive Functions ##### +###################################### + + +=head2 yes_reply + + Title : yes_reply() + Usage : $Util->yes_reply( [query_string]); + Purpose : To test an STDIN input value for affirmation. + Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" ); + : $Util->yes_reply('Continue') || die; + Returns : Boolean, true (1) if input string begins with 'y' or 'Y' + Argument: query_string = string to be used to prompt user (optional) + : If not provided, 'Yes or no' will be used. + : Question mark is automatically appended. + +=cut + +#------------- +sub yes_reply { +#------------- + my $self = shift; + my $query = shift; + my $reply; + $query ||= 'Yes or no'; + print "\n$query? (y/n) [n] "; + chomp( $reply = <STDIN> ); + $reply =~ /^y/i; +} + + + +=head2 request_data + + Title : request_data() + Usage : $Util->request_data( [value_name]); + Purpose : To request data from a user to be entered via keyboard (STDIN). + Example : $name = $Util->request_data('Name'); + : # User will see: % Enter Name: + Returns : String, (data entered from keyboard, sans terminal newline.) + Argument: value_name = string to be used to prompt user. + : If not provided, 'data' will be used, (not very helpful). + : Question mark is automatically appended. + +=cut + +#---------------- +sub request_data { +#---------------- + my $self = shift; + my $data = shift || 'data'; + print "Enter $data: "; + # Remove the terminal newline char. + chomp($data = <STDIN>); + $data; +} + +sub quit_reply { +# Not much used since you can use request_data() +# and test for an empty string. + my $self = shift; + my $reply; + chop( $reply = <STDIN> ); + $reply =~ /^q.*/i; +} + + +=head2 verify_version + + Purpose : Checks the version of Perl used to invoke the script. + : Aborts program if version is less than the given argument. + Usage : verify_version('5.000') + +=cut + +#------------------ +sub verify_version { +#------------------ + my $self = shift; + my $reqVersion = shift; + + $] < $reqVersion and do { + printf STDERR ( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion); + printf STDERR ( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" ); + exit(1); + } +} + +# Purpose : Returns a string that can be used as a temporary file name. +# Based on localtime. +# This is used if POSIX is not available. + +sub _get_pseudo_tmpnam { + + my $date = localtime(time()); + + my $tmpnam = 'tmpnam'; + + if( $date =~ /([\d:]+)\s+(\d+)\s*$/ ) { + $tmpnam = $2. '_' . $1; + $tmpnam =~ s/:/_/g; + } + return $tmpnam; +} + + +1; +__END__ + +MODIFICATION NOTES: +--------------------- + +17 Feb 1999, sac: + * Using global $TIMEOUT_SECS in taste_file(). + +13 Feb 1999, sac: + * Renamed get_newline_char() to get_newline() since it could be >1 char. + +3 Feb 1999, sac: + * Added three new methods: create_filehandle, get_newline_char, taste_file. + create_filehandle represents functionality that was formerly buried + within Bio::Root::IOManager::read(). + +2 Dec 1998, sac: + * Removed autoloading code. + * Modified compress(), uncompress(), and delete() to properly + deal with file ownership issues. + +3 Jun 1998, sac: + * Improved file_date() to be less reliant on the output of ls. + (Note the word 'less'; it still relies on ls). + +5 Jul 1998, sac: + * compress() & uncompress() will write files to a temporary location + if the first attempt to compress/uncompress fails. + This allows users to access compressed files in directories in which they + lack write permission. + + +