comparison variant_effect_predictor/Bio/Root/Utilities.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #-----------------------------------------------------------------------------
2 # PACKAGE : Bio::Root::Utilities.pm
3 # PURPOSE : Provides general-purpose utilities of potential interest to any Perl script.
4 # AUTHOR : Steve Chervitz (sac@bioperl.org)
5 # CREATED : Feb 1996
6 # REVISION: $Id: Utilities.pm,v 1.21 2002/10/22 07:38:37 lapp Exp $
7 # STATUS : Alpha
8 #
9 # This module manages file compression and uncompression using gzip or
10 # the UNIX compress programs (see the compress() and uncompress() methods).
11 # Also, it can create filehandles from gzipped files. If you want to use a
12 # different compression utility (such as zip, pkzip, stuffit, etc.) you
13 # are on your own.
14 #
15 # If you manage to incorporate an alternate compression utility into this
16 # module, please post a note to the bio.perl.org mailing list
17 # bioperl-l@bioperl.org
18 #
19 # TODO : Configure $GNU_PATH during installation.
20 # Improve documentation (POD).
21 # Make use of Date::Manip and/or Date::DateCalc as appropriate.
22 #
23 # MODIFICATIONS: See bottom of file.
24 #
25 # Copyright (c) 1996-2000 Steve Chervitz. All Rights Reserved.
26 # This module is free software; you can redistribute it and/or
27 # modify it under the same terms as Perl itself.
28 #
29 #-----------------------------------------------------------------------------
30
31 package Bio::Root::Utilities;
32 use strict;
33
34 BEGIN {
35 use vars qw($Loaded_POSIX $Loaded_IOScalar);
36 $Loaded_POSIX = 1;
37 unless( eval "require POSIX" ) {
38 $Loaded_POSIX = 0;
39 }
40 }
41
42 use Bio::Root::Global qw(:data :std $TIMEOUT_SECS);
43 use Bio::Root::Object ();
44 use Exporter ();
45 #use AutoLoader;
46 #*AUTOLOAD = \&AutoLoader::AUTOLOAD;
47
48 use vars qw( @ISA @EXPORT_OK %EXPORT_TAGS );
49 @ISA = qw( Bio::Root::Root Exporter);
50 @EXPORT_OK = qw($Util);
51 %EXPORT_TAGS = ( obj => [qw($Util)],
52 std => [qw($Util)],);
53
54 use vars qw($ID $VERSION $Util $GNU_PATH $DEFAULT_NEWLINE);
55
56 $ID = 'Bio::Root::Utilities';
57 $VERSION = 0.05;
58
59 # $GNU_PATH points to the directory containing the gzip and gunzip
60 # executables. It may be required for executing gzip/gunzip
61 # in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
62 # Customize $GNU_PATH for your site if the compress() or
63 # uncompress() functions are generating exceptions.
64 $GNU_PATH = '';
65 #$GNU_PATH = '/tools/gnu/bin/';
66
67 $DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
68
69 ## Static UTIL object.
70 $Util = {};
71 bless $Util, $ID;
72 $Util->{'_name'} = 'Static Utilities object';
73
74 ## POD Documentation:
75
76 =head1 NAME
77
78 Bio::Root::Utilities - General-purpose utility module
79
80 =head1 SYNOPSIS
81
82 =head2 Object Creation
83
84 use Bio::Root::Utilities qw(:obj);
85
86 There is no need to create a new Bio::Root::Utilities.pm object when
87 the C<:obj> tag is used. This tag will import the static $Util
88 object created by Bio::Root::Utilities.pm into your name space. This
89 saves you from having to call C<new Bio::Root::Utilities>.
90
91 You are free to not use the :obj tag and create the object as you
92 like, but a Bio::Root::Utilities object is not configurable; any given
93 script only needs a single copy.
94
95 $date_stamp = $Util->date_format('yyy-mm-dd');
96
97 $clean = $Util->untaint($dirty);
98
99 $Util->mail_authority("Something you should know about...");
100
101 ...and other methods. See below.
102
103 =head1 INSTALLATION
104
105 This module is included with the central Bioperl distribution:
106
107 http://bio.perl.org/Core/Latest
108 ftp://bio.perl.org/pub/DIST
109
110 Follow the installation instructions included in the README file.
111
112 =head1 DESCRIPTION
113
114 Provides general-purpose utilities of potential interest to any Perl script.
115 Scripts and modules are expected to use the static $Util object exported by
116 this package with the C<:obj> tag.
117
118 =head1 DEPENDENCIES
119
120 B<Bio::Root::Utilities.pm> inherits from B<Bio::Root::Object.pm>.
121 It also relies on the GNU gzip program for file compression/uncompression.
122
123 =head1 SEE ALSO
124
125 Bio::Root::Object.pm - Core object
126 Bio::Root::Global.pm - Manages global variables/constants
127
128 http://bio.perl.org/Projects/modules.html - Online module documentation
129 http://bio.perl.org/ - Bioperl Project Homepage
130
131 FileHandle.pm (included in the Perl distribution or CPAN).
132
133 =head1 FEEDBACK
134
135 =head2 Mailing Lists
136
137 User feedback is an integral part of the evolution of this and other Bioperl modules.
138 Send your comments and suggestions preferably to one of the Bioperl mailing lists.
139 Your participation is much appreciated.
140
141 bioperl-l@bioperl.org - General discussion
142 http://bioperl.org/MailList.shtml - About the mailing lists
143
144 =head2 Reporting Bugs
145
146 Report bugs to the Bioperl bug tracking system to help us keep track the bugs and
147 their resolution. Bug reports can be submitted via email or the web:
148
149 bioperl-bugs@bio.perl.org
150 http://bugzilla.bioperl.org/
151
152 =head1 AUTHOR
153
154 Steve Chervitz E<lt>sac@bioperl.orgE<gt>
155
156 See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
157
158 =head1 VERSION
159
160 Bio::Root::Utilities.pm, 0.042
161
162 =head1 ACKNOWLEDGEMENTS
163
164 This module was developed under the auspices of the Saccharomyces Genome
165 Database:
166 http://genome-www.stanford.edu/Saccharomyces
167
168 =head1 COPYRIGHT
169
170 Copyright (c) 1997-98 Steve Chervitz. All Rights Reserved.
171 This module is free software; you can redistribute it and/or
172 modify it under the same terms as Perl itself.
173
174 =cut
175
176 #
177 ##
178 ###
179 #### END of main POD documentation.
180 ###
181 ##
182 #'
183
184
185 =head1 APPENDIX
186
187 Methods beginning with a leading underscore are considered private
188 and are intended for internal use by this module. They are
189 B<not> considered part of the public interface and are described here
190 for documentation purposes only.
191
192 =cut
193
194
195 ############################################################################
196 ## INSTANCE METHODS ##
197 ############################################################################
198
199 =head2 date_format
200
201 Title : date_format
202 Usage : $Util->date_format( [FMT], [DATE])
203 Purpose : -- Get a string containing the formated date or time
204 : taken when this routine is invoked.
205 : -- Provides a way to avoid using `date`.
206 : -- Provides an interface to localtime().
207 : -- Interconverts some date formats.
208 :
209 : (For additional functionality, use Date::Manip or
210 : Date::DateCalc available from CPAN).
211 Example : $Util->date_format();
212 : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
213 Returns : String (unless 'list' is provided as argument, see below)
214 :
215 : 'yyyy-mm-dd' = 1996-05-03 # default format.
216 : 'yyyy-dd-mm' = 1996-03-05
217 : 'yyyy-mmm-dd' = 1996-May-03
218 : 'd-m-y' = 3-May-1996
219 : 'd m y' = 3 May 1996
220 : 'dmy' = 3may96
221 : 'mdy' = May 3, 1996
222 : 'ymd' = 96may3
223 : 'md' = may3
224 : 'year' = 1996
225 : 'hms' = 23:01:59 # 'hms' can be tacked on to any of the above options
226 : # to add the time stamp: eg 'dmyhms'
227 : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
228 : 'list' = the contents of localtime(time) in an array.
229 Argument : (all are optional)
230 : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
231 : mdy | ymd | md | d-m-y | hms | hm
232 : ('hms' may be appended to any of these to
233 : add a time stamp)
234 :
235 : DATE = String containing date to be converted.
236 : Acceptable input formats:
237 : 12/1/97 (for 1 December 1997)
238 : 1997-12-01
239 : 1997-Dec-01
240 Throws :
241 Comments : Relies on the $BASE_YEAR constant exported by Bio:Root::Global.pm.
242 :
243 : If you don't care about formatting or using backticks, you can
244 : always use: $date = `date`;
245 :
246 : For more features, use Date::Manip.pm, (which I should
247 : probably switch to...)
248
249 See Also : L<file_date>(), L<month2num>()
250
251 =cut
252
253 #---------------'
254 sub date_format {
255 #---------------
256 my $self = shift;
257 my $option = shift;
258 my $date = shift; # optional date to be converted.
259
260 my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
261
262 $option ||= 'yyyy-mm-dd';
263
264 my ($month_txt, $day_txt, $month_num, $fullYear);
265 my (@date);
266
267 # Load a supplied date for conversion:
268 if(defined($date) && ($date =~ /[\D-]+/)) {
269 if( $date =~ /\//) {
270 ($mon,$mday,$year) = split(/\//, $date);
271 } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
272 ($year,$mon,$mday) = ($1, $2, $3);
273 } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
274 ($year,$mon,$mday) = ($1, $2, $3);
275 $mon = $self->month2num($2);
276 } else {
277 print STDERR "\n*** Unsupported input date format: $date\n";
278 }
279 if(length($year) == 4) { $year = substr $year, 2; }
280 $mon -= 1;
281 } else {
282 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
283 localtime(($date ? $date : time()));
284 return @date if $option =~ /list/i;
285 }
286 $month_txt = $MONTHS[$mon];
287 $day_txt = $DAYS[$wday] if defined $wday;
288 $month_num = $mon+1;
289 $fullYear = $BASE_YEAR+$year;
290
291 # print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
292
293 if( $option =~ /yyyy-mm-dd/i ) {
294 $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
295 } elsif( $option =~ /yyyy-dd-mm/i ) {
296 $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
297 } elsif( $option =~ /yyyy-mmm-dd/i ) {
298 $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
299 } elsif( $option =~ /full|unix/i ) {
300 $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
301 } elsif( $option =~ /mdy/i ) {
302 $date = "$month_txt $mday, $fullYear";
303 } elsif( $option =~ /ymd/i ) {
304 $date = $year."\l$month_txt$mday";
305 } elsif( $option =~ /dmy/i ) {
306 $date = $mday."\l$month_txt$year";
307 } elsif( $option =~ /md/i ) {
308 $date = "\l$month_txt$mday";
309 } elsif( $option =~ /d-m-y/i ) {
310 $date = "$mday-$month_txt-$fullYear";
311 } elsif( $option =~ /d m y/i ) {
312 $date = "$mday $month_txt $fullYear";
313 } elsif( $option =~ /year/i ) {
314 $date = $fullYear;
315 } elsif( $option =~ /dmy/i ) {
316 $date = $mday.'-'.$month_txt.'-'.$fullYear;
317 } elsif($option and $option !~ /hms/i) {
318 print STDERR "\n*** Unrecognized date format request: $option\n";
319 }
320
321 if( $option =~ /hms/i) {
322 $date .= " $hour:$min:$sec" if $date;
323 $date ||= "$hour:$min:$sec";
324 }
325
326 return $date || join(" ", @date);
327 }
328
329
330 =head2 month2num
331
332 Title : month2num
333 Purpose : Converts a string containing a name of a month to integer
334 : representing the number of the month in the year.
335 Example : $Util->month2num("march"); # returns 3
336 Argument : The string argument must contain at least the first
337 : three characters of the month's name. Case insensitive.
338 Throws : Exception if the conversion fails.
339
340 =cut
341
342 #--------------'
343 sub month2num {
344 #--------------
345
346 my ($self, $str) = @_;
347
348 # Get string in proper format for conversion.
349 $str = substr($str, 0, 3);
350 for(0..$#MONTHS) {
351 return $_+1 if $str =~ /$MONTHS[$_]/i;
352 }
353 $self->throw("Invalid month name: $str");
354 }
355
356 =head2 num2month
357
358 Title : num2month
359 Purpose : Does the opposite of month2num.
360 : Converts a number into a string containing a name of a month.
361 Example : $Util->num2month(3); # returns 'Mar'
362 Throws : Exception if supplied number is out of range.
363
364 =cut
365
366 #-------------
367 sub num2month {
368 #-------------
369 my ($self, $num) = @_;
370
371 $self->throw("Month out of range: $num") if $num < 1 or $num > 12;
372 return $MONTHS[$num];
373 }
374
375 =head2 compress
376
377 Title : compress
378 Usage : $Util->compress(filename, [tmp]);
379 Purpose : Compress a file to conserve disk space.
380 Example : $Util->compress("/usr/people/me/data.txt");
381 Returns : String (name of compressed file, full path).
382 Argument : filename = String (name of file to be compressed, full path).
383 : If the supplied filename ends with '.gz' or '.Z',
384 : that extension will be removed before attempting to compress.
385 : tmp = boolean,
386 : If true, (or if user is not the owner of the file)
387 : the file is compressed to a tmp file
388 : If false, file is clobbered with the compressed version.
389 Throws : Exception if file cannot be compressed
390 : If user is not owner of the file, generates a warning
391 : and compresses to a tmp file.
392 : To avoid this warning, use the -o file test operator
393 : and call this function with a true second argument.
394 Comments : Attempts to compress using gzip (default compression level).
395 : If that fails, will attempt to use compress.
396 : In some situations, the full path to the gzip executable
397 : may be required. This can be specified with the $GNU_PATH
398 : package global variable. When installed, $GNU_PATH is an
399 : empty string.
400
401 See Also : L<uncompress>()
402
403 =cut
404
405 #------------'
406 sub compress {
407 #------------
408 my $self = shift;
409 my $fileName = shift;
410 my $tmp = shift || 0;
411
412 if($fileName =~ /(\.gz|\.Z)$/) { $fileName =~ s/$1$//; };
413 $DEBUG && print STDERR "gzipping file $fileName";
414
415 my ($compressed, @args);
416
417 if($tmp or not -o $fileName) {
418 if($Loaded_POSIX) {
419 $compressed = POSIX::tmpnam;
420 } else {
421 $compressed = _get_pseudo_tmpnam();
422 }
423 $compressed .= ".tmp.bioperl";
424 $compressed .= '.gz';
425 @args = ($GNU_PATH."gzip -f < $fileName > $compressed");
426 not $tmp and
427 $self->warn("Not owner of file $fileName\nCompressing to tmp file $compressed.");
428 $tmp = 1;
429 } else {
430 $compressed = "$fileName.gz";
431 @args = ($GNU_PATH.'gzip', '-f', $fileName);
432 }
433
434 if(system(@args) != 0) {
435 # gzip may not be present. Try compress.
436 $compressed = "$fileName.Z";
437 if($tmp) {
438 @args = ("/usr/bin/compress -f < $fileName > $compressed");
439 } else {
440 @args = ('/usr/bin/compress', '-f', $fileName);
441 }
442 system(@args) == 0 or
443 $self->throw("Failed to gzip/compress file $fileName: $!",
444 "Confirm current \$GNU_PATH: $GNU_PATH",
445 "Edit \$GNU_PATH in Bio::Root::Utilities.pm if necessary.");
446 }
447
448 return $compressed;
449 }
450
451
452 =head2 uncompress
453
454 Title : uncompress
455 Usage : $Util->uncompress(filename, [tmp]);
456 Purpose : Uncompress a file.
457 Example : $Util->uncompress("/usr/people/me/data.txt.gz");
458 Returns : String (name of uncompressed file, full path).
459 Argument : filename = String (name of file to be uncompressed, full path).
460 : If the supplied filename does not end with '.gz' or '.Z'
461 : a '.gz' will be appended before attempting to uncompress.
462 : tmp = boolean,
463 : If true, (or if user is not the owner of the file)
464 : the file is uncompressed to a tmp file
465 : If false, file is clobbered with the uncompressed version.
466 Throws : Exception if file cannot be uncompressed
467 : If user is not owner of the file, generates a warning
468 : and uncompresses to a tmp file.
469 : To avoid this warning, use the -o file test operator
470 : and call this function with a true second argument.
471 Comments : Attempts to uncompress using gunzip.
472 : If that fails, will use uncompress.
473 : In some situations, the full path to the gzip executable
474 : may be required. This can be specified with the $GNU_PATH
475 : package global variable. When installed, $GNU_PATH is an
476 : empty string.
477
478 See Also : L<compress>()
479
480 =cut
481
482 #---------------
483 sub uncompress {
484 #---------------
485 my $self = shift;
486 my $fileName = shift;
487 my $tmp = shift || 0;
488
489 if(not $fileName =~ /(\.gz|\.Z)$/) { $fileName .= '.gz'; }
490 $DEBUG && print STDERR "gunzipping file $fileName";
491
492 my($uncompressed, @args);
493
494 if($tmp or not -o $fileName) {
495 if($Loaded_POSIX) {
496 $uncompressed = POSIX::tmpnam;
497 } else {
498 $uncompressed = _get_pseudo_tmpnam();
499 }
500 $uncompressed .= ".tmp.bioperl";
501 @args = ($GNU_PATH."gunzip -f < $fileName > $uncompressed");
502 not $tmp and $self->verbose > 0 and
503 $self->warn("Not owner of file $fileName\nUncompressing to tmp file $uncompressed.");
504 $tmp = 1;
505 } else {
506 @args = ($GNU_PATH.'gunzip', '-f', $fileName);
507 ($uncompressed = $fileName) =~ s/(\.gz|\.Z)$//;
508 }
509
510 # $ENV{'PATH'} = '/tools/gnu/bin';
511
512 if(system(@args) != 0) {
513 # gunzip may not be present. Try uncompress.
514 ($uncompressed = $fileName) =~ s/(\.gz|\.Z)$//;
515 if($tmp) {
516 @args = ("/usr/bin/uncompress -f < $fileName > $uncompressed");
517 } else {
518 @args = ('/usr/bin/uncompress', '-f', $fileName);
519 }
520 system(@args) == 0 or
521 $self->throw("Failed to gunzip/uncompress file $fileName: $!",
522 "Confirm current \$GNU_PATH: $GNU_PATH",
523 "Edit \$GNU_PATH in Bio::Root::Utilities.pm if necessary.");
524 }
525
526 return $uncompressed;
527 }
528
529
530 =head2 file_date
531
532 Title : file_date
533 Usage : $Util->file_date( filename [,date_format])
534 Purpose : Obtains the date of a given file.
535 : Provides flexible formatting via date_format().
536 Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
537 Argument : filename = string, full path name for file
538 : date_format = string, desired format for date (see date_format()).
539 : Default = yyyy-mm-dd
540 Thows : Exception if no file is provided or does not exist.
541 Comments : Uses the mtime field as obtained by stat().
542
543 =cut
544
545 #--------------
546 sub file_date {
547 #--------------
548 my ($self, $file, $fmt) = @_;
549
550 $self->throw("No such file: $file") if not $file or not -e $file;
551
552 $fmt ||= 'yyyy-mm-dd';
553
554 my @file_data = stat($file);
555 return $self->date_format($fmt, $file_data[9]); # mtime field
556 }
557
558
559 =head2 untaint
560
561 Title : untaint
562 Purpose : To remove nasty shell characters from untrusted data
563 : and allow a script to run with the -T switch.
564 : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
565 : Accept only the first block of contiguous characters:
566 : Default allowed chars = "-\w.', ()"
567 : If $relax is true = "-\w.', ()\/=%:^<>*"
568 Usage : $Util->untaint($value, $relax)
569 Returns : String containing the untained data.
570 Argument: $value = string
571 : $relax = boolean
572 Comments:
573 This general untaint() function may not be appropriate for every situation.
574 To allow only a more restricted subset of special characters
575 (for example, untainting a regular expression), then using a custom
576 untainting mechanism would permit more control.
577
578 Note that special trusted vars (like $0) require untainting.
579
580 =cut
581
582 #------------`
583 sub untaint {
584 #------------
585 my($self,$value,$relax) = @_;
586 $relax ||= 0;
587 my $untainted;
588
589 $DEBUG and print STDERR "\nUNTAINT: $value\n";
590
591 defined $value || return;
592
593 if( $relax ) {
594 $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
595 $untainted = $1
596 # } elsif( $relax == 2 ) { # Could have several degrees of relax.
597 # $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
598 # $untainted = $1
599 } else {
600 $value =~ /([-\w.\', ()]+)/;
601 $untainted = $1
602 }
603
604 $DEBUG and print STDERR "UNTAINTED: $untainted\n";
605
606 $untainted;
607 }
608
609
610 =head2 mean_stdev
611
612 Title : mean_stdev
613 Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
614 Purpose : Calculates the mean and standard deviation given a list of numbers.
615 Returns : 2-element list (mean, stdev)
616 Argument : list of numbers (ints or floats)
617 Thows : n/a
618
619 =cut
620
621 #---------------
622 sub mean_stdev {
623 #---------------
624 my ($self, @data) = @_;
625 my $mean = 0;
626 foreach (@data) { $mean += $_; }
627 $mean /= scalar @data;
628 my $sum_diff_sqd = 0;
629 foreach (@data) { $sum_diff_sqd += ($mean - $_) * ($mean - $_); }
630 my $stdev = sqrt(abs($sum_diff_sqd/(scalar @data)-1));
631 return ($mean, $stdev);
632 }
633
634
635 =head2 count_files
636
637 Title : count_files
638 Purpose : Counts the number of files/directories within a given directory.
639 : Also reports the number of text and binary files in the dir
640 : as well as names of these files and directories.
641 Usage : count_files(\%data)
642 : $data{-DIR} is the directory to be analyzed. Default is ./
643 : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
644 Argument : Hash reference (empty)
645 Returns : n/a;
646 : Modifies the hash ref passed in as the sole argument.
647 : $$href{-TOTAL} scalar
648 : $$href{-NUM_TEXT_FILES} scalar
649 : $$href{-NUM_BINARY_FILES} scalar
650 : $$href{-NUM_DIRS} scalar
651 : $$href{-T_FILE_NAMES} array ref
652 : $$href{-B_FILE_NAMES} array ref
653 : $$href{-DIRNAMES} array ref
654
655 =cut
656
657 #----------------
658 sub count_files {
659 #----------------
660 my $self = shift;
661 my $href = shift; # Reference to an empty hash.
662 my( $name, @fileLine);
663 my $dir = $$href{-DIR} || './';
664 my $print = $$href{-PRINT} || 0;
665
666 ### Make sure $dir ends with /
667 $dir !~ /\/$/ and do{ $dir .= '/'; $$href{-DIR} = $dir; };
668
669 open ( PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
670
671 ### Initialize the hash data.
672 $$href{-TOTAL} = 0;
673 $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = $$href{-NUM_DIRS} = 0;
674 $$href{-T_FILE_NAMES} = [];
675 $$href{-B_FILE_NAMES} = [];
676 $$href{-DIR_NAMES} = [];
677 while( <PIPE> ) {
678 chomp();
679 $$href{-TOTAL}++;
680 if( -T $dir.$_ ) {
681 $$href{-NUM_TEXT_FILES}++; push @{$$href{-T_FILE_NAMES}}, $_; }
682 if( -B $dir.$_ and not -d $dir.$_) {
683 $$href{-NUM_BINARY_FILES}++; push @{$$href{-B_FILE_NAMES}}, $_; }
684 if( -d $dir.$_ ) {
685 $$href{-NUM_DIRS}++; push @{$$href{-DIR_NAMES}}, $_; }
686 }
687 close PIPE;
688
689 if( $print) {
690 printf( "\n%4d %s\n", $$href{-TOTAL}, "total files+dirs in $dir");
691 printf( "%4d %s\n", $$href{-NUM_TEXT_FILES}, "text files");
692 printf( "%4d %s\n", $$href{-NUM_BINARY_FILES}, "binary files");
693 printf( "%4d %s\n", $$href{-NUM_DIRS}, "directories");
694 }
695 }
696
697
698 #=head2 file_info
699 #
700 # Title : file_info
701 # Purpose : Obtains a variety of date for a given file.
702 # : Provides an interface to Perl's stat().
703 # Status : Under development. Not ready. Don't use!
704 #
705 #=cut
706
707 #--------------
708 sub file_info {
709 #--------------
710 my ($self, %param) = @_;
711 my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
712 $get ||= 'all';
713 $fmt ||= 'yyyy-mm-dd';
714
715 my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
716 $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
717
718 if($get =~ /date/i) {
719 ## I can get the elapsed time since the file was modified but
720 ## it's not so straightforward to get the date in a nice format...
721 ## Think about using a standard CPAN module for this, like
722 ## Date::Manip or Date::DateCalc.
723
724 my $date = $mtime;
725 my $elsec = time - $mtime;
726 printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN>;
727 my $days = sprintf "%.0f", $elsec/(3600*24);
728 } elsif($get eq 'all') {
729 return stat $file;
730 }
731 }
732
733
734 #------------
735 sub delete {
736 #------------
737 my $self = shift;
738 my $fileName = shift;
739 if(not -e $fileName) {
740 $self->throw("Can't delete file $fileName: Does not exist.");
741 } elsif(not -o $fileName) {
742 $self->throw("Can't delete file $fileName: Not owner.");
743 }
744 my $ulval = unlink($fileName) > 0 or
745 $self->throw("Failed to delete file $fileName: $!");
746 }
747
748
749 =head2 create_filehandle
750
751 Usage : $object->create_filehandle(<named parameters>);
752 Purpose : Create a FileHandle object from a file or STDIN.
753 : Mainly used as a helper method by read() and get_newline().
754 Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
755 Argument : Named parameters (case-insensitive):
756 : (all optional)
757 : -CLIENT => object reference for the object submitting
758 : the request. This facilitates use by
759 : Bio::Root::IOManager::read(). Default = $Util.
760 : -FILE => string (full path to file) or a reference
761 : to a FileHandle object or typeglob. This is an
762 : optional parameter (if not defined, STDIN is used).
763 Returns : Reference to a FileHandle object.
764 Throws : Exception if cannot open a supplied file or if supplied with a
765 : reference that is not a FileHandle ref.
766 Comments : If given a FileHandle reference, this method simply returns it.
767 : This method assumes the user wants to read ascii data. So, if
768 : the file is binary, it will be treated as a compressed (gzipped)
769 : file and access it using gzip -ce. The problem here is that not
770 : all binary files are necessarily compressed. Therefore,
771 : this method should probably have a -mode parameter to
772 : specify ascii or binary.
773
774 See Also : L<get_newline>(), L<Bio::Root::IOManager::read>(),
775
776 =cut
777
778 #---------------------
779 sub create_filehandle {
780 #---------------------
781 my($self, @param) = @_;
782 my($client, $file, $handle) =
783 $self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
784
785 if(not ref $client) { $client = $self; }
786 $file ||= $handle;
787 if( $client->can('file')) {
788 $file = $client->file($file);
789 }
790
791 my $FH; # = new FileHandle;
792
793 my ($handle_ref);
794
795 if($handle_ref = ref($file)) {
796 if($handle_ref eq 'FileHandle') {
797 $FH = $file;
798 $client->{'_input_type'} = "FileHandle";
799 } elsif($handle_ref eq 'GLOB') {
800 $FH = $file;
801 $client->{'_input_type'} = "Glob";
802 } else {
803 $self->throw("Can't read from $file: Not a FileHandle or GLOB ref.");
804 }
805 $self->verbose > 0 and printf STDERR "$ID: reading data from FileHandle\n";
806
807 } elsif($file) {
808 $client->{'_input_type'} = "FileHandle for $file";
809
810 # Use gzip -cd to access compressed data.
811 if( -B $file ) {
812 $client->{'_input_type'} .= " (compressed)";
813 $file = "${GNU_PATH}gzip -cd $file |"
814 }
815
816 $FH = new FileHandle;
817 open ($FH, $file) || $self->throw("Can't access data file: $file",
818 "$!");
819 $self->verbose > 0 and printf STDERR "$ID: reading data from file $file\n";
820
821 } else {
822 # Read from STDIN.
823 $FH = \*STDIN;
824 $self->verbose > 0 and printf STDERR "$ID: reading data from STDIN\n";
825 $client->{'_input_type'} = "STDIN";
826 }
827
828 return $FH;
829 }
830
831 =head2 get_newline
832
833 Usage : $object->get_newline(<named parameters>);
834 Purpose : Determine the character(s) used for newlines in a given file or
835 : input stream. Delegates to Bio::Root::Utilities::get_newline()
836 Example : $data = $object->get_newline(-CLIENT => $anObj,
837 : -FILE =>'usr/people/me/data.txt')
838 Argument : Same arguemnts as for create_filehandle().
839 Returns : Reference to a FileHandle object.
840 Throws : Propogates and exceptions thrown by Bio::Root::Utilities::get_newline().
841
842 See Also : L<taste_file>(), L<create_filehandle>()
843
844 =cut
845
846 #-----------------
847 sub get_newline {
848 #-----------------
849 my($self, @param) = @_;
850
851 return $NEWLINE if defined $NEWLINE;
852
853 my($client ) =
854 $self->_rearrange([qw( CLIENT )], @param);
855
856 my $FH = $self->create_filehandle(@param);
857
858 if(not ref $client) { $client = $self; }
859
860 if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
861 # Can't taste from STDIN since we can't seek 0 on it.
862 # Are other non special Glob refs seek-able?
863 # Attempt to guess newline based on platform.
864 # Not robust since we could be reading Unix files on a Mac, e.g.
865 if(defined $ENV{'MACPERL'}) {
866 $NEWLINE = "\015"; # \r
867 } else {
868 $NEWLINE = "\012"; # \n
869 }
870 } else {
871 $NEWLINE = $self->taste_file($FH);
872 }
873
874 close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
875 $client->{'_input_type'} eq 'FileHandle' ||
876 $client->{'_input_type'} eq 'Glob' );
877
878 delete $client->{'_input_type'};
879
880 return $NEWLINE || $DEFAULT_NEWLINE;
881 }
882
883
884 =head2 taste_file
885
886 Usage : $object->taste_file( <FileHandle> );
887 : Mainly a utility method for get_newline().
888 Purpose : Sample a filehandle to determine the character(s) used for a newline.
889 Example : $char = $Util->taste_file($FH)
890 Argument : Reference to a FileHandle object.
891 Returns : String containing an octal represenation of the newline character string.
892 : Unix = "\012" ("\n")
893 : Win32 = "\012\015" ("\r\n")
894 : Mac = "\015" ("\r")
895 Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
896 : Exception if argument is not FileHandle object reference.
897 : Warning if cannot determine neewline char(s).
898 Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
899
900 See Also : L<get_newline>()
901
902 =cut
903
904 #---------------
905 sub taste_file {
906 #---------------
907 my ($self, $FH) = @_;
908 my $BUFSIZ = 256; # Number of bytes read from the file handle.
909 my ($buffer, $octal, $str, $irs, $i);
910 my $wait = $TIMEOUT_SECS;
911
912 ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
913
914 $buffer = '';
915
916 # this is a quick hack to check for availability of alarm(); just copied
917 # from Bio/Root/IOManager.pm HL 02/19/01
918 my $alarm_available = 1;
919 eval {
920 alarm(0);
921 };
922 if($@) {
923 # alarm() not available (ActiveState perl for win32 doesn't have it.
924 # See jitterbug PR#98)
925 $alarm_available = 0;
926 }
927 $SIG{ALRM} = sub { die "Timed out!"; };
928 my $result;
929 eval {
930 $alarm_available && alarm( $wait );
931 $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
932 $alarm_available && alarm(0);
933 };
934 if($@ =~ /Timed out!/) {
935 $self->throw("Timed out while waiting for input.",
936 "Timeout period = $wait seconds.\nFor longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Global.pm.");
937
938 } elsif(not $result) {
939 my $err = $@;
940 $self->throw("read taste failed to read from FileHandle.", $err);
941
942 } elsif($@ =~ /\S/) {
943 my $err = $@;
944 $self->throw("Unexpected error during read: $err");
945 }
946
947 seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
948
949 my @chars = split(//, $buffer);
950
951 for ($i = 0; $i <$BUFSIZ; $i++) {
952 if (($chars[$i] eq "\012")) {
953 unless ($chars[$i-1] eq "\015") {
954 # Unix
955 $octal = "\012";
956 $str = '\n';
957 $irs = "^J";
958 last;
959 }
960 } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
961 # DOS
962 $octal = "\015\012";
963 $str = '\r\n';
964 $irs = "^M^J";
965 last;
966 } elsif (($chars[$i] eq "\015")) {
967 # Mac
968 $octal = "\015";
969 $str = '\r';
970 $irs = "^M";
971 last;
972 }
973 }
974 if (not $octal) {
975 $self->warn("Could not determine newline char. Using '\012'");
976 $octal = "\012";
977 } else {
978 # print STDERR "NEWLINE CHAR = $irs\n";
979 }
980 return($octal);
981 }
982
983 ######################################
984 ##### Mail Functions ########
985 ######################################
986
987 =head2 mail_authority
988
989 Title : mail_authority
990 Usage : $Util->mail_authority( $message )
991 Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
992
993 See Also : L<send_mail>()
994
995 =cut
996
997 sub mail_authority {
998
999 my( $self, $message ) = @_;
1000 my $script = $self->untaint($0,1);
1001
1002 send_mail( -TO=>$AUTHORITY, -SUBJ=>$script, -MSG=>$message);
1003
1004 }
1005
1006
1007 =head2 send_mail
1008
1009 Title : send_mail
1010 Usage : $Util->send_mail( named_parameters )
1011 Purpose : Provides an interface to /usr/lib/sendmail
1012 Returns : n/a
1013 Argument : Named parameters: (case-insensitive)
1014 : -TO => e-mail address to send to
1015 : -SUBJ => subject for message (optional)
1016 : -MSG => message to be sent (optional)
1017 : -CC => cc: e-mail address (optional)
1018 Thows : Exception if TO: address appears bad or is missing
1019 Comments : Based on TomC's tip at:
1020 : http://www.perl.com/CPAN-local/doc/FMTEYEWTK/safe_shellings
1021 :
1022 : Using default 'From:' information.
1023 : sendmail options used:
1024 : -t: ignore the address given on the command line and
1025 : get To:address from the e-mail header.
1026 : -oi: prevents send_mail from ending the message if it
1027 : finds a period at the start of a line.
1028
1029 See Also : L<mail_authority>()
1030
1031 =cut
1032
1033
1034 #-------------'
1035 sub send_mail {
1036 #-------------
1037 my( $self, @param) = @_;
1038 my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
1039
1040 $self->throw("Invalid or missing e-mail address: $recipient")
1041 if not $recipient =~ /\S+\@\S+/;
1042
1043 $cc ||= ''; $subj ||= ''; $message ||= '';
1044
1045 open (SENDMAIL, "|/usr/lib/sendmail -oi -t") ||
1046 $self->throw("Can't send mail: sendmail cannot fork: $!");
1047
1048 print SENDMAIL <<QQ_EOF_QQ;
1049 To: $recipient
1050 Subject: $subj
1051 Cc: $cc
1052
1053 $message
1054
1055 QQ_EOF_QQ
1056
1057 close(SENDMAIL);
1058 if ($?) { warn "sendmail didn't exit nicely: $?" }
1059 }
1060
1061
1062 ######################################
1063 ### Interactive Functions #####
1064 ######################################
1065
1066
1067 =head2 yes_reply
1068
1069 Title : yes_reply()
1070 Usage : $Util->yes_reply( [query_string]);
1071 Purpose : To test an STDIN input value for affirmation.
1072 Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
1073 : $Util->yes_reply('Continue') || die;
1074 Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
1075 Argument: query_string = string to be used to prompt user (optional)
1076 : If not provided, 'Yes or no' will be used.
1077 : Question mark is automatically appended.
1078
1079 =cut
1080
1081 #-------------
1082 sub yes_reply {
1083 #-------------
1084 my $self = shift;
1085 my $query = shift;
1086 my $reply;
1087 $query ||= 'Yes or no';
1088 print "\n$query? (y/n) [n] ";
1089 chomp( $reply = <STDIN> );
1090 $reply =~ /^y/i;
1091 }
1092
1093
1094
1095 =head2 request_data
1096
1097 Title : request_data()
1098 Usage : $Util->request_data( [value_name]);
1099 Purpose : To request data from a user to be entered via keyboard (STDIN).
1100 Example : $name = $Util->request_data('Name');
1101 : # User will see: % Enter Name:
1102 Returns : String, (data entered from keyboard, sans terminal newline.)
1103 Argument: value_name = string to be used to prompt user.
1104 : If not provided, 'data' will be used, (not very helpful).
1105 : Question mark is automatically appended.
1106
1107 =cut
1108
1109 #----------------
1110 sub request_data {
1111 #----------------
1112 my $self = shift;
1113 my $data = shift || 'data';
1114 print "Enter $data: ";
1115 # Remove the terminal newline char.
1116 chomp($data = <STDIN>);
1117 $data;
1118 }
1119
1120 sub quit_reply {
1121 # Not much used since you can use request_data()
1122 # and test for an empty string.
1123 my $self = shift;
1124 my $reply;
1125 chop( $reply = <STDIN> );
1126 $reply =~ /^q.*/i;
1127 }
1128
1129
1130 =head2 verify_version
1131
1132 Purpose : Checks the version of Perl used to invoke the script.
1133 : Aborts program if version is less than the given argument.
1134 Usage : verify_version('5.000')
1135
1136 =cut
1137
1138 #------------------
1139 sub verify_version {
1140 #------------------
1141 my $self = shift;
1142 my $reqVersion = shift;
1143
1144 $] < $reqVersion and do {
1145 printf STDERR ( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
1146 printf STDERR ( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );
1147 exit(1);
1148 }
1149 }
1150
1151 # Purpose : Returns a string that can be used as a temporary file name.
1152 # Based on localtime.
1153 # This is used if POSIX is not available.
1154
1155 sub _get_pseudo_tmpnam {
1156
1157 my $date = localtime(time());
1158
1159 my $tmpnam = 'tmpnam';
1160
1161 if( $date =~ /([\d:]+)\s+(\d+)\s*$/ ) {
1162 $tmpnam = $2. '_' . $1;
1163 $tmpnam =~ s/:/_/g;
1164 }
1165 return $tmpnam;
1166 }
1167
1168
1169 1;
1170 __END__
1171
1172 MODIFICATION NOTES:
1173 ---------------------
1174
1175 17 Feb 1999, sac:
1176 * Using global $TIMEOUT_SECS in taste_file().
1177
1178 13 Feb 1999, sac:
1179 * Renamed get_newline_char() to get_newline() since it could be >1 char.
1180
1181 3 Feb 1999, sac:
1182 * Added three new methods: create_filehandle, get_newline_char, taste_file.
1183 create_filehandle represents functionality that was formerly buried
1184 within Bio::Root::IOManager::read().
1185
1186 2 Dec 1998, sac:
1187 * Removed autoloading code.
1188 * Modified compress(), uncompress(), and delete() to properly
1189 deal with file ownership issues.
1190
1191 3 Jun 1998, sac:
1192 * Improved file_date() to be less reliant on the output of ls.
1193 (Note the word 'less'; it still relies on ls).
1194
1195 5 Jul 1998, sac:
1196 * compress() & uncompress() will write files to a temporary location
1197 if the first attempt to compress/uncompress fails.
1198 This allows users to access compressed files in directories in which they
1199 lack write permission.
1200
1201
1202