| 4 | 1 #!/usr/bin/env perl | 
|  | 2 ## | 
|  | 3 ## Sort-header - wrapper for GNU sort with header-line support | 
|  | 4 ## | 
|  | 5 ## Copyright(C) A. Gordon | 
|  | 6 ## license AGPLv3+ | 
|  | 7 ## | 
|  | 8 use strict; | 
|  | 9 use warnings; | 
|  | 10 use Data::Dumper; | 
|  | 11 use IO::Handle; | 
|  | 12 use Getopt::Long qw(:config bundling no_ignore_case_always); | 
|  | 13 | 
|  | 14 ## Forward declarations | 
|  | 15 sub add_standard_sort_param(@); | 
|  | 16 sub add_standard_sort_param_value(@); | 
|  | 17 sub forbidden_sort_param(@); | 
|  | 18 sub show_help(); | 
|  | 19 sub show_version(); | 
|  | 20 sub show_examples(); | 
|  | 21 sub parse_commandline_options(); | 
|  | 22 sub reassign_input_output(); | 
|  | 23 sub process_header_lines(); | 
|  | 24 sub run_sort(); | 
|  | 25 sub read_line_non_buffered(); | 
|  | 26 | 
|  | 27 | 
|  | 28 ## | 
|  | 29 ## Runtime options | 
|  | 30 ## | 
|  | 31 my $PROGRAM="sort-header"; | 
|  | 32 my $VERSION=0.4; | 
|  | 33 | 
|  | 34 my $check_only=undef; | 
|  | 35 my $input_file=undef; | 
|  | 36 my $output_file=undef; | 
|  | 37 my $field_separator=undef; | 
|  | 38 my $header_lines =1 ; | 
|  | 39 my $debug=undef; | 
|  | 40 my $sort_exit_code=1; #by default, assume some error | 
|  | 41 | 
|  | 42 my @sort_options; | 
|  | 43 | 
|  | 44 ## | 
|  | 45 ## Program Start | 
|  | 46 ## | 
|  | 47 parse_commandline_options(); | 
|  | 48 reassign_input_output(); | 
|  | 49 process_header_lines(); | 
|  | 50 run_sort(); | 
|  | 51 exit($sort_exit_code); | 
|  | 52 ## | 
|  | 53 ## Program End | 
|  | 54 ## | 
|  | 55 | 
|  | 56 sub show_examples() | 
|  | 57 { | 
|  | 58 print<<EOF; | 
|  | 59 Sorting a file with a header line: | 
|  | 60 | 
|  | 61 \$ cat input.txt | 
|  | 62 Fruit	Color	Price | 
|  | 63 Banana	Yellow	4.1 | 
|  | 64 Avocado	Green	8.0 | 
|  | 65 Apple	Red	3.0 | 
|  | 66 Melon	Green	6.1 | 
|  | 67 | 
|  | 68 # By default, 'sort-header' assumes 1 header line | 
|  | 69 # (no need to use --header in this case). | 
|  | 70 | 
|  | 71 \$ sort-header -k3,3nr input.txt | 
|  | 72 Fruit	Color	Price | 
|  | 73 Avocado	Green	8.0 | 
|  | 74 Melon	Green	6.1 | 
|  | 75 Banana	Yellow	4.1 | 
|  | 76 Apple	Red	3.0 | 
|  | 77 | 
|  | 78 EOF | 
|  | 79 	exit(0); | 
|  | 80 } | 
|  | 81 | 
|  | 82 sub show_help() | 
|  | 83 { | 
|  | 84 print<<EOF; | 
|  | 85 ${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines. | 
|  | 86 | 
|  | 87 Usage: $PROGRAM [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE] | 
|  | 88 | 
|  | 89 HEADER-OPTIONS: the following options are supported by '${PROGRAM}': | 
|  | 90 | 
|  | 91    --header N    =  Treat the first N lines as header lines. | 
|  | 92                     These line will NOT be sorted. They will be passed | 
|  | 93 		    directly to the output file. (default: 1) | 
|  | 94 | 
|  | 95    --version     =  Print ${PROGRAM}'s version. | 
|  | 96 | 
|  | 97    --debugheader =  Print debug messages (relating to ${PROGRAM}'s operation). | 
|  | 98 | 
|  | 99    --help        =  Show this help screen. | 
|  | 100 | 
|  | 101    --examples    =  Show usage examples. | 
|  | 102 | 
|  | 103 GNU sort options: | 
|  | 104    Most of the standard GNU sort options are supported and passed to GNU sort. | 
|  | 105    The following options can not be used with '${PROGRAM}': | 
|  | 106 | 
|  | 107    -m --merge           => ${PROGRAM} can only sort one file, not merge multiple files. | 
|  | 108    -c -C --check        => Currently not supported | 
|  | 109    --files0-from        => Currently not supported | 
|  | 110    -z --zero-terminated => Currently not supported | 
|  | 111 | 
|  | 112 INPUT-FILE: | 
|  | 113    If INPUT-FILE is not specified, $PROGRAM will use STDIN (just like GNU sort). | 
|  | 114 | 
|  | 115 EOF | 
|  | 116 	exit(0); | 
|  | 117 } | 
|  | 118 | 
|  | 119 sub show_version() | 
|  | 120 { | 
|  | 121 print<<EOF; | 
|  | 122 $PROGRAM $VERSION | 
|  | 123 Copyright (C) 2010 A. Gordon (gordon\@cshl.edu) | 
|  | 124 License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html) | 
|  | 125 | 
|  | 126 To see the GNU's sort version, run: | 
|  | 127 	sort --version | 
|  | 128 EOF | 
|  | 129 	exit(0); | 
|  | 130 } | 
|  | 131 | 
|  | 132 sub parse_commandline_options() | 
|  | 133 { | 
|  | 134 	my $rc = GetOptions( | 
|  | 135 		"ignore-leading-blanks|b" => \&add_standard_sort_param, | 
|  | 136 		"dictionary-order|d" => \&add_standard_sort_param, | 
|  | 137 		"ignore-case|f" => \&add_standard_sort_param, | 
|  | 138 		"general-numeric-sort|g" => \&add_standard_sort_param, | 
|  | 139 		"ignore-nonprinting|i" => \&add_standard_sort_param, | 
|  | 140 		"month-sort|M" => \&add_standard_sort_param, | 
|  | 141 		"human-numeric-sort|h" => \&add_standard_sort_param, | 
|  | 142 		"numeric-sort|n" => \&add_standard_sort_param, | 
|  | 143 		"random-source=s" => \&add_standard_sort_param_value, | 
|  | 144 		"random-sort|R" => \&add_standard_sort_param, | 
|  | 145 		"reverse|r" => \&add_standard_sort_param, | 
|  | 146 		"sort=s" => \&add_standard_sort_param_value, | 
|  | 147 		"version-sort|V" => \&add_standard_sort_param, | 
|  | 148 | 
|  | 149 		"check|c" => \&forbidden_sort_param, | 
|  | 150 		"C" => \&forbidden_sort_param, | 
|  | 151 		"compress-program=s" => \&add_standard_sort_param_value, | 
|  | 152 		"debug" => \&add_standard_sort_param, | 
|  | 153 | 
|  | 154 		"files0-from=s" => \&forbidden_sort_param, | 
|  | 155 | 
|  | 156 		"key|k=s" => \&add_standard_sort_param_value, | 
|  | 157 		"merge|m" => \&forbidden_sort_param, | 
|  | 158 		"batch-size=i" => \&forbidden_sort_param, | 
|  | 159 | 
|  | 160 		"parallel=i" => \&add_standard_sort_param_value, | 
|  | 161 | 
|  | 162 		"output|o=s" => \$output_file, | 
|  | 163 | 
|  | 164 		"stable|s" => \&add_standard_sort_param, | 
|  | 165 		"buffer-size|S=s" => \&add_standard_sort_param_value, | 
|  | 166 | 
|  | 167 		"field-separator|t=s" => \&add_standard_sort_param_value, | 
|  | 168 		"temporary-directory|T=s" => \&add_standard_sort_param_value, | 
|  | 169 		"unique|u" => \&add_standard_sort_param, | 
|  | 170 | 
|  | 171 		"zero-terminated|z" => \&forbidden_sort_param, | 
|  | 172 | 
|  | 173 		"help" => \&show_help, | 
|  | 174 		"version" => \&show_version, | 
|  | 175 		"examples" => \&show_examples, | 
|  | 176 | 
|  | 177 		"header=i" => \$header_lines, | 
|  | 178 		"debugheader" => \$debug, | 
|  | 179 		); | 
|  | 180 | 
|  | 181 	exit 1 unless $rc; | 
|  | 182 | 
|  | 183 	my @INPUT_FILES = @ARGV; | 
|  | 184 | 
|  | 185 	die "$PROGRAM: error: invalid number of header lines ($header_lines)\n" unless $header_lines>=0; | 
|  | 186 	die "$PROGRAM: error: Multiple input files specified. This program can sort only a signle file.\n" if (scalar(@INPUT_FILES)>1); | 
|  | 187 	$input_file = shift @INPUT_FILES if scalar(@INPUT_FILES)==1; | 
|  | 188 | 
|  | 189 	if ($debug) { | 
|  | 190 		warn "$PROGRAM: number of header lines = $header_lines\n"; | 
|  | 191 		warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n"; | 
|  | 192 	} | 
|  | 193 } | 
|  | 194 | 
|  | 195 sub reassign_input_output() | 
|  | 196 { | 
|  | 197 	if ($output_file) { | 
|  | 198 		warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug; | 
|  | 199 		open OUTPUT, '>', $output_file or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n"; | 
|  | 200 		STDOUT->fdopen(\*OUTPUT, 'w') or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n"; | 
|  | 201 	} | 
|  | 202 | 
|  | 203 | 
|  | 204 	if ($input_file) { | 
|  | 205 		warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug; | 
|  | 206 		open INPUT, '<', $input_file or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n"; | 
|  | 207 		STDIN->fdopen(\*INPUT, 'r') or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n"; | 
|  | 208 	} | 
|  | 209 } | 
|  | 210 | 
|  | 211 sub process_header_lines() | 
|  | 212 { | 
|  | 213 	warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug; | 
|  | 214 	for (my $i=0; $i<$header_lines; $i++) { | 
|  | 215 		my $line = read_line_non_buffered(); | 
|  | 216 		exit unless defined $line; | 
|  | 217 		print $line; | 
|  | 218 	} | 
|  | 219 } | 
|  | 220 | 
|  | 221 sub run_sort() | 
|  | 222 { | 
|  | 223 	warn "$PROGRAM: Running GNU sort...\n" if $debug; | 
|  | 224 	system('sort', @sort_options); | 
|  | 225 	if ($? == -1) { | 
|  | 226 		die "$PROGRAM: Error: failed to execute 'sort': $!\n"; | 
|  | 227 	} | 
|  | 228 	elsif ($? & 127) { | 
|  | 229 		my $signal = ($? & 127); | 
|  | 230 		kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide | 
|  | 231 		die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n"; | 
|  | 232 	} | 
|  | 233 	else { | 
|  | 234 		$sort_exit_code = ($? >> 8); | 
|  | 235 	} | 
|  | 236 } | 
|  | 237 | 
|  | 238 | 
|  | 239 sub add_standard_sort_param(@) | 
|  | 240 { | 
|  | 241 	my ($obj)=  @_; | 
|  | 242 	add_standard_sort_param_value($obj, undef); | 
|  | 243 } | 
|  | 244 | 
|  | 245 sub add_standard_sort_param_value(@) | 
|  | 246 { | 
|  | 247 	my ($obj,$value)=  @_; | 
|  | 248 | 
|  | 249 	my $option = "" . $obj ; #stringify the optino object, get the option name. | 
|  | 250 | 
|  | 251 	if (length($option)==1) { | 
|  | 252 		$option = "-" . $option ; | 
|  | 253 	} else { | 
|  | 254 		$option = "--" . $option ; | 
|  | 255 	} | 
|  | 256 	push @sort_options, $option ; | 
|  | 257 	push @sort_options, $value if $value; | 
|  | 258 } | 
|  | 259 | 
|  | 260 sub forbidden_sort_param(@) | 
|  | 261 { | 
|  | 262 	my ($obj,$value)=  @_; | 
|  | 263 	my $option = "" . $obj ; #stringify the optino object, get the option name. | 
|  | 264 | 
|  | 265 	die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n"; | 
|  | 266 } | 
|  | 267 | 
|  | 268 sub read_line_non_buffered() | 
|  | 269 { | 
|  | 270 	my $line = ''; | 
|  | 271 	while ( 1 ) { | 
|  | 272 		my $c; | 
|  | 273 		my $rc = sysread STDIN, $c, 1; | 
|  | 274 		die "$PROGRAM: STDIN Read error: $!" unless defined $rc; | 
|  | 275 		return $line if $rc==0 && $line; | 
|  | 276 		return undef if $rc==0 && (!$line); | 
|  | 277 		$line .= $c ; | 
|  | 278 		return $line if ( $c eq "\n"); | 
|  | 279 	} | 
|  | 280 } | 
|  | 281 |