changeset 0:ec66f9d90ef0 draft

initial uploaded
author bgruening
date Thu, 05 Sep 2013 04:58:21 -0400
parents
children a4ad586d1403
files awk.xml cut.xml easyjoin easyjoin.xml find_and_replace find_and_replace.xml grep.xml head.xml multijoin multijoin.xml readme.rst scripts/ansi2html.sh sed.xml sort.xml sort_rows.xml sorted_uniq.xml tail.xml tool_dependencies.xml unsorted_uniq.py unsorted_uniq.xml
diffstat 20 files changed, 2514 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/awk.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,124 @@
+<tool id="unixtools_awk_tool" name="Awk" version="0.1.1">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="4.1.0">gnu_awk</requirement>
+    </requirements>
+    <command>
+        awk --sandbox -v FS=\$'\t' -v OFS=\$'\t' --re-interval -f '$awk_script' '$input' &gt; '$output'
+    </command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="File to process" />
+        <param name="url_paste" type="text" area="true" size="5x35" label="AWK Program" help="">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+  </inputs>
+  <tests>
+      <test>
+          <param name="input" value="unix_awk_input1.txt" />
+          <output name="output" file="unix_awk_output1.txt" />
+          <param name="FS" value="tab" />
+          <param name="OFS" value="tab" />
+          <param name="file_data"  value="$2>0.5 { print $2*9, $1 }" />
+      </test>
+  </tests>
+  <outputs>
+    <data format="input" name="output" metadata_source="input1"
+    />
+  </outputs>
+  <configfiles>
+      <configfile name="awk_script">
+          $url_paste
+      </configfile>
+  </configfiles>
+<help>
+
+**What it does**
+
+This tool runs the unix **awk** command on the selected data file.
+
+.. class:: infomark
+
+**TIP:** This tool uses the **extended regular** expression syntax (not the perl syntax).
+
+
+**Further reading**
+
+- Awk by Example (http://www.ibm.com/developerworks/linux/library/l-awk1.html)
+- Long AWK tutorial (http://www.grymoire.com/Unix/Awk.html)
+- Learn AWK in 1 hour (http://www.selectorweb.com/awk.html)
+- awk cheat-sheet (http://cbi.med.harvard.edu/people/peshkin/sb302/awk_cheatsheets.pdf)
+- Collection of useful awk one-liners (http://student.northpark.edu/pemente/awk/awk1line.txt)
+
+-----
+
+**AWK programs**
+
+Most AWK programs consist of **patterns** (i.e. rules that match lines of text) and **actions** (i.e. commands to execute when a pattern matches a line).
+
+The basic form of AWK program is::
+
+    pattern { action 1; action 2; action 3; }
+
+
+
+
+
+**Pattern Examples**
+
+- **$2 == "chr3"**  will match lines whose second column is the string 'chr3'
+- **$5-$4>23**  will match lines that, after subtracting the value of the fourth column from the value of the fifth column, give a value larger than 23.
+- **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.)
+- **$7 ~ /A{4}U/**  will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.)
+- **10000 &lt; $4 &amp;&amp; $4 &lt; 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000
+- If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines).
+
+
+
+**Action Examples**
+
+- **{ print }** or **{ print $0 }**   will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'.
+- **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line.
+- **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth column. (If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length).
+- If no action part is specified (not even the curly brackets) - the default action is to print the entire line.
+
+
+
+
+
+
+
+
+
+**AWK's Regular Expression Syntax**
+
+The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
+
+- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string (but not an internal line).
+- **(** .. **)** groups a particular pattern.
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times.
+  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meaning:
+  - matches the beginning of a line or string. 
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\|** Separates alternate possibilities. 
+
+
+**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,103 @@
+<tool id="unixtools_cut_tool" name="cut" version="0.1.1">
+    <description>columns from files</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command>
+        cut ${complement} ${cutwhat} '${list}' '${input}' &gt; '${output}'
+    </command>
+
+    <inputs>
+        <param format="txt" name="input" type="data" label="file to cut" />
+        <param name="complement" type="select" label="Operation">
+            <option value="">Keep</option>
+            <option value="--complement">Discard</option>
+        </param>
+
+        <param name="cutwhat" type="select" label="Cut by">
+            <option value="-f">fields</option>
+            <option value="-c">characters</option>
+            <option value="-b">bytes</option>
+        </param>
+
+        <param name="list" type="text" size="20" value="" label="List of Fields/Characters/Bytes" help="These will be kept/discarded (depending on 'operation'). &lt;BR /&gt; Examples: 1,3,4 or 2-5">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <tests>
+        <test>
+            <param name="input" value="unix_cut_input1.txt" />
+            <output name="output" file="unix_cut_output1.txt" />
+            <param name="complement" value="Keep" />
+            <param name="cutwhat" value="fields" />
+            <param name="list"  value="1,3,4" />
+        </test>
+        <test>
+            <param name="input" value="unix_cut_input1.txt" />
+            <output name="output" file="unix_cut_output1.txt" />
+            <param name="complement" value="Discard" />
+            <param name="cutwhat" value="fields" />
+            <param name="list"  value="2" />
+        </test>
+    </tests>
+
+    <outputs>
+        <data format="input" name="output" metadata_source="input" />
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool runs the **cut** unix command, which extracts or deletes columns from a file.
+
+-----
+
+Field List Example:
+
+**1,3,7** - Cut specific fields/characters.
+
+**3-**    - Cut from the third field/character to the end of the line.
+
+**2-5**   - Cut from the second to the fifth field/character.
+
+**-8**    - Cut from the first to the eighth field/character.
+
+
+
+
+Input Example::
+
+    fruit	color	price	weight
+    apple	red	1.4	0.5
+    orange	orange	1.5	0.3
+    banana	yellow	0.9	0.3
+
+
+Output Example ( **Keeping fields 1,3,4** )::
+
+    fruit	price	weight
+    apple	1.4	0.5
+    orange	1.5	0.3
+    banana	0.9	0.3
+
+Output Example ( **Discarding field 2** )::
+
+    fruit	price	weight
+    apple	1.4	0.5
+    orange	1.5	0.3
+    banana	0.9	0.3
+
+Output Example ( **Keeping 3 characters** )::
+
+    fru
+    app
+    ora
+    ban
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/easyjoin	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,308 @@
+#!/usr/bin/env perl
+## EASY Join -
+## Join with automatic pre-sorting of both files
+## Copyright (C) 2010 A. Gordon (gordon@cshl.edu)
+## license: AGPLv3+
+use strict;
+use warnings;
+use Data::Dumper;
+use Getopt::Long qw(:config bundling no_ignore_case_always);
+use File::Temp qw/tempfile/;
+use POSIX qw(locale_h);
+
+sub show_help();
+sub show_version();
+sub show_examples();
+sub parse_commandline_options();
+sub sort_file($$$);
+sub join_files($$);
+sub cleanup_files(@);
+
+
+my $PROGRAM="easyjoin";
+my $VERSION="0.6.1";
+
+my $debug=undef;
+my $HEADER=undef;
+my $IGNORE_CASE=undef;
+my $FIELD_SEP=undef;
+my $FILE1_KEY_COLUMN=1;
+my $FILE2_KEY_COLUMN=1;
+my @OUTPUT_SPECIFIERS=();
+my $OUTPUT_FORMAT=undef;
+my $EMPTY_FILLER=undef;
+my $SORT_BUFFER_SIZE=undef;
+my $SORT_TEMP_DIR=undef;
+my $input_filename1;
+my $input_filename2;
+
+##
+## Program Start
+##
+$ENV{'LANG'}="C";## "C" locale is critical for sorting and joining correctly
+parse_commandline_options();
+my (undef, $tmp_filename1) = tempfile(OPEN=>0);
+my (undef, $tmp_filename2) = tempfile(OPEN=>0);
+sort_file($input_filename1, $tmp_filename1, $FILE1_KEY_COLUMN);
+sort_file($input_filename2, $tmp_filename2, $FILE2_KEY_COLUMN);
+my $join_exit_code = join_files($tmp_filename1, $tmp_filename2);
+cleanup_files($tmp_filename1, $tmp_filename2);
+exit($join_exit_code);
+
+##
+## Program end
+##
+
+
+sub show_help()
+{
+print<<EOF;
+${PROGRAM}: Wrapper for GNU join+sort, automatically sorts files before joining them.
+
+Usage: $PROGRAM [OPTIONS] [JOIN-OPTIONS] [SORT-OPTIONS] FILE1 FILE2
+
+OPTIONS: Options specific to this program:
+
+   --header      =  Both input files have a header line as the first line.
+                    The header line will be joined properly, without being sorted.
+
+   --version     =  Print ${PROGRAM}'s version.
+
+   --debug       =  Print debug messages (relating to ${PROGRAM}'s operation).
+
+   --help        =  Show this help screen.
+
+   --example     =  Show usage examples.
+
+   --all         =  Short-cut for:
+                      -a 1 -a 2 -o auto -e . -t <TAB>
+                    This will show all values (paired and unpaired) from both files,
+		    Automatically formatting the columns, and using TAB as field separator.
+		    You can override the empty filler (-e X) on the command line.
+
+   --allh        =  Short-cut for:
+                       -a 1 -a 2 -o auto -e . -t <TAB> --header
+		    Same as above, but will also respect the header line from both input files.
+
+JOIN-OPTIONS:
+   All of GNU join options are supported.
+   Run:
+       join --help
+   To see all possible joining options.
+
+SORT-OPTIONS:
+   The following options are supported for the intermediate sorting step:
+
+   -S SIZE
+   --buffer-size SIZE   = GNU sort's --buffer-size option.
+
+   -T DIR
+   --temporary-directory DIR = GNU sort's --temporary-directory option.
+
+   Run:
+      sort --help
+   To learn about these options. They might improve sorting performances for big files.
+
+FILE1 FILE2:
+   The two input files to be sorted, joined.
+   Unlike GNU join,  joining STDIN is not supported. Both files must be real files.
+
+
+NOTE About "--header" and "--auto-format":
+   The "--header" feature requires GNU coreutils version 8.6 or later.
+   The "-o auto" feature requires GNU coreutils version 8.10 or later.
+
+EOF
+	exit(0);
+}
+
+sub show_version()
+{
+print<<EOF;
+$PROGRAM $VERSION
+Copyright (C) 2010 A. Gordon (gordon\@cshl.edu)
+License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)
+
+To see the GNU's join version, run:
+	join --version
+EOF
+	exit(0);
+}
+
+sub show_examples()
+{
+print<<EOF;
+Example of joining two unsorted files (each file having a header line):
+
+\$ cat input1.txt
+Fruit	Color
+Apple	red
+Banana	yellow
+Orange	orange
+Melon	green
+
+\$ cat input2.txt
+Fruit	Price
+Orange	7
+Avocado	8
+Apple	4
+Banana	3
+
+\$ easyjoin -j 1 -a 1 -a 2 --header -e . -o auto input1.txt input2.txt
+Fruit   Color   Price
+Apple   red     4
+Avocado .       8
+Banana  yellow  3
+Melon   green   .
+Orange  orange  7
+
+## A short-cut for all the options above:
+\$ easyjoin --allh input1.txt input2.txt
+Fruit   Color   Price
+Apple   red     4
+Avocado .       8
+Banana  yellow  3
+Melon   green   .
+Orange  orange  7
+
+EOF
+	exit(0);
+}
+
+sub parse_commandline_options()
+{
+	##
+	## Parse command line
+	##
+	my $rc = GetOptions(
+			"a=i" => sub { push @OUTPUT_SPECIFIERS, '-a', $_[1] },
+			"e=s" => \$EMPTY_FILLER,
+			"ignore-case|i" => \$IGNORE_CASE,
+			"j=i" => sub { $FILE1_KEY_COLUMN = $_[1] ; $FILE2_KEY_COLUMN = $_[1] ; },
+			"o=s" => \$OUTPUT_FORMAT,
+			"t=s" => \$FIELD_SEP,
+			"v=i" => sub { push @OUTPUT_SPECIFIERS, '-v', $_[1] },
+			"1=i" => \$FILE1_KEY_COLUMN,
+			"2=i" => \$FILE2_KEY_COLUMN,
+			"debug" => \$debug,
+			"header" => \$HEADER,
+			"help" => \&show_help,
+			"version" => \&show_version,
+			"examples" => \&show_examples,
+			"buffer-size|S=s" => \$SORT_BUFFER_SIZE,
+			"temporary-directory|T=s" => \$SORT_TEMP_DIR,
+			"all" => sub {
+					push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2;
+					$FIELD_SEP = "\t";
+					$OUTPUT_FORMAT = "auto";
+					$EMPTY_FILLER = "." unless defined $EMPTY_FILLER;
+				},
+			"allh" => sub {
+					push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2;
+					$FIELD_SEP = "\t";
+					$OUTPUT_FORMAT = "auto";
+					$HEADER=1;
+					$EMPTY_FILLER = "." unless defined $EMPTY_FILLER;
+				},
+		);
+	die "$PROGRAM: invalid command-line arguments.\n" unless $rc;
+
+	## We need two file names to join
+	my @INPUT_FILES = @ARGV;
+	die "$PROGRAM: missing operand: two file names to join\n" if (scalar(@INPUT_FILES)<2);
+	die "$PROGRAM: error: too many files specified (can only join two files)\n" if (scalar(@INPUT_FILES)>2);
+	die "$PROGRAM: error: input file can't be STDIN, please use a real file name.\n" if $INPUT_FILES[0] eq "-" || $INPUT_FILES[1] eq "-";
+	die "$PROGRAM: error: input file 1 '" . $INPUT_FILES[0] . "' not found!" unless -e $INPUT_FILES[0];
+	die "$PROGRAM: error: input file 2 '" . $INPUT_FILES[1] . "' not found!" unless -e $INPUT_FILES[1];
+
+	$input_filename1 = $INPUT_FILES[0];
+	$input_filename2 = $INPUT_FILES[1];
+}
+
+sub sort_file($$$)
+{
+	my ($input_filename, $output_filename, $key_column) = @_;
+
+	my @SORT_COMMAND;
+	push @SORT_COMMAND, $HEADER ? "sort-header" : "sort" ;
+	push @SORT_COMMAND, "-f" if $IGNORE_CASE;
+	push @SORT_COMMAND, "-k${key_column},${key_column}" ;
+	push @SORT_COMMAND, "--buffer-size", $SORT_BUFFER_SIZE if $SORT_BUFFER_SIZE;
+	push @SORT_COMMAND, "--temporary-directory", $SORT_TEMP_DIR if $SORT_TEMP_DIR;
+	push @SORT_COMMAND, "--output", $output_filename;
+	push @SORT_COMMAND, "--debugheader" if $debug && $HEADER;
+	push @SORT_COMMAND, "-t", $FIELD_SEP if $FIELD_SEP;
+	push @SORT_COMMAND, $input_filename;
+
+	if ($debug) {
+		warn "$PROGRAM: Running sort on '$input_filename' => '$output_filename'\n";
+		warn "$PROGRAM: Sort command line:\n";
+		print STDERR Dumper(\@SORT_COMMAND), "\n";
+	}
+
+	my $sort_exit_code=1;
+	system(@SORT_COMMAND);
+	if ($? == -1) {
+		die "$PROGRAM: Error: failed to execute 'sort': $!\n";
+	}
+	elsif ($? & 127) {
+		my $signal = ($? & 127);
+		kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide
+		die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n";
+	}
+	else {
+		$sort_exit_code = ($? >> 8);
+	}
+	die "$PROGRAM: Error: 'sort' process failed, exit code $sort_exit_code\n" if $sort_exit_code!=0;
+}
+
+sub join_files($$)
+{
+	my ($file1, $file2) = @_;
+
+	my @join_command = qw/join/;
+	push @join_command, "--header" if $HEADER;
+	push @join_command, "--ignore-case" if $IGNORE_CASE;
+	push @join_command, "-t", $FIELD_SEP if $FIELD_SEP;
+	push @join_command, "-1", $FILE1_KEY_COLUMN if $FILE1_KEY_COLUMN;
+	push @join_command, "-2", $FILE2_KEY_COLUMN if $FILE2_KEY_COLUMN;
+	push @join_command, "-e", $EMPTY_FILLER if defined $EMPTY_FILLER;
+	push @join_command, "-o", $OUTPUT_FORMAT if $OUTPUT_FORMAT;
+	push @join_command, @OUTPUT_SPECIFIERS;
+	push @join_command, $file1, $file2;
+
+	if ($debug) {
+		warn "$PROGRAM: Running join on '$file1'  and '$file2'\n";
+		warn "$PROGRAM: join command line:\n";
+		print STDERR Dumper(\@join_command), "\n";
+	}
+
+	my $join_exit_code=1;
+	system(@join_command);
+	if ($? == -1) {
+		die "$PROGRAM: Error: failed to execute 'join': $!\n";
+	}
+	elsif ($? & 127) {
+		my $signal = ($? & 127);
+		kill 2, $$ if $signal == 2; ##if join was interrupted (CTRL-C) - just pass it on and commit suicide
+		die "$PROGRAM: Error: 'join' child-process died with signal $signal\n";
+	}
+	else {
+		$join_exit_code = ($? >> 8);
+	}
+	return $join_exit_code;
+}
+
+sub cleanup_files(@)
+{
+	my (@files) = @_;
+
+	foreach my $file (@files) {
+		if ($debug) {
+			warn "$PROGRAM: debug mode, not deleting temporary file '$file'\n";
+		} else {
+			my $count = unlink $file;
+			warn "$PROGRAM: Error: failed to delete temporary file '$file': $!\n" if ($count != 1);
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/easyjoin.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,95 @@
+<tool id="unixtools_easyjoin_tool" name="Join" version="0.1.1">
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <description>two files</description>
+    <command interpreter="perl">easyjoin $jointype
+        -t '	'
+        $header
+        -e '$empty_string_filler'
+        -o auto
+        $ignore_case
+        -1 '$column1'
+        -2 '$column2'
+        "$input1" "$input2"
+        &gt; '$output'
+    </command>
+
+    <inputs>
+        <param format="txt" name="input1" type="data" label="1st file" />
+        <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="input1" accept_default="true" />
+
+        <param format="txt" name="input2" type="data" label="2nd File" />
+        <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="input2" accept_default="true" />
+
+        <param name="jointype" type="select" label="Output lines appearing in">
+            <option value=" ">BOTH 1st &amp; 2nd file.</option>
+            <option value="-v 1">1st but not in 2nd file. [-v 1]</option>
+            <option value="-v 2">2nd but not in 1st file. [-v 2]</option>
+            <option value="-a 1">both 1st &amp; 2nd file, plus unpairable lines from 1st file. [-a 1]</option>
+            <option value="-a 2">both 1st &amp; 2nd file, plus unpairable lines from 2nd file. [-a 2]</option>
+            <option value="-a 1 -a 2">All Lines [-a 1 -a 2]</option>
+            <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option>
+        </param>
+
+        <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." />
+
+        <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />
+
+        <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+  </inputs>
+  <outputs>
+    <data name="output" format="input" metadata_source="input1"/>
+  </outputs>
+
+<help>
+**What it does**
+
+This tool joins two tabular files based on a common key column.
+
+-----
+
+**Example**
+
+**First file**::
+
+    Fruit	Color
+    Apple	red
+    Banana	yellow
+    Orange	orange
+    Melon	green
+
+**Second File**::
+
+    Fruit	Price
+    Orange	7
+    Avocado	8
+    Apple	4
+    Banana	3
+
+**Joining** both files, using **key column 1** and a **header line**, will return::
+
+    Fruit	Color	Price
+    Apple	red	4
+    Avocado	.	8
+    Banana	yellow	3
+    Melon	green	.
+    Orange	orange	7
+
+# Input files need not be sorted.
+# The header line (**Fruit  Color  Price**) was joined and kept as first line.
+# Missing values ( Avocado's color, missing from the first file ) are replaced with a period character.
+
+-----
+
+*easyjoin* was written by A. Gordon
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_and_replace	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,202 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Getopt::Std;
+
+sub parse_command_line();
+sub build_regex_string();
+sub usage();
+
+my $input_file ;
+my $output_file;
+my $find_pattern ;
+my $replace_pattern ;
+my $find_complete_words ;
+my $find_pattern_is_regex ;
+my $find_in_specific_column ;
+my $find_case_insensitive ;
+my $replace_global ;
+my $skip_first_line ;
+
+
+##
+## Program Start
+##
+usage() if @ARGV<2;
+parse_command_line();
+my $regex_string = build_regex_string() ;
+
+# Allow first line to pass without filtering?
+if ( $skip_first_line ) {
+	my $line = <$input_file>;
+	print $output_file $line ;
+}
+
+
+##
+## Main loop
+##
+
+## I LOVE PERL (and hate it, at the same time...)
+##
+## So what's going on with the self-compiling perl code?
+##
+## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
+## 2. If both the find-pattern and replace-pattern are simple strings (not regex), 
+##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
+## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references)
+##    it is still possible to pre-compile the regex and use it in a 's///'
+## However,
+## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
+##    (in perl, you can't precompile a substitute regex).
+##    See these examples:
+##    http://www.perlmonks.org/?node_id=84420
+##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
+##
+##    The solution:
+##    we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
+##    Then eval() a new perl code that contains the substitution regex as inlined code.
+##    Gotta love perl!
+
+my $perl_program ;
+if ( $find_in_specific_column ) {
+	# Find & replace in specific column
+
+	$perl_program = <<EOF;
+	while ( <STDIN> ) {
+		chomp ;
+		my \@columns = split ;
+
+		#not enough columns in this line - skip it
+		next if ( \@columns < $find_in_specific_column ) ;
+
+		\$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
+
+		print STDOUT join("\t", \@columns), "\n" ;
+	}
+EOF
+
+} else {
+	# Find & replace the entire line
+	$perl_program = <<EOF;
+		while ( <STDIN> ) {
+			$regex_string ;
+			print STDOUT;
+		}
+EOF
+}
+
+
+# The dynamic perl code reads from STDIN and writes to STDOUT,
+# so connect these handles (if the user didn't specify input / output
+# file names, these might already be STDIN/OUT, so the whole thing could be a no-op).
+*STDIN = $input_file ;
+*STDOUT = $output_file ;
+eval $perl_program ;
+
+
+##
+## Program end
+##
+
+
+sub parse_command_line()
+{
+	my %opts ;
+	getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
+
+	die "$0: missing Find-Pattern argument\n" if (@ARGV==0); 
+	$find_pattern = $ARGV[0];
+	die "$0: missing Replace-Pattern argument\n" if (@ARGV==1); 
+	$replace_pattern = $ARGV[1];
+
+	$find_complete_words = ( exists $opts{w} ) ;
+	$find_case_insensitive = ( exists $opts{i} ) ;
+	$skip_first_line = ( exists $opts{s} ) ;
+	$find_pattern_is_regex = ( exists $opts{r} ) ;
+	$replace_global = ( exists $opts{g} ) ;
+
+	# Search in specific column ?
+	if ( defined $opts{c} ) {
+		$find_in_specific_column = $opts{c};
+
+		die "$0: invalid column number ($find_in_specific_column).\n"
+			unless $find_in_specific_column =~ /^\d+$/ ;
+			
+		die "$0: invalid column number ($find_in_specific_column).\n"
+			if $find_in_specific_column <= 0; 
+	}
+	else {
+		$find_in_specific_column = 0 ;
+	}
+
+	# Output File specified (instead of STDOUT) ?
+	if ( defined $opts{o} ) {
+		my $filename = $opts{o};
+		open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
+	} else {
+		$output_file = *STDOUT ;
+	}
+
+
+	# Input file Specified (instead of STDIN) ?
+	if ( @ARGV>2 ) {
+		my $filename = $ARGV[2];
+		open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
+	} else {
+		$input_file = *STDIN;
+	}
+}
+
+sub build_regex_string()
+{
+	my $find_string ;
+	my $replace_string ;
+
+	if ( $find_pattern_is_regex ) {
+		$find_string = $find_pattern ;
+		$replace_string = $replace_pattern ;
+	} else {
+		$find_string = quotemeta $find_pattern ;
+		$replace_string = quotemeta $replace_pattern;
+	}
+
+	if ( $find_complete_words ) {
+		$find_string = "\\b($find_string)\\b"; 
+	}
+
+	my $regex_string = "s/$find_string/$replace_string/";
+
+	$regex_string .= "i" if ( $find_case_insensitive );
+	$regex_string .= "g" if ( $replace_global ) ;
+	
+
+	return $regex_string;
+}
+
+sub usage()
+{
+print <<EOF;
+
+Find and Replace
+Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
+
+Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-s] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
+
+   -g   - Global replace - replace all occurrences in line/column.
+          Default - replace just the first instance.
+   -w   - search for complete words (not partial sub-strings).
+   -i   - case insensitive search.
+   -c N - check only column N, instead of entire line (line split by whitespace).
+   -s   - skip first line (don't replace anything in it)
+   -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
+          usable inside a 's///' statement.
+          By default, they are used as verbatim text strings.
+   -o OUT - specify output file (default = STDOUT).
+   INPUT-FILE - (optional) read from file (default = from STDIN).
+
+
+EOF
+
+	exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_and_replace.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,155 @@
+<tool id="cshl_find_and_replace" name="Find and Replace" version="0.1.1">
+    <description>text</description>
+    <command interpreter="perl">
+        find_and_replace
+            #if $searchwhere.choice == "column":
+                -c $searchwhere.column
+            #end if
+            -o $output
+            $caseinsensitive
+            $wholewords
+            $skip_first_line
+            $is_regex
+            '$url_paste'
+            '$file_data'
+            '$input'
+    </command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="File to process" />
+        <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > 
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression" 
+            help="see help section for details." />
+
+        <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" 
+            help="" />
+
+        <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" 
+            help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
+
+        <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" 
+            help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
+
+        <conditional name="searchwhere">
+            <param name="choice" type="select" label="Replace text in">
+                <option value="line" selected="true">entire line</option>
+                <option value="column">specific column</option>
+            </param>
+            <when value="line" />
+
+            <when value="column">
+                <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="input" name="output" metadata_source="input" />
+    </outputs>
+
+<help>
+
+**What it does**
+
+This tool finds &amp; replaces text in an input dataset.
+
+.. class:: infomark
+
+The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on *pattern is a regex* check-box).
+
+.. class:: infomark
+
+When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
+
+.. class:: infomark
+
+This tool uses Perl regular expression syntax.
+
+-----
+
+**Examples of *regular-expression* Find Patterns**
+
+- **HELLO**     The word 'HELLO' (case sensitive).
+- **AG.T**      The letters A,G followed by any single character, followed by the letter T.
+- **A{4,}**     Four or more consecutive A's.
+- **chr2[012]\\t**       The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
+- **hsa-mir-([^ ]+)**        The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern.
+
+
+**Examples of Replace Patterns**
+
+- **WORLD**  The word 'WORLD' will be placed wherever the find pattern was found.
+- **FOO-&amp;-BAR**  Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&amp;** (dollar-ampersand) represents the matched find pattern.
+- **$1**   The text which matched the first parenthesis in the Find Pattern.
+
+
+-----
+
+**Example 1**
+
+**Find Pattern:** HELLO
+**Replace Pattern:** WORLD
+**Regular Expression:** no
+**Replace what:** entire line
+
+Every time the word HELLO is found, it will be replaced with the word WORLD. 
+
+-----
+
+**Example 2**
+
+**Find Pattern:** ^chr 
+**Replace Pattern:** (empty)
+**Regular Expression:** yes
+**Replace what:** column 11
+
+If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"
+
+
+-----
+
+**Perl's Regular Expression Syntax**
+
+The Find &amp; Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
+
+- **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string (but not an internal line).
+- **(** .. **)** groups a particular pattern.
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times.
+  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meaning:
+  - matches the beginning of a line or string. 
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\\|** Separates alternate possibilities. 
+- **\\d** matches a single digit
+- **\\w** matches a single letter or digit or an underscore.
+- **\\s** matches a single white-space (space or tabs).
+
+
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/grep.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,144 @@
+<tool id="unixtools_grep_tool" name="Search in textfiles" version="0.1.1">
+    <description>(grep)</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+        <requirement type="package" version="2.14">gnu_grep</requirement>
+        <requirement type="set_environment">UNIX_TOOLS_SCRIPT_PATH</requirement>
+    </requirements>
+    <command interpreter="sh">
+        #if $color == "COLOR":
+            GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$UNIX_TOOLS_SCRIPT_PATH/ansi2html.sh > "${output}"
+        #else:
+            grep -P "$@" -- "${url_paste}" '${input}' | grep -v "^--$" > "${output}"
+        #end if
+
+        ##grep_wrapper.sh '$input' '$output' '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive
+    </command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="Select lines from" />
+
+        <param name="invert" type="select" label="that">
+            <option value="">Match</option>
+            <option value="-v">Don't Match</option>
+        </param>
+
+        <param name="url_paste" type="text" size="40" label="Regular Expression" help="See below for more details">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <param name="case_sensitive" type="select"  label="Match type">
+            <option value="-i">case insensitive</option>
+            <option value="">case sensitive</option>
+        </param>
+
+        <param name="lines_before" type="integer"  label="Show lines preceding the matched line (-B)" help="leave it at zero unless you know what you're doing" value="0" />
+        <param name="lines_after" type="integer"  label="Show lines trailing the matched line (-A)" help="leave it at zero unless you know what you're doing" value="0" />
+
+        <param name="color" type="select"  label="Output">
+            <option value="NOCOLOR">text file (for further processing)</option>
+            <option value="COLOR">Highlighted HTML (for easier viewing)</option>
+        </param>
+
+  </inputs>
+  <outputs>
+      <data format="input" name="output" metadata_source="input">
+        <change_format>
+            <when input="color" value="COLOR" format="html"
+            />
+        </change_format>
+      </data>
+  </outputs>
+  <tests>
+      <test>
+          <!-- grep a FASTA file for sequences with specific motif -->
+          <param name="input" value="unix_grep_input1.txt" />
+          <output name="output" file="unix_grep_output1.txt" />
+          <param name="case_sensitive" value="case sensitive" />
+          <param name="invert" value="" />
+          <param name="url_paste" value="AA.{2}GT" />
+          <param name="lines_before" value="1" />
+          <param name="lines_after" value="0" />
+          <param name="color" value="NOCOLOR" />
+      </test>
+      <test>
+          <!-- grep a FASTA file for sequences with specific motif -
+             show highlighed output -->
+          <param name="input" value="unix_grep_input1.txt" />
+          <output name="output" file="unix_grep_output2.html" />
+          <param name="case_sensitive" value="case sensitive" />
+          <param name="invert" value="" />
+          <param name="url_paste" value="AA.{2}GT" />
+          <param name="lines_before" value="0" />
+          <param name="lines_after" value="0" />
+          <param name="color" value="COLOR" />
+      </test>
+  </tests>
+<help>
+
+**What it does**
+
+This tool runs the unix **grep** command on the selected data file.
+
+.. class:: infomark
+
+**TIP:** This tool uses the **perl** regular expression syntax (same as running 'grep -P'). This is **NOT** the POSIX or POSIX-extended syntax (unlike the awk/sed tools).
+
+
+**Further reading**
+
+- Wikipedia's Regular Expression page (http://en.wikipedia.org/wiki/Regular_expression)
+- Regular Expressions cheat-sheet (PDF) (http://www.addedbytes.com/cheat-sheets/download/regular-expressions-cheat-sheet-v2.pdf)
+- Grep Tutorial (http://www.panix.com/~elflord/unix/grep.html)
+
+-----
+
+**Grep Examples**
+
+- **AGC.AAT** would match lines with AGC followed by any character, followed by AAT (e.g. **AGCQAAT**, **AGCPAAT**, **AGCwAAT**)
+- **C{2,5}AGC** would match lines with 2 to 5 consecutive Cs followed by AGC
+- **TTT.{4,10}AAA** would match lines with 3 Ts, followed by 4 to 10 characters (any characters), followed by 3 As.
+- **^chr([0-9A-Za-z])+** would match lines that begin with chromosomes, such as lines in a BED format file.
+- **(ACGT){1,5}** would match at least 1 "ACGT" and at most 5 "ACGT" consecutively.
+- **hsa|mmu** would match lines containing "hsa" or "mmu" (or both).
+ 
+-----
+
+**Regular Expression Syntax**
+
+The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
+
+- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string(but not an internal line).
+- **\\d** matches a digit, same as [0-9].
+- **\\D** matches a non-digit.
+- **\\s** matches a whitespace character.
+- **\\S** matches anything BUT a whitespace.
+- **\\t** matches a tab.
+- **\\w** matches an alphanumeric character ( A to Z, 0 to 9 and underscore )
+- **\\W** matches anything but an alphanumeric character.
+- **(** .. **)** groups a particular pattern.
+- **\\Z** matches the end of a string(but not a internal line).
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times.
+  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meanings:
+  - matches the beginning of a line or string. 
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\|** Separates alternate possibilities. 
+
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/head.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,37 @@
+<tool id="unixtools_head_tool" name="Select first" version="0.1.1">
+    <description>lines from a dataset (head)</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command>
+        head --lines $complement$count '${infile}' &gt; '${outfile}'
+    </command>
+
+    <inputs>
+        <param format="txt" name="infile" type="data" label="file to cut" />
+
+        <param name="complement" type="select" label="Operation">
+            <option value="">Keep first lines</option>
+            <option value="-">Remove last lines</option>
+        </param>
+
+        <param name="count" type="integer" size="5" value="10" label="Number of lines" help="These will be kept/discarded (depending on 'operation').">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="input" name="outfile" metadata_source="infile"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool runs the **head** unix command, which discards lines from the end of a file.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multijoin	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,321 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Getopt::Long qw(:config no_ignore_case);
+use Data::Dumper;
+use Carp;
+use File::Basename;
+use Sort::Key::Natural qw(natsort);
+
+my $version = "0.1.1";
+my $field_sep = "\t";
+my $key_column;
+my @values_columns;
+my $max_value_column;
+my @input_files;
+my $input_headers ;
+my $output_headers;
+my $filler = "0";
+my $filler_string ;
+my $ignore_duplicates;
+my $debug = 0 ;
+my %input_headers;
+my $have_file_labels;
+my %file_labels;
+
+sub parse_command_line_parameters();
+sub show_help();
+sub read_input_file($);
+sub print_combined_data();
+sub sanitize_filename($);
+sub print_output_header();
+sub show_examples();
+
+##
+## Program Start
+##
+
+parse_command_line_parameters();
+
+my %data;
+foreach my $file (@input_files) {
+	read_input_file($file);
+}
+#print STDERR Dumper(\%input_headers),"\n";
+#print STDERR Dumper(\%data) if $debug;
+print_output_header() if $output_headers;
+print_combined_data();
+
+
+##
+## Program End
+##
+sub print_output_header()
+{
+	my @output = ("key");
+	foreach my $file ( @input_files ) {
+		foreach my $column ( @values_columns ) {
+			my $column_name = ( exists $input_headers{$file}->{$column} ) ?
+				$input_headers{$file}->{$column} :
+				"V$column" ;
+
+			push @output, $file_labels{$file} . "_" . $column_name;
+		}
+	}
+	print join($field_sep,@output),"\n"
+		or die "Output error: can't write output line: $!\n";
+}
+
+sub print_combined_data()
+{
+	my @keys = natsort keys %data ;
+
+	foreach my $key ( @keys ) {
+		my @outputs;
+
+		foreach my $file (@input_files) {
+			push @outputs,
+				(exists $data{$key}->{$file}) ? $data{$key}->{$file} : $filler_string;
+		}
+
+		print join($field_sep,$key,@outputs),"\n"
+			or die "Output error: can't write output line: $!\n";
+	}
+}
+
+sub sanitize_filename($)
+{
+	my ($filename) = shift or croak "missing file name";
+	my $file_ID = basename($filename);
+	$file_ID =~ s/\.\w+$//; # remove extension
+	$file_ID =~ s/^[^\w\.\-]+//;
+	$file_ID =~ s/[^\w\.\-]+$//;
+	$file_ID =~ s/[^\w\.\-]+/_/g; # sanitize bad characters
+	return $file_ID;
+}
+
+sub read_input_file($)
+{
+	my ($filename) = shift or croak "Missing input file name";
+
+	my @value_indexes = map { $_-1 } @values_columns; #zero-based indexes for value columns
+
+	open FILE, "<", $filename
+		or die "Error: can't open file '$filename': $!\n";
+
+	## Read file's header
+	if ($input_headers) {
+		my $line = <FILE>;
+		chomp $line;
+		my @fields = split $field_sep, $line;
+
+		my $num_input_fields = scalar(@fields);
+		die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ;
+
+		foreach my $col (@values_columns) {
+			$input_headers{$filename}->{$col} = $fields[$col-1] ;
+		}
+	}
+
+
+	## Read file's data
+	while ( my $line = <FILE> ) {
+		chomp $line;
+		my @fields = split $field_sep, $line;
+
+		my $num_input_fields = scalar(@fields);
+		die "Input error: file '$filename' line $. doesn't have enough columns (key column = $key_column, line has only $num_input_fields columns)\n" if $num_input_fields < $key_column ;
+		die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ;
+
+
+		my $key = $fields[$key_column-1];
+		my $value = join($field_sep, @fields[@value_indexes]);
+
+		die "Input error: file '$filename' line $. has duplicated key '$key'.\n"
+			if (exists $data{$key}->{$filename} && !$ignore_duplicates) ;
+		$data{$key}->{$filename} = $value;
+	}
+	close FILE
+		or die "Error: can't write and close file '$filename': $!\n";
+}
+
+sub parse_command_line_parameters()
+{
+	my $values_columns_string;
+
+	my $rc = GetOptions("help" => \&show_help,
+			    "key|k=i" => \$key_column,
+			    "values|v=s" => \$values_columns_string,
+		            "t=s" => \$field_sep,
+		            "in-header" => \$input_headers,
+		            "out-header|h" => \$output_headers,
+		            "headers|H" => sub { $input_headers = 1 ; $output_headers = 1 ; },
+			    "ignore-dups" => \$ignore_duplicates,
+		            "filler|f=s" => \$filler,
+		            "examples"   => \&show_examples,
+			    "labels" => \$have_file_labels,
+			);
+	die "Error: invalid command-line parameters.\n" unless $rc;
+
+	die "Error: missing key column. use --key N. see --help for more details.\n" unless defined $key_column;
+	die "Error: Invalid key column ($key_column). Must be bigger than zero. see --help for more details.\n" if $key_column <= 0 ;
+
+	die "Error: missing values column. use --values V1,V2,Vn. See --help for more details.\n" unless defined $values_columns_string;
+	@values_columns = split(/\s*,\s*/, $values_columns_string);
+
+	die "Error: missing values column. use --values N,N,N. see --help for more details.\n" unless scalar(@values_columns)>0;
+	foreach my $v (@values_columns) {
+		die "Error: invalid value column ($v), please use only numbers>=1. see --help for more details.\n"
+			unless $v =~ /^\d+$/ && $v>=1;
+
+		$max_value_column = $v unless defined $max_value_column && $max_value_column>$v;
+	}
+
+	$filler_string = join($field_sep, map { $filler } @values_columns);
+
+
+	if ($have_file_labels) {
+		## have file labels - each pair of parameters is a file/label pair.
+		die "Error: missing input files and labels\n" if scalar(@ARGV)==0;
+		die "Error: when using --labels, a pair of file names + labels is required (got odd number of arguments)\n" unless scalar(@ARGV)%2==0;
+
+		while (@ARGV) {
+			my $filename = shift @ARGV;
+			my $label = shift @ARGV;
+			$label =~ s/^[^\.\w\-]+//;
+			$label =~ s/[^\.\w\-]+$//g;
+			$label =~ s/[^\.\w\-]+/_/g;
+
+			my $file_ID = sanitize_filename($filename);
+			$file_labels{$filename} = $label;
+			push @input_files, $filename;
+		}
+	} else {
+		## no file labels - the rest of the arguments are just file names;
+		@input_files = @ARGV;
+		die "Error: missing input files\n" if scalar(@input_files)==0;
+		die "Error: need more than one input file to join.\n" if scalar(@input_files)==1;
+
+		foreach my $file (@input_files) {
+			my $file_ID = sanitize_filename($file);
+			$file_labels{$file} = $file_ID;
+		}
+	}
+
+}
+
+sub show_help()
+{
+	print<<EOF;
+Multi-File join, version $version
+Copyright (C) 2012 - A. Gordon (gordon at cshl dot edu)
+License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)
+
+Usage:
+ multijoin [OPTIONS] -k N -v V1,V2,Vn,..  FILE1  FILE2  ... FILEn
+
+Options:
+
+ --help         This helpful help screen.
+
+  -k N
+  --key N       Use column N as key column.
+
+  -v V1,V2,Vn
+  --values V1,V2,Vn
+                 Use columns V1,V2,Vn as value columns - those will be joined
+                According to the Key column.
+		Multiple columns can be specified.
+
+  -t SEP        Use SEP as field separator character (default: tab).
+
+  -h
+  --out-header  Add a header line to the output file.
+
+  --in-header   The input files have a header line.
+                The first line will not be joined.
+		if '--out-header' is also used, the output column headers will
+		be constructed based on the input header column names.
+
+  -H
+  --headers     Same as '--in-header --out-header' combined.
+
+  --ignore-dups   Ignore duplicated keys (within a file).
+                By default, duplicated keys cause an error.
+
+ -f X
+ --filler X     Fill missing values with X.
+                (Default: '$filler').
+
+ --labels       When printing output headers with '-h', instead of using the file name,
+                use specific labels.
+		Each file name must be followed by a name.
+
+		example (without labels):
+		 \$ multijoin -h -k 1 -v 2 A.TXT B.TXT C.TXT
+
+		example (with labels):
+                 \$ multijoin -h --labels -k 1 -v 2 A.TXT Sample1 B.TXT SampleB C.TXT SampleC
+
+ --examples     Show detailed examples.
+
+EOF
+	exit(0);
+}
+
+sub show_examples()
+{
+	print<<EOF;
+
+To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:
+
+\$ head *.txt
+==> AAA.txt <==
+chr4	888449	890171	FBtr0308778	0	+	266	1527	1722
+chr4	972167	979017	FBtr0310651	0	-	3944	6428	6850
+chr4	972186	979017	FBtr0089229	0	-	3944	6428	6831
+chr4	972186	979017	FBtr0089231	0	-	3944	6428	6831
+chr4	972186	979017	FBtr0089233	0	-	3944	6428	6831
+chr4	995793	996435	FBtr0111046	0	+	7	166	642
+chr4	995793	997931	FBtr0111044	0	+	28	683	2138
+chr4	995793	997931	FBtr0111045	0	+	28	683	2138
+chr4	1034029	1047719	FBtr0089223	0	-	5293	13394	13690
+
+==> BBB.txt <==
+chr4	90286	134453	FBtr0309803	0	+	657	29084	44167
+chr4	251355	266499	FBtr0089116	0	+	56	1296	15144
+chr4	252050	266506	FBtr0308086	0	+	56	1296	14456
+chr4	252050	266506	FBtr0308087	0	+	56	1296	14456
+chr4	252053	266528	FBtr0300796	0	+	56	1296	14475
+chr4	252053	266528	FBtr0300800	0	+	56	1296	14475
+chr4	252055	266528	FBtr0300798	0	+	56	1296	14473
+chr4	252055	266528	FBtr0300799	0	+	56	1296	14473
+chr4	252541	266528	FBtr0300797	0	+	56	1296	13987
+
+==> CCC.txt <==
+chr4	972167	979017	FBtr0310651	0	-	9927	6738	6850
+chr4	972186	979017	FBtr0089229	0	-	9927	6738	6831
+chr4	972186	979017	FBtr0089231	0	-	9927	6738	6831
+chr4	972186	979017	FBtr0089233	0	-	9927	6738	6831
+chr4	995793	996435	FBtr0111046	0	+	5	304	642
+chr4	995793	997931	FBtr0111044	0	+	17	714	2138
+chr4	995793	997931	FBtr0111045	0	+	17	714	2138
+chr4	1034029	1047719	FBtr0089223	0	-	17646	13536	13690
+
+\$ multijoin -h --key 4 --values 7,8,9 *.txt | head -n 10
+key           AAA__V7   AAA__V8   AAA__V9   BBB__V7    BBB__V8    BBB__V9    CCC__V7   CCC__V8   CCC__V9
+FBtr0089116         0         0         0        56       1296      15144          0         0         0
+FBtr0089223      5293     13394     13690         0          0          0      17646     13536     13690
+FBtr0089229      3944      6428      6831         0          0          0       9927      6738      6831
+FBtr0089231      3944      6428      6831         0          0          0       9927      6738      6831
+FBtr0089233      3944      6428      6831         0          0          0       9927      6738      6831
+FBtr0111044        28       683      2138         0          0          0         17       714      2138
+FBtr0111045        28       683      2138         0          0          0         17       714      2138
+FBtr0111046         7       166       642         0          0          0          5       304       642
+FBtr0300796         0         0         0        56       1296      14475          0         0         0
+
+
+
+EOF
+	exit(0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multijoin.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,122 @@
+<tool id="unixtools_multijoin_tool" name="Multi-Join" version="0.1.1">
+  <description>(combine multiple files)</description>
+  <command interpreter="perl">multijoin
+        --key '$key_column'
+        --values '$value_columns'
+        --filler '$filler'
+        $ignore_dups
+        $output_header
+        $input_header
+        #for $file in $files
+            '$file.filename'
+        #end for
+        &gt; '$output'
+  </command>
+
+  <inputs>
+    <repeat name="files" title="file to join">
+        <param name="filename" label="Add file" type="data" format="txt" />
+    </repeat>
+
+    <param name="key_column" label="Common key column" type="integer"
+       value="1" help="Usually gene-ID or other common value" />
+
+    <param name="value_columns" label="Column with values to preserve" type="text"
+       value="2,3,4" help="Enter comma-separated list of columns, e.g. 3,6,8">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+        </sanitizer>
+    </param>
+
+    <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
+    <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
+    <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
+    <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+        </sanitizer>
+    </param>
+
+  </inputs>
+  <outputs>
+    <data name="output" format="input" />
+  </outputs>
+
+<help>
+**What it does**
+
+This tool joins multiple tabular files based on a common key column.
+
+-----
+
+**Example**
+
+To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:
+
+**First file (AAA)**::
+
+    chr4    888449    890171    FBtr0308778    0    +    266    1527    1722
+    chr4    972167    979017    FBtr0310651    0    -    3944    6428    6850
+    chr4    972186    979017    FBtr0089229    0    -    3944    6428    6831
+    chr4    972186    979017    FBtr0089231    0    -    3944    6428    6831
+    chr4    972186    979017    FBtr0089233    0    -    3944    6428    6831
+    chr4    995793    996435    FBtr0111046    0    +    7    166    642
+    chr4    995793    997931    FBtr0111044    0    +    28    683    2138
+    chr4    995793    997931    FBtr0111045    0    +    28    683    2138
+    chr4    1034029    1047719    FBtr0089223    0    -    5293    13394    13690
+    ...
+
+
+**Second File (BBB)**::
+
+    chr4    90286    134453    FBtr0309803    0    +    657    29084    44167
+    chr4    251355    266499    FBtr0089116    0    +    56    1296    15144
+    chr4    252050    266506    FBtr0308086    0    +    56    1296    14456
+    chr4    252050    266506    FBtr0308087    0    +    56    1296    14456
+    chr4    252053    266528    FBtr0300796    0    +    56    1296    14475
+    chr4    252053    266528    FBtr0300800    0    +    56    1296    14475
+    chr4    252055    266528    FBtr0300798    0    +    56    1296    14473
+    chr4    252055    266528    FBtr0300799    0    +    56    1296    14473
+    chr4    252541    266528    FBtr0300797    0    +    56    1296    13987
+    ...
+
+**Third file (CCC)**::
+
+    chr4    972167    979017    FBtr0310651    0    -    9927    6738    6850
+    chr4    972186    979017    FBtr0089229    0    -    9927    6738    6831
+    chr4    972186    979017    FBtr0089231    0    -    9927    6738    6831
+    chr4    972186    979017    FBtr0089233    0    -    9927    6738    6831
+    chr4    995793    996435    FBtr0111046    0    +    5    304    642
+    chr4    995793    997931    FBtr0111044    0    +    17    714    2138
+    chr4    995793    997931    FBtr0111045    0    +    17    714    2138
+    chr4    1034029    1047719    FBtr0089223    0    -    17646    13536    13690
+    ...
+
+
+**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::
+
+    key           AAA__V7   AAA__V8   AAA__V9   BBB__V7    BBB__V8    BBB__V9    CCC__V7   CCC__V8   CCC__V9
+    FBtr0089116         0         0         0        56       1296      15144          0         0         0
+    FBtr0089223      5293     13394     13690         0          0          0      17646     13536     13690
+    FBtr0089229      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089231      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089233      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0111044        28       683      2138         0          0          0         17       714      2138
+    FBtr0111045        28       683      2138         0          0          0         17       714      2138
+    FBtr0111046         7       166       642         0          0          0          5       304       642
+    FBtr0300796         0         0         0        56       1296      14475          0         0         0
+    ...
+
+
+# Input files need not be sorted.
+
+-----
+
+*multijoin* was written by A. Gordon (gordon at cshl dot edu)
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,86 @@
+These are Galaxy wrappers for common unix text-processing tools
+===============================================================
+
+The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu ) 
+in Cold Spring Harbor Laboratory ( http://www.cshl.edu ).
+
+
+The tools are:
+
+* awk - The AWK programming language ( http://www.gnu.org/software/gawk/ )
+* sed - Stream Editor ( http://sed.sf.net )
+* grep - Search files ( http://www.gnu.org/software/grep/ )
+* sort_columns - Sorting every line according to their columns
+* GNU Coreutils programs ( http://www.gnu.org/software/coreutils/ ):
+  * sort - sort files
+  * join - join two files, based on common key field.
+  * cut  - keep/discard fields from a file
+  * unsorted_uniq - keep unique/duplicated lines in a file
+  * sorted_uniq - keep unique/duplicated lines in a file
+  * head - keep the first X lines in a file.
+  * tail - keep the last X lines in a file.
+
+Few improvements over the standard tools:
+
+  * EasyJoin - A Join tool that does not require pre-sorting the files ( https://github.com/agordon/filo/blob/scripts/src/scripts/easyjoin )
+  * Multi-Join - Join multiple (>2) files ( https://github.com/agordon/filo/blob/scripts/src/scripts/multijoin )
+  * Find_and_Replace - Find/Replace text in a line or specific column.
+  * Grep with Perl syntax - uses grep with Perl-Compatible regular expressions.
+  * HTML'd Grep - grep text in a file, and produced high-lighted HTML output, for easier viewing ( uses https://github.com/agordon/filo/blob/scripts/src/scripts/sort-header )
+
+
+Requirements
+------------
+
+1. Coreutils version 8.19 or later.
+2. AWK version 4.0.1 or later.
+3. SED version 4.2 *with* a special patch
+4. Grep with PCRE support
+
+These will be installed automatically with the Galaxy Tool Shed.
+
+
+-------------------
+NOTE About Security
+-------------------
+
+The included tools are secure (barring unintentional bugs):
+The main concern might be executing system commands with awk's "system" and sed's "e" commands,
+or reading/writing arbitrary files with awk's redirection and sed's "r/w" commands.
+These commands are DISABLED using the "--sandbox" parameter to awk and sed.
+
+User trying to run an awk program similar to:
+ BEGIN { system("ls") }
+Will get an error (in Galaxy) saying:
+ fatal: 'system' function not allowed in sandbox mode.
+
+User trying to run a SED program similar to:
+ 1els
+will get an error (in Galaxy) saying:
+ sed: -e expression #1, char 2: e/r/w commands disabled in sandbox mode
+
+That being said, if you do find some vulnerability in these tools, please let me know and I'll try to fix them.
+
+------------
+Installation
+------------
+
+Should be done with the Galaxy `Tool Shed`_.
+
+.. _`Tool Shed`: http://wiki.galaxyproject.org/Tool%20Shed
+
+
+----
+TODO
+----
+
+- unit-tests
+- uniq will get a new --group function with the 8.22 release, it's currently commented out
+- also shuf will get a major improved performance with large files http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=commit;h=20d7bce0f7e57d9a98f0ee811e31c757e9fedfff
+  we can remove the random feature from sort and use shuf instead
+- move some advanced settings under a conditional, for example the cut tools offers to cut bytes
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/ansi2html.sh	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,331 @@
+#!/bin/sh
+
+# Convert ANSI (terminal) colours and attributes to HTML
+
+# Author:
+#    http://www.pixelbeat.org/docs/terminal_colours/
+# Examples:
+#    ls -l --color=always | ansi2html.sh > ls.html
+#    git show --color | ansi2html.sh > last_change.html
+#    Generally one can use the `script` util to capture full terminal output.
+# Changes:
+#    V0.1, 24 Apr 2008, Initial release
+#    V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com>
+#                         Support `git diff --color` output by
+#                         matching ANSI codes that specify only
+#                         bold or background colour.
+#                       P@draigBrady.com
+#                         Support `ls --color` output by stripping
+#                         redundant leading 0s from ANSI codes.
+#                         Support `grep --color=always` by stripping
+#                         unhandled ANSI codes (specifically ^[[K).
+#    V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
+#                         Remove cat -v usage which mangled non ascii input.
+#                         Cleanup regular expressions used.
+#                         Support other attributes like reverse, ...
+#                       P@draigBrady.com
+#                         Correctly nest <span> tags (even across lines).
+#                         Add a command line option to use a dark background.
+#                         Strip more terminal control codes.
+#    V0.4, 17 Sep 2009, P@draigBrady.com
+#                         Handle codes with combined attributes and color.
+#                         Handle isolated <bold> attributes with css.
+#                         Strip more terminal control codes.
+#    V0.12, 12 Jul 2011
+#      http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh
+
+if [ "$1" = "--version" ]; then
+    echo "0.12" && exit
+fi
+
+if [ "$1" = "--help" ]; then
+    echo "This utility converts ANSI codes in data passed to stdin" >&2
+    echo "It has 2 optional parameters:" >&2
+    echo "   --bg=dark --palette=linux|solarized|tango|xterm" >&2
+    echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2
+    exit
+fi
+
+[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
+
+if [ "$1" = "--palette=solarized" ]; then
+   # See http://ethanschoonover.com/solarized
+   P0=073642;  P1=D30102;  P2=859900;  P3=B58900;
+   P4=268BD2;  P5=D33682;  P6=2AA198;  P7=EEE8D5;
+   P8=002B36;  P9=CB4B16; P10=586E75; P11=657B83;
+  P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
+  shift;
+elif [ "$1" = "--palette=solarized-xterm" ]; then
+   # Above mapped onto the xterm 256 color palette
+   P0=262626;  P1=AF0000;  P2=5F8700;  P3=AF8700;
+   P4=0087FF;  P5=AF005F;  P6=00AFAF;  P7=E4E4E4;
+   P8=1C1C1C;  P9=D75F00; P10=585858; P11=626262;
+  P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
+  shift;
+elif [ "$1" = "--palette=tango" ]; then
+   # Gnome default
+   P0=000000;  P1=CC0000;  P2=4E9A06;  P3=C4A000;
+   P4=3465A4;  P5=75507B;  P6=06989A;  P7=D3D7CF;
+   P8=555753;  P9=EF2929; P10=8AE234; P11=FCE94F;
+  P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
+  shift;
+elif [ "$1" = "--palette=xterm" ]; then
+   P0=000000;  P1=CD0000;  P2=00CD00;  P3=CDCD00;
+   P4=0000EE;  P5=CD00CD;  P6=00CDCD;  P7=E5E5E5;
+   P8=7F7F7F;  P9=FF0000; P10=00FF00; P11=FFFF00;
+  P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
+  shift;
+else # linux console
+   P0=000000;  P1=AA0000;  P2=00AA00;  P3=AA5500;
+   P4=0000AA;  P5=AA00AA;  P6=00AAAA;  P7=AAAAAA;
+   P8=555555;  P9=FF5555; P10=55FF55; P11=FFFF55;
+  P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
+  [ "$1" = "--palette=linux" ] && shift
+fi
+
+[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
+
+echo -n "<html>
+<head>
+<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>
+<style type=\"text/css\">
+.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
+.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
+.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
+.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
+.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
+.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
+.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
+.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
+.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
+.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
+.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
+.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
+.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
+.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
+.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
+.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
+.eb8  { background-color: #$P8; }
+.eb9  { background-color: #$P9; }
+.eb10 { background-color: #$P10; }
+.eb11 { background-color: #$P11; }
+.eb12 { background-color: #$P12; }
+.eb13 { background-color: #$P13; }
+.eb14 { background-color: #$P14; }
+.eb15 { background-color: #$P15; }
+"
+
+# The default xterm 256 colour palette
+for red in $(seq 0 5); do
+  for green in $(seq 0 5); do
+    for blue in $(seq 0 5); do
+        c=$((16 + ($red * 36) + ($green * 6) + $blue))
+        r=$((($red * 40 + 55) * ($red > 0)))
+        g=$((($green * 40 + 55) * ($green > 0)))
+        b=$((($blue * 40 + 55) * ($blue > 0)))
+        printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b
+        printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b
+    done
+  done
+done
+for gray in $(seq 0 23); do
+  c=$(($gray+232))
+  l=$(($gray*10 + 8))
+  printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l
+  printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l
+done
+
+echo -n '
+.f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' }
+.b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; }
+.f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
+  /* Bold is heavy black on white, or bright white
+     depending on the default background */
+  color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`'
+  font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`'
+}
+.reverse {
+  /* CSS doesnt support swapping fg and bg colours unfortunately,
+     so just hardcode something that will look OK on all backgrounds. */
+  '"color: #$P0; background-color: #$P7;"'
+}
+.underline { text-decoration: underline; }
+.line-through { text-decoration: line-through; }
+.blink { text-decoration: blink; }
+
+</style>
+</head>
+
+<body class="f9 b9">
+<pre>
+'
+
+p='\x1b\['        #shortcut to match escape codes
+P="\(^[^°]*\)¡$p" #expression to match prepended codes below
+
+# Handle various xterm control sequences.
+# See /usr/share/doc/xterm-*/ctlseqs.txt
+sed "
+s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
+s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
+
+#handle carriage returns
+s#^.*\r\{1,\}\([^$]\)#\1#
+s#\r\$## # strip trailing \r
+
+# strip other non SGR escape sequences
+s#[\x07]##g
+s#\x1b[]>=\][0-9;]*##g
+s#\x1bP+.\{5\}##g
+s#${p}[0-9;?]*[^0-9;?m]##g
+
+#remove backspace chars and what they're backspacing over
+:rm_bs
+s#[^\x08]\x08##g; t rm_bs
+" |
+
+# Normalize the input before transformation
+sed "
+# escape HTML
+s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g
+
+# normalize SGR codes a little
+
+# split 256 colors out and mark so that they're not
+# recognised by the following 'split combined' line
+:e
+s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
+s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
+
+:c
+s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
+s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
+s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
+s#${p}m#${p}0m#g                                          #add leading 0 to norm
+
+# undo any 256 color marking
+s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
+
+# map 16 color codes to color + bold
+s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
+s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
+
+# change 'reset' code to a single char, and prepend a single char to
+# other codes so that we can easily do negative matching, as sed
+# does not support look behind expressions etc.
+s#°#\&deg;#g; s#${p}0m#°#g
+s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g
+" |
+
+# Convert SGR sequences to HTML
+sed "
+:ansi_to_span # replace ANSI codes with CSS classes
+t ansi_to_span # hack so t commands below only apply to preceeding s cmd
+
+/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceeding code
+
+# common combinations to minimise html (optional)
+s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
+s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count
+
+s#${P}1m#\1<span class=\"bold\">#;                            t span_count
+s#${P}4m#\1<span class=\"underline\">#;                       t span_count
+s#${P}5m#\1<span class=\"blink\">#;                           t span_count
+s#${P}7m#\1<span class=\"reverse\">#;                         t span_count
+s#${P}9m#\1<span class=\"line-through\">#;                    t span_count
+s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#;                    t span_count
+s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#;                    t span_count
+
+s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#;        t span_count
+s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#;        t span_count
+
+s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
+
+b # next line of input
+
+# add a corresponding span end flag
+:span_count
+x; s/^/s/; x
+b ansi_to_span
+
+# replace 'reset code' with correct number of </span> tags
+:span_end
+x
+/^s/ {
+  s/^.//
+  x
+  s#°#</span>°#
+  b span_end
+}
+x
+s#°##
+b ansi_to_span
+" |
+
+# Convert alternative character set
+# Note we convert here, as if we do at start we have to worry about avoiding
+# conversion of SGR codes etc., whereas doing here we only have to
+# avoid conversions of stuff between &...; or <...>
+#
+# Note we could use sed to do this based around:
+#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
+# However that would be very awkward as we need to only conv some input.
+# The basic scheme that we do in the python script below is:
+#  1. enable transliterate once ¡ char seen
+#  2. disable once µ char seen (may be on diff line to ¡)
+#  3. never transliterate between &; or <> chars
+sed "
+# change 'smacs' and 'rmacs' to a single char so that we can easily do
+# negative matching, as sed does not support look behind expressions etc.
+# Note we don't use ° like above as that's part of the alternate charset.
+s#\x1b(0#¡#g;
+s#µ#\&micro;#g; s#\x1b(B#µ#g
+" |
+(
+python -c "
+# vim:fileencoding=utf8
+
+import sys
+import locale
+encoding=locale.getpreferredencoding()
+
+old='abcdefghijklmnopqrstuvwxyz{}\`~'
+new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
+new=unicode(new, 'utf-8')
+table=range(128)
+for o,n in zip(old, new): table[ord(o)]=n
+
+(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
+
+state = STANDARD
+last_mode = STANDARD
+for c in unicode(sys.stdin.read(), encoding):
+  if state == HTML_TAG:
+    if c == '>':
+      state = last_mode
+  elif state == HTML_ENTITY:
+    if c == ';':
+      state = last_mode
+  else:
+    if c == '<':
+      state = HTML_TAG
+    elif c == '&':
+      state = HTML_ENTITY
+    elif c == u'¡' and state == STANDARD:
+      state = ALTERNATIVE
+      last_mode = ALTERNATIVE
+      continue
+    elif c == u'µ' and state == ALTERNATIVE:
+      state = STANDARD
+      last_mode = STANDARD
+      continue
+    elif state == ALTERNATIVE:
+      c = c.translate(table)
+  sys.stdout.write(c.encode(encoding))
+" 2>/dev/null ||
+sed 's/[¡µ]//g' # just strip alternative flag chars
+)
+
+echo "</pre>
+</body>
+</html>"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sed.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,110 @@
+<tool id="unixtools_sed_tool" name="Text transformation" version="0.1.1">
+  <description>with sed</description>
+    <requirements>
+        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
+    </requirements>
+  <command>
+      sed --sandbox -r $silent -f '$sed_script' '$input' &gt; '$output'
+  </command>
+  <inputs>
+    <param format="txt" name="input" type="data" label="File to process" />
+
+    <param name="url_paste" type="text" area="true" size="5x35" label="SED Program" help="">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+        </sanitizer>
+    </param>
+
+    <param name="silent" type="select"  label="operation mode" help="(Same as 'sed -n', leave at 'normal' unless you know what you're doing)" >
+        <option value="">normal</option>
+        <option value="-n">silent</option>
+    </param>
+
+  </inputs>
+    <configfiles>
+      <configfile name="sed_script">
+          $url_paste
+      </configfile>
+  </configfiles>
+  <tests>
+      <test>
+          <param name="input" value="unix_sed_input1.txt" />
+          <output name="output" file="unix_sed_output1.txt" />
+          <param name="url_paste"  value="1d ; s/foo/bar/" />
+          <param name="silent" value="" />
+      </test>
+      <test>
+          <param name="input" value="unix_sed_input1.txt" />
+          <output name="output" file="unix_sed_output2.txt" />
+          <param name="url_paste"  value="/foo/ { s/foo/baz/g ; p }" />
+          <param name="silent" value="silent" />
+      </test>
+  </tests>
+  <outputs>
+    <data format="input" name="output" metadata_source="input" />
+  </outputs> 
+<help>
+
+**What it does**
+
+This tool runs the unix **sed** command on the selected data file.
+
+.. class:: infomark
+
+**TIP:** This tool uses the **extended regular** expression syntax (same as running 'sed -r').
+
+
+
+**Further reading**
+
+- Short sed tutorial (http://www.linuxhowtos.org/System/sed_tutorial.htm)
+- Long sed tutorial (http://www.grymoire.com/Unix/Sed.html)
+- sed faq with good examples (http://sed.sourceforge.net/sedfaq.html)
+- sed cheat-sheet (http://www.catonmat.net/download/sed.stream.editor.cheat.sheet.pdf)
+- Collection of useful sed one-liners (http://student.northpark.edu/pemente/sed/sed1line.txt)
+
+-----
+
+**Sed commands**
+
+The most useful sed command is **s** (substitute).
+
+**Examples**
+
+- **s/hsa//**  will remove the first instance of 'hsa' in every line.
+- **s/hsa//g**  will remove all instances (because of the **g**) of 'hsa' in every line.
+- **s/A{4,}/--&amp;--/g**  will find sequences of 4 or more consecutive A's, and once found, will surround them with two dashes from each side. The **&amp;** marker is a place holder for 'whatever matched the regular expression'.
+- **s/hsa-mir-([^ ]+)/short name: \\1 full name: &amp;/**  will find strings such as 'hsa-mir-43a' (the regular expression is 'hsa-mir-' followed by non-space characters) and will replace it will string such as 'short name: 43a full name: hsa-mir-43a'.  The **\\1** marker is a place holder for 'whatever matched the first parenthesis' (similar to perl's **$1**) .
+
+
+**sed's Regular Expression Syntax**
+
+The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. 
+
+- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string(but not an internal line).
+- **(** .. **)** groups a particular pattern.
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times. 
+  - **{n,m}** The preceding item is matched at least n times but not more than m times. 
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meanings:
+  - matches the beginning of a line or string. 
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\|** Separates alternate possibilities. 
+
+
+**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sort.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,137 @@
+<tool id="unixtools_sort_header_tool" name="Sort" version="0.1.1">
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
+    </requirements>
+    <command interpreter="sh">
+        #if int($header) > 0:
+            (sed -u '${header}'q &amp;&amp; sort $unique $ignore_case --stable -t '	'
+
+                #for $key in $sortkeys
+                '-k ${key.column}${key.order}${key.style},${key.column}'
+                #end for
+
+            ) &lt; '${infile}' &gt; '${outfile}'
+        #else:
+            (sort $unique $ignore_case --stable -t '	' 
+
+                #for $key in $sortkeys
+                '-k ${key.column}${key.order}${key.style},${key.column}'
+                #end for
+
+            ) &lt; '${infile}' &gt; '${outfile}'
+        #end if
+    </command>
+
+    <inputs>
+        <param format="txt" name="infile" type="data" label="Sort Query" />
+        <param name="header" type="integer" size="5" value="1" label="Number of header lines" help="These will be ignored during sort.">
+            <validator type="in_range" message="Negative values are not allowed." min="0"/>
+        </param>
+
+        <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue=""
+            label="Output unique values" help="Print only unique values (based on sorted key columns). See help section for details." />
+
+        <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />
+
+        <repeat name="sortkeys" title="sort key">
+            <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
+            <param name="order" type="select" display="radio" label="in">
+                <option value="">Ascending order</option>
+                <option value="r">Descending order</option>
+            </param>
+            <param name="style" type="select" display="radio" label="Flavor">
+                <option value="n">Fast numeric sort ([-n])</option>
+                <option value="g">General numeric sort ( scientific notation [-g])</option>
+                <option value="V">Natural/Version sort ([-V]) </option>
+                <option value="">Alphabetical sort</option>
+                <option value="h">Human-readable numbers (-h)</option>
+                <option value="R">Random order</option>
+            </param>
+        </repeat>
+    </inputs>
+    <tests>
+    </tests>
+    <outputs>
+        <data format="input" name="outfile" metadata_source="infile"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool sorts an input file.
+
+-----
+
+**Sorting Styles**
+
+* **Fast Numeric**: sort by numeric values. Handles integer values (e.g. 43, 134) and decimal-point values (e.g. 3.14). *Does not* handle scientific notation (e.g. -2.32e2).
+* **General Numeric**: sort by numeric values. Handles all numeric notations (including scientific notation). Slower than *fast numeric*, so use only when necessary.
+* **Natural Sort**: Sort in 'natural' order (natural to humans, not to computers). See example below.
+* **Alphabetical sort**: Sort in strict alphabetical order. See example below.
+* **Human-readable numbers**: Sort human readable numbers (e.g. 1G > 2M > 3K > 400)
+* **Random order**: return lines in random order.
+
+------
+
+**Example - Header line**
+
+**Input file** (note first line is a header line, should not be sorted)::
+
+    Fruit   Color   Price
+    Banana  Yellow  4.1
+    Avocado Green   8.0
+    Apple   Red     3.0
+    Melon   Green   6.1
+
+**Sorting** by **numeric order** on column **3**, with **header**, will return::
+
+    Fruit   Color   Price
+    Apple   Red     3.0
+    Banana  Yellow  4.1
+    Melon   Green   6.1
+    Avocado Green   8.0
+
+
+-----
+
+**Example - Natural vs. Alphabetical sorting**
+
+Given the following list::
+
+    chr4
+    chr13
+    chr1
+    chr10
+    chr20
+    chr2
+
+**Alphabetical sort** would produce the following sorted list::
+
+    chr1
+    chr10
+    chr13
+    chr2
+    chr20
+    chr4
+
+**Natural Sort** would produce the following sorted list::
+
+    chr1
+    chr2
+    chr4
+    chr10
+    chr13
+    chr20
+
+
+.. class:: infomark
+
+If you're planning to use the file with another tool that expects sorted files (such as *join*), you should use the **Alphabetical sort**,  not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs.
+
+-----
+
+*sort-header* was written by A. Gordon ( gordon at cshl dot edu )
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sort_rows.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,26 @@
+<tool id="sort_rows" name="Sort a row" version="0.0.1">
+  <description>according to their columns</description>
+  <command>python -c 'for line in ["\t".join(sorted(line.strip().split("\t"))) for line in open("$input").readlines() ]: print line' > $outfile</command>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Tabular file that should be sorted"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outfile" metadata_source="input"/>
+  </outputs>
+  <options sanitize="False"/>
+  <tests>
+
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+**What it does**
+
+This tool sorts each row in a TAB separated file, according to their columns. In other words: It is a sorted reordering of all columns.
+
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sorted_uniq.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,55 @@
+<tool id="unixtools_uniq_tool" name="Unique lines">
+    <description>from sorted file</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command>
+        uniq 
+            -f 
+            $skipfields 
+            $count 
+            $repeated 
+            $ignorecase 
+            $uniqueonly 
+            $input 
+            
+            ## feature is not yet released, it will be in the next 8.22 version
+            ##--group=$group
+            &gt; $output
+    </command>
+
+    <inputs>
+        <param format="txt" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />
+
+        <param name="count" type="boolean" label="count [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
+        <param name="repeated" type="boolean" label="repeated [-d]" help="Only print duplicate lines" truevalue="-d" falsevalue="" />
+        <param name="ignorecase" type="boolean" label="ignore case [-i]" help="Ignore differences in case when comparing" truevalue="-i" falsevalue="" />
+        <param name="uniqueonly" type="boolean" label="unique only [-u]" help="Only print unique lines" truevalue="-u" falsevalue="" />
+        <param name="skipfields" type="integer" label="skip fields [-f]" help="Avoid comparing the first N fields. (use zero to start from the first field)" size="2" value="0" />
+
+        <!--
+        <param name="group" type="select" label="Output all lines, and delimit each unique group.">
+            <option value="separate">Separate unique groups with a single delimiter</option>
+            <option value="prepend">Output a delimiter before each group of unique items</option>
+            <option value="append">Output a delimiter after each group of unique items.</option>
+            <option value="both">Output a delimiter around each group of unique items.</option>
+        </param>
+        -->
+    </inputs>
+
+    <outputs>
+        <data format="input" name="output" metadata_source="input"/>
+    </outputs>
+    <help>
+This tool takes a sorted file and looks for lines that are unique.
+
+.. class:: warningmark
+
+Please make sure your file is sorted, or else this tool will give you an erroneous output.
+
+.. class:: infomark
+
+You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tail.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,25 @@
+<tool id="unitools_tail_tool" name="Select last" version="0.1.1">
+    <description>lines from a dataset (tail)</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command>
+        tail --lines $count '$input1' &gt; '$output'
+    </command>
+
+    <inputs>
+        <param format="txt" name="input1" type="data" label="file to cut" />
+        <param name="count" type="integer" size="5"  value="10" label="Output last X lines" help="" />
+    </inputs>
+
+    <outputs>
+        <data format="input" name="output" metadata_source="input1"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool runs the **tail** unix command, which discards lines from the beginning of a file.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="gnu_coreutils" version="8.21">
+        <repository changeset_revision="83be2b421d3b" name="package_gnu_coreutils_8_21" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="gnu_awk" version="4.1.0">
+        <repository changeset_revision="196065d1785d" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="gnu_grep" version="2.14">
+        <repository changeset_revision="af98f72cd785" name="package_gnu_grep_2_14" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="gnu_sed" version="4.2.2-sandbox">
+        <repository changeset_revision="4a4691c78042" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <set_environment version="1.0">
+        <environment_variable action="set_to" name="UNIX_TOOLS_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR/scripts</environment_variable>
+    </set_environment>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unsorted_uniq.py	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,36 @@
+import sys
+import subprocess
+
+"""
+    We only need that file because Galaxy does not understand the -t $'\t' term.
+    Otherwise that would be the right XML-only solution:
+    sort -u 
+        $ignore_case
+        $is_numeric
+        -t \$'\t'
+        #if $adv_opts.adv_opts_selector=="advanced":
+            -k$adv_opts.column_start,$adv_opts.column_end
+        #end if
+        -o $outfile
+        $input
+"""
+
+if sys.argv[1].strip() != 'false':
+    ignore_case = sys.argv[1]
+else:
+    ignore_case = ''
+
+if sys.argv[2].strip() != 'false':
+    is_numeric = sys.argv[2]
+else:
+    is_numeric = ''
+
+try:
+    col_start = sys.argv[3]
+    col_end = sys.argv[4]
+    com = "sort -u %s %s -t '	' -k%s,%s -o %s %s" % (is_numeric, ignore_case, col_start, col_end, sys.argv[5], sys.argv[6])
+except:
+    # no advanced options selected
+    com = "sort -u %s %s -t '	' -o %s %s" % (is_numeric, ignore_case, sys.argv[3], sys.argv[4])
+
+subprocess.call(com, shell=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unsorted_uniq.xml	Thu Sep 05 04:58:21 2013 -0400
@@ -0,0 +1,79 @@
+<tool id="unixtools_sorted_uniq" name="Unique" version="0.3">
+  <description>occurrences of each record</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command interpreter='python'>
+        unique_lines.py 
+            $ignore_case 
+            $is_numeric
+            #if $adv_opts.adv_opts_selector=="advanced":
+                $adv_opts.column_start
+                $adv_opts.column_end
+            #end if
+            $outfile
+            $infile
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="tabular,text" label="from query" /> 
+        <param name="ignore_case" type="boolean" label="ignore differences in case when comparing (-f)" truevalue="-f" falsevalue="false" checked="false" help="ignore differences in case when comparing"/>
+        <param name="is_numeric" type="boolean" label="column only contains numeric values (-n)" truevalue="-n" falsevalue="false" checked="false" help="does the column contain only numeric values"/>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <param name="column_start" label="Column start" type="data_column" data_ref="infile" help="Unique on specific column range"/>
+                <param name="column_end" label="Column end" type="data_column" data_ref="infile" help="Unique on specific column range"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="infile" name="outfile" metadata_source="infile"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+
+ .. class:: infomark
+
+**Syntax**
+
+This tool returns all unique lines using the 'sort -u' command. It can be used with unsorted files. 
+If you need additional options, like grouping or counting your unique results, please use the 'Unique lines from sorted file' tool.
+
+-----
+
+.. class:: infomark
+
+The input file needs to be tab separated. Please convert your file if necessary.
+
+-----
+
+**Example**
+
+- Input file::
+     
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr1   10  100  gene1
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+       chr2   10  100  gene4
+
+- Unique lines will result in::
+
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+
+
+</help>
+</tool>