Mercurial > repos > bgruening > text_processing

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ansi2html.sh	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,331 @@
+#!/bin/sh
+
+# Convert ANSI (terminal) colours and attributes to HTML
+
+# Author:
+#    http://www.pixelbeat.org/docs/terminal_colours/
+# Examples:
+#    ls -l --color=always | ansi2html.sh > ls.html
+#    git show --color | ansi2html.sh > last_change.html
+#    Generally one can use the `script` util to capture full terminal output.
+# Changes:
+#    V0.1, 24 Apr 2008, Initial release
+#    V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com>
+#                         Support `git diff --color` output by
+#                         matching ANSI codes that specify only
+#                         bold or background colour.
+#                       P@draigBrady.com
+#                         Support `ls --color` output by stripping
+#                         redundant leading 0s from ANSI codes.
+#                         Support `grep --color=always` by stripping
+#                         unhandled ANSI codes (specifically ^[[K).
+#    V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
+#                         Remove cat -v usage which mangled non ascii input.
+#                         Cleanup regular expressions used.
+#                         Support other attributes like reverse, ...
+#                       P@draigBrady.com
+#                         Correctly nest <span> tags (even across lines).
+#                         Add a command line option to use a dark background.
+#                         Strip more terminal control codes.
+#    V0.4, 17 Sep 2009, P@draigBrady.com
+#                         Handle codes with combined attributes and color.
+#                         Handle isolated <bold> attributes with css.
+#                         Strip more terminal control codes.
+#    V0.12, 12 Jul 2011
+#      http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh
+
+if [ "$1" = "--version" ]; then
+    echo "0.12" && exit
+fi
+
+if [ "$1" = "--help" ]; then
+    echo "This utility converts ANSI codes in data passed to stdin" >&2
+    echo "It has 2 optional parameters:" >&2
+    echo "   --bg=dark --palette=linux|solarized|tango|xterm" >&2
+    echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2
+    exit
+fi
+
+[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
+
+if [ "$1" = "--palette=solarized" ]; then
+   # See http://ethanschoonover.com/solarized
+   P0=073642;  P1=D30102;  P2=859900;  P3=B58900;
+   P4=268BD2;  P5=D33682;  P6=2AA198;  P7=EEE8D5;
+   P8=002B36;  P9=CB4B16; P10=586E75; P11=657B83;
+  P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
+  shift;
+elif [ "$1" = "--palette=solarized-xterm" ]; then
+   # Above mapped onto the xterm 256 color palette
+   P0=262626;  P1=AF0000;  P2=5F8700;  P3=AF8700;
+   P4=0087FF;  P5=AF005F;  P6=00AFAF;  P7=E4E4E4;
+   P8=1C1C1C;  P9=D75F00; P10=585858; P11=626262;
+  P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
+  shift;
+elif [ "$1" = "--palette=tango" ]; then
+   # Gnome default
+   P0=000000;  P1=CC0000;  P2=4E9A06;  P3=C4A000;
+   P4=3465A4;  P5=75507B;  P6=06989A;  P7=D3D7CF;
+   P8=555753;  P9=EF2929; P10=8AE234; P11=FCE94F;
+  P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
+  shift;
+elif [ "$1" = "--palette=xterm" ]; then
+   P0=000000;  P1=CD0000;  P2=00CD00;  P3=CDCD00;
+   P4=0000EE;  P5=CD00CD;  P6=00CDCD;  P7=E5E5E5;
+   P8=7F7F7F;  P9=FF0000; P10=00FF00; P11=FFFF00;
+  P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
+  shift;
+else # linux console
+   P0=000000;  P1=AA0000;  P2=00AA00;  P3=AA5500;
+   P4=0000AA;  P5=AA00AA;  P6=00AAAA;  P7=AAAAAA;
+   P8=555555;  P9=FF5555; P10=55FF55; P11=FFFF55;
+  P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
+  [ "$1" = "--palette=linux" ] && shift
+fi
+
+[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
+
+echo -n "<html>
+<head>
+<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>
+<style type=\"text/css\">
+.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
+.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
+.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
+.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
+.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
+.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
+.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
+.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
+.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
+.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
+.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
+.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
+.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
+.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
+.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
+.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
+.eb8  { background-color: #$P8; }
+.eb9  { background-color: #$P9; }
+.eb10 { background-color: #$P10; }
+.eb11 { background-color: #$P11; }
+.eb12 { background-color: #$P12; }
+.eb13 { background-color: #$P13; }
+.eb14 { background-color: #$P14; }
+.eb15 { background-color: #$P15; }
+"
+
+# The default xterm 256 colour palette
+for red in $(seq 0 5); do
+  for green in $(seq 0 5); do
+    for blue in $(seq 0 5); do
+        c=$((16 + ($red * 36) + ($green * 6) + $blue))
+        r=$((($red * 40 + 55) * ($red > 0)))
+        g=$((($green * 40 + 55) * ($green > 0)))
+        b=$((($blue * 40 + 55) * ($blue > 0)))
+        printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b
+        printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b
+    done
+  done
+done
+for gray in $(seq 0 23); do
+  c=$(($gray+232))
+  l=$(($gray*10 + 8))
+  printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l
+  printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l
+done
+
+echo -n '
+.f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' }
+.b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; }
+.f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
+  /* Bold is heavy black on white, or bright white
+     depending on the default background */
+  color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`'
+  font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`'
+}
+.reverse {
+  /* CSS doesnt support swapping fg and bg colours unfortunately,
+     so just hardcode something that will look OK on all backgrounds. */
+  '"color: #$P0; background-color: #$P7;"'
+}
+.underline { text-decoration: underline; }
+.line-through { text-decoration: line-through; }
+.blink { text-decoration: blink; }
+
+</style>
+</head>
+
+<body class="f9 b9">
+<pre>
+'
+
+p='\x1b\['        #shortcut to match escape codes
+P="\(^[^°]*\)¡$p" #expression to match prepended codes below
+
+# Handle various xterm control sequences.
+# See /usr/share/doc/xterm-*/ctlseqs.txt
+sed "
+s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
+s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
+
+#handle carriage returns
+s#^.*\r\{1,\}\([^$]\)#\1#
+s#\r\$## # strip trailing \r
+
+# strip other non SGR escape sequences
+s#[\x07]##g
+s#\x1b[]>=\][0-9;]*##g
+s#\x1bP+.\{5\}##g
+s#${p}[0-9;?]*[^0-9;?m]##g
+
+#remove backspace chars and what they're backspacing over
+:rm_bs
+s#[^\x08]\x08##g; t rm_bs
+" |
+
+# Normalize the input before transformation
+sed "
+# escape HTML
+s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g
+
+# normalize SGR codes a little
+
+# split 256 colors out and mark so that they're not
+# recognised by the following 'split combined' line
+:e
+s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
+s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
+
+:c
+s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
+s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
+s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
+s#${p}m#${p}0m#g                                          #add leading 0 to norm
+
+# undo any 256 color marking
+s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
+
+# map 16 color codes to color + bold
+s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
+s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
+
+# change 'reset' code to a single char, and prepend a single char to
+# other codes so that we can easily do negative matching, as sed
+# does not support look behind expressions etc.
+s#°#\&deg;#g; s#${p}0m#°#g
+s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g
+" |
+
+# Convert SGR sequences to HTML
+sed "
+:ansi_to_span # replace ANSI codes with CSS classes
+t ansi_to_span # hack so t commands below only apply to preceding s cmd
+
+/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code
+
+# common combinations to minimise html (optional)
+s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
+s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count
+
+s#${P}1m#\1<span class=\"bold\">#;                            t span_count
+s#${P}4m#\1<span class=\"underline\">#;                       t span_count
+s#${P}5m#\1<span class=\"blink\">#;                           t span_count
+s#${P}7m#\1<span class=\"reverse\">#;                         t span_count
+s#${P}9m#\1<span class=\"line-through\">#;                    t span_count
+s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#;                    t span_count
+s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#;                    t span_count
+
+s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#;        t span_count
+s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#;        t span_count
+
+s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
+
+b # next line of input
+
+# add a corresponding span end flag
+:span_count
+x; s/^/s/; x
+b ansi_to_span
+
+# replace 'reset code' with correct number of </span> tags
+:span_end
+x
+/^s/ {
+  s/^.//
+  x
+  s#°#</span>°#
+  b span_end
+}
+x
+s#°##
+b ansi_to_span
+" |
+
+# Convert alternative character set
+# Note we convert here, as if we do at start we have to worry about avoiding
+# conversion of SGR codes etc., whereas doing here we only have to
+# avoid conversions of stuff between &...; or <...>
+#
+# Note we could use sed to do this based around:
+#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
+# However that would be very awkward as we need to only conv some input.
+# The basic scheme that we do in the python script below is:
+#  1. enable transliterate once ¡ char seen
+#  2. disable once µ char seen (may be on diff line to ¡)
+#  3. never transliterate between &; or <> chars
+sed "
+# change 'smacs' and 'rmacs' to a single char so that we can easily do
+# negative matching, as sed does not support look behind expressions etc.
+# Note we don't use ° like above as that's part of the alternate charset.
+s#\x1b(0#¡#g;
+s#µ#\&micro;#g; s#\x1b(B#µ#g
+" |
+(
+python -c "
+# vim:fileencoding=utf8
+
+import sys
+import locale
+encoding=locale.getpreferredencoding()
+
+old='abcdefghijklmnopqrstuvwxyz{}\`~'
+new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
+new=unicode(new, 'utf-8')
+table=range(128)
+for o,n in zip(old, new): table[ord(o)]=n
+
+(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
+
+state = STANDARD
+last_mode = STANDARD
+for c in unicode(sys.stdin.read(), encoding):
+  if state == HTML_TAG:
+    if c == '>':
+      state = last_mode
+  elif state == HTML_ENTITY:
+    if c == ';':
+      state = last_mode
+  else:
+    if c == '<':
+      state = HTML_TAG
+    elif c == '&':
+      state = HTML_ENTITY
+    elif c == u'¡' and state == STANDARD:
+      state = ALTERNATIVE
+      last_mode = ALTERNATIVE
+      continue
+    elif c == u'µ' and state == ALTERNATIVE:
+      state = STANDARD
+      last_mode = STANDARD
+      continue
+    elif state == ALTERNATIVE:
+      c = c.translate(table)
+  sys.stdout.write(c.encode(encoding))
+" 2>/dev/null ||
+sed 's/[¡µ]//g' # just strip alternative flag chars
+)
+
+echo "</pre>
+</body>
+</html>"
--- a/awk.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/awk.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_awk_tool" name="Text reformatting" version="0.1.1">
+<tool id="tp_awk_tool" name="Text reformatting" version="0.1.1">
     <description>with awk</description>
     <requirements>
         <requirement type="package" version="4.1.0">gnu_awk</requirement>
--- a/cut.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/cut.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_cut_tool" name="Cut" version="0.1.1">
+<tool id="tp_cut_tool" name="Cut" version="0.1.1">
     <description>columns from a table</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
--- a/easyjoin.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/easyjoin.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_easyjoin_tool" name="Join" version="0.1.1">
+<tool id="tp_easyjoin_tool" name="Join" version="0.1.1">
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
     </requirements>
--- a/find_and_replace.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/find_and_replace.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="cshl_find_and_replace" name="Replace" version="0.1.1">
+<tool id="tp_find_and_replace" name="Replace" version="0.1.1">
     <description>parts of text</description>
     <command interpreter="perl">
         find_and_replace
--- a/grep.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/grep.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,13 +1,13 @@
-<tool id="unixtools_grep_tool" name="Search in textfiles" version="0.1.1">
+<tool id="tp_grep_tool" name="Search in textfiles" version="0.1.1">
     <description>(grep)</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
         <requirement type="package" version="2.14">gnu_grep</requirement>
-        <requirement type="set_environment">UNIX_TOOLS_SCRIPT_PATH</requirement>
+        <requirement type="set_environment">TP_SCRIPT_PATH</requirement>
     </requirements>
     <command>
         #if str($color) == "COLOR":
-            GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$UNIX_TOOLS_SCRIPT_PATH/ansi2html.sh > "${output}"
+            GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$TP_SCRIPT_PATH/ansi2html.sh > "${output}"
         #else:
             grep -P "$@" -- "${url_paste}" '${input}' | grep -v "^--$" > "${output}"
         #end if
--- a/head.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/head.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_head_tool" name="Select first" version="0.1.1">
+<tool id="tp_head_tool" name="Select first" version="0.1.1">
     <description>lines from a dataset (head)</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
--- a/multijoin.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/multijoin.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_multijoin'_tool" name="Multi-Join" version="0.1.1">
+<tool id="tp_multijoin'_tool" name="Multi-Join" version="0.1.1">
   <description>(combine multiple files)</description>
   <command interpreter="perl">multijoin
         --key '$key_column'
--- a/readme.rst	Thu Sep 05 12:42:48 2013 -0400
+++ b/readme.rst	Sun Oct 06 08:22:36 2013 -0400
@@ -1,17 +1,18 @@
-These are Galaxy wrappers for common unix text-processing tools
-===============================================================
+Galaxy wrappers for common unix text-processing tools
+=====================================================

 The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu )
 in Cold Spring Harbor Laboratory ( http://www.cshl.edu ).


-The tools are:
+Tools:

 * awk - The AWK programmning language ( http://www.gnu.org/software/gawk/ )
 * sed - Stream Editor ( http://sed.sf.net )
 * grep - Search files ( http://www.gnu.org/software/grep/ )
 * sort_columns - Sorting every line according to there columns
 * GNU Coreutils programs ( http://www.gnu.org/software/coreutils/ ):
+
   * sort - sort files
   * join - join two files, based on common key field.
   * cut  - keep/discard fields from a file
@@ -37,7 +38,7 @@
 3. SED version 4.2 *with* a special patch
 4. Grep with PCRE support

-These will be installed automatically with the Galaxy Tool Shed.
+These will be installed automatically with the Galaxy `Tool Shed`_.


 -------------------
@@ -50,22 +51,29 @@
 These commands are DISABLED using the "--sandbox" parameter to awk and sed.

 User trying to run an awk program similar to:
+
  BEGIN { system("ls") }
+
 Will get an error (in Galaxy) saying:
+
  fatal: 'system' function not allowed in sandbox mode.

 User trying to run a SED program similar to:
+
  1els
+
 will get an error (in Galaxy) saying:
+
  sed: -e expression #1, char 2: e/r/w commands disabled in sandbox mode

+
 That being said, if you do find some vulnerability in these tools, please let me know and I'll try fix them.

 ------------
 Installation
 ------------

-Should be done with the Galaxy `Tool Shed`_.
+Should be done via the Galaxy `Tool Shed`_.

 .. _`Tool Shed`: http://wiki.galaxyproject.org/Tool%20Shed

@@ -84,6 +92,30 @@
 - evaluate the join wrappers against the Galaxy ones, maybe we should drop them


+-------
+License
+-------
+
+* Copyright (c) 2009-2013   A. Gordon  (gordon <at> cshl dot edu)
+* Copyright (c) 2013   B. Gruening  (bjoern dot gruening <at> gmail dot com)


+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:

+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_ending.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,46 @@
+<tool id="tp_remove_ending" name="Remove ending" version="0.1">
+    <description>of a file</description>
+    <requirements>
+        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    </requirements>
+    <command>head -n -$num_lines "$input" &gt; "$outfile"</command> <!-- GNU head: "-n -N" prints everything except the last N lines -->
+    <inputs>
+        <param name="num_lines" size="5" type="integer" value="1" min="0" label="Remove last n lines" help=""/>
+        <param format="txt" name="input" type="data" label="from"/>
+    </inputs>
+    <tests>
+        <test>
+            <param name="input" value="remove_ending_input1.txt" />
+            <output name="outfile" file="remove_ending_output1.txt" />
+            <param name="num_lines" value="2" />
+        </test>
+    </tests>
+    <outputs>
+        <data format="input" name="outfile" metadata_source="input"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool removes the specified number of lines from the end of a dataset.
+
+-----
+
+**Example**
+
+Input File::
+
+    chr7  56632  56652   D17003_CTCF_R6  310  +
+    chr7  56736  56756   D17003_CTCF_R7  354  +
+    chr7  56761  56781   D17003_CTCF_R4  220  +
+    chr7  56772  56792   D17003_CTCF_R7  372  +
+    chr7  56775  56795   D17003_CTCF_R4  207  +
+
+After removing the last 2 lines the dataset will look like this::
+
+    chr7  56632  56652   D17003_CTCF_R6  310  +
+    chr7  56736  56756   D17003_CTCF_R7  354  +
+    chr7  56761  56781   D17003_CTCF_R4  220  +
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_text_in_column.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,128 @@
+<tool id="tp_replace_in_column" name="Replace Text" version="0.1">
+    <description>in a specific column</description>
+    <requirements>
+        <requirement type="package" version="4.1.0">gnu_awk</requirement>
+    </requirements>
+    <command interpreter="sh">
+        #adapt to awk's quirks - to pass an actual backslash - two backslashes are required (just like in a C string)
+        REPLACE_PATTERN=\${$replace_pattern//\\/\\\\};
+        awk -v OFS="\t" --re-interval --sandbox "{ \$$column = gensub( /$find_pattern/, \"$replace_pattern\", \"g\", \$$column ) ; print \$0 ; }" "$input" &gt; "$output"
+    </command>
+    <inputs>
+        <param format="tabular" name="input" type="data" label="File to process" />
+        <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
+
+        <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+    </inputs>
+    <tests> <!-- param names must match the declared inputs: find_pattern / replace_pattern -->
+        <test>
+            <param name="input" value="replace_text_in_column_in1.txt" ftype="tabular" />
+            <output name="output" file="replace_text_in_column_output1.txt" />
+            <param name="column" value="4" />
+            <param name="find_pattern" value=".+_(R.)" />
+            <param name="replace_pattern" value="\1" />
+        </test>
+    </tests>
+    <outputs>
+        <data format="input" name="output" metadata_source="input" />
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool performs find &amp; replace operation on a specified column in a given file.
+
+.. class:: infomark
+
+The **pattern to find** uses the **extended regular** expression syntax (same as running 'awk --re-interval').
+
+.. class:: infomark
+
+**TIP:** If you need more complex patterns, use the *awk* tool.
+
+-----
+
+
+**Examples of Find Patterns**
+
+- **HELLO**     The word 'HELLO' (case sensitive).
+- **AG.T**      The letters A,G followed by any single character, followed by the letter T.
+- **A{4,}**     Four or more consecutive A's.
+- **chr2[012]\\t**       The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
+- **hsa-mir-([^ ]+)**        The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern.
+
+
+**Examples of Replace Patterns**
+
+- **WORLD**  The word 'WORLD' will be placed wherever the find pattern was found.
+- **FOO-&amp;-BAR**  Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&amp;** (ampersand) represents the matched find pattern.
+- **\\1**   The text which matched the first parenthesis in the Find Pattern.
+
+
+
+
+-----
+
+**Example 1**
+
+**Find Pattern:** HELLO
+**Replace Pattern:** WORLD
+
+Every time the word HELLO is found, it will be replaced with the word WORLD. This operation affects only the selected column.
+
+-----
+
+**Example 2**
+
+**Find Pattern:** ^(.{4})
+**Replace Pattern:** &amp;\\t
+
+Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns. This operation affects only the selected column.
+
+
+-----
+
+**Extended Regular Expression Syntax**
+
+The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
+
+- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string(but not an internal line).
+- **(** .. **)** groups a particular pattern.
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times.
+  - **{n,m}** The preceding item is matched at least n times but not more than m times.
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meanings:
+  - matches the beginning of a line or string.
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\|** Separates alternate possibilities.
+
+
+**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_text_in_line.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,128 @@
+<tool id="tp_replace_in_line" name="Replace Text" version="0.1">
+    <description>in entire line</description>
+    <requirements>
+        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
+    </requirements>
+
+    <command>
+        sed -r --sandbox 's/$find_pattern/$replace_pattern/g' "$input" &gt; "$output"
+    </command> <!-- No interpreter attribute: Galaxy would prefix the tool directory to "sed". Single quotes stop the shell from expanding user-supplied patterns. -->
+
+    <inputs>
+        <param format="txt" name="input" type="data" label="File to process" />
+
+         <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+         <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+    </inputs>
+    <tests> <!-- param names must match the declared inputs: find_pattern / replace_pattern -->
+        <test>
+            <param name="input" value="replace_text_in_line_in1.txt" ftype="tabular" />
+            <output name="output" file="replace_text_in_line_output1.txt" />
+            <param name="find_pattern" value="CTC." />
+            <param name="replace_pattern" value="FOOBAR" />
+        </test>
+    </tests>
+    <outputs>
+        <data format="input" name="output" metadata_source="input"/>
+    </outputs>
+    <help>
+
+**What it does**
+
+This tool performs find &amp; replace operation on a specified file.
+
+.. class:: infomark
+
+The **pattern to find** uses the **extended regular** expression syntax (same as running 'sed -r').
+
+.. class:: infomark
+
+**TIP:** If you need more complex patterns, use the *sed* tool.
+
+-----
+
+
+**Examples of Find Patterns**
+
+- **HELLO**     The word 'HELLO' (case sensitive).
+- **AG.T**      The letters A,G followed by any single character, followed by the letter T.
+- **A{4,}**     Four or more consecutive A's.
+- **chr2[012]\\t**       The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
+- **hsa-mir-([^ ]+)**        The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern.
+
+
+
+**Examples of Replace Patterns**
+
+- **WORLD**  The word 'WORLD' will be placed wherever the find pattern was found.
+- **FOO-&amp;-BAR**  Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&amp;** (ampersand) represents the matched find pattern.
+- **\\1**   The text which matched the first parenthesis in the Find Pattern.
+
+
+
+
+-----
+
+**Example 1**
+
+**Find Pattern:** HELLO
+**Replace Pattern:** WORLD
+
+Every time the word HELLO is found, it will be replaced with the word WORLD.
+
+
+-----
+
+**Example 2**
+
+**Find Pattern:** ^(.{4})
+**Replace Pattern:** &amp;\\t
+
+Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns.
+
+
+-----
+
+**Extended Regular Expression Syntax**
+
+The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
+
+- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
+- **^** matches the beginning of a string(but not an internal line).
+- **(** .. **)** groups a particular pattern.
+- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
+
+  - **{n}** The preceding item is matched exactly n times.
+  - **{n,}** The preceding item is matched n or more times.
+  - **{n,m}** The preceding item is matched at least n times but not more than m times.
+
+- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
+- **.** Matches any single character except a newline.
+- ***** The preceding item will be matched zero or more times.
+- **?** The preceding item is optional and matched at most once.
+- **+** The preceding item will be matched one or more times.
+- **^** has two meanings:
+  - matches the beginning of a line or string.
+  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
+- **$** matches the end of a line or string.
+- **\|** Separates alternate possibilities.
+
+
+**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported.
+
+    </help>
+</tool>
--- a/scripts/ansi2html.sh	Thu Sep 05 12:42:48 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,331 +0,0 @@
-#!/bin/sh
-
-# Convert ANSI (terminal) colours and attributes to HTML
-
-# Author:
-#    http://www.pixelbeat.org/docs/terminal_colours/
-# Examples:
-#    ls -l --color=always | ansi2html.sh > ls.html
-#    git show --color | ansi2html.sh > last_change.html
-#    Generally one can use the `script` util to capture full terminal output.
-# Changes:
-#    V0.1, 24 Apr 2008, Initial release
-#    V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com>
-#                         Support `git diff --color` output by
-#                         matching ANSI codes that specify only
-#                         bold or background colour.
-#                       P@draigBrady.com
-#                         Support `ls --color` output by stripping
-#                         redundant leading 0s from ANSI codes.
-#                         Support `grep --color=always` by stripping
-#                         unhandled ANSI codes (specifically ^[[K).
-#    V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
-#                         Remove cat -v usage which mangled non ascii input.
-#                         Cleanup regular expressions used.
-#                         Support other attributes like reverse, ...
-#                       P@draigBrady.com
-#                         Correctly nest <span> tags (even across lines).
-#                         Add a command line option to use a dark background.
-#                         Strip more terminal control codes.
-#    V0.4, 17 Sep 2009, P@draigBrady.com
-#                         Handle codes with combined attributes and color.
-#                         Handle isolated <bold> attributes with css.
-#                         Strip more terminal control codes.
-#    V0.12, 12 Jul 2011
-#      http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh
-
-if [ "$1" = "--version" ]; then
-    echo "0.12" && exit
-fi
-
-if [ "$1" = "--help" ]; then
-    echo "This utility converts ANSI codes in data passed to stdin" >&2
-    echo "It has 2 optional parameters:" >&2
-    echo "   --bg=dark --palette=linux|solarized|tango|xterm" >&2
-    echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2
-    exit
-fi
-
-[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
-
-if [ "$1" = "--palette=solarized" ]; then
-   # See http://ethanschoonover.com/solarized
-   P0=073642;  P1=D30102;  P2=859900;  P3=B58900;
-   P4=268BD2;  P5=D33682;  P6=2AA198;  P7=EEE8D5;
-   P8=002B36;  P9=CB4B16; P10=586E75; P11=657B83;
-  P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
-  shift;
-elif [ "$1" = "--palette=solarized-xterm" ]; then
-   # Above mapped onto the xterm 256 color palette
-   P0=262626;  P1=AF0000;  P2=5F8700;  P3=AF8700;
-   P4=0087FF;  P5=AF005F;  P6=00AFAF;  P7=E4E4E4;
-   P8=1C1C1C;  P9=D75F00; P10=585858; P11=626262;
-  P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
-  shift;
-elif [ "$1" = "--palette=tango" ]; then
-   # Gnome default
-   P0=000000;  P1=CC0000;  P2=4E9A06;  P3=C4A000;
-   P4=3465A4;  P5=75507B;  P6=06989A;  P7=D3D7CF;
-   P8=555753;  P9=EF2929; P10=8AE234; P11=FCE94F;
-  P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
-  shift;
-elif [ "$1" = "--palette=xterm" ]; then
-   P0=000000;  P1=CD0000;  P2=00CD00;  P3=CDCD00;
-   P4=0000EE;  P5=CD00CD;  P6=00CDCD;  P7=E5E5E5;
-   P8=7F7F7F;  P9=FF0000; P10=00FF00; P11=FFFF00;
-  P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
-  shift;
-else # linux console
-   P0=000000;  P1=AA0000;  P2=00AA00;  P3=AA5500;
-   P4=0000AA;  P5=AA00AA;  P6=00AAAA;  P7=AAAAAA;
-   P8=555555;  P9=FF5555; P10=55FF55; P11=FFFF55;
-  P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
-  [ "$1" = "--palette=linux" ] && shift
-fi
-
-[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
-
-echo -n "<html>
-<head>
-<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>
-<style type=\"text/css\">
-.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
-.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
-.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
-.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
-.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
-.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
-.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
-.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
-.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
-.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
-.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
-.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
-.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
-.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
-.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
-.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
-.eb8  { background-color: #$P8; }
-.eb9  { background-color: #$P9; }
-.eb10 { background-color: #$P10; }
-.eb11 { background-color: #$P11; }
-.eb12 { background-color: #$P12; }
-.eb13 { background-color: #$P13; }
-.eb14 { background-color: #$P14; }
-.eb15 { background-color: #$P15; }
-"
-
-# The default xterm 256 colour palette
-for red in $(seq 0 5); do
-  for green in $(seq 0 5); do
-    for blue in $(seq 0 5); do
-        c=$((16 + ($red * 36) + ($green * 6) + $blue))
-        r=$((($red * 40 + 55) * ($red > 0)))
-        g=$((($green * 40 + 55) * ($green > 0)))
-        b=$((($blue * 40 + 55) * ($blue > 0)))
-        printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b
-        printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b
-    done
-  done
-done
-for gray in $(seq 0 23); do
-  c=$(($gray+232))
-  l=$(($gray*10 + 8))
-  printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l
-  printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l
-done
-
-echo -n '
-.f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' }
-.b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; }
-.f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
-  /* Bold is heavy black on white, or bright white
-     depending on the default background */
-  color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`'
-  font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`'
-}
-.reverse {
-  /* CSS doesnt support swapping fg and bg colours unfortunately,
-     so just hardcode something that will look OK on all backgrounds. */
-  '"color: #$P0; background-color: #$P7;"'
-}
-.underline { text-decoration: underline; }
-.line-through { text-decoration: line-through; }
-.blink { text-decoration: blink; }
-
-</style>
-</head>
-
-<body class="f9 b9">
-<pre>
-'
-
-p='\x1b\['        #shortcut to match escape codes
-P="\(^[^°]*\)¡$p" #expression to match prepended codes below
-
-# Handle various xterm control sequences.
-# See /usr/share/doc/xterm-*/ctlseqs.txt
-sed "
-s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
-s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
-
-#handle carriage returns
-s#^.*\r\{1,\}\([^$]\)#\1#
-s#\r\$## # strip trailing \r
-
-# strip other non SGR escape sequences
-s#[\x07]##g
-s#\x1b[]>=\][0-9;]*##g
-s#\x1bP+.\{5\}##g
-s#${p}[0-9;?]*[^0-9;?m]##g
-
-#remove backspace chars and what they're backspacing over
-:rm_bs
-s#[^\x08]\x08##g; t rm_bs
-" |
-
-# Normalize the input before transformation
-sed "
-# escape HTML
-s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g
-
-# normalize SGR codes a little
-
-# split 256 colors out and mark so that they're not
-# recognised by the following 'split combined' line
-:e
-s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
-s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
-
-:c
-s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
-s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
-s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
-s#${p}m#${p}0m#g                                          #add leading 0 to norm
-
-# undo any 256 color marking
-s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
-
-# map 16 color codes to color + bold
-s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
-s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
-
-# change 'reset' code to a single char, and prepend a single char to
-# other codes so that we can easily do negative matching, as sed
-# does not support look behind expressions etc.
-s#°#\&deg;#g; s#${p}0m#°#g
-s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g
-" |
-
-# Convert SGR sequences to HTML
-sed "
-:ansi_to_span # replace ANSI codes with CSS classes
-t ansi_to_span # hack so t commands below only apply to preceeding s cmd
-
-/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceeding code
-
-# common combinations to minimise html (optional)
-s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
-s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count
-
-s#${P}1m#\1<span class=\"bold\">#;                            t span_count
-s#${P}4m#\1<span class=\"underline\">#;                       t span_count
-s#${P}5m#\1<span class=\"blink\">#;                           t span_count
-s#${P}7m#\1<span class=\"reverse\">#;                         t span_count
-s#${P}9m#\1<span class=\"line-through\">#;                    t span_count
-s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#;                    t span_count
-s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#;                    t span_count
-
-s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#;        t span_count
-s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#;        t span_count
-
-s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
-
-b # next line of input
-
-# add a corresponding span end flag
-:span_count
-x; s/^/s/; x
-b ansi_to_span
-
-# replace 'reset code' with correct number of </span> tags
-:span_end
-x
-/^s/ {
-  s/^.//
-  x
-  s#°#</span>°#
-  b span_end
-}
-x
-s#°##
-b ansi_to_span
-" |
-
-# Convert alternative character set
-# Note we convert here, as if we do at start we have to worry about avoiding
-# conversion of SGR codes etc., whereas doing here we only have to
-# avoid conversions of stuff between &...; or <...>
-#
-# Note we could use sed to do this based around:
-#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
-# However that would be very awkward as we need to only conv some input.
-# The basic scheme that we do in the python script below is:
-#  1. enable transliterate once ¡ char seen
-#  2. disable once µ char seen (may be on diff line to ¡)
-#  3. never transliterate between &; or <> chars
-sed "
-# change 'smacs' and 'rmacs' to a single char so that we can easily do
-# negative matching, as sed does not support look behind expressions etc.
-# Note we don't use ° like above as that's part of the alternate charset.
-s#\x1b(0#¡#g;
-s#µ#\&micro;#g; s#\x1b(B#µ#g
-" |
-(
-python -c "
-# vim:fileencoding=utf8
-
-import sys
-import locale
-encoding=locale.getpreferredencoding()
-
-old='abcdefghijklmnopqrstuvwxyz{}\`~'
-new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
-new=unicode(new, 'utf-8')
-table=range(128)
-for o,n in zip(old, new): table[ord(o)]=n
-
-(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
-
-state = STANDARD
-last_mode = STANDARD
-for c in unicode(sys.stdin.read(), encoding):
-  if state == HTML_TAG:
-    if c == '>':
-      state = last_mode
-  elif state == HTML_ENTITY:
-    if c == ';':
-      state = last_mode
-  else:
-    if c == '<':
-      state = HTML_TAG
-    elif c == '&':
-      state = HTML_ENTITY
-    elif c == u'¡' and state == STANDARD:
-      state = ALTERNATIVE
-      last_mode = ALTERNATIVE
-      continue
-    elif c == u'µ' and state == ALTERNATIVE:
-      state = STANDARD
-      last_mode = STANDARD
-      continue
-    elif state == ALTERNATIVE:
-      c = c.translate(table)
-  sys.stdout.write(c.encode(encoding))
-" 2>/dev/null ||
-sed 's/[¡µ]//g' # just strip aternative flag chars
-)
-
-echo "</pre>
-</body>
-</html>"
--- a/sed.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/sed.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_sed_tool" name="Text transformation" version="0.1.1">
+<tool id="tp_sed_tool" name="Text transformation" version="0.1.1">
   <description>with sed</description>
     <requirements>
         <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
--- a/sort.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/sort.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_sort_header_tool" name="Sort" version="0.1.1">
+<tool id="tp_sort_header_tool" name="Sort" version="0.1.1">
     <description>data in ascending or descending order</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
--- a/sort_rows.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/sort_rows.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="sort_rows" name="Sort a row" version="0.0.1">
+<tool id="tp_sort_rows" name="Sort a row" version="0.0.1">
   <description>according to their columns</description>
   <command>python -c 'for line in ["\t".join(sorted(line.strip().split("\t"))) for line in open("$input").readlines() ]: print line' > $outfile</command>
   <inputs>
--- a/sorted_uniq.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/sorted_uniq.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_uniq_tool" name="Unique lines">
+<tool id="tp_uniq_tool" name="Unique lines">
     <description>assuming sorted input file</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
--- a/tail.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/tail.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unitools_tail_tool" name="Select last" version="0.1.1">
+<tool id="tp_tail_tool" name="Select last" version="0.1.1">
     <description>lines from a dataset (tail)</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_input1__1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,9 @@
+CDKN2A	4
+CDKN2B	5
+DHX37	8
+LOC255	9
+LOC468	3
+OR4M2	12
+ORN4	1
+POTE15	3
+RI3BP	5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_input1__2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,7 @@
+CDKN2A	4
+DHX37	8
+HES7	1
+ILKA3	8
+LOC255	9
+MOUB	3
+UTJX	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_input2__1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,10 @@
+Gene	Experiment1
+CDKN2A	4
+CDKN2B	5
+DHX37	8
+LOC255	9
+LOC468	3
+OR4M2	12
+ORN4	1
+POTE15	3
+RI3BP	5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_input2__2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,8 @@
+Gene	Experiment2
+CDKN2A	4
+DHX37	8
+HES7	1
+ILKA3	8
+LOC255	9
+MOUB	3
+UTJX	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_output1_1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,13 @@
+CDKN2A	4	4
+CDKN2B	5	.
+DHX37	8	8
+HES7	.	1
+ILKA3	.	8
+LOC255	9	9
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_output1_2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,10 @@
+CDKN2B	5	.
+HES7	.	1
+ILKA3	.	8
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_output2_1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,14 @@
+Gene	Experiment1	Experiment2
+CDKN2A	4	4
+CDKN2B	5	.
+DHX37	8	8
+HES7	.	1
+ILKA3	.	8
+LOC255	9	9
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/join_output2_2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+Gene	Experiment1	Experiment2
+CDKN2B	5	.
+HES7	.	1
+ILKA3	.	8
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/remove_ending_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,5 @@
+chr7  56632  56652   D17003_CTCF_R6  310  +
+chr7  56736  56756   D17003_CTCF_R7  354  +
+chr7  56761  56781   D17003_CTCF_R4  220  +
+chr7  56772  56792   D17003_CTCF_R7  372  +
+chr7  56775  56795   D17003_CTCF_R4  207  +
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/remove_ending_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+chr7  56632  56652   D17003_CTCF_R6  310  +
+chr7  56736  56756   D17003_CTCF_R7  354  +
+chr7  56761  56781   D17003_CTCF_R4  220  +
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replace_text_in_column_in1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+chr7	56632	56652	D17003_CTCF_R6	310	+
+chr7	56736	56756	D17003_CTCF_R7	354	+
+chr7	56761	56781	D17003_CTCF_R4	220	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replace_text_in_column_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+chr7	56632	56652	R6	310	+
+chr7	56736	56756	R7	354	+
+chr7	56761	56781	R4	220	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replace_text_in_line_in1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+chr7	56632	56652	D17003_CTCF_R6	310	+
+chr7	56736	56756	D17003_CTCF_R7	354	+
+chr7	56761	56781	D17003_CTCF_R4	220	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replace_text_in_line_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+chr7	56632	56652	D17003_FOOBAR_R6	310	+
+chr7	56736	56756	D17003_FOOBAR_R7	354	+
+chr7	56761	56781	D17003_FOOBAR_R4	220	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_and_join_input2__1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,10 @@
+Gene	Experiment1
+LOC468	3
+CDKN2B	5
+RI3BP	5
+ORN4	1
+POTE15	3
+OR4M2	12
+LOC255	9
+DHX37	8
+CDKN2A	4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_and_join_input2__2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,8 @@
+Gene	Experiment2
+ILKA3	8
+UTJX	3
+HES7	1
+MOUB	3
+LOC255	9
+DHX37	8
+CDKN2A	4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_and_join_output2_1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,14 @@
+Gene	Experiment1	Experiment2
+CDKN2A	4	4
+CDKN2B	5	.
+DHX37	8	8
+HES7	.	1
+ILKA3	.	8
+LOC255	9	9
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort_and_join_output2_2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+Gene	Experiment1	Experiment2
+CDKN2B	5	.
+HES7	.	1
+ILKA3	.	8
+LOC468	3	.
+MOUB	.	3
+OR4M2	12	.
+ORN4	1	.
+POTE15	3	.
+RI3BP	5	.
+UTJX	.	3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_awk_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,10 @@
+chr10	0.4
+chr1	1.4
+chrM	3e-1
+chr2	1.1e2
+chr15	3.14e-2
+chr15	0.0314
+chr4	0.1
+chr20	0.9
+chr22	+1.3
+chrX	-0.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_awk_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,4 @@
+12.6	chr1
+990	chr2
+8.1	chr20
+11.7	chr22
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_cut_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,4 @@
+fruit	color	weight	price
+apple	red	1.4	0.4
+orange	orange	1.1	0.2
+banana	yellow	0.9	0.35
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_cut_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,4 @@
+fruit	weight	price
+apple	1.4	0.4
+orange	1.1	0.2
+banana	0.9	0.35
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_grep_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,152 @@
+>FC0000042:5:1:220:1502
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:34:1398
+GATCTCAGTCCACCGCTGGGATTAACCTTGCCCCCC
+>FC0000042:5:1:164:1396
+TATCTTATAGATATTTCCCTCTATACTAGTGACCCC
+>FC0000042:5:1:333:925
+GAGCTTATAGCTTGTTATATACGTCAACCCCCCCCC
+>FC0000042:5:1:204:1476
+GTACTTATATAGATACAAAATATGTATAGGATTGTC
+>FC0000042:5:1:119:1511
+GATCTGCATGACCTGGGATTTGTTGGACCCCCCCCC
+>FC0000042:5:1:202:1487
+CATGTATAGTCTCCAGTCTATACAACAACCCCCCCC
+>FC0000042:5:1:182:1434
+GCTATAGAAATGTTAACATCGAATGTACATTATAAC
+>FC0000042:5:1:627:866
+AATATAGATATGGGACAAAACACATTTAGACCCCCC
+>FC0000042:5:1:24:1357
+GATATAATATCAATATCAATCCACGCTTGTTCCCCC
+>FC0000042:5:1:187:1492
+TATAGAAGCAGAAGAAACAACCTACTTTCACATGTT
+>FC0000042:5:1:45:1344
+CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC
+>FC0000042:5:1:87:1299
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:206:1341
+GATATATAGCAGTGACCACCTCTAAGCCCCCCCCCC
+>FC0000042:5:1:144:929
+GCCCTGGCATATTGTCAATATCTTTAAACCCCCCCC
+>FC0000042:5:1:662:820
+TGTCTTTTCGATTTTTTTCTTTGCGTCACCCCCCCC
+>FC0000042:5:1:53:1507
+GACCTCACTGTGGCATGAATCATACATTCCCCCCCC
+>FC0000042:5:1:182:1502
+AATGCTTGGCAAAGCTCAACTTCGTTGCCCCCCCCC
+>FC0000042:5:1:194:1423
+GATCCTATAGGTCTCGATTGGTCTTTTATTCTTTTT
+>FC0000042:5:1:35:1444
+GCTATAGCACGGCATAGTGCGATACTAGTACCCCCC
+>FC0000042:5:1:667:872
+GACTATAGGCGGAATGATAATGTCAAATAAGTAGTT
+>FC0000042:5:1:147:1438
+GATCAAGGAGACTAGGGAGGTAGGAGTTACTCCCCC
+>FC0000042:5:1:467:510
+GAACCACTATAGTGACATGGAACACGCGTGAACCCC
+>FC0000042:5:1:1553:1707
+TATAGTTACCCTACTGGGCCGACGATTCCCTTACGA
+>FC0000042:5:1:207:964
+AATCTATAGATTTTTCTATTATTGTGTCCTCACCCC
+>FC0000042:5:1:169:1468
+GCTCTATAGTTCGAGTTACCAAACTCTTCCCCCCCC
+>FC0000042:5:1:42:1465
+GCTCTTTAGGTTTGAACCTGTAGACTTGAGGGGCAT
+>FC0000042:5:1:55:1331
+GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT
+>FC0000042:5:1:175:1501
+GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC
+>FC0000042:5:1:221:1465
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>FC0000042:5:1:196:1450
+AATATAGTCTATCCAACAAGATGTAACCCCCCCCCC
+>FC0000042:5:1:86:1413
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>FC0000042:5:1:453:514
+GATATCTTCGTTTTATATTGAAACTGGCCCCCCCCC
+>FC0000042:5:1:150:1415
+TATAGGGCCCTGTATGGTTGCTTGACTAGGGGCTGC
+>FC0000042:5:1:191:1475
+GATCCATCCCAATCTCTACGATTGAAAGCATCGGGA
+>FC0000042:5:1:26:1407
+GTTATAGAGGCGGGAAGGTGAGAATGCCCCCCCCCC
+>FC0000042:5:1:107:1407
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:388:780
+GATCTATAGCTTCTTTAGCTTGGAAACTGGTCAGCC
+>FC0000042:5:1:223:1535
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:145:783
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:449:876
+GACCATCAATCAGGTGGAAAGCAGGGCCCCCCCCCC
+>FC0000042:5:1:212:1325
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>FC0000042:5:1:194:1485
+GAACCGAATCCAACCTGTTTCATTCCTCAGATCCCC
+>FC0000042:5:1:507:494
+GATCTTATAGAATTTTTGACAACATAAGTTACCCCC
+>FC0000042:5:1:416:938
+AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC
+>FC0000042:5:1:633:480
+GAGCTGTGTGCATCTGTCCTGAGAGAGGCAAGATTT
+>FC0000042:5:1:53:1443
+GTAATGTTATAGCTAGGATTTTGGAGTTTGGTCCTC
+>FC0000042:5:1:45:915
+GTATAGCAGCCTAATAAGGAGCTGGGGACCCCCCCC
+>FC0000042:5:1:39:1343
+GTTCTATTTTCGATAAAACTGAACCACCCCCCCCCC
+>FC0000042:5:1:46:1501
+GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT
+>FC0000042:5:1:187:1507
+GAACTAATCCTGATTTATACAACGGCTCCCCCCCCC
+>FC0000042:5:1:91:1364
+AATTTATAGCCACTCTAATTCCGTTTGGTTCCCCCC
+>FC0000042:5:1:1542:1751
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>FC0000042:5:1:146:886
+GATCTACGATGTACCTTACGCCTCCGAGCATCCCCC
+>FC0000042:5:1:615:861
+GATCTACATTATAGATAATGAAGTTCCATTTCCCCC
+>FC0000042:5:1:52:792
+GATGTGGTATAGAGAGCAATTCGTTGGTTTTGCCCC
+>FC0000042:5:1:153:1433
+GGTCTTTCTATAGAACGGAACGATATATTTTTCCCC
+>FC0000042:5:1:540:800
+GAGCGAAAGTGATAGATGGAGGACTATATCTGCCCC
+>FC0000042:5:1:160:1344
+GGTGTACTATAGCTATTAAGTCCAATCATGATAATA
+>FC0000042:5:1:544:413
+GATCTCTGGAAAATATAAACCGGTGACCCCCCCCCC
+>FC0000042:5:1:579:895
+AGTCTCGAATCAATGTATTTCATCGTGGTAATCCCC
+>FC0000042:5:1:468:495
+TATTGATGCTCCCTGCCTGAAAGATACCCCCCCCCC
+>FC0000042:5:1:383:831
+CTTCATGAATCTACTGTTGGCGTTTATTTTATCTGG
+>FC0000042:5:1:112:1416
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>FC0000042:5:1:37:1299
+GATCGTGAGCTCTGTACCGGAAGTTCGTGGCTGCCA
+>FC0000042:5:1:205:780
+TATAGTGTTCCACAAAGACTAGGTAACGCTTCATTT
+>FC0000042:5:1:33:702
+GAACGGACTATAGCCGGTATCCAAACATAAATGTTC
+>FC0000042:5:1:54:1019
+AATCGCAGCATTCTGACACACAGGTTTCGGATGTAC
+>FC0000042:5:1:587:867
+TATCTAATGTCATATTTTCAGACAAATTACTAGAAA
+>FC0000042:5:1:319:990
+GATTTGTAAATTACTTCGAACATAGAAGTTCCCCCC
+>FC0000042:5:1:453:829
+GAACTTACGGCATTAAGTTTAATCTTCAGCCACCCC
+>FC0000042:5:1:159:1470
+GATCTGATAGTGTTGCGACGTAAATAAGTCCCCCCC
+>FC0000042:5:1:487:820
+GATCTCGCAGGGATCAGTTATCCAGGTATTCCCCCC
+>FC0000042:5:1:48:371
+AATCTATAATCTTTACCCGAGTTTAAGTCCCCCCCC
+>FC0000042:5:1:1346:1739
+GATATAGGTTATACGTTTTTAGTCTTAGAGAAGTTT
+>FC0000042:5:1:661:459
+GATCTGCTTTAACGATTGAGGACGATGCCCCCCCCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_grep_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,14 @@
+>FC0000042:5:1:182:1434
+GCTATAGAAATGTTAACATCGAATGTACATTATAAC
+>FC0000042:5:1:45:1344
+CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC
+>FC0000042:5:1:55:1331
+GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT
+>FC0000042:5:1:175:1501
+GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC
+>FC0000042:5:1:416:938
+AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC
+>FC0000042:5:1:46:1501
+GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT
+>FC0000042:5:1:33:702
+GAACGGACTATAGCCGGTATCCAAACATAAATGTTC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_grep_output2.html	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,9 @@
+<html><body><pre>
+GCTATAG<font color="blue"><b>AAATGT</b></font>TAACATCGAATGTACATTATAAC
+CAGCTAACAATC<font color="blue"><b>AAGCGT</b></font>TACAGATTAGCCCCCCCC
+GAACTTGCGTAACGTACAAAAATGCAAGCA<font color="blue"><b>AAAAGT</b></font>
+GCTCTGTTAATCTAGA<font color="blue"><b>AAATGT</b></font>GTCTCCCCCCCCCC
+<font color="blue"><b>AATCGT</b></font>ATAGCTCGGGCCGGATACTAGTACACCCCC
+GATATAGTGGATAACTAATGCTCCCCCAG<font color="blue"><b>AACTGT</b></font>T
+GAACGGACTATAGCCGGTATCCAAACAT<font color="blue"><b>AAATGT</b></font>TC
+</pre></body></html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sed_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,4 @@
+This is a header line
+Lorem ipsum dolor foo sit amet foo,
+consectetur adipiscing elit.
+Nam foo ut nulla non neque faucibus commodo
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sed_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,3 @@
+Lorem ipsum dolor bar sit amet foo,
+consectetur adipiscing elit.
+Nam bar ut nulla non neque faucibus commodo
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sed_output2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,2 @@
+Lorem ipsum dolor baz sit amet baz,
+Nam baz ut nulla non neque faucibus commodo
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sort_input1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+chrom	value
+chr10	0.4
+chr1	1.4
+chrM	3e-1
+chr2	1.1e2
+chr15	3.14e-2
+chr15	0.0314
+chr4	0.1
+chr20	0.9
+chr22	+1.3
+chrX	-0.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sort_input2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+Chrom	Value
+chr10	0.4
+chr1	1.4
+chrM	3e-1
+chr2	1.1e2
+chr15	3.14e-2
+chr15	0.0314
+chr4	0.1
+chr20	0.9
+chr22	+1.3
+chrX	-0.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sort_output1.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+chrom	value
+chr2	1.1e2
+chr1	1.4
+chr22	+1.3
+chr20	0.9
+chr10	0.4
+chrM	3e-1
+chr4	0.1
+chr15	0.0314
+chr15	3.14e-2
+chrX	-0.3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sort_output2.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,10 @@
+chrom	value
+chrX	-0.3
+chr15	3.14e-2
+chr4	0.1
+chrM	3e-1
+chr10	0.4
+chr20	0.9
+chr22	+1.3
+chr1	1.4
+chr2	1.1e2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unix_sort_output3.txt	Sun Oct 06 08:22:36 2013 -0400
@@ -0,0 +1,11 @@
+chrom	value
+chr1	1.4
+chr2	1.1e2
+chr4	0.1
+chr10	0.4
+chr15	0.0314
+chr15	3.14e-2
+chr20	0.9
+chr22	+1.3
+chrM	3e-1
+chrX	-0.3
--- a/tool_dependencies.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/tool_dependencies.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -4,7 +4,7 @@
         <repository changeset_revision="83be2b421d3b" name="package_gnu_coreutils_8_21" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="gnu_awk" version="4.1.0">
-        <repository changeset_revision="196065d1785d" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="cbe9f1c8c98b" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="gnu_grep" version="2.14">
         <repository changeset_revision="af98f72cd785" name="package_gnu_grep_2_14" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
@@ -13,6 +13,6 @@
         <repository changeset_revision="4a4691c78042" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <set_environment version="1.0">
-        <environment_variable action="set_to" name="UNIX_TOOLS_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR/scripts</environment_variable>
+        <environment_variable action="set_to" name="TP_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
     </set_environment>
 </tool_dependency>
--- a/unsorted_uniq.xml	Thu Sep 05 12:42:48 2013 -0400
+++ b/unsorted_uniq.xml	Sun Oct 06 08:22:36 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="unixtools_sorted_uniq" name="Unique" version="0.3">
+<tool id="tp_sorted_uniq" name="Unique" version="0.3">
   <description>occurrences of each record</description>
     <requirements>
         <requirement type="package" version="8.21">gnu_coreutils</requirement>