# HG changeset patch # User bgruening # Date 1381062156 14400 # Node ID 7068d15482349fe0f1f8ec28ac65ff40b8fefb4c # Parent fc862d5bccaf845875564d2be042721f6441748e Uploaded diff -r fc862d5bccaf -r 7068d1548234 ansi2html.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ansi2html.sh Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,331 @@ +#!/bin/sh + +# Convert ANSI (terminal) colours and attributes to HTML + +# Author: +# http://www.pixelbeat.org/docs/terminal_colours/ +# Examples: +# ls -l --color=always | ansi2html.sh > ls.html +# git show --color | ansi2html.sh > last_change.html +# Generally one can use the `script` util to capture full terminal output. +# Changes: +# V0.1, 24 Apr 2008, Initial release +# V0.2, 01 Jan 2009, Phil Harnish +# Support `git diff --color` output by +# matching ANSI codes that specify only +# bold or background colour. +# P@draigBrady.com +# Support `ls --color` output by stripping +# redundant leading 0s from ANSI codes. +# Support `grep --color=always` by stripping +# unhandled ANSI codes (specifically ^[[K). +# V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ +# Remove cat -v usage which mangled non ascii input. +# Cleanup regular expressions used. +# Support other attributes like reverse, ... +# P@draigBrady.com +# Correctly nest tags (even across lines). +# Add a command line option to use a dark background. +# Strip more terminal control codes. +# V0.4, 17 Sep 2009, P@draigBrady.com +# Handle codes with combined attributes and color. +# Handle isolated attributes with css. +# Strip more terminal control codes. 
+# V0.12, 12 Jul 2011 +# http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh + +if [ "$1" = "--version" ]; then + echo "0.12" && exit +fi + +if [ "$1" = "--help" ]; then + echo "This utility converts ANSI codes in data passed to stdin" >&2 + echo "It has 2 optional parameters:" >&2 + echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 + echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 + exit +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +if [ "$1" = "--palette=solarized" ]; then + # See http://ethanschoonover.com/solarized + P0=073642; P1=D30102; P2=859900; P3=B58900; + P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; + P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; + P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; + shift; +elif [ "$1" = "--palette=solarized-xterm" ]; then + # Above mapped onto the xterm 256 color palette + P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; + P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; + P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; + P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; + shift; +elif [ "$1" = "--palette=tango" ]; then + # Gnome default + P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; + P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; + P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; + P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; + shift; +elif [ "$1" = "--palette=xterm" ]; then + P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; + P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; + P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; + P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; + shift; +else # linux console + P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; + P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; + P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; + P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; + [ "$1" = "--palette=linux" ] && shift +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +echo -n " + + + + + + +
+'
+
+p='\x1b\['        #shortcut to match escape codes
+P="\(^[^°]*\)¡$p" #expression to match prepended codes below
+
+# Handle various xterm control sequences.
+# See /usr/share/doc/xterm-*/ctlseqs.txt
+sed "
+s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
+s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
+
+#handle carriage returns
+s#^.*\r\{1,\}\([^$]\)#\1#
+s#\r\$## # strip trailing \r
+
+# strip other non SGR escape sequences
+s#[\x07]##g
+s#\x1b[]>=\][0-9;]*##g
+s#\x1bP+.\{5\}##g
+s#${p}[0-9;?]*[^0-9;?m]##g
+
+#remove backspace chars and what they're backspacing over
+:rm_bs
+s#[^\x08]\x08##g; t rm_bs
+" |
+
+# Normalize the input before transformation
+sed "
+# escape HTML
+s#\&#\&#g; s#>#\>#g; s#<#\<#g; s#\"#\"#g
+
+# normalize SGR codes a little
+
+# split 256 colors out and mark so that they're not
+# recognised by the following 'split combined' line
+:e
+s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
+s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
+
+:c
+s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
+s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
+s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
+s#${p}m#${p}0m#g                                          #add leading 0 to norm
+
+# undo any 256 color marking
+s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
+
+# map 16 color codes to color + bold
+s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
+s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
+
+# change 'reset' code to a single char, and prepend a single char to
+# other codes so that we can easily do negative matching, as sed
+# does not support look behind expressions etc.
+s#°#\°#g; s#${p}0m#°#g
+s#¡#\¡#g; s#${p}[0-9;]*m#¡&#g
+" |
+
+# Convert SGR sequences to HTML
+sed "
+:ansi_to_span # replace ANSI codes with CSS classes
+t ansi_to_span # hack so t commands below only apply to preceding s cmd
+
+/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code
+
+# common combinations to minimise html (optional)
+s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1#;t span_count
+s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1#;t span_count
+
+s#${P}1m#\1#;                            t span_count
+s#${P}4m#\1#;                       t span_count
+s#${P}5m#\1#;                           t span_count
+s#${P}7m#\1#;                         t span_count
+s#${P}9m#\1#;                    t span_count
+s#${P}3\([0-9]\)m#\1#;                    t span_count
+s#${P}4\([0-9]\)m#\1#;                    t span_count
+
+s#${P}38;5;\([0-9]\{1,3\}\)m#\1#;        t span_count
+s#${P}48;5;\([0-9]\{1,3\}\)m#\1#;        t span_count
+
+s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
+
+b # next line of input
+
+# add a corresponding span end flag
+:span_count
+x; s/^/s/; x
+b ansi_to_span
+
+# replace 'reset code' with correct number of </span> tags
+:span_end
+x
+/^s/ {
+  s/^.//
+  x
+  s#°#°#
+  b span_end
+}
+x
+s#°##
+b ansi_to_span
+" |
+
+# Convert alternative character set
+# Note we convert here rather than at the start: converting first would mean
+# avoiding conversion of SGR codes etc., whereas converting here we only have
+# to avoid converting text between &...; or <...>
+#
+# Note we could use sed to do this based around:
+#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
+# However that would be very awkward as we need to convert only some input.
+# The basic scheme that we do in the python script below is:
+#  1. enable transliterate once ¡ char seen
+#  2. disable once µ char seen (may be on a different line from ¡)
+#  3. never transliterate between &; or <> chars
+sed "
+# change 'smacs' and 'rmacs' to a single char so that we can easily do
+# negative matching, as sed does not support look behind expressions etc.
+# Note we don't use ° like above as that's part of the alternate charset.
+s#\x1b(0#¡#g;
+s#µ#\µ#g; s#\x1b(B#µ#g
+" |
+(
+python -c "
+# vim:fileencoding=utf8
+
+import sys
+import locale
+encoding=locale.getpreferredencoding()
+
+old='abcdefghijklmnopqrstuvwxyz{}\`~'
+new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
+new=unicode(new, 'utf-8')
+table=range(128)
+for o,n in zip(old, new): table[ord(o)]=n
+
+(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
+
+state = STANDARD
+last_mode = STANDARD
+for c in unicode(sys.stdin.read(), encoding):
+  if state == HTML_TAG:
+    if c == '>':
+      state = last_mode
+  elif state == HTML_ENTITY:
+    if c == ';':
+      state = last_mode
+  else:
+    if c == '<':
+      state = HTML_TAG
+    elif c == '&':
+      state = HTML_ENTITY
+    elif c == u'¡' and state == STANDARD:
+      state = ALTERNATIVE
+      last_mode = ALTERNATIVE
+      continue
+    elif c == u'µ' and state == ALTERNATIVE:
+      state = STANDARD
+      last_mode = STANDARD
+      continue
+    elif state == ALTERNATIVE:
+      c = c.translate(table)
+  sys.stdout.write(c.encode(encoding))
+" 2>/dev/null ||
+sed 's/[¡µ]//g' # just strip alternative flag chars
+)
+
+echo "
+ +" diff -r fc862d5bccaf -r 7068d1548234 awk.xml --- a/awk.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/awk.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + with awk gnu_awk diff -r fc862d5bccaf -r 7068d1548234 cut.xml --- a/cut.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/cut.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + columns from a table gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 easyjoin.xml --- a/easyjoin.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/easyjoin.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 find_and_replace.xml --- a/find_and_replace.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/find_and_replace.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + parts of text find_and_replace diff -r fc862d5bccaf -r 7068d1548234 grep.xml --- a/grep.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/grep.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,13 +1,13 @@ - + (grep) gnu_coreutils gnu_grep - UNIX_TOOLS_SCRIPT_PATH + TP_SCRIPT_PATH #if str($color) == "COLOR": - GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$UNIX_TOOLS_SCRIPT_PATH/ansi2html.sh > "${output}" + GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$TP_SCRIPT_PATH/ansi2html.sh > "${output}" #else: grep -P "$@" -- "${url_paste}" '${input}' | grep -v "^--$" > "${output}" #end if diff -r fc862d5bccaf -r 7068d1548234 head.xml --- a/head.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/head.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + lines from a dataset (head) gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 multijoin.xml --- a/multijoin.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/multijoin.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + (combine multiple files) multijoin --key '$key_column' diff -r fc862d5bccaf -r 7068d1548234 readme.rst --- a/readme.rst Thu Sep 05 12:42:48 2013 -0400 +++ b/readme.rst Sun Oct 06 08:22:36 2013 -0400 @@ -1,17 +1,18 @@ -These are Galaxy wrappers for 
common unix text-processing tools -=============================================================== +Galaxy wrappers for common unix text-processing tools +===================================================== The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu ) in Cold Spring Harbor Laboratory ( http://www.cshl.edu ). -The tools are: +Tools: * awk - The AWK programmning language ( http://www.gnu.org/software/gawk/ ) * sed - Stream Editor ( http://sed.sf.net ) * grep - Search files ( http://www.gnu.org/software/grep/ ) * sort_columns - Sorting every line according to there columns * GNU Coreutils programs ( http://www.gnu.org/software/coreutils/ ): + * sort - sort files * join - join two files, based on common key field. * cut - keep/discard fields from a file @@ -37,7 +38,7 @@ 3. SED version 4.2 *with* a special patch 4. Grep with PCRE support -These will be installed automatically with the Galaxy Tool Shed. +These will be installed automatically with the Galaxy `Tool Shed`_. ------------------- @@ -50,22 +51,29 @@ These commands are DISABLED using the "--sandbox" parameter to awk and sed. User trying to run an awk program similar to: + BEGIN { system("ls") } + Will get an error (in Galaxy) saying: + fatal: 'system' function not allowed in sandbox mode. User trying to run a SED program similar to: + 1els + will get an error (in Galaxy) saying: + sed: -e expression #1, char 2: e/r/w commands disabled in sandbox mode + That being said, if you do find some vulnerability in these tools, please let me know and I'll try fix them. ------------ Installation ------------ -Should be done with the Galaxy `Tool Shed`_. +Should be done via the Galaxy `Tool Shed`_. .. _`Tool Shed`: http://wiki.galaxyproject.org/Tool%20Shed @@ -84,6 +92,30 @@ - evaluate the join wrappers against the Galaxy ones, maybe we should drop them +------- +License +------- + +* Copyright (c) 2009-2013 A. Gordon (gordon cshl dot edu) +* Copyright (c) 2013 B. 
Gruening (bjoern dot gruening gmail dot com) +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff -r fc862d5bccaf -r 7068d1548234 remove_ending.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/remove_ending.xml Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,46 @@ + + of a file + + gnu_coreutils + + tail -n -$num_lines $infile $outfile + + + + + + + + + + + + + + + + +**What it does** + +This tool removes specified number of lines from the ending of a dataset + +----- + +**Example** + +Input File:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + chr7 56761 56781 D17003_CTCF_R4 220 + + chr7 56772 56792 D17003_CTCF_R7 372 + + chr7 56775 56795 D17003_CTCF_R4 207 + + +After removing the last 2 lines the dataset will look like this:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + chr7 56761 56781 D17003_CTCF_R4 220 + + + + diff -r fc862d5bccaf -r 7068d1548234 replace_text_in_column.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_column.xml Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,128 @@ + + in a specific column + + gnu_awk + + + #adapt to awk's quirks - to pass an acutal backslash - two backslashes are required (just like in a C string) + REPLACE_PATTERN=\${$replace_pattern//\\/\\\\}; + awk -v OFS="\t" --re-interval --sandbox "{ \$$column = gensub( /$find_pattern/, \"$replace_pattern\", \"g\", \$$column ) ; print \$0 ; }" "$input" > "$output" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool performs find & replace operation on a specified column in a given file. + +.. class:: infomark + +The **pattern to find** uses the **extended regular** expression syntax (same as running 'awk --re-interval'). + +.. class:: infomark + +**TIP:** If you need more complex patterns, use the *awk* tool. + +----- + + +**Examples of Find Patterns** + +- **HELLO** The word 'HELLO' (case sensitive). +- **AG.T** The letters A,G followed by any single character, followed by the letter T. +- **A{4,}** Four or more consecutive A's. 
+- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. +- **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern. + + +**Examples of Replace Patterns** + +- **WORLD** The word 'WORLD' will be placed whereever the find pattern was found. +- **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the begining and '-BAR' at the end. **&** (ampersand) represents the matched find pattern. +- **\\1** The text which matched the first parenthesis in the Find Pattern. + + + + +----- + +**Example 1** + +**Find Pattern:** HELLO +**Replace Pattern:** WORLD + +Every time the word HELLO is found, it will be replaced with the word WORLD. This operation affects only the selected column. + +----- + +**Example 2** + +**Find Pattern:** ^(.{4}) +**Replace Pattern:** &\\t + +Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns. This operation affects only the selected column. + + +----- + +**Extened Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string(but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item ismatched n or more times. 
+ - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meaning: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + + +**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. + + + diff -r fc862d5bccaf -r 7068d1548234 replace_text_in_line.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_line.xml Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,128 @@ + + in entire line + + gnu_sed + + + + sed -r --sandbox "s/$find_pattern/$replace_pattern/g" "$input" > "$output" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool performs find & replace operation on a specified file. + +.. class:: infomark + +The **pattern to find** uses the **extended regular** expression syntax (same as running 'sed -r'). + +.. class:: infomark + +**TIP:** If you need more complex patterns, use the *sed* tool. + +----- + + +**Examples of Find Patterns** + +- **HELLO** The word 'HELLO' (case sensitive). +- **AG.T** The letters A,G followed by any single character, followed by the letter T. +- **A{4,}** Four or more consecutive A's. +- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. +- **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. 
When using parenthesis, the matched content of the parenthesis can be accessed with **\1** in the **replace** pattern. + + + +**Examples of Replace Patterns** + +- **WORLD** The word 'WORLD' will be placed whereever the find pattern was found. +- **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the begining and '-BAR' at the end. **&** (ampersand) represents the matched find pattern. +- **\\1** The text which matched the first parenthesis in the Find Pattern. + + + + +----- + +**Example 1** + +**Find Pattern:** HELLO +**Replace Pattern:** WORLD + +Every time the word HELLO is found, it will be replaced with the word WORLD. + + +----- + +**Example 2** + +**Find Pattern:** ^(.{4}) +**Replace Pattern:** &\\t + +Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns. + + +----- + +**Extened Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string(but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item ismatched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. 
+- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meaning: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + + +**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. + + + diff -r fc862d5bccaf -r 7068d1548234 scripts/ansi2html.sh --- a/scripts/ansi2html.sh Thu Sep 05 12:42:48 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,331 +0,0 @@ -#!/bin/sh - -# Convert ANSI (terminal) colours and attributes to HTML - -# Author: -# http://www.pixelbeat.org/docs/terminal_colours/ -# Examples: -# ls -l --color=always | ansi2html.sh > ls.html -# git show --color | ansi2html.sh > last_change.html -# Generally one can use the `script` util to capture full terminal output. -# Changes: -# V0.1, 24 Apr 2008, Initial release -# V0.2, 01 Jan 2009, Phil Harnish -# Support `git diff --color` output by -# matching ANSI codes that specify only -# bold or background colour. -# P@draigBrady.com -# Support `ls --color` output by stripping -# redundant leading 0s from ANSI codes. -# Support `grep --color=always` by stripping -# unhandled ANSI codes (specifically ^[[K). -# V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ -# Remove cat -v usage which mangled non ascii input. -# Cleanup regular expressions used. -# Support other attributes like reverse, ... -# P@draigBrady.com -# Correctly nest tags (even across lines). -# Add a command line option to use a dark background. -# Strip more terminal control codes. -# V0.4, 17 Sep 2009, P@draigBrady.com -# Handle codes with combined attributes and color. -# Handle isolated attributes with css. 
-# Strip more terminal control codes. -# V0.12, 12 Jul 2011 -# http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh - -if [ "$1" = "--version" ]; then - echo "0.12" && exit -fi - -if [ "$1" = "--help" ]; then - echo "This utility converts ANSI codes in data passed to stdin" >&2 - echo "It has 2 optional parameters:" >&2 - echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 - echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 - exit -fi - -[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } - -if [ "$1" = "--palette=solarized" ]; then - # See http://ethanschoonover.com/solarized - P0=073642; P1=D30102; P2=859900; P3=B58900; - P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; - P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; - P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; - shift; -elif [ "$1" = "--palette=solarized-xterm" ]; then - # Above mapped onto the xterm 256 color palette - P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; - P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; - P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; - P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; - shift; -elif [ "$1" = "--palette=tango" ]; then - # Gnome default - P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; - P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; - P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; - P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; - shift; -elif [ "$1" = "--palette=xterm" ]; then - P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; - P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; - P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; - P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; - shift; -else # linux console - P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; - P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; - P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; - P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; - [ "$1" = "--palette=linux" ] && shift -fi - -[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } - -echo -n " - - - - - - -
-'
-
-p='\x1b\['        #shortcut to match escape codes
-P="\(^[^°]*\)¡$p" #expression to match prepended codes below
-
-# Handle various xterm control sequences.
-# See /usr/share/doc/xterm-*/ctlseqs.txt
-sed "
-s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
-s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
-
-#handle carriage returns
-s#^.*\r\{1,\}\([^$]\)#\1#
-s#\r\$## # strip trailing \r
-
-# strip other non SGR escape sequences
-s#[\x07]##g
-s#\x1b[]>=\][0-9;]*##g
-s#\x1bP+.\{5\}##g
-s#${p}[0-9;?]*[^0-9;?m]##g
-
-#remove backspace chars and what they're backspacing over
-:rm_bs
-s#[^\x08]\x08##g; t rm_bs
-" |
-
-# Normalize the input before transformation
-sed "
-# escape HTML
-s#\&#\&#g; s#>#\>#g; s#<#\<#g; s#\"#\"#g
-
-# normalize SGR codes a little
-
-# split 256 colors out and mark so that they're not
-# recognised by the following 'split combined' line
-:e
-s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
-s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
-
-:c
-s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
-s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
-s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
-s#${p}m#${p}0m#g                                          #add leading 0 to norm
-
-# undo any 256 color marking
-s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
-
-# map 16 color codes to color + bold
-s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
-s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
-
-# change 'reset' code to a single char, and prepend a single char to
-# other codes so that we can easily do negative matching, as sed
-# does not support look behind expressions etc.
-s#°#\°#g; s#${p}0m#°#g
-s#¡#\¡#g; s#${p}[0-9;]*m#¡&#g
-" |
-
-# Convert SGR sequences to HTML
-sed "
-:ansi_to_span # replace ANSI codes with CSS classes
-t ansi_to_span # hack so t commands below only apply to preceeding s cmd
-
-/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceeding code
-
-# common combinations to minimise html (optional)
-s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1#;t span_count
-s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1#;t span_count
-
-s#${P}1m#\1#;                            t span_count
-s#${P}4m#\1#;                       t span_count
-s#${P}5m#\1#;                           t span_count
-s#${P}7m#\1#;                         t span_count
-s#${P}9m#\1#;                    t span_count
-s#${P}3\([0-9]\)m#\1#;                    t span_count
-s#${P}4\([0-9]\)m#\1#;                    t span_count
-
-s#${P}38;5;\([0-9]\{1,3\}\)m#\1#;        t span_count
-s#${P}48;5;\([0-9]\{1,3\}\)m#\1#;        t span_count
-
-s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
-
-b # next line of input
-
-# add a corresponding span end flag
-:span_count
-x; s/^/s/; x
-b ansi_to_span
-
-# replace 'reset code' with correct number of  tags
-:span_end
-x
-/^s/ {
-  s/^.//
-  x
-  s#°#°#
-  b span_end
-}
-x
-s#°##
-b ansi_to_span
-" |
-
-# Convert alternative character set
-# Note we convert here, as if we do at start we have to worry about avoiding
-# conversion of SGR codes etc., whereas doing here we only have to
-# avoid conversions of stuff between &...; or <...>
-#
-# Note we could use sed to do this based around:
-#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
-# However that would be very awkward as we need to only conv some input.
-# The basic scheme that we do in the python script below is:
-#  1. enable transliterate once ¡ char seen
-#  2. disable once µ char seen (may be on diff line to ¡)
-#  3. never transliterate between &; or <> chars
-sed "
-# change 'smacs' and 'rmacs' to a single char so that we can easily do
-# negative matching, as sed does not support look behind expressions etc.
-# Note we don't use ° like above as that's part of the alternate charset.
-s#\x1b(0#¡#g;
-s#µ#\µ#g; s#\x1b(B#µ#g
-" |
-(
-python -c "
-# vim:fileencoding=utf8
-
-import sys
-import locale
-encoding=locale.getpreferredencoding()
-
-old='abcdefghijklmnopqrstuvwxyz{}\`~'
-new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
-new=unicode(new, 'utf-8')
-table=range(128)
-for o,n in zip(old, new): table[ord(o)]=n
-
-(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
-
-state = STANDARD
-last_mode = STANDARD
-for c in unicode(sys.stdin.read(), encoding):
-  if state == HTML_TAG:
-    if c == '>':
-      state = last_mode
-  elif state == HTML_ENTITY:
-    if c == ';':
-      state = last_mode
-  else:
-    if c == '<':
-      state = HTML_TAG
-    elif c == '&':
-      state = HTML_ENTITY
-    elif c == u'¡' and state == STANDARD:
-      state = ALTERNATIVE
-      last_mode = ALTERNATIVE
-      continue
-    elif c == u'µ' and state == ALTERNATIVE:
-      state = STANDARD
-      last_mode = STANDARD
-      continue
-    elif state == ALTERNATIVE:
-      c = c.translate(table)
-  sys.stdout.write(c.encode(encoding))
-" 2>/dev/null ||
-sed 's/[¡µ]//g' # just strip aternative flag chars
-)
-
-echo "
- -" diff -r fc862d5bccaf -r 7068d1548234 sed.xml --- a/sed.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/sed.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + with sed gnu_sed diff -r fc862d5bccaf -r 7068d1548234 sort.xml --- a/sort.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/sort.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + data in ascending or descending order gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 sort_rows.xml --- a/sort_rows.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/sort_rows.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + according to their columns python -c 'for line in ["\t".join(sorted(line.strip().split("\t"))) for line in open("$input").readlines() ]: print line' > $outfile diff -r fc862d5bccaf -r 7068d1548234 sorted_uniq.xml --- a/sorted_uniq.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/sorted_uniq.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + assuming sorted input file gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 tail.xml --- a/tail.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/tail.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + lines from a dataset (tail) gnu_coreutils diff -r fc862d5bccaf -r 7068d1548234 test-data/join_input1__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,9 @@ +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_input1__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,7 @@ +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_input2__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,10 @@ +Gene Experiment1 +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5 diff -r fc862d5bccaf -r 
7068d1548234 test-data/join_input2__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,8 @@ +Gene Experiment2 +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_output1_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,13 @@ +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_output1_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,10 @@ +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_output2_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/join_output2_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 
3 diff -r fc862d5bccaf -r 7068d1548234 test-data/remove_ending_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,5 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + +chr7 56772 56792 D17003_CTCF_R7 372 + +chr7 56775 56795 D17003_CTCF_R4 207 + diff -r fc862d5bccaf -r 7068d1548234 test-data/remove_ending_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r fc862d5bccaf -r 7068d1548234 test-data/replace_text_in_column_in1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column_in1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r fc862d5bccaf -r 7068d1548234 test-data/replace_text_in_column_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +chr7 56632 56652 R6 310 + +chr7 56736 56756 R7 354 + +chr7 56761 56781 R4 220 + diff -r fc862d5bccaf -r 7068d1548234 test-data/replace_text_in_line_in1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line_in1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r fc862d5bccaf -r 7068d1548234 test-data/replace_text_in_line_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_FOOBAR_R6 310 + +chr7 56736 56756 D17003_FOOBAR_R7 354 + 
+chr7 56761 56781 D17003_FOOBAR_R4 220 + diff -r fc862d5bccaf -r 7068d1548234 test-data/sort_and_join_input2__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,10 @@ +Gene Experiment1 +LOC468 3 +CDKN2B 5 +RI3BP 5 +ORN4 1 +POTE15 3 +OR4M2 12 +LOC255 9 +DHX37 8 +CDKN2A 4 diff -r fc862d5bccaf -r 7068d1548234 test-data/sort_and_join_input2__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,8 @@ +Gene Experiment2 +ILKA3 8 +UTJX 3 +HES7 1 +MOUB 3 +LOC255 9 +DHX37 8 +CDKN2A 4 diff -r fc862d5bccaf -r 7068d1548234 test-data/sort_and_join_output2_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r fc862d5bccaf -r 7068d1548234 test-data/sort_and_join_output2_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 
3 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_awk_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_awk_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,10 @@ +chr10 0.4 +chr1 1.4 +chrM 3e-1 +chr2 1.1e2 +chr15 3.14e-2 +chr15 0.0314 +chr4 0.1 +chr20 0.9 +chr22 +1.3 +chrX -0.3 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_awk_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_awk_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,4 @@ +12.6 chr1 +990 chr2 +8.1 chr20 +11.7 chr22 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_cut_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_cut_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,4 @@ +fruit color weight price +apple red 1.4 0.4 +orange orange 1.1 0.2 +banana yellow 0.9 0.35 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_cut_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_cut_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,4 @@ +fruit weight price +apple 1.4 0.4 +orange 1.1 0.2 +banana 0.9 0.35 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_grep_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_grep_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,152 @@ +>FC0000042:5:1:220:1502 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:34:1398 +GATCTCAGTCCACCGCTGGGATTAACCTTGCCCCCC +>FC0000042:5:1:164:1396 +TATCTTATAGATATTTCCCTCTATACTAGTGACCCC +>FC0000042:5:1:333:925 +GAGCTTATAGCTTGTTATATACGTCAACCCCCCCCC +>FC0000042:5:1:204:1476 +GTACTTATATAGATACAAAATATGTATAGGATTGTC +>FC0000042:5:1:119:1511 +GATCTGCATGACCTGGGATTTGTTGGACCCCCCCCC +>FC0000042:5:1:202:1487 +CATGTATAGTCTCCAGTCTATACAACAACCCCCCCC +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:627:866 +AATATAGATATGGGACAAAACACATTTAGACCCCCC +>FC0000042:5:1:24:1357 +GATATAATATCAATATCAATCCACGCTTGTTCCCCC +>FC0000042:5:1:187:1492 +TATAGAAGCAGAAGAAACAACCTACTTTCACATGTT 
+>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:87:1299 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:206:1341 +GATATATAGCAGTGACCACCTCTAAGCCCCCCCCCC +>FC0000042:5:1:144:929 +GCCCTGGCATATTGTCAATATCTTTAAACCCCCCCC +>FC0000042:5:1:662:820 +TGTCTTTTCGATTTTTTTCTTTGCGTCACCCCCCCC +>FC0000042:5:1:53:1507 +GACCTCACTGTGGCATGAATCATACATTCCCCCCCC +>FC0000042:5:1:182:1502 +AATGCTTGGCAAAGCTCAACTTCGTTGCCCCCCCCC +>FC0000042:5:1:194:1423 +GATCCTATAGGTCTCGATTGGTCTTTTATTCTTTTT +>FC0000042:5:1:35:1444 +GCTATAGCACGGCATAGTGCGATACTAGTACCCCCC +>FC0000042:5:1:667:872 +GACTATAGGCGGAATGATAATGTCAAATAAGTAGTT +>FC0000042:5:1:147:1438 +GATCAAGGAGACTAGGGAGGTAGGAGTTACTCCCCC +>FC0000042:5:1:467:510 +GAACCACTATAGTGACATGGAACACGCGTGAACCCC +>FC0000042:5:1:1553:1707 +TATAGTTACCCTACTGGGCCGACGATTCCCTTACGA +>FC0000042:5:1:207:964 +AATCTATAGATTTTTCTATTATTGTGTCCTCACCCC +>FC0000042:5:1:169:1468 +GCTCTATAGTTCGAGTTACCAAACTCTTCCCCCCCC +>FC0000042:5:1:42:1465 +GCTCTTTAGGTTTGAACCTGTAGACTTGAGGGGCAT +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:221:1465 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:196:1450 +AATATAGTCTATCCAACAAGATGTAACCCCCCCCCC +>FC0000042:5:1:86:1413 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:453:514 +GATATCTTCGTTTTATATTGAAACTGGCCCCCCCCC +>FC0000042:5:1:150:1415 +TATAGGGCCCTGTATGGTTGCTTGACTAGGGGCTGC +>FC0000042:5:1:191:1475 +GATCCATCCCAATCTCTACGATTGAAAGCATCGGGA +>FC0000042:5:1:26:1407 +GTTATAGAGGCGGGAAGGTGAGAATGCCCCCCCCCC +>FC0000042:5:1:107:1407 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:388:780 +GATCTATAGCTTCTTTAGCTTGGAAACTGGTCAGCC +>FC0000042:5:1:223:1535 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:145:783 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:449:876 +GACCATCAATCAGGTGGAAAGCAGGGCCCCCCCCCC +>FC0000042:5:1:212:1325 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:194:1485 +GAACCGAATCCAACCTGTTTCATTCCTCAGATCCCC 
+>FC0000042:5:1:507:494 +GATCTTATAGAATTTTTGACAACATAAGTTACCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:633:480 +GAGCTGTGTGCATCTGTCCTGAGAGAGGCAAGATTT +>FC0000042:5:1:53:1443 +GTAATGTTATAGCTAGGATTTTGGAGTTTGGTCCTC +>FC0000042:5:1:45:915 +GTATAGCAGCCTAATAAGGAGCTGGGGACCCCCCCC +>FC0000042:5:1:39:1343 +GTTCTATTTTCGATAAAACTGAACCACCCCCCCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:187:1507 +GAACTAATCCTGATTTATACAACGGCTCCCCCCCCC +>FC0000042:5:1:91:1364 +AATTTATAGCCACTCTAATTCCGTTTGGTTCCCCCC +>FC0000042:5:1:1542:1751 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:146:886 +GATCTACGATGTACCTTACGCCTCCGAGCATCCCCC +>FC0000042:5:1:615:861 +GATCTACATTATAGATAATGAAGTTCCATTTCCCCC +>FC0000042:5:1:52:792 +GATGTGGTATAGAGAGCAATTCGTTGGTTTTGCCCC +>FC0000042:5:1:153:1433 +GGTCTTTCTATAGAACGGAACGATATATTTTTCCCC +>FC0000042:5:1:540:800 +GAGCGAAAGTGATAGATGGAGGACTATATCTGCCCC +>FC0000042:5:1:160:1344 +GGTGTACTATAGCTATTAAGTCCAATCATGATAATA +>FC0000042:5:1:544:413 +GATCTCTGGAAAATATAAACCGGTGACCCCCCCCCC +>FC0000042:5:1:579:895 +AGTCTCGAATCAATGTATTTCATCGTGGTAATCCCC +>FC0000042:5:1:468:495 +TATTGATGCTCCCTGCCTGAAAGATACCCCCCCCCC +>FC0000042:5:1:383:831 +CTTCATGAATCTACTGTTGGCGTTTATTTTATCTGG +>FC0000042:5:1:112:1416 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:37:1299 +GATCGTGAGCTCTGTACCGGAAGTTCGTGGCTGCCA +>FC0000042:5:1:205:780 +TATAGTGTTCCACAAAGACTAGGTAACGCTTCATTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC +>FC0000042:5:1:54:1019 +AATCGCAGCATTCTGACACACAGGTTTCGGATGTAC +>FC0000042:5:1:587:867 +TATCTAATGTCATATTTTCAGACAAATTACTAGAAA +>FC0000042:5:1:319:990 +GATTTGTAAATTACTTCGAACATAGAAGTTCCCCCC +>FC0000042:5:1:453:829 +GAACTTACGGCATTAAGTTTAATCTTCAGCCACCCC +>FC0000042:5:1:159:1470 +GATCTGATAGTGTTGCGACGTAAATAAGTCCCCCCC +>FC0000042:5:1:487:820 +GATCTCGCAGGGATCAGTTATCCAGGTATTCCCCCC +>FC0000042:5:1:48:371 +AATCTATAATCTTTACCCGAGTTTAAGTCCCCCCCC +>FC0000042:5:1:1346:1739 +GATATAGGTTATACGTTTTTAGTCTTAGAGAAGTTT 
+>FC0000042:5:1:661:459 +GATCTGCTTTAACGATTGAGGACGATGCCCCCCCCC diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_grep_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_grep_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,14 @@ +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_grep_output2.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_grep_output2.html Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,9 @@ +
+GCTATAGAAATGTTAACATCGAATGTACATTATAAC
+CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC
+GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT
+GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC
+AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC
+GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT
+GAACGGACTATAGCCGGTATCCAAACATAAATGTTC
+
diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sed_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sed_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,4 @@ +This is a header line +Lorem ipsum dolor foo sit amet foo, +consectetur adipiscing elit. +Nam foo ut nulla non neque faucibus commodo diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sed_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sed_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,3 @@ +Lorem ipsum dolor bar sit amet foo, +consectetur adipiscing elit. +Nam bar ut nulla non neque faucibus commodo diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sed_output2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sed_output2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,2 @@ +Lorem ipsum dolor baz sit amet baz, +Nam baz ut nulla non neque faucibus commodo diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sort_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sort_input1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +chrom value +chr10 0.4 +chr1 1.4 +chrM 3e-1 +chr2 1.1e2 +chr15 3.14e-2 +chr15 0.0314 +chr4 0.1 +chr20 0.9 +chr22 +1.3 +chrX -0.3 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sort_input2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sort_input2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +Chrom Value +chr10 0.4 +chr1 1.4 +chrM 3e-1 +chr2 1.1e2 +chr15 3.14e-2 +chr15 0.0314 +chr4 0.1 +chr20 0.9 +chr22 +1.3 +chrX -0.3 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sort_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sort_output1.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +chrom value +chr2 1.1e2 +chr1 1.4 +chr22 +1.3 +chr20 0.9 +chr10 0.4 +chrM 3e-1 +chr4 0.1 +chr15 0.0314 +chr15 3.14e-2 +chrX -0.3 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sort_output2.txt --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/test-data/unix_sort_output2.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,10 @@ +chrom value +chrX -0.3 +chr15 3.14e-2 +chr4 0.1 +chrM 3e-1 +chr10 0.4 +chr20 0.9 +chr22 +1.3 +chr1 1.4 +chr2 1.1e2 diff -r fc862d5bccaf -r 7068d1548234 test-data/unix_sort_output3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unix_sort_output3.txt Sun Oct 06 08:22:36 2013 -0400 @@ -0,0 +1,11 @@ +chrom value +chr1 1.4 +chr2 1.1e2 +chr4 0.1 +chr10 0.4 +chr15 0.0314 +chr15 3.14e-2 +chr20 0.9 +chr22 +1.3 +chrM 3e-1 +chrX -0.3 diff -r fc862d5bccaf -r 7068d1548234 tool_dependencies.xml --- a/tool_dependencies.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/tool_dependencies.xml Sun Oct 06 08:22:36 2013 -0400 @@ -4,7 +4,7 @@ - + @@ -13,6 +13,6 @@ - $REPOSITORY_INSTALL_DIR/scripts + $REPOSITORY_INSTALL_DIR diff -r fc862d5bccaf -r 7068d1548234 unsorted_uniq.xml --- a/unsorted_uniq.xml Thu Sep 05 12:42:48 2013 -0400 +++ b/unsorted_uniq.xml Sun Oct 06 08:22:36 2013 -0400 @@ -1,4 +1,4 @@ - + occurrences of each record gnu_coreutils