Mercurial > repos > bgruening > text_processing
diff sorted_uniq.xml @ 1:a4ad586d1403 draft
Uploaded
author | bgruening |
---|---|
date | Thu, 05 Sep 2013 11:42:27 -0400 |
parents | ec66f9d90ef0 |
children | 7068d1548234 |
line wrap: on
line diff
--- a/sorted_uniq.xml Thu Sep 05 04:58:21 2013 -0400
+++ b/sorted_uniq.xml Thu Sep 05 11:42:27 2013 -0400
@@ -1,7 +1,8 @@
 <tool id="unixtools_uniq_tool" name="Unique lines">
- <description>from sorted file</description>
+ <description>assuming sorted input file</description>
 <requirements>
 <requirement type="package" version="8.21">gnu_coreutils</requirement>
+ <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
 </requirements>
 <command>
 uniq
@@ -15,17 +16,23 @@
 ## feature is not yet released, it will be in the next 8.22 version
 ##--group=$group
+
+ #if $count:
+ # count will print the count with spaces infrontof the line and
+ # with a space (not a tab) after the number, we need to cahnge that
+ | sed -e 's/ *//' -e 's/ /\t/' > $output
+ #end if
 > $output
 </command>
 <inputs>
- <param format="txt" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />
+ <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />
- <param name="count" type="boolean" label="count [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
- <param name="repeated" type="boolean" label="repeated [-d]" help="Only print duplicate lines" truevalue="-d" falsevalue="" />
- <param name="ignorecase" type="boolean" label="ignore case [-i]" help="Ignore differences in case when comparing" truevalue="-i" falsevalue="" />
- <param name="uniqueonly" type="boolean" label="unique only [-u]" help="Only print unique lines" truevalue="-u" falsevalue="" />
- <param name="skipfields" type="integer" label="skip fields [-f]" help="Avoid comparing the first N fields. 
(use zero to start from the first field)" size="2" value="0" />
+ <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
+ <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" />
+ <param name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" />
+ <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" />
+ <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" />
 <!--
 <param name="group" type="select" label="Output all lines, and delimit each unique group.">