Mercurial > repos > bgruening > text_processing
diff sorted_uniq.xml @ 4:56e80527c482 draft
Uploaded
author | bgruening |
---|---|
date | Wed, 07 Jan 2015 11:10:52 -0500 |
parents | 7068d1548234 |
children | 8928e6d1e7ba |
line wrap: on
line diff
--- a/sorted_uniq.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/sorted_uniq.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,53 +1,95 @@ -<tool id="tp_uniq_tool" name="Unique lines"> +<tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0"> <description>assuming sorted input file</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> - </requirements> + </expand> + <version_command>uniq --version | head -n 1</version_command> <command> - uniq - -f - $skipfields - $count - $repeated - $ignorecase - $uniqueonly - $input - - ## feature is not yet released, it will be in the next 8.22 version - ##--group=$group - - #if $count: - # count will print the count with spaces infrontof the line and - # with a space (not a tab) after the number, we need to cahnge that - | sed -e 's/ *//' -e 's/ /\t/' > $output +<![CDATA[ + uniq + #if $skipfields: + -f $skipfields + #end if + $ignorecase + + #if $grouping.grouping_select == 'yes': + --group=$grouping.group + #else: + $grouping.count + $grouping.repeated + $grouping.uniqueonly + #end if + + "$infile" + + #if $grouping.grouping_select == 'no' and $grouping.count: + ## count will print the count with spaces in front of the line and + ## with a space (not a tab) after the number, we need to cahnge that + | sed -e 's/ *//' -e 's/ /\t/' #end if - > $output + > "$outfile" +]]> </command> - <inputs> - <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" /> - - <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" /> - <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" /> - <param 
name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" /> - <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" /> - <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" /> + <param name="infile" format="tabular" type="data" + label="File to scan for unique values" help="Make sure you have sorted this file" /> - <!-- - <param name="group" type="select" label="Output all lines, and delimit each unique group."> - <option value="separate">Separate unique groups with a single delimiter</option> - <option value="prepend">Output a delimiter before each group of unique items</option> - <option value="append">Output a delimiter after each group of unique items.</option> - <option value="both">Output a delimiter around each group of unique items.</option> - </param> - --> + <conditional name="grouping"> + <param name="grouping_select" type="select" label="Do you want to group each unique group?"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + <param name="count" type="boolean" truevalue="-c" falsevalue="" + label="Counting number of occurrences" help="Prefix lines by the number of occurrences. 
(-c)" /> + <param name="repeated" type="boolean" truevalue="-d" falsevalue="" + label="Only print duplicate lines" help="(-d)"/> + <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue="" + label="Only print unique lines" help="(-u)" /> + </when> + <when value="yes"> + <param name="group" type="select" optional="True" + label="Output all lines, and delimit each unique group" help="(--group)"> + <option value="">No grouping at all</option> + <option value="separate">Separate unique groups with a single delimiter</option> + <option value="prepend">Output a delimiter before each group of unique items</option> + <option value="append">Output a delimiter after each group of unique items</option> + <option value="both">Output a delimiter around each group of unique items</option> + </param> + </when> + </conditional> + + <param name="ignorecase" type="boolean" truevalue="-i" falsevalue="" + label="Ignore differences in case when comparing" help="(-i)"/> + <param name="skipfields" type="integer" size="2" value="0" + label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" /> + </inputs> - <outputs> - <data format="input" name="output" metadata_source="input"/> + <data format="input" name="outfile" metadata_source="infile"/> </outputs> + <tests> + <test> + <param name="infile" value="sorted_uniq1.tabular" /> + <param name="grouping_select" value="no"/> + <param name="count" value="True"/> + <param name="ignorecase" value="True"/> + <param name="uniqueonly" value="True"/> + <output name="outfile" file="sorted_uniq_results1.tabular" /> + </test> + <test> + <param name="infile" value="sorted_uniq1.tabular" /> + <param name="ignorecase" value="True"/> + <param name="grouping_select" value="yes"/> + <param name="group" value="separate"/> + <output name="outfile" file="sorted_uniq_results2.tabular" /> + </test> + </tests> <help> +<![CDATA[ This tool takes a sorted file and looks for lines that are unique. .. 
class:: warningmark @@ -58,5 +100,7 @@ You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools". +@REFERENCES@ +]]> </help> </tool>