Mercurial > repos > bgruening > text_processing

diff sorted_uniq.xml @ 4:56e80527c482 draft
Uploaded
author: bgruening
date: Wed, 07 Jan 2015 11:10:52 -0500
parents: 7068d1548234
children: 8928e6d1e7ba
--- a/sorted_uniq.xml	Sun Oct 06 08:22:36 2013 -0400
+++ b/sorted_uniq.xml	Wed Jan 07 11:10:52 2015 -0500
@@ -1,53 +1,95 @@
-<tool id="tp_uniq_tool" name="Unique lines">
+<tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0">
     <description>assuming sorted input file</description>
-    <requirements>
-        <requirement type="package" version="8.21">gnu_coreutils</requirement>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
         <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
-    </requirements>
+    </expand>
+    <version_command>uniq --version | head -n 1</version_command>
     <command>
-        uniq 
-            -f 
-            $skipfields 
-            $count 
-            $repeated 
-            $ignorecase 
-            $uniqueonly 
-            $input 
-            
-            ## feature is not yet released, it will be in the next 8.22 version
-            ##--group=$group
-            
-            #if $count:
-                # count will print the count with spaces infrontof the line and
-                # with a space (not a tab) after the number, we need to cahnge that
-                | sed -e 's/ *//' -e 's/ /\t/' &gt; $output
+<![CDATA[
+        uniq
+            #if $skipfields:
+                -f $skipfields
+            #end if
+            $ignorecase
+            ## --group needs an explicit method and replaces -c/-d/-u; emit it only when one was chosen
+            #if $grouping.grouping_select == 'yes' and str($grouping.group) not in ('', 'None'):
+                --group=$grouping.group
+            #elif $grouping.grouping_select == 'no':
+                $grouping.count
+                $grouping.repeated
+                $grouping.uniqueonly
+            #end if
+
+            "$infile"
+
+            #if $grouping.grouping_select == 'no' and $grouping.count:
+                ## count will print the count with spaces in front of the line and
+                ## with a space (not a tab) after the number, we need to change that
+                | sed -e 's/ *//' -e 's/ /\t/'
             #end if
-            &gt; $output
+            > "$outfile"
+]]>
     </command>
-
     <inputs>
-        <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />
-
-        <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
-        <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" />
-        <param name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" />
-        <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" />
-        <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" />
+        <param name="infile" format="tabular" type="data"
+            label="File to scan for unique values" help="Make sure you have sorted this file" />
 
-        <!--
-        <param name="group" type="select" label="Output all lines, and delimit each unique group.">
-            <option value="separate">Separate unique groups with a single delimiter</option>
-            <option value="prepend">Output a delimiter before each group of unique items</option>
-            <option value="append">Output a delimiter after each group of unique items.</option>
-            <option value="both">Output a delimiter around each group of unique items.</option>
-        </param>
-        -->
+        <conditional name="grouping">
+            <param name="grouping_select" type="select" label="Do you want to group each unique group?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param name="count" type="boolean" truevalue="-c" falsevalue="" 
+                    label="Counting number of occurrences" help="Prefix lines by the number of occurrences. (-c)" />
+                <param name="repeated" type="boolean" truevalue="-d" falsevalue="" 
+                    label="Only print duplicate lines" help="(-d)"/>
+                <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue=""
+                    label="Only print unique lines" help="(-u)" />
+            </when>
+            <when value="yes">
+                <param name="group" type="select" optional="True"
+                    label="Output all lines, and delimit each unique group" help="(--group)">
+                    <option value="">No grouping at all</option>
+                    <option value="separate">Separate unique groups with a single delimiter</option>
+                    <option value="prepend">Output a delimiter before each group of unique items</option>
+                    <option value="append">Output a delimiter after each group of unique items</option>
+                    <option value="both">Output a delimiter around each group of unique items</option>
+                </param>
+            </when>
+        </conditional>
+
+        <param name="ignorecase" type="boolean" truevalue="-i" falsevalue="" 
+            label="Ignore differences in case when comparing" help="(-i)"/>
+        <param name="skipfields" type="integer" size="2" value="0" 
+            label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" />
+
     </inputs>
-
     <outputs>
-        <data format="input" name="output" metadata_source="input"/>
+        <data format_source="infile" name="outfile" metadata_source="infile"/> <!-- datatype and metadata are inherited from infile -->
     </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="sorted_uniq1.tabular" />
+            <param name="grouping_select" value="no"/>
+            <param name="count" value="True"/>
+            <param name="ignorecase" value="True"/>
+            <param name="uniqueonly" value="True"/>
+            <output name="outfile" file="sorted_uniq_results1.tabular" />
+        </test>
+        <test>
+            <param name="infile" value="sorted_uniq1.tabular" />
+            <param name="ignorecase" value="True"/>
+            <param name="grouping_select" value="yes"/>
+            <param name="group" value="separate"/>
+            <output name="outfile" file="sorted_uniq_results2.tabular" />
+        </test>
+    </tests>
     <help>
+<![CDATA[
 This tool takes a sorted file and looks for lines that are unique.
 
 .. class:: warningmark
@@ -58,5 +100,7 @@
 
 You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
 
+@REFERENCES@
+]]>
     </help>
 </tool>
author	bgruening
date	Wed, 07 Jan 2015 11:10:52 -0500
parents	7068d1548234
children	8928e6d1e7ba