Mercurial > repos > bgruening > unique

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniq.py	Thu May 15 12:42:00 2014 -0400
@@ -0,0 +1,36 @@
+import sys
+import subprocess
+
+"""
+    We only need that file because galaxy do not understand the -t $'\t' term.
+    Otherwise that would be the right XML-only solution:
+    sort -u
+        $ignore_case
+        $is_numeric
+        -t \$'\t'
+        #if $adv_opts.adv_opts_selector=="advanced":
+            -k$adv_opts.column_start,$adv_opts.column_end
+        #end if
+        -o $outfile
+        $input
+"""
+
+if sys.argv[1].strip() != 'false':
+    ignore_case = sys.argv[1]
+else:
+    ignore_case = ''
+
+if sys.argv[2].strip() != 'false':
+    is_numeric = sys.argv[2]
+else:
+    is_numeric = ''
+
+try:
+    col_start = sys.argv[3]
+    col_end = sys.argv[4]
+    com = "sort -u %s %s -t '	' -k%s,%s -o %s %s" % (is_numeric, ignore_case, col_start, col_end, sys.argv[5], sys.argv[6])
+except:
+    # no advanced options selected
+    com = "sort -u %s %s -t '	' -o %s %s" % (is_numeric, ignore_case, sys.argv[3], sys.argv[4])
+
+subprocess.call(com, shell=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniq.xml	Thu May 15 12:42:00 2014 -0400
@@ -0,0 +1,75 @@
+<tool id="bg_uniq" name="Unique" version="0.3">
+  <description>occurrences of each record</description>
+  <command interpreter='python'>
+    uniq.py
+        $ignore_case
+        $is_numeric
+        #if $adv_opts.adv_opts_selector=="advanced":
+            $adv_opts.column_start
+            $adv_opts.column_end
+        #end if
+        $outfile
+        $input
+  </command>
+  <inputs>
+    <param name="input" type="data" format="tabular,text" label="from query" />
+    <param name="ignore_case" type="boolean" label="ignore differences in case when comparing" truevalue="-f" falsevalue="false" checked="false" help="ignore differences in case when comparing"/>
+    <param name="is_numeric" type="boolean" label="column only contains numeric values" truevalue="-n" falsevalue="false" checked="false" help="did the calumn have numeric values"/>
+    <conditional name="adv_opts">
+        <param name="adv_opts_selector" type="select" label="Advanced Options">
+          <option value="basic" selected="True">Hide Advanced Options</option>
+          <option value="advanced">Show Advanced Options</option>
+        </param>
+        <when value="basic" />
+        <when value="advanced">
+            <param name="column_start" label="Column start" type="data_column" data_ref="input" help="Unique on specific column range"/>
+            <param name="column_end" label="Column end" type="data_column" data_ref="input" help="Unique on specific column range"/>
+        </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="input" name="outfile" />
+  </outputs>
+  <tests>
+    <test>
+    </test>
+  </tests>
+  <help>
+
+ .. class:: infomark
+
+**Syntax**
+
+This tool returns all unique lines using the 'sort -u' command.
+
+-----
+
+.. class:: infomark
+
+The input file needs to be tab separated. Please convert your file if necessary.
+
+-----
+
+**Example**
+
+- Input file::
+
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr1   10  100  gene1
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+       chr2   10  100  gene4
+
+- Unique lines will result in::
+
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+
+
+</help>
+</tool>