Mercurial > repos > iuc > column_order_header_sort

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_order_header_sort.py	Wed Apr 12 17:16:52 2017 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+"""Reorder the columns of a delimited file so that its header names are sorted.
+
+Usage: column_order_header_sort.py INPUT OUTPUT DELIMITER KEY_COLUMN
+
+KEY_COLUMN is the 1-based number of a column to keep left-most; any value that
+is not an integer (for example "None") means that no column is pinned.
+The data rows are rewritten by gawk, which must be available on the PATH.
+"""
+
+import subprocess
+import sys
+
+AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}"""
+
+
+def _awk_quote( value ):
+    """Escape value for use inside a double-quoted awk string literal."""
+    # Backslashes go first, so the ones added for the quotes are not doubled again.
+    return value.replace( '\\', '\\\\' ).replace( '"', '\\"' )
+
+
+def _column_order( header, key_column=None ):
+    """Return 0-based column indexes: key_column first, the rest by header name.
+
+    header is the list of header names (assumed unique) and key_column is the
+    0-based index of the column to pin left-most, or None to pin nothing.
+    """
+    pinned = [] if key_column is None else [ key_column ]
+    others = [ index for index in range( len( header ) ) if index != key_column ]
+    others.sort( key=lambda index: header[ index ] )
+    return pinned + others
+
+
+def main( argv ):
+    """Run the tool on argv and return gawk's exit status.
+
+    Exits with an error message when the header has duplicate names or the
+    identifier column does not exist.
+    """
+    input_filename, output_filename, delimiter, key_column = argv[ 1:5 ]
+
+    # A non-integer identifier column means "no identifier column".
+    try:
+        key_column = int( key_column ) - 1
+    except ValueError:
+        key_column = None
+
+    with open( input_filename, 'r' ) as fh:
+        header = fh.readline().strip( '\r\n' ).split( delimiter )
+
+    # Explicit checks instead of assert, which is stripped under "python -O".
+    if len( header ) != len( set( header ) ):
+        sys.exit( "Header values must be unique" )
+    # Column 0 or a negative column would otherwise wrap around in Python and
+    # turn into "$0" (the whole record) or an invalid field in awk.
+    if key_column is not None and not 0 <= key_column < len( header ):
+        sys.exit( "Identifier column %i is out of range; header has %i column(s)" % ( key_column + 1, len( header ) ) )
+
+    fields = ",".join( "$%i" % ( index + 1 ) for index in _column_order( header, key_column ) )
+    separator = _awk_quote( delimiter )
+    awk_cmd = AWK_CMD % ( separator, separator, fields )
+    # The context manager closes the output file once gawk has finished.
+    with open( output_filename, 'wb' ) as output:
+        return subprocess.call( [ 'gawk', awk_cmd, input_filename ], stdout=output, shell=False )
+
+
+if __name__ == "__main__":
+    sys.exit( main( sys.argv ) )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_order_header_sort.xml	Wed Apr 12 17:16:52 2017 -0400
@@ -0,0 +1,39 @@
+<tool id="column_order_header_sort" name="Sort Column Order" version="0.0.1">
+    <description>
+        by heading
+    </description>
+    <requirements>
+        <requirement type="package" version="3.6.1">python</requirement>
+        <requirement type="package" version="4.1.3">gawk</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/column_order_header_sort.py'
+        '${input_tabular}'
+        '${output_tabular}'
+        '${ str($input_tabular.unsanitized.metadata.delimiter).replace( "'", "" ) }'
+        '${key_column}'
+    ]]>
+    </command>
+    <inputs>
+        <param name="input_tabular" type="data" format="tabular" multiple="False" optional="False" label="Tabular file"/>
+        <param name="key_column" type="data_column" data_ref="input_tabular" value="0" optional="True" label="Identifier column" help="This column will be made left-most."/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output_tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
+            <param name="key_column" value="1"/>
+            <output name="output_tabular" file="out_1.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+        Reorders the columns of a tabular file so that the values in its header (first) line are in sorted order.
+        Optionally, specify the Identifier column parameter to keep one column left-most; this is generally used for a key column that should not be sorted in among the other columns.
+        ]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in_1.tabular	Wed Apr 12 17:16:52 2017 -0400
@@ -0,0 +1,4 @@
+#KEY	b	c	a
+one	1-1	1-2	1-3
+two	1-4	1-5	1-6
+three	1-7	1-8	1-9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_1.tabular	Wed Apr 12 17:16:52 2017 -0400
@@ -0,0 +1,4 @@
+#KEY	a	b	c
+one	1-3	1-1	1-2
+two	1-6	1-4	1-5
+three	1-9	1-7	1-8
\ No newline at end of file