changeset 0:b84c81b7d762 draft

Uploaded
author bgruening
date Mon, 02 Mar 2015 09:59:32 -0500
parents
children b97f0d9a31ff
files columnArrange.xml column_arrange.py test-data/columnarrange_input1.tab test-data/columnarrange_input2.tab test-data/columnarrange_result1.tab test-data/columnarrange_result2.tab tool_dependencies.xml
diffstat 7 files changed, 124 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/columnArrange.xml	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,86 @@
+<tool id="bg_column_arrange_by_header" name="Column arrange" version="0.1">
+  <description>by header name</description>
+  <!-- pandas provides the tab-separated reading and writing used by column_arrange.py. -->
+  <requirements>
+    <requirement type="package" version="0.14.1">pandas</requirement>
+  </requirements>
+  <!--<code file="column_sort_tab_options.py"/>-->
+  <!--
+    Command line: every repeat entry becomes one value of the columns option.
+    Values are wrapped in single quotes, the one character the sanitizer below
+    excludes from the valid set, so the shell passes header names through literally.
+  -->
+  <command interpreter="python">
+  <![CDATA[
+    column_arrange.py
+      --columns
+      #for $token in $rep_param_columns:
+        '$token.param_column'
+      #end for
+    --input '$param_input'
+    --output '$output'
+  ]]>
+  </command>
+  <inputs>
+    <param format="tabular" name="param_input" type="data" label="file to rearrange" />
+
+    <repeat name="rep_param_columns" min="1" title="Specify the first few columns by name">
+
+      <param name="param_column" type="text" size="50" value="" label="column">
+        <!-- Allow any printable character in a header name except the single quote. -->
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+        </sanitizer>
+      </param>
+      
+    </repeat>
+    <!--<param name="param_columns_dummy" type="select" optional="true" multiple="true" label="Columns" dynamic_options="get_options(param_input.file_name)" help="Do not select columns here, this field just shows possible columns."/>-->
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" />
+  </outputs>
+  <tests>
+    <!-- The output name in each test must match the data element declared in outputs. -->
+    <test>
+        <param name="param_input" value="columnarrange_input1.tab"/>
+        <repeat name="rep_param_columns">
+            <param name="param_column" value="fname"/>
+        </repeat>
+        <repeat name="rep_param_columns">
+            <param name="param_column" value="age"/>
+        </repeat>
+        <output name="output" file="columnarrange_result1.tab"/>
+    </test>
+    <test>
+        <param name="param_input" value="columnarrange_input2.tab"/>
+        <repeat name="rep_param_columns">
+            <param name="param_column" value="first name"/>
+        </repeat>
+        <repeat name="rep_param_columns">
+            <param name="param_column" value="nationality"/>
+        </repeat>
+        <output name="output" file="columnarrange_result2.tab"/>
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+With this tool you can specify (by naming the header) which columns need to be leftmost. The columns which are not specified will be ordered as before, right of the columns which were specified.
+
+Input file::
+
+    AHeader BHeader CHeader DHeader
+    a       b       c       d
+    a       b       c       d
+
+Specifying **CHeader** and **BHeader**, as the columns that should be leftmost, generates::
+
+    CHeader BHeader AHeader DHeader
+    c       b       a       d
+    c       b       a       d
+    
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_arrange.py	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,20 @@
+#!/usr/bin/env python 
+import pandas as pd
+import argparse
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-i', '--input', help='Tabular Input File Name')
+parser.add_argument('-o','--output', help='Tabular Output File')
+parser.add_argument('-c', '--columns',nargs='+', help='Column Headers to Sort By')
+args=parser.parse_args()
+
+cols=args.columns
+table=pd.read_csv(args.input,sep='\t')
+blist = list(table.columns)
+for token in cols:
+    blist.remove(token)
+sorted_table = table[args.columns + blist]
+# write without index, seperated by tabs
+sorted_table.to_csv(args.output,sep='\t',index=False)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/columnarrange_input1.tab	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,5 @@
+name	fname	age	nationality
+Woryt	Heiko	22	german
+Humte	Alfons	49	austrian
+Witz	Gerald	12	french
+Koulibaly	Ansgard	33	nigerian
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/columnarrange_input2.tab	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,5 @@
+second name	age	nationality	first name
+Woryt	22	german	Heiko
+Humte	49	austrian	Alfons
+Witz	12	french	Gerald
+Koulibaly	33	nigerian	Ansgard
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/columnarrange_result1.tab	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,5 @@
+fname	age	name	nationality
+Heiko	22	Woryt	german
+Alfons	49	Humte	austrian
+Gerald	12	Witz	french
+Ansgard	33	Koulibaly	nigerian
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/columnarrange_result2.tab	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,5 @@
+first name	nationality	second name	age
+Heiko	german	Woryt	22
+Alfons	austrian	Humte	49
+Gerald	french	Witz	12
+Ansgard	nigerian	Koulibaly	33
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Mar 02 09:59:32 2015 -0500
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!-- Declares the Tool Shed package that supplies the pandas 0.14.1 requirement. -->
+<tool_dependency>
+    <package name="pandas" version="0.14.1">
+        <repository
+            name="package_pandas_0_14"
+            owner="iuc"
+            changeset_revision="ef98e20431a7"
+            toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>